Added crawl page for organizations and services.
This commit is contained in:
parent
bf81404746
commit
68906ed88a
18 changed files with 371 additions and 149 deletions
|
@ -27,9 +27,6 @@ import fr.devinsy.statoolinfos.core.Factory;
|
||||||
import fr.devinsy.statoolinfos.core.Federation;
|
import fr.devinsy.statoolinfos.core.Federation;
|
||||||
import fr.devinsy.statoolinfos.core.StatoolInfosException;
|
import fr.devinsy.statoolinfos.core.StatoolInfosException;
|
||||||
import fr.devinsy.statoolinfos.crawl.CrawlCache;
|
import fr.devinsy.statoolinfos.crawl.CrawlCache;
|
||||||
import fr.devinsy.statoolinfos.crawl.CrawlJournal;
|
|
||||||
import fr.devinsy.statoolinfos.crawl.CrawlJournalFile;
|
|
||||||
import fr.devinsy.statoolinfos.crawl.Crawler;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The Class Manager.
|
* The Class Manager.
|
||||||
|
@ -47,7 +44,6 @@ public class HtmlizerContext
|
||||||
private Federation federation;
|
private Federation federation;
|
||||||
private Categories categories;
|
private Categories categories;
|
||||||
private CrawlCache cache;
|
private CrawlCache cache;
|
||||||
private CrawlJournal crawlJournal;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Instantiates a new manager.
|
* Instantiates a new manager.
|
||||||
|
@ -73,23 +69,9 @@ public class HtmlizerContext
|
||||||
logger.info("Htmlize directory setting: {}", this.configuration.getHtmlizeDirectoryPath());
|
logger.info("Htmlize directory setting: {}", this.configuration.getHtmlizeDirectoryPath());
|
||||||
|
|
||||||
this.cache = new CrawlCache(this.configuration.getCrawlCacheDirectory());
|
this.cache = new CrawlCache(this.configuration.getCrawlCacheDirectory());
|
||||||
this.crawlJournal = CrawlJournalFile.load(this.cache.restoreFile(Crawler.getJournalURL()));
|
|
||||||
|
|
||||||
File htmlizeInputFile = this.cache.restoreFile(this.configuration.getHtmlizeInputURL());
|
|
||||||
File htmlizeDirectory = this.configuration.getHtmlizeDirectory();
|
File htmlizeDirectory = this.configuration.getHtmlizeDirectory();
|
||||||
if (htmlizeInputFile == null)
|
if (htmlizeDirectory == null)
|
||||||
{
|
|
||||||
throw new IllegalArgumentException("Htmlize input undefined.");
|
|
||||||
}
|
|
||||||
else if (!htmlizeInputFile.exists())
|
|
||||||
{
|
|
||||||
throw new IllegalArgumentException("Htmlize input is missing.");
|
|
||||||
}
|
|
||||||
else if (htmlizeInputFile.isDirectory())
|
|
||||||
{
|
|
||||||
throw new IllegalArgumentException("Htmlize input is a directory.");
|
|
||||||
}
|
|
||||||
else if (htmlizeDirectory == null)
|
|
||||||
{
|
{
|
||||||
throw new IllegalArgumentException("Htmlize directory undefined.");
|
throw new IllegalArgumentException("Htmlize directory undefined.");
|
||||||
}
|
}
|
||||||
|
@ -105,7 +87,7 @@ public class HtmlizerContext
|
||||||
{
|
{
|
||||||
if (this.configuration.isFederation())
|
if (this.configuration.isFederation())
|
||||||
{
|
{
|
||||||
this.federation = Factory.loadFederation(htmlizeInputFile, this.cache);
|
this.federation = Factory.loadFederation(this.configuration.getHtmlizeInputURL(), this.cache);
|
||||||
this.categories = Factory.loadCategories(this.configuration.getCategoryFile(), this.federation);
|
this.categories = Factory.loadCategories(this.configuration.getCategoryFile(), this.federation);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
@ -160,11 +142,6 @@ public class HtmlizerContext
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
public CrawlJournal getCrawlJournal()
|
|
||||||
{
|
|
||||||
return this.crawlJournal;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Gets the federation.
|
* Gets the federation.
|
||||||
*
|
*
|
||||||
|
|
|
@ -29,6 +29,7 @@ import org.slf4j.LoggerFactory;
|
||||||
import fr.devinsy.statoolinfos.checker.PropertyChecker;
|
import fr.devinsy.statoolinfos.checker.PropertyChecker;
|
||||||
import fr.devinsy.statoolinfos.checker.PropertyChecks;
|
import fr.devinsy.statoolinfos.checker.PropertyChecks;
|
||||||
import fr.devinsy.statoolinfos.crawl.CrawlCache;
|
import fr.devinsy.statoolinfos.crawl.CrawlCache;
|
||||||
|
import fr.devinsy.statoolinfos.crawl.CrawlJournal;
|
||||||
import fr.devinsy.statoolinfos.properties.PathProperties;
|
import fr.devinsy.statoolinfos.properties.PathProperties;
|
||||||
import fr.devinsy.statoolinfos.properties.PathProperty;
|
import fr.devinsy.statoolinfos.properties.PathProperty;
|
||||||
import fr.devinsy.statoolinfos.properties.PathPropertyUtils;
|
import fr.devinsy.statoolinfos.properties.PathPropertyUtils;
|
||||||
|
@ -159,67 +160,63 @@ public class Factory
|
||||||
* @throws IOException
|
* @throws IOException
|
||||||
* Signals that an I/O exception has occurred.
|
* Signals that an I/O exception has occurred.
|
||||||
*/
|
*/
|
||||||
public static Federation loadFederation(final File federationFile, final CrawlCache cache) throws StatoolInfosException, IOException
|
public static Federation loadFederation(final URL inputURL, final CrawlCache cache) throws StatoolInfosException, IOException
|
||||||
{
|
{
|
||||||
Federation result;
|
Federation result;
|
||||||
|
|
||||||
PathProperties properties = PathPropertyUtils.load(federationFile);
|
if (inputURL == null)
|
||||||
result = new Federation(properties);
|
|
||||||
result.setInputFile(federationFile);
|
|
||||||
result.setLogoFileName(result.getTechnicalName() + "-logo" + StringUtils.defaultIfBlank(cache.getExtension(result.getLogoURL()), ".png"));
|
|
||||||
|
|
||||||
PropertyChecker checker = new PropertyChecker();
|
|
||||||
PropertyChecks checks = checker.checkFederation(result.getInputFile());
|
|
||||||
result.getInputChecks().addAll(checks);
|
|
||||||
result.getInputChecks().setFileName(result.getLocalFileName());
|
|
||||||
|
|
||||||
PathProperties subs = result.getByPrefix("subs");
|
|
||||||
for (PathProperty property : subs)
|
|
||||||
{
|
{
|
||||||
if (StringUtils.startsWith(property.getValue(), "http"))
|
throw new IllegalArgumentException("Null input URL.");
|
||||||
{
|
|
||||||
URL inputURL = new URL(property.getValue());
|
|
||||||
Organization organization = loadOrganization(inputURL, cache);
|
|
||||||
if (organization != null)
|
|
||||||
{
|
|
||||||
organization.setFederation(result);
|
|
||||||
result.getOrganizations().add(organization);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
else if (cache == null)
|
||||||
//
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Load organization.
|
|
||||||
*
|
|
||||||
* @param organizationFile
|
|
||||||
* the organization file
|
|
||||||
* @param cache
|
|
||||||
* the cache
|
|
||||||
* @return the organization
|
|
||||||
* @throws IOException
|
|
||||||
* Signals that an I/O exception has occurred.
|
|
||||||
*/
|
|
||||||
public static Organization loadOrganization(final File organizationFile, final CrawlCache cache) throws IOException
|
|
||||||
{
|
|
||||||
Organization result;
|
|
||||||
|
|
||||||
PathProperties properties = PathPropertyUtils.load(organizationFile);
|
|
||||||
result = new Organization(properties);
|
|
||||||
result.setInputFile(organizationFile);
|
|
||||||
|
|
||||||
PathProperties subs = result.getByPrefix("subs");
|
|
||||||
for (PathProperty property : subs)
|
|
||||||
{
|
{
|
||||||
if (StringUtils.startsWith(property.getValue(), "http"))
|
throw new IllegalArgumentException("Null cache URL.");
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
File federationFile = cache.restoreFile(inputURL);
|
||||||
|
if (federationFile == null)
|
||||||
{
|
{
|
||||||
URL serviceInputFile = new URL(property.getValue());
|
throw new IllegalArgumentException("Htmlize input file undefined.");
|
||||||
Service service = loadService(serviceInputFile, cache);
|
}
|
||||||
service.setOrganization(result);
|
else if (!federationFile.exists())
|
||||||
result.getServices().add(service);
|
{
|
||||||
|
throw new IllegalArgumentException("Htmlize input file is missing.");
|
||||||
|
}
|
||||||
|
else if (federationFile.isDirectory())
|
||||||
|
{
|
||||||
|
throw new IllegalArgumentException("Htmlize input file is a directory.");
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
PathProperties properties = PathPropertyUtils.load(federationFile);
|
||||||
|
result = new Federation(properties);
|
||||||
|
result.setInputURL(inputURL);
|
||||||
|
result.setInputFile(federationFile);
|
||||||
|
result.setLogoFileName(result.getTechnicalName() + "-logo" + StringUtils.defaultIfBlank(cache.getExtension(result.getLogoURL()), ".png"));
|
||||||
|
|
||||||
|
PropertyChecker checker = new PropertyChecker();
|
||||||
|
PropertyChecks checks = checker.checkFederation(result.getInputFile());
|
||||||
|
result.getInputChecks().addAll(checks);
|
||||||
|
result.getInputChecks().setFileName(result.getLocalFileName());
|
||||||
|
|
||||||
|
PathProperties subs = result.getByPrefix("subs");
|
||||||
|
for (PathProperty property : subs)
|
||||||
|
{
|
||||||
|
if (StringUtils.startsWith(property.getValue(), "http"))
|
||||||
|
{
|
||||||
|
URL subInputURL = new URL(property.getValue());
|
||||||
|
Organization organization = loadOrganization(subInputURL, cache);
|
||||||
|
if (organization != null)
|
||||||
|
{
|
||||||
|
organization.setFederation(result);
|
||||||
|
result.getOrganizations().add(organization);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
//
|
||||||
|
result.getCrawlJournal().addAll(cache.restoreJournal());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -285,6 +282,14 @@ public class Factory
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//
|
||||||
|
CrawlJournal journal = cache.restoreJournal();
|
||||||
|
result.getCrawlJournal().addAll(journal.searchByParent(result.getInputURL()));
|
||||||
|
for (Service service : result.getServices())
|
||||||
|
{
|
||||||
|
result.getCrawlJournal().addAll(journal.searchByParent(service.getInputURL()));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -321,6 +326,10 @@ public class Factory
|
||||||
result.setInputFile(inputFile);
|
result.setInputFile(inputFile);
|
||||||
result.setInputURL(inputURL);
|
result.setInputURL(inputURL);
|
||||||
result.setLogoFileName(result.getTechnicalName() + "-logo" + StringUtils.defaultIfBlank(cache.getExtension(result.getLogoURL()), ".png"));
|
result.setLogoFileName(result.getTechnicalName() + "-logo" + StringUtils.defaultIfBlank(cache.getExtension(result.getLogoURL()), ".png"));
|
||||||
|
|
||||||
|
//
|
||||||
|
CrawlJournal journal = cache.restoreJournal();
|
||||||
|
result.getCrawlJournal().addAll(journal.searchByParent(result.getInputURL()));
|
||||||
}
|
}
|
||||||
|
|
||||||
//
|
//
|
||||||
|
|
|
@ -26,6 +26,7 @@ import java.time.LocalDateTime;
|
||||||
import org.apache.commons.lang3.StringUtils;
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
|
||||||
import fr.devinsy.statoolinfos.checker.PropertyChecks;
|
import fr.devinsy.statoolinfos.checker.PropertyChecks;
|
||||||
|
import fr.devinsy.statoolinfos.crawl.CrawlJournal;
|
||||||
import fr.devinsy.statoolinfos.properties.PathProperties;
|
import fr.devinsy.statoolinfos.properties.PathProperties;
|
||||||
import fr.devinsy.statoolinfos.properties.PathPropertyList;
|
import fr.devinsy.statoolinfos.properties.PathPropertyList;
|
||||||
|
|
||||||
|
@ -36,9 +37,11 @@ public class Federation extends PathPropertyList
|
||||||
{
|
{
|
||||||
private static final long serialVersionUID = -8970835291634661580L;
|
private static final long serialVersionUID = -8970835291634661580L;
|
||||||
private Organizations organizations;
|
private Organizations organizations;
|
||||||
|
private URL inputURL;
|
||||||
private File inputFile;
|
private File inputFile;
|
||||||
private String logoFileName;
|
private String logoFileName;
|
||||||
private PropertyChecks inputChecks;
|
private PropertyChecks inputChecks;
|
||||||
|
private CrawlJournal crawlJournal;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Instantiates a new federation.
|
* Instantiates a new federation.
|
||||||
|
@ -48,6 +51,7 @@ public class Federation extends PathPropertyList
|
||||||
super();
|
super();
|
||||||
this.inputChecks = new PropertyChecks();
|
this.inputChecks = new PropertyChecks();
|
||||||
this.organizations = new Organizations();
|
this.organizations = new Organizations();
|
||||||
|
this.crawlJournal = new CrawlJournal();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -68,7 +72,7 @@ public class Federation extends PathPropertyList
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
this.organizations = new Organizations();
|
this.organizations = new Organizations();
|
||||||
|
this.crawlJournal = new CrawlJournal();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -157,6 +161,11 @@ public class Federation extends PathPropertyList
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public CrawlJournal getCrawlJournal()
|
||||||
|
{
|
||||||
|
return this.crawlJournal;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Gets the description.
|
* Gets the description.
|
||||||
*
|
*
|
||||||
|
@ -204,6 +213,11 @@ public class Federation extends PathPropertyList
|
||||||
return this.inputFile;
|
return this.inputFile;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public URL getInputURL()
|
||||||
|
{
|
||||||
|
return this.inputURL;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Gets the legal website.
|
* Gets the legal website.
|
||||||
*
|
*
|
||||||
|
@ -435,6 +449,11 @@ public class Federation extends PathPropertyList
|
||||||
this.inputFile = inputFile;
|
this.inputFile = inputFile;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void setInputURL(final URL inputURL)
|
||||||
|
{
|
||||||
|
this.inputURL = inputURL;
|
||||||
|
}
|
||||||
|
|
||||||
public void setLogoFileName(final String logoFileName)
|
public void setLogoFileName(final String logoFileName)
|
||||||
{
|
{
|
||||||
this.logoFileName = logoFileName;
|
this.logoFileName = logoFileName;
|
||||||
|
|
|
@ -28,6 +28,7 @@ import org.apache.commons.codec.digest.DigestUtils;
|
||||||
import org.apache.commons.lang3.StringUtils;
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
|
||||||
import fr.devinsy.statoolinfos.checker.PropertyChecks;
|
import fr.devinsy.statoolinfos.checker.PropertyChecks;
|
||||||
|
import fr.devinsy.statoolinfos.crawl.CrawlJournal;
|
||||||
import fr.devinsy.statoolinfos.properties.PathProperties;
|
import fr.devinsy.statoolinfos.properties.PathProperties;
|
||||||
import fr.devinsy.statoolinfos.properties.PathPropertyList;
|
import fr.devinsy.statoolinfos.properties.PathPropertyList;
|
||||||
|
|
||||||
|
@ -43,6 +44,7 @@ public class Organization extends PathPropertyList
|
||||||
private URL inputURL;
|
private URL inputURL;
|
||||||
private String logoFileName;
|
private String logoFileName;
|
||||||
private PropertyChecks inputChecks;
|
private PropertyChecks inputChecks;
|
||||||
|
private CrawlJournal crawlJournal;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Instantiates a new organization.
|
* Instantiates a new organization.
|
||||||
|
@ -52,6 +54,7 @@ public class Organization extends PathPropertyList
|
||||||
super();
|
super();
|
||||||
this.inputChecks = new PropertyChecks();
|
this.inputChecks = new PropertyChecks();
|
||||||
this.services = new Services();
|
this.services = new Services();
|
||||||
|
this.crawlJournal = new CrawlJournal();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -65,6 +68,7 @@ public class Organization extends PathPropertyList
|
||||||
super(properties);
|
super(properties);
|
||||||
this.inputChecks = new PropertyChecks();
|
this.inputChecks = new PropertyChecks();
|
||||||
this.services = new Services();
|
this.services = new Services();
|
||||||
|
this.crawlJournal = new CrawlJournal();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -165,6 +169,11 @@ public class Organization extends PathPropertyList
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public CrawlJournal getCrawlJournal()
|
||||||
|
{
|
||||||
|
return this.crawlJournal;
|
||||||
|
}
|
||||||
|
|
||||||
public String getDescription()
|
public String getDescription()
|
||||||
{
|
{
|
||||||
String result;
|
String result;
|
||||||
|
|
|
@ -33,6 +33,7 @@ import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
import fr.devinsy.statoolinfos.checker.PropertyChecks;
|
import fr.devinsy.statoolinfos.checker.PropertyChecks;
|
||||||
|
import fr.devinsy.statoolinfos.crawl.CrawlJournal;
|
||||||
import fr.devinsy.statoolinfos.metrics.Metric;
|
import fr.devinsy.statoolinfos.metrics.Metric;
|
||||||
import fr.devinsy.statoolinfos.properties.PathProperties;
|
import fr.devinsy.statoolinfos.properties.PathProperties;
|
||||||
import fr.devinsy.statoolinfos.properties.PathProperty;
|
import fr.devinsy.statoolinfos.properties.PathProperty;
|
||||||
|
@ -104,6 +105,7 @@ public class Service extends PathPropertyList
|
||||||
private URL inputURL;
|
private URL inputURL;
|
||||||
private String logoFileName;
|
private String logoFileName;
|
||||||
private PropertyChecks inputChecks;
|
private PropertyChecks inputChecks;
|
||||||
|
private CrawlJournal crawlJournal;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Instantiates a new service.
|
* Instantiates a new service.
|
||||||
|
@ -123,6 +125,7 @@ public class Service extends PathPropertyList
|
||||||
{
|
{
|
||||||
super(properties);
|
super(properties);
|
||||||
this.inputChecks = new PropertyChecks();
|
this.inputChecks = new PropertyChecks();
|
||||||
|
this.crawlJournal = new CrawlJournal();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -223,6 +226,11 @@ public class Service extends PathPropertyList
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public CrawlJournal getCrawlJournal()
|
||||||
|
{
|
||||||
|
return this.crawlJournal;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Gets the description.
|
* Gets the description.
|
||||||
*
|
*
|
||||||
|
|
|
@ -20,7 +20,9 @@ package fr.devinsy.statoolinfos.crawl;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.net.MalformedURLException;
|
||||||
import java.net.URL;
|
import java.net.URL;
|
||||||
|
import java.nio.file.Files;
|
||||||
|
|
||||||
import org.apache.commons.codec.digest.DigestUtils;
|
import org.apache.commons.codec.digest.DigestUtils;
|
||||||
import org.apache.commons.io.FileUtils;
|
import org.apache.commons.io.FileUtils;
|
||||||
|
@ -189,6 +191,22 @@ public class CrawlCache
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return
|
||||||
|
* @throws IOException
|
||||||
|
*/
|
||||||
|
public CrawlJournal restoreJournal() throws IOException
|
||||||
|
{
|
||||||
|
CrawlJournal result;
|
||||||
|
|
||||||
|
File journalFile = restoreFile(getJournalURL());
|
||||||
|
|
||||||
|
result = CrawlJournalFile.load(journalFile);
|
||||||
|
|
||||||
|
//
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Restore logo to.
|
* Restore logo to.
|
||||||
*
|
*
|
||||||
|
@ -292,6 +310,24 @@ public class CrawlCache
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Store journal.
|
||||||
|
*/
|
||||||
|
public void storeJournal(final CrawlJournal journal)
|
||||||
|
{
|
||||||
|
try
|
||||||
|
{
|
||||||
|
File file = Files.createTempFile("tmp-", ".statoolsinfos").toFile();
|
||||||
|
CrawlJournalFile.save(file, journal);
|
||||||
|
store(getJournalURL(), file);
|
||||||
|
file.delete();
|
||||||
|
}
|
||||||
|
catch (IOException exception)
|
||||||
|
{
|
||||||
|
exception.printStackTrace();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Store.
|
* Store.
|
||||||
*
|
*
|
||||||
|
@ -356,4 +392,20 @@ public class CrawlCache
|
||||||
//
|
//
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets the journal URL.
|
||||||
|
*
|
||||||
|
* @return the journal URL
|
||||||
|
* @throws MalformedURLException
|
||||||
|
*/
|
||||||
|
public static URL getJournalURL() throws MalformedURLException
|
||||||
|
{
|
||||||
|
URL result;
|
||||||
|
|
||||||
|
result = new URL("http://localhost/crawl.journal");
|
||||||
|
|
||||||
|
//
|
||||||
|
return result;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -34,6 +34,7 @@ import java.time.LocalDateTime;
|
||||||
import java.time.ZoneOffset;
|
import java.time.ZoneOffset;
|
||||||
|
|
||||||
import org.apache.commons.io.IOUtils;
|
import org.apache.commons.io.IOUtils;
|
||||||
|
import org.apache.commons.lang3.StringUtils;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
@ -187,14 +188,33 @@ public class CrawlJournalFile
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
String[] tokens = line.split(" ", 2);
|
String[] tokens = line.split(" ", 3);
|
||||||
|
|
||||||
CrawlStatus status = CrawlStatus.valueOf(tokens[0].toUpperCase());
|
CrawlStatus status = CrawlStatus.valueOf(tokens[0].toUpperCase());
|
||||||
|
|
||||||
|
URL parentURL;
|
||||||
|
try
|
||||||
|
{
|
||||||
|
if (StringUtils.equals(tokens[1], "null"))
|
||||||
|
{
|
||||||
|
parentURL = null;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
parentURL = new URL(tokens[1].trim());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
catch (MalformedURLException exception)
|
||||||
|
{
|
||||||
|
logger.error("Error valuing [{}]", line);
|
||||||
|
exception.printStackTrace();
|
||||||
|
parentURL = null;
|
||||||
|
}
|
||||||
|
|
||||||
URL url;
|
URL url;
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
url = new URL(tokens[1].trim());
|
url = new URL(tokens[2].trim());
|
||||||
}
|
}
|
||||||
catch (MalformedURLException exception)
|
catch (MalformedURLException exception)
|
||||||
{
|
{
|
||||||
|
@ -203,7 +223,7 @@ public class CrawlJournalFile
|
||||||
url = null;
|
url = null;
|
||||||
}
|
}
|
||||||
|
|
||||||
result = new CrawlLog(url, status);
|
result = new CrawlLog(url, parentURL, status);
|
||||||
}
|
}
|
||||||
|
|
||||||
//
|
//
|
||||||
|
@ -226,7 +246,7 @@ public class CrawlJournalFile
|
||||||
{
|
{
|
||||||
for (CrawlLog log : journal)
|
for (CrawlLog log : journal)
|
||||||
{
|
{
|
||||||
String line = log.getStatus() + " " + log.getUrl();
|
String line = String.format("%s %s %s", log.getStatus(), log.getParentUrl(), log.getUrl());
|
||||||
out.write(line);
|
out.write(line);
|
||||||
out.write("\n");
|
out.write("\n");
|
||||||
}
|
}
|
||||||
|
|
|
@ -26,6 +26,7 @@ import java.net.URL;
|
||||||
public class CrawlLog
|
public class CrawlLog
|
||||||
{
|
{
|
||||||
private URL url;
|
private URL url;
|
||||||
|
private URL parentUrl;
|
||||||
private CrawlStatus status;
|
private CrawlStatus status;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -36,12 +37,40 @@ public class CrawlLog
|
||||||
* @param status
|
* @param status
|
||||||
* the status
|
* the status
|
||||||
*/
|
*/
|
||||||
public CrawlLog(final URL url, final CrawlStatus status)
|
public CrawlLog(final URL url, final URL parentUrl, final CrawlStatus status)
|
||||||
{
|
{
|
||||||
this.url = url;
|
this.url = url;
|
||||||
|
this.parentUrl = parentUrl;
|
||||||
this.status = status;
|
this.status = status;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public URL getParentUrl()
|
||||||
|
{
|
||||||
|
return this.parentUrl;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets the parent url value.
|
||||||
|
*
|
||||||
|
* @return the parent url value
|
||||||
|
*/
|
||||||
|
public String getParentUrlValue()
|
||||||
|
{
|
||||||
|
String result;
|
||||||
|
|
||||||
|
if (this.parentUrl == null)
|
||||||
|
{
|
||||||
|
result = null;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
result = this.parentUrl.toString();
|
||||||
|
}
|
||||||
|
|
||||||
|
//
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
public CrawlStatus getStatus()
|
public CrawlStatus getStatus()
|
||||||
{
|
{
|
||||||
return this.status;
|
return this.status;
|
||||||
|
|
|
@ -24,6 +24,8 @@ import java.util.Collections;
|
||||||
|
|
||||||
import org.apache.commons.lang3.StringUtils;
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
|
||||||
|
import fr.devinsy.statoolinfos.util.URLUtils;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The Class CrawlLogs.
|
* The Class CrawlLogs.
|
||||||
*/
|
*/
|
||||||
|
@ -39,6 +41,27 @@ public class CrawlLogs extends ArrayList<CrawlLog>
|
||||||
super();
|
super();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* (non-Javadoc)
|
||||||
|
* @see java.util.ArrayList#add(java.lang.Object)
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public boolean add(final CrawlLog log)
|
||||||
|
{
|
||||||
|
boolean result;
|
||||||
|
|
||||||
|
if (log == null)
|
||||||
|
{
|
||||||
|
result = false;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
result = super.add(log);
|
||||||
|
}
|
||||||
|
|
||||||
|
//
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Adds the.
|
* Adds the.
|
||||||
*
|
*
|
||||||
|
@ -47,9 +70,9 @@ public class CrawlLogs extends ArrayList<CrawlLog>
|
||||||
* @param status
|
* @param status
|
||||||
* the status
|
* the status
|
||||||
*/
|
*/
|
||||||
public void add(final URL url, final CrawlStatus status)
|
public void add(final URL url, final URL parentUrl, final CrawlStatus status)
|
||||||
{
|
{
|
||||||
this.add(new CrawlLog(url, status));
|
this.add(new CrawlLog(url, parentUrl, status));
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -139,4 +162,29 @@ public class CrawlLogs extends ArrayList<CrawlLog>
|
||||||
//
|
//
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets the by parent.
|
||||||
|
*
|
||||||
|
* @param parentURL
|
||||||
|
* the parent URL
|
||||||
|
* @return the by parent
|
||||||
|
*/
|
||||||
|
public CrawlLogs searchByParent(final URL parentURL)
|
||||||
|
{
|
||||||
|
CrawlLogs result;
|
||||||
|
|
||||||
|
result = new CrawlLogs();
|
||||||
|
|
||||||
|
for (CrawlLog log : this)
|
||||||
|
{
|
||||||
|
if (URLUtils.equals(log.getParentUrl(), parentURL))
|
||||||
|
{
|
||||||
|
result.add(log);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
//
|
||||||
|
return result;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -21,7 +21,6 @@ package fr.devinsy.statoolinfos.crawl;
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.FileNotFoundException;
|
import java.io.FileNotFoundException;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.net.MalformedURLException;
|
|
||||||
import java.net.URL;
|
import java.net.URL;
|
||||||
import java.nio.charset.StandardCharsets;
|
import java.nio.charset.StandardCharsets;
|
||||||
import java.nio.file.Files;
|
import java.nio.file.Files;
|
||||||
|
@ -89,7 +88,7 @@ public class Crawler
|
||||||
*/
|
*/
|
||||||
public void crawl(final URL url) throws StatoolInfosException, IOException
|
public void crawl(final URL url) throws StatoolInfosException, IOException
|
||||||
{
|
{
|
||||||
crawl(url, null);
|
crawl(url, null, null);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -104,7 +103,7 @@ public class Crawler
|
||||||
* @throws IOException
|
* @throws IOException
|
||||||
* Signals that an I/O exception has occurred.
|
* Signals that an I/O exception has occurred.
|
||||||
*/
|
*/
|
||||||
public void crawl(final URL url, final PropertyClassType parent)
|
public void crawl(final URL url, final URL parentURL, final PropertyClassType parent)
|
||||||
{
|
{
|
||||||
logger.info("Crawling {}", url);
|
logger.info("Crawling {}", url);
|
||||||
|
|
||||||
|
@ -118,21 +117,21 @@ public class Crawler
|
||||||
catch (java.net.ConnectException exception)
|
catch (java.net.ConnectException exception)
|
||||||
{
|
{
|
||||||
logger.error("ERROR: crawl failed for [{}]: {}", url.toString(), exception.getMessage());
|
logger.error("ERROR: crawl failed for [{}]: {}", url.toString(), exception.getMessage());
|
||||||
this.journal.add(url, CrawlStatus.CONNECTERROR);
|
this.journal.add(url, parentURL, CrawlStatus.CONNECTERROR);
|
||||||
downloadFile = null;
|
downloadFile = null;
|
||||||
exception.printStackTrace();
|
exception.printStackTrace();
|
||||||
}
|
}
|
||||||
catch (FileNotFoundException exception)
|
catch (FileNotFoundException exception)
|
||||||
{
|
{
|
||||||
logger.error("ERROR: crawl failed for [{}]: {}", url.toString(), exception.getMessage());
|
logger.error("ERROR: crawl failed for [{}]: {}", url.toString(), exception.getMessage());
|
||||||
this.journal.add(url, CrawlStatus.URLNOTFOUND);
|
this.journal.add(url, parentURL, CrawlStatus.URLNOTFOUND);
|
||||||
downloadFile = null;
|
downloadFile = null;
|
||||||
exception.printStackTrace();
|
exception.printStackTrace();
|
||||||
}
|
}
|
||||||
catch (IOException exception)
|
catch (IOException exception)
|
||||||
{
|
{
|
||||||
logger.error("ERROR: crawl failed for [{}]: {}", url.toString(), exception.getMessage());
|
logger.error("ERROR: crawl failed for [{}]: {}", url.toString(), exception.getMessage());
|
||||||
this.journal.add(url, CrawlStatus.DOWNLOADERROR);
|
this.journal.add(url, parentURL, CrawlStatus.DOWNLOADERROR);
|
||||||
downloadFile = null;
|
downloadFile = null;
|
||||||
exception.printStackTrace();
|
exception.printStackTrace();
|
||||||
}
|
}
|
||||||
|
@ -142,12 +141,12 @@ public class Crawler
|
||||||
if (!downloadFile.exists())
|
if (!downloadFile.exists())
|
||||||
{
|
{
|
||||||
logger.error("ERROR: download missing.");
|
logger.error("ERROR: download missing.");
|
||||||
this.journal.add(url, CrawlStatus.MISSING);
|
this.journal.add(url, parentURL, CrawlStatus.MISSING);
|
||||||
}
|
}
|
||||||
else if (downloadFile.length() == 0)
|
else if (downloadFile.length() == 0)
|
||||||
{
|
{
|
||||||
logger.error("ERROR: download empty.");
|
logger.error("ERROR: download empty.");
|
||||||
this.journal.add(url, CrawlStatus.EMPTY);
|
this.journal.add(url, parentURL, CrawlStatus.EMPTY);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -157,7 +156,7 @@ public class Crawler
|
||||||
if ((downloadClass == null) || (!downloadClass.isChildOf(parent)))
|
if ((downloadClass == null) || (!downloadClass.isChildOf(parent)))
|
||||||
{
|
{
|
||||||
logger.error("ERROR: bad child class [{}][{}].", downloadClass, parent);
|
logger.error("ERROR: bad child class [{}][{}].", downloadClass, parent);
|
||||||
this.journal.add(url, CrawlStatus.BADCHILDCLASS);
|
this.journal.add(url, parentURL, CrawlStatus.BADCHILDCLASS);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -176,7 +175,7 @@ public class Crawler
|
||||||
String downloadSha = StatoolInfosUtils.sha1sum(downloadFile);
|
String downloadSha = StatoolInfosUtils.sha1sum(downloadFile);
|
||||||
if (StringUtils.equals(downloadSha, storedSha))
|
if (StringUtils.equals(downloadSha, storedSha))
|
||||||
{
|
{
|
||||||
this.journal.add(url, CrawlStatus.SUCCESS);
|
this.journal.add(url, parentURL, CrawlStatus.SUCCESS);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -199,13 +198,13 @@ public class Crawler
|
||||||
downloadFile.delete();
|
downloadFile.delete();
|
||||||
|
|
||||||
//
|
//
|
||||||
this.journal.add(url, CrawlStatus.UPDATED);
|
this.journal.add(url, parentURL, CrawlStatus.UPDATED);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Cache another resources.
|
// Cache another resources.
|
||||||
crawlLogo(downloadProperties.getURL("federation.logo"));
|
crawlLogo(downloadProperties.getURL("federation.logo"), url);
|
||||||
crawlLogo(downloadProperties.getURL("organization.logo"));
|
crawlLogo(downloadProperties.getURL("organization.logo"), url);
|
||||||
crawlLogo(downloadProperties.getURL("service.logo"));
|
crawlLogo(downloadProperties.getURL("service.logo"), url);
|
||||||
|
|
||||||
// Do subs.
|
// Do subs.
|
||||||
PathProperties subs = downloadProperties.getByPrefix("subs");
|
PathProperties subs = downloadProperties.getByPrefix("subs");
|
||||||
|
@ -216,12 +215,12 @@ public class Crawler
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
URL subUrl = new URL(property.getValue());
|
URL subUrl = new URL(property.getValue());
|
||||||
crawl(subUrl, downloadClass);
|
crawl(subUrl, url, downloadClass);
|
||||||
}
|
}
|
||||||
catch (java.net.MalformedURLException exception)
|
catch (java.net.MalformedURLException exception)
|
||||||
{
|
{
|
||||||
logger.error("ERROR: subcrawl failed for [{}][{}][{}]: {}", url.toString(), property.getPath(), property.getValue(), exception.getMessage());
|
logger.error("ERROR: subcrawl failed for [{}][{}][{}]: {}", url.toString(), property.getPath(), property.getValue(), exception.getMessage());
|
||||||
this.journal.add(url, CrawlStatus.BADURLFORMAT);
|
this.journal.add(url, parentURL, CrawlStatus.BADURLFORMAT);
|
||||||
exception.printStackTrace();
|
exception.printStackTrace();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -232,7 +231,7 @@ public class Crawler
|
||||||
}
|
}
|
||||||
catch (IOException exception)
|
catch (IOException exception)
|
||||||
{
|
{
|
||||||
this.journal.add(url, CrawlStatus.IOERROR);
|
this.journal.add(url, parentURL, CrawlStatus.IOERROR);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -243,7 +242,7 @@ public class Crawler
|
||||||
* the url
|
* the url
|
||||||
* @return the file
|
* @return the file
|
||||||
*/
|
*/
|
||||||
public File crawlLogo(final URL url)
|
public File crawlLogo(final URL url, final URL parentURL)
|
||||||
{
|
{
|
||||||
File result;
|
File result;
|
||||||
|
|
||||||
|
@ -265,19 +264,19 @@ public class Crawler
|
||||||
catch (java.net.ConnectException exception)
|
catch (java.net.ConnectException exception)
|
||||||
{
|
{
|
||||||
logger.error("ERROR: crawl failed (1) for [{}]: {}", url.toString(), exception.getMessage());
|
logger.error("ERROR: crawl failed (1) for [{}]: {}", url.toString(), exception.getMessage());
|
||||||
this.journal.add(url, CrawlStatus.CONNECTERROR);
|
this.journal.add(url, parentURL, CrawlStatus.CONNECTERROR);
|
||||||
logoFile = null;
|
logoFile = null;
|
||||||
}
|
}
|
||||||
catch (FileNotFoundException exception)
|
catch (FileNotFoundException exception)
|
||||||
{
|
{
|
||||||
logger.error("ERROR: crawl failed (2) for [{}]: {}", url.toString(), exception.getMessage());
|
logger.error("ERROR: crawl failed (2) for [{}]: {}", url.toString(), exception.getMessage());
|
||||||
this.journal.add(url, CrawlStatus.URLNOTFOUND);
|
this.journal.add(url, parentURL, CrawlStatus.URLNOTFOUND);
|
||||||
logoFile = null;
|
logoFile = null;
|
||||||
}
|
}
|
||||||
catch (IOException exception)
|
catch (IOException exception)
|
||||||
{
|
{
|
||||||
logger.error("ERROR: crawl failed (3) for [{}]: {}", url.toString(), exception.getMessage());
|
logger.error("ERROR: crawl failed (3) for [{}]: {}", url.toString(), exception.getMessage());
|
||||||
this.journal.add(url, CrawlStatus.DOWNLOADERROR);
|
this.journal.add(url, parentURL, CrawlStatus.DOWNLOADERROR);
|
||||||
logoFile = null;
|
logoFile = null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -288,7 +287,7 @@ public class Crawler
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
result = this.cache.store(url, logoFile);
|
result = this.cache.store(url, logoFile);
|
||||||
this.journal.add(url, CrawlStatus.SUCCESS);
|
this.journal.add(url, parentURL, CrawlStatus.SUCCESS);
|
||||||
logoFile.delete();
|
logoFile.delete();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -344,9 +343,7 @@ public class Crawler
|
||||||
|
|
||||||
logger.info("Restoring crawl journal.");
|
logger.info("Restoring crawl journal.");
|
||||||
|
|
||||||
File journalFile = this.cache.restoreFile(getJournalURL());
|
result = this.cache.restoreJournal();
|
||||||
|
|
||||||
result = CrawlJournalFile.load(journalFile);
|
|
||||||
|
|
||||||
//
|
//
|
||||||
return result;
|
return result;
|
||||||
|
@ -357,34 +354,7 @@ public class Crawler
|
||||||
*/
|
*/
|
||||||
public void storeJournal()
|
public void storeJournal()
|
||||||
{
|
{
|
||||||
try
|
logger.info("Storing crawl journal.");
|
||||||
{
|
this.cache.storeJournal(this.journal);
|
||||||
logger.info("Storing crawl journal.");
|
|
||||||
File file = Files.createTempFile("tmp-", ".statoolsinfos").toFile();
|
|
||||||
|
|
||||||
CrawlJournalFile.save(file, this.journal);
|
|
||||||
this.cache.store(getJournalURL(), file);
|
|
||||||
file.delete();
|
|
||||||
}
|
|
||||||
catch (IOException exception)
|
|
||||||
{
|
|
||||||
exception.printStackTrace();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Gets the journal URL.
|
|
||||||
*
|
|
||||||
* @return the journal URL
|
|
||||||
* @throws MalformedURLException
|
|
||||||
*/
|
|
||||||
public static URL getJournalURL() throws MalformedURLException
|
|
||||||
{
|
|
||||||
URL result;
|
|
||||||
|
|
||||||
result = new URL("http://localhost/crawl.journal");
|
|
||||||
|
|
||||||
//
|
|
||||||
return result;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -21,13 +21,17 @@ package fr.devinsy.statoolinfos.htmlize;
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.nio.charset.StandardCharsets;
|
import java.nio.charset.StandardCharsets;
|
||||||
|
import java.time.format.DateTimeFormatter;
|
||||||
|
|
||||||
import org.apache.commons.io.FileUtils;
|
import org.apache.commons.io.FileUtils;
|
||||||
|
import org.apache.commons.lang3.StringUtils;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
import fr.devinsy.statoolinfos.HtmlizerContext;
|
import fr.devinsy.statoolinfos.HtmlizerContext;
|
||||||
import fr.devinsy.statoolinfos.core.Federation;
|
import fr.devinsy.statoolinfos.core.Federation;
|
||||||
|
import fr.devinsy.statoolinfos.core.Organization;
|
||||||
|
import fr.devinsy.statoolinfos.core.Service;
|
||||||
import fr.devinsy.statoolinfos.core.StatoolInfosException;
|
import fr.devinsy.statoolinfos.core.StatoolInfosException;
|
||||||
import fr.devinsy.statoolinfos.crawl.CrawlCache;
|
import fr.devinsy.statoolinfos.crawl.CrawlCache;
|
||||||
import fr.devinsy.statoolinfos.crawl.CrawlJournal;
|
import fr.devinsy.statoolinfos.crawl.CrawlJournal;
|
||||||
|
@ -56,9 +60,20 @@ public class CrawlJournalPage
|
||||||
File htmlizeDirectory = HtmlizerContext.instance().getHtmlizeDirectory();
|
File htmlizeDirectory = HtmlizerContext.instance().getHtmlizeDirectory();
|
||||||
|
|
||||||
logger.info("Htmlize Crawl Journal pages.");
|
logger.info("Htmlize Crawl Journal pages.");
|
||||||
CrawlJournal journal = HtmlizerContext.instance().getCrawlJournal();
|
String page = htmlize("Journal des téléchargements", federation.getCrawlJournal());
|
||||||
String page = htmlize("Journal des téléchargements", journal);
|
|
||||||
FileUtils.write(new File(htmlizeDirectory, federation.getTechnicalName() + "-crawl.xhtml"), page, StandardCharsets.UTF_8);
|
FileUtils.write(new File(htmlizeDirectory, federation.getTechnicalName() + "-crawl.xhtml"), page, StandardCharsets.UTF_8);
|
||||||
|
|
||||||
|
for (Organization organization : federation.getOrganizations())
|
||||||
|
{
|
||||||
|
page = htmlize("Journal des téléchargements de " + organization.getName(), organization.getCrawlJournal());
|
||||||
|
FileUtils.write(new File(htmlizeDirectory, organization.getTechnicalName() + "-crawl.xhtml"), page, StandardCharsets.UTF_8);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (Service service : federation.getAllServices())
|
||||||
|
{
|
||||||
|
page = htmlize("Journal des téléchargements de " + service.getName(), service.getCrawlJournal());
|
||||||
|
FileUtils.write(new File(htmlizeDirectory, service.getOrganization().getTechnicalName() + "-" + service.getTechnicalName() + "-crawl.xhtml"), page, StandardCharsets.UTF_8);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -83,7 +98,7 @@ public class CrawlJournalPage
|
||||||
TagDataManager data = new TagDataManager();
|
TagDataManager data = new TagDataManager();
|
||||||
|
|
||||||
data.setEscapedContent("title", title);
|
data.setEscapedContent("title", title);
|
||||||
data.setContent("date", journal.getDatetime().toString());
|
// Use 'yyyy' (year-of-era): 'YYYY' is the week-based year and shows the wrong year for dates around New Year.
data.setContent("date", journal.getDatetime().format(DateTimeFormatter.ofPattern("dd/MM/yyyy HH:mm")));
|
||||||
data.setContent("totalCount", journal.size());
|
data.setContent("totalCount", journal.size());
|
||||||
data.setContent("errorCount", journal.getErrors().size());
|
data.setContent("errorCount", journal.getErrors().size());
|
||||||
|
|
||||||
|
@ -92,6 +107,8 @@ public class CrawlJournalPage
|
||||||
{
|
{
|
||||||
data.setEscapedContent("crawlLogLine", index, "crawlLogLineUrlLink", log.getUrl().toString());
|
data.setEscapedContent("crawlLogLine", index, "crawlLogLineUrlLink", log.getUrl().toString());
|
||||||
data.setEscapedAttribute("crawlLogLine", index, "crawlLogLineUrlLink", "href", log.getUrl().toString());
|
data.setEscapedAttribute("crawlLogLine", index, "crawlLogLineUrlLink", "href", log.getUrl().toString());
|
||||||
|
data.setEscapedContent("crawlLogLine", index, "crawlLogLineParentUrlLink", StringUtils.abbreviate(log.getParentUrlValue(), 35));
|
||||||
|
data.setEscapedAttribute("crawlLogLine", index, "crawlLogLineParentUrlLink", "href", StringUtils.defaultString(log.getParentUrlValue(), "#"));
|
||||||
data.setContent("crawlLogLine", index, "crawlLogLineStatus", log.getStatus().toString());
|
data.setContent("crawlLogLine", index, "crawlLogLineStatus", log.getStatus().toString());
|
||||||
|
|
||||||
if (log.getStatus().isError())
|
if (log.getStatus().isError())
|
||||||
|
|
|
@ -85,8 +85,9 @@ public class FederationPage
|
||||||
* @return the string
|
* @return the string
|
||||||
* @throws StatoolInfosException
|
* @throws StatoolInfosException
|
||||||
* the statool infos exception
|
* the statool infos exception
|
||||||
|
* @throws IOException
|
||||||
*/
|
*/
|
||||||
public static String htmlize(final Federation federation) throws StatoolInfosException
|
public static String htmlize(final Federation federation) throws StatoolInfosException, IOException
|
||||||
{
|
{
|
||||||
String result;
|
String result;
|
||||||
|
|
||||||
|
@ -110,8 +111,7 @@ public class FederationPage
|
||||||
data.setAttribute("statsLink", "href", federation.getTechnicalName() + "-stats.xhtml");
|
data.setAttribute("statsLink", "href", federation.getTechnicalName() + "-stats.xhtml");
|
||||||
|
|
||||||
data.setAttribute("crawlLink", "href", federation.getTechnicalName() + "-crawl.xhtml");
|
data.setAttribute("crawlLink", "href", federation.getTechnicalName() + "-crawl.xhtml");
|
||||||
|
if (federation.getCrawlJournal().getErrors().isEmpty())
|
||||||
if (HtmlizerContext.instance().getCrawlJournal().getErrors().isEmpty())
|
|
||||||
{
|
{
|
||||||
data.setAttribute("crawlLinkImg", "src", "circle-icons/download-mono.svg");
|
data.setAttribute("crawlLinkImg", "src", "circle-icons/download-mono.svg");
|
||||||
}
|
}
|
||||||
|
|
|
@ -152,6 +152,16 @@ public class OrganizationPage
|
||||||
|
|
||||||
data.setAttribute("statsLink", "href", organization.getTechnicalName() + "-stats.xhtml");
|
data.setAttribute("statsLink", "href", organization.getTechnicalName() + "-stats.xhtml");
|
||||||
|
|
||||||
|
data.setAttribute("crawlLink", "href", organization.getTechnicalName() + "-crawl.xhtml");
|
||||||
|
if (organization.getCrawlJournal().getErrors().isEmpty())
|
||||||
|
{
|
||||||
|
data.setAttribute("crawlLinkImg", "src", "circle-icons/download-mono.svg");
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
data.setAttribute("crawlLinkImg", "src", "circle-icons/download.svg");
|
||||||
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
PropertyChecks checks = organization.getInputChecksAll();
|
PropertyChecks checks = organization.getInputChecksAll();
|
||||||
|
|
||||||
|
@ -239,4 +249,5 @@ public class OrganizationPage
|
||||||
FileUtils.copyFile(logoFile, target);
|
FileUtils.copyFile(logoFile, target);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -221,6 +221,16 @@ public class ServicePage
|
||||||
data.getIdData("softwareSourceLinkImg").getAttribute("class").setMode(DisplayMode.REPLACE);
|
data.getIdData("softwareSourceLinkImg").getAttribute("class").setMode(DisplayMode.REPLACE);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
data.setAttribute("crawlLink", "href", service.getOrganization().getTechnicalName() + "-" + service.getTechnicalName() + "-crawl.xhtml");
|
||||||
|
if (service.getCrawlJournal().getErrors().isEmpty())
|
||||||
|
{
|
||||||
|
data.setAttribute("crawlLinkImg", "src", "circle-icons/download-mono.svg");
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
data.setAttribute("crawlLinkImg", "src", "circle-icons/download.svg");
|
||||||
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
PropertyChecks checks = service.getInputChecks();
|
PropertyChecks checks = service.getInputChecks();
|
||||||
data.setContent("errorCount", checks.getErrorCount());
|
data.setContent("errorCount", checks.getErrorCount());
|
||||||
|
|
|
@ -20,16 +20,18 @@
|
||||||
<div>Date : <span id="date">n/a</span></div>
|
<div>Date : <span id="date">n/a</span></div>
|
||||||
</div>
|
</div>
|
||||||
<br/>
|
<br/>
|
||||||
<div class="center_table" style="width: 900px;">
|
<div class="center_table" style="width: 1000px;">
|
||||||
<table id="crawlLogs" class="table_classic left">
|
<table id="crawlLogs" class="table_classic left">
|
||||||
<thead>
|
<thead>
|
||||||
<tr>
|
<tr>
|
||||||
|
<th style="width: 200px;">Parent</th>
|
||||||
<th>URL</th>
|
<th>URL</th>
|
||||||
<th style="width: 200px;">Statut</th>
|
<th style="width: 150px;">Statut</th>
|
||||||
</tr>
|
</tr>
|
||||||
</thead>
|
</thead>
|
||||||
<tbody>
|
<tbody>
|
||||||
<tr id="crawlLogLine">
|
<tr id="crawlLogLine">
|
||||||
|
<td id="crawlLogLineParentUrl"><a href="#" id="crawlLogLineParentUrlLink">n/a</a></td>
|
||||||
<td id="crawlLogLineUrl"><a href="#" id="crawlLogLineUrlLink">n/a</a></td>
|
<td id="crawlLogLineUrl"><a href="#" id="crawlLogLineUrlLink">n/a</a></td>
|
||||||
<td id="crawlLogLineStatus" class="td_center center">n/a</td>
|
<td id="crawlLogLineStatus" class="td_center center">n/a</td>
|
||||||
</tr>
|
</tr>
|
||||||
|
|
|
@ -33,6 +33,7 @@
|
||||||
<a id="technicalDocLink" href="#"><img id="technicalDocLinkImg" src="circle-icons/tools.svg" class="disabled" title="Documentation technique"/></a>
|
<a id="technicalDocLink" href="#"><img id="technicalDocLinkImg" src="circle-icons/tools.svg" class="disabled" title="Documentation technique"/></a>
|
||||||
<a id="rawCheckLink" href="#"><img id="rawCheckLinkImg" src="circle-icons/clipboard-mono.svg" title="Fichier propriétés analysé"/></a>
|
<a id="rawCheckLink" href="#"><img id="rawCheckLinkImg" src="circle-icons/clipboard-mono.svg" title="Fichier propriétés analysé"/></a>
|
||||||
<a id="rawLink" href="#"><img id="rawLinkImg" src="circle-icons/document-mono.svg" title="Fichier propriétés"/></a>
|
<a id="rawLink" href="#"><img id="rawLinkImg" src="circle-icons/document-mono.svg" title="Fichier propriétés"/></a>
|
||||||
|
<a id="crawlLink" href="#"><img id="crawlLinkImg" src="circle-icons/download-mono.svg" title="Statut des téléchargements"/></a>
|
||||||
<a id="statsLink" href="#"><img id="statsLinkImg" src="circle-icons/barchart-mono.svg" title="Statistiques"/></a>
|
<a id="statsLink" href="#"><img id="statsLinkImg" src="circle-icons/barchart-mono.svg" title="Statistiques"/></a>
|
||||||
<div style="display: inline-block; vertical-align: middle; font-size: smaller; margin-left: 2px; width: 35px;">
|
<div style="display: inline-block; vertical-align: middle; font-size: smaller; margin-left: 2px; width: 35px;">
|
||||||
<a id="alertLink" href="#" style="text-decoration: none;">
|
<a id="alertLink" href="#" style="text-decoration: none;">
|
||||||
|
|
|
@ -41,6 +41,7 @@
|
||||||
<a id="technicalDocLink" href="#"><img id="technicalDocLinkImg" src="circle-icons/tools.svg" class="disabled" title="Documentation technique"/></a>
|
<a id="technicalDocLink" href="#"><img id="technicalDocLinkImg" src="circle-icons/tools.svg" class="disabled" title="Documentation technique"/></a>
|
||||||
<a id="rawCheckLink" href="#"><img id="rawCheckLinkImg" src="circle-icons/clipboard-mono.svg" title="Fichier propriétés analysé"/></a>
|
<a id="rawCheckLink" href="#"><img id="rawCheckLinkImg" src="circle-icons/clipboard-mono.svg" title="Fichier propriétés analysé"/></a>
|
||||||
<a id="rawLink" href="#"><img id="rawLinkImg" src="circle-icons/document-mono.svg" title="Fichier propriétés"/></a>
|
<a id="rawLink" href="#"><img id="rawLinkImg" src="circle-icons/document-mono.svg" title="Fichier propriétés"/></a>
|
||||||
|
<a id="crawlLink" href="#"><img id="crawlLinkImg" src="circle-icons/download-mono.svg" title="Statut des téléchargements"/></a>
|
||||||
<a id="statsLink" href="#"><img id="statsLinkImg" src="circle-icons/barchart-mono.svg" title="Statistiques"/></a>
|
<a id="statsLink" href="#"><img id="statsLinkImg" src="circle-icons/barchart-mono.svg" title="Statistiques"/></a>
|
||||||
<div style="display: inline-block; vertical-align: middle; font-size: smaller; margin-left: 2px; width: 35px;">
|
<div style="display: inline-block; vertical-align: middle; font-size: smaller; margin-left: 2px; width: 35px;">
|
||||||
<a id="alertLink" href="#" style="text-decoration: none;">
|
<a id="alertLink" href="#" style="text-decoration: none;">
|
||||||
|
|
|
@ -172,4 +172,43 @@ public final class URLUtils
|
||||||
//
|
//
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Equals.
|
||||||
|
*
|
||||||
|
* @param alpha
|
||||||
|
* the alpha
|
||||||
|
* @param bravo
|
||||||
|
* the bravo
|
||||||
|
* @return true, if successful
|
||||||
|
*/
|
||||||
|
public static boolean equals(final URL alpha, final URL bravo)
|
||||||
|
{
|
||||||
|
boolean result;
|
||||||
|
|
||||||
|
String alphaValue;
|
||||||
|
if (alpha == null)
|
||||||
|
{
|
||||||
|
alphaValue = null;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
alphaValue = alpha.toString();
|
||||||
|
}
|
||||||
|
|
||||||
|
String bravoValue;
|
||||||
|
if (bravo == null)
|
||||||
|
{
|
||||||
|
bravoValue = null;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
bravoValue = bravo.toString();
|
||||||
|
}
|
||||||
|
|
||||||
|
result = StringUtils.equals(alphaValue, bravoValue);
|
||||||
|
|
||||||
|
//
|
||||||
|
return result;
|
||||||
|
}
|
||||||
}
|
}
|
Loading…
Reference in a new issue