diff --git a/src/fr/devinsy/statoolinfos/HtmlizerContext.java b/src/fr/devinsy/statoolinfos/HtmlizerContext.java
index 2081f42..bafd15a 100644
--- a/src/fr/devinsy/statoolinfos/HtmlizerContext.java
+++ b/src/fr/devinsy/statoolinfos/HtmlizerContext.java
@@ -27,9 +27,6 @@ import fr.devinsy.statoolinfos.core.Factory;
import fr.devinsy.statoolinfos.core.Federation;
import fr.devinsy.statoolinfos.core.StatoolInfosException;
import fr.devinsy.statoolinfos.crawl.CrawlCache;
-import fr.devinsy.statoolinfos.crawl.CrawlJournal;
-import fr.devinsy.statoolinfos.crawl.CrawlJournalFile;
-import fr.devinsy.statoolinfos.crawl.Crawler;
/**
* The Class Manager.
@@ -47,7 +44,6 @@ public class HtmlizerContext
private Federation federation;
private Categories categories;
private CrawlCache cache;
- private CrawlJournal crawlJournal;
/**
* Instantiates a new manager.
@@ -73,23 +69,9 @@ public class HtmlizerContext
logger.info("Htmlize directory setting: {}", this.configuration.getHtmlizeDirectoryPath());
this.cache = new CrawlCache(this.configuration.getCrawlCacheDirectory());
- this.crawlJournal = CrawlJournalFile.load(this.cache.restoreFile(Crawler.getJournalURL()));
- File htmlizeInputFile = this.cache.restoreFile(this.configuration.getHtmlizeInputURL());
File htmlizeDirectory = this.configuration.getHtmlizeDirectory();
- if (htmlizeInputFile == null)
- {
- throw new IllegalArgumentException("Htmlize input undefined.");
- }
- else if (!htmlizeInputFile.exists())
- {
- throw new IllegalArgumentException("Htmlize input is missing.");
- }
- else if (htmlizeInputFile.isDirectory())
- {
- throw new IllegalArgumentException("Htmlize input is a directory.");
- }
- else if (htmlizeDirectory == null)
+ if (htmlizeDirectory == null)
{
throw new IllegalArgumentException("Htmlize directory undefined.");
}
@@ -105,7 +87,7 @@ public class HtmlizerContext
{
if (this.configuration.isFederation())
{
- this.federation = Factory.loadFederation(htmlizeInputFile, this.cache);
+ this.federation = Factory.loadFederation(this.configuration.getHtmlizeInputURL(), this.cache);
this.categories = Factory.loadCategories(this.configuration.getCategoryFile(), this.federation);
}
else
@@ -160,11 +142,6 @@ public class HtmlizerContext
return result;
}
- public CrawlJournal getCrawlJournal()
- {
- return this.crawlJournal;
- }
-
/**
* Gets the federation.
*
diff --git a/src/fr/devinsy/statoolinfos/core/Factory.java b/src/fr/devinsy/statoolinfos/core/Factory.java
index 306ff00..7c67110 100644
--- a/src/fr/devinsy/statoolinfos/core/Factory.java
+++ b/src/fr/devinsy/statoolinfos/core/Factory.java
@@ -29,6 +29,7 @@ import org.slf4j.LoggerFactory;
import fr.devinsy.statoolinfos.checker.PropertyChecker;
import fr.devinsy.statoolinfos.checker.PropertyChecks;
import fr.devinsy.statoolinfos.crawl.CrawlCache;
+import fr.devinsy.statoolinfos.crawl.CrawlJournal;
import fr.devinsy.statoolinfos.properties.PathProperties;
import fr.devinsy.statoolinfos.properties.PathProperty;
import fr.devinsy.statoolinfos.properties.PathPropertyUtils;
@@ -159,67 +160,63 @@ public class Factory
* @throws IOException
* Signals that an I/O exception has occurred.
*/
- public static Federation loadFederation(final File federationFile, final CrawlCache cache) throws StatoolInfosException, IOException
+ public static Federation loadFederation(final URL inputURL, final CrawlCache cache) throws StatoolInfosException, IOException
{
Federation result;
- PathProperties properties = PathPropertyUtils.load(federationFile);
- result = new Federation(properties);
- result.setInputFile(federationFile);
- result.setLogoFileName(result.getTechnicalName() + "-logo" + StringUtils.defaultIfBlank(cache.getExtension(result.getLogoURL()), ".png"));
-
- PropertyChecker checker = new PropertyChecker();
- PropertyChecks checks = checker.checkFederation(result.getInputFile());
- result.getInputChecks().addAll(checks);
- result.getInputChecks().setFileName(result.getLocalFileName());
-
- PathProperties subs = result.getByPrefix("subs");
- for (PathProperty property : subs)
+ if (inputURL == null)
{
- if (StringUtils.startsWith(property.getValue(), "http"))
- {
- URL inputURL = new URL(property.getValue());
- Organization organization = loadOrganization(inputURL, cache);
- if (organization != null)
- {
- organization.setFederation(result);
- result.getOrganizations().add(organization);
- }
- }
+ throw new IllegalArgumentException("Null input URL.");
}
-
- //
- return result;
- }
-
- /**
- * Load organization.
- *
- * @param organizationFile
- * the organization file
- * @param cache
- * the cache
- * @return the organization
- * @throws IOException
- * Signals that an I/O exception has occurred.
- */
- public static Organization loadOrganization(final File organizationFile, final CrawlCache cache) throws IOException
- {
- Organization result;
-
- PathProperties properties = PathPropertyUtils.load(organizationFile);
- result = new Organization(properties);
- result.setInputFile(organizationFile);
-
- PathProperties subs = result.getByPrefix("subs");
- for (PathProperty property : subs)
+ else if (cache == null)
{
- if (StringUtils.startsWith(property.getValue(), "http"))
+ throw new IllegalArgumentException("Null cache.");
+ }
+ else
+ {
+ File federationFile = cache.restoreFile(inputURL);
+ if (federationFile == null)
{
- URL serviceInputFile = new URL(property.getValue());
- Service service = loadService(serviceInputFile, cache);
- service.setOrganization(result);
- result.getServices().add(service);
+ throw new IllegalArgumentException("Htmlize input file undefined.");
+ }
+ else if (!federationFile.exists())
+ {
+ throw new IllegalArgumentException("Htmlize input file is missing.");
+ }
+ else if (federationFile.isDirectory())
+ {
+ throw new IllegalArgumentException("Htmlize input file is a directory.");
+ }
+ else
+ {
+ PathProperties properties = PathPropertyUtils.load(federationFile);
+ result = new Federation(properties);
+ result.setInputURL(inputURL);
+ result.setInputFile(federationFile);
+ result.setLogoFileName(result.getTechnicalName() + "-logo" + StringUtils.defaultIfBlank(cache.getExtension(result.getLogoURL()), ".png"));
+
+ PropertyChecker checker = new PropertyChecker();
+ PropertyChecks checks = checker.checkFederation(result.getInputFile());
+ result.getInputChecks().addAll(checks);
+ result.getInputChecks().setFileName(result.getLocalFileName());
+
+ PathProperties subs = result.getByPrefix("subs");
+ for (PathProperty property : subs)
+ {
+ if (StringUtils.startsWith(property.getValue(), "http"))
+ {
+ URL subInputURL = new URL(property.getValue());
+ Organization organization = loadOrganization(subInputURL, cache);
+ if (organization != null)
+ {
+ organization.setFederation(result);
+ result.getOrganizations().add(organization);
+ }
+ }
+ }
+
+ //
+ result.getCrawlJournal().addAll(cache.restoreJournal());
}
}
@@ -285,6 +282,14 @@ public class Factory
}
}
}
+
+ //
+ CrawlJournal journal = cache.restoreJournal();
+ result.getCrawlJournal().addAll(journal.searchByParent(result.getInputURL()));
+ for (Service service : result.getServices())
+ {
+ result.getCrawlJournal().addAll(journal.searchByParent(service.getInputURL()));
+ }
}
else
{
@@ -321,6 +326,10 @@ public class Factory
result.setInputFile(inputFile);
result.setInputURL(inputURL);
result.setLogoFileName(result.getTechnicalName() + "-logo" + StringUtils.defaultIfBlank(cache.getExtension(result.getLogoURL()), ".png"));
+
+ //
+ CrawlJournal journal = cache.restoreJournal();
+ result.getCrawlJournal().addAll(journal.searchByParent(result.getInputURL()));
}
//
diff --git a/src/fr/devinsy/statoolinfos/core/Federation.java b/src/fr/devinsy/statoolinfos/core/Federation.java
index c071616..65d9eb1 100644
--- a/src/fr/devinsy/statoolinfos/core/Federation.java
+++ b/src/fr/devinsy/statoolinfos/core/Federation.java
@@ -26,6 +26,7 @@ import java.time.LocalDateTime;
import org.apache.commons.lang3.StringUtils;
import fr.devinsy.statoolinfos.checker.PropertyChecks;
+import fr.devinsy.statoolinfos.crawl.CrawlJournal;
import fr.devinsy.statoolinfos.properties.PathProperties;
import fr.devinsy.statoolinfos.properties.PathPropertyList;
@@ -36,9 +37,11 @@ public class Federation extends PathPropertyList
{
private static final long serialVersionUID = -8970835291634661580L;
private Organizations organizations;
+ private URL inputURL;
private File inputFile;
private String logoFileName;
private PropertyChecks inputChecks;
+ private CrawlJournal crawlJournal;
/**
* Instantiates a new federation.
@@ -48,6 +51,7 @@ public class Federation extends PathPropertyList
super();
this.inputChecks = new PropertyChecks();
this.organizations = new Organizations();
+ this.crawlJournal = new CrawlJournal();
}
/**
@@ -68,7 +72,7 @@ public class Federation extends PathPropertyList
else
{
this.organizations = new Organizations();
-
+ this.crawlJournal = new CrawlJournal();
}
}
@@ -157,6 +161,11 @@ public class Federation extends PathPropertyList
return result;
}
+ public CrawlJournal getCrawlJournal()
+ {
+ return this.crawlJournal;
+ }
+
/**
* Gets the description.
*
@@ -204,6 +213,11 @@ public class Federation extends PathPropertyList
return this.inputFile;
}
+ public URL getInputURL()
+ {
+ return this.inputURL;
+ }
+
/**
* Gets the legal website.
*
@@ -435,6 +449,11 @@ public class Federation extends PathPropertyList
this.inputFile = inputFile;
}
+ public void setInputURL(final URL inputURL)
+ {
+ this.inputURL = inputURL;
+ }
+
public void setLogoFileName(final String logoFileName)
{
this.logoFileName = logoFileName;
diff --git a/src/fr/devinsy/statoolinfos/core/Organization.java b/src/fr/devinsy/statoolinfos/core/Organization.java
index 9ccc376..8d3703d 100644
--- a/src/fr/devinsy/statoolinfos/core/Organization.java
+++ b/src/fr/devinsy/statoolinfos/core/Organization.java
@@ -28,6 +28,7 @@ import org.apache.commons.codec.digest.DigestUtils;
import org.apache.commons.lang3.StringUtils;
import fr.devinsy.statoolinfos.checker.PropertyChecks;
+import fr.devinsy.statoolinfos.crawl.CrawlJournal;
import fr.devinsy.statoolinfos.properties.PathProperties;
import fr.devinsy.statoolinfos.properties.PathPropertyList;
@@ -43,6 +44,7 @@ public class Organization extends PathPropertyList
private URL inputURL;
private String logoFileName;
private PropertyChecks inputChecks;
+ private CrawlJournal crawlJournal;
/**
* Instantiates a new organization.
@@ -52,6 +54,7 @@ public class Organization extends PathPropertyList
super();
this.inputChecks = new PropertyChecks();
this.services = new Services();
+ this.crawlJournal = new CrawlJournal();
}
/**
@@ -65,6 +68,7 @@ public class Organization extends PathPropertyList
super(properties);
this.inputChecks = new PropertyChecks();
this.services = new Services();
+ this.crawlJournal = new CrawlJournal();
}
/**
@@ -165,6 +169,11 @@ public class Organization extends PathPropertyList
return result;
}
+ public CrawlJournal getCrawlJournal()
+ {
+ return this.crawlJournal;
+ }
+
public String getDescription()
{
String result;
diff --git a/src/fr/devinsy/statoolinfos/core/Service.java b/src/fr/devinsy/statoolinfos/core/Service.java
index 450e09d..4372545 100644
--- a/src/fr/devinsy/statoolinfos/core/Service.java
+++ b/src/fr/devinsy/statoolinfos/core/Service.java
@@ -33,6 +33,7 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import fr.devinsy.statoolinfos.checker.PropertyChecks;
+import fr.devinsy.statoolinfos.crawl.CrawlJournal;
import fr.devinsy.statoolinfos.metrics.Metric;
import fr.devinsy.statoolinfos.properties.PathProperties;
import fr.devinsy.statoolinfos.properties.PathProperty;
@@ -104,6 +105,7 @@ public class Service extends PathPropertyList
private URL inputURL;
private String logoFileName;
private PropertyChecks inputChecks;
+ private CrawlJournal crawlJournal;
/**
* Instantiates a new service.
@@ -123,6 +125,7 @@ public class Service extends PathPropertyList
{
super(properties);
this.inputChecks = new PropertyChecks();
+ this.crawlJournal = new CrawlJournal();
}
/**
@@ -223,6 +226,11 @@ public class Service extends PathPropertyList
return result;
}
+ public CrawlJournal getCrawlJournal()
+ {
+ return this.crawlJournal;
+ }
+
/**
* Gets the description.
*
diff --git a/src/fr/devinsy/statoolinfos/crawl/CrawlCache.java b/src/fr/devinsy/statoolinfos/crawl/CrawlCache.java
index 641dc98..ede5177 100644
--- a/src/fr/devinsy/statoolinfos/crawl/CrawlCache.java
+++ b/src/fr/devinsy/statoolinfos/crawl/CrawlCache.java
@@ -20,7 +20,9 @@ package fr.devinsy.statoolinfos.crawl;
import java.io.File;
import java.io.IOException;
+import java.net.MalformedURLException;
import java.net.URL;
+import java.nio.file.Files;
import org.apache.commons.codec.digest.DigestUtils;
import org.apache.commons.io.FileUtils;
@@ -189,6 +191,22 @@ public class CrawlCache
}
}
+ /**
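+ * @return the crawl journal loaded from the journal file restored from this cache
+ * @throws IOException Signals that an I/O exception has occurred.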
+ */
+ public CrawlJournal restoreJournal() throws IOException
+ {
+ CrawlJournal result;
+
+ File journalFile = restoreFile(getJournalURL());
+
+ result = CrawlJournalFile.load(journalFile);
+
+ //
+ return result;
+ }
+
/**
* Restore logo to.
*
@@ -292,6 +310,24 @@ public class CrawlCache
return result;
}
+ /**
+ * Store journal.
+ */
+ public void storeJournal(final CrawlJournal journal)
+ {
+ try
+ {
+ File file = Files.createTempFile("tmp-", ".statoolsinfos").toFile();
+ CrawlJournalFile.save(file, journal);
+ store(getJournalURL(), file);
+ file.delete();
+ }
+ catch (IOException exception)
+ {
+ exception.printStackTrace();
+ }
+ }
+
/**
* Store.
*
@@ -356,4 +392,20 @@ public class CrawlCache
//
return result;
}
+
+ /**
+ * Gets the journal URL.
+ *
+ * @return the journal URL
+ * @throws MalformedURLException
+ */
+ public static URL getJournalURL() throws MalformedURLException
+ {
+ URL result;
+
+ result = new URL("http://localhost/crawl.journal");
+
+ //
+ return result;
+ }
}
diff --git a/src/fr/devinsy/statoolinfos/crawl/CrawlJournalFile.java b/src/fr/devinsy/statoolinfos/crawl/CrawlJournalFile.java
index bb0d25b..cfceae0 100644
--- a/src/fr/devinsy/statoolinfos/crawl/CrawlJournalFile.java
+++ b/src/fr/devinsy/statoolinfos/crawl/CrawlJournalFile.java
@@ -34,6 +34,7 @@ import java.time.LocalDateTime;
import java.time.ZoneOffset;
import org.apache.commons.io.IOUtils;
+import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -187,14 +188,33 @@ public class CrawlJournalFile
}
else
{
- String[] tokens = line.split(" ", 2);
+ String[] tokens = line.split(" ", 3);
CrawlStatus status = CrawlStatus.valueOf(tokens[0].toUpperCase());
+ URL parentURL;
+ try
+ {
+ if ((tokens.length < 3) || StringUtils.equals(tokens[1], "null"))
+ {
+ parentURL = null;
+ }
+ else
+ {
+ parentURL = new URL(tokens[1].trim());
+ }
+ }
+ catch (MalformedURLException exception)
+ {
+ logger.error("Error valuing [{}]", line);
+ exception.printStackTrace();
+ parentURL = null;
+ }
+ // Legacy two-token lines ("STATUS URL") have no parent column: the crawled URL is always the last token.
URL url;
try
{
- url = new URL(tokens[1].trim());
+ url = new URL(tokens[tokens.length - 1].trim());
}
catch (MalformedURLException exception)
{
@@ -203,7 +223,7 @@ public class CrawlJournalFile
url = null;
}
- result = new CrawlLog(url, status);
+ result = new CrawlLog(url, parentURL, status);
}
//
@@ -226,7 +246,7 @@ public class CrawlJournalFile
{
for (CrawlLog log : journal)
{
- String line = log.getStatus() + " " + log.getUrl();
+ String line = String.format("%s %s %s", log.getStatus(), log.getParentUrl(), log.getUrl());
out.write(line);
out.write("\n");
}
diff --git a/src/fr/devinsy/statoolinfos/crawl/CrawlLog.java b/src/fr/devinsy/statoolinfos/crawl/CrawlLog.java
index fba147f..9395a21 100644
--- a/src/fr/devinsy/statoolinfos/crawl/CrawlLog.java
+++ b/src/fr/devinsy/statoolinfos/crawl/CrawlLog.java
@@ -26,6 +26,7 @@ import java.net.URL;
public class CrawlLog
{
private URL url;
+ private URL parentUrl;
private CrawlStatus status;
/**
@@ -36,12 +37,40 @@ public class CrawlLog
* @param status
* the status
*/
- public CrawlLog(final URL url, final CrawlStatus status)
+ public CrawlLog(final URL url, final URL parentUrl, final CrawlStatus status)
{
this.url = url;
+ this.parentUrl = parentUrl;
this.status = status;
}
+ public URL getParentUrl()
+ {
+ return this.parentUrl;
+ }
+
+ /**
+ * Gets the parent url value.
+ *
+ * @return the parent url value
+ */
+ public String getParentUrlValue()
+ {
+ String result;
+
+ if (this.parentUrl == null)
+ {
+ result = null;
+ }
+ else
+ {
+ result = this.parentUrl.toString();
+ }
+
+ //
+ return result;
+ }
+
public CrawlStatus getStatus()
{
return this.status;
diff --git a/src/fr/devinsy/statoolinfos/crawl/CrawlLogs.java b/src/fr/devinsy/statoolinfos/crawl/CrawlLogs.java
index f80f898..4e11f6d 100644
--- a/src/fr/devinsy/statoolinfos/crawl/CrawlLogs.java
+++ b/src/fr/devinsy/statoolinfos/crawl/CrawlLogs.java
@@ -24,6 +24,8 @@ import java.util.Collections;
import org.apache.commons.lang3.StringUtils;
+import fr.devinsy.statoolinfos.util.URLUtils;
+
/**
* The Class CrawlLogs.
*/
@@ -39,6 +41,27 @@ public class CrawlLogs extends ArrayList
super();
}
+ /* (non-Javadoc)
+ * @see java.util.ArrayList#add(java.lang.Object)
+ */
+ @Override
+ public boolean add(final CrawlLog log)
+ {
+ boolean result;
+
+ if (log == null)
+ {
+ result = false;
+ }
+ else
+ {
+ result = super.add(log);
+ }
+
+ //
+ return result;
+ }
+
/**
* Adds the.
*
@@ -47,9 +70,9 @@ public class CrawlLogs extends ArrayList
* @param status
* the status
*/
- public void add(final URL url, final CrawlStatus status)
+ public void add(final URL url, final URL parentUrl, final CrawlStatus status)
{
- this.add(new CrawlLog(url, status));
+ this.add(new CrawlLog(url, parentUrl, status));
}
/**
@@ -139,4 +162,29 @@ public class CrawlLogs extends ArrayList
//
return result;
}
+
+ /**
+ * Searches the logs whose parent URL matches the given one.
+ *
+ * @param parentURL
+ * the parent URL
+ * @return a new list holding the logs attached to that parent URL
+ */
+ public CrawlLogs searchByParent(final URL parentURL)
+ {
+ CrawlLogs result;
+
+ result = new CrawlLogs();
+
+ for (CrawlLog log : this)
+ {
+ if (URLUtils.equals(log.getParentUrl(), parentURL))
+ {
+ result.add(log);
+ }
+ }
+
+ //
+ return result;
+ }
}
diff --git a/src/fr/devinsy/statoolinfos/crawl/Crawler.java b/src/fr/devinsy/statoolinfos/crawl/Crawler.java
index d8384d2..729915e 100644
--- a/src/fr/devinsy/statoolinfos/crawl/Crawler.java
+++ b/src/fr/devinsy/statoolinfos/crawl/Crawler.java
@@ -21,7 +21,6 @@ package fr.devinsy.statoolinfos.crawl;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
-import java.net.MalformedURLException;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
@@ -89,7 +88,7 @@ public class Crawler
*/
public void crawl(final URL url) throws StatoolInfosException, IOException
{
- crawl(url, null);
+ crawl(url, null, null);
}
/**
@@ -104,7 +103,7 @@ public class Crawler
* @throws IOException
* Signals that an I/O exception has occurred.
*/
- public void crawl(final URL url, final PropertyClassType parent)
+ public void crawl(final URL url, final URL parentURL, final PropertyClassType parent)
{
logger.info("Crawling {}", url);
@@ -118,21 +117,21 @@ public class Crawler
catch (java.net.ConnectException exception)
{
logger.error("ERROR: crawl failed for [{}]: {}", url.toString(), exception.getMessage());
- this.journal.add(url, CrawlStatus.CONNECTERROR);
+ this.journal.add(url, parentURL, CrawlStatus.CONNECTERROR);
downloadFile = null;
exception.printStackTrace();
}
catch (FileNotFoundException exception)
{
logger.error("ERROR: crawl failed for [{}]: {}", url.toString(), exception.getMessage());
- this.journal.add(url, CrawlStatus.URLNOTFOUND);
+ this.journal.add(url, parentURL, CrawlStatus.URLNOTFOUND);
downloadFile = null;
exception.printStackTrace();
}
catch (IOException exception)
{
logger.error("ERROR: crawl failed for [{}]: {}", url.toString(), exception.getMessage());
- this.journal.add(url, CrawlStatus.DOWNLOADERROR);
+ this.journal.add(url, parentURL, CrawlStatus.DOWNLOADERROR);
downloadFile = null;
exception.printStackTrace();
}
@@ -142,12 +141,12 @@ public class Crawler
if (!downloadFile.exists())
{
logger.error("ERROR: download missing.");
- this.journal.add(url, CrawlStatus.MISSING);
+ this.journal.add(url, parentURL, CrawlStatus.MISSING);
}
else if (downloadFile.length() == 0)
{
logger.error("ERROR: download empty.");
- this.journal.add(url, CrawlStatus.EMPTY);
+ this.journal.add(url, parentURL, CrawlStatus.EMPTY);
}
else
{
@@ -157,7 +156,7 @@ public class Crawler
if ((downloadClass == null) || (!downloadClass.isChildOf(parent)))
{
logger.error("ERROR: bad child class [{}][{}].", downloadClass, parent);
- this.journal.add(url, CrawlStatus.BADCHILDCLASS);
+ this.journal.add(url, parentURL, CrawlStatus.BADCHILDCLASS);
}
else
{
@@ -176,7 +175,7 @@ public class Crawler
String downloadSha = StatoolInfosUtils.sha1sum(downloadFile);
if (StringUtils.equals(downloadSha, storedSha))
{
- this.journal.add(url, CrawlStatus.SUCCESS);
+ this.journal.add(url, parentURL, CrawlStatus.SUCCESS);
}
else
{
@@ -199,13 +198,13 @@ public class Crawler
downloadFile.delete();
//
- this.journal.add(url, CrawlStatus.UPDATED);
+ this.journal.add(url, parentURL, CrawlStatus.UPDATED);
}
// Cache another resources.
- crawlLogo(downloadProperties.getURL("federation.logo"));
- crawlLogo(downloadProperties.getURL("organization.logo"));
- crawlLogo(downloadProperties.getURL("service.logo"));
+ crawlLogo(downloadProperties.getURL("federation.logo"), url);
+ crawlLogo(downloadProperties.getURL("organization.logo"), url);
+ crawlLogo(downloadProperties.getURL("service.logo"), url);
// Do subs.
PathProperties subs = downloadProperties.getByPrefix("subs");
@@ -216,12 +215,12 @@ public class Crawler
try
{
URL subUrl = new URL(property.getValue());
- crawl(subUrl, downloadClass);
+ crawl(subUrl, url, downloadClass);
}
catch (java.net.MalformedURLException exception)
{
logger.error("ERROR: subcrawl failed for [{}][{}][{}]: {}", url.toString(), property.getPath(), property.getValue(), exception.getMessage());
- this.journal.add(url, CrawlStatus.BADURLFORMAT);
+ this.journal.add(url, parentURL, CrawlStatus.BADURLFORMAT);
exception.printStackTrace();
}
}
@@ -232,7 +231,7 @@ public class Crawler
}
catch (IOException exception)
{
- this.journal.add(url, CrawlStatus.IOERROR);
+ this.journal.add(url, parentURL, CrawlStatus.IOERROR);
}
}
@@ -243,7 +242,7 @@ public class Crawler
* the url
* @return the file
*/
- public File crawlLogo(final URL url)
+ public File crawlLogo(final URL url, final URL parentURL)
{
File result;
@@ -265,19 +264,19 @@ public class Crawler
catch (java.net.ConnectException exception)
{
logger.error("ERROR: crawl failed (1) for [{}]: {}", url.toString(), exception.getMessage());
- this.journal.add(url, CrawlStatus.CONNECTERROR);
+ this.journal.add(url, parentURL, CrawlStatus.CONNECTERROR);
logoFile = null;
}
catch (FileNotFoundException exception)
{
logger.error("ERROR: crawl failed (2) for [{}]: {}", url.toString(), exception.getMessage());
- this.journal.add(url, CrawlStatus.URLNOTFOUND);
+ this.journal.add(url, parentURL, CrawlStatus.URLNOTFOUND);
logoFile = null;
}
catch (IOException exception)
{
logger.error("ERROR: crawl failed (3) for [{}]: {}", url.toString(), exception.getMessage());
- this.journal.add(url, CrawlStatus.DOWNLOADERROR);
+ this.journal.add(url, parentURL, CrawlStatus.DOWNLOADERROR);
logoFile = null;
}
@@ -288,7 +287,7 @@ public class Crawler
else
{
result = this.cache.store(url, logoFile);
- this.journal.add(url, CrawlStatus.SUCCESS);
+ this.journal.add(url, parentURL, CrawlStatus.SUCCESS);
logoFile.delete();
}
}
@@ -344,9 +343,7 @@ public class Crawler
logger.info("Restoring crawl journal.");
- File journalFile = this.cache.restoreFile(getJournalURL());
-
- result = CrawlJournalFile.load(journalFile);
+ result = this.cache.restoreJournal();
//
return result;
@@ -357,34 +354,7 @@ public class Crawler
*/
public void storeJournal()
{
- try
- {
- logger.info("Storing crawl journal.");
- File file = Files.createTempFile("tmp-", ".statoolsinfos").toFile();
-
- CrawlJournalFile.save(file, this.journal);
- this.cache.store(getJournalURL(), file);
- file.delete();
- }
- catch (IOException exception)
- {
- exception.printStackTrace();
- }
- }
-
- /**
- * Gets the journal URL.
- *
- * @return the journal URL
- * @throws MalformedURLException
- */
- public static URL getJournalURL() throws MalformedURLException
- {
- URL result;
-
- result = new URL("http://localhost/crawl.journal");
-
- //
- return result;
+ logger.info("Storing crawl journal.");
+ this.cache.storeJournal(this.journal);
}
}
diff --git a/src/fr/devinsy/statoolinfos/htmlize/CrawlJournalPage.java b/src/fr/devinsy/statoolinfos/htmlize/CrawlJournalPage.java
index 3b649e7..acf6af1 100644
--- a/src/fr/devinsy/statoolinfos/htmlize/CrawlJournalPage.java
+++ b/src/fr/devinsy/statoolinfos/htmlize/CrawlJournalPage.java
@@ -21,13 +21,17 @@ package fr.devinsy.statoolinfos.htmlize;
import java.io.File;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
+import java.time.format.DateTimeFormatter;
import org.apache.commons.io.FileUtils;
+import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import fr.devinsy.statoolinfos.HtmlizerContext;
import fr.devinsy.statoolinfos.core.Federation;
+import fr.devinsy.statoolinfos.core.Organization;
+import fr.devinsy.statoolinfos.core.Service;
import fr.devinsy.statoolinfos.core.StatoolInfosException;
import fr.devinsy.statoolinfos.crawl.CrawlCache;
import fr.devinsy.statoolinfos.crawl.CrawlJournal;
@@ -56,9 +60,20 @@ public class CrawlJournalPage
File htmlizeDirectory = HtmlizerContext.instance().getHtmlizeDirectory();
logger.info("Htmlize Crawl Journal pages.");
- CrawlJournal journal = HtmlizerContext.instance().getCrawlJournal();
- String page = htmlize("Journal des téléchargements", journal);
+ String page = htmlize("Journal des téléchargements", federation.getCrawlJournal());
FileUtils.write(new File(htmlizeDirectory, federation.getTechnicalName() + "-crawl.xhtml"), page, StandardCharsets.UTF_8);
+
+ for (Organization organization : federation.getOrganizations())
+ {
+ page = htmlize("Journal des téléchargements de " + organization.getName(), organization.getCrawlJournal());
+ FileUtils.write(new File(htmlizeDirectory, organization.getTechnicalName() + "-crawl.xhtml"), page, StandardCharsets.UTF_8);
+ }
+
+ for (Service service : federation.getAllServices())
+ {
+ page = htmlize("Journal des téléchargements de " + service.getName(), service.getCrawlJournal());
+ FileUtils.write(new File(htmlizeDirectory, service.getOrganization().getTechnicalName() + "-" + service.getTechnicalName() + "-crawl.xhtml"), page, StandardCharsets.UTF_8);
+ }
}
/**
@@ -83,7 +98,7 @@ public class CrawlJournalPage
TagDataManager data = new TagDataManager();
data.setEscapedContent("title", title);
- data.setContent("date", journal.getDatetime().toString());
+ data.setContent("date", journal.getDatetime().format(DateTimeFormatter.ofPattern("dd/MM/yyyy HH:mm")));
data.setContent("totalCount", journal.size());
data.setContent("errorCount", journal.getErrors().size());
@@ -92,6 +107,8 @@ public class CrawlJournalPage
{
data.setEscapedContent("crawlLogLine", index, "crawlLogLineUrlLink", log.getUrl().toString());
data.setEscapedAttribute("crawlLogLine", index, "crawlLogLineUrlLink", "href", log.getUrl().toString());
+ data.setEscapedContent("crawlLogLine", index, "crawlLogLineParentUrlLink", StringUtils.abbreviate(log.getParentUrlValue(), 35));
+ data.setEscapedAttribute("crawlLogLine", index, "crawlLogLineParentUrlLink", "href", StringUtils.defaultString(log.getParentUrlValue(), "#"));
data.setContent("crawlLogLine", index, "crawlLogLineStatus", log.getStatus().toString());
if (log.getStatus().isError())
diff --git a/src/fr/devinsy/statoolinfos/htmlize/FederationPage.java b/src/fr/devinsy/statoolinfos/htmlize/FederationPage.java
index 6540f5d..590ab59 100644
--- a/src/fr/devinsy/statoolinfos/htmlize/FederationPage.java
+++ b/src/fr/devinsy/statoolinfos/htmlize/FederationPage.java
@@ -85,8 +85,9 @@ public class FederationPage
* @return the string
* @throws StatoolInfosException
* the statool infos exception
+ * @throws IOException
*/
- public static String htmlize(final Federation federation) throws StatoolInfosException
+ public static String htmlize(final Federation federation) throws StatoolInfosException, IOException
{
String result;
@@ -110,8 +111,7 @@ public class FederationPage
data.setAttribute("statsLink", "href", federation.getTechnicalName() + "-stats.xhtml");
data.setAttribute("crawlLink", "href", federation.getTechnicalName() + "-crawl.xhtml");
-
- if (HtmlizerContext.instance().getCrawlJournal().getErrors().isEmpty())
+ if (federation.getCrawlJournal().getErrors().isEmpty())
{
data.setAttribute("crawlLinkImg", "src", "circle-icons/download-mono.svg");
}
diff --git a/src/fr/devinsy/statoolinfos/htmlize/OrganizationPage.java b/src/fr/devinsy/statoolinfos/htmlize/OrganizationPage.java
index 4ba9db4..2a1508b 100644
--- a/src/fr/devinsy/statoolinfos/htmlize/OrganizationPage.java
+++ b/src/fr/devinsy/statoolinfos/htmlize/OrganizationPage.java
@@ -152,6 +152,16 @@ public class OrganizationPage
data.setAttribute("statsLink", "href", organization.getTechnicalName() + "-stats.xhtml");
+ data.setAttribute("crawlLink", "href", organization.getTechnicalName() + "-crawl.xhtml");
+ if (organization.getCrawlJournal().getErrors().isEmpty())
+ {
+ data.setAttribute("crawlLinkImg", "src", "circle-icons/download-mono.svg");
+ }
+ else
+ {
+ data.setAttribute("crawlLinkImg", "src", "circle-icons/download.svg");
+ }
+
{
PropertyChecks checks = organization.getInputChecksAll();
@@ -239,4 +249,5 @@ public class OrganizationPage
FileUtils.copyFile(logoFile, target);
}
}
+
}
diff --git a/src/fr/devinsy/statoolinfos/htmlize/ServicePage.java b/src/fr/devinsy/statoolinfos/htmlize/ServicePage.java
index 929019a..df46d50 100644
--- a/src/fr/devinsy/statoolinfos/htmlize/ServicePage.java
+++ b/src/fr/devinsy/statoolinfos/htmlize/ServicePage.java
@@ -221,6 +221,16 @@ public class ServicePage
data.getIdData("softwareSourceLinkImg").getAttribute("class").setMode(DisplayMode.REPLACE);
}
+ data.setAttribute("crawlLink", "href", service.getOrganization().getTechnicalName() + "-" + service.getTechnicalName() + "-crawl.xhtml");
+ if (service.getCrawlJournal().getErrors().isEmpty())
+ {
+ data.setAttribute("crawlLinkImg", "src", "circle-icons/download-mono.svg");
+ }
+ else
+ {
+ data.setAttribute("crawlLinkImg", "src", "circle-icons/download.svg");
+ }
+
{
PropertyChecks checks = service.getInputChecks();
data.setContent("errorCount", checks.getErrorCount());
diff --git a/src/fr/devinsy/statoolinfos/htmlize/crawlJournal.xhtml b/src/fr/devinsy/statoolinfos/htmlize/crawlJournal.xhtml
index d7dd066..103691b 100644
--- a/src/fr/devinsy/statoolinfos/htmlize/crawlJournal.xhtml
+++ b/src/fr/devinsy/statoolinfos/htmlize/crawlJournal.xhtml
@@ -20,16 +20,18 @@
Date : n/a
-