From 68906ed88ac0f72e6e4e6783ac975c8ba8efbb5e Mon Sep 17 00:00:00 2001 From: "Christian P. MOMON" Date: Tue, 18 May 2021 19:40:29 +0200 Subject: [PATCH] Added crawl page for organizations and services. --- .../devinsy/statoolinfos/HtmlizerContext.java | 27 +--- src/fr/devinsy/statoolinfos/core/Factory.java | 117 ++++++++++-------- .../devinsy/statoolinfos/core/Federation.java | 21 +++- .../statoolinfos/core/Organization.java | 9 ++ src/fr/devinsy/statoolinfos/core/Service.java | 8 ++ .../statoolinfos/crawl/CrawlCache.java | 52 ++++++++ .../statoolinfos/crawl/CrawlJournalFile.java | 28 ++++- .../devinsy/statoolinfos/crawl/CrawlLog.java | 31 ++++- .../devinsy/statoolinfos/crawl/CrawlLogs.java | 52 +++++++- .../devinsy/statoolinfos/crawl/Crawler.java | 78 ++++-------- .../htmlize/CrawlJournalPage.java | 23 +++- .../statoolinfos/htmlize/FederationPage.java | 6 +- .../htmlize/OrganizationPage.java | 11 ++ .../statoolinfos/htmlize/ServicePage.java | 10 ++ .../statoolinfos/htmlize/crawlJournal.xhtml | 6 +- .../statoolinfos/htmlize/organization.xhtml | 1 + .../statoolinfos/htmlize/service.xhtml | 1 + .../devinsy/statoolinfos/util/URLUtils.java | 39 ++++++ 18 files changed, 371 insertions(+), 149 deletions(-) diff --git a/src/fr/devinsy/statoolinfos/HtmlizerContext.java b/src/fr/devinsy/statoolinfos/HtmlizerContext.java index 2081f42..bafd15a 100644 --- a/src/fr/devinsy/statoolinfos/HtmlizerContext.java +++ b/src/fr/devinsy/statoolinfos/HtmlizerContext.java @@ -27,9 +27,6 @@ import fr.devinsy.statoolinfos.core.Factory; import fr.devinsy.statoolinfos.core.Federation; import fr.devinsy.statoolinfos.core.StatoolInfosException; import fr.devinsy.statoolinfos.crawl.CrawlCache; -import fr.devinsy.statoolinfos.crawl.CrawlJournal; -import fr.devinsy.statoolinfos.crawl.CrawlJournalFile; -import fr.devinsy.statoolinfos.crawl.Crawler; /** * The Class Manager. @@ -47,7 +44,6 @@ public class HtmlizerContext private Federation federation; private Categories categories; private CrawlCache cache; - private CrawlJournal crawlJournal; /** * Instantiates a new manager. @@ -73,23 +69,9 @@ public class HtmlizerContext logger.info("Htmlize directory setting: {}", this.configuration.getHtmlizeDirectoryPath()); this.cache = new CrawlCache(this.configuration.getCrawlCacheDirectory()); - this.crawlJournal = CrawlJournalFile.load(this.cache.restoreFile(Crawler.getJournalURL())); - File htmlizeInputFile = this.cache.restoreFile(this.configuration.getHtmlizeInputURL()); File htmlizeDirectory = this.configuration.getHtmlizeDirectory(); - if (htmlizeInputFile == null) - { - throw new IllegalArgumentException("Htmlize input undefined."); - } - else if (!htmlizeInputFile.exists()) - { - throw new IllegalArgumentException("Htmlize input is missing."); - } - else if (htmlizeInputFile.isDirectory()) - { - throw new IllegalArgumentException("Htmlize input is a directory."); - } - else if (htmlizeDirectory == null) + if (htmlizeDirectory == null) { throw new IllegalArgumentException("Htmlize directory undefined."); } @@ -105,7 +87,7 @@ public class HtmlizerContext { if (this.configuration.isFederation()) { - this.federation = Factory.loadFederation(htmlizeInputFile, this.cache); + this.federation = Factory.loadFederation(this.configuration.getHtmlizeInputURL(), this.cache); this.categories = Factory.loadCategories(this.configuration.getCategoryFile(), this.federation); } else @@ -160,11 +142,6 @@ public class HtmlizerContext return result; } - public CrawlJournal getCrawlJournal() - { - return this.crawlJournal; - } - /** * Gets the federation. * diff --git a/src/fr/devinsy/statoolinfos/core/Factory.java b/src/fr/devinsy/statoolinfos/core/Factory.java index 306ff00..7c67110 100644 --- a/src/fr/devinsy/statoolinfos/core/Factory.java +++ b/src/fr/devinsy/statoolinfos/core/Factory.java @@ -29,6 +29,7 @@ import org.slf4j.LoggerFactory; import fr.devinsy.statoolinfos.checker.PropertyChecker; import fr.devinsy.statoolinfos.checker.PropertyChecks; import fr.devinsy.statoolinfos.crawl.CrawlCache; +import fr.devinsy.statoolinfos.crawl.CrawlJournal; import fr.devinsy.statoolinfos.properties.PathProperties; import fr.devinsy.statoolinfos.properties.PathProperty; import fr.devinsy.statoolinfos.properties.PathPropertyUtils; @@ -159,67 +160,63 @@ public class Factory * @throws IOException * Signals that an I/O exception has occurred. */ - public static Federation loadFederation(final File federationFile, final CrawlCache cache) throws StatoolInfosException, IOException + public static Federation loadFederation(final URL inputURL, final CrawlCache cache) throws StatoolInfosException, IOException { Federation result; - PathProperties properties = PathPropertyUtils.load(federationFile); - result = new Federation(properties); - result.setInputFile(federationFile); - result.setLogoFileName(result.getTechnicalName() + "-logo" + StringUtils.defaultIfBlank(cache.getExtension(result.getLogoURL()), ".png")); - - PropertyChecker checker = new PropertyChecker(); - PropertyChecks checks = checker.checkFederation(result.getInputFile()); - result.getInputChecks().addAll(checks); - result.getInputChecks().setFileName(result.getLocalFileName()); - - PathProperties subs = result.getByPrefix("subs"); - for (PathProperty property : subs) + if (inputURL == null) { - if (StringUtils.startsWith(property.getValue(), "http")) - { - URL inputURL = new URL(property.getValue()); - Organization organization = loadOrganization(inputURL, cache); - if (organization != null) - { - organization.setFederation(result); - result.getOrganizations().add(organization); - } - } + throw new IllegalArgumentException("Null input URL."); } - - // - return result; - } - - /** - * Load organization. - * - * @param organizationFile - * the organization file - * @param cache - * the cache - * @return the organization - * @throws IOException - * Signals that an I/O exception has occurred. - */ - public static Organization loadOrganization(final File organizationFile, final CrawlCache cache) throws IOException - { - Organization result; - - PathProperties properties = PathPropertyUtils.load(organizationFile); - result = new Organization(properties); - result.setInputFile(organizationFile); - - PathProperties subs = result.getByPrefix("subs"); - for (PathProperty property : subs) + else if (cache == null) { - if (StringUtils.startsWith(property.getValue(), "http")) + throw new IllegalArgumentException("Null cache URL."); + } + else + { + File federationFile = cache.restoreFile(inputURL); + if (federationFile == null) { - URL serviceInputFile = new URL(property.getValue()); - Service service = loadService(serviceInputFile, cache); - service.setOrganization(result); - result.getServices().add(service); + throw new IllegalArgumentException("Htmlize input file undefined."); + } + else if (!federationFile.exists()) + { + throw new IllegalArgumentException("Htmlize input file is missing."); + } + else if (federationFile.isDirectory()) + { + throw new IllegalArgumentException("Htmlize input file is a directory."); + } + else + { + PathProperties properties = PathPropertyUtils.load(federationFile); + result = new Federation(properties); + result.setInputURL(inputURL); + result.setInputFile(federationFile); + result.setLogoFileName(result.getTechnicalName() + "-logo" + StringUtils.defaultIfBlank(cache.getExtension(result.getLogoURL()), ".png")); + + PropertyChecker checker = new PropertyChecker(); + PropertyChecks checks = checker.checkFederation(result.getInputFile()); + result.getInputChecks().addAll(checks); + result.getInputChecks().setFileName(result.getLocalFileName()); + + PathProperties subs = result.getByPrefix("subs"); + for (PathProperty property : subs) + { + if (StringUtils.startsWith(property.getValue(), "http")) + { + URL subInputURL = new URL(property.getValue()); + Organization organization = loadOrganization(subInputURL, cache); + if (organization != null) + { + organization.setFederation(result); + result.getOrganizations().add(organization); + } + } + } + + // + result.getCrawlJournal().addAll(cache.restoreJournal()); } } @@ -285,6 +282,14 @@ public class Factory } } } + + // + CrawlJournal journal = cache.restoreJournal(); + result.getCrawlJournal().addAll(journal.searchByParent(result.getInputURL())); + for (Service service : result.getServices()) + { + result.getCrawlJournal().addAll(journal.searchByParent(service.getInputURL())); + } } else { @@ -321,6 +326,10 @@ public class Factory result.setInputFile(inputFile); result.setInputURL(inputURL); result.setLogoFileName(result.getTechnicalName() + "-logo" + StringUtils.defaultIfBlank(cache.getExtension(result.getLogoURL()), ".png")); + + // + CrawlJournal journal = cache.restoreJournal(); + result.getCrawlJournal().addAll(journal.searchByParent(result.getInputURL())); } // diff --git a/src/fr/devinsy/statoolinfos/core/Federation.java b/src/fr/devinsy/statoolinfos/core/Federation.java index c071616..65d9eb1 100644 --- a/src/fr/devinsy/statoolinfos/core/Federation.java +++ b/src/fr/devinsy/statoolinfos/core/Federation.java @@ -26,6 +26,7 @@ import java.time.LocalDateTime; import org.apache.commons.lang3.StringUtils; import fr.devinsy.statoolinfos.checker.PropertyChecks; +import fr.devinsy.statoolinfos.crawl.CrawlJournal; import fr.devinsy.statoolinfos.properties.PathProperties; import fr.devinsy.statoolinfos.properties.PathPropertyList; @@ -36,9 +37,11 @@ public class Federation extends PathPropertyList { private static final long serialVersionUID = -8970835291634661580L; private Organizations organizations; + private URL inputURL; private File inputFile; private String logoFileName; private PropertyChecks inputChecks; + private CrawlJournal crawlJournal; /** * Instantiates a new federation. @@ -48,6 +51,7 @@ public class Federation extends PathPropertyList super(); this.inputChecks = new PropertyChecks(); this.organizations = new Organizations(); + this.crawlJournal = new CrawlJournal(); } /** @@ -68,7 +72,7 @@ public class Federation extends PathPropertyList else { this.organizations = new Organizations(); - + this.crawlJournal = new CrawlJournal(); } } @@ -157,6 +161,11 @@ public class Federation extends PathPropertyList return result; } + public CrawlJournal getCrawlJournal() + { + return this.crawlJournal; + } + /** * Gets the description. * @@ -204,6 +213,11 @@ public class Federation extends PathPropertyList return this.inputFile; } + public URL getInputURL() + { + return this.inputURL; + } + /** * Gets the legal website. * @@ -435,6 +449,11 @@ public class Federation extends PathPropertyList this.inputFile = inputFile; } + public void setInputURL(final URL inputURL) + { + this.inputURL = inputURL; + } + public void setLogoFileName(final String logoFileName) { this.logoFileName = logoFileName; diff --git a/src/fr/devinsy/statoolinfos/core/Organization.java b/src/fr/devinsy/statoolinfos/core/Organization.java index 9ccc376..8d3703d 100644 --- a/src/fr/devinsy/statoolinfos/core/Organization.java +++ b/src/fr/devinsy/statoolinfos/core/Organization.java @@ -28,6 +28,7 @@ import org.apache.commons.codec.digest.DigestUtils; import org.apache.commons.lang3.StringUtils; import fr.devinsy.statoolinfos.checker.PropertyChecks; +import fr.devinsy.statoolinfos.crawl.CrawlJournal; import fr.devinsy.statoolinfos.properties.PathProperties; import fr.devinsy.statoolinfos.properties.PathPropertyList; @@ -43,6 +44,7 @@ public class Organization extends PathPropertyList private URL inputURL; private String logoFileName; private PropertyChecks inputChecks; + private CrawlJournal crawlJournal; /** * Instantiates a new organization. @@ -52,6 +54,7 @@ public class Organization extends PathPropertyList super(); this.inputChecks = new PropertyChecks(); this.services = new Services(); + this.crawlJournal = new CrawlJournal(); } /** @@ -65,6 +68,7 @@ public class Organization extends PathPropertyList super(properties); this.inputChecks = new PropertyChecks(); this.services = new Services(); + this.crawlJournal = new CrawlJournal(); } /** @@ -165,6 +169,11 @@ public class Organization extends PathPropertyList return result; } + public CrawlJournal getCrawlJournal() + { + return this.crawlJournal; + } + public String getDescription() { String result; diff --git a/src/fr/devinsy/statoolinfos/core/Service.java b/src/fr/devinsy/statoolinfos/core/Service.java index 450e09d..4372545 100644 --- a/src/fr/devinsy/statoolinfos/core/Service.java +++ b/src/fr/devinsy/statoolinfos/core/Service.java @@ -33,6 +33,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import fr.devinsy.statoolinfos.checker.PropertyChecks; +import fr.devinsy.statoolinfos.crawl.CrawlJournal; import fr.devinsy.statoolinfos.metrics.Metric; import fr.devinsy.statoolinfos.properties.PathProperties; import fr.devinsy.statoolinfos.properties.PathProperty; @@ -104,6 +105,7 @@ public class Service extends PathPropertyList private URL inputURL; private String logoFileName; private PropertyChecks inputChecks; + private CrawlJournal crawlJournal; /** * Instantiates a new service. @@ -123,6 +125,7 @@ public class Service extends PathPropertyList { super(properties); this.inputChecks = new PropertyChecks(); + this.crawlJournal = new CrawlJournal(); } /** @@ -223,6 +226,11 @@ public class Service extends PathPropertyList return result; } + public CrawlJournal getCrawlJournal() + { + return this.crawlJournal; + } + /** * Gets the description. * diff --git a/src/fr/devinsy/statoolinfos/crawl/CrawlCache.java b/src/fr/devinsy/statoolinfos/crawl/CrawlCache.java index 641dc98..ede5177 100644 --- a/src/fr/devinsy/statoolinfos/crawl/CrawlCache.java +++ b/src/fr/devinsy/statoolinfos/crawl/CrawlCache.java @@ -20,7 +20,9 @@ package fr.devinsy.statoolinfos.crawl; import java.io.File; import java.io.IOException; +import java.net.MalformedURLException; import java.net.URL; +import java.nio.file.Files; import org.apache.commons.codec.digest.DigestUtils; import org.apache.commons.io.FileUtils; @@ -189,6 +191,22 @@ public class CrawlCache } } + /** + * @return + * @throws IOException + */ + public CrawlJournal restoreJournal() throws IOException + { + CrawlJournal result; + + File journalFile = restoreFile(getJournalURL()); + + result = CrawlJournalFile.load(journalFile); + + // + return result; + } + /** * Restore logo to. * @@ -292,6 +310,24 @@ public class CrawlCache return result; } + /** + * Store journal. + */ + public void storeJournal(final CrawlJournal journal) + { + try + { + File file = Files.createTempFile("tmp-", ".statoolsinfos").toFile(); + CrawlJournalFile.save(file, journal); + store(getJournalURL(), file); + file.delete(); + } + catch (IOException exception) + { + exception.printStackTrace(); + } + } + /** * Store. * @@ -356,4 +392,20 @@ public class CrawlCache // return result; } + + /** + * Gets the journal URL. + * + * @return the journal URL + * @throws MalformedURLException + */ + public static URL getJournalURL() throws MalformedURLException + { + URL result; + + result = new URL("http://localhost/crawl.journal"); + + // + return result; + } } diff --git a/src/fr/devinsy/statoolinfos/crawl/CrawlJournalFile.java b/src/fr/devinsy/statoolinfos/crawl/CrawlJournalFile.java index bb0d25b..cfceae0 100644 --- a/src/fr/devinsy/statoolinfos/crawl/CrawlJournalFile.java +++ b/src/fr/devinsy/statoolinfos/crawl/CrawlJournalFile.java @@ -34,6 +34,7 @@ import java.time.LocalDateTime; import java.time.ZoneOffset; import org.apache.commons.io.IOUtils; +import org.apache.commons.lang3.StringUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -187,14 +188,33 @@ public class CrawlJournalFile } else { - String[] tokens = line.split(" ", 2); + String[] tokens = line.split(" ", 3); CrawlStatus status = CrawlStatus.valueOf(tokens[0].toUpperCase()); + URL parentURL; + try + { + if (StringUtils.equals(tokens[1], "null")) + { + parentURL = null; + } + else + { + parentURL = new URL(tokens[1].trim()); + } + } + catch (MalformedURLException exception) + { + logger.error("Error valuing [{}]", line); + exception.printStackTrace(); + parentURL = null; + } + URL url; try { - url = new URL(tokens[1].trim()); + url = new URL(tokens[2].trim()); } catch (MalformedURLException exception) { @@ -203,7 +223,7 @@ public class CrawlJournalFile url = null; } - result = new CrawlLog(url, status); + result = new CrawlLog(url, parentURL, status); } // @@ -226,7 +246,7 @@ public class CrawlJournalFile { for (CrawlLog log : journal) { - String line = log.getStatus() + " " + log.getUrl(); + String line = String.format("%s %s %s", log.getStatus(), log.getParentUrl(), log.getUrl()); out.write(line); out.write("\n"); } diff --git a/src/fr/devinsy/statoolinfos/crawl/CrawlLog.java b/src/fr/devinsy/statoolinfos/crawl/CrawlLog.java index fba147f..9395a21 100644 --- a/src/fr/devinsy/statoolinfos/crawl/CrawlLog.java +++ b/src/fr/devinsy/statoolinfos/crawl/CrawlLog.java @@ -26,6 +26,7 @@ import java.net.URL; public class CrawlLog { private URL url; + private URL parentUrl; private CrawlStatus status; /** @@ -36,12 +37,40 @@ public class CrawlLog * @param status * the status */ - public CrawlLog(final URL url, final CrawlStatus status) + public CrawlLog(final URL url, final URL parentUrl, final CrawlStatus status) { this.url = url; + this.parentUrl = parentUrl; this.status = status; } + public URL getParentUrl() + { + return this.parentUrl; + } + + /** + * Gets the parent url value. + * + * @return the parent url value + */ + public String getParentUrlValue() + { + String result; + + if (this.parentUrl == null) + { + result = null; + } + else + { + result = this.parentUrl.toString(); + } + + // + return result; + } + public CrawlStatus getStatus() { return this.status; diff --git a/src/fr/devinsy/statoolinfos/crawl/CrawlLogs.java b/src/fr/devinsy/statoolinfos/crawl/CrawlLogs.java index f80f898..4e11f6d 100644 --- a/src/fr/devinsy/statoolinfos/crawl/CrawlLogs.java +++ b/src/fr/devinsy/statoolinfos/crawl/CrawlLogs.java @@ -24,6 +24,8 @@ import java.util.Collections; import org.apache.commons.lang3.StringUtils; +import fr.devinsy.statoolinfos.util.URLUtils; + /** * The Class CrawlLogs. */ @@ -39,6 +41,27 @@ public class CrawlLogs extends ArrayList super(); } + /* (non-Javadoc) + * @see java.util.ArrayList#add(java.lang.Object) + */ + @Override + public boolean add(final CrawlLog log) + { + boolean result; + + if (log == null) + { + result = false; + } + else + { + result = super.add(log); + } + + // + return result; + } + /** * Adds the. * @@ -47,9 +70,9 @@ public class CrawlLogs extends ArrayList * @param status * the status */ - public void add(final URL url, final CrawlStatus status) + public void add(final URL url, final URL parentUrl, final CrawlStatus status) { - this.add(new CrawlLog(url, status)); + this.add(new CrawlLog(url, parentUrl, status)); } /** @@ -139,4 +162,29 @@ public class CrawlLogs extends ArrayList // return result; } + + /** + * Gets the by parent. + * + * @param parentURL + * the parent URL + * @return the by parent + */ + public CrawlLogs searchByParent(final URL parentURL) + { + CrawlLogs result; + + result = new CrawlLogs(); + + for (CrawlLog log : this) + { + if (URLUtils.equals(log.getParentUrl(), parentURL)) + { + result.add(log); + } + } + + // + return result; + } } diff --git a/src/fr/devinsy/statoolinfos/crawl/Crawler.java b/src/fr/devinsy/statoolinfos/crawl/Crawler.java index d8384d2..729915e 100644 --- a/src/fr/devinsy/statoolinfos/crawl/Crawler.java +++ b/src/fr/devinsy/statoolinfos/crawl/Crawler.java @@ -21,7 +21,6 @@ package fr.devinsy.statoolinfos.crawl; import java.io.File; import java.io.FileNotFoundException; import java.io.IOException; -import java.net.MalformedURLException; import java.net.URL; import java.nio.charset.StandardCharsets; import java.nio.file.Files; @@ -89,7 +88,7 @@ public class Crawler */ public void crawl(final URL url) throws StatoolInfosException, IOException { - crawl(url, null); + crawl(url, null, null); } /** @@ -104,7 +103,7 @@ public class Crawler * @throws IOException * Signals that an I/O exception has occurred. */ - public void crawl(final URL url, final PropertyClassType parent) + public void crawl(final URL url, final URL parentURL, final PropertyClassType parent) { logger.info("Crawling {}", url); @@ -118,21 +117,21 @@ public class Crawler catch (java.net.ConnectException exception) { logger.error("ERROR: crawl failed for [{}]: {}", url.toString(), exception.getMessage()); - this.journal.add(url, CrawlStatus.CONNECTERROR); + this.journal.add(url, parentURL, CrawlStatus.CONNECTERROR); downloadFile = null; exception.printStackTrace(); } catch (FileNotFoundException exception) { logger.error("ERROR: crawl failed for [{}]: {}", url.toString(), exception.getMessage()); - this.journal.add(url, CrawlStatus.URLNOTFOUND); + this.journal.add(url, parentURL, CrawlStatus.URLNOTFOUND); downloadFile = null; exception.printStackTrace(); } catch (IOException exception) { logger.error("ERROR: crawl failed for [{}]: {}", url.toString(), exception.getMessage()); - this.journal.add(url, CrawlStatus.DOWNLOADERROR); + this.journal.add(url, parentURL, CrawlStatus.DOWNLOADERROR); downloadFile = null; exception.printStackTrace(); } @@ -142,12 +141,12 @@ public class Crawler if (!downloadFile.exists()) { logger.error("ERROR: download missing."); - this.journal.add(url, CrawlStatus.MISSING); + this.journal.add(url, parentURL, CrawlStatus.MISSING); } else if (downloadFile.length() == 0) { logger.error("ERROR: download empty."); - this.journal.add(url, CrawlStatus.EMPTY); + this.journal.add(url, parentURL, CrawlStatus.EMPTY); } else { @@ -157,7 +156,7 @@ public class Crawler if ((downloadClass == null) || (!downloadClass.isChildOf(parent))) { logger.error("ERROR: bad child class [{}][{}].", downloadClass, parent); - this.journal.add(url, CrawlStatus.BADCHILDCLASS); + this.journal.add(url, parentURL, CrawlStatus.BADCHILDCLASS); } else { @@ -176,7 +175,7 @@ public class Crawler String downloadSha = StatoolInfosUtils.sha1sum(downloadFile); if (StringUtils.equals(downloadSha, storedSha)) { - this.journal.add(url, CrawlStatus.SUCCESS); + this.journal.add(url, parentURL, CrawlStatus.SUCCESS); } else { @@ -199,13 +198,13 @@ public class Crawler downloadFile.delete(); // - this.journal.add(url, CrawlStatus.UPDATED); + this.journal.add(url, parentURL, CrawlStatus.UPDATED); } // Cache another resources. - crawlLogo(downloadProperties.getURL("federation.logo")); - crawlLogo(downloadProperties.getURL("organization.logo")); - crawlLogo(downloadProperties.getURL("service.logo")); + crawlLogo(downloadProperties.getURL("federation.logo"), url); + crawlLogo(downloadProperties.getURL("organization.logo"), url); + crawlLogo(downloadProperties.getURL("service.logo"), url); // Do subs. PathProperties subs = downloadProperties.getByPrefix("subs"); @@ -216,12 +215,12 @@ public class Crawler try { URL subUrl = new URL(property.getValue()); - crawl(subUrl, downloadClass); + crawl(subUrl, url, downloadClass); } catch (java.net.MalformedURLException exception) { logger.error("ERROR: subcrawl failed for [{}][{}][{}]: {}", url.toString(), property.getPath(), property.getValue(), exception.getMessage()); - this.journal.add(url, CrawlStatus.BADURLFORMAT); + this.journal.add(url, parentURL, CrawlStatus.BADURLFORMAT); exception.printStackTrace(); } } @@ -232,7 +231,7 @@ public class Crawler } catch (IOException exception) { - this.journal.add(url, CrawlStatus.IOERROR); + this.journal.add(url, parentURL, CrawlStatus.IOERROR); } } @@ -243,7 +242,7 @@ public class Crawler * the url * @return the file */ - public File crawlLogo(final URL url) + public File crawlLogo(final URL url, final URL parentURL) { File result; @@ -265,19 +264,19 @@ public class Crawler catch (java.net.ConnectException exception) { logger.error("ERROR: crawl failed (1) for [{}]: {}", url.toString(), exception.getMessage()); - this.journal.add(url, CrawlStatus.CONNECTERROR); + this.journal.add(url, parentURL, CrawlStatus.CONNECTERROR); logoFile = null; } catch (FileNotFoundException exception) { logger.error("ERROR: crawl failed (2) for [{}]: {}", url.toString(), exception.getMessage()); - this.journal.add(url, CrawlStatus.URLNOTFOUND); + this.journal.add(url, parentURL, CrawlStatus.URLNOTFOUND); logoFile = null; } catch (IOException exception) { logger.error("ERROR: crawl failed (3) for [{}]: {}", url.toString(), exception.getMessage()); - this.journal.add(url, CrawlStatus.DOWNLOADERROR); + this.journal.add(url, parentURL, CrawlStatus.DOWNLOADERROR); logoFile = null; } @@ -288,7 +287,7 @@ public class Crawler else { result = this.cache.store(url, logoFile); - this.journal.add(url, CrawlStatus.SUCCESS); + this.journal.add(url, parentURL, CrawlStatus.SUCCESS); logoFile.delete(); } } @@ -344,9 +343,7 @@ public class Crawler logger.info("Restoring crawl journal."); - File journalFile = this.cache.restoreFile(getJournalURL()); - - result = CrawlJournalFile.load(journalFile); + result = this.cache.restoreJournal(); // return result; @@ -357,34 +354,7 @@ public class Crawler */ public void storeJournal() { - try - { - logger.info("Storing crawl journal."); - File file = Files.createTempFile("tmp-", ".statoolsinfos").toFile(); - - CrawlJournalFile.save(file, this.journal); - this.cache.store(getJournalURL(), file); - file.delete(); - } - catch (IOException exception) - { - exception.printStackTrace(); - } - } - - /** - * Gets the journal URL. - * - * @return the journal URL - * @throws MalformedURLException - */ - public static URL getJournalURL() throws MalformedURLException - { - URL result; - - result = new URL("http://localhost/crawl.journal"); - - // - return result; + logger.info("Storing crawl journal."); + this.cache.storeJournal(this.journal); } } diff --git a/src/fr/devinsy/statoolinfos/htmlize/CrawlJournalPage.java b/src/fr/devinsy/statoolinfos/htmlize/CrawlJournalPage.java index 3b649e7..acf6af1 100644 --- a/src/fr/devinsy/statoolinfos/htmlize/CrawlJournalPage.java +++ b/src/fr/devinsy/statoolinfos/htmlize/CrawlJournalPage.java @@ -21,13 +21,17 @@ package fr.devinsy.statoolinfos.htmlize; import java.io.File; import java.io.IOException; import java.nio.charset.StandardCharsets; +import java.time.format.DateTimeFormatter; import org.apache.commons.io.FileUtils; +import org.apache.commons.lang3.StringUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import fr.devinsy.statoolinfos.HtmlizerContext; import fr.devinsy.statoolinfos.core.Federation; +import fr.devinsy.statoolinfos.core.Organization; +import fr.devinsy.statoolinfos.core.Service; import fr.devinsy.statoolinfos.core.StatoolInfosException; import fr.devinsy.statoolinfos.crawl.CrawlCache; import fr.devinsy.statoolinfos.crawl.CrawlJournal; @@ -56,9 +60,20 @@ public class CrawlJournalPage File htmlizeDirectory = HtmlizerContext.instance().getHtmlizeDirectory(); logger.info("Htmlize Crawl Journal pages."); - CrawlJournal journal = HtmlizerContext.instance().getCrawlJournal(); - String page = htmlize("Journal des téléchargements", journal); + String page = htmlize("Journal des téléchargements", federation.getCrawlJournal()); FileUtils.write(new File(htmlizeDirectory, federation.getTechnicalName() + "-crawl.xhtml"), page, StandardCharsets.UTF_8); + + for (Organization organization : federation.getOrganizations()) + { + page = htmlize("Journal des téléchargements de " + organization.getName(), organization.getCrawlJournal()); + FileUtils.write(new File(htmlizeDirectory, organization.getTechnicalName() + "-crawl.xhtml"), page, StandardCharsets.UTF_8); + } + + for (Service service : federation.getAllServices()) + { + page = htmlize("Journal des téléchargements de " + service.getName(), service.getCrawlJournal()); + FileUtils.write(new File(htmlizeDirectory, service.getOrganization().getTechnicalName() + "-" + service.getTechnicalName() + "-crawl.xhtml"), page, StandardCharsets.UTF_8); + } } /** @@ -83,7 +98,7 @@ public class CrawlJournalPage TagDataManager data = new TagDataManager(); data.setEscapedContent("title", title); - data.setContent("date", journal.getDatetime().toString()); + data.setContent("date", journal.getDatetime().format(DateTimeFormatter.ofPattern("dd/MM/YYYY HH:mm"))); data.setContent("totalCount", journal.size()); data.setContent("errorCount", journal.getErrors().size()); @@ -92,6 +107,8 @@ public class CrawlJournalPage { data.setEscapedContent("crawlLogLine", index, "crawlLogLineUrlLink", log.getUrl().toString()); data.setEscapedAttribute("crawlLogLine", index, "crawlLogLineUrlLink", "href", log.getUrl().toString()); + data.setEscapedContent("crawlLogLine", index, "crawlLogLineParentUrlLink", StringUtils.abbreviate(log.getParentUrlValue(), 35)); + data.setEscapedAttribute("crawlLogLine", index, "crawlLogLineParentUrlLink", "href", StringUtils.defaultString(log.getParentUrlValue(), "#")); data.setContent("crawlLogLine", index, "crawlLogLineStatus", log.getStatus().toString()); if (log.getStatus().isError()) diff --git a/src/fr/devinsy/statoolinfos/htmlize/FederationPage.java b/src/fr/devinsy/statoolinfos/htmlize/FederationPage.java index 6540f5d..590ab59 100644 --- a/src/fr/devinsy/statoolinfos/htmlize/FederationPage.java +++ b/src/fr/devinsy/statoolinfos/htmlize/FederationPage.java @@ -85,8 +85,9 @@ public class FederationPage * @return the string * @throws StatoolInfosException * the statool infos exception + * @throws IOException */ - public static String htmlize(final Federation federation) throws StatoolInfosException + public static String htmlize(final Federation federation) throws StatoolInfosException, IOException { String result; @@ -110,8 +111,7 @@ public class FederationPage data.setAttribute("statsLink", "href", federation.getTechnicalName() + "-stats.xhtml"); data.setAttribute("crawlLink", "href", federation.getTechnicalName() + "-crawl.xhtml"); - - if (HtmlizerContext.instance().getCrawlJournal().getErrors().isEmpty()) + if (federation.getCrawlJournal().getErrors().isEmpty()) { data.setAttribute("crawlLinkImg", "src", "circle-icons/download-mono.svg"); } diff --git a/src/fr/devinsy/statoolinfos/htmlize/OrganizationPage.java b/src/fr/devinsy/statoolinfos/htmlize/OrganizationPage.java index 4ba9db4..2a1508b 100644 --- a/src/fr/devinsy/statoolinfos/htmlize/OrganizationPage.java +++ b/src/fr/devinsy/statoolinfos/htmlize/OrganizationPage.java @@ -152,6 +152,16 @@ public class OrganizationPage data.setAttribute("statsLink", "href", organization.getTechnicalName() + "-stats.xhtml"); + data.setAttribute("crawlLink", "href", organization.getTechnicalName() + "-crawl.xhtml"); + if (organization.getCrawlJournal().getErrors().isEmpty()) + { + data.setAttribute("crawlLinkImg", "src", "circle-icons/download-mono.svg"); + } + else + { + data.setAttribute("crawlLinkImg", "src", "circle-icons/download.svg"); + } + { PropertyChecks checks = organization.getInputChecksAll(); @@ -239,4 +249,5 @@ public class OrganizationPage FileUtils.copyFile(logoFile, target); } } + } diff --git a/src/fr/devinsy/statoolinfos/htmlize/ServicePage.java b/src/fr/devinsy/statoolinfos/htmlize/ServicePage.java index 929019a..df46d50 100644 --- a/src/fr/devinsy/statoolinfos/htmlize/ServicePage.java +++ b/src/fr/devinsy/statoolinfos/htmlize/ServicePage.java @@ -221,6 +221,16 @@ public class ServicePage data.getIdData("softwareSourceLinkImg").getAttribute("class").setMode(DisplayMode.REPLACE); } + data.setAttribute("crawlLink", "href", service.getOrganization().getTechnicalName() + "-" + service.getTechnicalName() + "-crawl.xhtml"); + if (service.getCrawlJournal().getErrors().isEmpty()) + { + data.setAttribute("crawlLinkImg", "src", "circle-icons/download-mono.svg"); + } + else + { + data.setAttribute("crawlLinkImg", "src", "circle-icons/download.svg"); + } + { PropertyChecks checks = service.getInputChecks(); data.setContent("errorCount", checks.getErrorCount()); diff --git a/src/fr/devinsy/statoolinfos/htmlize/crawlJournal.xhtml b/src/fr/devinsy/statoolinfos/htmlize/crawlJournal.xhtml index d7dd066..103691b 100644 --- a/src/fr/devinsy/statoolinfos/htmlize/crawlJournal.xhtml +++ b/src/fr/devinsy/statoolinfos/htmlize/crawlJournal.xhtml @@ -20,16 +20,18 @@
Date : n/a

-
+