From e262bf2b7bd8397e82e97134a072574f1b91fcdf Mon Sep 17 00:00:00 2001 From: "Christian P. MOMON" Date: Wed, 19 May 2021 15:22:37 +0200 Subject: [PATCH] Added inputURL in local crawl journal. --- src/fr/devinsy/statoolinfos/core/Factory.java | 8 +++- .../statoolinfos/crawl/CrawlJournalFile.java | 4 +- .../devinsy/statoolinfos/crawl/CrawlLogs.java | 45 +++++++++++++++++-- .../devinsy/statoolinfos/crawl/Crawler.java | 12 +++-- 4 files changed, 54 insertions(+), 15 deletions(-) diff --git a/src/fr/devinsy/statoolinfos/core/Factory.java b/src/fr/devinsy/statoolinfos/core/Factory.java index 7c67110..238d83b 100644 --- a/src/fr/devinsy/statoolinfos/core/Factory.java +++ b/src/fr/devinsy/statoolinfos/core/Factory.java @@ -152,8 +152,10 @@ public class Factory /** * Load federation. * - * @param federationFile - * the federation file + * @param inputURL + * the input URL + * @param cache + * the cache * @return the federation * @throws StatoolInfosException * the statool infos exception @@ -285,6 +287,7 @@ public class Factory // CrawlJournal journal = cache.restoreJournal(); + result.getCrawlJournal().add(journal.getByUrl(inputURL)); result.getCrawlJournal().addAll(journal.searchByParent(result.getInputURL())); for (Service service : result.getServices()) { @@ -329,6 +332,7 @@ public class Factory // CrawlJournal journal = cache.restoreJournal(); + result.getCrawlJournal().add(journal.getByUrl(inputURL)); result.getCrawlJournal().addAll(journal.searchByParent(result.getInputURL())); } diff --git a/src/fr/devinsy/statoolinfos/crawl/CrawlJournalFile.java b/src/fr/devinsy/statoolinfos/crawl/CrawlJournalFile.java index cfceae0..4839954 100644 --- a/src/fr/devinsy/statoolinfos/crawl/CrawlJournalFile.java +++ b/src/fr/devinsy/statoolinfos/crawl/CrawlJournalFile.java @@ -235,8 +235,8 @@ public class CrawlJournalFile * * @param out * the out - * @param source - * the source + * @param journal + * the journal * @throws IOException * Signals that an I/O exception has occurred. 
*/
diff --git a/src/fr/devinsy/statoolinfos/crawl/CrawlLogs.java b/src/fr/devinsy/statoolinfos/crawl/CrawlLogs.java
index 4e11f6d..da4c9ac 100644
--- a/src/fr/devinsy/statoolinfos/crawl/CrawlLogs.java
+++ b/src/fr/devinsy/statoolinfos/crawl/CrawlLogs.java
@@ -21,6 +21,7 @@ package fr.devinsy.statoolinfos.crawl;
 import java.net.URL;
 import java.util.ArrayList;
 import java.util.Collections;
+import java.util.Iterator;
 
 import org.apache.commons.lang3.StringUtils;
 
@@ -76,11 +77,11 @@ public class CrawlLogs extends ArrayList
     }
 
     /**
-     * Find by software.
+     * Find by url.
      *
-     * @param softwareName
-     *            the software name
-     * @return the category
+     * @param url
+     *            the url
+     * @return the crawl logs
      */
     public CrawlLogs findByUrl(final URL url)
     {
@@ -100,6 +101,42 @@ public class CrawlLogs extends ArrayList
         return result;
     }
 
+    /**
+     * Gets the first crawl log whose URL equals the given one.
+     *
+     * @param url
+     *            the url
+     * @return the first matching crawl log, or null if none matches
+     */
+    public CrawlLog getByUrl(final URL url)
+    {
+        CrawlLog result;
+
+        boolean ended = false;
+        Iterator<CrawlLog> iterator = iterator();
+        result = null;
+        while (!ended)
+        {
+            if (iterator.hasNext())
+            {
+                CrawlLog log = iterator.next();
+
+                if (URLUtils.equals(log.getUrl(), url))
+                {
+                    ended = true;
+                    result = log;
+                }
+            }
+            else
+            {
+                ended = true;
+            }
+        }
+
+        //
+        return result;
+    }
+
     /**
      * Gets the errors.
      *
diff --git a/src/fr/devinsy/statoolinfos/crawl/Crawler.java b/src/fr/devinsy/statoolinfos/crawl/Crawler.java
index 729915e..4259797 100644
--- a/src/fr/devinsy/statoolinfos/crawl/Crawler.java
+++ b/src/fr/devinsy/statoolinfos/crawl/Crawler.java
@@ -95,13 +95,11 @@ public class Crawler
      * Crawl.
      *
      * @param url
-     *            the input
-     * @param cache
-     *            the cache
-     * @throws StatoolInfosException
-     *             the statool infos exception
-     * @throws IOException
-     *             Signals that an I/O exception has occurred.
+     *            the url
+     * @param parentURL
+     *            the parent URL
+     * @param parent
+     *            the parent
      */
     public void crawl(final URL url, final URL parentURL, final PropertyClassType parent)
     {