Added inputURL in local crawl journal.

This commit is contained in:
Christian P. MOMON 2021-05-19 15:22:37 +02:00
parent 68906ed88a
commit e262bf2b7b
4 changed files with 54 additions and 15 deletions

View file

@ -152,8 +152,10 @@ public class Factory
/** /**
* Load federation. * Load federation.
* *
* @param federationFile * @param inputURL
* the federation file * the input URL
* @param cache
* the cache
* @return the federation * @return the federation
* @throws StatoolInfosException * @throws StatoolInfosException
* the statool infos exception * the statool infos exception
@ -285,6 +287,7 @@ public class Factory
// //
CrawlJournal journal = cache.restoreJournal(); CrawlJournal journal = cache.restoreJournal();
result.getCrawlJournal().add(journal.getByUrl(inputURL));
result.getCrawlJournal().addAll(journal.searchByParent(result.getInputURL())); result.getCrawlJournal().addAll(journal.searchByParent(result.getInputURL()));
for (Service service : result.getServices()) for (Service service : result.getServices())
{ {
@ -329,6 +332,7 @@ public class Factory
// //
CrawlJournal journal = cache.restoreJournal(); CrawlJournal journal = cache.restoreJournal();
result.getCrawlJournal().add(journal.getByUrl(inputURL));
result.getCrawlJournal().addAll(journal.searchByParent(result.getInputURL())); result.getCrawlJournal().addAll(journal.searchByParent(result.getInputURL()));
} }

View file

@ -235,8 +235,8 @@ public class CrawlJournalFile
* *
* @param out * @param out
* the out * the out
* @param source * @param journal
* the source * the journal
* @throws IOException * @throws IOException
* Signals that an I/O exception has occurred. * Signals that an I/O exception has occurred.
*/ */

View file

@ -21,6 +21,7 @@ package fr.devinsy.statoolinfos.crawl;
import java.net.URL; import java.net.URL;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Collections; import java.util.Collections;
import java.util.Iterator;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
@ -76,11 +77,11 @@ public class CrawlLogs extends ArrayList<CrawlLog>
} }
/** /**
* Find by software. * Find by url.
* *
* @param softwareName * @param url
* the software name * the url
* @return the category * @return the crawl logs
*/ */
public CrawlLogs findByUrl(final URL url) public CrawlLogs findByUrl(final URL url)
{ {
@ -100,6 +101,42 @@ public class CrawlLogs extends ArrayList<CrawlLog>
return result; return result;
} }
/**
 * Looks up the first crawl log of this list whose URL matches the given one.
 *
 * @param url
 *            the url to search for
 * @return the first log for which URLUtils.equals(log.getUrl(), url) is
 *         true, or null if this list contains no such log
 */
public CrawlLog getByUrl(final URL url)
{
    CrawlLog result = null;

    // Walk the list in order and stop at the first match.
    for (CrawlLog candidate : this)
    {
        if (URLUtils.equals(candidate.getUrl(), url))
        {
            result = candidate;
            break;
        }
    }

    //
    return result;
}
/** /**
* Gets the errors. * Gets the errors.
* *

View file

@ -95,13 +95,11 @@ public class Crawler
* Crawl. * Crawl.
* *
* @param url * @param url
* the input * the url
* @param cache * @param parentURL
* the cache * the parent URL
* @throws StatoolInfosException * @param parent
* the statool infos exception * the parent
* @throws IOException
* Signals that an I/O exception has occurred.
*/ */
public void crawl(final URL url, final URL parentURL, final PropertyClassType parent) public void crawl(final URL url, final URL parentURL, final PropertyClassType parent)
{ {