Added inputURL to the local crawl journal.

This commit is contained in:
Christian P. MOMON 2021-05-19 15:22:37 +02:00
parent 68906ed88a
commit e262bf2b7b
4 changed files with 54 additions and 15 deletions

@@ -152,8 +152,10 @@ public class Factory
/**
* Load federation.
*
* @param federationFile
* the federation file
* @param inputURL
* the input URL
* @param cache
* the cache
* @return the federation
* @throws StatoolInfosException
* the statool infos exception
@@ -285,6 +287,7 @@ public class Factory
//
CrawlJournal journal = cache.restoreJournal();
result.getCrawlJournal().add(journal.getByUrl(inputURL));
result.getCrawlJournal().addAll(journal.searchByParent(result.getInputURL()));
for (Service service : result.getServices())
{
@@ -329,6 +332,7 @@ public class Factory
//
CrawlJournal journal = cache.restoreJournal();
result.getCrawlJournal().add(journal.getByUrl(inputURL));
result.getCrawlJournal().addAll(journal.searchByParent(result.getInputURL()));
}
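
Both Factory hunks apply the same pattern: restore the cached crawl journal, then seed the federation's local journal with the log of the input URL itself (the newly added getByUrl call) plus every log whose crawl parent is that URL. A minimal standalone sketch of the pattern, using the CrawlJournal calls shown in these hunks; the helper name and the null guard are illustrative additions, not part of the commit:

// Illustrative helper, not part of the commit.
static void seedLocalJournal(final CrawlJournal local, final CrawlJournal cached, final URL inputURL)
{
	// Log of the input URL itself; this is the line added by the commit.
	CrawlLog own = cached.getByUrl(inputURL);
	if (own != null)
	{
		local.add(own);
	}

	// Logs of every resource whose crawl parent is the input URL (already present before the commit).
	local.addAll(cached.searchByParent(inputURL));
}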

@@ -235,8 +235,8 @@ public class CrawlJournalFile
*
* @param out
* the out
* @param source
* the source
* @param journal
* the journal
* @throws IOException
* Signals that an I/O exception has occurred.
*/
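
This hunk only renames the documented parameter from source to journal; the method body is not shown. Read literally, the javadoc describes a writer-style method along the lines of the sketch below, where the save name, the Writer type and the loop body are assumptions (it also assumes a CrawlJournal can be iterated like the CrawlLogs collection changed below):

// Assumed shape only; the hunk shows just the javadoc, not the implementation.
public static void save(final Writer out, final CrawlJournal journal) throws IOException
{
	// One line per crawl log entry (illustrative body).
	for (CrawlLog log : journal)
	{
		out.write(log.getUrl().toString());
		out.write('\n');
	}
}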

@@ -21,6 +21,7 @@ package fr.devinsy.statoolinfos.crawl;
import java.net.URL;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import org.apache.commons.lang3.StringUtils;
@@ -76,11 +77,11 @@ public class CrawlLogs extends ArrayList<CrawlLog>
}
/**
* Find by software.
* Find by url.
*
* @param softwareName
* the software name
* @return the category
* @param url
* the url
* @return the crawl logs
*/
public CrawlLogs findByUrl(final URL url)
{
@@ -100,6 +101,42 @@ public class CrawlLogs extends ArrayList<CrawlLog>
return result;
}
/**
* Gets the by url.
*
* @param url
* the url
* @return the by url
*/
public CrawlLog getByUrl(final URL url)
{
CrawlLog result;
boolean ended = false;
Iterator<CrawlLog> iterator = iterator();
result = null;
while (!ended)
{
if (iterator.hasNext())
{
CrawlLog log = iterator.next();
if (URLUtils.equals(log.getUrl(), url))
{
ended = true;
result = log;
}
}
else
{
ended = true;
}
}
//
return result;
}
/**
* Gets the errors.
*
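
The new getByUrl complements the findByUrl method just above it: findByUrl collects every log recorded for a URL, while getByUrl returns only the first matching log, or null when the URL was never crawled. The iterator-and-flag loop is behaviourally equivalent to a plain for-each with an early return, sketched here as an alternative formulation rather than the committed code:

// Equivalent sketch of getByUrl; the commit uses an explicit Iterator with an ended flag instead.
public CrawlLog getByUrl(final URL url)
{
	// CrawlLogs extends ArrayList<CrawlLog>, so the list can be iterated directly.
	for (CrawlLog log : this)
	{
		if (URLUtils.equals(log.getUrl(), url))
		{
			return log;
		}
	}

	// No log was recorded for this URL.
	return null;
}

Callers such as the Factory code above should therefore be prepared for a null result when the input URL was never crawled.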

@@ -95,13 +95,11 @@ public class Crawler
* Crawl.
*
* @param url
* the input
* @param cache
* the cache
* @throws StatoolInfosException
* the statool infos exception
* @throws IOException
* Signals that an I/O exception has occurred.
* the url
* @param parentURL
* the parent URL
* @param parent
* the parent
*/
public void crawl(final URL url, final URL parentURL, final PropertyClassType parent)
{
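
The crawl javadoc is rewritten for the new three-parameter signature, documenting the parent URL and the parent property class alongside the crawled URL, presumably so that each journal entry records which resource referenced it and can later be retrieved with searchByParent, as in the Factory hunks above. A minimal sketch of how the new form might be invoked; the crawlSubs name and the loop are assumptions, not code from this commit:

// Illustrative only: each child URL is crawled with its parent recorded,
// so CrawlJournal.searchByParent(parentURL) can retrieve its log afterwards.
public void crawlSubs(final URL parentURL, final Iterable<URL> childURLs, final PropertyClassType childType)
{
	for (URL childURL : childURLs)
	{
		crawl(childURL, parentURL, childType);
	}
}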