Added inputURL in local crawl journal.
This commit is contained in:
parent
68906ed88a
commit
e262bf2b7b
4 changed files with 54 additions and 15 deletions
|
@ -152,8 +152,10 @@ public class Factory
|
|||
/**
|
||||
* Load federation.
|
||||
*
|
||||
* @param federationFile
|
||||
* the federation file
|
||||
* @param inputURL
|
||||
* the input URL
|
||||
* @param cache
|
||||
* the cache
|
||||
* @return the federation
|
||||
* @throws StatoolInfosException
|
||||
* the statool infos exception
|
||||
|
@ -285,6 +287,7 @@ public class Factory
|
|||
|
||||
//
|
||||
CrawlJournal journal = cache.restoreJournal();
|
||||
result.getCrawlJournal().add(journal.getByUrl(inputURL));
|
||||
result.getCrawlJournal().addAll(journal.searchByParent(result.getInputURL()));
|
||||
for (Service service : result.getServices())
|
||||
{
|
||||
|
@ -329,6 +332,7 @@ public class Factory
|
|||
|
||||
//
|
||||
CrawlJournal journal = cache.restoreJournal();
|
||||
result.getCrawlJournal().add(journal.getByUrl(inputURL));
|
||||
result.getCrawlJournal().addAll(journal.searchByParent(result.getInputURL()));
|
||||
}
|
||||
|
||||
|
|
|
@ -235,8 +235,8 @@ public class CrawlJournalFile
|
|||
*
|
||||
* @param out
|
||||
* the out
|
||||
* @param source
|
||||
* the source
|
||||
* @param journal
|
||||
* the journal
|
||||
* @throws IOException
|
||||
* Signals that an I/O exception has occurred.
|
||||
*/
|
||||
|
|
|
@ -21,6 +21,7 @@ package fr.devinsy.statoolinfos.crawl;
|
|||
import java.net.URL;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.Iterator;
|
||||
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
|
||||
|
@ -76,11 +77,11 @@ public class CrawlLogs extends ArrayList<CrawlLog>
|
|||
}
|
||||
|
||||
/**
|
||||
* Find by software.
|
||||
* Find by url.
|
||||
*
|
||||
* @param softwareName
|
||||
* the software name
|
||||
* @return the category
|
||||
* @param url
|
||||
* the url
|
||||
* @return the crawl logs
|
||||
*/
|
||||
public CrawlLogs findByUrl(final URL url)
|
||||
{
|
||||
|
@ -100,6 +101,42 @@ public class CrawlLogs extends ArrayList<CrawlLog>
|
|||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the by url.
|
||||
*
|
||||
* @param url
|
||||
* the url
|
||||
* @return the by url
|
||||
*/
|
||||
public CrawlLog getByUrl(final URL url)
|
||||
{
|
||||
CrawlLog result;
|
||||
|
||||
boolean ended = false;
|
||||
Iterator<CrawlLog> iterator = iterator();
|
||||
result = null;
|
||||
while (!ended)
|
||||
{
|
||||
if (iterator.hasNext())
|
||||
{
|
||||
CrawlLog log = iterator.next();
|
||||
|
||||
if (URLUtils.equals(log.getUrl(), url))
|
||||
{
|
||||
ended = true;
|
||||
result = log;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
ended = true;
|
||||
}
|
||||
}
|
||||
|
||||
//
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the errors.
|
||||
*
|
||||
|
|
|
@ -95,13 +95,11 @@ public class Crawler
|
|||
* Crawl.
|
||||
*
|
||||
* @param url
|
||||
* the input
|
||||
* @param cache
|
||||
* the cache
|
||||
* @throws StatoolInfosException
|
||||
* the statool infos exception
|
||||
* @throws IOException
|
||||
* Signals that an I/O exception has occurred.
|
||||
* the url
|
||||
* @param parentURL
|
||||
* the parent URL
|
||||
* @param parent
|
||||
* the parent
|
||||
*/
|
||||
public void crawl(final URL url, final URL parentURL, final PropertyClassType parent)
|
||||
{
|
||||
|
|
Loading…
Reference in a new issue