Added inputURL in local crawl journal.
This commit is contained in:
parent
68906ed88a
commit
e262bf2b7b
4 changed files with 54 additions and 15 deletions
|
@ -152,8 +152,10 @@ public class Factory
|
||||||
/**
|
/**
|
||||||
* Load federation.
|
* Load federation.
|
||||||
*
|
*
|
||||||
* @param federationFile
|
* @param inputURL
|
||||||
* the federation file
|
* the input URL
|
||||||
|
* @param cache
|
||||||
|
* the cache
|
||||||
* @return the federation
|
* @return the federation
|
||||||
* @throws StatoolInfosException
|
* @throws StatoolInfosException
|
||||||
* the statool infos exception
|
* the statool infos exception
|
||||||
|
@ -285,6 +287,7 @@ public class Factory
|
||||||
|
|
||||||
//
|
//
|
||||||
CrawlJournal journal = cache.restoreJournal();
|
CrawlJournal journal = cache.restoreJournal();
|
||||||
|
result.getCrawlJournal().add(journal.getByUrl(inputURL));
|
||||||
result.getCrawlJournal().addAll(journal.searchByParent(result.getInputURL()));
|
result.getCrawlJournal().addAll(journal.searchByParent(result.getInputURL()));
|
||||||
for (Service service : result.getServices())
|
for (Service service : result.getServices())
|
||||||
{
|
{
|
||||||
|
@ -329,6 +332,7 @@ public class Factory
|
||||||
|
|
||||||
//
|
//
|
||||||
CrawlJournal journal = cache.restoreJournal();
|
CrawlJournal journal = cache.restoreJournal();
|
||||||
|
result.getCrawlJournal().add(journal.getByUrl(inputURL));
|
||||||
result.getCrawlJournal().addAll(journal.searchByParent(result.getInputURL()));
|
result.getCrawlJournal().addAll(journal.searchByParent(result.getInputURL()));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -235,8 +235,8 @@ public class CrawlJournalFile
|
||||||
*
|
*
|
||||||
* @param out
|
* @param out
|
||||||
* the out
|
* the out
|
||||||
* @param source
|
* @param journal
|
||||||
* the source
|
* the journal
|
||||||
* @throws IOException
|
* @throws IOException
|
||||||
* Signals that an I/O exception has occurred.
|
* Signals that an I/O exception has occurred.
|
||||||
*/
|
*/
|
||||||
|
|
|
@ -21,6 +21,7 @@ package fr.devinsy.statoolinfos.crawl;
|
||||||
import java.net.URL;
|
import java.net.URL;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
|
import java.util.Iterator;
|
||||||
|
|
||||||
import org.apache.commons.lang3.StringUtils;
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
|
||||||
|
@ -76,11 +77,11 @@ public class CrawlLogs extends ArrayList<CrawlLog>
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Find by software.
|
* Find by url.
|
||||||
*
|
*
|
||||||
* @param softwareName
|
* @param url
|
||||||
* the software name
|
* the url
|
||||||
* @return the category
|
* @return the crawl logs
|
||||||
*/
|
*/
|
||||||
public CrawlLogs findByUrl(final URL url)
|
public CrawlLogs findByUrl(final URL url)
|
||||||
{
|
{
|
||||||
|
@ -100,6 +101,42 @@ public class CrawlLogs extends ArrayList<CrawlLog>
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets the by url.
|
||||||
|
*
|
||||||
|
* @param url
|
||||||
|
* the url
|
||||||
|
* @return the by url
|
||||||
|
*/
|
||||||
|
public CrawlLog getByUrl(final URL url)
|
||||||
|
{
|
||||||
|
CrawlLog result;
|
||||||
|
|
||||||
|
boolean ended = false;
|
||||||
|
Iterator<CrawlLog> iterator = iterator();
|
||||||
|
result = null;
|
||||||
|
while (!ended)
|
||||||
|
{
|
||||||
|
if (iterator.hasNext())
|
||||||
|
{
|
||||||
|
CrawlLog log = iterator.next();
|
||||||
|
|
||||||
|
if (URLUtils.equals(log.getUrl(), url))
|
||||||
|
{
|
||||||
|
ended = true;
|
||||||
|
result = log;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
ended = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
//
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Gets the errors.
|
* Gets the errors.
|
||||||
*
|
*
|
||||||
|
|
|
@ -95,13 +95,11 @@ public class Crawler
|
||||||
* Crawl.
|
* Crawl.
|
||||||
*
|
*
|
||||||
* @param url
|
* @param url
|
||||||
* the input
|
* the url
|
||||||
* @param cache
|
* @param parentURL
|
||||||
* the cache
|
* the parent URL
|
||||||
* @throws StatoolInfosException
|
* @param parent
|
||||||
* the statool infos exception
|
* the parent
|
||||||
* @throws IOException
|
|
||||||
* Signals that an I/O exception has occurred.
|
|
||||||
*/
|
*/
|
||||||
public void crawl(final URL url, final URL parentURL, final PropertyClassType parent)
|
public void crawl(final URL url, final URL parentURL, final PropertyClassType parent)
|
||||||
{
|
{
|
||||||
|
|
Loading…
Reference in a new issue