Improved logging and error management for sub-crawls.

This commit is contained in:
Christian P. MOMON 2021-01-07 09:18:09 +01:00
parent 35bddb9f26
commit a653627030
3 changed files with 60 additions and 42 deletions

View file

@ -173,11 +173,7 @@ public class Factory
{
URL inputURL = new URL(property.getValue());
Organization organization = loadOrganization(inputURL, cache);
if (organization == null)
{
logger.error("Loading organization failed for [{}]", property.getValue());
}
else
if (organization != null)
{
result.getOrganizations().add(organization);
}
@ -243,6 +239,7 @@ public class Factory
if (inputFile == null)
{
result = null;
logger.warn("WARNING: organization not found in cache [{}]", inputURL);
}
else
{

View file

@ -487,7 +487,7 @@ public class CrawlCache
}
catch (IOException exception)
{
logger.info("Store faile for {}: {}", url, exception.getMessage());
logger.info("Store failed for {}: {}", url, exception.getMessage());
result = null;
}

View file

@ -110,8 +110,24 @@ public class Crawler
PathProperties subs = input.getByPrefix("subs");
for (PathProperty property : subs)
{
URL url = new URL(property.getValue());
crawl(url, cache);
if (StringUtils.isNotBlank(property.getValue()))
{
try
{
URL subUrl = new URL(property.getValue());
crawl(subUrl, cache);
}
catch (java.net.MalformedURLException exception)
{
logger.error("ERROR: subcrawl failed for [{}][{}]: {}", property.getPath(), property.getValue(), exception.getMessage());
exception.printStackTrace();
}
catch (java.net.ConnectException | FileNotFoundException exception)
{
logger.error("ERROR: subcrawl failed for [{}][{}]: {}", property.getPath(), property.getValue(), exception.getMessage());
exception.printStackTrace();
}
}
}
}
@ -148,47 +164,52 @@ public class Crawler
*/
public static void crawl(final URL url, final CrawlCache cache) throws StatoolInfosException, IOException
{
try
logger.info("Crawling " + url);
// Crawl.
File file = cache.store(url);
// Build crawl data.
PathProperties crawlSection = new PathPropertyList();
crawlSection.put("crawl.crawler", "StatoolInfos");
crawlSection.put("crawl.datetime", LocalDateTime.now().toString());
crawlSection.put("crawl.url", url.toString());
crawlSection.put("crawl.file.size", FileUtils.sizeOf(file));
crawlSection.put("crawl.file.datetime", StatoolInfosUtils.urlLastModified(url).toString());
crawlSection.put("crawl.file.sha1", DigestUtils.sha1Hex(FileUtils.readFileToByteArray(file)));
// Add crawl data in crawled file.
String lines = crawlSection.toStringListFormatted().toStringSeparatedBy('\n');
FileUtils.write(file, FileUtils.readFileToString(file, StandardCharsets.UTF_8) + "\n" + lines, StandardCharsets.UTF_8);
// Crawl another resources.
PathProperties properties = PathPropertyUtils.load(file);
cache.storeQuietly(properties.getURL("organization.logo"));
cache.storeQuietly(properties.getURL("service.logo"));
// Crawl subs.
PathProperties subs = properties.getByPrefix("subs");
for (PathProperty property : subs)
{
logger.info("Crawling " + url);
// Crawl.
File file = cache.store(url);
// Build crawl data.
PathProperties crawlSection = new PathPropertyList();
crawlSection.put("crawl.crawler", "StatoolInfos");
crawlSection.put("crawl.datetime", LocalDateTime.now().toString());
crawlSection.put("crawl.url", url.toString());
crawlSection.put("crawl.file.size", FileUtils.sizeOf(file));
crawlSection.put("crawl.file.datetime", StatoolInfosUtils.urlLastModified(url).toString());
crawlSection.put("crawl.file.sha1", DigestUtils.sha1Hex(FileUtils.readFileToByteArray(file)));
// Add crawl data in crawled file.
String lines = crawlSection.toStringListFormatted().toStringSeparatedBy('\n');
FileUtils.write(file, FileUtils.readFileToString(file, StandardCharsets.UTF_8) + "\n" + lines, StandardCharsets.UTF_8);
// Crawl another resources.
PathProperties properties = PathPropertyUtils.load(file);
cache.storeQuietly(properties.getURL("organization.logo"));
cache.storeQuietly(properties.getURL("service.logo"));
// Crawl subs.
PathProperties subs = properties.getByPrefix("subs");
for (PathProperty property : subs)
if (StringUtils.isNotBlank(property.getValue()))
{
if (StringUtils.isNotBlank(property.getValue()))
try
{
URL subUrl = new URL(property.getValue());
crawl(subUrl, cache);
}
catch (java.net.MalformedURLException exception)
{
logger.error("ERROR: subcrawl failed for [{}][{}][{}]: {}", url.toString(), property.getPath(), property.getValue(), exception.getMessage());
exception.printStackTrace();
}
catch (java.net.ConnectException | FileNotFoundException exception)
{
logger.error("ERROR: subcrawl failed for [{}][{}][{}]: {}", url.toString(), property.getPath(), property.getValue(), exception.getMessage());
exception.printStackTrace();
}
}
}
catch (java.net.ConnectException | FileNotFoundException exception)
{
logger.error("ERROR: crawl failed for [{}]: {}", url.toString(), exception.getMessage());
exception.printStackTrace();
}
}
}