Added URL connection error handling.

This commit is contained in:
Christian P. MOMON 2020-11-23 05:34:12 +01:00
parent 2166819c24
commit 0a5fba2ea5
3 changed files with 84 additions and 53 deletions

View file

@@ -173,7 +173,14 @@ public class Factory
{ {
URL inputURL = new URL(property.getValue()); URL inputURL = new URL(property.getValue());
Organization organization = loadOrganization(inputURL, cache); Organization organization = loadOrganization(inputURL, cache);
result.getOrganizations().add(organization); if (organization == null)
{
logger.error("Loading organization failed for [{}]", property.getValue());
}
else
{
result.getOrganizations().add(organization);
}
} }
} }
@@ -233,22 +240,29 @@ public class Factory
File inputFile = cache.restoreFile(inputURL); File inputFile = cache.restoreFile(inputURL);
PathProperties properties = PathPropertyUtils.load(inputFile); if (inputFile == null)
result = new Organization(properties);
result.setInputFile(inputFile);
result.setInputURL(inputURL);
result.setLogoFileName(result.getTechnicalName() + "-logo" + StringUtils.defaultIfBlank(cache.getExtension(result.getLogoURL()), ".png"));
PathProperties subs = result.getByPrefix("subs");
for (PathProperty property : subs)
{ {
if (StringUtils.startsWith(property.getValue(), "http")) result = null;
}
else
{
PathProperties properties = PathPropertyUtils.load(inputFile);
result = new Organization(properties);
result.setInputFile(inputFile);
result.setInputURL(inputURL);
result.setLogoFileName(result.getTechnicalName() + "-logo" + StringUtils.defaultIfBlank(cache.getExtension(result.getLogoURL()), ".png"));
PathProperties subs = result.getByPrefix("subs");
for (PathProperty property : subs)
{ {
URL serviceInputURL = new URL(property.getValue()); if (StringUtils.startsWith(property.getValue(), "http"))
Service service = loadService(serviceInputURL, cache); {
service.setOrganization(result); URL serviceInputURL = new URL(property.getValue());
service.setLogoFileName(result.getTechnicalName() + "-" + service.getLogoFileName()); Service service = loadService(serviceInputURL, cache);
result.getServices().add(service); service.setOrganization(result);
service.setLogoFileName(result.getTechnicalName() + "-" + service.getLogoFileName());
result.getServices().add(service);
}
} }
} }

View file

@@ -19,6 +19,7 @@
package fr.devinsy.statoolinfos.crawl; package fr.devinsy.statoolinfos.crawl;
import java.io.File; import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException; import java.io.IOException;
import java.net.URL; import java.net.URL;
import java.nio.charset.StandardCharsets; import java.nio.charset.StandardCharsets;
@@ -147,39 +148,48 @@ public class Crawler
*/ */
public static void crawl(final URL url, final CrawlCache cache) throws StatoolInfosException, IOException public static void crawl(final URL url, final CrawlCache cache) throws StatoolInfosException, IOException
{ {
logger.info("Crawling " + url); try
// Crawl.
File file = cache.store(url);
// Build crawl data.
PathProperties crawlSection = new PathPropertyList();
crawlSection.put("crawl.crawler", "StatoolInfos");
crawlSection.put("crawl.datetime", LocalDateTime.now().toString());
crawlSection.put("crawl.url", url.toString());
crawlSection.put("crawl.file.size", FileUtils.sizeOf(file));
crawlSection.put("crawl.file.datetime", StatoolInfosUtils.urlLastModified(url).toString());
crawlSection.put("crawl.file.sha1", DigestUtils.sha1Hex(FileUtils.readFileToByteArray(file)));
// Add crawl data in crawled file.
String lines = crawlSection.toStringListFormatted().toStringSeparatedBy('\n');
FileUtils.write(file, FileUtils.readFileToString(file, StandardCharsets.UTF_8) + "\n" + lines, StandardCharsets.UTF_8);
// Crawl another resources.
PathProperties properties = PathPropertyUtils.load(file);
cache.storeQuietly(properties.getURL("organization.logo"));
cache.storeQuietly(properties.getURL("service.logo"));
// Crawl subs.
PathProperties subs = properties.getByPrefix("subs");
for (PathProperty property : subs)
{ {
if (StringUtils.isNotBlank(property.getValue()))
logger.info("Crawling " + url);
// Crawl.
File file = cache.store(url);
// Build crawl data.
PathProperties crawlSection = new PathPropertyList();
crawlSection.put("crawl.crawler", "StatoolInfos");
crawlSection.put("crawl.datetime", LocalDateTime.now().toString());
crawlSection.put("crawl.url", url.toString());
crawlSection.put("crawl.file.size", FileUtils.sizeOf(file));
crawlSection.put("crawl.file.datetime", StatoolInfosUtils.urlLastModified(url).toString());
crawlSection.put("crawl.file.sha1", DigestUtils.sha1Hex(FileUtils.readFileToByteArray(file)));
// Add crawl data in crawled file.
String lines = crawlSection.toStringListFormatted().toStringSeparatedBy('\n');
FileUtils.write(file, FileUtils.readFileToString(file, StandardCharsets.UTF_8) + "\n" + lines, StandardCharsets.UTF_8);
// Crawl another resources.
PathProperties properties = PathPropertyUtils.load(file);
cache.storeQuietly(properties.getURL("organization.logo"));
cache.storeQuietly(properties.getURL("service.logo"));
// Crawl subs.
PathProperties subs = properties.getByPrefix("subs");
for (PathProperty property : subs)
{ {
URL subUrl = new URL(property.getValue()); if (StringUtils.isNotBlank(property.getValue()))
crawl(subUrl, cache); {
URL subUrl = new URL(property.getValue());
crawl(subUrl, cache);
}
} }
} }
catch (java.net.ConnectException | FileNotFoundException exception)
{
logger.error("ERROR: crawl failed for [{}]: {}", url.toString(), exception.getMessage());
exception.printStackTrace();
}
} }
} }

View file

@@ -103,17 +103,24 @@ public class PathPropertyUtils
{ {
PathProperties result; PathProperties result;
result = new PathPropertyList(); if (file == null)
BufferedReader in = null;
try
{ {
in = new BufferedReader(new InputStreamReader(new FileInputStream(file), charsetName)); throw new IllegalArgumentException("File parameter is null.");
result = read(in);
} }
finally else
{ {
IOUtils.closeQuietly(in); result = new PathPropertyList();
BufferedReader in = null;
try
{
in = new BufferedReader(new InputStreamReader(new FileInputStream(file), charsetName));
result = read(in);
}
finally
{
IOUtils.closeQuietly(in);
}
} }
// //