From 0a5fba2ea5a5a08076941cf16a5e0c7223e7867d Mon Sep 17 00:00:00 2001 From: "Christian P. MOMON" Date: Mon, 23 Nov 2020 05:34:12 +0100 Subject: [PATCH] Added URL connection error management. --- src/fr/devinsy/statoolinfos/core/Factory.java | 44 ++++++++---- .../devinsy/statoolinfos/crawl/Crawler.java | 70 +++++++++++-------- .../properties/PathPropertyUtils.java | 23 +++--- 3 files changed, 84 insertions(+), 53 deletions(-) diff --git a/src/fr/devinsy/statoolinfos/core/Factory.java b/src/fr/devinsy/statoolinfos/core/Factory.java index e8e149f..06873b8 100644 --- a/src/fr/devinsy/statoolinfos/core/Factory.java +++ b/src/fr/devinsy/statoolinfos/core/Factory.java @@ -173,7 +173,14 @@ public class Factory { URL inputURL = new URL(property.getValue()); Organization organization = loadOrganization(inputURL, cache); - result.getOrganizations().add(organization); + if (organization == null) + { + logger.error("Loading organization failed for [{}]", property.getValue()); + } + else + { + result.getOrganizations().add(organization); + } } } @@ -233,22 +240,29 @@ public class Factory File inputFile = cache.restoreFile(inputURL); - PathProperties properties = PathPropertyUtils.load(inputFile); - result = new Organization(properties); - result.setInputFile(inputFile); - result.setInputURL(inputURL); - result.setLogoFileName(result.getTechnicalName() + "-logo" + StringUtils.defaultIfBlank(cache.getExtension(result.getLogoURL()), ".png")); - - PathProperties subs = result.getByPrefix("subs"); - for (PathProperty property : subs) + if (inputFile == null) { - if (StringUtils.startsWith(property.getValue(), "http")) + result = null; + } + else + { + PathProperties properties = PathPropertyUtils.load(inputFile); + result = new Organization(properties); + result.setInputFile(inputFile); + result.setInputURL(inputURL); + result.setLogoFileName(result.getTechnicalName() + "-logo" + StringUtils.defaultIfBlank(cache.getExtension(result.getLogoURL()), ".png")); + + PathProperties subs = 
result.getByPrefix("subs"); + for (PathProperty property : subs) { - URL serviceInputURL = new URL(property.getValue()); - Service service = loadService(serviceInputURL, cache); - service.setOrganization(result); - service.setLogoFileName(result.getTechnicalName() + "-" + service.getLogoFileName()); - result.getServices().add(service); + if (StringUtils.startsWith(property.getValue(), "http")) + { + URL serviceInputURL = new URL(property.getValue()); + Service service = loadService(serviceInputURL, cache); + service.setOrganization(result); + service.setLogoFileName(result.getTechnicalName() + "-" + service.getLogoFileName()); + result.getServices().add(service); + } } } diff --git a/src/fr/devinsy/statoolinfos/crawl/Crawler.java b/src/fr/devinsy/statoolinfos/crawl/Crawler.java index ae9a3ed..e819421 100644 --- a/src/fr/devinsy/statoolinfos/crawl/Crawler.java +++ b/src/fr/devinsy/statoolinfos/crawl/Crawler.java @@ -19,6 +19,7 @@ package fr.devinsy.statoolinfos.crawl; import java.io.File; +import java.io.FileNotFoundException; import java.io.IOException; import java.net.URL; import java.nio.charset.StandardCharsets; @@ -147,39 +148,48 @@ public class Crawler */ public static void crawl(final URL url, final CrawlCache cache) throws StatoolInfosException, IOException { - logger.info("Crawling " + url); - - // Crawl. - File file = cache.store(url); - - // Build crawl data. - PathProperties crawlSection = new PathPropertyList(); - crawlSection.put("crawl.crawler", "StatoolInfos"); - crawlSection.put("crawl.datetime", LocalDateTime.now().toString()); - crawlSection.put("crawl.url", url.toString()); - crawlSection.put("crawl.file.size", FileUtils.sizeOf(file)); - crawlSection.put("crawl.file.datetime", StatoolInfosUtils.urlLastModified(url).toString()); - crawlSection.put("crawl.file.sha1", DigestUtils.sha1Hex(FileUtils.readFileToByteArray(file))); - - // Add crawl data in crawled file. 
- String lines = crawlSection.toStringListFormatted().toStringSeparatedBy('\n'); - FileUtils.write(file, FileUtils.readFileToString(file, StandardCharsets.UTF_8) + "\n" + lines, StandardCharsets.UTF_8); - - // Crawl another resources. - PathProperties properties = PathPropertyUtils.load(file); - - cache.storeQuietly(properties.getURL("organization.logo")); - cache.storeQuietly(properties.getURL("service.logo")); - - // Crawl subs. - PathProperties subs = properties.getByPrefix("subs"); - for (PathProperty property : subs) + try { - if (StringUtils.isNotBlank(property.getValue())) + + logger.info("Crawling " + url); + + // Crawl. + File file = cache.store(url); + + // Build crawl data. + PathProperties crawlSection = new PathPropertyList(); + crawlSection.put("crawl.crawler", "StatoolInfos"); + crawlSection.put("crawl.datetime", LocalDateTime.now().toString()); + crawlSection.put("crawl.url", url.toString()); + crawlSection.put("crawl.file.size", FileUtils.sizeOf(file)); + crawlSection.put("crawl.file.datetime", StatoolInfosUtils.urlLastModified(url).toString()); + crawlSection.put("crawl.file.sha1", DigestUtils.sha1Hex(FileUtils.readFileToByteArray(file))); + + // Add crawl data in crawled file. + String lines = crawlSection.toStringListFormatted().toStringSeparatedBy('\n'); + FileUtils.write(file, FileUtils.readFileToString(file, StandardCharsets.UTF_8) + "\n" + lines, StandardCharsets.UTF_8); + + // Crawl another resources. + PathProperties properties = PathPropertyUtils.load(file); + + cache.storeQuietly(properties.getURL("organization.logo")); + cache.storeQuietly(properties.getURL("service.logo")); + + // Crawl subs. 
+ PathProperties subs = properties.getByPrefix("subs"); + for (PathProperty property : subs) { - URL subUrl = new URL(property.getValue()); - crawl(subUrl, cache); + if (StringUtils.isNotBlank(property.getValue())) + { + URL subUrl = new URL(property.getValue()); + crawl(subUrl, cache); + } } } + catch (java.net.ConnectException | FileNotFoundException exception) + { + logger.error("ERROR: crawl failed for [{}]: {}", url.toString(), exception.getMessage()); + exception.printStackTrace(); + } } } diff --git a/src/fr/devinsy/statoolinfos/properties/PathPropertyUtils.java b/src/fr/devinsy/statoolinfos/properties/PathPropertyUtils.java index 6f1e59b..a2dbbd1 100644 --- a/src/fr/devinsy/statoolinfos/properties/PathPropertyUtils.java +++ b/src/fr/devinsy/statoolinfos/properties/PathPropertyUtils.java @@ -103,17 +103,24 @@ public class PathPropertyUtils { PathProperties result; - result = new PathPropertyList(); - - BufferedReader in = null; - try + if (file == null) { - in = new BufferedReader(new InputStreamReader(new FileInputStream(file), charsetName)); - result = read(in); + throw new IllegalArgumentException("File parameter is null."); } - finally + else { - IOUtils.closeQuietly(in); + result = new PathPropertyList(); + + BufferedReader in = null; + try + { + in = new BufferedReader(new InputStreamReader(new FileInputStream(file), charsetName)); + result = read(in); + } + finally + { + IOUtils.closeQuietly(in); + } } //