Refactored splitting Builder and Crawler.
This commit is contained in:
parent
a8a7b2e8f1
commit
efc212317d
7 changed files with 436 additions and 246 deletions
145
src/fr/devinsy/statoolinfos/build/Builder.java
Normal file
145
src/fr/devinsy/statoolinfos/build/Builder.java
Normal file
|
@ -0,0 +1,145 @@
|
||||||
|
/*
|
||||||
|
* Copyright (C) 2020 Christian Pierre MOMON <christian@momon.org>
|
||||||
|
*
|
||||||
|
* This file is part of StatoolInfos, simple service statistics tool.
|
||||||
|
*
|
||||||
|
* StatoolInfos is free software: you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU Affero General Public License as
|
||||||
|
* published by the Free Software Foundation, either version 3 of the
|
||||||
|
* License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* StatoolInfos is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU Affero General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU Affero General Public License
|
||||||
|
* along with StatoolInfos. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
package fr.devinsy.statoolinfos.build;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.time.LocalDateTime;
|
||||||
|
|
||||||
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import fr.devinsy.statoolinfos.core.Configuration;
|
||||||
|
import fr.devinsy.statoolinfos.core.Factory;
|
||||||
|
import fr.devinsy.statoolinfos.core.StatoolInfosException;
|
||||||
|
import fr.devinsy.statoolinfos.properties.PathProperties;
|
||||||
|
import fr.devinsy.statoolinfos.properties.PathPropertyList;
|
||||||
|
import fr.devinsy.statoolinfos.properties.PathPropertyUtils;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The Class StatoolInfos.
|
||||||
|
*/
|
||||||
|
public class Builder
|
||||||
|
{
|
||||||
|
private static Logger logger = LoggerFactory.getLogger(Builder.class);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Builds the.
|
||||||
|
*
|
||||||
|
* @param configurationFile
|
||||||
|
* the input
|
||||||
|
* @throws StatoolInfosException
|
||||||
|
* the statool infos exception
|
||||||
|
* @throws IOException
|
||||||
|
* Signals that an I/O exception has occurred.
|
||||||
|
*/
|
||||||
|
public static void build(final File configurationFile) throws StatoolInfosException, IOException
|
||||||
|
{
|
||||||
|
logger.info("Build {}", configurationFile.getAbsolutePath());
|
||||||
|
|
||||||
|
Configuration configuration = Factory.loadConfiguration(configurationFile);
|
||||||
|
logger.info("Build input setting: {}", configuration.getBuildInput());
|
||||||
|
logger.info("Build directory setting: {}", configuration.getBuildDirectoryPath());
|
||||||
|
|
||||||
|
File inputFile = configuration.getBuildInput();
|
||||||
|
File buildDirectory = configuration.getBuildDirectory();
|
||||||
|
if (inputFile == null)
|
||||||
|
{
|
||||||
|
throw new StatoolInfosException("Input is undefined.");
|
||||||
|
}
|
||||||
|
else if (!inputFile.exists())
|
||||||
|
{
|
||||||
|
throw new StatoolInfosException("Input does not exist.");
|
||||||
|
}
|
||||||
|
else if (!inputFile.isFile())
|
||||||
|
{
|
||||||
|
throw new StatoolInfosException("Input is not a file.");
|
||||||
|
}
|
||||||
|
else if (buildDirectory == null)
|
||||||
|
{
|
||||||
|
throw new StatoolInfosException("Build directory is undefined.");
|
||||||
|
}
|
||||||
|
else if (!buildDirectory.exists())
|
||||||
|
{
|
||||||
|
throw new StatoolInfosException("Build directory does not exist.");
|
||||||
|
}
|
||||||
|
else if (!buildDirectory.isDirectory())
|
||||||
|
{
|
||||||
|
throw new StatoolInfosException("Build directory is not a directory.");
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
// Build file section.
|
||||||
|
PathProperties target = new PathPropertyList();
|
||||||
|
target.put("file.class", configuration.get("conf.class"));
|
||||||
|
target.put("file.generator", "StatoolInfos");
|
||||||
|
target.put("file.datetime", LocalDateTime.now().toString());
|
||||||
|
target.put("file.protocol", configuration.get("conf.protocol"));
|
||||||
|
|
||||||
|
// Load input properties.
|
||||||
|
PathProperties input = PathPropertyUtils.load(inputFile);
|
||||||
|
|
||||||
|
// Add input properties with file section ones.
|
||||||
|
target.add(input);
|
||||||
|
|
||||||
|
// Save the build properties.
|
||||||
|
File targetFile = new File(buildDirectory, configurationFile.getName());
|
||||||
|
PathPropertyUtils.save(targetFile, target);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Clear.
|
||||||
|
*
|
||||||
|
* @param configurationFile
|
||||||
|
* the input
|
||||||
|
* @throws StatoolInfosException
|
||||||
|
* the statool infos exception
|
||||||
|
* @throws IOException
|
||||||
|
* Signals that an I/O exception has occurred.
|
||||||
|
*/
|
||||||
|
public static void clear(final Configuration configuration) throws StatoolInfosException, IOException
|
||||||
|
{
|
||||||
|
logger.info("Build directory setting: {}", configuration.getBuildDirectoryPath());
|
||||||
|
|
||||||
|
String path = configuration.getBuildDirectoryPath();
|
||||||
|
if (StringUtils.isBlank(path))
|
||||||
|
{
|
||||||
|
logger.warn("Undefined build directory.");
|
||||||
|
}
|
||||||
|
else if (!new File(path).exists())
|
||||||
|
{
|
||||||
|
logger.warn("Build directory does not exist: {}.", path);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
File buildDirectory = configuration.getBuildDirectory();
|
||||||
|
|
||||||
|
for (File file : buildDirectory.listFiles())
|
||||||
|
{
|
||||||
|
if ((file.isFile()) && (StringUtils.endsWithAny(file.getName(), ".properties")))
|
||||||
|
{
|
||||||
|
logger.info("Deleting " + file.getName());
|
||||||
|
file.delete();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -22,6 +22,7 @@ import java.io.File;
|
||||||
|
|
||||||
import org.apache.commons.lang3.StringUtils;
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
|
||||||
|
import fr.devinsy.statoolinfos.crawl.CrawlCache;
|
||||||
import fr.devinsy.statoolinfos.properties.PathProperties;
|
import fr.devinsy.statoolinfos.properties.PathProperties;
|
||||||
import fr.devinsy.statoolinfos.properties.PathPropertyList;
|
import fr.devinsy.statoolinfos.properties.PathPropertyList;
|
||||||
|
|
||||||
|
|
|
@ -26,6 +26,7 @@ import org.apache.commons.lang3.StringUtils;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import fr.devinsy.statoolinfos.crawl.CrawlCache;
|
||||||
import fr.devinsy.statoolinfos.properties.PathProperties;
|
import fr.devinsy.statoolinfos.properties.PathProperties;
|
||||||
import fr.devinsy.statoolinfos.properties.PathProperty;
|
import fr.devinsy.statoolinfos.properties.PathProperty;
|
||||||
import fr.devinsy.statoolinfos.properties.PathPropertyUtils;
|
import fr.devinsy.statoolinfos.properties.PathPropertyUtils;
|
||||||
|
@ -103,12 +104,13 @@ public class Factory
|
||||||
/**
|
/**
|
||||||
* Load organization.
|
* Load organization.
|
||||||
*
|
*
|
||||||
* @param properties
|
* @param organizationFile
|
||||||
* the properties
|
* the organization file
|
||||||
* @param cache
|
* @param cache
|
||||||
* the cache
|
* the cache
|
||||||
* @return the organization
|
* @return the organization
|
||||||
* @throws IOException
|
* @throws IOException
|
||||||
|
* Signals that an I/O exception has occurred.
|
||||||
*/
|
*/
|
||||||
public static Organization loadOrganization(final File organizationFile, final CrawlCache cache) throws IOException
|
public static Organization loadOrganization(final File organizationFile, final CrawlCache cache) throws IOException
|
||||||
{
|
{
|
||||||
|
|
|
@ -20,18 +20,13 @@ package fr.devinsy.statoolinfos.core;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.net.URL;
|
|
||||||
import java.time.LocalDateTime;
|
|
||||||
|
|
||||||
import org.apache.commons.lang3.StringUtils;
|
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import fr.devinsy.statoolinfos.build.Builder;
|
||||||
|
import fr.devinsy.statoolinfos.crawl.Crawler;
|
||||||
import fr.devinsy.statoolinfos.htmlize.Htmlizer;
|
import fr.devinsy.statoolinfos.htmlize.Htmlizer;
|
||||||
import fr.devinsy.statoolinfos.properties.PathProperties;
|
|
||||||
import fr.devinsy.statoolinfos.properties.PathProperty;
|
|
||||||
import fr.devinsy.statoolinfos.properties.PathPropertyList;
|
|
||||||
import fr.devinsy.statoolinfos.properties.PathPropertyUtils;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The Class StatoolInfos.
|
* The Class StatoolInfos.
|
||||||
|
@ -52,57 +47,7 @@ public class StatoolInfos
|
||||||
*/
|
*/
|
||||||
public static void build(final File configurationFile) throws StatoolInfosException, IOException
|
public static void build(final File configurationFile) throws StatoolInfosException, IOException
|
||||||
{
|
{
|
||||||
logger.info("Build {}", configurationFile.getAbsolutePath());
|
Builder.build(configurationFile);
|
||||||
|
|
||||||
Configuration configuration = Factory.loadConfiguration(configurationFile);
|
|
||||||
logger.info("Build input setting: {}", configuration.getBuildInput());
|
|
||||||
logger.info("Build directory setting: {}", configuration.getBuildDirectoryPath());
|
|
||||||
|
|
||||||
File inputFile = configuration.getBuildInput();
|
|
||||||
File buildDirectory = configuration.getBuildDirectory();
|
|
||||||
if (inputFile == null)
|
|
||||||
{
|
|
||||||
throw new StatoolInfosException("Input is undefined.");
|
|
||||||
}
|
|
||||||
else if (!inputFile.exists())
|
|
||||||
{
|
|
||||||
throw new StatoolInfosException("Input does not exist.");
|
|
||||||
}
|
|
||||||
else if (!inputFile.isFile())
|
|
||||||
{
|
|
||||||
throw new StatoolInfosException("Input is not a file.");
|
|
||||||
}
|
|
||||||
else if (buildDirectory == null)
|
|
||||||
{
|
|
||||||
throw new StatoolInfosException("Build directory is undefined.");
|
|
||||||
}
|
|
||||||
else if (!buildDirectory.exists())
|
|
||||||
{
|
|
||||||
throw new StatoolInfosException("Build directory does not exist.");
|
|
||||||
}
|
|
||||||
else if (!buildDirectory.isDirectory())
|
|
||||||
{
|
|
||||||
throw new StatoolInfosException("Build directory is not a directory.");
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
// Build file section.
|
|
||||||
PathProperties target = new PathPropertyList();
|
|
||||||
target.put("file.class", configuration.get("conf.class"));
|
|
||||||
target.put("file.generator", "StatoolInfos");
|
|
||||||
target.put("file.datetime", LocalDateTime.now().toString());
|
|
||||||
target.put("file.protocol", configuration.get("conf.protocol"));
|
|
||||||
|
|
||||||
// Load input properties.
|
|
||||||
PathProperties input = PathPropertyUtils.load(inputFile);
|
|
||||||
|
|
||||||
// Add input properties with file section ones.
|
|
||||||
target.add(input);
|
|
||||||
|
|
||||||
// Save the build properties.
|
|
||||||
File targetFile = new File(buildDirectory, configurationFile.getName());
|
|
||||||
PathPropertyUtils.save(targetFile, target);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -117,80 +62,12 @@ public class StatoolInfos
|
||||||
*/
|
*/
|
||||||
public static void clear(final File configurationFile) throws StatoolInfosException, IOException
|
public static void clear(final File configurationFile) throws StatoolInfosException, IOException
|
||||||
{
|
{
|
||||||
|
logger.info("Clear {}", configurationFile.getAbsolutePath());
|
||||||
Configuration configuration = Factory.loadConfiguration(configurationFile);
|
Configuration configuration = Factory.loadConfiguration(configurationFile);
|
||||||
|
|
||||||
{
|
Builder.clear(configuration);
|
||||||
logger.info("Build directory setting: {}", configuration.getBuildDirectoryPath());
|
Crawler.clear(configuration);
|
||||||
|
Htmlizer.clear(configuration);
|
||||||
String path = configuration.getBuildDirectoryPath();
|
|
||||||
if (StringUtils.isBlank(path))
|
|
||||||
{
|
|
||||||
logger.warn("Undefined build directory.");
|
|
||||||
}
|
|
||||||
else if (!new File(path).exists())
|
|
||||||
{
|
|
||||||
logger.warn("Build directory does not exist: {}.", path);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
File buildDirectory = configuration.getBuildDirectory();
|
|
||||||
|
|
||||||
for (File file : buildDirectory.listFiles())
|
|
||||||
{
|
|
||||||
if ((file.isFile()) && (StringUtils.endsWithAny(file.getName(), ".properties")))
|
|
||||||
{
|
|
||||||
logger.info("Deleting " + file.getName());
|
|
||||||
file.delete();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
{
|
|
||||||
logger.info("Cache setting: {}", configuration.getCrawlCachePath());
|
|
||||||
|
|
||||||
String path = configuration.getCrawlCachePath();
|
|
||||||
if (StringUtils.isBlank(path))
|
|
||||||
{
|
|
||||||
logger.warn("Undefined crawl cache.");
|
|
||||||
}
|
|
||||||
else if (!new File(path).exists())
|
|
||||||
{
|
|
||||||
logger.warn("Crawl cache does not exist: {}.", path);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
CrawlCache cache = configuration.getCrawlCache();
|
|
||||||
cache.clear();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
{
|
|
||||||
logger.info("Htmlize directory setting: {}", configuration.getHtmlizeDirectoryPath());
|
|
||||||
|
|
||||||
String htmlDirectoryPath = configuration.getHtmlizeDirectoryPath();
|
|
||||||
if (StringUtils.isBlank(htmlDirectoryPath))
|
|
||||||
{
|
|
||||||
logger.warn("Undefined htmlize directory.");
|
|
||||||
}
|
|
||||||
else if (!new File(htmlDirectoryPath).exists())
|
|
||||||
{
|
|
||||||
logger.warn("Htmlize directory does not exist: {}.", htmlDirectoryPath);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
File htmlizeDirectory = configuration.getHtmlizeDirectory();
|
|
||||||
|
|
||||||
for (File file : htmlizeDirectory.listFiles())
|
|
||||||
{
|
|
||||||
if ((file.isFile()) && (StringUtils.endsWithAny(file.getName(), ".properties", ".js", ".html", ".ico", ".css", ".jpg", ".xhtml")))
|
|
||||||
{
|
|
||||||
logger.info("Deleting " + file.getName());
|
|
||||||
file.delete();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -205,62 +82,7 @@ public class StatoolInfos
|
||||||
*/
|
*/
|
||||||
public static void crawl(final File configurationFile) throws StatoolInfosException, IOException
|
public static void crawl(final File configurationFile) throws StatoolInfosException, IOException
|
||||||
{
|
{
|
||||||
Configuration configuration = Factory.loadConfiguration(configurationFile);
|
Crawler.crawl(configurationFile);
|
||||||
|
|
||||||
logger.info("Crawl input setting: {}", configuration.getCrawlInputPath());
|
|
||||||
logger.info("Crawl cache setting: {}", configuration.getCrawlCachePath());
|
|
||||||
|
|
||||||
CrawlCache cache = configuration.getCrawlCache();
|
|
||||||
|
|
||||||
PathProperties input = PathPropertyUtils.load(configuration.getCrawlInput());
|
|
||||||
|
|
||||||
cache.storeQuietly(input.getURL("federation.logo"));
|
|
||||||
cache.storeQuietly(input.getURL("organization.logo"));
|
|
||||||
|
|
||||||
PathProperties subs = input.getByPrefix("subs");
|
|
||||||
for (PathProperty property : subs)
|
|
||||||
{
|
|
||||||
URL url = new URL(property.getValue());
|
|
||||||
crawl(url, cache);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Crawl.
|
|
||||||
*
|
|
||||||
* @param url
|
|
||||||
* the input
|
|
||||||
* @param cache
|
|
||||||
* the cache
|
|
||||||
* @throws StatoolInfosException
|
|
||||||
* the statool infos exception
|
|
||||||
* @throws IOException
|
|
||||||
* Signals that an I/O exception has occurred.
|
|
||||||
*/
|
|
||||||
public static void crawl(final URL url, final CrawlCache cache) throws StatoolInfosException, IOException
|
|
||||||
{
|
|
||||||
logger.info("Crawling " + url);
|
|
||||||
|
|
||||||
File file = cache.store(url);
|
|
||||||
PathProperties properties = PathPropertyUtils.load(file);
|
|
||||||
|
|
||||||
PathProperties crawlSection = new PathPropertyList();
|
|
||||||
crawlSection.put("crawl.crawler", "StatoolInfos");
|
|
||||||
crawlSection.put("crawl.datetime", LocalDateTime.now().toString());
|
|
||||||
crawlSection.put("crawl.url", url.toString());
|
|
||||||
properties.add(crawlSection);
|
|
||||||
cache.storeProperties(url, properties);
|
|
||||||
|
|
||||||
cache.storeQuietly(properties.getURL("organization.logo"));
|
|
||||||
cache.storeQuietly(properties.getURL("service.logo"));
|
|
||||||
|
|
||||||
//
|
|
||||||
PathProperties subs = properties.getByPrefix("subs");
|
|
||||||
for (PathProperty property : subs)
|
|
||||||
{
|
|
||||||
URL subUrl = new URL(property.getValue());
|
|
||||||
crawl(subUrl, cache);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -275,52 +97,6 @@ public class StatoolInfos
|
||||||
*/
|
*/
|
||||||
public static void htmlize(final File configurationFile) throws StatoolInfosException, IOException
|
public static void htmlize(final File configurationFile) throws StatoolInfosException, IOException
|
||||||
{
|
{
|
||||||
Configuration configuration = Factory.loadConfiguration(configurationFile);
|
Htmlizer.htmlize(configurationFile);
|
||||||
|
|
||||||
logger.info("Cache setting: {}", configuration.getCrawlCachePath());
|
|
||||||
logger.info("Htmlize input setting: {}", configuration.getHtmlizeInputPath());
|
|
||||||
logger.info("Htmlize directory setting: {}", configuration.getHtmlizeDirectoryPath());
|
|
||||||
|
|
||||||
File htmlizeInput = configuration.getHtmlizeInput();
|
|
||||||
File htmlizeDirectory = configuration.getHtmlizeDirectory();
|
|
||||||
if (htmlizeInput == null)
|
|
||||||
{
|
|
||||||
throw new IllegalArgumentException("Htmlize input undefined.");
|
|
||||||
}
|
|
||||||
else if (!htmlizeInput.exists())
|
|
||||||
{
|
|
||||||
throw new IllegalArgumentException("Htmlize input is missing.");
|
|
||||||
}
|
|
||||||
else if (htmlizeInput.isDirectory())
|
|
||||||
{
|
|
||||||
throw new IllegalArgumentException("Htmlize input is a directory.");
|
|
||||||
}
|
|
||||||
else if (htmlizeDirectory == null)
|
|
||||||
{
|
|
||||||
throw new IllegalArgumentException("Htmlize directory undefined.");
|
|
||||||
}
|
|
||||||
else if (!htmlizeDirectory.exists())
|
|
||||||
{
|
|
||||||
throw new IllegalArgumentException("Htmlize directory is missing.");
|
|
||||||
}
|
|
||||||
else if (!htmlizeDirectory.isDirectory())
|
|
||||||
{
|
|
||||||
throw new IllegalArgumentException("Htmlize directory is not a directory.");
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
if (configuration.isFederation())
|
|
||||||
{
|
|
||||||
Htmlizer.htmlizeFederation(configuration);
|
|
||||||
}
|
|
||||||
else if (configuration.isOrganization())
|
|
||||||
{
|
|
||||||
Htmlizer.htmlizeOrganisation(configuration);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
logger.warn("No htmlize for this input: {}.", configuration.getClassName());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -16,7 +16,7 @@
|
||||||
* You should have received a copy of the GNU Affero General Public License
|
* You should have received a copy of the GNU Affero General Public License
|
||||||
* along with StatoolInfos. If not, see <http://www.gnu.org/licenses/>.
|
* along with StatoolInfos. If not, see <http://www.gnu.org/licenses/>.
|
||||||
*/
|
*/
|
||||||
package fr.devinsy.statoolinfos.core;
|
package fr.devinsy.statoolinfos.crawl;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
@ -29,6 +29,8 @@ import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
import fr.devinsy.catgenerator.core.CatGenerator;
|
import fr.devinsy.catgenerator.core.CatGenerator;
|
||||||
|
import fr.devinsy.statoolinfos.core.StatoolInfosException;
|
||||||
|
import fr.devinsy.statoolinfos.core.StatoolInfosUtils;
|
||||||
import fr.devinsy.statoolinfos.properties.PathProperties;
|
import fr.devinsy.statoolinfos.properties.PathProperties;
|
||||||
import fr.devinsy.statoolinfos.properties.PathPropertyList;
|
import fr.devinsy.statoolinfos.properties.PathPropertyList;
|
||||||
import fr.devinsy.statoolinfos.properties.PathPropertyUtils;
|
import fr.devinsy.statoolinfos.properties.PathPropertyUtils;
|
151
src/fr/devinsy/statoolinfos/crawl/Crawler.java
Normal file
151
src/fr/devinsy/statoolinfos/crawl/Crawler.java
Normal file
|
@ -0,0 +1,151 @@
|
||||||
|
/*
|
||||||
|
* Copyright (C) 2020 Christian Pierre MOMON <christian@momon.org>
|
||||||
|
*
|
||||||
|
* This file is part of StatoolInfos, simple service statistics tool.
|
||||||
|
*
|
||||||
|
* StatoolInfos is free software: you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU Affero General Public License as
|
||||||
|
* published by the Free Software Foundation, either version 3 of the
|
||||||
|
* License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* StatoolInfos is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU Affero General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU Affero General Public License
|
||||||
|
* along with StatoolInfos. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
package fr.devinsy.statoolinfos.crawl;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.net.URL;
|
||||||
|
import java.time.LocalDateTime;
|
||||||
|
|
||||||
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import fr.devinsy.statoolinfos.core.Configuration;
|
||||||
|
import fr.devinsy.statoolinfos.core.Factory;
|
||||||
|
import fr.devinsy.statoolinfos.core.StatoolInfosException;
|
||||||
|
import fr.devinsy.statoolinfos.properties.PathProperties;
|
||||||
|
import fr.devinsy.statoolinfos.properties.PathProperty;
|
||||||
|
import fr.devinsy.statoolinfos.properties.PathPropertyList;
|
||||||
|
import fr.devinsy.statoolinfos.properties.PathPropertyUtils;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The Class StatoolInfos.
|
||||||
|
*/
|
||||||
|
public class Crawler
|
||||||
|
{
|
||||||
|
private static Logger logger = LoggerFactory.getLogger(Crawler.class);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Clear.
|
||||||
|
*
|
||||||
|
* @param configurationFile
|
||||||
|
* the input
|
||||||
|
* @throws StatoolInfosException
|
||||||
|
* the statool infos exception
|
||||||
|
* @throws IOException
|
||||||
|
* Signals that an I/O exception has occurred.
|
||||||
|
*/
|
||||||
|
public static void clear(final Configuration configuration) throws StatoolInfosException, IOException
|
||||||
|
{
|
||||||
|
logger.info("Cache setting: {}", configuration.getCrawlCachePath());
|
||||||
|
|
||||||
|
String path = configuration.getCrawlCachePath();
|
||||||
|
if (StringUtils.isBlank(path))
|
||||||
|
{
|
||||||
|
logger.warn("Undefined crawl cache.");
|
||||||
|
}
|
||||||
|
else if (!new File(path).exists())
|
||||||
|
{
|
||||||
|
logger.warn("Crawl cache does not exist: {}.", path);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
CrawlCache cache = configuration.getCrawlCache();
|
||||||
|
cache.clear();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public static void crawl(final Configuration configuration) throws StatoolInfosException, IOException
|
||||||
|
{
|
||||||
|
logger.info("Crawl input setting: {}", configuration.getCrawlInputPath());
|
||||||
|
logger.info("Crawl cache setting: {}", configuration.getCrawlCachePath());
|
||||||
|
|
||||||
|
CrawlCache cache = configuration.getCrawlCache();
|
||||||
|
|
||||||
|
PathProperties input = PathPropertyUtils.load(configuration.getCrawlInput());
|
||||||
|
|
||||||
|
cache.storeQuietly(input.getURL("federation.logo"));
|
||||||
|
cache.storeQuietly(input.getURL("organization.logo"));
|
||||||
|
|
||||||
|
PathProperties subs = input.getByPrefix("subs");
|
||||||
|
for (PathProperty property : subs)
|
||||||
|
{
|
||||||
|
URL url = new URL(property.getValue());
|
||||||
|
crawl(url, cache);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Crawl.
|
||||||
|
*
|
||||||
|
* @param configurationFile
|
||||||
|
* the input
|
||||||
|
* @throws StatoolInfosException
|
||||||
|
* the statool infos exception
|
||||||
|
* @throws IOException
|
||||||
|
* Signals that an I/O exception has occurred.
|
||||||
|
*/
|
||||||
|
public static void crawl(final File configurationFile) throws StatoolInfosException, IOException
|
||||||
|
{
|
||||||
|
logger.info("Crawl {}", configurationFile.getAbsolutePath());
|
||||||
|
|
||||||
|
Configuration configuration = Factory.loadConfiguration(configurationFile);
|
||||||
|
|
||||||
|
crawl(configuration);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Crawl.
|
||||||
|
*
|
||||||
|
* @param url
|
||||||
|
* the input
|
||||||
|
* @param cache
|
||||||
|
* the cache
|
||||||
|
* @throws StatoolInfosException
|
||||||
|
* the statool infos exception
|
||||||
|
* @throws IOException
|
||||||
|
* Signals that an I/O exception has occurred.
|
||||||
|
*/
|
||||||
|
public static void crawl(final URL url, final CrawlCache cache) throws StatoolInfosException, IOException
|
||||||
|
{
|
||||||
|
logger.info("Crawling " + url);
|
||||||
|
|
||||||
|
File file = cache.store(url);
|
||||||
|
PathProperties properties = PathPropertyUtils.load(file);
|
||||||
|
|
||||||
|
PathProperties crawlSection = new PathPropertyList();
|
||||||
|
crawlSection.put("crawl.crawler", "StatoolInfos");
|
||||||
|
crawlSection.put("crawl.datetime", LocalDateTime.now().toString());
|
||||||
|
crawlSection.put("crawl.url", url.toString());
|
||||||
|
properties.add(crawlSection);
|
||||||
|
cache.storeProperties(url, properties);
|
||||||
|
|
||||||
|
cache.storeQuietly(properties.getURL("organization.logo"));
|
||||||
|
cache.storeQuietly(properties.getURL("service.logo"));
|
||||||
|
|
||||||
|
//
|
||||||
|
PathProperties subs = properties.getByPrefix("subs");
|
||||||
|
for (PathProperty property : subs)
|
||||||
|
{
|
||||||
|
URL subUrl = new URL(property.getValue());
|
||||||
|
crawl(subUrl, cache);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -23,17 +23,18 @@ import java.io.IOException;
|
||||||
import java.nio.charset.StandardCharsets;
|
import java.nio.charset.StandardCharsets;
|
||||||
|
|
||||||
import org.apache.commons.io.FileUtils;
|
import org.apache.commons.io.FileUtils;
|
||||||
|
import org.apache.commons.lang3.StringUtils;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
import fr.devinsy.statoolinfos.core.Configuration;
|
import fr.devinsy.statoolinfos.core.Configuration;
|
||||||
import fr.devinsy.statoolinfos.core.CrawlCache;
|
|
||||||
import fr.devinsy.statoolinfos.core.Factory;
|
import fr.devinsy.statoolinfos.core.Factory;
|
||||||
import fr.devinsy.statoolinfos.core.Federation;
|
import fr.devinsy.statoolinfos.core.Federation;
|
||||||
import fr.devinsy.statoolinfos.core.Organization;
|
import fr.devinsy.statoolinfos.core.Organization;
|
||||||
import fr.devinsy.statoolinfos.core.Service;
|
import fr.devinsy.statoolinfos.core.Service;
|
||||||
import fr.devinsy.statoolinfos.core.StatoolInfosException;
|
import fr.devinsy.statoolinfos.core.StatoolInfosException;
|
||||||
import fr.devinsy.statoolinfos.core.StatoolInfosUtils;
|
import fr.devinsy.statoolinfos.core.StatoolInfosUtils;
|
||||||
|
import fr.devinsy.statoolinfos.crawl.CrawlCache;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The Class Htmlizer.
|
* The Class Htmlizer.
|
||||||
|
@ -49,6 +50,40 @@ public class Htmlizer
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Clear.
|
||||||
|
*
|
||||||
|
* @param configuration
|
||||||
|
* the configuration
|
||||||
|
*/
|
||||||
|
public static void clear(final Configuration configuration)
|
||||||
|
{
|
||||||
|
logger.info("Htmlize directory setting: {}", configuration.getHtmlizeDirectoryPath());
|
||||||
|
|
||||||
|
String htmlDirectoryPath = configuration.getHtmlizeDirectoryPath();
|
||||||
|
if (StringUtils.isBlank(htmlDirectoryPath))
|
||||||
|
{
|
||||||
|
logger.warn("Undefined htmlize directory.");
|
||||||
|
}
|
||||||
|
else if (!new File(htmlDirectoryPath).exists())
|
||||||
|
{
|
||||||
|
logger.warn("Htmlize directory does not exist: {}.", htmlDirectoryPath);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
File htmlizeDirectory = configuration.getHtmlizeDirectory();
|
||||||
|
|
||||||
|
for (File file : htmlizeDirectory.listFiles())
|
||||||
|
{
|
||||||
|
if ((file.isFile()) && (StringUtils.endsWithAny(file.getName(), ".properties", ".js", ".html", ".ico", ".css", ".jpg", ".xhtml")))
|
||||||
|
{
|
||||||
|
logger.info("Deleting " + file.getName());
|
||||||
|
file.delete();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Copy stuff.
|
* Copy stuff.
|
||||||
*
|
*
|
||||||
|
@ -74,15 +109,93 @@ public class Htmlizer
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Htmlize.
|
||||||
|
*
|
||||||
|
* @param configuration
|
||||||
|
* the configuration
|
||||||
|
* @throws IOException
|
||||||
|
* Signals that an I/O exception has occurred.
|
||||||
|
* @throws StatoolInfosException
|
||||||
|
* the statool infos exception
|
||||||
|
*/
|
||||||
|
public static void htmlize(final Configuration configuration) throws IOException, StatoolInfosException
|
||||||
|
{
|
||||||
|
logger.info("Cache setting: {}", configuration.getCrawlCachePath());
|
||||||
|
logger.info("Htmlize input setting: {}", configuration.getHtmlizeInputPath());
|
||||||
|
logger.info("Htmlize directory setting: {}", configuration.getHtmlizeDirectoryPath());
|
||||||
|
|
||||||
|
File htmlizeInput = configuration.getHtmlizeInput();
|
||||||
|
File htmlizeDirectory = configuration.getHtmlizeDirectory();
|
||||||
|
if (htmlizeInput == null)
|
||||||
|
{
|
||||||
|
throw new IllegalArgumentException("Htmlize input undefined.");
|
||||||
|
}
|
||||||
|
else if (!htmlizeInput.exists())
|
||||||
|
{
|
||||||
|
throw new IllegalArgumentException("Htmlize input is missing.");
|
||||||
|
}
|
||||||
|
else if (htmlizeInput.isDirectory())
|
||||||
|
{
|
||||||
|
throw new IllegalArgumentException("Htmlize input is a directory.");
|
||||||
|
}
|
||||||
|
else if (htmlizeDirectory == null)
|
||||||
|
{
|
||||||
|
throw new IllegalArgumentException("Htmlize directory undefined.");
|
||||||
|
}
|
||||||
|
else if (!htmlizeDirectory.exists())
|
||||||
|
{
|
||||||
|
throw new IllegalArgumentException("Htmlize directory is missing.");
|
||||||
|
}
|
||||||
|
else if (!htmlizeDirectory.isDirectory())
|
||||||
|
{
|
||||||
|
throw new IllegalArgumentException("Htmlize directory is not a directory.");
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if (configuration.isFederation())
|
||||||
|
{
|
||||||
|
Htmlizer.htmlizeFederation(configuration);
|
||||||
|
}
|
||||||
|
else if (configuration.isOrganization())
|
||||||
|
{
|
||||||
|
Htmlizer.htmlizeOrganisation(configuration);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
logger.warn("No htmlize for this input: {}.", configuration.getClassName());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Htmlize.
|
||||||
|
*
|
||||||
|
* @param configurationFile
|
||||||
|
* the configuration file
|
||||||
|
* @throws StatoolInfosException
|
||||||
|
* the statool infos exception
|
||||||
|
* @throws IOException
|
||||||
|
* Signals that an I/O exception has occurred.
|
||||||
|
*/
|
||||||
|
public static void htmlize(final File configurationFile) throws StatoolInfosException, IOException
|
||||||
|
{
|
||||||
|
logger.info("Htmlize {}", configurationFile.getAbsolutePath());
|
||||||
|
|
||||||
|
Configuration configuration = Factory.loadConfiguration(configurationFile);
|
||||||
|
|
||||||
|
htmlize(configuration);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Htmlize federation.
|
* Htmlize federation.
|
||||||
*
|
*
|
||||||
* @param federation
|
* @param configuration
|
||||||
* the federation
|
* the configuration
|
||||||
* @param cache
|
|
||||||
* the cache
|
|
||||||
* @throws IOException
|
* @throws IOException
|
||||||
|
* Signals that an I/O exception has occurred.
|
||||||
* @throws StatoolInfosException
|
* @throws StatoolInfosException
|
||||||
|
* the statool infos exception
|
||||||
*/
|
*/
|
||||||
public static void htmlizeFederation(final Configuration configuration) throws IOException, StatoolInfosException
|
public static void htmlizeFederation(final Configuration configuration) throws IOException, StatoolInfosException
|
||||||
{
|
{
|
||||||
|
@ -145,14 +258,14 @@ public class Htmlizer
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Htmlize organization.
|
* Htmlize organisation.
|
||||||
*
|
*
|
||||||
* @param federation
|
* @param configuration
|
||||||
* the federation
|
* the configuration
|
||||||
* @param targetDirectory
|
|
||||||
* the target directory
|
|
||||||
* @throws IOException
|
* @throws IOException
|
||||||
|
* Signals that an I/O exception has occurred.
|
||||||
* @throws StatoolInfosException
|
* @throws StatoolInfosException
|
||||||
|
* the statool infos exception
|
||||||
*/
|
*/
|
||||||
public static void htmlizeOrganisation(final Configuration configuration) throws IOException, StatoolInfosException
|
public static void htmlizeOrganisation(final Configuration configuration) throws IOException, StatoolInfosException
|
||||||
{
|
{
|
||||||
|
|
Loading…
Reference in a new issue