From 8a563ff19415bc105223f2dc91bf9628249acd09 Mon Sep 17 00:00:00 2001 From: "Christian P. MOMON" Date: Wed, 8 Dec 2021 00:39:48 +0100 Subject: [PATCH] Improved help message, custom pattern log support and Apache patterns. --- .../statoolinfos/cli/StatoolInfosCLI.java | 176 ++++++++---------- .../devinsy/statoolinfos/core/BotFilter.java | 67 +++++++ .../statoolinfos/core/Configuration.java | 15 ++ .../statoolinfos/core/StatoolInfos.java | 24 +-- .../devinsy/statoolinfos/metrics/Prober.java | 26 ++- .../metrics/http/HttpAccessLogAnalyzer.java | 88 ++++----- .../metrics/http/HttpErrorLogAnalyzer.java | 98 +++++----- .../metrics/http/HttpLogIterator.java | 2 +- .../devinsy/statoolinfos/util/FilesUtils.java | 67 +++++++ .../statoolinfos/util/LineIterator.java | 38 ++++ 10 files changed, 380 insertions(+), 221 deletions(-) create mode 100644 src/fr/devinsy/statoolinfos/core/BotFilter.java diff --git a/src/fr/devinsy/statoolinfos/cli/StatoolInfosCLI.java b/src/fr/devinsy/statoolinfos/cli/StatoolInfosCLI.java index 705cbbd..6e591ea 100644 --- a/src/fr/devinsy/statoolinfos/cli/StatoolInfosCLI.java +++ b/src/fr/devinsy/statoolinfos/cli/StatoolInfosCLI.java @@ -25,7 +25,7 @@ import org.apache.commons.lang3.StringUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import fr.devinsy.statoolinfos.core.LogFilter; +import fr.devinsy.statoolinfos.core.BotFilter; import fr.devinsy.statoolinfos.core.StatoolInfos; import fr.devinsy.statoolinfos.util.BuildInformation; import fr.devinsy.statoolinfos.util.Chrono; @@ -104,20 +104,22 @@ public final class StatoolInfosCLI message.appendln("Usage:"); message.appendln(" statoolinfos [ -h | -help | --help ]"); message.appendln(" statoolinfos [ -v | -version | --version ]"); - message.appendln(" statoolinfos build build property files from conf and input"); - message.appendln(" statoolinfos clear remove property files from conf"); - message.appendln(" statoolinfos crawl crawl all file from conf and input"); - message.appendln(" statoolinfos format format in tiny way"); - message.appendln(" statoolinfos htmlize generate web pages from conf"); + message.appendln(); + message.appendln(" statoolinfos build build property files from conf and input"); + message.appendln(" statoolinfos clear remove property files from conf"); + message.appendln(" statoolinfos crawl crawl all file from conf and input"); + message.appendln(" statoolinfos htmlize generate web pages from conf"); + message.appendln(" statoolinfos probe generate metrics files from conf"); + message.appendln(" statoolinfos uptime update uptime journal"); + message.appendln(); + message.appendln(" statoolinfos format format property files in tiny way"); message.appendln(" statoolinfos list ip [-bot|-nobot] generate ip list from log file"); message.appendln(" statoolinfos list ua [-bot|-nobot] generate user agent list from log file"); message.appendln(" statoolinfos list visitors [-bot|-nobot] generate visitors (ip+ua) list from log file"); - message.appendln(" statoolinfos probe generate metrics files from conf"); message.appendln(" statoolinfos stat ip [-bot|-nobot] generate stats about ip from log file"); message.appendln(" statoolinfos stat ua [-bot|-nobot] generate stats about user agent from log file"); message.appendln(" statoolinfos stat visitors [-bot|-nobot] generate stats about visitors (ip+ua) from log file"); message.appendln(" statoolinfos tagdate update the file.datetime file"); - message.appendln(" statoolinfos uptime update uptime journal"); System.out.println(message.toString()); } @@ -200,21 +202,21 @@ public final class StatoolInfosCLI * the source * @return the log filter */ - private static LogFilter parseLogFilterOption(final String source) + private static BotFilter parseLogFilterOption(final String source) { - LogFilter result; + BotFilter result; if (StringUtils.equals(source, "-all")) { - result = LogFilter.ALL; + result = BotFilter.ALL; } else if (StringUtils.equals(source, "-bot")) { - result = LogFilter.BOT; + result = BotFilter.BOT; } else if (StringUtils.equals(source, "-nobot")) { - result = LogFilter.NOBOT; + result = BotFilter.NOBOT; } else { @@ -271,51 +273,43 @@ public final class StatoolInfosCLI { displayVersion(); } - else if (isMatching(args, "build", "\\s*.+\\s*")) + else if (isMatching(args, "build", "\\s*.+\\.conf\\s*")) { - Files inputs = convertPath(StringUtils.trim(args[1])); - for (File input : inputs) + File configurationFile = new File(StringUtils.trim(args[1])); + try { - try - { - StatoolInfos.build(input); - } - catch (Exception exception) - { - logger.error("Error with [{}]: {}", input.getAbsoluteFile(), exception.getMessage()); - } + StatoolInfos.build(configurationFile); + } + catch (Exception exception) + { + logger.error("Error with [{}]: {}", configurationFile.getAbsoluteFile(), exception.getMessage()); } } - else if (isMatching(args, "clear", "\\s*.+\\s*")) + else if (isMatching(args, "clear", "\\s*.+\\.conf\\s*")) { - Files inputs = convertPath(StringUtils.trim(args[1])); - for (File input : inputs) + File configurationFile = new File(StringUtils.trim(args[1])); + try { - try - { - StatoolInfos.clear(input); - } - catch (Exception exception) - { - logger.error("Error with [{}]: {}", input.getAbsoluteFile(), exception.getMessage()); - } + StatoolInfos.clear(configurationFile); + } + catch (Exception exception) + { + logger.error("Error with [{}]: {}", configurationFile.getAbsoluteFile(), exception.getMessage()); } } - else if (isMatching(args, "crawl", "\\s*.+\\s*")) + else if (isMatching(args, "crawl", "\\s*.+\\.conf\\s*")) { Chrono chrono = new Chrono().start(); - Files inputs = convertPath(StringUtils.trim(args[1])); - for (File input : inputs) + + File configurationFile = new File(StringUtils.trim(args[1])); + try { - try - { - StatoolInfos.crawl(input); - } - catch (Exception exception) - { - logger.error("Error with [{}]: {}", input.getAbsoluteFile(), exception.getMessage()); - exception.printStackTrace(); - } + StatoolInfos.crawl(configurationFile); + } + catch (Exception exception) + { + logger.error("Error with [{}]: {}", configurationFile.getAbsoluteFile(), exception.getMessage()); + exception.printStackTrace(); } System.out.println(chrono.format()); } @@ -335,21 +329,18 @@ public final class StatoolInfosCLI } } } - else if (isMatching(args, "htmlize", "\\s*.+\\s*")) + else if (isMatching(args, "htmlize", "\\s*.+\\.conf\\s*")) { Chrono chrono = new Chrono().start(); - Files inputs = convertPath(StringUtils.trim(args[1])); - for (File input : inputs) + File configurationFile = new File(StringUtils.trim(args[1])); + try { - try - { - StatoolInfos.htmlize(input); - } - catch (Exception exception) - { - logger.error("Error with [{}]: {}", input.getAbsoluteFile(), exception.getMessage()); - exception.printStackTrace(); - } + StatoolInfos.htmlize(configurationFile); + } + catch (Exception exception) + { + logger.error("Error with [{}]: {}", configurationFile.getAbsoluteFile(), exception.getMessage()); + exception.printStackTrace(); } System.out.println(chrono.format()); } @@ -357,11 +348,11 @@ public final class StatoolInfosCLI { File source = new File(args[2]); - StatoolInfos.listIps(source, LogFilter.ALL); + StatoolInfos.listIps(source, BotFilter.ALL); } else if (isMatching(args, "list", "ip", "(-all|-bot|-nobot)", "\\s*\\S+\\s*")) { - LogFilter filter = parseLogFilterOption(args[2]); + BotFilter filter = parseLogFilterOption(args[2]); File source = new File(args[3]); StatoolInfos.listIps(source, filter); @@ -370,11 +361,11 @@ public final class StatoolInfosCLI { File source = new File(args[2]); - StatoolInfos.listUserAgents(source, LogFilter.ALL); + StatoolInfos.listUserAgents(source, BotFilter.ALL); } else if (isMatching(args, "list", "(useragent|ua)", "(-all|-bot|-nobot)", "\\s*\\S+\\s*")) { - LogFilter filter = parseLogFilterOption(args[2]); + BotFilter filter = parseLogFilterOption(args[2]); File source = new File(args[3]); StatoolInfos.listUserAgents(source, filter); @@ -383,40 +374,37 @@ public final class StatoolInfosCLI { File source = new File(args[2]); - StatoolInfos.listVisitors(source, LogFilter.ALL); + StatoolInfos.listVisitors(source, BotFilter.ALL); } else if (isMatching(args, "list", "visitors", "(-all|-bot|-nobot)", "\\s*\\S+\\s*")) { - LogFilter filter = parseLogFilterOption(args[2]); + BotFilter filter = parseLogFilterOption(args[2]); File source = new File(args[3]); StatoolInfos.listVisitors(source, filter); } - else if (isMatching(args, "probe", "\\s*.+\\s*")) + else if (isMatching(args, "probe", "\\s*.+\\.conf\\s*")) { - Files inputs = convertPath(StringUtils.trim(args[1])); - for (File input : inputs) + File configurationFile = new File(StringUtils.trim(args[1])); + try { - try - { - StatoolInfos.probe(input); - } - catch (Exception exception) - { - logger.error("Error with [{}]: {}", input.getAbsoluteFile(), exception.getMessage()); - exception.printStackTrace(); - } + StatoolInfos.probe(configurationFile); + } + catch (Exception exception) + { + logger.error("Error with [{}]: {}", configurationFile.getAbsoluteFile(), exception.getMessage()); + exception.printStackTrace(); } } else if (isMatching(args, "stat", "ip", "\\s*\\S+\\s*")) { File source = new File(args[2]); - StatoolInfos.statIps(source, LogFilter.ALL); + StatoolInfos.statIps(source, BotFilter.ALL); } else if (isMatching(args, "stat", "ip", "(-all|-bot|-nobot)", "\\s*\\S+\\s*")) { - LogFilter filter = parseLogFilterOption(args[2]); + BotFilter filter = parseLogFilterOption(args[2]); File source = new File(args[3]); StatoolInfos.statIps(source, filter); @@ -424,11 +412,11 @@ public final class StatoolInfosCLI else if (isMatching(args, "stat", "(useragent|ua)", "\\s*\\S+\\s*")) { File source = new File(args[2]); - StatoolInfos.statUserAgents(source, LogFilter.ALL); + StatoolInfos.statUserAgents(source, BotFilter.ALL); } else if (isMatching(args, "stat", "(useragent|ua)", "(-all|-bot|-nobot)", "\\s*\\S+\\s*")) { - LogFilter filter = parseLogFilterOption(args[2]); + BotFilter filter = parseLogFilterOption(args[2]); File source = new File(args[3]); StatoolInfos.statUserAgents(source, filter); @@ -437,11 +425,11 @@ public final class StatoolInfosCLI { File source = new File(args[2]); - StatoolInfos.statVisitors(source, LogFilter.ALL); + StatoolInfos.statVisitors(source, BotFilter.ALL); } else if (isMatching(args, "stat", "visitors", "(-all|-bot|-nobot)", "\\s*\\S+\\s*")) { - LogFilter filter = parseLogFilterOption(args[2]); + BotFilter filter = parseLogFilterOption(args[2]); File source = new File(args[3]); StatoolInfos.statVisitors(source, filter); @@ -462,21 +450,19 @@ public final class StatoolInfosCLI } } } - else if (isMatching(args, "uptime", "\\s*.+\\s*")) + else if (isMatching(args, "uptime", "\\s*.+\\.conf\\s*")) { Chrono chrono = new Chrono().start(); - Files inputs = convertPath(StringUtils.trim(args[1])); - for (File input : inputs) + + File configurationFile = new File(StringUtils.trim(args[1])); + try { - try - { - StatoolInfos.uptime(input); - } - catch (Exception exception) - { - logger.error("Error with [{}]: {}", input.getAbsoluteFile(), exception.getMessage()); - exception.printStackTrace(); - } + StatoolInfos.uptime(configurationFile); + } + catch (Exception exception) + { + logger.error("Error with [{}]: {}", configurationFile.getAbsoluteFile(), exception.getMessage()); + exception.printStackTrace(); } System.out.println(chrono.format()); } diff --git a/src/fr/devinsy/statoolinfos/core/BotFilter.java b/src/fr/devinsy/statoolinfos/core/BotFilter.java new file mode 100644 index 0000000..1ff6605 --- /dev/null +++ b/src/fr/devinsy/statoolinfos/core/BotFilter.java @@ -0,0 +1,67 @@ +/* + * Copyright (C) 2020-2021 Christian Pierre MOMON + * + * This file is part of StatoolInfos, simple service statistics tool. + * + * StatoolInfos is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as + * published by the Free Software Foundation, either version 3 of the + * License, or (at your option) any later version. + * + * StatoolInfos is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with StatoolInfos. If not, see . + */ +package fr.devinsy.statoolinfos.core; + +import fr.devinsy.statoolinfos.metrics.http.HttpAccessLog; + +/** + * The Enum LogFilter. + */ +public enum BotFilter +{ + ALL, + BOT, + NOBOT; + + /** + * Matches. + * + * @param log + * the log + * @return true, if successful + */ + public boolean matches(final HttpAccessLog log) + { + boolean result; + + if (log == null) + { + result = false; + } + else if (this == ALL) + { + result = true; + } + else if ((this == BOT) && (log.isBot())) + { + result = true; + } + else if ((this == NOBOT) && (!log.isBot())) + { + result = true; + } + else + { + result = false; + } + + // + return result; + } +} \ No newline at end of file diff --git a/src/fr/devinsy/statoolinfos/core/Configuration.java b/src/fr/devinsy/statoolinfos/core/Configuration.java index 1f610f5..4a639f6 100644 --- a/src/fr/devinsy/statoolinfos/core/Configuration.java +++ b/src/fr/devinsy/statoolinfos/core/Configuration.java @@ -331,6 +331,21 @@ public class Configuration extends PathPropertyList return result; } + /** + * Gets the probe http access log pattern. + * + * @return the probe http access log pattern + */ + public String getProbeHttpAccessLogPattern() + { + String result; + + result = get("conf.probe.httpaccesslog.pattern"); + + // + return result; + } + /** * Gets the probe http access log file. * diff --git a/src/fr/devinsy/statoolinfos/core/StatoolInfos.java b/src/fr/devinsy/statoolinfos/core/StatoolInfos.java index 68a8233..d958e0d 100644 --- a/src/fr/devinsy/statoolinfos/core/StatoolInfos.java +++ b/src/fr/devinsy/statoolinfos/core/StatoolInfos.java @@ -169,7 +169,7 @@ public class StatoolInfos * the source * @throws IOException */ - public static void listIps(final File source, final LogFilter filter) + public static void listIps(final File source, final BotFilter filter) { try { @@ -180,7 +180,7 @@ public class StatoolInfos while (iterator.hasNext()) { String line = iterator.next(); - HttpAccessLog log = HttpAccessLogAnalyzer.parseNginxCombinedLog(line); + HttpAccessLog log = HttpAccessLogAnalyzer.parseLog(line, HttpAccessLogAnalyzer.COMBINED_PATTERN); if (filter.matches(log)) { @@ -219,7 +219,7 @@ public class StatoolInfos * @param source * the source */ - public static void listUserAgents(final File source, final LogFilter filter) + public static void listUserAgents(final File source, final BotFilter filter) { try { @@ -230,7 +230,7 @@ public class StatoolInfos while (iterator.hasNext()) { String line = iterator.next(); - HttpAccessLog log = HttpAccessLogAnalyzer.parseNginxCombinedLog(line); + HttpAccessLog log = HttpAccessLogAnalyzer.parseLog(line, HttpAccessLogAnalyzer.COMBINED_PATTERN); if (filter.matches(log)) { @@ -270,7 +270,7 @@ public class StatoolInfos * @param source * the source */ - public static void listVisitors(final File source, final LogFilter filter) + public static void listVisitors(final File source, final BotFilter filter) { try { @@ -281,7 +281,7 @@ public class StatoolInfos while (iterator.hasNext()) { String line = iterator.next(); - HttpAccessLog log = HttpAccessLogAnalyzer.parseNginxCombinedLog(line); + HttpAccessLog log = HttpAccessLogAnalyzer.parseLog(line, HttpAccessLogAnalyzer.COMBINED_PATTERN); if (filter.matches(log)) { @@ -336,7 +336,7 @@ public class StatoolInfos * @param source * the source */ - public static void statIps(final File source, final LogFilter filter) + public static void statIps(final File source, final BotFilter filter) { try { @@ -347,7 +347,7 @@ public class StatoolInfos while (iterator.hasNext()) { String line = iterator.next(); - HttpAccessLog log = HttpAccessLogAnalyzer.parseNginxCombinedLog(line); + HttpAccessLog log = HttpAccessLogAnalyzer.parseLog(line, HttpAccessLogAnalyzer.COMBINED_PATTERN); if (filter.matches(log)) { stator.putLog(log); @@ -387,7 +387,7 @@ public class StatoolInfos * @param source * the source */ - public static void statUserAgents(final File source, final LogFilter filter) + public static void statUserAgents(final File source, final BotFilter filter) { try { @@ -398,7 +398,7 @@ public class StatoolInfos while (iterator.hasNext()) { String line = iterator.next(); - HttpAccessLog log = HttpAccessLogAnalyzer.parseNginxCombinedLog(line); + HttpAccessLog log = HttpAccessLogAnalyzer.parseLog(line, HttpAccessLogAnalyzer.COMBINED_PATTERN); if (filter.matches(log)) { stator.putLog(log); @@ -438,7 +438,7 @@ public class StatoolInfos * @param source * the source */ - public static void statVisitors(final File source, final LogFilter filter) + public static void statVisitors(final File source, final BotFilter filter) { try { @@ -449,7 +449,7 @@ public class StatoolInfos while (iterator.hasNext()) { String line = iterator.next(); - HttpAccessLog log = HttpAccessLogAnalyzer.parseNginxCombinedLog(line); + HttpAccessLog log = HttpAccessLogAnalyzer.parseLog(line, HttpAccessLogAnalyzer.COMBINED_PATTERN); if (filter.matches(log)) { stator.putLog(log); diff --git a/src/fr/devinsy/statoolinfos/metrics/Prober.java b/src/fr/devinsy/statoolinfos/metrics/Prober.java index aba9f75..f2c65ed 100644 --- a/src/fr/devinsy/statoolinfos/metrics/Prober.java +++ b/src/fr/devinsy/statoolinfos/metrics/Prober.java @@ -75,9 +75,11 @@ public class Prober { logger.info("== Processing HttpAccessLog."); String source = configuration.getProbeHttpAccessLogSource(); + String patternRegex = configuration.getProbeHttpAccessLogPattern(); logger.info("source=[{}]", source); + logger.info("pattern=[{}]", patternRegex); - PathCounters data = HttpAccessLogAnalyzer.probe(source); + PathCounters data = HttpAccessLogAnalyzer.probe(source, patternRegex); counters.putAll(data); } @@ -176,15 +178,19 @@ public class Prober for (String year : years) { - { - // Year. - PathCounter yearCounter = prefixCounters.get(prefix, year); - if (yearCounter != null) - { - String line = String.format("%s.%s=%s", yearCounter.getPath(), yearCounter.getTimeMark(), yearCounter.getCounter()); - metrics.appendln(line); - } - } + // Year stat is complicated to build because needs all the + // log of one year. + // { + // // Year. + // PathCounter yearCounter = prefixCounters.get(prefix, year); + // if (yearCounter != null) + // { + // String line = String.format("%s.%s=%s", + // yearCounter.getPath(), yearCounter.getTimeMark(), + // yearCounter.getCounter()); + // metrics.appendln(line); + // } + // } { // Months. diff --git a/src/fr/devinsy/statoolinfos/metrics/http/HttpAccessLogAnalyzer.java b/src/fr/devinsy/statoolinfos/metrics/http/HttpAccessLogAnalyzer.java index 1a206bf..61f1099 100644 --- a/src/fr/devinsy/statoolinfos/metrics/http/HttpAccessLogAnalyzer.java +++ b/src/fr/devinsy/statoolinfos/metrics/http/HttpAccessLogAnalyzer.java @@ -26,13 +26,13 @@ import java.util.Locale; import java.util.regex.Matcher; import java.util.regex.Pattern; -import org.apache.commons.io.FilenameUtils; import org.apache.commons.lang3.StringUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import fr.devinsy.statoolinfos.core.StatoolInfosException; import fr.devinsy.statoolinfos.metrics.PathCounters; +import fr.devinsy.statoolinfos.util.FilesUtils; import fr.devinsy.statoolinfos.util.LineIterator; /** @@ -44,6 +44,12 @@ public class HttpAccessLogAnalyzer public static final String DEFAULT_CHARSET_NAME = "UTF-8"; + // log_format combined '$remote_addr - $remote_user [$time_local] ' + // '"$request" $status $body_bytes_sent ' + // '"$http_referer" "$http_user_agent"'; + public static final Pattern COMBINED_PATTERN = Pattern.compile( + "^(?[a-zA-F0-9\\\\:\\\\.]+) - (?[^\\[]+) \\[(?