Added UserAgent anonymize feature and UserAgent stat command.

This commit is contained in:
Christian P. MOMON 2021-05-14 02:38:58 +02:00
parent fb1407d332
commit 2f07f26d2a
13 changed files with 971 additions and 20 deletions

View file

@ -29,6 +29,7 @@ import java.util.zip.GZIPOutputStream;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.april.logar.util.Chrono;
import org.april.logar.util.Files;
import org.april.logar.util.FilesUtils;
import org.april.logar.util.LineIterator;
@ -39,6 +40,7 @@ import fr.devinsy.logar.app.anonymizer.Anonymizer;
import fr.devinsy.logar.app.log.Log;
import fr.devinsy.logar.app.log.LogFile;
import fr.devinsy.logar.app.log.LogParser;
import fr.devinsy.logar.stats.UserAgentStator;
import fr.devinsy.strings.StringList;
/**
@ -610,6 +612,113 @@ public final class Logar
}
}
/**
 * Runs the user agent statistics on every log file selected from a source.
 *
 * The candidate files are obtained from
 * {@code FilesUtils.searchEndingWith(source, ".log", ".log.gz")}, then
 * filtered with {@code keepFileType()} and ordered with
 * {@code sortByName()}; each one is handed to
 * {@link #statUserAgentsForFile(File)} in that order.
 *
 * @param source
 *            the file or directory given to the search
 */
public static void statUserAgents(final File source)
{
    for (File logFile : FilesUtils.searchEndingWith(source, ".log", ".log.gz").keepFileType().sortByName())
    {
        statUserAgentsForFile(logFile);
    }
}
/**
 * Computes user agent statistics for one access log file and saves them as
 * list files next to it.
 *
 * Every line is parsed with {@code LogParser.parseAccessLog} and fed to a
 * {@code UserAgentStator}. Lines that cannot be parsed are reported on
 * standard output, counted as bad lines and skipped. Once the file is read,
 * the counters are printed and the following files are written in the
 * parent directory of the log file: {@code stator-ip.list},
 * {@code stator-ua.list}, {@code stator-ipua.list}, then, after
 * {@code stator.shrink()}, {@code stator-ua-short.list},
 * {@code stator-ipua-short.list} and {@code stator-stats.list}.
 *
 * NOTE(review): the output file names are fixed, so when
 * {@code statUserAgents} processes several log files of the same directory,
 * each run overwrites the results of the previous one. Confirm whether this
 * is intended.
 *
 * @param file
 *            the log file to analyze
 * @throws IllegalArgumentException
 *             if {@code file} is null or is not a regular file
 */
public static void statUserAgentsForFile(final File file)
{
    if (file == null)
    {
        throw new IllegalArgumentException("Null parameter.");
    }
    if (!file.isFile())
    {
        throw new IllegalArgumentException("Parameter is not a file.");
    }

    System.out.println("== Stat UserAgents for [" + file.getName() + "]");

    int lineCount = 0;
    int badLineCount = 0;
    try
    {
        UserAgentStator stator = new UserAgentStator();
        // NOTE(review): the iterator is never closed here; if LineIterator
        // holds an open stream it should be released -- confirm against its API.
        LineIterator iterator = new LineIterator(file);
        Chrono chrono = new Chrono().start();
        long lastDuration = 0;
        while (iterator.hasNext())
        {
            String line = iterator.next();
            lineCount += 1;

            // Read the clock once per line, and report as soon as a minute
            // has passed since the last report: testing for an exact
            // multiple of 60 would skip the report whenever no line happens
            // to be processed during that precise second.
            long elapsed = chrono.duration();
            if (elapsed - lastDuration >= 60)
            {
                lastDuration = elapsed;
                System.out.println(chrono.format() + " line counter " + lineCount);
            }

            try
            {
                Log log = LogParser.parseAccessLog(line);
                stator.putLog(log);
            }
            catch (IllegalArgumentException exception)
            {
                System.out.println("Bad format line: " + line);
                badLineCount += 1;
                exception.printStackTrace();
            }
            catch (DateTimeParseException exception)
            {
                System.out.println("Bad datetime format: " + line);
                badLineCount += 1;
            }
        }

        // Full statistics.
        System.out.println("Log count =" + stator.getLogCount());
        System.out.println("Ip count =" + stator.getIps().size());
        System.out.println("UserAgent count =" + stator.getUserAgents().size());
        System.out.println("IpUserAgent count =" + stator.getIpUserAgents().size());
        System.out.println("out=" + file.getCanonicalFile().getParentFile());

        File outputDirectory = file.getParentFile();
        stator.saveIpList(new File(outputDirectory, "stator-ip.list"));
        stator.saveUserAgentList(new File(outputDirectory, "stator-ua.list"));
        stator.saveIpUserAgentList(new File(outputDirectory, "stator-ipua.list"));

        // Reduced statistics, after the stator has dropped the entries it
        // filters out in shrink().
        stator.shrink();
        System.out.println("UserAgent count =" + stator.getUserAgents().size());
        System.out.println("IpUserAgent count =" + stator.getIpUserAgents().size());
        stator.saveUserAgentList(new File(outputDirectory, "stator-ua-short.list"));
        stator.saveIpUserAgentList(new File(outputDirectory, "stator-ipua-short.list"));

        stator.computeIpLinkCountForUserAgent();
        stator.saveUserAgentLinkCount(new File(outputDirectory, "stator-stats.list"));
    }
    catch (IOException exception)
    {
        System.err.println("Error with file [" + file.getAbsolutePath() + "]");
        exception.printStackTrace();
    }

    if (badLineCount > 0)
    {
        System.out.println("Bad line count: " + badLineCount + "/" + lineCount);
    }
}
/**
* Check concate.
*

View file

@ -274,24 +274,6 @@ public final class Anonymizer
return result;
}
/**
 * Strips a user agent string of its punctuation and digits.
 *
 * Every occurrence of one of the characters {@code ()_-/@.} or of a decimal
 * digit is removed through {@code StringUtils.replaceChars}; all other
 * characters are kept in place.
 *
 * @param userAgent
 *            the user agent string to clean
 * @return the user agent without the removed characters
 */
public String anonymizeUserAgent(final String userAgent)
{
    return StringUtils.replaceChars(userAgent, "()_-/@.0123456789", "");
}
/**
* Gets the map table.
*

View file

@ -70,11 +70,12 @@ public final class UserAgentGenerator
// }
//
result = source;
result = RegExUtils.replaceAll(result, "\\[.+\\]", "");
result = RegExUtils.replaceAll(result, "https?://[^\\s\\)\\],;]+", "");
result = RegExUtils.replaceAll(result, "\\(\\)", "");
result = RegExUtils.replaceAll(result, "build [0-9a-zA-Z\\.-]+", "build");
result = RegExUtils.replaceAll(result, "(B|b)uild ?[~0-9a-zA-Z/\\.-_@]+", "build");
result = RegExUtils.replaceAll(result, "\\d{4}/\\d{2}/\\d{2}", "");
result = RegExUtils.replaceAll(result, "(/|:)[~0-9a-zA-Z\\.-]+", "");
result = RegExUtils.replaceAll(result, "(/|:|-|@)?[0-9][~0-9a-zA-Z\\.-_]+", "");
// result = StringUtils.replaceChars(result, ".[]_-0123456789", "");
result = StringUtils.rightPad(result, source.length());

View file

@ -0,0 +1,60 @@
/*
* Copyright (C) 2021 Christian Pierre MOMON <christian@momon.org>
*
* This file is part of Logar, simple tool to manage http log files.
*
* Logar is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* Logar is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with Logar. If not, see <http://www.gnu.org/licenses/>.
*/
package fr.devinsy.logar.stats;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * An IP address together with a counter of how many times it was recorded.
 */
public final class Ip
{
    private static Logger logger = LoggerFactory.getLogger(Ip.class);

    private final String value;
    private long count = 0;

    /**
     * Creates the entry for an IP address, with a counter set to zero.
     *
     * @param ip
     *            the IP address
     */
    public Ip(final String ip)
    {
        this.value = ip;
    }

    /**
     * @return the number of times {@link #inc()} was called
     */
    public long getCount()
    {
        return this.count;
    }

    /**
     * @return the IP address given at construction
     */
    public String getValue()
    {
        return this.value;
    }

    /**
     * Adds one to the counter.
     */
    public void inc()
    {
        this.count++;
    }
}

View file

@ -0,0 +1,70 @@
/*
* Copyright (C) 2021 Christian Pierre MOMON <christian@momon.org>
*
* This file is part of Logar, simple tool to manage http log files.
*
* Logar is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* Logar is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with Logar. If not, see <http://www.gnu.org/licenses/>.
*/
package fr.devinsy.logar.stats;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * A pair (IP address, user agent) together with a counter of how many times
 * it was recorded.
 */
public final class IpUserAgent
{
    private static Logger logger = LoggerFactory.getLogger(IpUserAgent.class);

    private final String ip;
    private final String userAgent;
    private long count = 0;

    /**
     * Creates the entry for a pair, with a counter set to zero.
     *
     * @param ip
     *            the IP address
     * @param userAgent
     *            the user agent
     */
    public IpUserAgent(final String ip, final String userAgent)
    {
        this.ip = ip;
        this.userAgent = userAgent;
    }

    /**
     * @return the number of times {@link #inc()} was called
     */
    public long getCount()
    {
        return this.count;
    }

    /**
     * @return the IP address given at construction
     */
    public String getIp()
    {
        return this.ip;
    }

    /**
     * @return the user agent given at construction
     */
    public String getUserAgent()
    {
        return this.userAgent;
    }

    /**
     * Adds one to the counter.
     */
    public void inc()
    {
        this.count++;
    }
}

View file

@ -0,0 +1,42 @@
/*
* Copyright (C) 2021 Christian Pierre MOMON <christian@momon.org>
*
* This file is part of Logar, simple tool to manage http log files.
*
* Logar is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* Logar is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with Logar. If not, see <http://www.gnu.org/licenses/>.
*/
package fr.devinsy.logar.stats;
import java.util.ArrayList;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * A plain list of {@link IpUserAgent} entries.
 */
public final class IpUserAgentList extends ArrayList<IpUserAgent>
{
    private static final long serialVersionUID = 385645188228694639L;
    private static Logger logger = LoggerFactory.getLogger(IpUserAgentList.class);

    /**
     * Creates an empty list.
     */
    public IpUserAgentList()
    {
        // Nothing to set up beyond the empty ArrayList.
    }
}

View file

@ -0,0 +1,143 @@
/*
* Copyright (C) 2021 Christian Pierre MOMON <christian@momon.org>
*
* This file is part of Logar, simple tool to manage http log files.
*
* Logar is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* Logar is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with Logar. If not, see <http://www.gnu.org/licenses/>.
*/
package fr.devinsy.logar.stats;
import java.util.HashMap;
import java.util.Iterator;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * A collection of {@link IpUserAgent} entries indexed by their (IP address,
 * user agent) pair.
 *
 * Iterating over the collection yields the stored entries in the
 * unspecified order of the underlying {@code HashMap}.
 */
public final class IpUserAgents implements Iterable<IpUserAgent>
{
    private static Logger logger = LoggerFactory.getLogger(IpUserAgents.class);

    /*
     * Placed between the IP and the user agent in the map key. Without a
     * separator, distinct pairs such as ("1.1.1.1", "1x") and
     * ("1.1.1.11", "x") would share the same key and be merged.
     */
    private static final char KEY_SEPARATOR = '\n';

    private final HashMap<String, IpUserAgent> data;

    /**
     * Creates an empty collection.
     */
    public IpUserAgents()
    {
        this.data = new HashMap<String, IpUserAgent>();
    }

    /**
     * Builds the map key of a pair.
     *
     * @param ip
     *            the IP address
     * @param userAgent
     *            the user agent
     * @return the key identifying the pair
     */
    private static String keyOf(final String ip, final String userAgent)
    {
        return ip + KEY_SEPARATOR + userAgent;
    }

    /**
     * Removes all the entries.
     */
    public void clear()
    {
        this.data.clear();
    }

    /**
     * Counts the entries having a given user agent, which is the number of
     * distinct pairs stored for that user agent. Scans every entry.
     *
     * @param userAgent
     *            the user agent to look for (null-safe comparison)
     * @return the number of matching entries
     */
    public int countByUserAgent(final String userAgent)
    {
        int result;

        result = 0;
        for (IpUserAgent ipUserAgent : this.data.values())
        {
            if (StringUtils.equals(ipUserAgent.getUserAgent(), userAgent))
            {
                result += 1;
            }
        }

        //
        return result;
    }

    /**
     * Collects the entries having a given user agent. Scans every entry.
     *
     * @param userAgent
     *            the user agent to look for (null-safe comparison)
     * @return a new list of the matching entries, empty if there are none
     */
    public IpUserAgentList getByUserAgent(final String userAgent)
    {
        IpUserAgentList result;

        result = new IpUserAgentList();
        for (IpUserAgent ipUserAgent : this.data.values())
        {
            if (StringUtils.equals(ipUserAgent.getUserAgent(), userAgent))
            {
                result.add(ipUserAgent);
            }
        }

        //
        return result;
    }

    /* (non-Javadoc)
     * @see java.lang.Iterable#iterator()
     */
    @Override
    public Iterator<IpUserAgent> iterator()
    {
        Iterator<IpUserAgent> result;

        result = this.data.values().iterator();

        //
        return result;
    }

    /**
     * Stores an existing entry as is, replacing any entry already stored for
     * the same pair. The counter of the entry is left untouched.
     *
     * @param ipUserAgent
     *            the entry to store
     */
    public void put(final IpUserAgent ipUserAgent)
    {
        this.data.put(keyOf(ipUserAgent.getIp(), ipUserAgent.getUserAgent()), ipUserAgent);
    }

    /**
     * Records one occurrence of a pair: the entry is created if it does not
     * exist yet, then its counter is incremented.
     *
     * @param ip
     *            the IP address
     * @param userAgent
     *            the user agent
     */
    public void put(final String ip, final String userAgent)
    {
        String key = keyOf(ip, userAgent);

        IpUserAgent stat = this.data.get(key);
        if (stat == null)
        {
            stat = new IpUserAgent(ip, userAgent);
            this.data.put(key, stat);
        }
        stat.inc();
    }

    /**
     * @return the number of stored entries
     */
    public int size()
    {
        int result;

        result = this.data.size();

        //
        return result;
    }
}

View file

@ -0,0 +1,54 @@
/*
* Copyright (C) 2021 Christian Pierre MOMON <christian@momon.org>
*
* This file is part of Logar, simple tool to manage http log files.
*
* Logar is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* Logar is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with Logar. If not, see <http://www.gnu.org/licenses/>.
*/
package fr.devinsy.logar.stats;
import java.util.HashMap;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * A map of {@link Ip} entries indexed by IP address.
 */
public final class Ips extends HashMap<String, Ip>
{
    private static final long serialVersionUID = -8411746796941831991L;
    private static Logger logger = LoggerFactory.getLogger(Ips.class);

    /**
     * Creates an empty map.
     */
    public Ips()
    {
        super();
    }

    /**
     * Records one occurrence of an IP address: its entry is created if it
     * does not exist yet, then its counter is incremented.
     *
     * @param ip
     *            the IP address
     */
    public void put(final String ip)
    {
        computeIfAbsent(ip, Ip::new).inc();
    }
}

View file

@ -0,0 +1,75 @@
/*
* Copyright (C) 2021 Christian Pierre MOMON <christian@momon.org>
*
* This file is part of Logar, simple tool to manage http log files.
*
* Logar is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* Logar is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with Logar. If not, see <http://www.gnu.org/licenses/>.
*/
package fr.devinsy.logar.stats;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * A user agent string together with two figures: an occurrence counter and
 * an "IP link count" assigned from outside through
 * {@link #setIpLinkCount(long)}.
 */
public final class UserAgent
{
    private static Logger logger = LoggerFactory.getLogger(UserAgent.class);

    private final String value;
    private long count = 0;
    private long ipLinkCount = 0;

    /**
     * Creates the entry for a user agent, with both figures set to zero.
     *
     * @param userAgent
     *            the user agent
     */
    public UserAgent(final String userAgent)
    {
        this.value = userAgent;
    }

    /**
     * @return the number of times {@link #inc()} was called
     */
    public long getCount()
    {
        return this.count;
    }

    /**
     * @return the last value given to {@link #setIpLinkCount(long)}, or zero
     */
    public long getIpLinkCount()
    {
        return this.ipLinkCount;
    }

    /**
     * @return the user agent given at construction
     */
    public String getValue()
    {
        return this.value;
    }

    /**
     * Adds one to the occurrence counter.
     */
    public void inc()
    {
        this.count++;
    }

    /**
     * @param ipLinkCount
     *            the new IP link count
     */
    public void setIpLinkCount(final long ipLinkCount)
    {
        this.ipLinkCount = ipLinkCount;
    }
}

View file

@ -0,0 +1,232 @@
/*
* Copyright (C) 2021 Christian Pierre MOMON <christian@momon.org>
*
* This file is part of Logar, simple tool to manage http log files.
*
* Logar is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* Logar is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with Logar. If not, see <http://www.gnu.org/licenses/>.
*/
package fr.devinsy.logar.stats;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.PrintWriter;
import org.apache.commons.io.IOUtils;
import org.april.logar.util.Chrono;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import fr.devinsy.logar.app.log.Log;
import fr.devinsy.strings.StringsUtils;
/**
 * Accumulates statistics about the IP addresses and user agents found in
 * logs, and saves them as text files.
 *
 * Logs are fed one by one with {@link #putLog(Log)}. The save methods write
 * one entry per line; when a target cannot be opened or written, the error
 * is reported on standard error and the method returns normally.
 *
 * NOTE(review): the files are written with the platform default charset, as
 * before; confirm it matches what the readers of these files expect.
 */
public final class UserAgentStator
{
    private static Logger logger = LoggerFactory.getLogger(UserAgentStator.class);

    /* Tokens a user agent must contain (any of them) to survive shrink(). */
    private static final String[] SHRINK_TOKENS = { "android", "apple", "chrome", "iphone", "linux", "mac", "mozilla", "opera", "safari", "thunderbird" };

    private long logCount;
    private Ips ips;
    private UserAgents userAgents;
    private IpUserAgents ipUserAgents;

    /**
     * Creates a stator with empty statistics.
     */
    public UserAgentStator()
    {
        this.logCount = 0;
        this.ips = new Ips();
        this.userAgents = new UserAgents();
        this.ipUserAgents = new IpUserAgents();
    }

    /**
     * Reports on standard error that a target file could not be fully
     * written. Needed because {@code PrintWriter} never throws on write
     * errors, it only records them.
     *
     * @param out
     *            the writer to check (it is flushed by the check)
     * @param target
     *            the file being written, for the message
     */
    private static void reportWriteError(final PrintWriter out, final File target)
    {
        if (out.checkError())
        {
            System.err.println("Error writing file [" + target.getAbsolutePath() + "]");
        }
    }

    /**
     * Reports on standard error that a target file could not be opened.
     *
     * @param target
     *            the file that could not be opened
     * @param exception
     *            the failure
     */
    private static void reportOpenError(final File target, final FileNotFoundException exception)
    {
        System.err.println("Error opening file [" + target.getAbsolutePath() + "]");
        exception.printStackTrace();
    }

    /**
     * Sets on every user agent its IP link count, that is the number of
     * (IP, user agent) pairs currently stored for it.
     *
     * The pairs are counted in a single pass, then the counts are assigned,
     * instead of rescanning all the pairs for each user agent.
     */
    public void computeIpLinkCountForUserAgent()
    {
        java.util.Map<String, Long> linkCounts = new java.util.HashMap<String, Long>();
        for (IpUserAgent ipUserAgent : this.ipUserAgents)
        {
            linkCounts.merge(ipUserAgent.getUserAgent(), 1L, Long::sum);
        }

        for (UserAgent userAgent : this.userAgents.values())
        {
            Long count = linkCounts.get(userAgent.getValue());
            userAgent.setIpLinkCount(count == null ? 0L : count.longValue());
        }
    }

    /**
     * @return the IP statistics
     */
    public Ips getIps()
    {
        return this.ips;
    }

    /**
     * @return the (IP, user agent) pair statistics
     */
    public IpUserAgents getIpUserAgents()
    {
        return this.ipUserAgents;
    }

    /**
     * @return the number of logs given to {@link #putLog(Log)}
     */
    public long getLogCount()
    {
        return this.logCount;
    }

    /**
     * @return the user agent statistics
     */
    public UserAgents getUserAgents()
    {
        return this.userAgents;
    }

    /**
     * Adds a log to the statistics: the log counter, the IP, the trimmed
     * user agent and the (IP, trimmed user agent) pair are all updated.
     *
     * @param log
     *            the log to record; its user agent is dereferenced, so a log
     *            without one fails with a NullPointerException
     */
    public void putLog(final Log log)
    {
        String userAgent = log.getUserAgent().trim();

        this.logCount += 1;
        this.ips.put(log.getIp());
        this.userAgents.put(userAgent);
        this.ipUserAgents.put(log.getIp(), userAgent);
    }

    /**
     * Writes the recorded IP addresses, one per line.
     *
     * @param target
     *            the file to write (overwritten if it exists)
     */
    public void saveIpList(final File target)
    {
        try (PrintWriter out = new PrintWriter(new FileOutputStream(target)))
        {
            for (Ip ip : this.ips.values())
            {
                out.println(ip.getValue());
            }
            reportWriteError(out, target);
        }
        catch (FileNotFoundException exception)
        {
            reportOpenError(target, exception);
        }
    }

    /**
     * Writes the recorded pairs, one per line, as the IP, a space and the
     * user agent.
     *
     * @param target
     *            the file to write (overwritten if it exists)
     */
    public void saveIpUserAgentList(final File target)
    {
        try (PrintWriter out = new PrintWriter(new FileOutputStream(target)))
        {
            for (IpUserAgent ipUserAgent : this.ipUserAgents)
            {
                out.println(ipUserAgent.getIp() + " " + ipUserAgent.getUserAgent());
            }
            reportWriteError(out, target);
        }
        catch (FileNotFoundException exception)
        {
            reportOpenError(target, exception);
        }
    }

    /**
     * Writes the user agent figures, one user agent per line, as the IP link
     * count, the occurrence count and the user agent, separated by spaces.
     *
     * @param target
     *            the file to write (overwritten if it exists)
     */
    public void saveUserAgentLinkCount(final File target)
    {
        try (PrintWriter out = new PrintWriter(new FileOutputStream(target)))
        {
            for (UserAgent userAgent : this.userAgents.values())
            {
                out.println(userAgent.getIpLinkCount() + " " + userAgent.getCount() + " " + userAgent.getValue());
            }
            reportWriteError(out, target);
        }
        catch (FileNotFoundException exception)
        {
            reportOpenError(target, exception);
        }
    }

    /**
     * Writes the recorded user agents, one per line.
     *
     * @param target
     *            the file to write (overwritten if it exists)
     */
    public void saveUserAgentList(final File target)
    {
        try (PrintWriter out = new PrintWriter(new FileOutputStream(target)))
        {
            for (UserAgent userAgent : this.userAgents.values())
            {
                out.println(userAgent.getValue());
            }
            reportWriteError(out, target);
        }
        catch (FileNotFoundException exception)
        {
            reportOpenError(target, exception);
        }
    }

    /**
     * Reduces the user agent and pair statistics to the entries whose user
     * agent is accepted by {@code StringsUtils.containsAnyIgnoreCase} for
     * the shrink tokens ("android", "apple", "chrome", ...).
     *
     * The previous collections are cleared before being replaced, so
     * references obtained earlier from the getters end up empty. The IP
     * statistics and the log counter are not modified.
     */
    public void shrink()
    {
        UserAgents keptUserAgents = new UserAgents();
        for (UserAgent userAgent : this.userAgents.values())
        {
            if (StringsUtils.containsAnyIgnoreCase(userAgent.getValue(), SHRINK_TOKENS))
            {
                keptUserAgents.put(userAgent);
            }
        }
        this.userAgents.clear();
        this.userAgents = keptUserAgents;

        //
        IpUserAgents keptIpUserAgents = new IpUserAgents();
        for (IpUserAgent ipUserAgent : this.ipUserAgents)
        {
            if (StringsUtils.containsAnyIgnoreCase(ipUserAgent.getUserAgent(), SHRINK_TOKENS))
            {
                keptIpUserAgents.put(ipUserAgent);
            }
        }
        this.ipUserAgents.clear();
        this.ipUserAgents = keptIpUserAgents;
    }
}

View file

@ -0,0 +1,59 @@
/*
* Copyright (C) 2021 Christian Pierre MOMON <christian@momon.org>
*
* This file is part of Logar, simple tool to manage http log files.
*
* Logar is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* Logar is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with Logar. If not, see <http://www.gnu.org/licenses/>.
*/
package fr.devinsy.logar.stats;
import java.util.HashMap;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * A map of {@link UserAgent} entries indexed by user agent string.
 */
public final class UserAgents extends HashMap<String, UserAgent>
{
    private static final long serialVersionUID = -7943808966632477322L;
    private static Logger logger = LoggerFactory.getLogger(UserAgents.class);

    /**
     * Creates an empty map.
     */
    public UserAgents()
    {
        super();
    }

    /**
     * Records one occurrence of a user agent: its entry is created if it
     * does not exist yet, then its counter is incremented.
     *
     * @param userAgent
     *            the user agent string
     */
    public void put(final String userAgent)
    {
        computeIfAbsent(userAgent, UserAgent::new).inc();
    }

    /**
     * Stores an existing entry under its own value, replacing any entry
     * already stored for it. The counter of the entry is left untouched.
     *
     * @param userAgent
     *            the entry to store
     */
    public void put(final UserAgent userAgent)
    {
        String key = userAgent.getValue();
        super.put(key, userAgent);
    }
}

View file

@ -238,6 +238,12 @@ public final class LogarCLI
Logar.sort(source);
}
else if (isMatching(args, "statuseragent", "\\s*\\S+\\s*"))
{
File source = new File(args[1]);
Logar.statUserAgents(source);
}
else if (isMatching(args, "testconcate", "\\s*\\S+\\s*"))
{
File source = new File(args[1]);

View file

@ -0,0 +1,118 @@
/*
* Copyright (C) 2021 Christian Pierre MOMON <christian@momon.org>
*
* This file is part of Logar, simple tool to manage http log files.
*
* Logar is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* Logar is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with Logar. If not, see <http://www.gnu.org/licenses/>.
*/
package org.april.logar.util;
import java.time.Duration;
import java.time.LocalDateTime;
import java.time.ZoneOffset;
/**
 * A minimal stopwatch: it remembers when it was started and reports the time
 * elapsed since then.
 *
 * A chrono is created stopped; {@link #start()} starts (or restarts) it and
 * {@link #reset()} stops it again. The start time is taken from
 * {@code LocalDateTime.now()}.
 */
public class Chrono
{
    private LocalDateTime start;

    /**
     * Creates a chrono that is not started.
     */
    public Chrono()
    {
        // Set directly rather than through reset(), which a subclass could
        // override and which would then run on a half-built object.
        this.start = null;
    }

    /**
     * Gives the elapsed time in seconds.
     *
     * @return the difference between the current second and the second the
     *         chrono was started at, or zero if the chrono is not started
     */
    public long duration()
    {
        long result;

        if (this.start == null)
        {
            result = 0;
        }
        else
        {
            result = LocalDateTime.now().toEpochSecond(ZoneOffset.UTC) - this.start.toEpochSecond(ZoneOffset.UTC);
        }

        //
        return result;
    }

    /**
     * Formats the elapsed time as {@code hh:mm:ss}.
     *
     * @return the elapsed hours, minutes within the hour and seconds within
     *         the minute, each on at least two digits, or {@code "n/a"} if
     *         the chrono is not started
     */
    public String format()
    {
        String result;

        if (this.start == null)
        {
            result = "n/a";
        }
        else
        {
            long seconds = Duration.between(this.start, LocalDateTime.now()).getSeconds();

            // The minutes must wrap at 60, otherwise 1h02m05s is shown as
            // "01:62:05".
            result = String.format("%02d:%02d:%02d", seconds / 3600, (seconds / 60) % 60, seconds % 60);
        }

        //
        return result;
    }

    /**
     * Stops the chrono and forgets its start time.
     */
    public void reset()
    {
        this.start = null;
    }

    /**
     * Starts the chrono from now, discarding any previous start time.
     *
     * @return this chrono, to allow {@code new Chrono().start()}
     */
    public Chrono start()
    {
        Chrono result;

        this.start = LocalDateTime.now();
        result = this;

        //
        return result;
    }

    /**
     * Gives the same text as {@link #format()}.
     *
     * @return the formatted elapsed time
     */
    @Override
    public String toString()
    {
        String result;

        result = format();

        //
        return result;
    }
}