Refactored user agent bot detection feature.
This commit is contained in:
parent
440d973624
commit
a1e1a60c5a
4 changed files with 224 additions and 21 deletions
|
@ -23,13 +23,11 @@ import java.time.format.DateTimeFormatter;
|
|||
import java.util.Locale;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import fr.devinsy.statoolinfos.metrics.TimeMarkUtils;
|
||||
import fr.devinsy.strings.StringList;
|
||||
import fr.devinsy.strings.StringsUtils;
|
||||
|
||||
/**
|
||||
* The Class HttpAccessLog.
|
||||
|
@ -179,30 +177,15 @@ public class HttpAccessLog
|
|||
}
|
||||
|
||||
/**
|
||||
* @return
|
||||
* Checks if is bot.
|
||||
*
|
||||
* @return true, if is bot
|
||||
*/
|
||||
public boolean isBot()
|
||||
{
|
||||
boolean result;
|
||||
|
||||
if (StringsUtils.containsAnyIgnoreCase(this.userAgent.toString(), "BingPreview", "bot", "crawler", "HeadlessChrome/", " - Mobilizon ", "monitoring", "YisouSpider"))
|
||||
{
|
||||
result = true;
|
||||
}
|
||||
else if (StringUtils.startsWithAny(this.userAgent.toString(), "Apache-HttpClient/", "curl/", "Friendica ", "git/", "github-camo", "hackney/", "http.rb/", "FediList ", "Go-http-client",
|
||||
"GoModuleMirror/", "HotJava/", "Java/", "JGit/", "MastoPeek ", "mattermost-", "Misskey/", "newspaper/", "node-fetch/", "okhttp/", "PeerTube/", "PHP/", "Pleroma ", "python-requests/",
|
||||
"python/", "Python/", "Synapse/", "Tusky/"))
|
||||
{
|
||||
result = true;
|
||||
}
|
||||
else if (StringUtils.equalsAnyIgnoreCase(this.userAgent.toString(), "-"))
|
||||
{
|
||||
result = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
result = false;
|
||||
}
|
||||
result = UserAgentBotDetector.isBot(this.userAgent.toString());
|
||||
|
||||
//
|
||||
return result;
|
||||
|
|
|
@ -0,0 +1,108 @@
|
|||
/*
|
||||
* Copyright (C) 2021 Christian Pierre MOMON <christian@momon.org>
|
||||
*
|
||||
* This file is part of StatoolInfos, simple service statistics tool.
|
||||
*
|
||||
* StatoolInfos is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as
|
||||
* published by the Free Software Foundation, either version 3 of the
|
||||
* License, or (at your option) any later version.
|
||||
*
|
||||
* StatoolInfos is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Affero General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Affero General Public License
|
||||
* along with StatoolInfos. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package fr.devinsy.statoolinfos.metrics.http;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import fr.devinsy.strings.StringList;
|
||||
import fr.devinsy.strings.StringsUtils;
|
||||
|
||||
/**
|
||||
* The Class UserAgentBotDetector.
|
||||
*/
|
||||
public class UserAgentBotDetector
|
||||
{
|
||||
private static Logger logger = LoggerFactory.getLogger(UserAgentBotDetector.class);
|
||||
|
||||
private static final StringList startList = new StringList();
|
||||
private static final StringList containList = new StringList();
|
||||
|
||||
static
|
||||
{
|
||||
StringList lines;
|
||||
try
|
||||
{
|
||||
lines = StringsUtils.load(UserAgentBotDetector.class.getResource("/fr/devinsy/statoolinfos/metrics/http/userAgentBotDetectorData.txt"));
|
||||
}
|
||||
catch (IOException exception)
|
||||
{
|
||||
exception.printStackTrace();
|
||||
lines = new StringList();
|
||||
}
|
||||
|
||||
for (String line : lines)
|
||||
{
|
||||
if (line.startsWith("^"))
|
||||
{
|
||||
startList.add(line.substring(1));
|
||||
}
|
||||
else
|
||||
{
|
||||
containList.add(line);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Instantiates a new user agent bot detector.
|
||||
*/
|
||||
private UserAgentBotDetector()
|
||||
{
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks if is bot.
|
||||
*
|
||||
* @param userAgent
|
||||
* the user agent
|
||||
* @return true, if is bot
|
||||
*/
|
||||
public static boolean isBot(final String userAgent)
|
||||
{
|
||||
boolean result;
|
||||
|
||||
if (StringUtils.isBlank(userAgent))
|
||||
{
|
||||
result = true;
|
||||
}
|
||||
else if (StringUtils.equalsAny(userAgent.trim(), "-"))
|
||||
{
|
||||
result = true;
|
||||
}
|
||||
else if (StringsUtils.containsAnyIgnoreCase(userAgent, containList))
|
||||
{
|
||||
result = true;
|
||||
}
|
||||
else if (StringsUtils.startsWithAny(userAgent, startList))
|
||||
{
|
||||
result = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
result = false;
|
||||
}
|
||||
|
||||
//
|
||||
return result;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,34 @@
|
|||
^Apache-HttpClient/
|
||||
bot
|
||||
BingPreview
|
||||
crawler
|
||||
^curl/
|
||||
^Friendica
|
||||
^git/
|
||||
^github-camo
|
||||
^hackney/
|
||||
HeadlessChrome/
|
||||
^http.rb/
|
||||
^FediList
|
||||
^Go-http-client
|
||||
^GoModuleMirror/
|
||||
^HotJava/
|
||||
^Java/
|
||||
^JGit/
|
||||
^MastoPeek
|
||||
^mattermost-
|
||||
^Misskey/
|
||||
- Mobilizon
|
||||
monitoring
|
||||
^newspaper/
|
||||
^node-fetch/
|
||||
^okhttp/
|
||||
^PeerTube/
|
||||
^PHP/
|
||||
^Pleroma
|
||||
^python-requests/
|
||||
^python/
|
||||
^Python/
|
||||
^Synapse/
|
||||
^Tusky/
|
||||
YisouSpider
|
|
@ -0,0 +1,78 @@
|
|||
/*
|
||||
* Copyright (C) 2021 Christian Pierre MOMON <christian@momon.org>
|
||||
*
|
||||
* This file is part of StatoolInfos, simple service statistics tool.
|
||||
*
|
||||
* StatoolInfos is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as
|
||||
* published by the Free Software Foundation, either version 3 of the
|
||||
* License, or (at your option) any later version.
|
||||
*
|
||||
* StatoolInfos is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Affero General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Affero General Public License
|
||||
* along with StatoolInfos. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package fr.devinsy.statoolinfos.metrics.http;
|
||||
|
||||
import org.apache.log4j.BasicConfigurator;
|
||||
import org.apache.log4j.Level;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.junit.AfterClass;
|
||||
import org.junit.Assert;
|
||||
import org.junit.BeforeClass;
|
||||
import org.junit.Test;
|
||||
|
||||
import fr.devinsy.statoolinfos.core.StatoolInfosException;
|
||||
|
||||
/**
|
||||
* The Class UserAgentBotDetectorTest.
|
||||
*/
|
||||
public class UserAgentBotDetectorTest
|
||||
{
|
||||
private static org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(UserAgentBotDetectorTest.class);
|
||||
|
||||
/**
|
||||
* Test 01.
|
||||
*
|
||||
* @throws Exception
|
||||
* the exception
|
||||
*/
|
||||
@Test
|
||||
public void test01() throws Exception
|
||||
{
|
||||
Assert.assertTrue(UserAgentBotDetector.isBot(""));
|
||||
Assert.assertTrue(UserAgentBotDetector.isBot("-"));
|
||||
Assert.assertTrue(UserAgentBotDetector.isBot("mobilizon.zapashcanon.fr - Mobilizon 1.1.2"));
|
||||
Assert.assertTrue(UserAgentBotDetector.isBot("Pleroma 2.3.50-242-g8e9f032f-develop; https://soc.abcdefg.club <ary@abcdefg.club>"));
|
||||
|
||||
Assert.assertFalse(UserAgentBotDetector.isBot("Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.114 Safari/537.36"));
|
||||
}
|
||||
|
||||
/**
|
||||
* After class.
|
||||
*
|
||||
* @throws StatoolInfosException
|
||||
* the Juga exception
|
||||
*/
|
||||
@AfterClass
|
||||
public static void afterClass() throws StatoolInfosException
|
||||
{
|
||||
}
|
||||
|
||||
/**
|
||||
* Before class.
|
||||
*
|
||||
* @throws StatoolInfosException
|
||||
* the Juga exception
|
||||
*/
|
||||
@BeforeClass
|
||||
public static void beforeClass() throws StatoolInfosException
|
||||
{
|
||||
BasicConfigurator.configure();
|
||||
Logger.getRootLogger().setLevel(Level.DEBUG);
|
||||
}
|
||||
}
|
Loading…
Reference in a new issue