2021-05-17 13:24:10 +02:00
|
|
|
/*
 * Copyright (C) 2021 Christian Pierre MOMON <christian@momon.org>
 *
 * This file is part of StatoolInfos, simple service statistics tool.
 *
 * StatoolInfos is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as
 * published by the Free Software Foundation, either version 3 of the
 * License, or (at your option) any later version.
 *
 * StatoolInfos is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with StatoolInfos. If not, see <http://www.gnu.org/licenses/>.
 */
|
|
|
|
package fr.devinsy.statoolinfos.crawl;
|
|
|
|
|
|
|
|
import java.net.URL;
|
|
|
|
import java.util.ArrayList;
|
|
|
|
import java.util.Collections;
|
2021-05-19 15:22:37 +02:00
|
|
|
import java.util.Iterator;
|
2021-05-17 13:24:10 +02:00
|
|
|
|
|
|
|
import org.apache.commons.lang3.StringUtils;
|
|
|
|
|
2021-05-18 19:40:29 +02:00
|
|
|
import fr.devinsy.statoolinfos.util.URLUtils;
|
|
|
|
|
2021-05-17 13:24:10 +02:00
|
|
|
/**
|
|
|
|
* The Class CrawlLogs.
|
|
|
|
*/
|
|
|
|
public class CrawlLogs extends ArrayList<CrawlLog>
|
|
|
|
{
|
|
|
|
private static final long serialVersionUID = -8749217049690008582L;
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Instantiates a new crawl logs.
|
|
|
|
*/
|
|
|
|
public CrawlLogs()
|
|
|
|
{
|
|
|
|
super();
|
|
|
|
}
|
|
|
|
|
2021-05-18 19:40:29 +02:00
|
|
|
/* (non-Javadoc)
|
|
|
|
* @see java.util.ArrayList#add(java.lang.Object)
|
|
|
|
*/
|
|
|
|
@Override
|
|
|
|
public boolean add(final CrawlLog log)
|
|
|
|
{
|
|
|
|
boolean result;
|
|
|
|
|
|
|
|
if (log == null)
|
|
|
|
{
|
|
|
|
result = false;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
result = super.add(log);
|
|
|
|
}
|
|
|
|
|
|
|
|
//
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
2021-05-17 13:24:10 +02:00
|
|
|
/**
|
|
|
|
* Adds the.
|
|
|
|
*
|
|
|
|
* @param url
|
|
|
|
* the url
|
|
|
|
* @param status
|
|
|
|
* the status
|
|
|
|
*/
|
2021-05-18 19:40:29 +02:00
|
|
|
public void add(final URL url, final URL parentUrl, final CrawlStatus status)
|
2021-05-17 13:24:10 +02:00
|
|
|
{
|
2021-05-18 19:40:29 +02:00
|
|
|
this.add(new CrawlLog(url, parentUrl, status));
|
2021-05-17 13:24:10 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
2021-05-19 15:22:37 +02:00
|
|
|
* Find by url.
|
2021-05-17 13:24:10 +02:00
|
|
|
*
|
2021-05-19 15:22:37 +02:00
|
|
|
* @param url
|
|
|
|
* the url
|
|
|
|
* @return the crawl logs
|
2021-05-17 13:24:10 +02:00
|
|
|
*/
|
|
|
|
public CrawlLogs findByUrl(final URL url)
|
|
|
|
{
|
|
|
|
CrawlLogs result;
|
|
|
|
|
|
|
|
result = new CrawlLogs();
|
|
|
|
|
|
|
|
for (CrawlLog log : this)
|
|
|
|
{
|
|
|
|
if (StringUtils.equals(log.getUrl().toString(), url.toString()))
|
|
|
|
{
|
|
|
|
result.add(log);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
//
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
2021-05-19 15:22:37 +02:00
|
|
|
/**
|
|
|
|
* Gets the by url.
|
|
|
|
*
|
|
|
|
* @param url
|
|
|
|
* the url
|
|
|
|
* @return the by url
|
|
|
|
*/
|
|
|
|
public CrawlLog getByUrl(final URL url)
|
|
|
|
{
|
|
|
|
CrawlLog result;
|
|
|
|
|
|
|
|
boolean ended = false;
|
|
|
|
Iterator<CrawlLog> iterator = iterator();
|
|
|
|
result = null;
|
|
|
|
while (!ended)
|
|
|
|
{
|
|
|
|
if (iterator.hasNext())
|
|
|
|
{
|
|
|
|
CrawlLog log = iterator.next();
|
|
|
|
|
|
|
|
if (URLUtils.equals(log.getUrl(), url))
|
|
|
|
{
|
|
|
|
ended = true;
|
|
|
|
result = log;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
ended = true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
//
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
2021-05-17 13:24:10 +02:00
|
|
|
/**
|
|
|
|
* Gets the errors.
|
|
|
|
*
|
|
|
|
* @return the errors
|
|
|
|
*/
|
|
|
|
public CrawlLogs getErrors()
|
|
|
|
{
|
|
|
|
CrawlLogs result;
|
|
|
|
|
|
|
|
result = new CrawlLogs();
|
|
|
|
|
|
|
|
for (CrawlLog log : this)
|
|
|
|
{
|
|
|
|
if (log.getStatus().isError())
|
|
|
|
{
|
|
|
|
result.add(log);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
//
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Gets the success.
|
|
|
|
*
|
|
|
|
* @return the success
|
|
|
|
*/
|
|
|
|
public CrawlLogs getSuccess()
|
|
|
|
{
|
|
|
|
CrawlLogs result;
|
|
|
|
|
|
|
|
result = new CrawlLogs();
|
|
|
|
|
|
|
|
for (CrawlLog log : this)
|
|
|
|
{
|
|
|
|
if (!log.getStatus().isError())
|
|
|
|
{
|
|
|
|
result.add(log);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
//
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Reverse.
|
|
|
|
*
|
|
|
|
* @return the categories
|
|
|
|
*/
|
|
|
|
public CrawlLogs reverse()
|
|
|
|
{
|
|
|
|
CrawlLogs result;
|
|
|
|
|
|
|
|
Collections.reverse(this);
|
|
|
|
|
|
|
|
result = this;
|
|
|
|
|
|
|
|
//
|
|
|
|
return result;
|
|
|
|
}
|
2021-05-18 19:40:29 +02:00
|
|
|
|
|
|
|
/**
|
|
|
|
* Gets the by parent.
|
|
|
|
*
|
|
|
|
* @param parentURL
|
|
|
|
* the parent URL
|
|
|
|
* @return the by parent
|
|
|
|
*/
|
|
|
|
public CrawlLogs searchByParent(final URL parentURL)
|
|
|
|
{
|
|
|
|
CrawlLogs result;
|
|
|
|
|
|
|
|
result = new CrawlLogs();
|
|
|
|
|
|
|
|
for (CrawlLog log : this)
|
|
|
|
{
|
|
|
|
if (URLUtils.equals(log.getParentUrl(), parentURL))
|
|
|
|
{
|
|
|
|
result.add(log);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
//
|
|
|
|
return result;
|
|
|
|
}
|
2021-05-17 13:24:10 +02:00
|
|
|
}
|