Fixed handling of empty crawled files.

This commit is contained in:
Christian P. MOMON 2021-01-11 19:23:44 +01:00
parent 36f6d67d8a
commit 726f4fccfa

View file

@ -1,5 +1,5 @@
/* /*
* Copyright (C) 2020 Christian Pierre MOMON <christian@momon.org> * Copyright (C) 2020-2021 Christian Pierre MOMON <christian@momon.org>
* *
* This file is part of StatoolInfos, simple service statistics tool. * This file is part of StatoolInfos, simple service statistics tool.
* *
@ -21,6 +21,7 @@ package fr.devinsy.statoolinfos.crawl;
import java.io.File; import java.io.File;
import java.io.IOException; import java.io.IOException;
import java.net.URL; import java.net.URL;
import java.nio.file.Files;
import org.apache.commons.codec.digest.DigestUtils; import org.apache.commons.codec.digest.DigestUtils;
import org.apache.commons.io.FileUtils; import org.apache.commons.io.FileUtils;
@ -389,7 +390,17 @@ public class CrawlCache
{ {
final int TIMEOUT = 5000; final int TIMEOUT = 5000;
result = buildFile(url.toString()); result = buildFile(url.toString());
FileUtils.copyURLToFile(url, result, TIMEOUT, TIMEOUT); File temp = Files.createTempFile("tmp-", ".statoolsinfos").toFile();
FileUtils.copyURLToFile(url, temp, TIMEOUT, TIMEOUT);
if (temp.length() == 0)
{
logger.warn("WARNING: empty file crawled for [{}]", url);
}
else
{
temp.renameTo(result);
}
temp.delete();
} }
else else
{ {