Set of tools used to generate usage statistics for 42l: https://stats.42l.fr

#!/usr/bin/python3
import sys
import os
import subprocess
import re
import datetime as dt
from datetime import datetime
from pathlib import Path

RED = "\033[1;31m"
GREEN = "\033[1;32m"
YELLOW = "\033[1;33m"
BLUE = "\033[1;34m"
RESET = "\033[0;0m"

LOGS_DIR = "/var/lib/docker/volumes/nginx-proxy-logs/_data/weekly-logs/"
SCRIPTS_DIR = "./scripts/"
REPORTS_DIR = "/var/lib/docker/volumes/stats-reports/_data/reports/weekly/"
INTERNAL_DIR = "/var/lib/docker/volumes/stats-reports/_data/internal/weekly/"
PARSED_DIR = "/var/lib/docker/volumes/nginx-proxy-logs/_data/monthly-logs/"

R_CR_DIR = "crawlers-only/"
R_NOCR_DIR = "ignore-crawlers/"
R_ALL_DIR = "all/"

months = {
    "Jan": 1,
    "Feb": 2,
    "Mar": 3,
    "Apr": 4,
    "May": 5,
    "Jun": 6,
    "Jul": 7,
    "Aug": 8,
    "Sep": 9,
    "Oct": 10,
    "Nov": 11,
    "Dec": 12,
}
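# nginx's default $time_local format uses fixed English month abbreviations
# regardless of locale, so a static lookup table is enough to map them to
# month numbers.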

def print_color(string, color):
    # Print a line wrapped in the given ANSI color code.
    sys.stdout.write(color)
    print(string)
    sys.stdout.write(RESET)

def list_files(folder):
    # `ls` output ends with a newline, so the last list element is empty.
    response = run_command(["ls", folder])
    return response.stdout.decode('utf-8').split('\n')

def run_script(script_name, in_file, week_num, args, out_file):
    return run_command([SCRIPTS_DIR + script_name + ".sh", in_file, "week " + week_num, args, out_file, ""])
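# Note: the flags later passed through `args` (--ignore-crawlers,
# --crawlers-only) are goaccess options, so the per-service .sh wrappers in
# SCRIPTS_DIR presumably build a goaccess command line. This is an
# assumption: the wrapper scripts themselves are not shown here.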

def donotexist_check(filepath):
    # Abort rather than overwrite an already-generated report or archived log.
    my_file = Path(filepath)
    if my_file.exists():
        print_color("Error: " + filepath + " exists.\nAborting.", RED)
        exit()

def run_command(arr_cmd):
    try:
        response = subprocess.run(arr_cmd, capture_output=True)
    except Exception as e:
        # str(e) is required here: concatenating the exception object
        # itself to a string would raise a TypeError.
        print_color("Error: " + arr_cmd[0] + " couldn't execute:\n" + str(e) + "\nAborting.", RED)
        exit()
    if response.returncode != 0:
        print_color("Error: " + arr_cmd[0] + " didn't return 0:\n" + str(response.stderr) + "\nAborting.", RED)
        exit()
    return response

log_files = list_files(LOGS_DIR)
script_files = list_files(SCRIPTS_DIR)

#for i in range(0, len(script_files) - 1):
#    try:
#        script_files[i] = re.findall("^(.*)\.sh$", script_files[i])[0]
#    except:
#        print_color("Error: Missing or not well formatted script files. Aborting.", RED)
#        exit()
#print(script_files)
print_color("42l Stats - Initializing at " + datetime.now().strftime("%d/%m/%Y %H:%M") + ".", BLUE)
# The last entry from list_files() is an empty string, hence the len - 1 bound.
for i in range(0, len(log_files) - 1):
    service_name = log_files[i].split('.', 1)[0]
    if service_name == "access":
        service_name = "42l"
    elif service_name == "error" or service_name == "misc":
        print("Ignored file " + service_name + ".")
        continue
    log_files[i] = LOGS_DIR + log_files[i]
    if os.path.isdir(log_files[i]):
        print("Ignored directory " + service_name + ".")
        continue
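    # (The split above assumes weekly logs are named <service>.<ext>, so the
    # service name is everything before the first dot; nginx's bare
    # access.log is reported as the main 42l site.)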

    # Guessing week number from file content
    # XXX: Will break if the archive is compressed.
    rand_entry = run_command(["shuf", "-n", "1", log_files[i]]).stdout.decode('utf-8')
    try:
        date = re.findall(r"^[^:\"]*\[([^:]*)", rand_entry)[0]
    except IndexError:
        print_color("Warning: File " + log_files[i] + " empty.\n", YELLOW)
        continue
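    # At this point `date` holds something like "14/Feb/2021": for a
    # (hypothetical) combined-log line such as
    #   203.0.113.7 - - [14/Feb/2021:06:25:17 +0100] "GET / HTTP/1.1" 200 ...
    # the regex captures everything between the '[' and the first ':'.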
    date = date.split("/")
    isocal = dt.date(int(date[2]), months[date[1]], int(date[0])).isocalendar()
    week_num = str(isocal[1])
    date_path = str(isocal[0]) + "/" + week_num
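    # e.g. dt.date(2021, 2, 14).isocalendar() == (2021, 6, 7)
    # -> ISO year 2021, week 6, so date_path becomes "2021/6".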

    # Writing public reports
    r_dir = REPORTS_DIR + date_path + "/"
    run_command(["mkdir", "-p", r_dir])

    print("Writing " + service_name + " report with crawlers.")
    run_command(["mkdir", "-p", r_dir + R_ALL_DIR])
    report_path = r_dir + R_ALL_DIR + service_name + ".html"
    donotexist_check(report_path)
    run_script(service_name, log_files[i], week_num, "", report_path)

    print("Writing " + service_name + " report without crawlers.")
    run_command(["mkdir", "-p", r_dir + R_NOCR_DIR])
    report_path = r_dir + R_NOCR_DIR + service_name + ".html"
    donotexist_check(report_path)
    run_script(service_name, log_files[i], week_num, "--ignore-crawlers", report_path)

    print("Writing " + service_name + " report, crawlers only.")
    run_command(["mkdir", "-p", r_dir + R_CR_DIR])
    report_path = r_dir + R_CR_DIR + service_name + ".html"
    donotexist_check(report_path)
    run_script(service_name, log_files[i], week_num, "--crawlers-only", report_path)

    # Writing internal reports
    internal_folder = INTERNAL_DIR + date_path + "/"
    run_command(["mkdir", "-p", internal_folder])
    internal_file = internal_folder + service_name
    if service_name == "nitter":
        # Note: this `continue` also skips the log-moving step below,
        # so the nitter log stays in LOGS_DIR.
        print("Skipping nitter report.")
        continue
    print("Writing internal " + service_name + " report.")
    run_script("internal", log_files[i], week_num, "html", internal_file + ".html")
    print("Writing internal " + service_name + " report (JSON).")
    run_script("internal", log_files[i], week_num, "json", internal_file + ".json")

    # Moving parsed logs
    run_command(["mkdir", "-p", PARSED_DIR + date_path])
    # If the log has already been archived for this week, interrupt.
    new_log_filename = log_files[i].replace(LOGS_DIR, PARSED_DIR + date_path + "/")
    donotexist_check(new_log_filename)
    run_command(["mv", "-n", log_files[i], PARSED_DIR + date_path])

    # Compress the log file if it isn't compressed already.
    if not log_files[i].endswith(".gz"):
        print("Compressing " + service_name + " log file")
        run_command(["gzip", new_log_filename])
print_color ("Completed!", GREEN)