Set of tools used to generate usage statistics for 42l https://stats.42l.fr
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 

180 lines
5.7 KiB

#!/usr/bin/python3
import sys
import os
import datetime as dt
from datetime import datetime
import calendar
import subprocess
from pathlib import Path
# ANSI escape sequences used by print_color() to colorize terminal output.
RED = "\033[1;31m"
GREEN = "\033[1;32m"
YELLOW = "\033[1;33m"
BLUE = "\033[1;34m"
RESET = "\033[0;0m"

# Input logs: one "<ISO year>/<ISO week>/" sub-folder is listed per week.
LOGS_DIR = "/var/lib/docker/volumes/nginx-proxy-logs/_data/monthly-logs/"
# Folder holding the "<service>.sh" and "internal.sh" report scripts.
SCRIPTS_DIR = "./scripts/"
# Output roots; a "<year>/<month>/" sub-folder is created under each of them.
REPORTS_DIR = "/var/lib/docker/volumes/stats-reports/_data/reports/monthly/"
INTERNAL_DIR = "/var/lib/docker/volumes/stats-reports/_data/internal/monthly/"
#PARSED_DIR = "/var/lib/docker/volumes/nginx-proxy-logs/_data/trash/"

# Sub-folders of the public report folder, one per crawler filtering mode.
R_CR_DIR = "crawlers-only/"
R_NOCR_DIR = "ignore-crawlers/"
R_ALL_DIR = "all/"

# Regex template handed to the report scripts: it matches a line that starts
# with digits/dots, followed later by "[DD/<month>". "{#}" is a placeholder
# replaced by the abbreviated month name. Written as a raw string so the
# backslashes are regex escapes and not (invalid) Python string escapes.
GREP_MONTH = r"^[0-9\.]*[^\[]*\[[0-3][0-9]\/{#}"

# Abbreviated month name -> month number.
months = {
    "Jan": 1,
    "Feb": 2,
    "Mar": 3,
    "Apr": 4,
    "May": 5,
    "Jun": 6,
    "Jul": 7,
    "Aug": 8,
    "Sep": 9,
    "Oct": 10,
    "Nov": 11,
    "Dec": 12,
}
def print_color(string, color):
    """Print *string* on stdout in the given terminal color.

    The *color* escape sequence is written first, then *string* followed by
    a newline, then the RESET sequence so later output is not colored.
    """
    out = sys.stdout
    out.write(color)
    print(string)
    out.write(RESET)
def run_script(script_name, in_file, week_num, args, out_file):
    """Run the report script ``SCRIPTS_DIR/<script_name>.sh``.

    The script receives, in order: *in_file*, the label ``"month <week_num>"``,
    *args*, *out_file* and the module-level ``grep_selmonth`` pattern, which
    must therefore be defined before this function is called.

    Despite its name, *week_num* is the month number (as a string) in this
    monthly script. Returns whatever run_command() returns; run_command()
    aborts the program if the script fails.
    """
    script_path = SCRIPTS_DIR + script_name + ".sh"
    period_label = "month " + week_num
    command = [script_path, in_file, period_label, args, out_file, grep_selmonth]
    return run_command(command)
def donotexist_check(filepath):
    """Abort the program if *filepath* already exists.

    Used before writing a report so an existing file is never overwritten.
    Returns None when the path does not exist.

    Raises:
        SystemExit: with status 1 when *filepath* exists, after printing an
            error message in red.
    """
    if Path(filepath).exists():
        print_color("Error: " + filepath + " exists.\nAborting.", RED)
        # sys.exit() instead of the interactive-only exit() helper, and a
        # non-zero status so the caller (cron, shell) can see the failure.
        sys.exit(1)
def run_command(arr_cmd):
    """Run an external command and return its result, aborting on failure.

    Args:
        arr_cmd: the command as a list of strings (program first, then its
            arguments); it is executed without a shell.

    Returns:
        The subprocess.CompletedProcess of the command, with stdout and
        stderr captured as bytes.

    Raises:
        SystemExit: with status 1 when the command cannot be started or
            returns a non-zero exit code; an error is printed in red first.
    """
    try:
        response = subprocess.run(arr_cmd, capture_output=True)
    except Exception as e:
        # str(e) is required: concatenating the exception object itself
        # raises TypeError and hides the real error.
        print_color("Error: " + arr_cmd[0] + " couldn't execute:\n" + str(e) + "\nAborting.", RED)
        sys.exit(1)
    if response.returncode != 0:
        # Decode stderr so the message shows the text, not a b'...' repr.
        stderr_text = response.stderr.decode("utf-8", errors="replace")
        print_color("Error: " + arr_cmd[0] + " didn't return 0:\n" + stderr_text + "\nAborting.", RED)
        sys.exit(1)
    return response
def list_files(folder):
    """Return the entries of *folder* as printed by ``ls``, one per item.

    The raw output is split on newlines, so the list normally ends with an
    empty string that callers have to skip. run_command() aborts the
    program if ``ls`` fails.
    """
    listing = run_command(["ls", folder])
    text = listing.stdout.decode('utf-8')
    return text.split('\n')
#if (len(sys.argv) != 3):
# print ("Usage : ./logs-rapports-monthly.py [year] [month]")
# exit (0)
# The report covers the previous calendar month. The clock is read once so
# that year, month and the banner time always come from the same instant.
now = dt.datetime.now()
year = now.year
month = now.month - 1
# handle new year
if month == 0:
    month = 12
    year -= 1

nb_days = calendar.monthrange(year, month)[1]

# Build the ordered list of "<ISO year>/<ISO week>/" folders touched by the
# month. The range must include the last day (nb_days): when that day opens
# a new ISO week, its folder would otherwise be missing from the list.
week_list = []
for i in range(1, nb_days + 1):
    iso_date = dt.date(year, month, i).isocalendar()
    yearnb = iso_date[0]
    weeknb = iso_date[1]
    pathlog = str(yearnb) + "/" + str(weeknb) + "/"
    if pathlog not in week_list:
        week_list.append(pathlog)
nbweeks = len(week_list)

print_color("42l Stats - Initializing at " + now.strftime("%d/%m/%Y %H:%M") + ".", BLUE)
print_color("Now writing logs for month " + str(month) + ", ISO weeks "
            + week_list[0] + " to " + week_list[nbweeks - 1], GREEN)
# Pattern selecting the log lines of the reported month: the "{#}"
# placeholder of GREP_MONTH is replaced by the month's abbreviated name,
# found by reverse lookup in the months table.
month_names = list(months)
month_numbers = list(months.values())
grep_selmonth = GREP_MONTH.replace("{#}", str(month_names[month_numbers.index(month)]))

# Map each weekly log folder to the list of entries it contains, in week
# order.
log_files_list = {}
for week_path in week_list:
    path_logs = LOGS_DIR + week_path
    log_files_list[path_logs] = list_files(path_logs)

# Number of entries listed for the last week of the month.
nbservices = len(list(log_files_list.values())[nbweeks - 1])
# Public report variants: (sub-folder, script option, end of progress message).
public_reports = (
    (R_ALL_DIR, "", " report with crawlers."),
    (R_NOCR_DIR, "--ignore-crawlers", " report without crawlers."),
    (R_CR_DIR, "--crawlers-only", " report, crawlers only."),
)

# The listing of the last week of the month decides which services get a
# report; the service name is the file name up to its first dot.
last_week_dir = list(log_files_list.keys())[nbweeks - 1]
for s in log_files_list[last_week_dir]:
    service_name = s.split('.', 1)[0]
    # Name as it appears in the log file names (service_name may be renamed).
    i_service_name = service_name
    if service_name == "access":
        service_name = "42l"
    elif service_name == "error" or service_name == "misc":
        print("Ignored file " + service_name + ".")
        continue
    elif len(service_name) == 0 or len(s) == 0:
        # Empty entry, e.g. the one produced by the trailing newline of ls.
        continue
    # Resolve the entry inside its log folder: testing the bare name would
    # look it up in the current working directory instead.
    if os.path.isdir(os.path.join(last_week_dir, s)):
        print("Ignored directory " + service_name + ".")
        continue

    # list arguments
    # Files are matched by name in every week: an index cannot be used
    # because the number of services can differ from one week to another.
    # The prefix before the first dot must be equal to the service name, so
    # a service whose name is contained in another one's (substring) cannot
    # pick up the wrong log. At most one file is taken per week.
    log_paths = []
    for t, u in log_files_list.items():
        for v in u:
            if v.split('.', 1)[0] == i_service_name:
                log_paths.append(t + v)
                break
    # Space-separated list of paths, each one followed by a space.
    log_arg = "".join(path + " " for path in log_paths)

    # Writing public reports
    r_dir = REPORTS_DIR + str(year) + "/" + str(month) + "/"
    run_command(["mkdir", "-p", r_dir])
    for sub_dir, crawler_arg, message_end in public_reports:
        print("Writing " + service_name + message_end)
        run_command(["mkdir", "-p", r_dir + sub_dir])
        report_path = r_dir + sub_dir + service_name + ".html"
        # Never overwrite a report that was already generated.
        donotexist_check(report_path)
        run_script(service_name, log_arg, str(month), crawler_arg, report_path)

    # Writing internal reports
    internal_folder = INTERNAL_DIR + str(year) + "/" + str(month) + "/"
    run_command(["mkdir", "-p", internal_folder])
    internal_file = internal_folder + service_name
    # No internal report is produced for nitter.
    if service_name == 'nitter':
        continue
    print("Writing internal " + service_name + " report.")
    run_script("internal", log_arg, str(month), "html", internal_file + ".html")
    print("Writing internal " + service_name + " report (JSON).")
    run_script("internal", log_arg, str(month), "json", internal_file + ".json")

    # Moving parsed logs
    #run_command(["mkdir", "-p", PARSED_DIR + date_path])
    #run_command(["mv", "-n", log_files[i], PARSED_DIR + date_path])

print_color("Completed!", GREEN)