You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

161 lines
8.6 KiB
Bash

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

#!/bin/bash
set -euo pipefail

# log2stats
# The successor of logs-rapports for 42l.
# Must be run with weekly rotated logs (on Mondays).

# Folder holding the nginx logs (the rotated "*.log.1" files are read from here).
NGINX_LOGS_FOLDER='/var/lib/docker/volumes/local-syslog/_data/nginx/'
# Root folder under which the weekly, monthly and internal reports are written.
STATS_REPORTS_FOLDER='/var/lib/docker/volumes/nfs/_data/stats-reports/'
# Root folder of the per-service databases used to build the monthly stats.
STATS_DATABASE_FOLDER='/var/lib/docker/volumes/local-stats-data/_data/'
# Perl-compatible regex (used with "grep -P"): matches the bracketed timestamp
# of a log line followed by the opening quote and the HTTP method.
REGEX_DATE='\[[^\]]*\] "(GET|HEAD|POST|PUT|DELETE|CONNECT|OPTIONS|TRACE|PATCH|PROPFIND)'
#######################################
# Generate the weekly reports for every rotated nginx log ("*.log.1"), feed
# the per-service monthly databases, and, when the processed week runs into
# a new month, generate the monthly reports and restart the databases.
#
# Globals (read): NGINX_LOGS_FOLDER, STATS_REPORTS_FOLDER,
#                 STATS_DATABASE_FOLDER, REGEX_DATE
# Arguments:      none
# Outputs:        progress messages on stdout
# Returns:        1 when NGINX_LOGS_FOLDER holds no "*.log.1" file
#
# The helper scripts are invoked through the relative path "scripts/", so the
# function must be called from the directory that contains that folder.
#
# Arguments handed to scripts/<service>.sh (which calls gen-stats.sh and then
# goaccess):
#   $1: input log path
#   $2: used for title text, week X or month X
#   $3: goaccess bonus args (on crawlers, db, ...)
#   $4: output file
#   $5: sed pipe
#   $6 / $7: reserved for the custom script
#######################################
weekly() {
  # The three report flavours, as parallel arrays:
  # directory name / goaccess flag prefix / label used in log messages.
  local -a variants=("ignore-crawlers" "crawlers-only" "all")
  local -a variant_flags=("--ignore-crawlers " "--crawlers-only " "")
  local -a variant_labels=("ignoring crawlers" "crawlers only" "full stats")
  local log_path logfile input_log service raw_date
  local first_date last_date weekday first_month last_month
  local first_day first_monday last_day old_month_end first_day_new_month
  local week_number year iso_year
  local output_folder internal_folder monthly_folder db_folder week_range i
  local found=0

  echo "Start processing logs at $(date)."

  # Glob instead of parsing "ls | grep log.1": only names that really end in
  # ".log.1" are picked up (not ".log.10" or ".log.1.gz").
  for log_path in "$NGINX_LOGS_FOLDER"/*.log.1; do
    # Without a match the pattern stays literal; skip it.
    [[ -f "$log_path" ]] || continue
    found=$((found + 1))

    logfile=${log_path##*/}
    # Strip the literal suffix. A regex such as "s/.log.*//" would turn
    # "blog.log.1" into an empty service name.
    service=${logfile%.log.1}
    input_log="$NGINX_LOGS_FOLDER/$logfile"

    # We don't want stats for those.
    case "$service" in
      misc | error | collabora)
        echo "Skipping file $logfile."
        continue
        ;;
    esac

    # Get the date of the first log line, e.g. "27 Nov 2023".
    # An empty log or an unexpected first line must not stop the whole run,
    # so the failure is caught here and the file is skipped.
    raw_date=$(
      head -n 1 -- "$input_log" \
        | grep -Po -- "$REGEX_DATE" \
        | grep -o "^[^ :]*:" \
        | grep -o "[0-9].*[0-9]" \
        | sed 's/\// /g'
    ) || raw_date=""
    if [[ -z "$raw_date" ]] || ! first_date=$(date -d "$raw_date" +%Y-%m-%d); then
      echo "Skipping file $logfile: no parsable date in its first line."
      continue
    fi

    # ISO day of week of the first entry (1 = Monday ... 7 = Sunday).
    weekday=$(date -d "$first_date" +%u)
    # The Monday that follows the processed week.
    last_date=$(date -d "$first_date-$weekday days + 8 days" +%Y-%m-%d)

    # For monthly stats
    first_month=$(date -d "$first_date" +%m)
    last_month=$(date -d "$last_date" +%m)

    # Day patterns with escaped slashes, ready to be embedded in sed addresses.
    first_day=$(date -d "$first_date" '+%d\/%b\/%Y')
    first_monday=$(date -d "$first_date-$weekday days + 1 day" '+%d\/%b\/%Y')
    last_day=$(date -d "$first_date-$weekday days + 8 days" '+%d\/%b\/%Y')
    if [[ "$first_day" != "$first_monday" ]]; then
      echo "Alert: First day regex is $first_day while first Monday is $first_monday. Logrotate issue?"
    fi

    # Calculate week and year to create the paths.
    # %V is the ISO week number, so the weekly folders use the ISO year (%G):
    # with the calendar year, the week of Monday 30 Dec 2024 (ISO week 01 of
    # 2025) would be written to 2024/01 and overwrite an older report.
    week_number=$(date -d "$first_date" +%V)
    iso_year=$(date -d "$first_date" +%G)
    # Calendar year, used for the monthly paths and date arithmetic.
    year=$(date -d "$first_date" +%Y)

    output_folder="$STATS_REPORTS_FOLDER/reports/weekly/$iso_year/$week_number"
    mkdir -p -- "$output_folder/ignore-crawlers" "$output_folder/crawlers-only" "$output_folder/all"

    # From the first line of the first day to the first line of next Monday.
    week_range="/$first_day/,/$last_day/p"

    # generating public statistics (best effort: a failing script is ignored)
    for i in "${!variants[@]}"; do
      echo "Generating $service stats, ${variant_labels[i]}."
      "scripts/$service.sh" "$input_log" "week $week_number" \
        "${variant_flags[i]}-" \
        "$output_folder/${variants[i]}/$service.html" \
        "$week_range" || true
    done

    # generating internal reports (best effort as well)
    internal_folder="$STATS_REPORTS_FOLDER/internal/weekly/$iso_year/$week_number"
    mkdir -p -- "$internal_folder"
    echo "Generating internal $service stats, HTML format."
    scripts/internal.sh "$input_log" "week $week_number" "html" \
      "$internal_folder/$service.html" "$week_range" || true
    echo "Generating internal $service stats, JSON format."
    scripts/internal.sh "$input_log" "week $week_number" "json" \
      "$internal_folder/$service.json" "$week_range" || true

    # now recalculating all data for monthly stats
    db_folder="$STATS_DATABASE_FOLDER/$service"
    mkdir -p -- "$db_folder/ignore-crawlers" "$db_folder/crawlers-only" "$db_folder/all"

    # check if the logs overlap a new month
    if [[ "$first_month" != "$last_month" ]]; then
      echo "New month overlap detected!"
      # gets the first day of the new month
      first_day_new_month=$(date -d "$year/$first_month/1 + 1 month" '+%d\/%b\/%Y')
      # A sed range stops at the FIRST line matching its end address. Ending
      # on the last day of the old month would drop that whole day except its
      # first line, so the range ends on the first line of the new month
      # instead (that single line is then fed to both months).
      old_month_end=$first_day_new_month
    else
      old_month_end=$last_day
    fi

    for i in "${!variants[@]}"; do
      echo "Feeding monthly data for $service stats, ${variant_labels[i]}."
      "scripts/$service.sh" "$input_log" "week $week_number" \
        "${variant_flags[i]}--process-and-exit --persist --restore --db-path /stats-data/$service/${variants[i]}/ -" \
        "/dev/stdout" \
        "/$first_day/,/$old_month_end/p" || true
    done

    # Everything below only applies when the week crosses into a new month.
    [[ "$first_month" != "$last_month" ]] || continue

    # then generate the monthly stats and clear the DB
    # Unlike the calls above, these are not followed by "|| true": when the
    # caller runs with "set -e", a failure stops the run before the database
    # is wiped.
    monthly_folder="$STATS_REPORTS_FOLDER/reports/monthly/$year/$first_month"
    mkdir -p -- "$monthly_folder/ignore-crawlers" "$monthly_folder/crawlers-only" "$monthly_folder/all"
    for i in "${!variants[@]}"; do
      echo "Generating monthly $service stats, ${variant_labels[i]}."
      "scripts/$service.sh" "/dev/null" "month $first_month" \
        "${variant_flags[i]}--persist --restore --db-path /stats-data/$service/${variants[i]}/" \
        "$monthly_folder/${variants[i]}/$service.html" \
        ""
    done

    # clear the database once monthly stats have been generated
    echo "Resetting $service monthly database…"
    # ":?" aborts instead of deleting the wrong tree if either part is empty.
    rm -rf -- "${STATS_DATABASE_FOLDER:?}/${service:?}"
    mkdir -p -- "$db_folder/ignore-crawlers" "$db_folder/crawlers-only" "$db_folder/all"

    # start the new month with the remaining days of the week
    for i in "${!variants[@]}"; do
      echo "Feeding new monthly data for $service stats, ${variant_labels[i]}."
      "scripts/$service.sh" "$input_log" "week $week_number" \
        "${variant_flags[i]}--process-and-exit --persist --restore --db-path /stats-data/$service/${variants[i]}/ -" \
        "/dev/stdout" \
        "/$first_day_new_month/,/$last_day/p"
    done
  done

  # Having no rotated log at all is still a failure, but now an explicit one.
  if ((found == 0)); then
    echo "No rotated log (*.log.1) found in $NGINX_LOGS_FOLDER."
    return 1
  fi

  echo "Applying recursive chmod 755 on folder $STATS_REPORTS_FOLDER"
  chmod -R 755 -- "$STATS_REPORTS_FOLDER"
  echo "Applying recursive chown nobody:nogroup on folder $STATS_REPORTS_FOLDER"
  chown -R nobody:nogroup -- "$STATS_REPORTS_FOLDER"
  echo "Finished processing logs at $(date)."
}
# Entry point: refuse to run without root privileges (weekly() ends with a
# recursive chown of the reports folder), then process the rotated logs.
if [[ "$EUID" -ne 0 ]]; then
  echo "Please run as root" >&2
  # A bare "exit" would return the status of the echo above (0) and make the
  # refusal look like a success to the caller.
  exit 1
fi
weekly