#!/bin/bash
#
# log2stats
# The successor of logs-rapports for 42l.
#
# Must be run with weekly rotated logs (on Mondays).

set -euo pipefail

# Folder holding the rotated nginx logs, one "<service>.log.1" per service.
readonly NGINX_LOGS_FOLDER="/var/lib/docker/volumes/local-syslog/_data/nginx/"

# Root folder under which the generated reports are written.
readonly STATS_REPORTS_FOLDER="/var/lib/docker/volumes/nfs/_data/stats-reports/"

# Root folder of the per-service databases used to accumulate monthly data.
readonly STATS_DATABASE_FOLDER="/var/lib/docker/volumes/local-stats-data/_data/"

# PCRE (used with grep -P) matching the bracketed timestamp of a log line
# followed by the opening quote and HTTP method of the request.
readonly REGEX_DATE="\[[^\]]*\] \"(GET|HEAD|POST|PUT|DELETE|CONNECT|OPTIONS|TRACE|PATCH|PROPFIND)"

#######################################
# Print the service name encoded in a rotated log file name,
# e.g. "blog.log.1" -> "blog".
# Arguments: $1 - log file name, without directory part
# Outputs:   the service name on stdout (empty if the name starts with ".log")
#######################################
_log2stats_service_name() {
  # Cut at the first literal ".log". A regex such as "s/.log.*//" must not be
  # used here: its unescaped dot turns "blog.log.1" into an empty name.
  printf '%s\n' "${1%%.log*}"
}

#######################################
# Print the date (YYYY-MM-DD) found on the first line of a log file.
# Globals:   REGEX_DATE (read)
# Arguments: $1 - path of the log file
# Outputs:   the date on stdout
# Returns:   non-zero if the first line is missing or carries no parsable date
#######################################
_log2stats_first_date() {
  local raw
  # "[01/Jan/2024:00:00:01 +0000] "GET" -> "[01/Jan/2024:" -> "01/Jan/2024"
  # -> "01 Jan 2024", which `date -d` understands.
  raw=$(head -n 1 -- "$1" \
    | grep -Po -- "$REGEX_DATE" \
    | grep -o '^[^ :]*:' \
    | grep -o '[0-9].*[0-9]' \
    | tr '/' ' ') || return 1
  [[ -n "$raw" ]] || return 1
  date -d "$raw" +%Y-%m-%d
}

#######################################
# Format a date as it appears in the logs, with the slashes escaped so the
# result can be embedded in a sed address: "01\/Jan\/2024".
# Arguments: $1 - any date expression accepted by `date -d`
# Outputs:   the formatted date on stdout
#######################################
_log2stats_sed_date() {
  # LC_ALL=C keeps %b in English, like the month names written in the logs.
  LC_ALL=C date -d "$1" '+%d\/%b\/%Y'
}

#######################################
# Generate the weekly reports for every "<service>.log.1" file, feed the
# per-service monthly databases and, when the processed week crosses a month
# boundary, generate the monthly reports and restart the databases.
#
# For each service, scripts/<service>.sh (and scripts/internal.sh) is called
# relative to the current directory with:
#   $1: input log path
#   $2: used for title text, week X or month X
#   $3: goaccess bonus args (on crawlers, db,…)
#   $4: output file
#   $5: sed pipe
#   $6 / $7: reserved for the custom script
#
# Globals:   NGINX_LOGS_FOLDER, STATS_REPORTS_FOLDER, STATS_DATABASE_FOLDER,
#            REGEX_DATE (all read)
# Arguments: none
# Outputs:   progress messages on stdout, skip warnings on stderr
#######################################
weekly() {
  # The three report flavours: folder name, extra goaccess flag, log label.
  local -a modes=("ignore-crawlers" "crawlers-only" "all")
  local -a mode_flags=("--ignore-crawlers " "--crawlers-only " "")
  local -a mode_labels=("ignoring crawlers" "crawlers only" "full stats")
  local i logpath logfile service_name service_script
  local first_date last_date weekday first_month last_month
  local first_day first_monday last_day last_day_old_month first_day_new_month
  local week_number iso_year year
  local output_folder internal_folder output_folder_monthly db_folder
  local week_range old_month_range new_month_range

  echo "Start processing logs at $(date)."

  # Glob instead of parsing `ls | grep log.1`: only the freshly rotated
  # "*.log.1" files are wanted, not "*.log.10" or "*.log.1.gz".
  for logpath in "$NGINX_LOGS_FOLDER"/*.log.1; do
    # Also covers the "no match" case, where the pattern is left unexpanded.
    [[ -f "$logpath" ]] || continue
    logfile=${logpath##*/}
    logpath="$NGINX_LOGS_FOLDER/$logfile"

    # We don't want stats for those
    case "$logfile" in
      misc.log.1 | error.log.1 | collabora.log.1)
        echo "Skipping file $logfile."
        continue
        ;;
    esac

    service_name=$(_log2stats_service_name "$logfile")
    if [[ -z "$service_name" ]]; then
      echo "Skipping file $logfile: cannot derive a service name." >&2
      continue
    fi
    service_script="scripts/$service_name.sh"

    # Get first log line's date. An empty or unexpected log must not abort
    # the whole run (it would under set -e / pipefail), so skip it instead.
    if ! first_date=$(_log2stats_first_date "$logpath"); then
      echo "Skipping file $logfile: no parsable date on its first line." >&2
      continue
    fi
    weekday=$(date -d "$first_date" +%u)
    # Monday following the week that contains first_date.
    last_date=$(date -d "$first_date-$weekday days + 8 days" +%Y-%m-%d)

    # For monthly stats
    first_month=$(date -d "$first_date" +%m)
    last_month=$(date -d "$last_date" +%m)

    # Calculate the Monday and the end of the week, escaped for sed
    first_day=$(_log2stats_sed_date "$first_date")
    first_monday=$(_log2stats_sed_date "$first_date-$weekday days + 1 day")
    last_day=$(_log2stats_sed_date "$last_date")

    if [[ "$first_day" != "$first_monday" ]]; then
      echo "Alert: First day regex is $first_day while first Monday is $first_monday. Logrotate issue?"
    fi

    # Calculate week and year to create path. %V is the ISO week number, so
    # it must be paired with the ISO year (%G): with %Y, Monday 30 Dec 2024
    # would be filed as 2024/01 and overwrite the report of early January.
    week_number=$(date -d "$first_date" +%V)
    iso_year=$(date -d "$first_date" +%G)
    # Calendar year, used for the monthly paths and month arithmetic.
    year=$(date -d "$first_date" +%Y)

    week_range="/$first_day/,/$last_day/p"

    output_folder="$STATS_REPORTS_FOLDER/reports/weekly/$iso_year/$week_number"
    for i in "${!modes[@]}"; do
      mkdir -p -- "$output_folder/${modes[i]}"
    done

    # generating public statistics (best effort: failures are ignored)
    for i in "${!modes[@]}"; do
      echo "Generating $service_name stats, ${mode_labels[i]}."
      "$service_script" "$logpath" "week $week_number" "${mode_flags[i]}-" \
        "$output_folder/${modes[i]}/$service_name.html" "$week_range" || true
    done

    # generating internal reports (best effort as well)
    internal_folder="$STATS_REPORTS_FOLDER/internal/weekly/$iso_year/$week_number"
    mkdir -p -- "$internal_folder"

    echo "Generating internal $service_name stats, HTML format."
    scripts/internal.sh "$logpath" "week $week_number" "html" \
      "$internal_folder/$service_name.html" "$week_range" || true

    echo "Generating internal $service_name stats, JSON format."
    scripts/internal.sh "$logpath" "week $week_number" "json" \
      "$internal_folder/$service_name.json" "$week_range" || true

    # now recalculating all data for monthly stats
    db_folder="$STATS_DATABASE_FOLDER/$service_name"
    for i in "${!modes[@]}"; do
      mkdir -p -- "$db_folder/${modes[i]}"
    done

    # check if the logs overlap a new month
    if [[ "$first_month" != "$last_month" ]]; then
      # month change: only send logs until last day of month
      echo "New month overlap detected!"
      last_day_old_month=$(_log2stats_sed_date "$year/$first_month/1 + 1 month - 1 day")
    else
      last_day_old_month=$last_day
    fi
    # NOTE(review): if the service scripts apply $5 with `sed -n`, a range
    # ends at the FIRST line matching its end address, so on a month change
    # only one line of the month's last day would be fed — confirm.
    old_month_range="/$first_day/,/$last_day_old_month/p"

    for i in "${!modes[@]}"; do
      echo "Feeding monthly data for $service_name stats, ${mode_labels[i]}."
      "$service_script" "$logpath" "week $week_number" \
        "${mode_flags[i]}--process-and-exit --persist --restore --db-path /stats-data/$service_name/${modes[i]}/ -" \
        "/dev/stdout" "$old_month_range" || true
    done

    # Everything below only applies when the week crosses a month boundary.
    [[ "$first_month" != "$last_month" ]] || continue

    # then generate the monthly stats and clear the DB
    # NOTE(review): unlike the steps above, the calls below are not guarded
    # by `|| true`; under set -e a failure aborts the run. Kept as found.
    output_folder_monthly="$STATS_REPORTS_FOLDER/reports/monthly/$year/$first_month"
    for i in "${!modes[@]}"; do
      mkdir -p -- "$output_folder_monthly/${modes[i]}"
    done

    for i in "${!modes[@]}"; do
      echo "Generating monthly $service_name stats, ${mode_labels[i]}."
      "$service_script" "/dev/null" "month $first_month" \
        "${mode_flags[i]}--persist --restore --db-path /stats-data/$service_name/${modes[i]}/" \
        "$output_folder_monthly/${modes[i]}/$service_name.html" ""
    done

    # clear the database once monthly stats have been generated
    echo "Resetting $service_name monthly database…"
    # ":?" aborts instead of deleting a parent folder if either part is empty.
    rm -rf -- "${STATS_DATABASE_FOLDER:?}/${service_name:?}"
    for i in "${!modes[@]}"; do
      mkdir -p -- "$db_folder/${modes[i]}"
    done

    # gets the first day of the new month
    first_day_new_month=$(_log2stats_sed_date "$year/$first_month/1 + 1 month")
    new_month_range="/$first_day_new_month/,/$last_day/p"

    for i in "${!modes[@]}"; do
      echo "Feeding new monthly data for $service_name stats, ${mode_labels[i]}."
      "$service_script" "$logpath" "week $week_number" \
        "${mode_flags[i]}--process-and-exit --persist --restore --db-path /stats-data/$service_name/${modes[i]}/ -" \
        "/dev/stdout" "$new_month_range"
    done
  done

  echo "Applying recursive chmod 755 on folder $STATS_REPORTS_FOLDER…"
  chmod -R 755 -- "$STATS_REPORTS_FOLDER"

  echo "Applying recursive chown nobody:nogroup on folder $STATS_REPORTS_FOLDER…"
  chown -R nobody:nogroup -- "$STATS_REPORTS_FOLDER"

  echo "Finished processing logs at $(date)."
}

# Refuse to run unprivileged: weekly ends with a recursive chown of the
# reports folder. Exit with a failure status so callers (cron, wrappers)
# can tell that nothing was generated.
if [[ "$EUID" -ne 0 ]]; then
  echo "Please run as root" >&2
  exit 1
fi

weekly