awk program to merge "fail2ban" logs with other log files
I was interested in seeing the interaction between login failures on penguin
and fail2ban. This can be done by merging /var/log/secure
and /var/log/fail2ban.log
together on date and time, with the complication that they use different formats for
the time. So I wrote an awk program to standardise the output of the log files.
Sample run:
cd /var/log && awk -f /r/merge-logs-for-fail2ban.awk fail2ban.log secure httpd/*{access,error}_log | sort | less
File /r/merge-logs-for-fail2ban.awk
_
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 |
#!/usr/bin/awk -f
#
# Merge fail2ban, Apache httpd (access and error) and syslog-style
# (/var/log/secure) log files into one common line format, so that the
# combined output can be put in chronological order with sort(1):
#
#   YYYY-MM-DD HH:MM:SS,mmm source       [pid]:   message
#
# Requires GNU awk: it uses the three-argument match(), strftime() and
# systime(), none of which are in POSIX awk.

BEGIN {
    month["Jan"] = "01"; month["Feb"] = "02"; month["Mar"] = "03"
    month["Apr"] = "04"; month["May"] = "05"; month["Jun"] = "06"
    month["Jul"] = "07"; month["Aug"] = "08"; month["Sep"] = "09"
    month["Oct"] = "10"; month["Nov"] = "11"; month["Dec"] = "12"

    # syslog timestamps carry no year, so the current year is assumed for
    # them.  Entries written before the most recent New Year are therefore
    # dated one year too late.
    curr_year = strftime("%Y", systime())
}

# Report progress on stderr each time awk moves on to a new input file.
FILENAME != prev_FILENAME {
    print "Processing " FILENAME > "/dev/stderr"
    prev_FILENAME = FILENAME
}

# Print every recognised line in the common format; drop everything else.
{
    line = normalise(FILENAME)
    if (line != "")
        print line
}

# Add "milliseconds" (actually just an incrementing counter) to a timestamp,
# so that lines sharing the same second keep their original relative order
# after sorting.
#
#   date_time  timestamp in the form "YYYY-MM-DD HH:MM:SS"
#   returns    "YYYY-MM-DD HH:MM:SS,mmm"
#
# The counter lives in the globals msec and prev_date_time and restarts at
# zero whenever the timestamp differs from the previous call.  It is padded
# to three digits; more than 1000 lines within one second produce a fourth
# digit, which no longer sorts correctly as text.
function timestamp_msec(date_time) {
    if (date_time == prev_date_time) {
        msec++
    } else {
        msec = 0
        prev_date_time = date_time
    }
    return date_time "," sprintf("%03d", msec)
}

# Derive the source label for an httpd log from its file name:
# "httpd/ssl_access_log" -> "ssl_access".  Returns "" when the name does not
# contain "<label>_log".  (a is a local.)
function httpd_source(fname,    a) {
    match(fname, /([a-z_]+)_log/, a)
    return a[1]
}

# Convert the current input record ($0) to the common output format.
#
#   fname    name of the file the record came from (used to label httpd logs)
#   returns  the formatted line without a trailing newline, or "" when the
#            record is not recognised or is deliberately skipped
#
# The formats are tried in a fixed order and the first one that matches
# wins, so a record is never processed by more than one format.
# (a, date_time, source, pid and message are locals.)
function normalise(fname,    a, date_time, source, pid, message) {
    if ($1 ~ /^[12][0-9][0-9][0-9]-[01][0-9]-[0-3][0-9]$/) {
        # fail2ban:
        # $1         $2           $3               $4      $5-
        # 2018-11-07 17:46:07,711 fail2ban.actions [1466]: message
        if ($3 == "fail2ban.filtersystemd")
            return ""
        date_time = $1 " " $2              # already carries milliseconds
        source = "f2b." substr($3, 10)     # strip the leading "fail2ban."
        pid = $4
        match($0, /\[[0-9]+\]: (.*)/, a)
        message = a[1]
    } else if (match($4, /^\[([0-3][0-9])\/([A-Z][a-z][a-z])\/([12][0-9][0-9][0-9]):([0-2][0-9]:[0-5][0-9]:[0-5][0-9])/, a) && (a[2] in month)) {
        # Apache httpd access log:
        # $1          $2 $3 $4                    $5     $6
        # 50.31.96.12 -  -  [10/Aug/2014:04:03:16 -0500] "GET /x HTTP/1.1" 200 14198
        date_time = timestamp_msec(a[3] "-" month[a[2]] "-" a[1] " " a[4])
        source = httpd_source(fname)
        pid = ""
        match($0, /("[^"]+") ([0-9]+)/, a)
        message = $1 " " a[1] " " a[2]     # IP, request, and status
    } else if (match($0, /^\[... (...) (..) (..:..:..)\.(...)... (....)\]/, a) && (a[1] in month)) {
        # Apache httpd error log:
        # [Sun Nov 04 03:38:21.299746 2018] [core:notice] [pid 1613] message
        #      a[1] a[2] a[3]     a[4]     a[5]
        date_time = a[5] "-" month[a[1]] "-" a[2] " " a[3] "," a[4]
        source = httpd_source(fname)
        # Threaded MPMs log "[pid N:tid M]"; accept the pid with or without it.
        if (match($0, /\[pid ([0-9]+)(:tid [0-9]+)?\] (.*)/, a)) {
            pid = "[" a[1] "]:"            # same "[pid]:" shape as the other formats
            message = a[3]
        }
    } else if ($1 in month) {
        # /var/log/secure (syslog):
        # $1  $2 $3       $4      $5            $6-
        # Nov 5  19:14:21 penguin sshd[10119]:  message
        # The exact month-name test keeps out tokens such as "[Sun" or "[Mon".
        date_time = timestamp_msec(curr_year "-" month[$1] "-" sprintf("%02d", $2) " " $3)
        match($0, /([a-z]+)(\[[0-9]+\]:) (.*)/, a)
        source = a[1]
        pid = a[2]
        message = a[3]
    }

    # An empty source means the record was not recognised.
    if (source == "")
        return ""

    # output: 2018-11-07 17:46:07,700 source [pid]: message
    return sprintf("%s %-12s %-8s %s", date_time, source, pid, message)
}