#!/usr/bin/env python3
|
|
|
|
import os
|
|
import datetime
|
|
import argparse
|
|
from collections import defaultdict
|
|
from tabulate import tabulate
|
|
from concurrent.futures import ProcessPoolExecutor
|
|
|
|
from irclogtools.containers import CombinedLogfile
|
|
from irclogtools.tools import discover_logfiles
|
|
|
|
|
|
def archiveit(output_dir, _channel, _logfiles):
    """Combine a channel's raw logfiles into one archive in *output_dir*.

    The archive is written to ``<output_dir>/<channel>.log``.
    """
    dest_path = os.path.join(output_dir, "{}.log".format(_channel))
    archive = CombinedLogfile(dest_path)
    for section in _logfiles:
        archive.add_section(section)
    archive.write()
|
|
|
|
|
|
def by_totalsize(logfiles):
    """Return the total reported size, in bytes, of the given logfiles.

    :param logfiles: iterable of ``LogFile``-like objects exposing a
        ``bytes()`` method that reports the file's size in bytes
    :return: sum of all reported sizes (0 for an empty iterable)
    """
    # Generator expression: no need to materialize an intermediate list.
    return sum(i.bytes() for i in logfiles)
|
|
|
|
|
|
def main():
    """
    Tool for archiving IRC logs (in ZNC's log format: Network_#channel_20170223.log). In testing, inputs and outputs
    always match sha256 sums.

    import:
        given the path to a directory containing many znc logs under one network, combine the logs into 1 log archive
        per channel, placed in the output dir.
    inspect:
        print some stats about the contents of a log archive
    slice:
        given an input log archive, create a new log archive containing a subset of the contents sliced by date range
    split:
        given an input log archive, reproduce the original input logs
    """
    parser = argparse.ArgumentParser("manipulate irc log archives")
    # required=True: exit with a usage error instead of silently doing nothing
    # when no subcommand is given.
    subparser_action = parser.add_subparsers(dest='action', required=True, help='action to take')

    parser_import = subparser_action.add_parser('import', help='Import raw ZNC logfiles into a log archive')
    parser_import.add_argument("-d", "--dir", required=True, help="dir containing log files")
    parser_import.add_argument("-o", "--output", required=True, help="output dir")
    parser_import.add_argument("--all", action="store_true", help="ingest all log files, not just channels")

    parser_inspect = subparser_action.add_parser('inspect', help='Inspect log archives')
    parser_inspect.add_argument("-f", "--file", required=True, help="log archive file to inspect")
    parser_inspect.add_argument("--detail", action="store_true", help="show more detail")

    parser_slice = subparser_action.add_parser('slice', help='Extract date range to new file')
    parser_slice.add_argument("-s", "--src", required=True, help="source log archive path")
    parser_slice.add_argument("-d", "--dest", required=True, help="destination log archive path")
    parser_slice.add_argument("--start", help="start timestamp such as 2016-1-1")
    parser_slice.add_argument("--end", help="end timestamp such as 2016-1-1")
    parser_slice.add_argument("--raw", action="store_true", help="write raw lines instead of log archive")

    parser_split = subparser_action.add_parser('split', help='Split a log archive back into original logfiles')
    parser_split.add_argument("-s", "--src", required=True, help="source log archive path")
    parser_split.add_argument("-d", "--dest", required=True, help="dir to dump logs into")

    args = parser.parse_args()

    if args.action == "import":
        os.makedirs(args.output, exist_ok=True)

        logs = discover_logfiles(args.dir)

        # Group the discovered logfiles by channel; each group becomes one archive.
        by_channel = defaultdict(list)
        for log in logs:
            # By default only real channels (names starting with "#") are
            # archived; --all includes queries/status logs too.
            if not args.all and not log.channel.startswith("#"):
                continue
            by_channel[log.channel].append(log)

        _display = [[k, len(v)] for k, v in by_channel.items()]
        print(tabulate(sorted(_display, key=lambda x: x[0].lower()), headers=["channel", "num logs"]) + "\n")

        # os.cpu_count() may return None; fall back to 1 worker in that case.
        with ProcessPoolExecutor(max_workers=(os.cpu_count() or 1) * 2) as tp:
            # Submit the largest channels first so the long jobs start early.
            futures = [
                tp.submit(archiveit, args.output, channel, logfiles)
                for channel, logfiles in sorted(by_channel.items(), key=lambda x: by_totalsize(x[1]), reverse=True)
            ]
            # result() re-raises any exception from the worker; without this,
            # failures in archiveit would be silently discarded.
            for future in futures:
                future.result()

    elif args.action == "inspect":
        log = CombinedLogfile(args.file)

        drange = log.get_range()

        info = [["portions", len(log.data)],
                ["start", drange[0].strftime('%Y-%m-%d')],
                ["end", drange[1].strftime('%Y-%m-%d')]]

        print(tabulate(info, headers=["property", "value"]) + "\n")

        if args.detail:
            # Per-portion breakdown plus totals.
            info = []
            total_bytes = 0
            total_lines = 0
            for portion in log.data:
                data = portion.contents()
                size = len(data)
                total_bytes += size
                lines = len(data.split(b"\n"))
                total_lines += lines
                info.append([portion.name,
                             portion.network,
                             portion.channel,
                             portion.date.strftime('%Y-%m-%d'),
                             lines,
                             "{:,}".format(size)])
            info.append([])
            info.append(['', '', '', 'total:', "{:,}".format(total_lines), "{:,} B".format(total_bytes)])

            print(tabulate(info, headers=["portion file", "network", "channel", "date", "lines", "bytes"]) + "\n")

    elif args.action == "slice":
        src = CombinedLogfile(args.src)

        # --start/--end are optional; `and` short-circuits to None/'' when absent.
        limstart = args.start and datetime.datetime.strptime(args.start, '%Y-%m-%d')
        limend = args.end and datetime.datetime.strptime(args.end, '%Y-%m-%d')

        src.limit(start=limstart, end=limend)
        src.write(args.dest, raw=args.raw)

    elif args.action == "split":
        src = CombinedLogfile(args.src)

        # Ensure the destination dir exists before writing portions into it.
        os.makedirs(args.dest, exist_ok=True)

        for portion in src.data:
            with open(os.path.join(args.dest, portion.name), "wb") as f:
                f.write(portion.contents())
|
|
|
|
# Script entry point: only run when executed directly, not when imported.
if __name__ == '__main__':
    main()