
# 209 lines
# 6.8 KiB

import os
import datetime
import json
from irclogtools import logfile_pattern
class CombinedLogfile(object):
    """
    A single-file archive holding many per-day log portions of one channel.

    Portions are kept in memory in ``self.data`` as LogFile-like objects, i.e.
    objects exposing ``name``, ``network``, ``channel``, ``date`` and the
    methods ``contents()``, ``lines()`` and ``bytes()``.
    """

    # Marker strings of the on-disk format. The values are the ones shown in
    # the format description in write().
    HEADER = "#$$$COMBINEDLOG"
    PORTIONHEADER = "#$$$BEGINPORTION"
    ENDPORTIONHEADER = "#$$$ENDPORTION"

    def __init__(self, fpath):
        """
        Create the archive object and load `fpath` if that file already exists.

        :param fpath: path of the archive on disk; also the default write() target
        """
        self.path = fpath
        self.data = []
        if os.path.exists(self.path):
            self._parse()
        # TODO maybe an interface to limit the date range of added stuff?

    def _parse(self):
        """
        Open the logfile and load each section into memory

        Raises AssertionError when the header is wrong, when portion markers
        are unbalanced, when log data appears outside of a portion or when a
        portion belongs to a different channel than the first one.
        """
        begin_marker = self.PORTIONHEADER.encode("UTF-8")
        end_marker = self.ENDPORTIONHEADER.encode("UTF-8")

        with open(self.path, "rb") as f:
            # Read the magic header
            header = f.readline().decode("UTF-8")
            assert self.HEADER and header[0:len(self.HEADER)] == self.HEADER, "Invalid header!"

            channel = None
            network = None
            meta = None
            portion = None  # list of raw lines while inside a portion, otherwise None

            for line in f:
                if line.startswith(begin_marker):
                    assert portion is None and meta is None, "Started portion while already in portion?"
                    # Everything after the first space is the JSON metadata
                    meta = json.loads(line.decode("UTF-8").split(" ", 1)[1].strip())
                    portion = []
                    if not channel:
                        channel = meta["channel"]
                    if not network:
                        network = meta["network"]
                    assert channel == meta["channel"], "Portion does not match first portion's channel"
                    # A differing network is tolerated on purpose; the check is disabled:
                    # assert network == meta["network"], "Portion does not match first portion's network"
                elif line.startswith(end_marker):
                    assert portion is not None and meta is not None, "Ended portion while not in portion?"
                    self.data.append(VirtualLogFile(meta["name"], b"".join(portion)))
                    portion = None
                    meta = None
                else:
                    assert portion is not None, "Log data found outside of a portion"
                    portion.append(line)

            assert portion is None and meta is None, "Unexpected EOF during open portion"

    def write(self, target_path=None, raw=False):
        """
        Write the in-memory contents to disk. A log archive is a UTF-8 text file and is described below.

        The files start with a header, containing only the channel name: #TODO number of portions + check on parsing

            #$$$COMBINEDLOG '#chan'

        Then sorted repeating units of:

            #$$$BEGINPORTION {"channel": "#chan", "date": "20140119", "name": "#chan_20140119.log", "network": null}
            newline-separated UTF-8 log messages
            #$$$ENDPORTION #chan_20140119.log

        the metadata is json and must be sorted by key. network may be null but no other fields may be. date must be
        formatted as above and name, the original file name, must match by irclogtools.logfile_pattern.
        The metadata written here additionally carries "lines" and "size".

        :param target_path: file to write; defaults to the path given to the constructor
        :param raw: when true, write only the concatenated portion contents, without any marker lines
        :raises IndexError: if the archive holds no portions
        """
        if not target_path:
            target_path = self.path

        # Looked up before the file is opened so an empty archive does not truncate the target
        channel = self.data[0].channel
        print("{}: writing {}{} portions".format(target_path, len(self.data), " raw" if raw else ''))

        with open(target_path, "wb") as f:
            # Write the magic header
            if not raw:
                f.write("{} '{}'\n".format(self.HEADER, channel).encode("UTF-8"))

            # Put portions in order
            self.sort()

            # Write each portion
            for section in self.data:
                if not raw:
                    meta = {"name": section.name,
                            "network": section.network,
                            "channel": section.channel,
                            "date": section.date.strftime("%Y%m%d"),
                            "lines": section.lines(),
                            "size": section.bytes()}
                    f.write("{} {}\n".format(self.PORTIONHEADER, json.dumps(meta, sort_keys=True)).encode("UTF-8"))
                contents = section.contents()
                f.write(contents)
                if not raw:
                    # The end marker must start on a line of its own
                    if not contents.endswith(b"\n"):
                        f.write(b"\n")
                    f.write("{} {}\n".format(self.ENDPORTIONHEADER, section.name).encode("UTF-8"))

    def sort(self):
        """
        Sort the portions in place, oldest first
        """
        self.data.sort(key=lambda x: x.date)

    def add_section(self, section):
        """
        Add a portion (as a LogFile object) to the log file. If a portion with matching dates exists, it will
        be replaced

        Raises AssertionError if the portion's channel differs from a portion already present.
        """
        for existing in self.data:
            assert section.channel == existing.channel
        # Build the filtered list first instead of removing while iterating,
        # which would skip the element following each removed one
        self.data[:] = [s for s in self.data if s.date != section.date]
        self.data.append(section)

    def get_range(self):
        """
        Return (start, end) datetime tuple of sections

        :raises IndexError: if the archive holds no portions
        """
        if not self.data:
            raise IndexError("log archive contains no portions")
        dates = [item.date for item in self.data]
        return (min(dates), max(dates))

    def limit(self, end=None, start=None):
        """
        Drop all portions newer than end or older than start
        """
        assert end or start, "Need an start, end, or both"
        self.data[:] = [item for item in self.data
                        if not ((end and item.date > end) or (start and item.date < start))]
class LogFile(object):
    """
    One log file on disk whose network, channel and date are taken from its file name.
    """

    def __init__(self, fname, root=None):
        """
        :param fname: file name (without directory); must match irclogtools.logfile_pattern
        :param root: directory containing the file; None is treated as the current directory
        """
        self.dir = root
        self.name = fname
        self.network = None
        self.channel = None
        self.date = None  # datetime object for this channel
        self._parse()

    def _parse(self):
        """
        Fill network, channel and date from the file name

        Reads the "network", "channel" and "date" groups of logfile_pattern; the
        date group must be formatted as %Y%m%d.
        """
        matches = logfile_pattern.match(self.name).groupdict()
        self.network = matches["network"]
        self.channel = matches["channel"]
        date = matches["date"]
        self.date = datetime.datetime.strptime(date, '%Y%m%d')

    def _path(self):
        """
        Return the path of the file on disk
        """
        # root defaults to None, which os.path.join rejects
        return os.path.join(self.dir or "", self.name)

    def contents(self):
        """
        Return log contents
        """
        with open(self._path(), "rb") as f:
            return f.read()

    def lines(self):
        """
        Return line count
        """
        # Iterate the file object so the whole file is never held in memory
        with open(self._path(), "rb") as f:
            return sum(1 for _ in f)

    def bytes(self):
        """
        Return the size of the file in bytes
        """
        return os.path.getsize(self._path())

    @staticmethod
    def create(fname):
        """
        Build a LogFile from a path, splitting it into directory and file name
        """
        return LogFile(os.path.basename(fname), root=os.path.dirname(fname))

    def __str__(self):
        return "<__main__.LogFile '{}'>".format(self.name)

    __repr__ = __str__


class VirtualLogFile(LogFile):
    """
    A LogFile whose contents are held in memory instead of being read from disk.
    """

    def __init__(self, fname, contents):
        """
        :param fname: original file name; parsed the same way as for LogFile
        :param contents: the log contents as bytes
        """
        super().__init__(fname)
        self.data = contents

    def contents(self):
        """
        Return log contents
        """
        return self.data

    def lines(self):
        """
        Return line count

        Counts the same way LogFile.lines() does: every newline-terminated line
        plus a final unterminated one, so empty contents have zero lines.
        """
        count = self.data.count(b'\n')
        if self.data and not self.data.endswith(b'\n'):
            count += 1
        return count

    def bytes(self):
        """
        Return the size of the contents in bytes
        """
        return len(self.data)