asciitools/parser.py
2017-06-25 13:06:16 -07:00

377 lines
13 KiB
Python
Executable File

#!/usr/bin/env python3
import re
from colored import fg, bg, attr, style
import os
from collections import defaultdict
# IRC in-band formatting control characters. ChatLine.formatting_re splits
# messages on these same four bytes.
IRC_BOLD = "\x02"
IRC_COLOR = "\x03"
# NOTE(review): \x16 is reverse-video in mIRC-style formatting; italics are
# usually \x1d -- confirm which one is intended here.
IRC_ITALIC = "\x16"
IRC_UNDERLINE = "\x1f"
# See https://pypi.python.org/pypi/colored
# The values below are colour names handed to colored's fg() / bg() helpers.
WHITE = "white"
DARK_GREY = "grey_23"
BLUE = "blue"
GREEN = "green"
ORANGE = "light_red"
RED = "red"
PURPLE = "plum_4"
OFF_YELLOW = "gold_1"
YELLOW = "yellow"
LIGHT_GREEN = "light_green"
AQUAMARINE = "cyan"
CYAN = "light_cyan"
LIGHT_BLUE = "light_blue"
LIGHT_PURPLE = "light_magenta"
LIGHT_GREY = "dark_gray"
V_LIGHT_GREY = "grey_82"
# Palette lookup table: the list index is the IRC colour number (0-15) parsed
# after an IRC_COLOR byte, the value is the terminal colour name to use.
ircolors = [WHITE, DARK_GREY, BLUE, GREEN, ORANGE, RED, PURPLE, OFF_YELLOW, YELLOW,
LIGHT_GREEN, AQUAMARINE, CYAN, LIGHT_BLUE, LIGHT_PURPLE, LIGHT_GREY, V_LIGHT_GREY]
# Section spec used by the "split" action: "<start>[-<end>]<tag>", e.g. "0a"
# (line 0 -> group a) or "10-20a" (lines 10 through 20 inclusive -> group a).
TRANSFORM_RE = re.compile(r'(?P<start>[0-9]+)(\-(?P<end>[0-9]+))?(?P<tag>[a-z]+)')
def print_palette():
    """
    Print one row per palette entry, drawn on that entry's background colour.

    Debugging aid for checking how each IRC colour number in ``ircolors``
    renders in the current terminal.
    """
    row_template = "{reset}{num}: {code} {num} {num} {num} {num} {num} "
    for index, color_name in enumerate(ircolors):
        print(row_template.format(reset=style.RESET, num=index, code=bg(color_name)))
def load_file(path):
    """
    Read *path* as bytes and return its lines decoded as UTF-8.

    Each line is decoded on its own so a single undecodable line does not
    abort the whole file: such lines are reported on stdout and skipped.
    Only the trailing newline is stripped from each kept line.

    :param path: path of the file to read
    :return: list of decoded lines, in file order
    """
    decoded_lines = []
    with open(path, "rb") as handle:
        for raw in handle:
            try:
                text = raw.decode("UTF-8")
            except UnicodeDecodeError:
                print("Dropped line: {}".format(raw))
                continue
            decoded_lines.append(text.rstrip("\n"))
    return decoded_lines
def write_ascii(chatlines, output_dir):
    """
    Write the messages of *chatlines* to the next free numbered file.

    Files in *output_dir* are named with increasing integers ("1", "2", ...).
    The new file gets the highest existing number plus one.

    :param chatlines: iterable of objects with a ``message`` str attribute
    :param output_dir: destination directory; created (including parents)
        when it does not exist yet
    :return: path of the file that was written
    """
    # makedirs so nested output paths work and an existing dir is not an error
    os.makedirs(output_dir, exist_ok=True)

    highest = 0
    for entry in os.listdir(output_dir):
        try:
            highest = max(highest, int(entry))
        except ValueError:
            # Unrelated files (".DS_Store", notes, ...) must not break numbering
            continue

    dest_file = os.path.join(output_dir, str(highest + 1))
    with open(dest_file, "wb") as f:
        for line in chatlines:
            f.write(line.message.encode("UTF-8"))
            f.write(b"\n")
    return dest_file
class ChatLine(object):
    """
    One line of an IRC log, split into sender, message and terminal styling.

    A line shaped like ``[timestamp] <nick> message`` is a chat line. A line
    shaped like ``[timestamp] anything else`` is recognised but carries no
    message. Any other text is treated as a bare message. IRC colour codes
    in the message are translated to terminal escape sequences; the result
    is available through ``formatted`` / ``str()``.

    Assigning ``original`` (done by the constructor) re-parses the line from
    scratch.
    """
    line_format_re = re.compile(r'^(?P<timestamp>\[[0-9\:]+\]) ((<(?P<nick>[^>]+)>) (?P<message>.+)|(?P<other>.+))')
    formatting_re = re.compile(r'([\x02\x03\x16\x1f])')

    def __init__(self, line):
        self._reset()
        # Assigning through the property parses the line immediately
        self.original = line

    def _reset(self):
        """Return all parse results to their 'nothing parsed yet' state."""
        self._parts = []
        # Was the line a privmsg or something else?
        self.is_chat = False
        # Was there formatting in the line?
        self.has_format = False
        # Sender nick (if chat)
        self.nick = None
        # Message only
        self.message = None

    def parse(self):
        """Parse ``original`` and populate nick, message, flags and parts."""
        # Start clean so re-parsing never accumulates state from a prior line
        self._reset()
        line = self._original
        matches = ChatLine.line_format_re.match(line)
        if not matches:
            # Assume it's just a message ready to be formatted
            self.message = line
            self.parse_message_args(line)
            return
        line_parts = matches.groupdict()
        if line_parts["message"]:
            self.is_chat = True
            self.nick = line_parts["nick"]
            self.message = line_parts["message"]
            self.parse_message_args(self.message)

    def parse_message_args(self, message):
        """
        Translate the IRC formatting in *message* into terminal output parts.

        The resulting text chunks and escape sequences are appended to the
        internal parts list that ``formatted`` joins together.
        """
        # Splitting on a capturing group yields
        # [text, separator, text, separator, text, ...]
        pieces = ChatLine.formatting_re.split(message)
        # Anything before the first format char isn't formatted
        self._parts.append(pieces[0])
        # Walk (separator, following text) pairs
        for seperator, part in zip(pieces[1::2], pieces[2::2]):
            if seperator == IRC_COLOR:
                self.has_format = True
                # Colour arguments can look like:
                #   9,9DATA
                #   10,10DATA or 9,10DATA or 10,9DATA
                #   ,10DATA
                foreground, part = ChatLine._take_color(part)
                background = None
                # A background needs a comma immediately followed by a digit;
                # otherwise the comma is ordinary message text.
                if len(part) >= 2 and part[0] == "," and ChatLine.is_numeric(part[1]):
                    background, part = ChatLine._take_color(part[1:])
                if foreground is not None:
                    self._parts.append(fg(ircolors[foreground]))
                if background is not None:
                    self._parts.append(bg(ircolors[background]))
                # A bare colour byte with no numbers resets all styles
                if foreground is None and background is None:
                    self._parts.append(style.RESET)
            # The other separators (\x02, \x16, \x1f) are currently dropped
            # without emitting any terminal styling.
            self._parts.append(part)

    @staticmethod
    def _take_color(text):
        """
        Split a leading 1-2 digit colour number off *text*.

        A second digit is only consumed when the two-digit value is a valid
        palette index (<= 15); otherwise it belongs to the message text.

        :return: ``(color, rest)``; ``color`` is None when *text* does not
            start with a digit, in which case ``rest`` is *text* unchanged
        """
        if not text or not ChatLine.is_numeric(text[0]):
            return None, text
        width = 1
        if len(text) >= 2 and ChatLine.is_numeric(text[1]) and int(text[:2]) <= 15:
            width = 2
        return int(text[:width]), text[width:]

    @property
    def formatted(self):
        """The message with IRC colours rendered as terminal escapes."""
        return ''.join(self._parts)

    def __str__(self):
        return self.formatted

    @property
    def original(self):
        """The raw line as given; assigning a new value re-parses it."""
        return self._original

    @original.setter
    def original(self, original):
        self._original = original
        self.parse()

    @staticmethod
    def is_numeric(char):
        """Return True when the single character *char* is an ASCII digit."""
        return 48 <= ord(char) <= 57
def parse_logfile(logfile, output_dir):
    """
    Extract runs of colour-formatted chat lines ("asciis") from *logfile*.

    Formatted lines are collected per sender nick. A sender who starts
    sending formatted lines right after someone else's formatted lines, and
    who speaks again shortly afterwards, is attached to that existing ascii
    (multi-user ascii). An ascii is written out with ``write_ascii`` once
    more than 16 log lines have passed since it was last added to, or when
    the log ends.

    :param logfile: path of the IRC log to read
    :param output_dir: directory the extracted asciis are written to
    """
    # print_palette()
    # Parse every line once; the look-behind / look-ahead checks below reuse
    # these objects instead of re-parsing raw text for each candidate line.
    parsed_lines = [ChatLine(raw) for raw in load_file(logfile)]

    # Mapping of sender nick to lines sent/meta
    # meta is:
    #   lastseen -> int, line number the chatter was last seen
    #   lines    -> list of ChatLine belonging to this ascii
    #   ref      -> nick of the ascii this sender contributes to ("" if it
    #               owns its own ascii); when set, act on that one instead
    class Ascii:
        def __init__(self, lastseen, lines, ref):
            self.lastseen = lastseen
            self.lines = list(lines)
            self.ref = ref

        def __str__(self):
            return "Ascii(lastseen={}, len(lines)={}, ref={})".format(self.lastseen, len(self.lines), self.ref)

    watched_asciis = {}

    def resolve(input_nic):
        # Follow ref links until reaching the nick that owns the lines
        n = watched_asciis[input_nic]
        if n.ref:
            return resolve(n.ref)
        return input_nic

    def clean_refs(input_nic):
        # Drop every slot that merely points at input_nic
        for nickslot_name, nickslot in list(watched_asciis.items()):
            if nickslot.ref == input_nic:
                del watched_asciis[nickslot_name]

    def complete(message, nickslot_name, nickslot):
        # Echo the finished ascii and persist it
        print(message.format(nickslot_name))
        for finished_line in nickslot.lines:
            print(finished_line.nick, finished_line, style.RESET)
        write_ascii(nickslot.lines, output_dir)

    for lineno, line in enumerate(parsed_lines):
        # print(line.nick, line, style.RESET)
        # Do EOF completes: flush asciis that have been idle for > 16 lines.
        # Iterate over a snapshot because entries are deleted while looping.
        for nickslot_name, nickslot in list(watched_asciis.items()):
            if not nickslot.ref and lineno - nickslot.lastseen > 16:
                complete("EOF-Completed {}", nickslot_name, nickslot)
                del watched_asciis[nickslot_name]
                clean_refs(nickslot_name)

        if not line.has_format:  # TODO or smells like ascii art
            continue

        nickslot_name = line.nick
        nickslot = watched_asciis.get(nickslot_name)
        if nickslot is not None:
            # Known sender: append to its ascii (or the one it refers to)
            nickslot.lastseen = lineno
            if nickslot.ref:
                nickslot = watched_asciis[nickslot.ref]
                nickslot.lastseen = lineno
            nickslot.lines.append(line)
            continue

        # New sender. If:
        #  - any of the last 3 lines was formatted
        #  - I speak again in the next X lines,
        # we assume multi-user ascii.
        # NOTE: the later line is not required to be formatted itself.
        # The slice must end at lineno (exclusive): ending at lineno - 1
        # skipped the previous line and wrapped to [0:-1] on the first line.
        last_3_lines = parsed_lines[max(lineno - 3, 0):lineno]
        next_X_lines = parsed_lines[lineno + 1:lineno + 16]
        recent_formatted = [i for i in last_3_lines if i.has_format]
        i_speak_again = any(i.nick == nickslot_name for i in next_X_lines)

        join_existing = False
        target = None
        if recent_formatted and i_speak_again:
            candidate = recent_formatted[-1].nick
            # Only join an ascii that is still being tracked
            if candidate in watched_asciis:
                target = resolve(candidate)
                join_existing = True

        if join_existing:
            # Ref onto existing ascii
            watched_asciis[nickslot_name] = Ascii(lineno, [], target)
            owner = watched_asciis[target]
            owner.lines.append(line)
            # Keep the owner alive, same as when a known ref speaks
            owner.lastseen = lineno
        else:
            # Start a new ascii
            watched_asciis[nickslot_name] = Ascii(lineno, [line], "")

    # End of log: flush whatever is still open. Ref slots hold no lines of
    # their own (they live on the owner), so writing them would only create
    # empty files.
    for nickslot_name, nickslot in watched_asciis.items():
        if nickslot.ref:
            continue
        complete("EOL-Completed {}", nickslot_name, nickslot)
def main():
    """
    Command line entry point.

    Sub-commands:
      parse    extract asciis from one or more IRC logs into an output dir
      inspect  print a file with its IRC colours rendered for the terminal
      split    split one ascii file into several, driven by --sections

    --sections takes one or more specs of the form ``<start>[-<end>]<tag>``:
      0a       associate line 0 with group a
      10-20a   associate lines 10-20, inclusive, with group a
    Every line of the input must end up in a group. Each group is written
    to its own new file and the input file is removed afterwards (unless
    --preview is given).
    """
    import argparse
    parser = argparse.ArgumentParser(description="IRC log ascii art exporter toolkit")
    subparser_action = parser.add_subparsers(dest='action', help='action to take')

    parser_parse = subparser_action.add_parser('parse', help='Extract asciis from logs')
    parser_parse.add_argument('-o', '--output-dir', help="Output directory", default="./asciis")
    parser_parse.add_argument('files', nargs="+", help="Input log files")

    parser_insp = subparser_action.add_parser('inspect', help='View files with formatting')
    parser_insp.add_argument("-l", "--lines", action="store_true", help="Show line numbers")
    parser_insp.add_argument('file', nargs=1, help="Input file")

    parser_split = subparser_action.add_parser('split', help='Split asciis into multiple')
    parser_split.add_argument('file', nargs=1, help="Input file")
    parser_split.add_argument("-p", "--preview", action="store_true", help="Don't write anything")
    parser_split.add_argument('-o', '--output-dir', help="Output directory", default="./asciis")
    parser_split.add_argument("-t", "--sections", nargs="+", help="sections to modify", required=True)

    args = parser.parse_args()

    if args.action == "parse":
        for item in args.files:
            parse_logfile(item, args.output_dir)

    elif args.action == "inspect":
        for lineno, line in enumerate(load_file(args.file[0])):
            if args.lines:
                print("{}\t".format(lineno), end='')
            print(ChatLine(line).formatted + style.RESET)

    elif args.action == "split":
        # Each entry is [tag, text]; tag stays None until a section claims it
        tagged_lines = [[None, line] for line in load_file(args.file[0])]
        for t in args.sections:
            spec = TRANSFORM_RE.match(t)
            if spec is None:
                parser.error("invalid section {!r}, expected e.g. 0a or 10-20a".format(t))
            start = int(spec.group("start"))
            end = int(spec.group("end")) if spec.group("end") else start
            if end >= len(tagged_lines):
                parser.error("section {!r} is out of range, file has {} lines"
                             .format(t, len(tagged_lines)))
            for i in range(start, end + 1):
                tagged_lines[i][0] = spec.group("tag")

        grouped = defaultdict(list)
        for tag, line in tagged_lines:
            grouped[tag].append(line)
        # Refuse to continue before anything is written or deleted
        if None in grouped:
            raise Exception("Not all lines grouped")

        for tag, group_lines in grouped.items():
            print("Tag {}:".format(tag))
            for line in group_lines:
                print(ChatLine(line).formatted + style.RESET)
            if not args.preview:
                new_file = write_ascii([ChatLine(i) for i in group_lines], args.output_dir)
                print("Wrote {}".format(new_file))
            print("\n\n")
        if not args.preview:
            # The original has been fully redistributed into the new files
            os.unlink(args.file[0])

    else:
        # No sub-command given: show usage instead of silently doing nothing
        parser.print_help()


if __name__ == '__main__':
    main()