Make LinkTitler a little safer

This commit is contained in:
dave 2015-10-31 15:12:22 -07:00
parent 80d99a8cb0
commit 2643883a88
2 changed files with 46 additions and 9 deletions

View File

@ -114,8 +114,9 @@ class IRCCore(asynchat.async_chat):
socket_type = socket.AF_INET6 socket_type = socket.AF_INET6
socketInfo = socket.getaddrinfo(self.server, self.port, socket_type) socketInfo = socket.getaddrinfo(self.server, self.port, socket_type)
self.create_socket(socket_type, socket.SOCK_STREAM) self.create_socket(socket_type, socket.SOCK_STREAM)
self.log.debug("Socket created")
self.connect(socketInfo[0][4]) self.connect(socketInfo[0][4])
self.log.debug("Connection established")
self.asynmap[self._fileno] = self # http://willpython.blogspot.com/2010/08/multiple-event-loops-with-asyncore-and.html self.asynmap[self._fileno] = self # http://willpython.blogspot.com/2010/08/multiple-event-loops-with-asyncore-and.html
def handle_connect(self): def handle_connect(self):

View File

@ -13,13 +13,14 @@ import re
import time import time
import praw #TODO: enable/disable modules import praw #TODO: enable/disable modules
import datetime import datetime
from requests import get from requests import get,head
import html.parser import html.parser
from threading import Thread from threading import Thread
class LinkTitler(ModuleBase): class LinkTitler(ModuleBase):
def __init__(self, bot, moduleName): def __init__(self, bot, moduleName):
ModuleBase.__init__(self, bot, moduleName); ModuleBase.__init__(self, bot, moduleName)
self.REQUEST_SIZE_LIMIT = 10*1024
self.hooks=[ModuleHook("PRIVMSG", self.searches)] self.hooks=[ModuleHook("PRIVMSG", self.searches)]
def searches(self, args, prefix, trailing): def searches(self, args, prefix, trailing):
@ -83,15 +84,50 @@ class LinkTitler(ModuleBase):
if match[0] in done: if match[0] in done:
continue continue
done.append(match[0]) done.append(match[0])
d = get(match[0])
titleMatches = re.findall(r'<title>([^<]+)</title>', d.text, re.I) headers = self.url_headers(match[0])
if len(titleMatches)>0 and d.status_code==200:
h = html.parser.HTMLParser() # Don't mess with unknown content types
title = h.unescape(titleMatches[0]).strip() if not "Content-Type" in headers:
if len(title)>0: continue
if "text/html" in headers["Content-Type"]:
# Fetch HTML title
title = self.url_htmltitle(match[0])
if title:
self.bot.act_PRIVMSG(args[0], "%s: \x02%s\x02" % (sender.nick, title)) self.bot.act_PRIVMSG(args[0], "%s: \x02%s\x02" % (sender.nick, title))
if "image/" in headers["Content-Type"]:
self.bot.act_PRIVMSG(args[0], "%s: \x02%s\x02, %s" % (sender.nick, headers["Content-Type"], str(int(int(headers["Content-Length"])/1024))+"KB" if "Content-Length" in headers else "unknown size"))
return return
def url_headers(self, url):
    """Send a HEAD request for *url*, following redirects, and return the
    headers of the response.

    The return value is the header mapping of the response object (not a
    reduced type/size dict); callers in this file look up "Content-Type"
    and "Content-Length" in it.
    """
    self.log.debug("url_headers(%s)" % (url,))
    return head(url=url, allow_redirects=True).headers
def url_htmltitle(self, url):
    """Request a page and return the text of its <title> element.

    The body is streamed and reading stops once more than
    self.REQUEST_SIZE_LIMIT bytes have arrived, so a huge or endless
    response cannot be pulled into memory in full.

    :param url: address of the page to request
    :return: the unescaped, whitespace-stripped title, or None when the
        status code is not 200 or no non-empty title appears in the
        portion that was read
    """
    self.log.debug("url_htmltitle(%s)" % (url,))
    resp = get(url=url, stream=True)
    try:
        # Nothing is reported for non-200 responses, so skip the download
        if resp.status_code != 200:
            return None
        # Fetch no more than roughly the first REQUEST_SIZE_LIMIT bytes;
        # if the title isn't seen by then, you're doing it wrong
        chunks = []
        received = 0
        for chunk in resp.iter_content(1024):
            chunks.append(chunk)
            received += len(chunk)
            if received > self.REQUEST_SIZE_LIMIT:
                break
    finally:
        # The stream may be abandoned part-way through, so release the
        # underlying connection explicitly
        resp.close()
    # Chunks are bytes: join and decode them instead of calling str() on
    # each one, which would embed the b'...' repr and escape non-ASCII text
    raw = b"".join(chunks)
    try:
        data = raw.decode(resp.encoding or "utf-8", "replace")
    except LookupError:
        # The server announced a charset name Python does not know
        data = raw.decode("utf-8", "replace")
    found = re.search(r'<title>([^<]+)</title>', data, re.I)
    if found is None:
        return None
    # html.unescape replaces HTMLParser.unescape, which was removed in 3.9
    title = html.unescape(found.group(1)).strip()
    return title or None
# For youtube # For youtube
def getISOdurationseconds(self, stamp): def getISOdurationseconds(self, stamp):
ISO_8601_period_rx = re.compile( ISO_8601_period_rx = re.compile(