Make LinkTitler a little safer
This commit is contained in:
parent
80d99a8cb0
commit
2643883a88
|
@ -114,8 +114,9 @@ class IRCCore(asynchat.async_chat):
|
|||
socket_type = socket.AF_INET6
|
||||
socketInfo = socket.getaddrinfo(self.server, self.port, socket_type)
|
||||
self.create_socket(socket_type, socket.SOCK_STREAM)
|
||||
|
||||
self.log.debug("Socket created")
|
||||
self.connect(socketInfo[0][4])
|
||||
self.log.debug("Connection established")
|
||||
self.asynmap[self._fileno] = self # http://willpython.blogspot.com/2010/08/multiple-event-loops-with-asyncore-and.html
|
||||
|
||||
def handle_connect(self):
|
||||
|
|
|
@ -13,13 +13,14 @@ import re
|
|||
import time
|
||||
import praw #TODO: enable/disable modules
|
||||
import datetime
|
||||
from requests import get
|
||||
from requests import get,head
|
||||
import html.parser
|
||||
from threading import Thread
|
||||
|
||||
class LinkTitler(ModuleBase):
|
||||
def __init__(self, bot, moduleName):
    """Initialise the link titler module and register its PRIVMSG hook.

    Args:
        bot: Bot instance, forwarded unchanged to ModuleBase.
        moduleName: Name of this module, forwarded unchanged to ModuleBase.
    """
    # Initialise the base class exactly once (no trailing semicolon, no
    # repeated call).
    ModuleBase.__init__(self, bot, moduleName)
    # Upper bound, in bytes, on how much of a page body is downloaded when
    # looking for its <title> (10 * 1024 = 10 KiB).
    self.REQUEST_SIZE_LIMIT = 10*1024
    # Hook self.searches to PRIVMSG.
    self.hooks=[ModuleHook("PRIVMSG", self.searches)]
|
||||
|
||||
def searches(self, args, prefix, trailing):
|
||||
|
@ -83,14 +84,49 @@ class LinkTitler(ModuleBase):
|
|||
if match[0] in done:
|
||||
continue
|
||||
done.append(match[0])
|
||||
d = get(match[0])
|
||||
titleMatches = re.findall(r'<title>([^<]+)</title>', d.text, re.I)
|
||||
if len(titleMatches)>0 and d.status_code==200:
|
||||
|
||||
headers = self.url_headers(match[0])
|
||||
|
||||
# Don't mess with unknown content types
|
||||
if not "Content-Type" in headers:
|
||||
continue
|
||||
|
||||
if "text/html" in headers["Content-Type"]:
|
||||
# Fetch HTML title
|
||||
title = self.url_htmltitle(match[0])
|
||||
if title:
|
||||
self.bot.act_PRIVMSG(args[0], "%s: \x02%s\x02" % (sender.nick, title))
|
||||
|
||||
if "image/" in headers["Content-Type"]:
|
||||
self.bot.act_PRIVMSG(args[0], "%s: \x02%s\x02, %s" % (sender.nick, headers["Content-Type"], str(int(int(headers["Content-Length"])/1024))+"KB" if "Content-Length" in headers else "unknown size"))
|
||||
|
||||
return
|
||||
|
||||
def url_headers(self, url):
    """Send a HEAD request for *url* and return the response headers.

    Redirects are followed, so the headers belong to the final response.
    The return value is the ``headers`` attribute of the ``requests``
    response; callers in this class look up "Content-Type" and
    "Content-Length" in it.

    Args:
        url: Address to probe.

    Returns:
        The response header mapping.
    """
    self.log.debug("url_headers(%s)" % (url,))
    # Without a timeout, requests waits indefinitely on a silent server,
    # which would stall the caller.
    resp = head(url=url, allow_redirects=True, timeout=10)
    return resp.headers
|
||||
|
||||
def url_htmltitle(self, url):
    """Fetch the beginning of the page at *url* and return its HTML title.

    The body is streamed in 1 KiB chunks and reading stops once more than
    ``self.REQUEST_SIZE_LIMIT`` bytes have been collected, so a huge or
    endless response cannot be pulled into memory in full.

    Args:
        url: Address of the page to inspect.

    Returns:
        The unescaped title with runs of whitespace collapsed to single
        spaces, or None when the response status is not 200 or no
        non-empty <title> element is found in the bytes that were read.
    """
    self.log.debug("url_htmltitle(%s)" % (url,))
    # Without a timeout, requests waits indefinitely on a silent server.
    resp = get(url=url, stream=True, timeout=10)
    try:
        if resp.status_code != 200:
            return None
        # iter_content yields bytes; keep them as bytes and decode once so
        # chunk boundaries and non-ASCII characters are preserved.
        raw = bytearray()
        for chunk in resp.iter_content(1024):
            raw.extend(chunk)
            if len(raw) > self.REQUEST_SIZE_LIMIT:
                break
        encoding = resp.encoding or "utf-8"
    finally:
        # With stream=True the connection stays checked out until the
        # response is closed or fully consumed.
        resp.close()

    try:
        text = raw.decode(encoding, errors="replace")
    except LookupError:
        # The server advertised a charset Python does not know.
        text = raw.decode("utf-8", errors="replace")

    found = re.search(r'<title(?:\s[^>]*)?>([^<]+)</title>', text, re.I)
    if found is None:
        return None

    # html.unescape replaces HTMLParser.unescape, which no longer exists in
    # current Python 3; ``html`` is bound by the file's ``import html.parser``.
    # Splitting on whitespace after unescaping also removes any CR/LF
    # (literal or entity-encoded) so the title stays on one line when the
    # caller sends it as an IRC message.
    title = " ".join(html.unescape(found.group(1)).split())
    return title or None
|
||||
|
||||
# For youtube
|
||||
def getISOdurationseconds(self, stamp):
|
||||
|
|
Loading…
Reference in New Issue