From bdf0081ba84b665e799d47320c94ca4f7449cac4 Mon Sep 17 00:00:00 2001 From: dave Date: Sat, 31 Oct 2015 16:55:33 -0700 Subject: [PATCH] Handle content properly --- pyircbot/modules/LinkTitler.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pyircbot/modules/LinkTitler.py b/pyircbot/modules/LinkTitler.py index 670ea3a..c994356 100755 --- a/pyircbot/modules/LinkTitler.py +++ b/pyircbot/modules/LinkTitler.py @@ -121,12 +121,14 @@ class LinkTitler(ModuleBase): resp = get(url=url, stream=True) # Fetch no more than first 10kb # if the title isn't seen by then, you're doing it wrong - data = "" + data = b"" for chunk in resp.iter_content(1024): - data += str(chunk) + data += chunk if len(data) > self.REQUEST_SIZE_LIMIT: break + data = data.decode('utf-8', "ignore") + titleMatches = re.findall(r'([^<]+)', data, re.I) if len(titleMatches)>0:# and resp.status_code==200: h = html.parser.HTMLParser()