Handle content properly
This commit is contained in:
parent
b07a6202b4
commit
bdf0081ba8
|
@@ -121,12 +121,14 @@ class LinkTitler(ModuleBase):
|
||||||
resp = get(url=url, stream=True)
|
resp = get(url=url, stream=True)
|
||||||
# Fetch no more than first 10kb
|
# Fetch no more than first 10kb
|
||||||
# if the title isn't seen by then, you're doing it wrong
|
# if the title isn't seen by then, you're doing it wrong
|
||||||
data = ""
|
data = b""
|
||||||
for chunk in resp.iter_content(1024):
|
for chunk in resp.iter_content(1024):
|
||||||
data += str(chunk)
|
data += chunk
|
||||||
if len(data) > self.REQUEST_SIZE_LIMIT:
|
if len(data) > self.REQUEST_SIZE_LIMIT:
|
||||||
break
|
break
|
||||||
|
|
||||||
|
data = data.decode('utf-8', "ignore")
|
||||||
|
|
||||||
titleMatches = re.findall(r'<title>([^<]+)</title>', data, re.I)
|
titleMatches = re.findall(r'<title>([^<]+)</title>', data, re.I)
|
||||||
if len(titleMatches)>0:# and resp.status_code==200:
|
if len(titleMatches)>0:# and resp.status_code==200:
|
||||||
h = html.parser.HTMLParser()
|
h = html.parser.HTMLParser()
|
||||||
|
|
Loading…
Reference in New Issue