pageDownloader overhaul
This commit is contained in:
parent
b7158c6a07
commit
09e9adc3e7
@ -1,27 +1,26 @@
|
|||||||
global urllib
|
global urllib
|
||||||
import urllib.request
|
import urllib.request
|
||||||
|
|
||||||
|
global html
|
||||||
|
import html
|
||||||
|
|
||||||
global downloadPage
|
global downloadPage
|
||||||
def downloadPage(url,headers = False):
|
def downloadPage(url,headers = False):
|
||||||
if not headers: headers = {}
|
if not headers: headers = {}
|
||||||
request = urllib.request.Request(url,headers=headers)
|
response = {
|
||||||
response = None
|
|
||||||
|
|
||||||
try:
|
|
||||||
response = urllib.request.urlopen(request)
|
|
||||||
except urllib.error.HTTPError as e:
|
|
||||||
response = e
|
|
||||||
|
|
||||||
# process headers
|
|
||||||
headers = response.getheaders()
|
|
||||||
headersOut = {}
|
|
||||||
for hl in headers:
|
|
||||||
headersOut[hl[0]] = hl[1]
|
|
||||||
|
|
||||||
out = {
|
|
||||||
"url": url,
|
"url": url,
|
||||||
"headers": headersOut,
|
"body": None,
|
||||||
"body": response.read()
|
"headers": []
|
||||||
}
|
}
|
||||||
|
request = urllib.request.Request(url,headers=headers)
|
||||||
|
requestHandler = None
|
||||||
|
try:
|
||||||
|
requestHandler = urllib.request.urlopen(request)
|
||||||
|
response["headers"] = requestHandler.getheaders()
|
||||||
|
response["body"] = requestHandler.read()
|
||||||
|
except urllib.error.HTTPError as e:
|
||||||
|
response["body"] = e.read()
|
||||||
|
except Exception as e:
|
||||||
|
response["body"] = html.escape(e)
|
||||||
|
|
||||||
return out
|
return response
|
Loading…
Reference in New Issue
Block a user