BirdyNet/addons/0.pageDownloader.py

36 lines
873 B
Python
Raw Normal View History

2021-03-18 15:22:56 +00:00
global urllib
import urllib.request
2021-03-20 15:41:10 +00:00
global html
import html
2021-03-18 15:22:56 +00:00
global downloadPage
def downloadPage(url, headers=False):
    """Fetch *url* and describe the outcome as a plain dict.

    Parameters:
        url: URL string (or ``urllib.request.Request``) passed to the opener.
        headers: optional request headers, given either as a mapping of
            name -> value or as an iterable of ``(name, value)`` pairs.
            Any falsy value (the default ``False``) means no extra headers.

    Returns:
        dict with three keys:
            "url"     -- the requested URL, or the target of the last
                         redirect that was accepted;
            "body"    -- the payload as bytes on success; the error page's
                         bytes when the server answered with an HTTP error;
                         the HTML-escaped error message (str) for any other
                         failure while opening or reading;
            "headers" -- list of ``(name, value)`` response header pairs;
                         stays empty when the request failed.
    """
    # OpenerDirector.addheaders is iterated as (name, value) pairs, so a
    # mapping has to be flattened first; assigning a dict directly would
    # iterate its keys and break on unpacking.
    if not headers:
        headerPairs = []
    elif hasattr(headers, "items"):
        headerPairs = list(headers.items())
    else:
        headerPairs = list(headers)

    response = {
        "url": url,
        "body": None,
        "headers": [],
    }

    class rdrh(urllib.request.HTTPRedirectHandler):
        """Redirect handler that records where the request ended up."""

        def redirect_request(self, req, fp, code, msg, hdrs, newurl):
            # A refused redirect raises HTTPError here. It is deliberately
            # not caught: returning the error object as if it were the new
            # Request makes urllib fail later with an unrelated error,
            # whereas letting it propagate reaches the HTTPError handler
            # below, which keeps the server's response body.
            newRequest = urllib.request.HTTPRedirectHandler.redirect_request(
                self, req, fp, code, msg, hdrs, newurl)
            if newRequest is not None:
                # Only report the new location once the redirect is
                # actually going to be followed.
                response["url"] = newurl
            return newRequest

    opener = urllib.request.build_opener(rdrh)
    # Replaces (not extends) the opener's default header list, so only the
    # caller-supplied headers are sent.
    opener.addheaders = headerPairs

    try:
        # The context manager closes the connection even if read() fails.
        with opener.open(url) as requestHandler:
            # .headers exists on every urllib response object, including the
            # ones produced for non-HTTP schemes, unlike getheaders().
            response["headers"] = list(requestHandler.headers.items())
            response["body"] = requestHandler.read()
    except urllib.error.HTTPError as e:
        # 4xx/5xx answers still carry a body worth returning.
        response["body"] = e.read()
    except Exception as e:
        # html.escape() only accepts strings; convert the exception first.
        response["body"] = html.escape(str(e))

    return response