From 09e9adc3e751b3b6f056da2ffeb2754a0e28c6c7 Mon Sep 17 00:00:00 2001
From: Fierelier
Date: Sat, 20 Mar 2021 16:41:10 +0100
Subject: [PATCH] pageDownloader overhaul

---
 addons/0.pageDownloader.py | 35 +++++++++++++++++------------------
 1 file changed, 17 insertions(+), 18 deletions(-)

diff --git a/addons/0.pageDownloader.py b/addons/0.pageDownloader.py
index 762397f..110061a 100644
--- a/addons/0.pageDownloader.py
+++ b/addons/0.pageDownloader.py
@@ -1,27 +1,26 @@
 global urllib
 import urllib.request
 
+global html
+import html
+
 global downloadPage
 def downloadPage(url,headers = False):
 	if not headers: headers = {}
-	request = urllib.request.Request(url,headers=headers)
-	response = None
-
-	try:
-		response = urllib.request.urlopen(request)
-	except urllib.error.HTTPError as e:
-		response = e
-
-	# process headers
-	headers = response.getheaders()
-	headersOut = {}
-	for hl in headers:
-		headersOut[hl[0]] = hl[1]
-
-	out = {
+	response = {
 		"url": url,
-		"headers": headersOut,
-		"body": response.read()
+		"body": None,
+		"headers": []
 	}
+	request = urllib.request.Request(url,headers=headers)
+	requestHandler = None
+	try:
+		requestHandler = urllib.request.urlopen(request)
+		response["headers"] = requestHandler.getheaders()
+		response["body"] = requestHandler.read()
+	except urllib.error.HTTPError as e:
+		response["body"] = e.read()
+	except Exception as e:
+		response["body"] = html.escape(e)
 
-	return out
\ No newline at end of file
+	return response
\ No newline at end of file