Support for Content-Type and different encodings, no longer trying to render non-text files

This commit is contained in:
Fierelier 2021-03-25 20:34:16 +01:00
parent 5e2f94678c
commit 3f0b844aa9
4 changed files with 61 additions and 3 deletions

View File

@ -185,7 +185,19 @@ class browserWindow(QMainWindow):
self.cStatusBar.showMessage("Rendering...")
self.cStatusBar.repaint()
start = time.time()
self.cDoc.cRenderHtml(response["body"].decode("utf-8",errors="ignore"))
htm = response["body"]
contentType, contentTypeArguments = getContentType(response["headers"],"text")
if not "charset" in contentTypeArguments: contentTypeArguments["charset"] = "utf-8"
print("content-type: " +contentType+ "\n" +prettyJson(contentTypeArguments))
try:
htm = htm.decode(contentTypeArguments["charset"],errors="ignore")
except Exception as e:
print("decoding html as '" +contentTypeArguments["charset"]+ "' failed, trying utf-8...")
htm = htm.decode("utf-8",errors="ignore")
self.cDoc.cRenderHtml(htm,contentType)
end = time.time()
print("Rendering page: " +str(end - start))
self.cStatusBar.showMessage("Ready")

View File

@ -54,7 +54,13 @@ class browserDoc(QTextBrowser):
if curTag in self.blackList: return
self.output += html.escape(data)
def cRenderHtml(self,htm):
def cRenderHtml(self,htm,contentType):
if contentType != "text/html":
self.clear()
self.insertHtml("<html><body><pre>" +html.escape(htm)+ "</pre></body></html>")
self.update()
return
parser = self.cHtmlParser()
parser.feed(htm)

View File

@ -25,12 +25,21 @@ def downloadPage(window,downloadId,url,headers = False):
try:
requestHandler = opener.open(url)
# Check whether to transfer the page to the viewer, or if to open downloader
response["headers"] = requestHandler.getheaders()
contentType, contentTypeArguments = getContentType(response["headers"],"application/octet-stream")
if not contentType.startswith("text/"): # Make the fileDownloader handle the request instead
return
response["body"] = requestHandler.read()
requestHandler.close()
except urllib.error.HTTPError as e:
response["body"] = e.read()
requestHandler.close()
except Exception as e:
response["body"] = html.escape(str(e)).encode("utf-8")
response["body"] = str(e).encode("utf-8")
response["headers"] = [["content-type","text; charset=utf-8"]]
browserWindowsLock.acquire()
if not window in browserWindows:

View File

@ -102,6 +102,37 @@ def urlJoin(*args):
return outUrl
global getContentType
def getContentType(headers,fallback):
    """Split a Content-Type header into its media type and its parameters.

    headers  -- iterable of (name, value) pairs; names are matched
                case-insensitively. If several Content-Type headers are
                present, the last one wins.
    fallback -- raw Content-Type string to parse when no such header exists.

    Returns a tuple (contentType, contentTypeArguments): the media type as a
    string and a dict mapping each parameter name to its value. A parameter
    without "=" maps to "". Values taken from a header are lower-cased, and
    surrounding double quotes are removed from parameter values, so
    'text/html; charset="UTF-8"' yields ("text/html", {"charset": "utf-8"}).

    NOTE: a ";" inside a quoted parameter value is still treated as a
    separator; this is a simple splitter, not a full header parser.
    """
    contentType = fallback
    for header in headers:
        if header[0].lower() == "content-type":
            contentType = header[1].lower()

    # Optional whitespace around HTTP header tokens is a space or a tab.
    whitespace = " \t"
    parts = contentType.split(";")
    contentType = parts[0].strip(whitespace)

    contentTypeArguments = {}
    for part in parts[1:]:
        part = part.strip(whitespace)
        if not part:
            # A trailing or doubled ";" carries no parameter.
            continue
        key, _, value = part.partition("=")
        key = key.strip(whitespace)
        value = value.strip(whitespace)
        # Parameter values may be sent as quoted strings; keeping the quotes
        # would turn e.g. a charset into an unusable codec name.
        if len(value) >= 2 and value[0] == '"' and value[-1] == '"':
            value = value[1:-1]
        contentTypeArguments[key] = value
    return contentType, contentTypeArguments
global infoFetcher
def infoFetcher(info):
''' if "Content-Base" in info["headers"]: