Support for Content-Type and different encodings, no longer trying to render non-text files

2021-03-25 20:34:16 +01:00 · 2021-03-25 20:34:16 +01:00 · 3f0b844aa9
parent 5e2f94678c
commit 3f0b844aa9
4 changed files with 61 additions and 3 deletions
--- a/BirdyNet.py
+++ b/BirdyNet.py
@ -185,7 +185,19 @@ class browserWindow(QMainWindow):
 		self.cStatusBar.showMessage("Rendering...")
 		self.cStatusBar.repaint()
 		start = time.time()
-		self.cDoc.cRenderHtml(response["body"].decode("utf-8",errors="ignore"))
+		
 		htm = response["body"]
 		contentType, contentTypeArguments = getContentType(response["headers"],"text")
 		if not "charset" in contentTypeArguments: contentTypeArguments["charset"] = "utf-8"
 		print("content-type: " +contentType+ "\n" +prettyJson(contentTypeArguments))
 		try:
 			htm = htm.decode(contentTypeArguments["charset"],errors="ignore")
 		except Exception as e:
 			print("decoding html as '" +contentTypeArguments["charset"]+ "' failed, trying utf-8...")
 			htm = htm.decode("utf-8",errors="ignore")
 		self.cDoc.cRenderHtml(htm,contentType)
 		end = time.time()
 		print("Rendering page: " +str(end - start))
 		self.cStatusBar.showMessage("Ready")
--- a/addons/0.documentViewer.QTextBrowser.py
+++ b/addons/0.documentViewer.QTextBrowser.py
@ -54,7 +54,13 @@ class browserDoc(QTextBrowser):
 			if curTag in self.blackList: return
 			self.output += html.escape(data)
-	def cRenderHtml(self,htm):
+	def cRenderHtml(self,htm,contentType):
 		if contentType != "text/html":
 			self.clear()
 			self.insertHtml("<html><body><pre>" +html.escape(htm)+ "</pre></body></html>")
 			self.update()
 			return
 		parser = self.cHtmlParser()
 		parser.feed(htm)
--- a/addons/0.pageDownloader.py
+++ b/addons/0.pageDownloader.py
@ -25,12 +25,21 @@ def downloadPage(window,downloadId,url,headers = False):
 	try:
 		requestHandler = opener.open(url)
 		# Check whether to transfer the page to the viewer, or if to open downloader
 		response["headers"] = requestHandler.getheaders()
 		contentType, contentTypeArguments = getContentType(response["headers"],"application/octet-stream")
 		if not contentType.startswith("text/"): # Make the fileDownloader handle the request instead
 			return
 		response["body"] = requestHandler.read()
 		requestHandler.close()
 	except urllib.error.HTTPError as e:
 		response["body"] = e.read()
 		requestHandler.close()
 	except Exception as e:
-		response["body"] = html.escape(str(e)).encode("utf-8")
+		response["body"] = str(e).encode("utf-8")
 		response["headers"] = [["content-type","text; charset=utf-8"]]
 	browserWindowsLock.acquire()
 	if not window in browserWindows:
--- a/addons/0.utils.py
+++ b/addons/0.utils.py
@ -102,6 +102,37 @@ def urlJoin(*args):
 	return outUrl
 global getContentType
 def getContentType(headers,fallback):
 	"""Return the media type and its parameters from a list of HTTP headers.
 	headers:  iterable of (name, value) pairs; names are matched case-insensitively.
 	fallback: Content-Type string used when no Content-Type header is present.
 	          It is parsed the same way as a header value, but is not lowercased.
 	Returns a tuple (contentType, contentTypeArguments): the media type
 	(e.g. "text/html") and a dict mapping parameter names to values
 	(e.g. {"charset": "utf-8"}). A header value is lowercased as a whole, so
 	parameter names and values taken from a header are lowercase too.
 	A parameter without "=" maps to "". Parameters with an empty name (for
 	example from a trailing ";") are skipped, and one pair of surrounding
 	double quotes is removed from a value, so charset="utf-8" yields utf-8.
 	Quoted values that themselves contain ";" are not supported.
 	"""
 	contentType = fallback
 	for header in headers:
 		# No break on purpose: if the header is repeated, the last one wins.
 		if header[0].lower() == "content-type":
 			contentType = header[1].lower()
 	# First ";"-separated piece is the media type, the rest are parameters.
 	parts = [part.strip() for part in contentType.split(";")]
 	contentType = parts[0]
 	contentTypeArguments = {}
 	for part in parts[1:]:
 		name, _, value = part.partition("=")
 		name = name.strip()
 		value = value.strip()
 		if not name:
 			# Empty segment such as the one produced by a trailing ";".
 			continue
 		if len(value) >= 2 and value[0] == '"' and value[-1] == '"':
 			# Quoted-string form; callers expect the bare value (e.g. a codec name).
 			value = value[1:-1]
 		contentTypeArguments[name] = value
 	return contentType, contentTypeArguments
 global infoFetcher
 def infoFetcher(info):
 	''' if "Content-Base" in info["headers"]: