Support for Content-Type and different encodings, no longer trying to render non-text files
This commit is contained in:
parent
5e2f94678c
commit
3f0b844aa9
14
BirdyNet.py
14
BirdyNet.py
@ -185,7 +185,19 @@ class browserWindow(QMainWindow):
|
||||
self.cStatusBar.showMessage("Rendering...")
|
||||
self.cStatusBar.repaint()
|
||||
start = time.time()
|
||||
self.cDoc.cRenderHtml(response["body"].decode("utf-8",errors="ignore"))
|
||||
|
||||
htm = response["body"]
|
||||
contentType, contentTypeArguments = getContentType(response["headers"],"text")
|
||||
if not "charset" in contentTypeArguments: contentTypeArguments["charset"] = "utf-8"
|
||||
print("content-type: " +contentType+ "\n" +prettyJson(contentTypeArguments))
|
||||
|
||||
try:
|
||||
htm = htm.decode(contentTypeArguments["charset"],errors="ignore")
|
||||
except Exception as e:
|
||||
print("decoding html as '" +contentTypeArguments["charset"]+ "' failed, trying utf-8...")
|
||||
htm = htm.decode("utf-8",errors="ignore")
|
||||
|
||||
self.cDoc.cRenderHtml(htm,contentType)
|
||||
end = time.time()
|
||||
print("Rendering page: " +str(end - start))
|
||||
self.cStatusBar.showMessage("Ready")
|
||||
|
@ -54,7 +54,13 @@ class browserDoc(QTextBrowser):
|
||||
if curTag in self.blackList: return
|
||||
self.output += html.escape(data)
|
||||
|
||||
def cRenderHtml(self,htm):
|
||||
def cRenderHtml(self,htm,contentType):
|
||||
if contentType != "text/html":
|
||||
self.clear()
|
||||
self.insertHtml("<html><body><pre>" +html.escape(htm)+ "</pre></body></html>")
|
||||
self.update()
|
||||
return
|
||||
|
||||
parser = self.cHtmlParser()
|
||||
parser.feed(htm)
|
||||
|
||||
|
@ -25,12 +25,21 @@ def downloadPage(window,downloadId,url,headers = False):
|
||||
|
||||
try:
|
||||
requestHandler = opener.open(url)
|
||||
|
||||
# Check whether to hand the page to the viewer or to let the file downloader handle it
|
||||
response["headers"] = requestHandler.getheaders()
|
||||
contentType, contentTypeArguments = getContentType(response["headers"],"application/octet-stream")
|
||||
if not contentType.startswith("text/"): # Make the fileDownloader handle the request instead
|
||||
return
|
||||
|
||||
response["body"] = requestHandler.read()
|
||||
requestHandler.close()
|
||||
except urllib.error.HTTPError as e:
|
||||
response["body"] = e.read()
|
||||
requestHandler.close()
|
||||
except Exception as e:
|
||||
response["body"] = html.escape(str(e)).encode("utf-8")
|
||||
response["body"] = str(e).encode("utf-8")
|
||||
response["headers"] = [["content-type","text; charset=utf-8"]]
|
||||
|
||||
browserWindowsLock.acquire()
|
||||
if not window in browserWindows:
|
||||
|
@ -102,6 +102,37 @@ def urlJoin(*args):
|
||||
|
||||
return outUrl
|
||||
|
||||
global getContentType
|
||||
def getContentType(headers,fallback):
    """Return the media type and its parameters from a list of HTTP headers.

    headers  -- iterable of (name, value) pairs; the name is matched
                case-insensitively against "content-type". If several
                Content-Type headers are present, the last one wins.
    fallback -- Content-Type string used (as given, not lower-cased) when
                no Content-Type header is found.

    Returns a tuple (contentType, contentTypeArguments): contentType is the
    part before the first ";" (e.g. "text/html"), contentTypeArguments is a
    dict of the ";"-separated "name=value" parameters. A parameter without
    "=" maps to "". Header values are lower-cased before parsing, so both
    the media type and the parameters come back in lower case.
    """
    contentType = fallback
    for header in headers:
        if header[0].lower() == "content-type":
            contentType = header[1].lower()

    # The first segment is the media type, the rest are parameters.
    # strip() also removes tabs, which are legal optional whitespace here.
    contentType, *rawArguments = (part.strip() for part in contentType.split(";"))

    contentTypeArguments = {}
    for arg in rawArguments:
        name, _, value = arg.partition("=")
        name = name.strip()
        value = value.strip()

        # A trailing or doubled ";" produces an empty segment, not a parameter.
        if not name and not value:
            continue

        # Parameter values may be sent as quoted strings (charset="utf-8");
        # drop the quotes so the value can be used directly as a codec name.
        if len(value) >= 2 and value[0] == '"' and value[-1] == '"':
            value = value[1:-1]

        contentTypeArguments[name] = value

    return contentType, contentTypeArguments
|
||||
|
||||
global infoFetcher
|
||||
def infoFetcher(info):
|
||||
''' if "Content-Base" in info["headers"]:
|
||||
|
Loading…
Reference in New Issue
Block a user