Support for Content-Type and different encodings, no longer trying to render non-text files
This commit is contained in:
parent
5e2f94678c
commit
3f0b844aa9
14
BirdyNet.py
14
BirdyNet.py
|
@ -185,7 +185,19 @@ class browserWindow(QMainWindow):
|
||||||
self.cStatusBar.showMessage("Rendering...")
|
self.cStatusBar.showMessage("Rendering...")
|
||||||
self.cStatusBar.repaint()
|
self.cStatusBar.repaint()
|
||||||
start = time.time()
|
start = time.time()
|
||||||
self.cDoc.cRenderHtml(response["body"].decode("utf-8",errors="ignore"))
|
|
||||||
|
htm = response["body"]
|
||||||
|
contentType, contentTypeArguments = getContentType(response["headers"],"text")
|
||||||
|
if not "charset" in contentTypeArguments: contentTypeArguments["charset"] = "utf-8"
|
||||||
|
print("content-type: " +contentType+ "\n" +prettyJson(contentTypeArguments))
|
||||||
|
|
||||||
|
try:
|
||||||
|
htm = htm.decode(contentTypeArguments["charset"],errors="ignore")
|
||||||
|
except Exception as e:
|
||||||
|
print("decoding html as '" +contentTypeArguments["charset"]+ "' failed, trying utf-8...")
|
||||||
|
htm = htm.decode("utf-8",errors="ignore")
|
||||||
|
|
||||||
|
self.cDoc.cRenderHtml(htm,contentType)
|
||||||
end = time.time()
|
end = time.time()
|
||||||
print("Rendering page: " +str(end - start))
|
print("Rendering page: " +str(end - start))
|
||||||
self.cStatusBar.showMessage("Ready")
|
self.cStatusBar.showMessage("Ready")
|
||||||
|
|
|
@ -54,7 +54,13 @@ class browserDoc(QTextBrowser):
|
||||||
if curTag in self.blackList: return
|
if curTag in self.blackList: return
|
||||||
self.output += html.escape(data)
|
self.output += html.escape(data)
|
||||||
|
|
||||||
def cRenderHtml(self,htm):
|
def cRenderHtml(self,htm,contentType):
|
||||||
|
if contentType != "text/html":
|
||||||
|
self.clear()
|
||||||
|
self.insertHtml("<html><body><pre>" +html.escape(htm)+ "</pre></body></html>")
|
||||||
|
self.update()
|
||||||
|
return
|
||||||
|
|
||||||
parser = self.cHtmlParser()
|
parser = self.cHtmlParser()
|
||||||
parser.feed(htm)
|
parser.feed(htm)
|
||||||
|
|
||||||
|
|
|
@ -25,12 +25,21 @@ def downloadPage(window,downloadId,url,headers = False):
|
||||||
|
|
||||||
try:
|
try:
|
||||||
requestHandler = opener.open(url)
|
requestHandler = opener.open(url)
|
||||||
|
|
||||||
|
# Check whether to transfer the page to the viewer, or if to open downloader
|
||||||
response["headers"] = requestHandler.getheaders()
|
response["headers"] = requestHandler.getheaders()
|
||||||
|
contentType, contentTypeArguments = getContentType(response["headers"],"application/octet-stream")
|
||||||
|
if not contentType.startswith("text/"): # Make the fileDownloader handle the request instead
|
||||||
|
return
|
||||||
|
|
||||||
response["body"] = requestHandler.read()
|
response["body"] = requestHandler.read()
|
||||||
|
requestHandler.close()
|
||||||
except urllib.error.HTTPError as e:
|
except urllib.error.HTTPError as e:
|
||||||
response["body"] = e.read()
|
response["body"] = e.read()
|
||||||
|
requestHandler.close()
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
response["body"] = html.escape(str(e)).encode("utf-8")
|
response["body"] = str(e).encode("utf-8")
|
||||||
|
response["headers"] = [["content-type","text; charset=utf-8"]]
|
||||||
|
|
||||||
browserWindowsLock.acquire()
|
browserWindowsLock.acquire()
|
||||||
if not window in browserWindows:
|
if not window in browserWindows:
|
||||||
|
|
|
@ -102,6 +102,37 @@ def urlJoin(*args):
|
||||||
|
|
||||||
return outUrl
|
return outUrl
|
||||||
|
|
||||||
|
global getContentType
|
||||||
|
def getContentType(headers,fallback):
    """Split a Content-Type header into its media type and its parameters.

    headers  -- iterable of (name, value) pairs. The header name is matched
                case-insensitively; if several content-type headers are
                present, the last one wins.
    fallback -- content-type string used verbatim (not lower-cased) when no
                content-type header is found.

    Returns a tuple (contentType, contentTypeArguments):
      contentType          -- the media type with surrounding whitespace
                              removed, e.g. "text/html".
      contentTypeArguments -- dict of parameter name -> value, e.g.
                              {"charset": "utf-8"}. A parameter written
                              without "=" maps to "".
    """
    contentType = fallback
    for header in headers:
        if header[0].lower() == "content-type":
            # The whole header value is lower-cased, so parameter values
            # (e.g. the charset name) come back lower-cased as well.
            contentType = header[1].lower()

    mediaType, _, rawArguments = contentType.partition(";")

    contentTypeArguments = {}
    for arg in rawArguments.split(";"):
        name, _, value = arg.partition("=")
        name = name.strip()
        if not name:
            # Empty segment (e.g. a trailing ";"): nothing to record.
            continue
        value = value.strip()
        # Parameter values may be sent as quoted strings (charset="utf-8");
        # drop the quotes so the value is directly usable as a codec name.
        if len(value) >= 2 and value[0] == '"' and value[-1] == '"':
            value = value[1:-1]
        contentTypeArguments[name] = value

    return mediaType.strip(), contentTypeArguments
|
||||||
|
|
||||||
global infoFetcher
|
global infoFetcher
|
||||||
def infoFetcher(info):
|
def infoFetcher(info):
|
||||||
''' if "Content-Base" in info["headers"]:
|
''' if "Content-Base" in info["headers"]:
|
||||||
|
|
Loading…
Reference in New Issue