Compare commits

...

2 Commits

Author SHA1 Message Date
Fierelier a6bb0964cd "Proper" HTML parsing for whitelists and interpretation 2021-03-19 19:14:02 +01:00
Fierelier ad811cc9fc Add status bar messages that don't even work yet 2021-03-19 19:12:28 +01:00
2 changed files with 28 additions and 21 deletions

View File

@@ -99,6 +99,7 @@ class browserWindow(QMainWindow):
self.cDoc = browserDoc(self)
self.cStatusBar = QStatusBar(self)
self.cStatusBar.showMessage("Please wait...")
self.cResizeElements()
self.show()
@@ -117,7 +118,6 @@ class browserWindow(QMainWindow):
self.cStatusBar.move(0,self.cHeight - mb)
self.cStatusBar.resize(self.cWidth,mb)
self.cStatusBar.showMessage("Status")
def resizeEvent(self,event):
self.cWidth = self.width()
@@ -127,6 +127,8 @@ class browserWindow(QMainWindow):
def cNavigate(self,event = None):
try:
#print(prettyJson(parseUrl(self.cUrlBar.text())))
self.cStatusBar.showMessage("Downloading...")
self.update()
start = time.time()
response = downloadPage(self.cUrlBar.text(),{"User-Agent": self.cUserAgent})
end = time.time()
@@ -135,10 +137,14 @@ class browserWindow(QMainWindow):
infoFetcher(response)
self.cDocumentInfo = response
self.cStatusBar.showMessage("Rendering...")
self.update()
start = time.time()
self.cDoc.cRenderHtml(response["body"].decode("utf-8",errors="ignore"))
end = time.time()
print("Rendering page: " +str(end - start))
self.cStatusBar.showMessage("Ready")
self.update()
#print(prettyJson(response["headers"]))
except Exception as e:
self.cDoc.cRenderHtml(str(e))

View File

@@ -23,37 +23,38 @@ class browserDoc(QTextBrowser):
def __init__(self):
# Set up the parser state shared by the handle_* callbacks that follow.
HTMLParser.__init__(self)
# Markup accumulated by the callbacks while the document is parsed.
self.output = ""
# Tags that are never pushed onto (or searched for in) tagDir.
self.voidElements = ["area","base","br","col","hr","img","input","link","meta","param","command","keygen","source"]
# Tags whose start tag is not copied to the output; text is also dropped
# while one of these is the innermost open tag.
self.blackList = ["img","script","style"]
# Open non-void tags in the order they were opened; the last entry is the
# innermost one.
self.tagDir = []
def handle_starttag(self,tag,attrs):
# Start-tag callback: appends markup for the opening tag to self.output.
# attrs is indexed as (name, value) pairs; value is compared against None below.
# NOTE(review): this listing looks like a rendered diff whose +/- markers and
# indentation were stripped, so removed and added lines appear interleaved
# (e.g. the two '"<" +tag' emissions and the two attribute emitters below).
# Confirm against the real file before changing any code here.
if tag == "img":
# Images become a text link: the label is the alt text, or the last
# "/"-separated piece of src when no (non-empty) alt is present.
altText = False
for attr in attrs:
if attr[0] == "alt":
altText = html.escape(attr[1])
break
for attr in attrs:
if attr[0] == "src":
url = attr[1]
if not altText: altText = url.rsplit("/")[-1]
# NOTE(review): an alt value was already escaped above and is escaped again
# here, so entities in alt text end up double-escaped — confirm intent.
self.output += 'img:<a href="' +html.escape(url)+ '">' +html.escape(altText)+ '</a>'
return
return
# Record non-void tags as open so later callbacks can see the current tag.
if not tag in self.voidElements:
self.tagDir.append(tag)
self.output += "<" +tag
# Blacklisted tags are tracked in tagDir above but not copied to the output.
if tag in self.blackList: return
self.output += "<" +html.escape(tag)
for attr in attrs:
self.output += " " +html.escape(attr[0])
# A None value is emitted as a bare attribute name, without ="...".
if attr[1] != None:
self.output += ' ' +html.escape(attr[0])+ '="' +html.escape(attr[1])+ '"'
else:
self.output += ' ' +html.escape(attr[0])
self.output += '="' +html.escape(attr[1])+ '"'
self.output += ">"
def handle_endtag(self, tag):
    """Handle a closing tag: unwind the open-tag stack and emit the end tag.

    Args:
        tag: Name of the tag being closed.

    Side effects:
        * ``self.tagDir`` is truncated so that ``tag`` and everything opened
          after it are no longer considered open. An end tag with no
          matching open tag leaves the stack untouched.
        * ``"</tag>"`` (HTML-escaped) is appended to ``self.output``, except
          for ``img``, which is ignored entirely.
    """
    # End tags for <img> are never emitted and never touch the stack.
    if tag == "img":
        return

    if tag not in self.voidElements:
        # Search from the innermost open tag outwards for the nearest match.
        for index in range(len(self.tagDir) - 1, -1, -1):
            if self.tagDir[index] == tag:
                # Keep only the ancestors of the matched tag. The previous
                # slice (index + 1:) kept the entries *after* the match and
                # threw the ancestors away, corrupting the stack.
                self.tagDir = self.tagDir[:index]
                break

    # NOTE(review): the end tag is emitted for every non-img tag, including
    # tags whose start tag may not have been written — confirm intent.
    self.output += "</" + html.escape(tag) + ">"
def handle_data(self, data):
    """Append text content to the output unless it sits in a blacklisted tag.

    Only the innermost open tag (the last entry of ``self.tagDir``) is
    consulted; with no open tags the text is always kept. ``data`` is
    appended to ``self.output`` unchanged.
    """
    innermost = self.tagDir[-1] if len(self.tagDir) > 0 else ""
    if innermost not in self.blackList:
        self.output += data
def cRenderHtml(self,htm):