Add HTML parser, properly remove img tags

This commit is contained in:
Fierelier 2021-03-19 03:20:20 +01:00
parent c3f71086d1
commit 25d141d7f8

View File

@ -1,3 +1,9 @@
global HTMLParser
from html.parser import HTMLParser
global html
import html
global browserDoc global browserDoc
class browserDoc(QTextBrowser): class browserDoc(QTextBrowser):
def __init__(self,*args,**kwargs): def __init__(self,*args,**kwargs):
@ -13,9 +19,46 @@ class browserDoc(QTextBrowser):
self.setStyleSheet("QTextBrowser { " +style+ " }") self.setStyleSheet("QTextBrowser { " +style+ " }")
self.anchorClicked.connect(self.cAnchorClicked) self.anchorClicked.connect(self.cAnchorClicked)
class cHtmlParser(HTMLParser):
    """Re-serialise HTML, replacing every <img> tag with a text link.

    Feed markup with feed()/close(), then read the result from `output`.
    An <img> that has a src becomes
    '<a href="SRC">image: LABEL</a>', where LABEL is the alt text or,
    when alt is missing or empty, the part of the URL after the last "/".
    An <img> without a src is dropped. All other start tags, end tags,
    text and character references are written back out; comments and
    declarations are not handled and therefore do not reach the output.
    """

    def __init__(self):
        # convert_charrefs=False keeps "&lt;" / "&amp;" in text as
        # references (delivered to handle_entityref/handle_charref) instead
        # of decoding them into raw "<" / "&" that would corrupt the output.
        HTMLParser.__init__(self, convert_charrefs=False)
        # Fragments are collected in a list and joined on demand, so each
        # append does not copy the whole document built so far.
        self._parts = []

    @property
    def output(self):
        """The HTML generated so far, as a single string."""
        return "".join(self._parts)

    @output.setter
    def output(self, value):
        # Kept assignable because `output` used to be a plain attribute.
        self._parts = [value]

    def _imageLink(self, attrs):
        """Return the replacement link for an <img>, or "" if it has no src."""
        altText = None
        url = None
        for name, value in attrs:
            if value is None:
                # Attribute written without a value (e.g. <img alt>).
                continue
            if name == "alt" and altText is None:
                altText = value
            elif name == "src" and url is None:
                url = value
        if url is None:
            return ""
        if not altText:
            altText = url.rsplit("/")[-1]
        # The parser hands over attribute values already unescaped, so
        # each one is escaped exactly once here.
        return '<a href="' + html.escape(url) + '">image: ' + html.escape(altText) + '</a>'

    def handle_starttag(self, tag, attrs):
        if tag == "img":
            self._parts.append(self._imageLink(attrs))
            return

        self._parts.append("<" + html.escape(tag))
        for name, value in attrs:
            self._parts.append(" " + html.escape(name))
            # A valueless attribute (<input disabled>) arrives as None and
            # is written back without the ="..." part.
            if value is not None:
                self._parts.append('="' + html.escape(value) + '"')
        self._parts.append(">")

    def handle_endtag(self, tag):
        if tag == "img":
            return
        self._parts.append("</" + html.escape(tag) + ">")

    def handle_data(self, data):
        self._parts.append(data)

    def handle_entityref(self, name):
        # Named reference such as &amp; -- pass it through unchanged.
        self._parts.append("&" + name + ";")

    def handle_charref(self, name):
        # Numeric reference such as &#62; or &#x3E; -- pass it through.
        self._parts.append("&#" + name + ";")
def cRenderHtml(self,htm):
    """Replace the widget's contents with `htm` after filtering it.

    The markup is run through self.cHtmlParser and the parser's output,
    not the raw input, is what gets inserted into the widget.
    """
    parser = self.cHtmlParser()
    parser.feed(htm)
    # close() makes the parser process whatever input it is still
    # buffering; without it that trailing input never reaches the output.
    parser.close()
    self.clear()
    self.insertHtml(parser.output)
    self.update()
def cAnchorClicked(self,url): def cAnchorClicked(self,url):
@ -24,8 +67,9 @@ class browserDoc(QTextBrowser):
curUrl = browserWindow.cDocumentInfo["url"] curUrl = browserWindow.cDocumentInfo["url"]
curUrlParsed = parseUrl(curUrl) curUrlParsed = parseUrl(curUrl)
urlParsed = parseUrl(url) urlParsed = parseUrl(url)
#print("navigating from: " +curUrl) print("---")
#print("to: " +url) print("navigating from: " +curUrl)
print("to: " +url)
if urlParsed["protocol"] == "": # is relative if urlParsed["protocol"] == "": # is relative
if url[0] == "#": if url[0] == "#":
curUrlParsed["anchor"] = url[1:] curUrlParsed["anchor"] = url[1:]