Add HTML parser, properly remove img tags

This commit is contained in:
Fierelier 2021-03-19 03:20:20 +01:00
parent c3f71086d1
commit 25d141d7f8
1 changed files with 48 additions and 4 deletions

View File

@ -1,3 +1,9 @@
global HTMLParser
from html.parser import HTMLParser
global html
import html
global browserDoc
class browserDoc(QTextBrowser):
def __init__(self,*args,**kwargs):
@ -13,9 +19,46 @@ class browserDoc(QTextBrowser):
self.setStyleSheet("QTextBrowser { " +style+ " }")
self.anchorClicked.connect(self.cAnchorClicked)
def cRenderHtml(self,html):
class cHtmlParser(HTMLParser):
    """Re-serialise HTML while replacing every <img> with a text link.

    Feed markup with ``feed()``; the rewritten markup accumulates in
    ``self.output``. Each ``<img>`` that carries a ``src`` becomes
    ``<a href="SRC">image: LABEL</a>``, where LABEL is the ``alt`` text or,
    when that is missing or empty, the last path segment of the URL. Images
    without a ``src`` are dropped. Every other tag is written back with its
    attributes re-escaped.
    """

    # Elements whose content the parser hands to handle_data() verbatim
    # (no character references are decoded there), so it must not be escaped.
    _RAW_TEXT_TAGS = ("script", "style")

    def __init__(self):
        HTMLParser.__init__(self)
        self.output = ""
        self._inRawText = False

    def _imageLink(self, attrs):
        """Return the replacement link for an <img>, or "" if it has no src."""
        altText = next((value for name, value in attrs if name == "alt"), None)
        url = next(
            (value for name, value in attrs if name == "src" and value is not None),
            None,
        )
        if url is None:
            return ""
        if not altText:
            altText = url.rsplit("/")[-1]
        # Attribute values arrive already entity-decoded, so escape exactly
        # once here; escaping earlier as well would show "&amp;amp;" to the user.
        return '<a href="' + html.escape(url) + '">image: ' + html.escape(altText) + '</a>'

    def handle_starttag(self, tag, attrs):
        """Write the start tag back out, or a link in place of an <img>."""
        if tag == "img":
            self.output += self._imageLink(attrs)
            return
        parts = ["<" + tag]
        for name, value in attrs:
            if value is None:
                # Valueless attribute such as <input disabled>.
                parts.append(" " + html.escape(name))
            else:
                parts.append(' ' + html.escape(name) + '="' + html.escape(value) + '"')
        parts.append(">")
        self.output += "".join(parts)
        if tag in self._RAW_TEXT_TAGS:
            self._inRawText = True

    def handle_endtag(self, tag):
        """Write the end tag back out; <img> never gets one."""
        if tag in ["img"]:
            return
        if tag in self._RAW_TEXT_TAGS:
            self._inRawText = False
        self.output += "</" + html.escape(tag) + ">"

    def handle_data(self, data):
        """Write text back out, re-escaping what the parser decoded."""
        if self._inRawText:
            self.output += data
        else:
            # The parser has turned "&lt;" into "<"; without re-escaping,
            # literal text would be re-read as markup by the consumer.
            self.output += html.escape(data, quote=False)
def cRenderHtml(self,htm):
    """Replace the widget's contents with a filtered rendering of *htm*.

    The markup is run through ``self.cHtmlParser`` and the parser's
    ``output`` is what gets inserted into the widget.
    """
    parser = self.cHtmlParser()
    parser.feed(htm)
    parser.close()  # flush any text the parser is still buffering at end of input
    self.clear()
    # Insert the parsed markup exactly once. `html` is the imported module
    # in this scope, not the document string, so it cannot be inserted here.
    self.insertHtml(parser.output)
    self.update()
def cAnchorClicked(self,url):
@ -24,8 +67,9 @@ class browserDoc(QTextBrowser):
curUrl = browserWindow.cDocumentInfo["url"]
curUrlParsed = parseUrl(curUrl)
urlParsed = parseUrl(url)
#print("navigating from: " +curUrl)
#print("to: " +url)
print("---")
print("navigating from: " +curUrl)
print("to: " +url)
if urlParsed["protocol"] == "": # is relative
if url[0] == "#":
curUrlParsed["anchor"] = url[1:]