From 25d141d7f8100f71e738e4dc799b60ab87028c9e Mon Sep 17 00:00:00 2001 From: Fierelier Date: Fri, 19 Mar 2021 03:20:20 +0100 Subject: [PATCH] Add HTML parser, properly remove img tags --- addons/0.documentViewer.QTextBrowser.py | 52 +++++++++++++++++++++++-- 1 file changed, 48 insertions(+), 4 deletions(-) diff --git a/addons/0.documentViewer.QTextBrowser.py b/addons/0.documentViewer.QTextBrowser.py index 803bf96..5d087df 100644 --- a/addons/0.documentViewer.QTextBrowser.py +++ b/addons/0.documentViewer.QTextBrowser.py @@ -1,3 +1,9 @@ +global HTMLParser +from html.parser import HTMLParser + +global html +import html + global browserDoc class browserDoc(QTextBrowser): def __init__(self,*args,**kwargs): @@ -13,9 +19,46 @@ class browserDoc(QTextBrowser): self.setStyleSheet("QTextBrowser { " +style+ " }") self.anchorClicked.connect(self.cAnchorClicked) - def cRenderHtml(self,html): + class cHtmlParser(HTMLParser): + def __init__(self): + HTMLParser.__init__(self) + self.output = "" + + def handle_starttag(self,tag,attrs): + if tag == "img": + altText = False + for attr in attrs: + if attr[0] == "alt": + altText = html.escape(attr[1]) + break + + for attr in attrs: + if attr[0] == "src": + url = attr[1] + if not altText: altText = url.rsplit("/")[-1] + self.output += 'image: ' +html.escape(altText)+ '' + return + + return + + self.output += "<" +tag + for attr in attrs: + self.output += ' ' +html.escape(attr[0])+ '="' +html.escape(attr[1])+ '"' + self.output += ">" + + def handle_endtag(self,tag): + if tag in ["img"]: return + self.output += "" + + def handle_data(self,data): + self.output += data + + def cRenderHtml(self,htm): + parser = self.cHtmlParser() + parser.feed(htm) + self.clear() - self.insertHtml(html.replace("