Add HTML parser, properly remove img tags
This commit is contained in:
parent
c3f71086d1
commit
25d141d7f8
@ -1,3 +1,9 @@
|
|||||||
|
global HTMLParser
|
||||||
|
from html.parser import HTMLParser
|
||||||
|
|
||||||
|
global html
|
||||||
|
import html
|
||||||
|
|
||||||
global browserDoc
|
global browserDoc
|
||||||
class browserDoc(QTextBrowser):
|
class browserDoc(QTextBrowser):
|
||||||
def __init__(self,*args,**kwargs):
|
def __init__(self,*args,**kwargs):
|
||||||
@ -13,9 +19,46 @@ class browserDoc(QTextBrowser):
|
|||||||
self.setStyleSheet("QTextBrowser { " +style+ " }")
|
self.setStyleSheet("QTextBrowser { " +style+ " }")
|
||||||
self.anchorClicked.connect(self.cAnchorClicked)
|
self.anchorClicked.connect(self.cAnchorClicked)
|
||||||
|
|
||||||
def cRenderHtml(self,html):
|
class cHtmlParser(HTMLParser):
    """Re-serialise HTML while replacing every <img> with a text link.

    Feed markup with feed() (and finish with close()); the rewritten
    markup accumulates in ``self.output``.  An <img> that has a ``src``
    becomes ``<a href="SRC">image: LABEL</a>``, where LABEL is the ``alt``
    text or, when that is missing or empty, the last path segment of the
    URL.  An <img> without a usable ``src`` is dropped.  Comments,
    doctypes and processing instructions are not reproduced.
    """

    # Elements whose content HTMLParser passes to handle_data() verbatim
    # (no character-reference decoding), so it must not be re-escaped.
    _rawTextTags = ("script", "style")

    def __init__(self):
        super().__init__()
        self.output = ""
        # Name of the raw-text element currently open, or None.
        self._rawTextTag = None

    @staticmethod
    def _firstAttr(attrs, name):
        """Return the value of the first attribute called *name*, else None."""
        for key, value in attrs:
            if key == name:
                return value
        return None

    @staticmethod
    def _formatAttr(name, value):
        """Serialise one attribute, including its leading space."""
        # Valueless attributes (<input disabled>) arrive with value None;
        # passing that to html.escape() would raise AttributeError.
        if value is None:
            return " " + html.escape(name)
        return " " + html.escape(name) + '="' + html.escape(value) + '"'

    def handle_starttag(self, tag, attrs):
        """Emit the start tag, or a link in place of an <img> tag.

        attrs is the list of (name, value) pairs supplied by HTMLParser;
        the values are already unescaped, so each one is escaped exactly
        once on the way out.
        """
        if tag == "img":
            url = self._firstAttr(attrs, "src")
            if url is None:
                # Nothing to link to: drop the image entirely.
                return
            label = self._firstAttr(attrs, "alt") or url.rsplit("/", 1)[-1]
            self.output += (
                '<a href="' + html.escape(url) + '">image: '
                + html.escape(label) + "</a>"
            )
            return

        if tag in self._rawTextTags:
            self._rawTextTag = tag

        pieces = ["<", html.escape(tag)]
        pieces.extend(self._formatAttr(name, value) for name, value in attrs)
        pieces.append(">")
        self.output += "".join(pieces)

    def handle_endtag(self, tag):
        """Emit the end tag; </img> is swallowed along with its start tag."""
        if tag == "img":
            return
        if tag == self._rawTextTag:
            self._rawTextTag = None
        self.output += "</" + html.escape(tag) + ">"

    def handle_data(self, data):
        """Emit text content, re-escaped unless inside <script>/<style>."""
        if self._rawTextTag is not None:
            self.output += data
            return
        # Ordinary text reaches us with character references decoded;
        # escape it again so "&lt;" does not turn into live markup.
        self.output += html.escape(data, quote=False)
|
||||||
|
|
||||||
|
def cRenderHtml(self,htm):
    """Replace the browser's content with *htm*, rewritten by cHtmlParser.

    The markup is passed through self.cHtmlParser (which swaps <img> tags
    for text links) and the parser's output is inserted in place of the
    current document.
    """
    parser = self.cHtmlParser()
    parser.feed(htm)
    # feed() may hold back trailing text (e.g. one ending in a bare "&")
    # while waiting for more input; close() flushes it so nothing is lost.
    parser.close()

    self.clear()
    self.insertHtml(parser.output)
    self.update()
|
||||||
|
|
||||||
def cAnchorClicked(self,url):
|
def cAnchorClicked(self,url):
|
||||||
@ -24,8 +67,9 @@ class browserDoc(QTextBrowser):
|
|||||||
curUrl = browserWindow.cDocumentInfo["url"]
|
curUrl = browserWindow.cDocumentInfo["url"]
|
||||||
curUrlParsed = parseUrl(curUrl)
|
curUrlParsed = parseUrl(curUrl)
|
||||||
urlParsed = parseUrl(url)
|
urlParsed = parseUrl(url)
|
||||||
#print("navigating from: " +curUrl)
|
print("---")
|
||||||
#print("to: " +url)
|
print("navigating from: " +curUrl)
|
||||||
|
print("to: " +url)
|
||||||
if urlParsed["protocol"] == "": # is relative
|
if urlParsed["protocol"] == "": # is relative
|
||||||
if url[0] == "#":
|
if url[0] == "#":
|
||||||
curUrlParsed["anchor"] = url[1:]
|
curUrlParsed["anchor"] = url[1:]
|
||||||
|
Loading…
Reference in New Issue
Block a user