Add HTML parser, properly remove img tags
This commit is contained in:
parent
c3f71086d1
commit
25d141d7f8
@ -1,3 +1,9 @@
|
||||
global HTMLParser
|
||||
from html.parser import HTMLParser
|
||||
|
||||
global html
|
||||
import html
|
||||
|
||||
global browserDoc
|
||||
class browserDoc(QTextBrowser):
|
||||
def __init__(self,*args,**kwargs):
|
||||
@ -13,9 +19,46 @@ class browserDoc(QTextBrowser):
|
||||
self.setStyleSheet("QTextBrowser { " +style+ " }")
|
||||
self.anchorClicked.connect(self.cAnchorClicked)
|
||||
|
||||
def cRenderHtml(self,html):
|
||||
class cHtmlParser(HTMLParser):
    """Re-serialise HTML into ``self.output``, replacing each <img> with a link.

    Feed markup with ``feed()`` (and ``close()`` when done), then read the
    rebuilt markup from ``output``.  Start tags, end tags and text are written
    back out; constructs this class defines no handler for (comments,
    doctype, processing instructions) are dropped.

    An ``<img>`` that has a ``src`` becomes
    ``<a href="SRC">image: LABEL</a>`` where LABEL is the ``alt`` text, or
    the last path component of the URL when there is no alt text.  An
    ``<img>`` without a ``src`` is removed entirely.
    """

    # Elements whose content the parser hands to handle_data() verbatim
    # (no character references are decoded), so it must not be re-escaped.
    _RAW_TEXT_TAGS = ("script", "style")

    def __init__(self):
        super().__init__()
        self.output = ""
        self._inRawText = False

    @staticmethod
    def _imageLink(attrs):
        """Return the anchor that replaces an <img>, or "" if it has no src."""
        found = {}
        for name, value in attrs:
            # setdefault keeps the first occurrence of a repeated attribute.
            found.setdefault(name, value)

        url = found.get("src")
        if url is None:
            # No src at all, or a valueless ``src``: nothing to link to.
            return ""

        # The parser has already decoded character references in attribute
        # values, so the label is escaped exactly once, below.
        label = found.get("alt") or url.rsplit("/", 1)[-1] or url
        return '<a href="' + html.escape(url) + '">image: ' + html.escape(label) + '</a>'

    def handle_starttag(self, tag, attrs):
        if tag == "img":
            self.output += self._imageLink(attrs)
            return

        self._inRawText = tag in self._RAW_TEXT_TAGS

        parts = ["<", html.escape(tag)]
        for name, value in attrs:
            parts.append(" " + html.escape(name))
            # Valueless attributes (e.g. ``disabled``) are reported with a
            # value of None; write them back as a bare name.
            if value is not None:
                parts.append('="' + html.escape(value) + '"')
        parts.append(">")
        self.output += "".join(parts)

    def handle_endtag(self, tag):
        if tag == "img":
            return
        self._inRawText = False
        self.output += "</" + html.escape(tag) + ">"

    def handle_data(self, data):
        if self._inRawText:
            self.output += data
            return
        # Text arrives with character references already decoded; escape it
        # again so that e.g. "&lt;img&gt;" in the input stays text instead of
        # turning into a real tag in the output.
        self.output += html.escape(data, quote=False)
|
||||
|
||||
def cRenderHtml(self,htm):
    """Replace the widget's contents with *htm*, passed through cHtmlParser.

    htm -- HTML source text to display.

    The markup is run through ``self.cHtmlParser`` and only the parser's
    ``output`` is inserted.  The earlier ``"<img" -> "<dummy"`` string
    replacement is gone: with the parameter named ``htm``, ``html`` refers
    to the standard-library module, so that call could only raise, and it
    would have inserted the document a second time.
    """
    parser = self.cHtmlParser()
    parser.feed(htm)
    parser.close() # flush any text the parser is still buffering

    self.clear()
    self.insertHtml(parser.output)
    self.update()
|
||||
|
||||
def cAnchorClicked(self,url):
|
||||
@ -24,8 +67,9 @@ class browserDoc(QTextBrowser):
|
||||
curUrl = browserWindow.cDocumentInfo["url"]
|
||||
curUrlParsed = parseUrl(curUrl)
|
||||
urlParsed = parseUrl(url)
|
||||
#print("navigating from: " +curUrl)
|
||||
#print("to: " +url)
|
||||
print("---")
|
||||
print("navigating from: " +curUrl)
|
||||
print("to: " +url)
|
||||
if urlParsed["protocol"] == "": # is relative
|
||||
if url[0] == "#":
|
||||
curUrlParsed["anchor"] = url[1:]
|
||||
|
Loading…
Reference in New Issue
Block a user