"Proper" HTML parsing for whitelists and interpretation
This commit is contained in:
parent
ad811cc9fc
commit
a6bb0964cd
@ -23,37 +23,38 @@ class browserDoc(QTextBrowser):
|
||||
def __init__(self):
    """Set up the parser and the state shared by the handler callbacks."""
    HTMLParser.__init__(self)
    # Sanitised markup accumulated by the handler callbacks.
    self.output = ""
    # Elements that never take an end tag; they are not pushed on tagDir.
    # "embed", "track" and "wbr" are void in the HTML standard as well;
    # "command" and "keygen" are obsolete but kept for old documents.
    self.voidElements = [
        "area", "base", "br", "col", "embed", "hr", "img", "input",
        "link", "meta", "param", "command", "keygen", "source",
        "track", "wbr",
    ]
    # Elements whose own markup (and, for non-void ones, text) is suppressed.
    self.blackList = ["img", "script", "style"]
    # Stack of currently open non-void elements, innermost last.
    self.tagDir = []
|
||||
|
||||
def handle_starttag(self, tag, attrs):
    """Append the sanitised form of an opening tag to ``self.output``.

    ``attrs`` is a sequence of ``(name, value)`` pairs; ``value`` is
    ``None`` for an attribute written without a value.

    * ``<img>`` is replaced by a text link ``img:<a href=SRC>ALT</a>``;
      an image without a usable ``src`` produces no output.
    * Non-void tags are recorded on ``self.tagDir`` so that later
      callbacks know which element is currently open.
    * Black-listed tags are recorded but not written out.
    * Every other tag is re-serialised with its name, attribute names
      and attribute values HTML-escaped.
    """
    if tag == "img":
        # Only the first occurrence of each attribute is considered.
        alt_text = next((value for name, value in attrs if name == "alt"), None)
        url = next((value for name, value in attrs if name == "src"), None)
        if url is None:
            # No src (or a valueless one): nothing to link to.
            return
        if not alt_text:
            # Fall back to the last path component of the URL.
            alt_text = url.rsplit("/", 1)[-1]
        # Escape exactly once, here, so "&" in the alt text does not
        # end up as "&amp;amp;".
        self.output += (
            'img:<a href="' + html.escape(url) + '">'
            + html.escape(alt_text) + '</a>'
        )
        return

    if tag not in self.voidElements:
        self.tagDir.append(tag)

    # Checked after the push so the open-tag stack still reflects the
    # black-listed element while its content is being parsed.
    if tag in self.blackList:
        return

    pieces = ["<" + html.escape(tag)]
    for name, value in attrs:
        if value is not None:
            pieces.append(' ' + html.escape(name) + '="' + html.escape(value) + '"')
        else:
            pieces.append(' ' + html.escape(name))
    pieces.append(">")
    self.output += "".join(pieces)
|
||||
|
||||
def handle_endtag(self, tag):
    """Close ``tag``: unwind the open-tag stack and emit the end tag.

    Void elements (and ``img``, which the start-tag handler rewrites as
    a link) have no end tag, so nothing is done for them.  For other
    tags the innermost matching entry on ``self.tagDir`` is removed
    together with everything opened inside it.  The end tag is written
    to ``self.output`` unless the tag is black-listed, mirroring the
    start-tag handler which does not write black-listed tags either.
    """
    if tag == "img" or tag in self.voidElements:
        return

    # Search from the innermost open element outwards.
    for index in range(len(self.tagDir) - 1, -1, -1):
        if self.tagDir[index] == tag:
            # Keep the ancestors; drop the closed element and any
            # still-open descendants.
            del self.tagDir[index:]
            break

    if tag in self.blackList:
        return

    self.output += "</" + html.escape(tag) + ">"
|
||||
|
||||
def handle_data(self, data):
    """Append text content to ``self.output``.

    Text is dropped when the innermost open element on ``self.tagDir``
    is black-listed.  Otherwise it is HTML-escaped before being
    appended: the parser hands over text with character references
    already decoded, so writing it back verbatim would turn input such
    as ``&lt;b&gt;`` into live markup.
    """
    if self.tagDir and self.tagDir[-1] in self.blackList:
        return
    # quote=False: quotes are harmless in text nodes, leave them readable.
    self.output += html.escape(data, quote=False)
|
||||
|
||||
def cRenderHtml(self,htm):
|
||||
|
Loading…
Reference in New Issue
Block a user