BirdyNet/addons/0.utils.py

256 lines
7.8 KiB
Python

# NOTE(review): `global` is a no-op at module level; presumably kept because
# the addon loader executes this file inside another scope - confirm.
global unparseUrl
def unparseUrl(parsedUrl):
    """Rebuild a URL string from a dict shaped like the result of parseUrl.

    parsedUrl keys read here:
      "protocol"   - scheme without "://"; "" when there is none.
      "domain"     - host part; only emitted when "protocol" is non-empty.
      "path"       - path without a leading slash when a protocol is present.
      "parameters" - sequence of [key, value] pairs; a value of None emits
                     the bare key without "=".
      "anchor"     - fragment text, or False for "no fragment". An empty
                     string still emits a trailing "#".

    Returns the assembled URL string.
    """
    url = ""
    if parsedUrl["protocol"]:
        url = parsedUrl["protocol"] + "://" + parsedUrl["domain"]

    path = parsedUrl["path"]
    if path:
        # Only insert the separating slash when something precedes the path.
        url = url + "/" + path if url else path

    parameters = parsedUrl["parameters"]
    if parameters:
        url += "?" + "&".join(
            parameter[0] if parameter[1] is None
            else parameter[0] + "=" + parameter[1]
            for parameter in parameters
        )

    anchor = parsedUrl["anchor"]
    # False (not "") is the "no anchor" marker, so compare identity rather
    # than truthiness: an empty anchor must still produce "#".
    if anchor is not False:
        url += "#" + anchor
    return url
# NOTE(review): `global` is a no-op at module level; presumably kept because
# the addon loader executes this file inside another scope - confirm.
global parseUrl
def parseUrl(url):
    """Split a URL string into its components.

    Returns a dict with the keys:
      "protocol"   - text before the first ":" when that colon appears
                     before any "/"; otherwise "".
      "domain"     - first path segment after the protocol; "" when there
                     is no protocol.
      "path"       - remainder after the domain, or the whole URL (minus
                     query and anchor) when there is no protocol.
      "parameters" - list of [key, value] pairs from the query string;
                     value is None when the pair has no "=".
      "anchor"     - text after the first "#", or False when there is none.

    Caveats that follow from the rules above: every slash directly after
    the protocol is dropped (so "file:///etc/x" yields domain "etc"), and a
    scheme-less "host:8080/x" is read as protocol "host".
    """
    out = {
        "protocol": "",
        "domain": "",
        "path": "",
        "parameters": [],
        "anchor": False,
    }

    # Anchor: everything after the first "#".
    url, hashMark, anchor = url.partition("#")
    if hashMark:
        out["anchor"] = anchor

    # Parameters: everything after the first "?", split on "&".
    url, questionMark, query = url.partition("?")
    if questionMark:
        for arg in query.split("&"):
            argKey, equalsSign, argValue = arg.partition("=")
            out["parameters"].append([argKey, argValue if equalsSign else None])

    # Protocol: present only when a ":" occurs before the first "/".
    colonIndex = url.find(":")
    slashIndex = url.find("/")
    hasProtocol = colonIndex != -1 and (slashIndex == -1 or colonIndex < slashIndex)
    if not hasProtocol:
        out["path"] = url
        return out

    out["protocol"] = url[:colonIndex]
    # Drop the "//" (or any number of slashes) that follows the protocol.
    remainder = url[colonIndex + 1:].lstrip("/")

    # Domain is the first segment; the path is whatever follows it.
    out["domain"], _, out["path"] = remainder.partition("/")
    return out
# NOTE(review): `global` is a no-op at module level; presumably kept because
# the addon loader executes this file inside another scope - confirm.
global urlJoin
def urlJoin(*args):
    """Join URL pieces with single "/" separators.

    The first argument is used verbatim; every later argument has all of
    its leading and trailing slashes removed before being appended after a
    "/". Returns "" when called without arguments.

    The previous implementation evaluated `len(arg > 0)`, which raises
    TypeError for any second argument, so multi-part joins never worked.
    """
    if not args:
        return ""
    head, *tail = args
    return "/".join([head] + [part.strip("/") for part in tail])
# NOTE(review): `global` is a no-op at module level; presumably kept because
# the addon loader executes this file inside another scope - confirm.
global urlAnchorPairTo
def urlAnchorPairTo(url):
    """Split a URL at its first "#" into a (url, anchor) tuple.

    The anchor is the text after the "#" (possibly ""), or False when the
    URL contains no "#" at all.
    """
    base, hashMark, fragment = url.partition("#")
    if not hashMark:
        return base, False
    return base, fragment
# NOTE(review): `global` is a no-op at module level; presumably kept because
# the addon loader executes this file inside another scope - confirm.
global urlAnchorPairFrom
def urlAnchorPairFrom(url,anchor):
    """Append an anchor to a URL, the inverse of splitting it at "#".

    An anchor equal to False means "no anchor" and returns the URL as is;
    any other value (including "") is appended after a "#".
    """
    # Equality with False, not truthiness: "" must still produce "#".
    if anchor == False:
        return url
    return url + "#" + anchor
# NOTE(review): `global` is a no-op at module level; presumably kept because
# the addon loader executes this file inside another scope - confirm.
global getContentType
def getContentType(headers,fallback):
    """Extract the media type and its parameters from response headers.

    headers  - iterable of entries indexed as entry[0] (name) and entry[1]
               (value); names are matched case-insensitively. When several
               Content-Type entries exist, the last one wins.
    fallback - Content-Type string used when no such header is present.
               It is parsed the same way but is not lower-cased.

    Returns (contentType, contentTypeArguments): the media type string and
    a dict of its ";"-separated parameters. A parameter without "=" maps
    to "". Header values are lower-cased as a whole, so parameter values
    are lower-cased too.
    """
    contentType = fallback
    for header in headers:
        if header[0].lower() == "content-type":
            contentType = header[1].lower()

    # Optional whitespace around ";" and "=" may be spaces or tabs.
    whitespace = " \t"
    segments = [segment.strip(whitespace) for segment in contentType.split(";")]
    contentType = segments[0]

    contentTypeArguments = {}
    for segment in segments[1:]:
        if not segment:
            # A trailing or doubled ";" carries no parameter; skip it
            # rather than recording an empty-named argument.
            continue
        argName, _, argValue = segment.partition("=")
        contentTypeArguments[argName.strip(whitespace)] = argValue.strip(whitespace)
    return contentType, contentTypeArguments
# NOTE(review): `global` is a no-op at module level; presumably kept because
# the addon loader executes this file inside another scope - confirm.
global navigateLink
def navigateLink(curUrl,url):
    """Resolve a link target against the URL of the current document.

    curUrl - URL of the document containing the link.
    url    - link target as written (absolute or relative).

    Returns (navType, url, urlParsed):
      navType   -1  url has its own protocol and is returned unchanged
                 0  "#..."  : same document, only the anchor changes
                 1  "//..." : other domain, protocol of the current URL
                 2  "/..."  : path relative to the current domain
                 3  other   : path relative to the current path
      url       the resolved URL string
      urlParsed parseUrl-style dict for the target. For navType 0 this is
                the parse of the link itself, not of the resolved URL.

    Prints the source and target URLs as a trace on every call.
    """
    urlParsed = parseUrl(url)
    curUrlParsed = parseUrl(curUrl)
    navType = -1
    print("---")
    print("navigating from: " +curUrl)
    print("to: " +url)
    if urlParsed["protocol"] == "": # is relative
        # startswith() instead of url[0] so an empty link cannot raise
        # IndexError.
        if url.startswith("#"): # scroll to anchor
            curUrlParsed["anchor"] = url[1:]
            url = unparseUrl(curUrlParsed)
            navType = 0
        elif url.startswith("//"): # navigate to another domain with the same protocol
            url = curUrlParsed["protocol"] + ":" + url
            urlParsed = parseUrl(url)
            navType = 1
        elif url.startswith("/"): # navigate to another path, relative to the current domain
            urlParsed["protocol"] = curUrlParsed["protocol"]
            urlParsed["domain"] = curUrlParsed["domain"]
            # unparseUrl inserts the slash after the domain itself.
            urlParsed["path"] = urlParsed["path"][1:]
            url = unparseUrl(urlParsed)
            navType = 2
        else: # navigate to another path, relative to the current path
            urlParsed["protocol"] = curUrlParsed["protocol"]
            urlParsed["domain"] = curUrlParsed["domain"]
            relativePath = urlParsed["path"]
            if relativePath == "":
                # Empty or query-only link: stay on the current path, and
                # keep the current query unless the link supplies one.
                urlParsed["path"] = curUrlParsed["path"]
                if not urlParsed["parameters"]:
                    urlParsed["parameters"] = curUrlParsed["parameters"]
            else:
                # Join using the parsed path only; the raw link still
                # carries its query/anchor, which unparseUrl adds again.
                # NOTE(review): the current path is treated as a directory
                # (its last segment is kept), unlike RFC 3986 resolution -
                # left unchanged, confirm this is intended.
                basePath = curUrlParsed["path"].rstrip("/")
                if basePath:
                    urlParsed["path"] = basePath + "/" + relativePath
                else:
                    # No current path: avoid a doubled slash after the domain.
                    urlParsed["path"] = relativePath
            url = unparseUrl(urlParsed)
            navType = 3
    return navType, url, urlParsed
# NOTE(review): `global` is a no-op at module level; presumably kept because
# the addon loader executes this file inside another scope - confirm.
global infoFetcher
# Placeholder for computing a document's base URL. The body below holds only
# string literals, so calling the function does nothing and returns None;
# `info` is not read by any live code.
def infoFetcher(info):
    # First literal: a disabled draft that would read info["headers"] and
    # set info["baseUrl"] from the Content-Base / Content-Location headers.
    ''' if "Content-Base" in info["headers"]:
info["baseUrl"] = info["headers"]["Content-Base"]
return
elif "Content-Location" in info["headers"]:
info["baseUrl"] = "/"
return '''
    # Second literal: quoted HTML 4.0 draft text on base-URL precedence
    # (all three steps are marked "TO BE IMPLEMENTED").
    ''' https://www.w3.org/TR/WD-html40-970917/htmlweb.html
User agents should calculate the base URL for resolving relative URLs according to the [RFC1808]. The following is a summary of how [RFC1808] applies to HTML. User agents should calculate the base URL according to the following precedences (highest priority to lowest):
1. The base URL is set by the BASE element. (TO BE IMPLEMENTED)
2. The base URL is given by an HTTP header (see [RFC2068]). (TO BE IMPLEMENTED)
3. By default, the base URL is that of the current document. (TO BE IMPLEMENTED)
Additionally, the OBJECT and APPLET elements define attributes that take precedence over the value set by the BASE element. Please consult the definitions of these elements for more information about URL issues specific to them.
'''
    # Third literal: quoted RFC 2068 sections 14.11 (Content-Base) and
    # 14.15 (Content-Location), kept as reference for the implementation.
    ''' https://tools.ietf.org/html/rfc2068
14.11 Content-Base
The Content-Base entity-header field may be used to specify the base
URI for resolving relative URLs within the entity. This header field
is described as Base in RFC 1808, which is expected to be revised.
Content-Base = "Content-Base" ":" absoluteURI
If no Content-Base field is present, the base URI of an entity is
defined either by its Content-Location (if that Content-Location URI
is an absolute URI) or the URI used to initiate the request, in that
order of precedence. Note, however, that the base URI of the contents
within the entity-body may be redefined within that entity-body.
14.15 Content-Location
The Content-Location entity-header field may be used to supply the
resource location for the entity enclosed in the message. In the case
where a resource has multiple entities associated with it, and those
entities actually have separate locations by which they might be
individually accessed, the server should provide a Content-Location
for the particular variant which is returned. In addition, a server
SHOULD provide a Content-Location for the resource corresponding to
the response entity.
Content-Location = "Content-Location" ":"
( absoluteURI | relativeURI )
If no Content-Base header field is present, the value of Content-
Location also defines the base URL for the entity (see section
14.11).
The Content-Location value is not a replacement for the original
requested URI; it is only a statement of the location of the resource
corresponding to this particular entity at the time of the request.
Future requests MAY use the Content-Location URI if the desire is to
identify the source of that particular entity.
A cache cannot assume that an entity with a Content-Location
different from the URI used to retrieve it can be used to respond to
later requests on that Content-Location URI. However, the Content-
Location can be used to differentiate between multiple entities
retrieved from a single requested resource, as described in section
13.6.
If the Content-Location is a relative URI, the URI is interpreted
relative to any Content-Base URI provided in the response. If no
Content-Base is provided, the relative URI is interpreted relative to
the Request-URI.
'''