BirdyNet/addons/0.utils.py

global unparseUrl
def unparseUrl(parsedUrl):
	"""Rebuild a URL string from a parsed-URL dictionary.

	parsedUrl is read through these keys:
		"protocol":   string; "" means there is no protocol/domain prefix
		"domain":     string, only used when a protocol is present
		"path":       string, "" when absent
		"parameters": iterable of [key, value] pairs; a value of None
		              produces a bare key without "="
		"anchor":     string, or False when there is no anchor

	Returns the assembled URL string.
	"""
	# protocol://domain prefix, only emitted when a protocol is known
	origin = ""
	if parsedUrl["protocol"] != "":
		origin = parsedUrl["protocol"] + "://" + parsedUrl["domain"]

	# a separating slash is only needed when both prefix and path exist
	path = parsedUrl["path"]
	if origin != "" and path != "":
		base = origin + "/" + path
	else:
		base = origin + path

	# "?k=v&k2" query string; the first pair is introduced by "?", the rest by "&"
	pairs = []
	for parameter in parsedUrl["parameters"]:
		key, value = parameter[0], parameter[1]
		pairs.append(key if value is None else key + "=" + value)
	query = "?" + "&".join(pairs) if pairs else ""

	# an empty-string anchor still yields a trailing "#"; only False suppresses it
	fragment = ""
	if parsedUrl["anchor"] != False:
		fragment = "#" + parsedUrl["anchor"]

	return base + query + fragment

global parseUrl
def parseUrl(url):
	"""Split a URL string into its components.

	Returns a dictionary with the keys:
		"protocol":   text before the first ":" when that ":" occurs before
		              any "/", otherwise ""
		"domain":     text between the protocol and the next "/", "" when
		              there is no protocol
		"path":       the remainder; for URLs without a protocol this is the
		              whole URL minus query and anchor
		"parameters": list of [key, value] pairs taken from the query
		              string; value is None when the pair has no "="
		"anchor":     text after the first "#", or False when there is none
	"""
	parsed = {
		"protocol": "",
		"domain": "",
		"path": "",
		"parameters": [],
		"anchor": False
	}

	# anchor: everything after the first "#"
	url, hashMark, fragment = url.partition("#")
	if hashMark:
		parsed["anchor"] = fragment

	# parameters: everything after the first "?", split on "&" then on the first "="
	url, questionMark, query = url.partition("?")
	if questionMark:
		for piece in query.split("&"):
			key, equals, value = piece.partition("=")
			parsed["parameters"].append([key, value if equals else None])

	# protocol: present only if a ":" shows up before the first "/"
	colonAt = url.find(":")
	slashAt = url.find("/")
	hasProtocol = colonAt != -1 and (slashAt == -1 or colonAt < slashAt)

	if not hasProtocol:
		# no protocol means no domain either; the remainder is all path
		parsed["path"] = url
		return parsed

	protocol, _, remainder = url.partition(":")
	parsed["protocol"] = protocol
	# drop the slashes that follow the protocol ("//" in "http://")
	remainder = remainder.lstrip("/")

	# domain is up to the next "/", the path is whatever follows it
	domain, _, path = remainder.partition("/")
	parsed["domain"] = domain
	parsed["path"] = path
	return parsed

global urlJoin
def urlJoin(*args):
	"""Join URL pieces with single "/" separators.

	The first argument is used exactly as given. Every following argument
	has its leading and trailing "/" characters removed and is then
	appended after a "/".

	Returns "" when called without arguments.
	"""
	if not args:
		return ""

	outUrl = args[0]
	for arg in args[1:]:
		# the original tested len(arg > 0), which raises TypeError (str vs int)
		# as soon as a second argument is supplied; strip("/") does the
		# intended trimming of slashes on both ends
		outUrl = outUrl + "/" + arg.strip("/")

	return outUrl

global urlAnchorPairTo
def urlAnchorPairTo(url):
	"""Split a URL at its first "#" into a (url, anchor) pair.

	The anchor is False when the URL contains no "#"; it is "" when the
	"#" is the last character.
	"""
	base, marker, fragment = url.partition("#")
	if marker == "":
		# no "#" at all: signal "no anchor" with False rather than ""
		return base, False
	return base, fragment

global urlAnchorPairFrom
def urlAnchorPairFrom(url,anchor):
	"""Combine a URL and an anchor into one string.

	An anchor of False means "no anchor" and returns the URL unchanged;
	any other anchor (including "") is appended after a "#".
	"""
	if anchor == False:
		return url
	return url + "#" + anchor

global getContentType
def getContentType(headers,fallback):
	"""Extract the content type and its arguments from a header list.

	Parameters:
		headers:  iterable of entries whose item 0 is the header name and
		          item 1 the header value
		fallback: content-type string used when no "content-type" header
		          is found (it is not lower-cased, unlike a header value)

	Returns a tuple (contentType, contentTypeArguments): the part before
	the first ";" and a dictionary built from the remaining ";"-separated
	"key=value" pieces. A piece without "=" maps its text to "".
	Only space characters are trimmed around the pieces.
	"""
	rawValue = fallback
	for header in headers:
		# header names compare case-insensitively; the last match wins
		if header[0].lower() == "content-type":
			rawValue = header[1].lower()

	pieces = [piece.strip(" ") for piece in rawValue.split(";")]
	mediaType = pieces[0]

	arguments = {}
	for piece in pieces[1:]:
		key, _, value = piece.partition("=")
		arguments[key.strip(" ")] = value.strip(" ")

	return mediaType, arguments

global navigateLink
def navigateLink(curUrl,url):
	"""Resolve a link target against the URL of the current document.

	Parameters:
		curUrl: URL of the document that contains the link
		url:    link target as written, absolute or relative

	Returns a tuple (navType, url, urlParsed):
		navType -1: the target has its own protocol and is returned as is
		navType  0: "#..."  anchor inside the current document
		navType  1: "//..." other domain, protocol of the current document
		navType  2: "/..."  path on the current domain
		navType  3: anything else, resolved against the current path
		url:        the resolved URL string
		urlParsed:  parsed form of the target; for navType 0 this is the
		            parse of the link as written, not of the resolved URL

	Prints the source and target URL as a navigation trace.
	"""
	urlParsed = parseUrl(url)
	curUrlParsed = parseUrl(curUrl)
	navType = -1
	print("---")
	print("navigating from: " +curUrl)
	print("to: " +url)
	if urlParsed["protocol"] == "": # is relative
		# startswith is used instead of url[0] so an empty link cannot raise IndexError
		if url.startswith("#"): # scroll to anchor
			curUrlParsed["anchor"] = url[1:]
			url = unparseUrl(curUrlParsed)
			navType = 0
		elif url.startswith("//"): # navigate to another domain with the same protocol
			url = curUrlParsed["protocol"] + ":" + url
			urlParsed = parseUrl(url)
			navType = 1
		elif url.startswith("/"): # navigate to another path, relative to the current domain
			urlParsed["protocol"] = curUrlParsed["protocol"]
			urlParsed["domain"] = curUrlParsed["domain"]
			# unparseUrl inserts the "/" after the domain itself
			urlParsed["path"] = urlParsed["path"][1:]
			url = unparseUrl(urlParsed)
			navType = 2
		else: # navigate to another path, relative to the current path
			urlParsed["protocol"] = curUrlParsed["protocol"]
			urlParsed["domain"] = curUrlParsed["domain"]
			# Only the parsed path is merged: the raw link text still carries
			# its "?query" and "#anchor", which unparseUrl adds again from
			# urlParsed and would therefore duplicate.
			relPath = urlParsed["path"]
			basePath = curUrlParsed["path"].rstrip("/")
			if relPath == "":
				# empty or query-only reference: stay on the current path
				urlParsed["path"] = curUrlParsed["path"]
			elif basePath == "":
				# no current path: avoid a leading "/" that would become "//"
				urlParsed["path"] = relPath
			else:
				# NOTE(review): the current path is treated as a directory
				# (its last segment is kept). RFC 3986 section 5.2.3 merges
				# against the parent of the last segment instead; that
				# behavior is left unchanged here.
				urlParsed["path"] = basePath + "/" + relPath
			url = unparseUrl(urlParsed)
			navType = 3

	return navType, url, urlParsed

global infoFetcher
def infoFetcher(info):
	''' if "Content-Base" in info["headers"]:
		info["baseUrl"] = info["headers"]["Content-Base"]
		return
	elif "Content-Location" in info["headers"]:
		info["baseUrl"] = "/"
		return '''

	''' https://www.w3.org/TR/WD-html40-970917/htmlweb.html
	User agents should calculate the base URL for resolving relative URLs according to the [RFC1808]. The following is a summary of how [RFC1808] applies to HTML. User agents should calculate the base URL according to the following precedences (highest priority to lowest):

		1. The base URL is set by the BASE element. (TO BE IMPLEMENTED)
		2. The base URL is given by an HTTP header (see [RFC2068]). (TO BE IMPLEMENTED)
		3. By default, the base URL is that of the current document. (TO BE IMPLEMENTED)

	Additionally, the OBJECT and APPLET elements define attributes that take precedence over the value set by the BASE element. Please consult the definitions of these elements for more information about URL issues specific to them.
	'''

	''' https://tools.ietf.org/html/rfc2068
	14.11 Content-Base

		The Content-Base entity-header field may be used to specify the base
		URI for resolving relative URLs within the entity. This header field
		is described as Base in RFC 1808, which is expected to be revised.

			  Content-Base      = "Content-Base" ":" absoluteURI

		If no Content-Base field is present, the base URI of an entity is
		defined either by its Content-Location (if that Content-Location URI
		is an absolute URI) or the URI used to initiate the request, in that
		order of precedence. Note, however, that the base URI of the contents
		within the entity-body may be redefined within that entity-body.

		14.15 Content-Location

		The Content-Location entity-header field may be used to supply the
		resource location for the entity enclosed in the message. In the case
		where a resource has multiple entities associated with it, and those
		entities actually have separate locations by which they might be
		individually accessed, the server should provide a Content-Location
		for the particular variant which is returned. In addition, a server
		SHOULD provide a Content-Location for the resource corresponding to
		the response entity.

			  Content-Location = "Content-Location" ":"
								( absoluteURI | relativeURI )

		If no Content-Base header field is present, the value of Content-
		Location also defines the base URL for the entity (see section
		14.11).

		The Content-Location value is not a replacement for the original
		requested URI; it is only a statement of the location of the resource
		corresponding to this particular entity at the time of the request.
		Future requests MAY use the Content-Location URI if the desire is to
		identify the source of that particular entity.

		A cache cannot assume that an entity with a Content-Location
		different from the URI used to retrieve it can be used to respond to
		later requests on that Content-Location URI. However, the Content-
		Location can be used to differentiate between multiple entities
		retrieved from a single requested resource, as described in section
		13.6.

		If the Content-Location is a relative URI, the URI is interpreted
		relative to any Content-Base URI provided in the response. If no
		Content-Base is provided, the relative URI is interpreted relative to
		the Request-URI.
	'''