BirdyNet/addons/0.utils.py

global unparseUrl
def unparseUrl(parsedUrl):
	'''Rebuild a URL string from a dict shaped like the output of parseUrl.

	parsedUrl must provide the keys "protocol", "domain", "path",
	"parameters" and "anchor". "parameters" is a sequence of
	[key, value] pairs; a value of None emits the bare key with no "=".
	Empty components are left out of the result.
	'''
	pieces = []

	# "<protocol>://<domain>" is only emitted when a protocol is present.
	scheme = parsedUrl["protocol"]
	if scheme != "":
		pieces.append(scheme + "://" + parsedUrl["domain"])

	# The path is separated from the domain by "/" but stands alone otherwise.
	path = parsedUrl["path"]
	if path != "":
		if pieces:
			pieces.append("/")
		pieces.append(path)

	# First pair is introduced by "?", every later one by "&".
	query = parsedUrl["parameters"]
	if query != "":
		for index, pair in enumerate(query):
			pieces.append("?" if index == 0 else "&")
			pieces.append(pair[0])
			if pair[1] is not None:
				pieces.append("=" + pair[1])

	fragment = parsedUrl["anchor"]
	if fragment != "":
		pieces.append("#" + fragment)

	return "".join(pieces)

global parseUrl
def parseUrl(url):
	'''Split a URL string into its components.

	Returns a dict with the keys:
		"protocol"   - text before the first ":" when that colon comes
		               before any "/", otherwise ""
		"domain"     - first "/"-separated segment after the protocol
		               (only filled in when a protocol was found)
		"path"       - what remains after the domain, or the whole
		               remainder when there is no protocol
		"parameters" - list of [key, value] pairs from the query string;
		               value is None when the pair has no "="
		"anchor"     - text after the first "#"
	'''
	# Peel the anchor off first, then the query string from what is left.
	remainder, _, fragment = url.partition("#")
	remainder, questionMark, query = remainder.partition("?")

	pairs = []
	if questionMark:
		for chunk in query.split("&"):
			key, equals, value = chunk.partition("=")
			pairs.append([key, value if equals else None])

	# A protocol exists only when a ":" shows up before the first "/".
	colonAt = remainder.find(":")
	slashAt = remainder.find("/")
	hasScheme = colonAt != -1 and (slashAt == -1 or colonAt < slashAt)

	scheme = ""
	host = ""
	if hasScheme:
		scheme = remainder[:colonAt]
		# Drop every slash that follows the colon ("//" and any extras).
		afterScheme = remainder[colonAt + 1:].lstrip("/")
		host, _, path = afterScheme.partition("/")
	else:
		path = remainder

	return {
		"protocol": scheme,
		"domain": host,
		"path": path,
		"parameters": pairs,
		"anchor": fragment
	}

global urlJoin
def urlJoin(*args):
	'''Join URL fragments with single "/" separators.

	The first argument is used verbatim. Every later argument has all of
	its leading and trailing "/" characters removed and is then appended
	after a "/". With no arguments the result is ""; with exactly one
	argument that argument is returned unchanged.
	'''
	if not args:
		return ""

	outUrl = args[0]
	for arg in args[1:]:
		# The original condition was `len(arg > 0)`, which compares a str
		# with an int and raises TypeError; the intent was to trim slashes
		# from both ends of the fragment.
		outUrl = outUrl + "/" + arg.strip("/")

	return outUrl

global infoFetcher
def infoFetcher(info):
	'''Stub meant to determine the base URL used to resolve relative URLs.

	As far as this excerpt shows, the body consists only of reference
	notes (bare string literals) and performs no work, so `info` is left
	untouched. The quoted specifications below describe the precedence
	that is still marked "TO BE IMPLEMENTED".

	NOTE(review): the disabled draft below reads info["headers"] and
	writes info["baseUrl"], which suggests `info` is a dict with those
	entries -- confirm against the callers.
	'''

	# Disabled draft, kept for reference:
	# if "Content-Base" in info["headers"]:
	# 	info["baseUrl"] = info["headers"]["Content-Base"]
	# 	return
	# elif "Content-Location" in info["headers"]:
	# 	info["baseUrl"] = "/"
	# 	return

	# Reference note: base URL precedence from the HTML 4.0 working draft.
	''' https://www.w3.org/TR/WD-html40-970917/htmlweb.html
	User agents should calculate the base URL for resolving relative URLs according to the [RFC1808]. The following is a summary of how [RFC1808] applies to HTML. User agents should calculate the base URL according to the following precedences (highest priority to lowest):

		1. The base URL is set by the BASE element. (TO BE IMPLEMENTED)
		2. The base URL is given by an HTTP header (see [RFC2068]). (TO BE IMPLEMENTED)
		3. By default, the base URL is that of the current document. (TO BE IMPLEMENTED)

	Additionally, the OBJECT and APPLET elements define attributes that take precedence over the value set by the BASE element. Please consult the definitions of these elements for more information about URL issues specific to them.
	'''

	# Reference note: Content-Base / Content-Location headers from RFC 2068.
	''' https://tools.ietf.org/html/rfc2068
	14.11 Content-Base

		The Content-Base entity-header field may be used to specify the base
		URI for resolving relative URLs within the entity. This header field
		is described as Base in RFC 1808, which is expected to be revised.

			  Content-Base      = "Content-Base" ":" absoluteURI

		If no Content-Base field is present, the base URI of an entity is
		defined either by its Content-Location (if that Content-Location URI
		is an absolute URI) or the URI used to initiate the request, in that
		order of precedence. Note, however, that the base URI of the contents
		within the entity-body may be redefined within that entity-body.

		14.15 Content-Location

		The Content-Location entity-header field may be used to supply the
		resource location for the entity enclosed in the message. In the case
		where a resource has multiple entities associated with it, and those
		entities actually have separate locations by which they might be
		individually accessed, the server should provide a Content-Location
		for the particular variant which is returned. In addition, a server
		SHOULD provide a Content-Location for the resource corresponding to
		the response entity.

			  Content-Location = "Content-Location" ":"
								( absoluteURI | relativeURI )

		If no Content-Base header field is present, the value of Content-
		Location also defines the base URL for the entity (see section
		14.11).

		The Content-Location value is not a replacement for the original
		requested URI; it is only a statement of the location of the resource
		corresponding to this particular entity at the time of the request.
		Future requests MAY use the Content-Location URI if the desire is to
		identify the source of that particular entity.

		A cache cannot assume that an entity with a Content-Location
		different from the URI used to retrieve it can be used to respond to
		later requests on that Content-Location URI. However, the Content-
		Location can be used to differentiate between multiple entities
		retrieved from a single requested resource, as described in section
		13.6.

		If the Content-Location is a relative URI, the URI is interpreted
		relative to any Content-Base URI provided in the response. If no
		Content-Base is provided, the relative URI is interpreted relative to
		the Request-URI.
	'''