From c4d2463372bdbf3c5bfb8ee7d1042c54fb15c8e6 Mon Sep 17 00:00:00 2001 From: Fierelier Date: Thu, 25 Mar 2021 20:58:22 +0100 Subject: [PATCH] Add setting for default charset --- BirdyNet.ini | 2 ++ BirdyNet.py | 6 +++--- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/BirdyNet.ini b/BirdyNet.ini index 197bc7e..5a83dc8 100644 --- a/BirdyNet.ini +++ b/BirdyNet.ini @@ -5,6 +5,8 @@ home = https://example.com useragent = BirdyNet/$VER ($OS) # Which protocol to use when none is given in the address bar defaultProtocol = https +# Which charset to use when the response's Content-Type header does not specify one. RFC 2616 section 3.7.1 (https://www.w3.org/Protocols/rfc2616/rfc2616-sec3.html#sec3.7.1) defines iso-8859-1 as the default. You may use utf-8 instead if it works better for the pages you visit. The value must be an encoding name that Python can decode with. +defaultCharset = iso-8859-1 [accessibility] # Which background color (CSS color) should be chosen for websites? "default" for system's default. "white" is recommended for compatibility. diff --git a/BirdyNet.py b/BirdyNet.py index c63d956..99e2f16 100644 --- a/BirdyNet.py +++ b/BirdyNet.py @@ -188,14 +188,14 @@ class browserWindow(QMainWindow): htm = response["body"] contentType, contentTypeArguments = getContentType(response["headers"],"text") - if not "charset" in contentTypeArguments: contentTypeArguments["charset"] = "utf-8" + if not "charset" in contentTypeArguments: contentTypeArguments["charset"] = config["default"]["defaultCharset"] print("content-type: " +contentType+ "\n" +prettyJson(contentTypeArguments)) try: htm = htm.decode(contentTypeArguments["charset"],errors="ignore") except Exception as e: - print("decoding html as '" +contentTypeArguments["charset"]+ "' failed, trying utf-8...") - htm = htm.decode("utf-8",errors="ignore") + print("decoding html as '" +contentTypeArguments["charset"]+ "' failed, trying " +config["default"]["defaultCharset"]+ "...") + htm = htm.decode(config["default"]["defaultCharset"],errors="ignore") self.cDoc.cRenderHtml(htm,contentType) end = time.time()