diff --git a/PowerEditor/src/uchardet/CharDistribution.h b/PowerEditor/src/uchardet/CharDistribution.h index 814aeb39..5b22295d 100644 --- a/PowerEditor/src/uchardet/CharDistribution.h +++ b/PowerEditor/src/uchardet/CharDistribution.h @@ -50,7 +50,7 @@ public: CharDistributionAnalysis() {Reset(PR_FALSE);} //feed a block of data and do distribution analysis - void HandleData(const char* /*aBuf*/, PRUint32 /*aLen*/) {} + void HandleData(const char*, PRUint32) {} //Feed a character with known length void HandleOneChar(const char* aStr, PRUint32 aCharLen) @@ -96,7 +96,7 @@ protected: //we do not handle character base on its original encoding string, but //convert this encoding string to a number, here called order. //This allow multiple encoding of a language to share one frequency table - virtual PRInt32 GetOrder(const char* /*str*/) {return -1;} + virtual PRInt32 GetOrder(const char* ) {return -1;} //If this flag is set to PR_TRUE, detection is done and conclusion has been made PRBool mDone; @@ -128,12 +128,12 @@ public: EUCTWDistributionAnalysis(); protected: - //for EUC-TW encoding, we are interested + //for euc-TW encoding, we are interested // first byte range: 0xc4 -- 0xfe // second byte range: 0xa1 -- 0xfe //no validation needed here. State machine has done that - PRInt32 GetOrder(const char* str) - { if ((unsigned char)*str >= (unsigned char)0xc4) + PRInt32 GetOrder(const char* str) { + if ((unsigned char)*str >= (unsigned char)0xc4) return 94*((unsigned char)str[0]-(unsigned char)0xc4) + (unsigned char)str[1] - (unsigned char)0xa1; else return -1; diff --git a/PowerEditor/src/uchardet/JpCntx.cpp b/PowerEditor/src/uchardet/JpCntx.cpp index f834e3c3..7da04139 100644 --- a/PowerEditor/src/uchardet/JpCntx.cpp +++ b/PowerEditor/src/uchardet/JpCntx.cpp @@ -195,16 +195,16 @@ float JapaneseContextAnalysis::GetConfidence(void) PRInt32 SJISContextAnalysis::GetOrder(const char* str, PRUint32 *charLen) { //find out current char's byte length - if (((unsigned char)*str >= (unsigned char)0x81 && (unsigned char)*str <= (unsigned char)0x9f) || - ((unsigned char)*str >= (unsigned char)0xe0 && (unsigned char)*str <= (unsigned char)0xfc)) + if ((unsigned char)*str >= (unsigned char)0x81 && (unsigned char)*str <= (unsigned char)0x9f || + (unsigned char)*str >= (unsigned char)0xe0 && (unsigned char)*str <= (unsigned char)0xfc ) *charLen = 2; - else + else *charLen = 1; //return its order if it is hiragana - if (*str == '\202' && - (unsigned char)*(str+1) >= (unsigned char)0x9f && - (unsigned char)*(str+1) <= (unsigned char)0xf1) + if (*str == '\202' && + (unsigned char)*(str+1) >= (unsigned char)0x9f && + (unsigned char)*(str+1) <= (unsigned char)0xf1) return (unsigned char)*(str+1) - (unsigned char)0x9f; return -1; } @@ -213,17 +213,17 @@ PRInt32 EUCJPContextAnalysis::GetOrder(const char* str, PRUint32 *charLen) { //find out current char's byte length if ((unsigned char)*str == (unsigned char)0x8e || - ((unsigned char)*str >= (unsigned char)0xa1 && - (unsigned char)*str <= (unsigned char)0xfe)) + (unsigned char)*str >= (unsigned char)0xa1 && + (unsigned char)*str <= (unsigned char)0xfe) *charLen = 2; else if ((unsigned char)*str == (unsigned char)0x8f) - *charLen = 3; + *charLen = 3; else - *charLen = 1; + *charLen = 1; //return its order if it is hiragana if ((unsigned char)*str == (unsigned char)0xa4 && - (unsigned char)*(str+1) >= (unsigned char)0xa1 && + (unsigned char)*(str+1) >= (unsigned char)0xa1 && (unsigned char)*(str+1) <= (unsigned char)0xf3) return (unsigned char)*(str+1) - (unsigned char)0xa1; return -1; diff --git a/PowerEditor/src/uchardet/LangModels/LangBulgarianModel.cpp b/PowerEditor/src/uchardet/LangBulgarianModel.cpp similarity index 89% rename from PowerEditor/src/uchardet/LangModels/LangBulgarianModel.cpp rename to PowerEditor/src/uchardet/LangBulgarianModel.cpp index 18c58ee2..77686607 100644 --- a/PowerEditor/src/uchardet/LangModels/LangBulgarianModel.cpp +++ b/PowerEditor/src/uchardet/LangBulgarianModel.cpp @@ -35,12 +35,12 @@ * * ***** END LICENSE BLOCK ***** */ -#include "../nsSBCharSetProber.h" +#include "nsSBCharSetProber.h" /**************************************************************** -CTR: Control characters that usually does not exist in any text -RET: Carriage/Return -SYM: symbol (punctuation) that does not belong to word -NUM: 0 - 9 +255: Control characters that usually does not exist in any text +254: Carriage/Return +253: symbol (punctuation) that does not belong to word +252: 0 - 9 *****************************************************************/ @@ -50,14 +50,14 @@ NUM: 0 - 9 static const unsigned char Latin5_BulgarianCharToOrderMap[] = { -CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, //00 -CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, //10 -SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, //20 -NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, //30 -SYM, 77, 90, 99,100, 72,109,107,101, 79,185, 81,102, 76, 94, 82, //40 -110,186,108, 91, 74,119, 84, 96,111,187,115,SYM,SYM,SYM,SYM,SYM, //50 -SYM, 65, 69, 70, 66, 63, 68,112,103, 92,194,104, 95, 86, 87, 71, //60 -116,195, 85, 93, 97,113,196,197,198,199,200,SYM,SYM,SYM,SYM,SYM, //70 +255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, //00 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, //10 ++253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, //20 +252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, //30 +253, 77, 90, 99,100, 72,109,107,101, 79,185, 81,102, 76, 94, 82, //40 +110,186,108, 91, 74,119, 84, 96,111,187,115,253,253,253,253,253, //50 +253, 65, 69, 70, 66, 63, 68,112,103, 92,194,104, 95, 86, 87, 71, //60 +116,195, 85, 93, 97,113,196,197,198,199,200,253,253,253,253,253, //70 194,195,196,197,198,199,200,201,202,203,204,205,206,207,208,209, //80 210,211,212,213,214,215,216,217,218,219,220,221,222,223,224,225, //90 81,226,227,228,229,230,105,231,232,233,234,235,236, 45,237,238, //a0 @@ -65,27 +65,27 @@ SYM, 65, 69, 70, 66, 63, 68,112,103, 92,194,104, 95, 86, 87, 71, //60 39, 28, 34, 51, 48, 49, 53, 50, 54, 57, 61,239, 67,240, 60, 56, //c0 1, 18, 9, 20, 11, 3, 23, 15, 2, 26, 12, 10, 14, 6, 4, 13, //d0 7, 8, 5, 19, 29, 25, 22, 21, 27, 24, 17, 75, 52,241, 42, 16, //e0 - 62,242,243,244, 58,245, 98,246,247,248,249,250,251, 91,NUM,SYM, //f0 + 62,242,243,244, 58,245, 98,246,247,248,249,250,251, 91,252,253, //f0 }; static const unsigned char win1251BulgarianCharToOrderMap[] = { -CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, //00 -CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, //10 -SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, //20 -NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, //30 -SYM, 77, 90, 99,100, 72,109,107,101, 79,185, 81,102, 76, 94, 82, //40 -110,186,108, 91, 74,119, 84, 96,111,187,115,SYM,SYM,SYM,SYM,SYM, //50 -SYM, 65, 69, 70, 66, 63, 68,112,103, 92,194,104, 95, 86, 87, 71, //60 -116,195, 85, 93, 97,113,196,197,198,199,200,SYM,SYM,SYM,SYM,SYM, //70 +255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, //00 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, //10 ++253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, //20 +252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, //30 +253, 77, 90, 99,100, 72,109,107,101, 79,185, 81,102, 76, 94, 82, //40 +110,186,108, 91, 74,119, 84, 96,111,187,115,253,253,253,253,253, //50 +253, 65, 69, 70, 66, 63, 68,112,103, 92,194,104, 95, 86, 87, 71, //60 +116,195, 85, 93, 97,113,196,197,198,199,200,253,253,253,253,253, //70 206,207,208,209,210,211,212,213,120,214,215,216,217,218,219,220, //80 -221, 78, 64, 83,121, 98,117,105,ILL,223,224,225,226,227,228,229, //90 +221, 78, 64, 83,121, 98,117,105,222,223,224,225,226,227,228,229, //90 88,230,231,232,233,122, 89,106,234,235,236,237,238, 45,239,240, //a0 73, 80,118,114,241,242,243,244,245, 62, 58,246,247,248,249,250, //b0 31, 32, 35, 43, 37, 44, 55, 47, 40, 59, 33, 46, 38, 36, 41, 30, //c0 - 39, 28, 34, 51, 48, 49, 53, 50, 54, 57, 61,251, 67,NUM, 60, 56, //d0 + 39, 28, 34, 51, 48, 49, 53, 50, 54, 57, 61,251, 67,252, 60, 56, //d0 1, 18, 9, 20, 11, 3, 23, 15, 2, 26, 12, 10, 14, 6, 4, 13, //e0 - 7, 8, 5, 19, 29, 25, 22, 21, 27, 24, 17, 75, 52,SYM, 42, 16, //f0 + 7, 8, 5, 19, 29, 25, 22, 21, 27, 24, 17, 75, 52,253, 42, 16, //f0 }; //Model Table: @@ -226,22 +226,18 @@ static const PRUint8 BulgarianLangModel[] = 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, }; -const SequenceModel Latin5BulgarianModel = -{ +const SequenceModel Latin5BulgarianModel( Latin5_BulgarianCharToOrderMap, BulgarianLangModel, - 64, (float)0.969392, PR_FALSE, "ISO-8859-5" -}; +); -const SequenceModel Win1251BulgarianModel = -{ +const SequenceModel Win1251BulgarianModel( win1251BulgarianCharToOrderMap, BulgarianLangModel, - 64, (float)0.969392, PR_FALSE, - "WINDOWS-1251" -}; + "windows-1251" +); diff --git a/PowerEditor/src/uchardet/LangModels/LangRussianModel.cpp b/PowerEditor/src/uchardet/LangCyrillicModel.cpp similarity index 80% rename from PowerEditor/src/uchardet/LangModels/LangRussianModel.cpp rename to PowerEditor/src/uchardet/LangCyrillicModel.cpp index a5320494..42f28876 100644 --- a/PowerEditor/src/uchardet/LangModels/LangRussianModel.cpp +++ b/PowerEditor/src/uchardet/LangCyrillicModel.cpp @@ -35,7 +35,7 @@ * * ***** END LICENSE BLOCK ***** */ -#include "../nsSBCharSetProber.h" +#include "nsSBCharSetProber.h" @@ -43,18 +43,18 @@ //Character Mapping Table: static const unsigned char KOI8R_CharToOrderMap[] = { -CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, //00 -CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, //10 -SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, //20 -NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, //30 -SYM,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, //40 -155,156,157,158,159,160,161,162,163,164,165,SYM,SYM,SYM,SYM,SYM, //50 -SYM, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, //60 - 67,179, 78, 73,180,181, 79,182,183,184,185,SYM,SYM,SYM,SYM,SYM, //70 +255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, //00 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, //10 ++253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, //20 +252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, //30 +253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, //40 +155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, //50 +253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, //60 + 67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, //70 191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206, //80 207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222, //90 223,224,225, 68,226,227,228,229,230,231,232,233,234,235,236,237, //a0 -238,239,240,241,242,243,244,245,246,247,248,249,250,251,NUM,SYM, //b0 +238,239,240,241,242,243,244,245,246,247,248,249,250,251,252,253, //b0 27, 3, 21, 28, 13, 2, 39, 19, 26, 4, 23, 11, 8, 12, 5, 1, //c0 15, 16, 9, 7, 6, 14, 24, 10, 17, 18, 20, 25, 30, 29, 22, 54, //d0 59, 37, 44, 58, 41, 48, 53, 46, 55, 42, 60, 36, 49, 38, 31, 34, //e0 @@ -63,18 +63,18 @@ SYM, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, //60 static const unsigned char win1251_CharToOrderMap[] = { -CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, //00 -CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, //10 -SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, //20 -NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, //30 -SYM,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, //40 -155,156,157,158,159,160,161,162,163,164,165,SYM,SYM,SYM,SYM,SYM, //50 -SYM, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, //60 - 67,179, 78, 73,180,181, 79,182,183,184,185,SYM,SYM,SYM,SYM,SYM, //70 +255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, //00 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, //10 ++253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, //20 +252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, //30 +253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, //40 +155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, //50 +253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, //60 + 67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, //70 191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206, -207,208,209,210,211,212,213,214,ILL,216,217,218,219,220,221,222, +207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222, 223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238, -239,240,241,242,243,244,245,246, 68,247,248,249,250,251,NUM,SYM, +239,240,241,242,243,244,245,246, 68,247,248,249,250,251,252,253, 37, 44, 33, 46, 41, 48, 56, 51, 42, 60, 36, 49, 38, 31, 34, 35, 45, 32, 40, 52, 53, 55, 58, 50, 57, 63, 70, 62, 61, 47, 59, 43, 3, 21, 10, 19, 13, 2, 24, 20, 4, 23, 11, 8, 12, 5, 1, 15, @@ -83,14 +83,14 @@ SYM, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, //60 static const unsigned char latin5_CharToOrderMap[] = { -CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, //00 -CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, //10 -SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, //20 -NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, //30 -SYM,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, //40 -155,156,157,158,159,160,161,162,163,164,165,SYM,SYM,SYM,SYM,SYM, //50 -SYM, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, //60 - 67,179, 78, 73,180,181, 79,182,183,184,185,SYM,SYM,SYM,SYM,SYM, //70 +255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, //00 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, //10 ++253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, //20 +252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, //30 +253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, //40 +155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, //50 +253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, //60 + 67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, //70 191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206, 207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222, 223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238, @@ -98,39 +98,39 @@ SYM, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, //60 45, 32, 40, 52, 53, 55, 58, 50, 57, 63, 70, 62, 61, 47, 59, 43, 3, 21, 10, 19, 13, 2, 24, 20, 4, 23, 11, 8, 12, 5, 1, 15, 9, 7, 6, 14, 39, 26, 28, 22, 25, 29, 54, 18, 17, 30, 27, 16, -239, 68,240,241,242,243,244,245,246,247,248,249,250,251,NUM,CTR, +239, 68,240,241,242,243,244,245,246,247,248,249,250,251,252,255, }; static const unsigned char macCyrillic_CharToOrderMap[] = { -CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, //00 -CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, //10 -SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, //20 -NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, //30 -SYM,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, //40 -155,156,157,158,159,160,161,162,163,164,165,SYM,SYM,SYM,SYM,SYM, //50 -SYM, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, //60 - 67,179, 78, 73,180,181, 79,182,183,184,185,SYM,SYM,SYM,SYM,SYM, //70 +255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, //00 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, //10 ++253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, //20 +252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, //30 +253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, //40 +155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, //50 +253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, //60 + 67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, //70 37, 44, 33, 46, 41, 48, 56, 51, 42, 60, 36, 49, 38, 31, 34, 35, 45, 32, 40, 52, 53, 55, 58, 50, 57, 63, 70, 62, 61, 47, 59, 43, 191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206, 207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222, 223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238, -239,240,241,242,243,244,245,246,247,248,249,250,251,NUM, 68, 16, +239,240,241,242,243,244,245,246,247,248,249,250,251,252, 68, 16, 3, 21, 10, 19, 13, 2, 24, 20, 4, 23, 11, 8, 12, 5, 1, 15, - 9, 7, 6, 14, 39, 26, 28, 22, 25, 29, 54, 18, 17, 30, 27,CTR, + 9, 7, 6, 14, 39, 26, 28, 22, 25, 29, 54, 18, 17, 30, 27,255, }; static const unsigned char IBM855_CharToOrderMap[] = { -CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, //00 -CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, //10 -SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, //20 -NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, //30 -SYM,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, //40 -155,156,157,158,159,160,161,162,163,164,165,SYM,SYM,SYM,SYM,SYM, //50 -SYM, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, //60 - 67,179, 78, 73,180,181, 79,182,183,184,185,SYM,SYM,SYM,SYM,SYM, //70 +255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, //00 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, //10 ++253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, //20 +252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, //30 +253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, //40 +155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, //50 +253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, //60 + 67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, //70 191,192,193,194, 68,195,196,197,198,199,200,201,202,203,204,205, 206,207,208,209,210,211,212,213,214,215,216,217, 27, 59, 54, 70, 3, 37, 21, 44, 28, 58, 13, 41, 2, 48, 39, 53, 19, 46,218,219, @@ -138,19 +138,19 @@ SYM, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, //60 230,231,232,233,234,235, 11, 36,236,237,238,239,240,241,242,243, 8, 49, 12, 38, 5, 31, 1, 34, 15,244,245,246,247, 35, 16,248, 43, 9, 45, 7, 32, 6, 40, 14, 52, 24, 56, 10, 33, 17, 61,249, -250, 18, 62, 20, 51, 25, 57, 30, 47, 29, 63, 22, 50,251,NUM,CTR, +250, 18, 62, 20, 51, 25, 57, 30, 47, 29, 63, 22, 50,251,252,255, }; static const unsigned char IBM866_CharToOrderMap[] = { -CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, //00 -CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, //10 -SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, //20 -NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, //30 -SYM,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, //40 -155,156,157,158,159,160,161,162,163,164,165,SYM,SYM,SYM,SYM,SYM, //50 -SYM, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, //60 - 67,179, 78, 73,180,181, 79,182,183,184,185,SYM,SYM,SYM,SYM,SYM, //70 +255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, //00 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, //10 ++253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, //20 +252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, //30 +253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, //40 +155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, //50 +253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, //60 + 67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, //70 37, 44, 33, 46, 41, 48, 56, 51, 42, 60, 36, 49, 38, 31, 34, 35, 45, 32, 40, 52, 53, 55, 58, 50, 57, 63, 70, 62, 61, 47, 59, 43, 3, 21, 10, 19, 13, 2, 24, 20, 4, 23, 11, 8, 12, 5, 1, 15, @@ -158,7 +158,7 @@ SYM, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, //60 207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222, 223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238, 9, 7, 6, 14, 39, 26, 28, 22, 25, 29, 54, 18, 17, 30, 27, 16, -239, 68,240,241,242,243,244,245,246,247,248,249,250,251,NUM,CTR, +239, 68,240,241,242,243,244,245,246,247,248,249,250,251,252,255, }; //Model Table: @@ -300,62 +300,50 @@ static const PRUint8 RussianLangModel[] = }; -const SequenceModel Koi8rRussianModel = -{ +const SequenceModel Koi8rModel( KOI8R_CharToOrderMap, RussianLangModel, - 64, (float)0.976601, PR_FALSE, "KOI8-R" -}; +); -const SequenceModel Win1251RussianModel = -{ +const SequenceModel Win1251Model( win1251_CharToOrderMap, RussianLangModel, - 64, (float)0.976601, PR_FALSE, - "WINDOWS-1251" -}; + "windows-1251" +); -const SequenceModel Latin5RussianModel = -{ +const SequenceModel Latin5Model( latin5_CharToOrderMap, RussianLangModel, - 64, (float)0.976601, PR_FALSE, "ISO-8859-5" -}; +); -const SequenceModel MacCyrillicRussianModel = -{ +const SequenceModel MacCyrillicModel( macCyrillic_CharToOrderMap, RussianLangModel, - 64, (float)0.976601, PR_FALSE, - "MAC-CYRILLIC" -}; + "x-mac-cyrillic" +); -const SequenceModel Ibm866RussianModel = -{ +const SequenceModel Ibm866Model( IBM866_CharToOrderMap, RussianLangModel, - 64, (float)0.976601, PR_FALSE, "IBM866" -}; +); -const SequenceModel Ibm855RussianModel = -{ +const SequenceModel Ibm855Model( IBM855_CharToOrderMap, RussianLangModel, - 64, (float)0.976601, PR_FALSE, "IBM855" -}; +); diff --git a/PowerEditor/src/uchardet/LangGreekModel.cpp b/PowerEditor/src/uchardet/LangGreekModel.cpp new file mode 100644 index 00000000..d90ced9d --- /dev/null +++ b/PowerEditor/src/uchardet/LangGreekModel.cpp @@ -0,0 +1,242 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* ***** BEGIN LICENSE BLOCK ***** + * Version: MPL 1.1/GPL 2.0/LGPL 2.1 + * + * The contents of this file are subject to the Mozilla Public License Version + * 1.1 (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. + * + * The Original Code is Mozilla Communicator client code. + * + * The Initial Developer of the Original Code is + * Netscape Communications Corporation. + * Portions created by the Initial Developer are Copyright (C) 1998 + * the Initial Developer. All Rights Reserved. + * + * Contributor(s): + * + * Alternatively, the contents of this file may be used under the terms of + * either the GNU General Public License Version 2 or later (the "GPL"), or + * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), + * in which case the provisions of the GPL or the LGPL are applicable instead + * of those above. If you wish to allow use of your version of this file only + * under the terms of either the GPL or the LGPL, and not to allow others to + * use your version of this file under the terms of the MPL, indicate your + * decision by deleting the provisions above and replace them with the notice + * and other provisions required by the GPL or the LGPL. If you do not delete + * the provisions above, a recipient may use your version of this file under + * the terms of any one of the MPL, the GPL or the LGPL. + * + * ***** END LICENSE BLOCK ***** */ + +#include "nsSBCharSetProber.h" +/**************************************************************** +255: Control characters that usually does not exist in any text +254: Carriage/Return +253: symbol (punctuation) that does not belong to word +252: 0 - 9 + +*****************************************************************/ + +//Character Mapping Table: +static const unsigned char Latin7_CharToOrderMap[] = +{ +255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, //00 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, //10 ++253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, //20 +252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, //30 +253, 82,100,104, 94, 98,101,116,102,111,187,117, 92, 88,113, 85, //40 + 79,118,105, 83, 67,114,119, 95, 99,109,188,253,253,253,253,253, //50 +253, 72, 70, 80, 81, 60, 96, 93, 89, 68,120, 97, 77, 86, 69, 55, //60 + 78,115, 65, 66, 58, 76,106,103, 87,107,112,253,253,253,253,253, //70 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, //80 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, //90 ++253,233, 90,253,253,253,253,253,253,253,253,253,253, 74,253,253, //a0 +253,253,253,253,247,248, 61, 36, 46, 71, 73,253, 54,253,108,123, //b0 +110, 31, 51, 43, 41, 34, 91, 40, 52, 47, 44, 53, 38, 49, 59, 39, //c0 + 35, 48,250, 37, 33, 45, 56, 50, 84, 57,120,121, 17, 18, 22, 15, //d0 +124, 1, 29, 20, 21, 3, 32, 13, 25, 5, 11, 16, 10, 6, 30, 4, //e0 + 9, 8, 14, 7, 2, 12, 28, 23, 42, 24, 64, 75, 19, 26, 27,253, //f0 +}; + + + +static const unsigned char win1253_CharToOrderMap[] = +{ +255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, //00 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, //10 ++253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, //20 +252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, //30 +253, 82,100,104, 94, 98,101,116,102,111,187,117, 92, 88,113, 85, //40 + 79,118,105, 83, 67,114,119, 95, 99,109,188,253,253,253,253,253, //50 +253, 72, 70, 80, 81, 60, 96, 93, 89, 68,120, 97, 77, 86, 69, 55, //60 + 78,115, 65, 66, 58, 76,106,103, 87,107,112,253,253,253,253,253, //70 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, //80 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, //90 ++253,233, 61,253,253,253,253,253,253,253,253,253,253, 74,253,253, //a0 +253,253,253,253,247,253,253, 36, 46, 71, 73,253, 54,253,108,123, //b0 +110, 31, 51, 43, 41, 34, 91, 40, 52, 47, 44, 53, 38, 49, 59, 39, //c0 + 35, 48,250, 37, 33, 45, 56, 50, 84, 57,120,121, 17, 18, 22, 15, //d0 +124, 1, 29, 20, 21, 3, 32, 13, 25, 5, 11, 16, 10, 6, 30, 4, //e0 + 9, 8, 14, 7, 2, 12, 28, 23, 42, 24, 64, 75, 19, 26, 27,253, //f0 +}; + +//Model Table: +//total sequences: 100% +//first 512 sequences: 98.2851% +//first 1024 sequences:1.7001% +//rest sequences: 0.0359% +//negative sequences: 0.0148% +static const PRUint8 GreekLangModel[] = +{ +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,3,2,2,3,3,3,3,3,3,3,3,1,3,3,3,0,2,2,3,3,0,3,0,3,2,0,3,3,3,0, +3,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,3,3,3,3,0,3,3,0,3,2,3,3,0,3,2,3,3,3,0,0,3,0,3,0,3,3,2,0,0,0, +2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0, +0,2,3,2,2,3,3,3,3,3,3,3,3,0,3,3,3,3,0,2,3,3,0,3,3,3,3,2,3,3,3,0, +2,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,2,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,0,2,1,3,3,3,3,2,3,3,2,3,3,2,0, +0,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,3,3,3,0,3,3,3,3,3,3,0,3,3,0,3,3,3,3,3,3,3,3,3,3,0,3,2,3,3,0, +2,0,1,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0, +0,3,3,3,3,3,2,3,0,0,0,0,3,3,0,3,1,3,3,3,0,3,3,0,3,3,3,3,0,0,0,0, +2,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,3,3,3,3,0,3,0,3,3,3,3,3,0,3,2,2,2,3,0,2,3,3,3,3,3,2,3,3,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,3,3,3,3,3,2,2,2,3,3,3,3,0,3,1,3,3,3,3,2,3,3,3,3,3,3,3,2,2,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,3,3,3,3,2,0,3,0,0,0,3,3,2,3,3,3,3,3,0,0,3,2,3,0,2,3,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,0,3,3,3,3,0,0,3,3,0,2,3,0,3,0,3,3,3,0,0,3,0,3,0,2,2,3,3,0,0, +0,0,1,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,3,3,3,3,2,0,3,2,3,3,3,3,0,3,3,3,3,3,0,3,3,2,3,2,3,3,2,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,3,2,3,2,3,3,3,3,3,3,0,2,3,2,3,2,2,2,3,2,3,3,2,3,0,2,2,2,3,0, +2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,3,0,0,0,3,3,3,2,3,3,0,0,3,0,3,0,0,0,3,2,0,3,0,3,0,0,2,0,2,0, +0,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,3,3,3,0,3,3,3,3,3,3,0,3,3,0,3,0,0,0,3,3,0,3,3,3,0,0,1,2,3,0, +3,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,3,3,3,3,2,0,0,3,2,2,3,3,0,3,3,3,3,3,2,1,3,0,3,2,3,3,2,1,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,3,3,0,2,3,3,3,3,3,3,0,0,3,0,3,0,0,0,3,3,0,3,2,3,0,0,3,3,3,0, +3,0,0,0,2,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,3,3,3,0,3,3,3,3,3,3,0,0,3,0,3,0,0,0,3,2,0,3,2,3,0,0,3,2,3,0, +2,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,3,1,2,2,3,3,3,3,3,3,0,2,3,0,3,0,0,0,3,3,0,3,0,2,0,0,2,3,1,0, +2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,0,3,3,3,3,0,3,0,3,3,2,3,0,3,3,3,3,3,3,0,3,3,3,0,2,3,0,0,3,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,0,3,3,3,0,0,3,0,0,0,3,3,0,3,0,2,3,3,0,0,3,0,3,0,3,3,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,3,0,0,0,3,3,3,3,3,3,0,0,3,0,2,0,0,0,3,3,0,3,0,3,0,0,2,0,2,0, +0,0,0,0,1,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,3,3,3,3,3,0,3,0,2,0,3,2,0,3,2,3,2,3,0,0,3,2,3,2,3,3,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,3,0,0,2,3,3,3,3,3,0,0,0,3,0,2,1,0,0,3,2,2,2,0,3,0,0,2,2,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,0,3,3,3,2,0,3,0,3,0,3,3,0,2,1,2,3,3,0,0,3,0,3,0,3,3,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,2,3,3,3,0,3,3,3,3,3,3,0,2,3,0,3,0,0,0,2,1,0,2,2,3,0,0,2,2,2,0, +0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,3,0,0,2,3,3,3,2,3,0,0,1,3,0,2,0,0,0,0,3,0,1,0,2,0,0,1,1,1,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,3,3,3,3,1,0,3,0,0,0,3,2,0,3,2,3,3,3,0,0,3,0,3,2,2,2,1,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,0,3,3,3,0,0,3,0,0,0,0,2,0,2,3,3,2,2,2,2,3,0,2,0,2,2,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,3,3,3,2,0,0,0,0,0,0,2,3,0,2,0,2,3,2,0,0,3,0,3,0,3,1,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,3,2,3,3,2,2,3,0,2,0,3,0,0,0,2,0,0,0,0,1,2,0,2,0,2,0, +0,2,0,2,0,2,2,0,0,1,0,2,2,2,0,2,2,2,0,2,2,2,0,0,2,0,0,1,0,0,0,0, +0,2,0,3,3,2,0,0,0,0,0,0,1,3,0,2,0,2,2,2,0,0,2,0,3,0,0,2,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,3,0,2,3,2,0,2,2,0,2,0,2,2,0,2,0,2,2,2,0,0,0,0,0,0,2,3,0,0,0,2, +0,1,2,0,0,0,0,2,2,0,0,0,2,1,0,2,2,0,0,0,0,0,0,1,0,2,0,0,0,0,0,0, +0,0,2,1,0,2,3,2,2,3,2,3,2,0,0,3,3,3,0,0,3,2,0,0,0,1,1,0,2,0,2,2, +0,2,0,2,0,2,2,0,0,2,0,2,2,2,0,2,2,2,2,0,0,2,0,0,0,2,0,1,0,0,0,0, +0,3,0,3,3,2,2,0,3,0,0,0,2,2,0,2,2,2,1,2,0,0,1,2,2,0,0,3,0,0,0,2, +0,1,2,0,0,0,1,2,0,0,0,0,0,0,0,2,2,0,1,0,0,2,0,0,0,2,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,2,3,3,2,2,0,0,0,2,0,2,3,3,0,2,0,0,0,0,0,0,2,2,2,0,2,2,0,2,0,2, +0,2,2,0,0,2,2,2,2,1,0,0,2,2,0,2,0,0,2,0,0,0,0,0,0,2,0,0,0,0,0,0, +0,2,0,3,2,3,0,0,0,3,0,0,2,2,0,2,0,2,2,2,0,0,2,0,0,0,0,0,0,0,0,2, +0,0,2,2,0,0,2,2,2,0,0,0,0,0,0,2,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,2,0,0,3,2,0,2,2,2,2,2,0,0,0,2,0,0,0,0,2,0,1,0,0,2,0,1,0,0,0, +0,2,2,2,0,2,2,0,1,2,0,2,2,2,0,2,2,2,2,1,2,2,0,0,2,0,0,0,0,0,0,0, +0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0, +0,2,0,2,0,2,2,0,0,0,0,1,2,1,0,0,2,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,3,2,3,0,0,2,0,0,0,2,2,0,2,0,0,0,1,0,0,2,0,2,0,2,2,0,0,0,0, +0,0,2,0,0,0,0,2,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0, +0,2,2,3,2,2,0,0,0,0,0,0,1,3,0,2,0,2,2,0,0,0,1,0,2,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,2,0,2,0,3,2,0,2,0,0,0,0,0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, +0,0,2,0,0,0,0,1,1,0,0,2,1,2,0,2,2,0,1,0,0,1,0,0,0,2,0,0,0,0,0,0, +0,3,0,2,2,2,0,0,2,0,0,0,2,0,0,0,2,3,0,2,0,0,0,0,0,0,2,2,0,0,0,2, +0,1,2,0,0,0,1,2,2,1,0,0,0,2,0,0,2,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,2,1,2,0,2,2,0,2,0,0,2,0,0,0,0,1,2,1,0,2,1,0,0,0,0,0,0,0,0,0,0, +0,0,2,0,0,0,3,1,2,2,0,2,0,0,0,0,2,0,0,0,2,0,0,3,0,0,0,0,2,2,2,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,2,1,0,2,0,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,1,0,0,0,0,0,0,2, +0,2,2,0,0,2,2,2,2,2,0,1,2,0,0,0,2,2,0,1,0,2,0,0,2,2,0,0,0,0,0,0, +0,0,0,0,1,0,0,0,0,0,0,0,3,0,0,2,0,0,0,0,0,0,0,0,2,0,2,0,0,0,0,2, +0,1,2,0,0,0,0,2,2,1,0,1,0,1,0,2,2,2,1,0,0,0,0,0,0,1,0,0,0,0,0,0, +0,2,0,1,2,0,0,0,0,0,0,0,0,0,0,2,0,0,2,2,0,0,0,0,1,0,0,0,0,0,0,2, +0,2,2,0,0,0,0,2,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,2,0,0,0, +0,2,2,2,2,0,0,0,3,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,2,0,0,0,0,0,0,1, +0,0,2,0,0,0,0,1,2,0,0,0,0,0,0,2,2,1,1,0,0,0,0,0,0,1,0,0,0,0,0,0, +0,2,0,2,2,2,0,0,2,0,0,0,0,0,0,0,2,2,2,0,0,0,2,0,0,0,0,0,0,0,0,2, +0,0,1,0,0,0,0,2,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0, +0,3,0,2,0,0,0,0,0,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,2,0,0,0,0,2, +0,0,2,0,0,0,0,2,2,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,2,0,2,2,1,0,0,0,0,0,0,2,0,0,2,0,2,2,2,0,0,0,0,0,0,2,0,0,0,0,2, +0,0,2,0,0,2,0,2,2,0,0,0,0,2,0,2,0,0,0,0,0,2,0,0,0,2,0,0,0,0,0,0, +0,0,3,0,0,0,2,2,0,2,2,0,0,0,0,0,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,2,0,0,0,0,0, +0,2,2,2,2,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,1, +0,0,0,0,0,0,0,2,1,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,2,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0, +0,2,0,0,0,2,0,0,0,0,0,1,0,0,0,0,2,2,0,0,0,1,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,2,0,0,0, +0,2,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,2,0,2,0,0,0, +0,0,0,0,0,0,0,0,2,1,0,0,0,0,0,0,2,0,0,0,1,2,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +}; + +const SequenceModel Latin7Model ( + Latin7_CharToOrderMap, + GreekLangModel, + (float)0.982851, + PR_FALSE, + "ISO-8859-7" +); + +const SequenceModel Win1253Model( + win1253_CharToOrderMap, + GreekLangModel, + (float)0.982851, + PR_FALSE, + "windows-1253" +); diff --git a/PowerEditor/src/uchardet/LangModels/LangHebrewModel.cpp b/PowerEditor/src/uchardet/LangHebrewModel.cpp similarity index 92% rename from PowerEditor/src/uchardet/LangModels/LangHebrewModel.cpp rename to PowerEditor/src/uchardet/LangHebrewModel.cpp index af9ac2b0..99a36e72 100644 --- a/PowerEditor/src/uchardet/LangModels/LangHebrewModel.cpp +++ b/PowerEditor/src/uchardet/LangHebrewModel.cpp @@ -37,14 +37,14 @@ * * ***** END LICENSE BLOCK ***** */ -#include "../nsSBCharSetProber.h" +#include "nsSBCharSetProber.h" /**************************************************************** -CTR: Control characters that usually does not exist in any text -RET: Carriage/Return -SYM: symbol (punctuation) that does not belong to word -NUM: 0 - 9 +255: Control characters that usually does not exist in any text +254: Carriage/Return +253: symbol (punctuation) that does not belong to word +252: 0 - 9 *****************************************************************/ @@ -52,22 +52,22 @@ NUM: 0 - 9 //Character Mapping Table: static const unsigned char win1255_CharToOrderMap[] = { -CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, //00 -CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, //10 -SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, //20 -NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, //30 -SYM, 69, 91, 79, 80, 92, 89, 97, 90, 68,111,112, 82, 73, 95, 85, //40 - 78,121, 86, 71, 67,102,107, 84,114,103,115,SYM,SYM,SYM,SYM,SYM, //50 -SYM, 50, 74, 60, 61, 42, 76, 70, 64, 53,105, 93, 56, 65, 54, 49, //60 - 66,110, 51, 43, 44, 63, 81, 77, 98, 75,108,SYM,SYM,SYM,SYM,SYM, //70 -124,ILL,203,204,205, 40, 58,206,207,208,ILL,210,ILL,ILL,ILL,ILL, -ILL, 83, 52, 47, 46, 72, 32, 94,216,113,ILL,109,ILL,ILL,ILL,ILL, +255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, //00 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, //10 ++253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, //20 +252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, //30 +253, 69, 91, 79, 80, 92, 89, 97, 90, 68,111,112, 82, 73, 95, 85, //40 + 78,121, 86, 71, 67,102,107, 84,114,103,115,253,253,253,253,253, //50 +253, 50, 74, 60, 61, 42, 76, 70, 64, 53,105, 93, 56, 65, 54, 49, //60 + 66,110, 51, 43, 44, 63, 81, 77, 98, 75,108,253,253,253,253,253, //70 +124,202,203,204,205, 40, 58,206,207,208,209,210,211,212,213,214, +215, 83, 52, 47, 46, 72, 32, 94,216,113,217,109,218,219,220,221, 34,116,222,118,100,223,224,117,119,104,125,225,226, 87, 99,227, 106,122,123,228, 55,229,230,101,231,232,120,233, 48, 39, 57,234, 30, 59, 41, 88, 33, 37, 36, 31, 29, 35,235, 62, 28,236,126,237, -238, 38, 45,239,240,241,242,243,127,ILL,ILL,ILL,ILL,ILL,ILL,ILL, +238, 38, 45,239,240,241,242,243,127,244,245,246,247,248,249,250, 9, 8, 20, 16, 3, 2, 24, 14, 22, 1, 25, 15, 4, 11, 6, 23, - 12, 19, 13, 26, 18, 27, 21, 17, 7, 10, 5,ILL,ILL,128, 96,ILL, + 12, 19, 13, 26, 18, 27, 21, 17, 7, 10, 5,251,252,128, 96,253, }; //Model Table: @@ -208,13 +208,10 @@ static const PRUint8 HebrewLangModel[] = 0,0,0,0,0,0,0,0,0,0,1,2,1,0,0,0,0,0,1,1,1,1,1,0,1,0,0,0,1,1,0,0, }; -const SequenceModel Win1255Model = -{ +const SequenceModel Win1255Model( win1255_CharToOrderMap, HebrewLangModel, - 64, (float)0.984004, PR_FALSE, - "WINDOWS-1255" -}; + "windows-1255"); diff --git a/PowerEditor/src/uchardet/LangHungarianModel.cpp b/PowerEditor/src/uchardet/LangHungarianModel.cpp new file mode 100644 index 00000000..856644af --- /dev/null +++ b/PowerEditor/src/uchardet/LangHungarianModel.cpp @@ -0,0 +1,238 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* ***** BEGIN LICENSE BLOCK ***** + * Version: MPL 1.1/GPL 2.0/LGPL 2.1 + * + * The contents of this file are subject to the Mozilla Public License Version + * 1.1 (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. + * + * The Original Code is Mozilla Communicator client code. + * + * The Initial Developer of the Original Code is + * Netscape Communications Corporation. + * Portions created by the Initial Developer are Copyright (C) 1998 + * the Initial Developer. All Rights Reserved. + * + * Contributor(s): + * + * Alternatively, the contents of this file may be used under the terms of + * either the GNU General Public License Version 2 or later (the "GPL"), or + * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), + * in which case the provisions of the GPL or the LGPL are applicable instead + * of those above. If you wish to allow use of your version of this file only + * under the terms of either the GPL or the LGPL, and not to allow others to + * use your version of this file under the terms of the MPL, indicate your + * decision by deleting the provisions above and replace them with the notice + * and other provisions required by the GPL or the LGPL. If you do not delete + * the provisions above, a recipient may use your version of this file under + * the terms of any one of the MPL, the GPL or the LGPL. + * + * ***** END LICENSE BLOCK ***** */ + +#include "nsSBCharSetProber.h" +/**************************************************************** +255: Control characters that usually does not exist in any text +254: Carriage/Return +253: symbol (punctuation) that does not belong to word +252: 0 - 9 + +*****************************************************************/ + +//Character Mapping Table: +static const unsigned char Latin2_HungarianCharToOrderMap[] = +{ +255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, //00 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, //10 ++253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, //20 +252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, //30 +253, 28, 40, 54, 45, 32, 50, 49, 38, 39, 53, 36, 41, 34, 35, 47, + 46, 71, 43, 33, 37, 57, 48, 64, 68, 55, 52,253,253,253,253,253, +253, 2, 18, 26, 17, 1, 27, 12, 20, 9, 22, 7, 6, 13, 4, 8, + 23, 67, 10, 5, 3, 21, 19, 65, 62, 16, 11,253,253,253,253,253, +159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174, +175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190, +191,192,193,194,195,196,197, 75,198,199,200,201,202,203,204,205, + 79,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220, +221, 51, 81,222, 78,223,224,225,226, 44,227,228,229, 61,230,231, +232,233,234, 58,235, 66, 59,236,237,238, 60, 69, 63,239,240,241, + 82, 14, 74,242, 70, 80,243, 72,244, 15, 83, 77, 84, 30, 76, 85, +245,246,247, 25, 73, 42, 24,248,249,250, 31, 56, 29,251,252,253, +}; + +static const unsigned char win1250HungarianCharToOrderMap[] = +{ +255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, //00 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, //10 ++253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, //20 +252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, //30 +253, 28, 40, 54, 45, 32, 50, 49, 38, 39, 53, 36, 41, 34, 35, 47, + 46, 72, 43, 33, 37, 57, 48, 64, 68, 55, 52,253,253,253,253,253, +253, 2, 18, 26, 17, 1, 27, 12, 20, 9, 22, 7, 6, 13, 4, 8, + 23, 67, 10, 5, 3, 21, 19, 65, 62, 16, 11,253,253,253,253,253, +161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176, +177,178,179,180, 78,181, 69,182,183,184,185,186,187,188,189,190, +191,192,193,194,195,196,197, 76,198,199,200,201,202,203,204,205, + 81,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220, +221, 51, 83,222, 80,223,224,225,226, 44,227,228,229, 61,230,231, +232,233,234, 58,235, 66, 59,236,237,238, 60, 70, 63,239,240,241, + 84, 14, 75,242, 71, 82,243, 73,244, 15, 85, 79, 86, 30, 77, 87, +245,246,247, 25, 74, 42, 24,248,249,250, 31, 56, 29,251,252,253, +}; + +//Model Table: +//total sequences: 100% +//first 512 sequences: 94.7368% +//first 1024 sequences:5.2623% +//rest sequences: 0.8894% +//negative sequences: 0.0009% +static const PRUint8 HungarianLangModel[] = +{ +0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, +3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,2,2,3,3,1,1,2,2,2,2,2,1,2, +3,2,2,3,3,3,3,3,2,3,3,3,3,3,3,1,2,3,3,3,3,2,3,3,1,1,3,3,0,1,1,1, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0, +3,2,1,3,3,3,3,3,2,3,3,3,3,3,1,1,2,3,3,3,3,3,3,3,1,1,3,2,0,1,1,1, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0, +3,3,3,3,3,3,3,3,3,3,3,1,1,2,3,3,3,1,3,3,3,3,3,1,3,3,2,2,0,3,2,3, +0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0, +3,3,3,3,3,3,2,3,3,3,2,3,3,2,3,3,3,3,3,2,3,3,2,2,3,2,3,2,0,3,2,2, +0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0, +3,3,3,3,3,3,2,3,3,3,3,3,2,3,3,3,1,2,3,2,2,3,1,2,3,3,2,2,0,3,3,3, +0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, +3,3,3,3,3,3,3,3,3,3,2,2,3,3,3,3,3,3,2,3,3,3,3,2,3,3,3,3,0,2,3,2, +0,0,0,1,1,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, +3,3,3,3,3,3,3,3,3,3,3,1,1,1,3,3,2,1,3,2,2,3,2,1,3,2,2,1,0,3,3,1, +0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, +3,2,2,3,3,3,3,3,1,2,3,3,3,3,1,2,1,3,3,3,3,2,2,3,1,1,3,2,0,1,1,1, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0, +3,3,3,3,3,3,3,3,2,2,3,3,3,3,3,2,1,3,3,3,3,3,2,2,1,3,3,3,0,1,1,2, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,3,2,3,3,3,2,0,3,2,3, +0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,1,0, +3,3,3,3,3,3,2,3,3,3,2,3,2,3,3,3,1,3,2,2,2,3,1,1,3,3,1,1,0,3,3,2, +0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, +3,3,3,3,3,3,3,2,3,3,3,2,3,2,3,3,3,2,3,3,3,3,3,1,2,3,2,2,0,2,2,2, +0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, +3,3,3,2,2,2,3,1,3,3,2,2,1,3,3,3,1,1,3,1,2,3,2,3,2,2,2,1,0,2,2,2, +0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0, +3,1,1,3,3,3,3,3,1,2,3,3,3,3,1,2,1,3,3,3,2,2,3,2,1,0,3,2,0,1,1,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,1,1,3,3,3,3,3,1,2,3,3,3,3,1,1,0,3,3,3,3,0,2,3,0,0,2,1,0,1,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,2,2,3,3,2,2,2,2,3,3,0,1,2,3,2,3,2,2,3,2,1,2,0,2,2,2, +0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0, +3,3,3,3,3,3,1,2,3,3,3,2,1,2,3,3,2,2,2,3,2,3,3,1,3,3,1,1,0,2,3,2, +0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, +3,3,3,1,2,2,2,2,3,3,3,1,1,1,3,3,1,1,3,1,1,3,2,1,2,3,1,1,0,2,2,2, +0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, +3,3,3,2,1,2,1,1,3,3,1,1,1,1,3,3,1,1,2,2,1,2,1,1,2,2,1,1,0,2,2,1, +0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, +3,3,3,1,1,2,1,1,3,3,1,0,1,1,3,3,2,0,1,1,2,3,1,0,2,2,1,0,0,1,3,2, +0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, +3,2,1,3,3,3,3,3,1,2,3,2,3,3,2,1,1,3,2,3,2,1,2,2,0,1,2,1,0,0,1,1, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0, +3,3,3,3,2,2,2,2,3,1,2,2,1,1,3,3,0,3,2,1,2,3,2,1,3,3,1,1,0,2,1,3, +0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, +3,3,3,2,2,2,3,2,3,3,3,2,1,1,3,3,1,1,1,2,2,3,2,3,2,2,2,1,0,2,2,1, +0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, +1,0,0,3,3,3,3,3,0,0,3,3,2,3,0,0,0,2,3,3,1,0,1,2,0,0,1,1,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,1,2,3,3,3,3,3,1,2,3,3,2,2,1,1,0,3,3,2,2,1,2,2,1,0,2,2,0,1,1,1, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,2,2,1,3,1,2,3,3,2,2,1,1,2,2,1,1,1,1,3,2,1,1,1,1,2,1,0,1,2,1, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0, +2,3,3,1,1,1,1,1,3,3,3,0,1,1,3,3,1,1,1,1,1,2,2,0,3,1,1,2,0,2,1,1, +0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, +3,1,0,1,2,1,2,2,0,1,2,3,1,2,0,0,0,2,1,1,1,1,1,2,0,0,1,1,0,0,0,0, +1,2,1,2,2,2,1,2,1,2,0,2,0,2,2,1,1,2,1,1,2,1,1,1,0,1,0,0,0,1,1,0, +1,1,1,2,3,2,3,3,0,1,2,2,3,1,0,1,0,2,1,2,2,0,1,1,0,0,1,1,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +1,0,0,3,3,2,2,1,0,0,3,2,3,2,0,0,0,1,1,3,0,0,1,1,0,0,2,1,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,1,1,2,2,3,3,1,0,1,3,2,3,1,1,1,0,1,1,1,1,1,3,1,0,0,2,2,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,1,1,1,2,2,2,1,0,1,2,3,3,2,0,0,0,2,1,1,1,2,1,1,1,0,1,1,1,0,0,0, +1,2,2,2,2,2,1,1,1,2,0,2,1,1,1,1,1,2,1,1,1,1,1,1,0,1,1,1,0,0,1,1, +3,2,2,1,0,0,1,1,2,2,0,3,0,1,2,1,1,0,0,1,1,1,0,1,1,1,1,0,2,1,1,1, +2,2,1,1,1,2,1,2,1,1,1,1,1,1,1,2,1,1,1,2,3,1,1,1,1,1,1,1,1,1,0,1, +2,3,3,0,1,0,0,0,3,3,1,0,0,1,2,2,1,0,0,0,0,2,0,0,1,1,1,0,2,1,1,1, +2,1,1,1,1,1,1,2,1,1,0,1,1,0,1,1,1,0,1,2,1,1,0,1,1,1,1,1,1,1,0,1, +2,3,3,0,1,0,0,0,2,2,0,0,0,0,1,2,2,0,0,0,0,1,0,0,1,1,0,0,2,0,1,0, +2,1,1,1,1,2,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,2,0,1,1,1,1,1,0,1, +3,2,2,0,1,0,1,0,2,3,2,0,0,1,2,2,1,0,0,1,1,1,0,0,2,1,0,1,2,2,1,1, +2,1,1,1,1,1,1,2,1,1,1,1,1,1,0,2,1,0,1,1,0,1,1,1,0,1,1,2,1,1,0,1, +2,2,2,0,0,1,0,0,2,2,1,1,0,0,2,1,1,0,0,0,1,2,0,0,2,1,0,0,2,1,1,1, +2,1,1,1,1,2,1,2,1,1,1,2,2,1,1,2,1,1,1,2,1,1,1,1,1,1,1,1,1,1,0,1, +1,2,3,0,0,0,1,0,3,2,1,0,0,1,2,1,1,0,0,0,0,2,1,0,1,1,0,0,2,1,2,1, +1,1,0,0,0,1,0,1,1,1,1,1,2,0,0,1,0,0,0,2,0,0,1,1,1,1,1,1,1,1,0,1, +3,0,0,2,1,2,2,1,0,0,2,1,2,2,0,0,0,2,1,1,1,0,1,1,0,0,1,1,2,0,0,0, +1,2,1,2,2,1,1,2,1,2,0,1,1,1,1,1,1,1,1,1,2,1,1,0,0,1,1,1,1,0,0,1, +1,3,2,0,0,0,1,0,2,2,2,0,0,0,2,2,1,0,0,0,0,3,1,1,1,1,0,0,2,1,1,1, +2,1,0,1,1,1,0,1,1,1,1,1,1,1,0,2,1,0,0,1,0,1,1,0,1,1,1,1,1,1,0,1, +2,3,2,0,0,0,1,0,2,2,0,0,0,0,2,1,1,0,0,0,0,2,1,0,1,1,0,0,2,1,1,0, +2,1,1,1,1,2,1,2,1,2,0,1,1,1,0,2,1,1,1,2,1,1,1,1,0,1,1,1,1,1,0,1, +3,1,1,2,2,2,3,2,1,1,2,2,1,1,0,1,0,2,2,1,1,1,1,1,0,0,1,1,0,1,1,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,2,2,0,0,0,0,0,2,2,0,0,0,0,2,2,1,0,0,0,1,1,0,0,1,2,0,0,2,1,1,1, +2,2,1,1,1,2,1,2,1,1,0,1,1,1,1,2,1,1,1,2,1,1,1,1,0,1,2,1,1,1,0,1, +1,0,0,1,2,3,2,1,0,0,2,0,1,1,0,0,0,1,1,1,1,0,1,1,0,0,1,0,0,0,0,0, +1,2,1,2,1,2,1,1,1,2,0,2,1,1,1,0,1,2,0,0,1,1,1,0,0,0,0,0,0,0,0,0, +2,3,2,0,0,0,0,0,1,1,2,1,0,0,1,1,1,0,0,0,0,2,0,0,1,1,0,0,2,1,1,1, +2,1,1,1,1,1,1,2,1,0,1,1,1,1,0,2,1,1,1,1,1,1,0,1,0,1,1,1,1,1,0,1, +1,2,2,0,1,1,1,0,2,2,2,0,0,0,3,2,1,0,0,0,1,1,0,0,1,1,0,1,1,1,0,0, +1,1,0,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,2,1,1,1,0,0,1,1,1,0,1,0,1, +2,1,0,2,1,1,2,2,1,1,2,1,1,1,0,0,0,1,1,0,1,1,1,1,0,0,1,1,1,0,0,0, +1,2,2,2,2,2,1,1,1,2,0,2,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,0,0,0,1,0, +1,2,3,0,0,0,1,0,2,2,0,0,0,0,2,2,0,0,0,0,0,1,0,0,1,0,0,0,2,0,1,0, +2,1,1,1,1,1,0,2,0,0,0,1,2,1,1,1,1,0,1,2,0,1,0,1,0,1,1,1,0,1,0,1, +2,2,2,0,0,0,1,0,2,1,2,0,0,0,1,1,2,0,0,0,0,1,0,0,1,1,0,0,2,1,0,1, +2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,2,0,1,1,1,1,1,0,1, +1,2,2,0,0,0,1,0,2,2,2,0,0,0,1,1,0,0,0,0,0,1,1,0,2,0,0,1,1,1,0,1, +1,0,1,1,1,1,1,1,0,1,1,1,1,0,0,1,0,0,1,1,0,1,0,1,1,1,1,1,0,0,0,1, +1,0,0,1,0,1,2,1,0,0,1,1,1,2,0,0,0,1,1,0,1,0,1,1,0,0,1,0,0,0,0,0, +0,2,1,2,1,1,1,1,1,2,0,2,0,1,1,0,1,2,1,0,1,1,1,0,0,0,0,0,0,1,0,0, +2,1,1,0,1,2,0,0,1,1,1,0,0,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,2,1,0,1, +2,2,1,1,1,1,1,2,1,1,0,1,1,1,1,2,1,1,1,2,1,1,0,1,0,1,1,1,1,1,0,1, +1,2,2,0,0,0,0,0,1,1,0,0,0,0,2,1,0,0,0,0,0,2,0,0,2,2,0,0,2,0,0,1, +2,1,1,1,1,1,1,1,0,1,1,0,1,1,0,1,0,0,0,1,1,1,1,0,0,1,1,1,1,0,0,1, +1,1,2,0,0,3,1,0,2,1,1,1,0,0,1,1,1,0,0,0,1,1,0,0,0,1,0,0,1,0,1,0, +1,2,1,0,1,1,1,2,1,1,0,1,1,1,1,1,0,0,0,1,1,1,1,1,0,1,0,0,0,1,0,0, +2,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,0,0,2,0,0,0, +2,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,2,1,1,0,0,1,1,1,1,1,0,1, +2,1,1,1,2,1,1,1,0,1,1,2,1,0,0,0,0,1,1,1,1,0,1,0,0,0,0,1,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +1,1,0,1,1,1,1,1,0,0,1,1,2,1,0,0,0,1,1,0,0,0,1,1,0,0,1,0,1,0,0,0, +1,2,1,1,1,1,1,1,1,1,0,1,0,1,1,1,1,1,1,0,1,1,1,0,0,0,0,0,0,1,0,0, +2,0,0,0,1,1,1,1,0,0,1,1,0,0,0,0,0,1,1,1,2,0,0,1,0,0,1,0,1,0,0,0, +0,1,1,1,1,1,1,1,1,2,0,1,1,1,1,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,0, +1,0,0,1,1,1,1,1,0,0,2,1,0,1,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0, +0,1,1,1,1,1,1,0,1,1,0,1,0,1,1,0,1,1,0,0,1,1,1,0,0,0,0,0,0,0,0,0, +1,0,0,1,1,1,0,0,0,0,1,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0, +0,1,1,1,1,1,0,0,1,1,0,1,0,1,0,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,0, +0,0,0,1,0,0,0,0,0,0,1,1,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,1,1,1,0,1,0,0,1,1,0,1,0,1,1,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,0, +2,1,1,1,1,1,1,1,1,1,1,0,0,1,1,1,0,0,1,0,0,1,0,1,0,1,1,1,0,0,1,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +1,0,0,1,1,1,1,0,0,0,1,1,1,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0, +0,1,1,1,1,1,1,0,1,1,0,1,0,1,0,0,1,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0, +}; + +const SequenceModel Latin2HungarianModel( + Latin2_HungarianCharToOrderMap, + HungarianLangModel, + (float)0.947368, + PR_TRUE, + "ISO-8859-2"); + +const SequenceModel Win1250HungarianModel( + win1250HungarianCharToOrderMap, + HungarianLangModel, + (float)0.947368, + PR_TRUE, + "windows-1250"); diff --git a/PowerEditor/src/uchardet/LangModels/LangArabicModel.cpp b/PowerEditor/src/uchardet/LangModels/LangArabicModel.cpp deleted file mode 100644 index ba404ceb..00000000 --- a/PowerEditor/src/uchardet/LangModels/LangArabicModel.cpp +++ /dev/null @@ -1,265 +0,0 @@ -/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 1.1/GPL 2.0/LGPL 2.1 - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is Mozilla Communicator client code. - * - * The Initial Developer of the Original Code is - * Netscape Communications Corporation. - * Portions created by the Initial Developer are Copyright (C) 1998 - * the Initial Developer. All Rights Reserved. - * - * Contributor(s): - * - * Alternatively, the contents of this file may be used under the terms of - * either the GNU General Public License Version 2 or later (the "GPL"), or - * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), - * in which case the provisions of the GPL or the LGPL are applicable instead - * of those above. If you wish to allow use of your version of this file only - * under the terms of either the GPL or the LGPL, and not to allow others to - * use your version of this file under the terms of the MPL, indicate your - * decision by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL or the LGPL. If you do not delete - * the provisions above, a recipient may use your version of this file under - * the terms of any one of the MPL, the GPL or the LGPL. - * - * ***** END LICENSE BLOCK ***** */ - -#include "../nsSBCharSetProber.h" - -/********* Language model for: Arabic *********/ - -/** - * Generated by BuildLangModel.py - * On: 2015-12-13 18:33:58.848027 - **/ - -/* Character Mapping Table: - * ILL: illegal character. - * CTR: control character specific to the charset. - * RET: carriage/return. - * SYM: symbol (punctuation) that does not belong to word. - * NUM: 0 - 9. - * - * Other characters are ordered by probabilities - * (0 is the most common character in the language). - * - * Orders are generic to a language. So the codepoint with order X in - * CHARSET1 maps to the same character as the codepoint with the same - * order X in CHARSET2 for the same language. - * As such, it is possible to get missing order. For instance the - * ligature of 'o' and 'e' exists in ISO-8859-15 but not in ISO-8859-1 - * even though they are both used for French. Same for the euro sign. - */ -static const unsigned char Iso_8859_6_CharToOrderMap[] = -{ - CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */ - CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ - SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ - NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 52, 72, 61, 68, 74, 69, 59, 78, 60, 90, 86, 67, 65, 71, 75, /* 4X */ - 64, 85, 76, 55, 57, 79, 81, 70, 82, 87, 91,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 37, 58, 49, 47, 38, 54, 66, 46, 39, 88, 63, 45, 51, 43, 40, /* 6X */ - 62, 89, 42, 44, 41, 50, 77, 73, 83, 56, 80,SYM,SYM,SYM,SYM,CTR, /* 7X */ - CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ - CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ - SYM,ILL,ILL,ILL,SYM,ILL,ILL,ILL,ILL,ILL,ILL,ILL,SYM,SYM,ILL,ILL, /* AX */ - ILL,ILL,ILL,ILL,ILL,ILL,ILL,ILL,ILL,ILL,ILL,SYM,ILL,ILL,ILL,SYM, /* BX */ - ILL, 32, 34, 15, 35, 22, 31, 0, 9, 8, 7, 27, 19, 18, 25, 11, /* CX */ - 30, 5, 26, 12, 21, 23, 28,SYM, 33, 10, 29,ILL,ILL,ILL,ILL,ILL, /* DX */ - 36, 13, 14, 17, 1, 3, 6, 16, 4, 24, 2,SYM,SYM,SYM,SYM,SYM, /* EX */ - SYM,SYM,SYM,ILL,ILL,ILL,ILL,ILL,ILL,ILL,ILL,ILL,ILL,ILL,ILL,ILL, /* FX */ -}; -/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ - -static const unsigned char Windows_1256_CharToOrderMap[] = -{ - CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */ - CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ - SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ - NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 52, 72, 61, 68, 74, 69, 59, 78, 60, 90, 86, 67, 65, 71, 75, /* 4X */ - 64, 85, 76, 55, 57, 79, 81, 70, 82, 87, 91,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 37, 58, 49, 47, 38, 54, 66, 46, 39, 88, 63, 45, 51, 43, 40, /* 6X */ - 62, 89, 42, 44, 41, 50, 77, 73, 83, 56, 80,SYM,SYM,SYM,SYM,CTR, /* 7X */ - SYM, 48,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 95,SYM, 96, 92, 97, 98, /* 8X */ - 53,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 84,SYM, 99,SYM,100,SYM,SYM,101, /* 9X */ - SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,102,SYM,SYM,SYM,SYM,SYM, /* AX */ - SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */ - 103, 32, 34, 15, 35, 22, 31, 0, 9, 8, 7, 27, 19, 18, 25, 11, /* CX */ - 30, 5, 26, 12, 21, 23, 28,SYM, 20, 33, 10, 29, 36, 13, 14, 17, /* DX */ - 104, 1, 93, 3, 6, 16, 4,105,106, 94,107,108, 24, 2,109,110, /* EX */ - SYM,SYM,SYM,SYM,111,SYM,SYM,SYM,SYM,112,SYM,113,114,SYM,SYM,115, /* FX */ -}; -/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ - - -/* Model Table: - * Total sequences: 1479 - * First 512 sequences: 0.9696025116913417 - * Next 512 sequences (512-1024): 0.029166911858880054 - * Rest: 0.0012305764497782395 - * Negative sequences: TODO - */ -static const PRUint8 ArabicLangModel[] = -{ - 2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,3,3,3,3,3,3,1,3,1,3,3,3,3,2,2,3, - 3,2,1,2,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2, - 1,2,3,2,2,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,0,3,1,3,3,3,3,2,2,3, - 2,2,0,2,2,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,1,3,2,3,3,3,2,2,2,2, - 0,2,1,3,1,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2, - 2,2,2,1,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,1,2,3,2,3,2,3,3,2,3, - 1,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0, - 3,2,3,3,3,2,2,3,3,3,3,3,3,3,3,1,3,2,3,3,3,3,0,3,2,2,3,2,2,2,3,2, - 0,3,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,3,2,3,3,2,2, - 0,3,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 2,0,0,0,0,0,0,0,0,1,3,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,2,3,3,2,2,1,2,2,2,2,2,2,2, - 1,2,2,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,3,3,3,3,2,3,3,2,3,0,3,2,0,2,2,3,0,3,2,0,3,3,3,0,2,0, - 0,3,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,2,2,0,2,0,0,3,3,2,3,0,2,0,2, - 2,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,2,3,3,2,3,3,1,0,0,2,2,0,1,0,1,0,1, - 0,0,0,2,1,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0, - 3,3,3,2,3,3,3,3,3,2,3,2,3,2,3,2,3,2,2,2,2,2,2,2,2,2,2,1,3,2,2,2, - 1,3,0,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,0,2,0,2,1,3,2,0,3,2,0,2,0,3,0,2,0, - 0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,3,3,3,3,3,3,3,2,3,3,3,3,3,3,0,3,3,3,3,3,3,0,3,2,3,2,3,2,3,2,2, - 0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,3,2,3,3,1,3,2,1,2,0,2,2,0,3,2,2,0,0,2,0,2,1,2,0,3,0, - 0,1,0,2,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,3,3,3,3,2,2,3,2,2,2,2,2,2,2,2,2,2,1,0,2,3,3,0,1,3,0, - 0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,2,3,3,2,1,3,3,3,3,0,2,3,0,3,2,2,0,3,2,0,3,2,3,0,2,0, - 0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,2,2,0,2,3,1,2,1,0,1,0,0,1,0,3,2,0,2,2,2, - 0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,2,3,3,3,2,3,3,2,2,3,2,3,2,2,0,2,1,2,1,1,0,2,1,0,0,0,1,0,2, - 1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,2,3,3,3,3,2,3,2,3,3,2,1,2,2,2,3,3,2,2,2,0,0,0,2,3,1,0,0,2,1,2, - 0,1,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,3,3,3,1,2,3,2,0,2,3,3,3,2,3,0,2,2,2,3,2,2,0,3,0,2,2,2,3,2,3,1, - 0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,3,2,3,3,2,3,0,3,2,0,2,1,3,0,2,0,0,2,2,2,0,0,0,2,0,0, - 0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0, - 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,2,3,2,3,2,3,2,2,0,0,2,0,0,1,3,2,0,3,0,1,2,0,2,0,2,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,2,2,3,3,2,2,0,2,2,1,2,2,2,2,0,0,0,0,1,2,2,0,0,1,0,2, - 2,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,3,2,3,2,2,1,1,2,3,1,2,2,0,0,0,0,0,0,1,0,0,2,0,1,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,2,2,3,2,3,2,0,2,0,1,2,0,2,1,2,0,0,0,2,2,0,0,0,2,0,2, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,2,2,3,2,1,2,2,2,0,0,2,0,0,2,2,1,0,2,1,0,2,0,2,0,2,0, - 0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,2,2,2,2,2,2,2,0,0,0,2,2,0,3,3,0,2,0,0,0,0,2,2,0,0,0,0,0,1, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,2,3,2,2,3,2,2,2,2,2,2,0,2,2,2,2,2,2,0,1,0,1,2,0,1,1,1,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 2,0,1,1,1,0,0,2,2,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,2,2,3,2,2,1,2,3,2,0,0,0,2,0,0,3,0,0,0,1,0,0,0,2,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 1,3,2,2,2,3,2,2,0,2,0,2,2,2,2,0,1,2,1,1,0,2,0,1,0,3,1,2,0,1,2,1, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 2,2,2,2,3,2,1,2,1,1,0,2,2,0,2,0,2,2,0,0,0,2,0,0,2,2,1,2,0,0,0,0, - 0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 1,0,1,0,1,1,1,0,1,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,1,0,0,0,0, - 0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,2,2,1,2,2,2,2,2,1,2,0,2,1,2,0,0,1,0,1,0,1,0,0,0,1,1, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,2,2,1,1,2,2,2,2,2,0,2,0,2,1,2,0,0,1,0,0,0,2,0,0,0,1,2, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,2,2,1,2,2,2,2,2,2,0,2,0,2,1,2,0,0,1,0,0,0,1,0,0,0,1,1, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,1,0,1,2,2,2,2,2,1,1,2,0,2,2,2,0,0,2,0,0,0,1,0,0,0,2,2, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,2,2,2,2,2,2,1,2,2,2,1,0,1,1,1,0,0,0,0,2,0,2,0,0,0,1,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,2,2,2,2,2,1,2,2,2,0,1,0,2,1,2,0,0,0,0,2,0,1,0,0,0,0,2, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,2,2,2,2,2,0,1,2,1,1,2,0,2,1,0,0,0,1,0,1,0,0,0,0,0,0,1, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,2,2,2,1,2,0,0,2,1,2,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,1,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,2,2,2,2,1,0,0,1,2,0,2,0,0,1,0,0,0,1,1,1,0,1,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,2,2,2,2,2,2,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,2,2,2,1,1,1,1,1,0,1,1,0,0,1,1,0,0,0,0,1,0,0,0,0,0,0,0, - 2,2,1,0,2,2,1,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,2,2,2,2,2,1,0,0,1,2,0,0,1,2,0,0,0,0,0,1,0,0,0,0,0,0,1, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,1,1,1,0,2,2,2,2,1,0,2,0,1,0,2,0,0,0,0,0,0,2,0,0,0,0,1, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,2,2,2,2,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,2,0,0,0,0,0,1,0, - 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,2,2,2,1,1,0,0,0,1,0,0,0,0,0,0,0,1,1,0,1,0,0,1, - 2,2,2,0,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,1,2,1,0,1,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,2,1,0,2,2,0,0,0,0,0,0,0,1,0,0,2,0,0,0,0,1,0,0,1,2,2,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,1,0,1,0,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,1,2,1,0,1,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,2,0,1,1,0,1,0,1,1,0,1,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,2,0,0,2,0,2,0,0,1,0,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,1,0,2,1,1,0,0,0,0,0,0,1,0,0,2,0,1,0,2,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,1,0,1,1,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,2,1,0,1,0,0,0,0,1,1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,1,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,1,1,1,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0, -}; - - -const SequenceModel Iso_8859_6ArabicModel = -{ - Iso_8859_6_CharToOrderMap, - ArabicLangModel, - 64, - (float)0.9696025116913417, - PR_FALSE, - "ISO-8859-6" -}; - -const SequenceModel Windows_1256ArabicModel = -{ - Windows_1256_CharToOrderMap, - ArabicLangModel, - 64, - (float)0.9696025116913417, - PR_FALSE, - "WINDOWS-1256" -}; \ No newline at end of file diff --git a/PowerEditor/src/uchardet/LangModels/LangDanishModel.cpp b/PowerEditor/src/uchardet/LangModels/LangDanishModel.cpp deleted file mode 100644 index 46b6f289..00000000 --- a/PowerEditor/src/uchardet/LangModels/LangDanishModel.cpp +++ /dev/null @@ -1,198 +0,0 @@ -/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 1.1/GPL 2.0/LGPL 2.1 - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is Mozilla Communicator client code. - * - * The Initial Developer of the Original Code is - * Netscape Communications Corporation. - * Portions created by the Initial Developer are Copyright (C) 1998 - * the Initial Developer. All Rights Reserved. - * - * Contributor(s): - * - * Alternatively, the contents of this file may be used under the terms of - * either the GNU General Public License Version 2 or later (the "GPL"), or - * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), - * in which case the provisions of the GPL or the LGPL are applicable instead - * of those above. If you wish to allow use of your version of this file only - * under the terms of either the GPL or the LGPL, and not to allow others to - * use your version of this file under the terms of the MPL, indicate your - * decision by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL or the LGPL. If you do not delete - * the provisions above, a recipient may use your version of this file under - * the terms of any one of the MPL, the GPL or the LGPL. - * - * ***** END LICENSE BLOCK ***** */ - -#include "../nsSBCharSetProber.h" - -/********* Language model for: Danish *********/ - -/** - * Generated by BuildLangModel.py - * On: 2016-02-19 17:56:42.163975 - **/ - -/* Character Mapping Table: - * ILL: illegal character. - * CTR: control character specific to the charset. - * RET: carriage/return. - * SYM: symbol (punctuation) that does not belong to word. - * NUM: 0 - 9. - * - * Other characters are ordered by probabilities - * (0 is the most common character in the language). - * - * Orders are generic to a language. So the codepoint with order X in - * CHARSET1 maps to the same character as the codepoint with the same - * order X in CHARSET2 for the same language. - * As such, it is possible to get missing order. For instance the - * ligature of 'o' and 'e' exists in ISO-8859-15 but not in ISO-8859-1 - * even though they are both used for French. Same for the euro sign. - */ -static const unsigned char Iso_8859_15_CharToOrderMap[] = -{ - CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */ - CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ - SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ - NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 4, 15, 24, 7, 0, 13, 10, 18, 5, 23, 11, 8, 12, 2, 9, /* 4X */ - 17, 29, 1, 6, 3, 16, 14, 25, 27, 20, 26,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 4, 15, 24, 7, 0, 13, 10, 18, 5, 23, 11, 8, 12, 2, 9, /* 6X */ - 17, 29, 1, 6, 3, 16, 14, 25, 27, 20, 26,SYM,SYM,SYM,SYM,CTR, /* 7X */ - CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ - CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ - SYM,SYM,SYM,SYM,SYM,SYM, 39,SYM, 39,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */ - SYM,SYM,SYM,SYM, 53, 42,SYM,SYM, 54,SYM,SYM,SYM, 55, 56, 57,SYM, /* BX */ - 58, 33, 40, 35, 32, 21, 22, 38, 41, 28, 49, 45, 59, 34, 60, 50, /* CX */ - 43, 47, 51, 36, 52, 61, 30,SYM, 19, 62, 37, 44, 31, 46, 63, 48, /* DX */ - 64, 33, 40, 35, 32, 21, 22, 38, 41, 28, 49, 45, 65, 34, 66, 50, /* EX */ - 43, 47, 51, 36, 52, 67, 30,SYM, 19, 68, 37, 44, 31, 46, 69, 70, /* FX */ -}; -/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ - -static const unsigned char Iso_8859_1_CharToOrderMap[] = -{ - CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */ - CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ - SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ - NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 4, 15, 24, 7, 0, 13, 10, 18, 5, 23, 11, 8, 12, 2, 9, /* 4X */ - 17, 29, 1, 6, 3, 16, 14, 25, 27, 20, 26,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 4, 15, 24, 7, 0, 13, 10, 18, 5, 23, 11, 8, 12, 2, 9, /* 6X */ - 17, 29, 1, 6, 3, 16, 14, 25, 27, 20, 26,SYM,SYM,SYM,SYM,CTR, /* 7X */ - CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ - CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ - SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */ - SYM,SYM,SYM,SYM,SYM, 42,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */ - 71, 33, 40, 35, 32, 21, 22, 38, 41, 28, 49, 45, 72, 34, 73, 50, /* CX */ - 43, 47, 51, 36, 52, 74, 30,SYM, 19, 75, 37, 44, 31, 46, 76, 48, /* DX */ - 77, 33, 40, 35, 32, 21, 22, 38, 41, 28, 49, 45, 78, 34, 79, 50, /* EX */ - 43, 47, 51, 36, 52, 80, 30,SYM, 19, 81, 37, 44, 31, 46, 82, 83, /* FX */ -}; -/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ - -static const unsigned char Windows_1252_CharToOrderMap[] = -{ - CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */ - CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ - SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ - NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 4, 15, 24, 7, 0, 13, 10, 18, 5, 23, 11, 8, 12, 2, 9, /* 4X */ - 17, 29, 1, 6, 3, 16, 14, 25, 27, 20, 26,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 4, 15, 24, 7, 0, 13, 10, 18, 5, 23, 11, 8, 12, 2, 9, /* 6X */ - 17, 29, 1, 6, 3, 16, 14, 25, 27, 20, 26,SYM,SYM,SYM,SYM,CTR, /* 7X */ - SYM,ILL,SYM, 84,SYM,SYM,SYM,SYM,SYM,SYM, 39,SYM, 85,ILL, 86,ILL, /* 8X */ - ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 39,SYM, 87,ILL, 88, 89, /* 9X */ - SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */ - SYM,SYM,SYM,SYM,SYM, 42,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */ - 90, 33, 40, 35, 32, 21, 22, 38, 41, 28, 49, 45, 91, 34, 92, 50, /* CX */ - 43, 47, 51, 36, 52, 93, 30,SYM, 19, 94, 37, 44, 31, 46, 95, 48, /* DX */ - 96, 33, 40, 35, 32, 21, 22, 38, 41, 28, 49, 45, 97, 34, 98, 50, /* EX */ - 43, 47, 51, 36, 52, 99, 30,SYM, 19,100, 37, 44, 31, 46,101,102, /* FX */ -}; -/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ - - -/* Model Table: - * Total sequences: 964 - * First 512 sequences: 0.9968082796759031 - * Next 512 sequences (512-1024): 0.0031917203240968304 - * Rest: 3.903127820947816e-17 - * Negative sequences: TODO - */ -static const PRUint8 DanishLangModel[] = -{ - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,2,3,3,3,2,3,0,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,2,2,2,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,2,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,0,2,3,3,3,3,3,2,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,0,2,3,3,2,3,3,2,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,0,2,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,0,2,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,0,2,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,2,3,3,3,2,2,0,0, - 3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,2,3,3,3,3,3,3,2,2,2,2,3,2, - 3,3,3,3,3,3,3,2,3,3,2,3,3,2,3,2,3,2,3,3,3,3,3,2,2,2,2,2,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,2,2,3,0, - 3,3,3,3,3,3,3,2,3,3,3,2,2,3,3,3,3,2,3,3,3,3,3,3,2,2,2,2,2,0, - 3,3,3,3,3,3,3,3,3,3,2,2,2,2,2,2,3,2,2,2,2,3,3,3,2,2,0,0,2,0, - 3,3,3,3,3,3,3,2,3,3,2,2,2,2,2,3,3,2,2,3,3,3,3,3,2,2,0,0,2,0, - 3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,2,3,3,2,3,0,2,2,3,2,3,3,0,2, - 3,3,3,3,3,3,3,3,3,3,3,2,2,3,3,3,3,3,3,3,2,3,3,2,2,0,2,0,2,0, - 3,3,3,3,3,3,2,2,3,3,2,2,3,2,3,2,3,2,2,3,3,3,3,3,2,3,2,2,2,0, - 3,3,3,3,2,2,3,3,3,2,3,3,3,2,3,3,0,2,2,2,2,0,0,3,0,0,2,0,0,0, - 3,3,3,3,3,2,3,3,3,3,3,3,3,2,3,3,3,3,2,2,2,0,0,0,2,2,2,0,0,0, - 3,3,3,3,2,0,3,3,3,2,3,3,2,2,3,3,0,2,2,2,0,0,0,0,0,0,0,0,0,0, - 2,3,3,3,0,3,3,3,3,2,3,3,3,3,3,3,2,2,2,0,0,0,0,0,2,0,0,0,0,0, - 3,3,2,3,3,3,3,3,3,3,2,2,2,2,2,2,3,2,2,3,3,2,3,2,2,0,0,0,0,0, - 3,3,2,3,3,3,2,2,3,3,2,3,2,2,0,2,3,2,3,0,3,0,0,2,3,2,2,0,2,2, - 3,2,2,2,3,3,2,2,2,3,0,2,2,2,0,2,2,0,2,0,2,0,0,0,2,2,2,0,0,0, - 3,2,2,2,3,3,2,2,0,3,0,2,2,0,0,2,2,2,2,2,2,0,0,2,2,0,2,0,0,0, - 3,2,0,2,2,3,2,0,2,2,0,0,2,2,2,2,2,2,2,2,0,0,0,0,2,2,0,0,2,0, - 2,3,2,2,2,0,2,2,2,2,2,2,2,0,2,2,0,2,0,0,0,0,0,0,2,0,0,0,0,0, - 0,0,0,0,3,2,2,2,2,2,0,0,0,0,2,2,3,0,2,0,0,0,0,0,0,0,0,0,0,2, -}; - - -const SequenceModel Iso_8859_15DanishModel = -{ - Iso_8859_15_CharToOrderMap, - DanishLangModel, - 30, - (float)0.9968082796759031, - PR_TRUE, - "ISO-8859-15" -}; - -const SequenceModel Iso_8859_1DanishModel = -{ - Iso_8859_1_CharToOrderMap, - DanishLangModel, - 30, - (float)0.9968082796759031, - PR_TRUE, - "ISO-8859-1" -}; - -const SequenceModel Windows_1252DanishModel = -{ - Windows_1252_CharToOrderMap, - DanishLangModel, - 30, - (float)0.9968082796759031, - PR_TRUE, - "WINDOWS-1252" -}; \ No newline at end of file diff --git a/PowerEditor/src/uchardet/LangModels/LangEsperantoModel.cpp b/PowerEditor/src/uchardet/LangModels/LangEsperantoModel.cpp deleted file mode 100644 index 4993abcd..00000000 --- a/PowerEditor/src/uchardet/LangModels/LangEsperantoModel.cpp +++ /dev/null @@ -1,141 +0,0 @@ -/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 1.1/GPL 2.0/LGPL 2.1 - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is Mozilla Communicator client code. - * - * The Initial Developer of the Original Code is - * Netscape Communications Corporation. - * Portions created by the Initial Developer are Copyright (C) 1998 - * the Initial Developer. All Rights Reserved. - * - * Contributor(s): - * - * Alternatively, the contents of this file may be used under the terms of - * either the GNU General Public License Version 2 or later (the "GPL"), or - * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), - * in which case the provisions of the GPL or the LGPL are applicable instead - * of those above. If you wish to allow use of your version of this file only - * under the terms of either the GPL or the LGPL, and not to allow others to - * use your version of this file under the terms of the MPL, indicate your - * decision by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL or the LGPL. If you do not delete - * the provisions above, a recipient may use your version of this file under - * the terms of any one of the MPL, the GPL or the LGPL. - * - * ***** END LICENSE BLOCK ***** */ - -#include "../nsSBCharSetProber.h" - -/********* Language model for: Esperanto *********/ - -/** - * Generated by BuildLangModel.py - * On: 2015-12-04 01:27:38.177516 - **/ - -/* Character Mapping Table: - * ILL: illegal character. - * CTR: control character specific to the charset. - * RET: carriage/return. - * SYM: symbol (punctuation) that does not belong to word. - * NUM: 0 - 9. - * - * Other characters are ordered by probabilities - * (0 is the most common character in the language). - * - * Orders are generic to a language. So the codepoint with order X in - * CHARSET1 maps to the same character as the codepoint with the same - * order X in CHARSET2 for the same language. - * As such, it is possible to get missing order. For instance the - * ligature of 'o' and 'e' exists in ISO-8859-15 but not in ISO-8859-1 - * even though they are both used for French. Same for the euro sign. - */ -static const unsigned char Iso_8859_3_CharToOrderMap[] = -{ - CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */ - CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ - SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ - NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 0, 18, 17, 10, 2, 19, 15, 21, 3, 11, 9, 7, 13, 4, 1, /* 4X */ - 14, 32, 5, 8, 6, 12, 16, 27, 33, 25, 20,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 0, 18, 17, 10, 2, 19, 15, 21, 3, 11, 9, 7, 13, 4, 1, /* 6X */ - 14, 32, 5, 8, 6, 12, 16, 27, 33, 25, 20,SYM,SYM,SYM,SYM,CTR, /* 7X */ - CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ - CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ - SYM, 56,SYM,SYM,SYM,ILL, 34,SYM,SYM, 57, 53, 58, 28,SYM,ILL, 40, /* AX */ - SYM, 59,SYM,SYM,SYM,SYM, 34,SYM,SYM, 60, 53, 61, 28,SYM,ILL, 40, /* BX */ - 44, 29, 46,ILL, 43, 62, 24, 38, 41, 31, 48, 50, 54, 35, 49, 52, /* CX */ - ILL, 42, 63, 30, 47, 64, 36,SYM, 22, 51, 39, 55, 37, 23, 26, 45, /* DX */ - 44, 29, 46,ILL, 43, 65, 24, 38, 41, 31, 48, 50, 54, 35, 49, 52, /* EX */ - ILL, 42, 66, 30, 47, 67, 36,SYM, 22, 51, 39, 55, 37, 23, 26,SYM, /* FX */ -}; -/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ - - -/* Model Table: - * Total sequences: 989 - * First 512 sequences: 0.9942980632768038 - * Next 512 sequences (512-1024): 0.0057019367231962385 - * Rest: -5.0306980803327406e-17 - * Negative sequences: TODO - */ -static const PRUint8 EsperantoLangModel[] = -{ - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,0,0,2,3,3, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,3,2,0,0,0,2,2,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,0,0,0,2,3,3, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,2,3,2,2,2,3,0,2,2,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,2,2,3,2,3,2,0,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,3,0,3,3,3,2,2,2, - 3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,3,2,3,3,3,3,0,0,2,3,2,2,2,3,3,2,0,2,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,3,2,2,2,3,2,2,0,3,3,3,2,0,2, - 3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,3,3,3,3,3,0,0,0,3,0,2,0,3,2,3,2,2,0, - 3,3,3,3,3,3,3,3,3,2,3,2,3,3,3,2,3,3,2,3,3,3,0,0,0,3,2,0,2,3,2,2,0,0,0, - 3,3,3,3,3,3,2,3,3,2,3,2,3,3,3,3,3,2,2,2,3,3,0,0,2,3,0,3,2,2,2,2,0,0,0, - 3,3,3,3,3,3,3,3,3,2,3,2,3,3,2,2,0,2,2,2,2,2,2,0,0,0,0,0,0,3,3,2,0,2,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,0,3,2,3,2,0,0,0,2,0,2,2, - 3,3,3,3,3,3,3,3,3,3,2,2,3,3,3,2,2,3,3,3,2,0,0,0,2,3,2,2,0,3,2,2,0,0,0, - 3,3,3,3,2,3,3,3,3,2,2,2,3,2,3,2,0,2,2,2,2,3,0,0,0,2,2,0,0,3,2,2,0,0,0, - 3,3,3,3,3,3,2,3,3,2,3,0,3,3,2,2,3,2,2,2,2,3,0,2,2,3,2,2,2,2,2,3,0,2,0, - 3,3,3,3,2,3,2,2,2,2,2,3,3,2,2,2,0,0,2,0,2,2,0,0,2,2,0,0,0,3,2,2,0,0,0, - 3,3,3,3,0,3,3,3,3,3,2,0,3,2,2,2,0,3,2,2,3,3,0,0,0,3,0,0,0,2,2,2,2,2,2, - 3,3,3,3,3,3,3,3,3,2,3,2,3,2,2,2,2,2,3,2,0,2,0,0,0,3,2,0,0,3,3,3,0,0,0, - 3,3,3,3,0,3,3,3,2,2,2,2,3,3,2,3,2,0,2,3,0,0,0,0,0,2,0,0,0,0,0,2,0,3,0, - 3,3,3,3,3,2,2,3,3,3,2,2,3,2,2,2,2,3,3,2,2,0,0,0,0,3,2,2,0,2,2,2,2,0,0, - 3,3,3,3,3,3,3,3,2,2,2,0,3,3,2,0,2,0,2,2,0,2,0,0,0,2,0,2,0,2,2,2,0,2,0, - 3,3,3,3,0,0,2,3,0,0,2,2,3,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,2,3,3,2,3,3,3,3,3,3,2,2,2,2,3,2,0,2,2,3,2,0,0,2,0,3,0,0,0,0,0,0,0,0, - 3,3,3,3,0,0,2,2,0,2,3,2,3,3,0,0,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,2,3,3,2,3,2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,2,0,2,0,2,0,0,0, - 3,3,3,3,2,2,3,2,0,2,0,2,3,2,2,0,3,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,2,2,2,2,3,2,0,0,2,0,0,0,0,0,0,2,0,2,0,0,0,2,0,3,0,0,2,0,0,0,0, - 3,3,2,2,2,2,0,2,0,2,0,0,3,0,0,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,2,0,3,3,3,3,3,2,3,0,0,2,2,2,2,3,2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,3,3,2,2,2,2,2,2,0,0,2,2,2,0,2,2,3,0,0,0,0,0,0,3,0,0,0,0,0,0,0, - 2,2,2,0,3,3,3,3,3,2,2,0,2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,2,0,0, - 2,0,0,2,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0, - 2,2,2,3,0,0,2,2,0,0,0,0,2,2,2,2,0,0,0,2,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0, - 3,3,3,2,2,0,2,0,0,0,2,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -}; - - -const SequenceModel Iso_8859_3EsperantoModel = -{ - Iso_8859_3_CharToOrderMap, - EsperantoLangModel, - 35, - (float)0.9942980632768038, - PR_FALSE, - "ISO-8859-3" -}; \ No newline at end of file diff --git a/PowerEditor/src/uchardet/LangModels/LangFrenchModel.cpp b/PowerEditor/src/uchardet/LangModels/LangFrenchModel.cpp deleted file mode 100644 index 4c05498a..00000000 --- a/PowerEditor/src/uchardet/LangModels/LangFrenchModel.cpp +++ /dev/null @@ -1,206 +0,0 @@ -/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 1.1/GPL 2.0/LGPL 2.1 - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is Mozilla Communicator client code. - * - * The Initial Developer of the Original Code is - * Netscape Communications Corporation. - * Portions created by the Initial Developer are Copyright (C) 1998 - * the Initial Developer. All Rights Reserved. - * - * Contributor(s): - * - * Alternatively, the contents of this file may be used under the terms of - * either the GNU General Public License Version 2 or later (the "GPL"), or - * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), - * in which case the provisions of the GPL or the LGPL are applicable instead - * of those above. If you wish to allow use of your version of this file only - * under the terms of either the GPL or the LGPL, and not to allow others to - * use your version of this file under the terms of the MPL, indicate your - * decision by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL or the LGPL. If you do not delete - * the provisions above, a recipient may use your version of this file under - * the terms of any one of the MPL, the GPL or the LGPL. - * - * ***** END LICENSE BLOCK ***** */ - -#include "../nsSBCharSetProber.h" - -/********* Language model for: French *********/ - -/** - * Generated by BuildLangModel.py - * On: 2015-12-03 21:10:27.685575 - **/ - -/* Character Mapping Table: - * ILL: illegal character. - * CTR: control character specific to the charset. - * RET: carriage/return. - * SYM: symbol (punctuation) that does not belong to word. - * NUM: 0 - 9. - * - * Other characters are ordered by probabilities - * (0 is the most common character in the language). - * - * Orders are generic to a language. So the codepoint with order X in - * CHARSET1 maps to the same character as the codepoint with the same - * order X in CHARSET2 for the same language. - * As such, it is possible to get missing order. For instance the - * ligature of 'o' and 'e' exists in ISO-8859-15 but not in ISO-8859-1 - * even though they are both used for French. Same for the euro sign. - */ -static const unsigned char Windows_1252_CharToOrderMap[] = -{ - CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */ - CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ - SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ - NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 2, 18, 11, 10, 0, 17, 15, 19, 4, 25, 26, 7, 13, 3, 8, /* 4X */ - 12, 20, 5, 1, 6, 9, 16, 30, 21, 22, 29,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 2, 18, 11, 10, 0, 17, 15, 19, 4, 25, 26, 7, 13, 3, 8, /* 6X */ - 12, 20, 5, 1, 6, 9, 16, 30, 21, 22, 29,SYM,SYM,SYM,SYM,CTR, /* 7X */ - SYM,ILL,SYM, 56,SYM,SYM,SYM,SYM,SYM,SYM, 51,SYM, 35,ILL, 57,ILL, /* 8X */ - ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 51,SYM, 35,ILL, 58, 59, /* 9X */ - SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */ - SYM,SYM,SYM,SYM,SYM, 60,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */ - 24, 38, 32, 46, 49, 61, 47, 27, 23, 14, 28, 41, 62, 39, 33, 36, /* CX */ - 48, 45, 54, 40, 31, 55, 42,SYM, 52, 37, 43, 34, 44, 53, 50, 63, /* DX */ - 24, 38, 32, 46, 49, 64, 47, 27, 23, 14, 28, 41, 65, 39, 33, 36, /* EX */ - 48, 45, 54, 40, 31, 55, 42,SYM, 52, 37, 43, 34, 44, 53, 50, 66, /* FX */ -}; -/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ - -static const unsigned char Iso_8859_1_CharToOrderMap[] = -{ - CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */ - CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ - SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ - NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 2, 18, 11, 10, 0, 17, 15, 19, 4, 25, 26, 7, 13, 3, 8, /* 4X */ - 12, 20, 5, 1, 6, 9, 16, 30, 21, 22, 29,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 2, 18, 11, 10, 0, 17, 15, 19, 4, 25, 26, 7, 13, 3, 8, /* 6X */ - 12, 20, 5, 1, 6, 9, 16, 30, 21, 22, 29,SYM,SYM,SYM,SYM,CTR, /* 7X */ - CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ - CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ - SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */ - SYM,SYM,SYM,SYM,SYM, 67,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */ - 24, 38, 32, 46, 49, 68, 47, 27, 23, 14, 28, 41, 69, 39, 33, 36, /* CX */ - 48, 45, 54, 40, 31, 55, 42,SYM, 52, 37, 43, 34, 44, 53, 50, 70, /* DX */ - 24, 38, 32, 46, 49, 71, 47, 27, 23, 14, 28, 41, 72, 39, 33, 36, /* EX */ - 48, 45, 54, 40, 31, 55, 42,SYM, 52, 37, 43, 34, 44, 53, 50, 73, /* FX */ -}; -/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ - -static const unsigned char Iso_8859_15_CharToOrderMap[] = -{ - CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */ - CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ - SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ - NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 2, 18, 11, 10, 0, 17, 15, 19, 4, 25, 26, 7, 13, 3, 8, /* 4X */ - 12, 20, 5, 1, 6, 9, 16, 30, 21, 22, 29,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 2, 18, 11, 10, 0, 17, 15, 19, 4, 25, 26, 7, 13, 3, 8, /* 6X */ - 12, 20, 5, 1, 6, 9, 16, 30, 21, 22, 29,SYM,SYM,SYM,SYM,CTR, /* 7X */ - CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ - CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ - SYM,SYM,SYM,SYM,SYM,SYM, 51,SYM, 51,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */ - SYM,SYM,SYM,SYM, 74, 75,SYM,SYM, 76,SYM,SYM,SYM, 35, 35, 77,SYM, /* BX */ - 24, 38, 32, 46, 49, 78, 47, 27, 23, 14, 28, 41, 79, 39, 33, 36, /* CX */ - 48, 45, 54, 40, 31, 55, 42,SYM, 52, 37, 43, 34, 44, 53, 50, 80, /* DX */ - 24, 38, 32, 46, 49, 81, 47, 27, 23, 14, 28, 41, 82, 39, 33, 36, /* EX */ - 48, 45, 54, 40, 31, 55, 42,SYM, 52, 37, 43, 34, 44, 53, 50, 83, /* FX */ -}; -/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ - - -/* Model Table: - * Total sequences: 914 - * First 512 sequences: 0.997057879992383 - * Next 512 sequences (512-1024): 0.002942120007616917 - * Rest: 3.8163916471489756e-17 - * Negative sequences: TODO - */ -static const PRUint8 FrenchLangModel[] = -{ - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,3,3,3,3,3,0,0,3,3,3,0,3,3,0,0,0,2,0,2,0, - 3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,2,2,3,3,3,3,0,3,3,0,0,3,0,0,2,3,0,0,0,2,2,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,0,3,3,3,0,3,3,2,2,3,0,0,3,0, - 3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,2,3,3,0,3,3,3,2,3,2,0,2,2,0,0,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,0,2,3,0,2,3,2,0,0,0,0,0,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,0,2,3,3,3,2,3,3,3,0,2,0,0,0, - 3,3,3,2,3,3,3,3,3,3,2,3,3,3,3,2,2,2,3,3,2,2,3,3,2,0,2,0,3,3,2,3,2,0,0,0,0,0, - 3,3,3,2,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,0,3,3,3,2,3,0,0,2,2,2,2,0,2,0,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,0,3,3,0,0,3,3,0,0,2,3,0,3,3, - 3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,2,3,3,2,0,3,3,2,3,3,2,0,0,0,0,0,2,0, - 3,3,3,2,3,3,3,2,3,3,3,2,2,3,3,3,2,2,2,3,0,0,3,3,0,3,0,0,2,2,3,2,2,2,3,0,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,2,2,2,2,3,3,0,3,3,0,0,3,0,2,2,2,3,2,0,0,2,0,0, - 3,3,3,2,3,3,3,3,3,3,2,2,3,2,3,0,0,2,2,3,0,0,3,3,0,0,2,2,3,2,2,3,2,0,0,0,0,0, - 3,3,3,3,3,2,3,2,3,3,2,3,3,3,3,2,0,2,3,2,0,0,3,3,0,2,2,0,3,0,2,2,3,0,2,2,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,0,0,0,3,2,2,0,3,0,0,2,0,0,0,0,0, - 3,3,3,3,3,3,3,3,3,3,2,2,2,3,3,3,2,2,3,3,0,2,3,3,0,0,0,0,2,0,2,0,2,0,0,0,0,0, - 3,2,3,2,3,3,0,2,3,3,0,0,0,2,3,0,2,2,0,0,0,0,2,3,0,0,2,0,3,0,0,0,0,0,0,2,0,0, - 3,3,3,2,3,3,3,3,3,3,2,2,2,3,3,2,0,3,0,0,0,0,0,3,0,2,0,0,3,0,0,0,0,0,2,2,0,0, - 3,3,3,3,3,3,3,3,3,3,3,2,2,2,3,2,2,0,3,2,0,0,3,2,0,3,0,0,0,0,0,0,3,2,0,2,0,0, - 3,3,3,3,3,3,3,3,3,3,0,2,0,3,3,0,0,2,2,0,0,0,3,3,0,2,2,0,2,2,2,3,3,0,0,2,0,0, - 0,0,2,0,0,0,0,2,2,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,0,3,0,3,0,3,2,3,2,2,3,3,2,3,0,3,2,2,2,2,3,2,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0, - 3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,0,2,2,2,0,3,2,0,0,2,2,0,0,0,0,0,0,0, - 0,3,0,3,0,3,3,3,0,0,3,3,2,3,0,3,3,2,3,0,3,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,2,3,2,3,2,2,2,3,3,2,2,2,2,3,0,0,0,0,0,0,0,0,0,3,2,0,0,0,0,0,0,2,0,0,0,0,0, - 3,3,3,2,3,3,2,3,3,3,0,0,2,3,2,2,2,2,2,3,0,0,3,0,0,0,2,0,0,0,2,0,0,0,0,0,0,0, - 0,0,3,0,0,0,0,0,3,3,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,2,0,0,3,2,0,0,0,3,0,3,0,0,2,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,0,3,2,3,2,0,2,3,3,0,2,0,2,2,2,0,0,2,2,2,0,3,0,0,0,2,0,0,3,2,0,0,0,0,0,0,0, - 3,2,3,2,3,2,2,2,3,2,0,2,0,0,2,0,0,2,2,2,0,0,2,0,0,0,2,0,0,0,2,0,0,0,0,0,0,0, - 0,2,0,3,0,0,3,3,0,0,0,0,2,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,2,0,2,2,0,3,3,0,0,0,3,2,2,0,3,0,0,2,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,3,0,0,3,3,0,0,0,2,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,2,3,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,2,0,0,2,0,2,2,0,3,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 2,2,2,2,0,2,2,3,0,0,2,2,0,2,0,2,0,2,2,0,2,0,0,0,0,0,2,0,0,0,2,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -}; - - -const SequenceModel Windows_1252FrenchModel = -{ - Windows_1252_CharToOrderMap, - FrenchLangModel, - 38, - (float)0.997057879992383, - PR_TRUE, - "WINDOWS-1252" -}; - -const SequenceModel Iso_8859_1FrenchModel = -{ - Iso_8859_1_CharToOrderMap, - FrenchLangModel, - 38, - (float)0.997057879992383, - PR_TRUE, - "ISO-8859-1" -}; - -const SequenceModel Iso_8859_15FrenchModel = -{ - Iso_8859_15_CharToOrderMap, - FrenchLangModel, - 38, - (float)0.997057879992383, - PR_TRUE, - "ISO-8859-15" -}; \ No newline at end of file diff --git a/PowerEditor/src/uchardet/LangModels/LangGermanModel.cpp b/PowerEditor/src/uchardet/LangModels/LangGermanModel.cpp deleted file mode 100644 index 7a2436b8..00000000 --- a/PowerEditor/src/uchardet/LangModels/LangGermanModel.cpp +++ /dev/null @@ -1,168 +0,0 @@ -/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 1.1/GPL 2.0/LGPL 2.1 - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is Mozilla Communicator client code. - * - * The Initial Developer of the Original Code is - * Netscape Communications Corporation. - * Portions created by the Initial Developer are Copyright (C) 1998 - * the Initial Developer. All Rights Reserved. - * - * Contributor(s): - * - * Alternatively, the contents of this file may be used under the terms of - * either the GNU General Public License Version 2 or later (the "GPL"), or - * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), - * in which case the provisions of the GPL or the LGPL are applicable instead - * of those above. If you wish to allow use of your version of this file only - * under the terms of either the GPL or the LGPL, and not to allow others to - * use your version of this file under the terms of the MPL, indicate your - * decision by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL or the LGPL. If you do not delete - * the provisions above, a recipient may use your version of this file under - * the terms of any one of the MPL, the GPL or the LGPL. - * - * ***** END LICENSE BLOCK ***** */ - -#include "../nsSBCharSetProber.h" - -/********* Language model for: German *********/ - -/** - * Generated by BuildLangModel.py - * On: 2015-12-03 22:50:46.518374 - **/ - -/* Character Mapping Table: - * ILL: illegal character. - * CTR: control character specific to the charset. - * RET: carriage/return. - * SYM: symbol (punctuation) that does not belong to word. - * NUM: 0 - 9. - * - * Other characters are ordered by probabilities - * (0 is the most common character in the language). - * - * Orders are generic to a language. So the codepoint with order X in - * CHARSET1 maps to the same character as the codepoint with the same - * order X in CHARSET2 for the same language. - * As such, it is possible to get missing order. For instance the - * ligature of 'o' and 'e' exists in ISO-8859-15 but not in ISO-8859-1 - * even though they are both used for French. Same for the euro sign. - */ -static const unsigned char Windows_1252_CharToOrderMap[] = -{ - CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */ - CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ - SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ - NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 5, 15, 12, 8, 0, 17, 14, 7, 3, 23, 16, 9, 13, 2, 11, /* 4X */ - 18, 30, 1, 4, 6, 10, 21, 19, 28, 25, 20,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 5, 15, 12, 8, 0, 17, 14, 7, 3, 23, 16, 9, 13, 2, 11, /* 6X */ - 18, 30, 1, 4, 6, 10, 21, 19, 28, 25, 20,SYM,SYM,SYM,SYM,CTR, /* 7X */ - SYM,ILL,SYM, 59,SYM,SYM,SYM,SYM,SYM,SYM, 36,SYM, 54,ILL, 42,ILL, /* 8X */ - ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 36,SYM, 54,ILL, 42, 56, /* 9X */ - SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */ - SYM,SYM,SYM,SYM,SYM, 60,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */ - 41, 31, 37, 44, 22, 49, 50, 35, 32, 29, 48, 43, 57, 33, 47, 52, /* CX */ - 53, 39, 51, 34, 40, 55, 26,SYM, 38, 58, 46, 61, 24, 45, 62, 27, /* DX */ - 41, 31, 37, 44, 22, 49, 50, 35, 32, 29, 48, 43, 57, 33, 47, 52, /* EX */ - 53, 39, 51, 34, 40, 55, 26,SYM, 38, 58, 46, 63, 24, 45, 64, 56, /* FX */ -}; -/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ - -static const unsigned char Iso_8859_1_CharToOrderMap[] = -{ - CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */ - CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ - SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ - NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 5, 15, 12, 8, 0, 17, 14, 7, 3, 23, 16, 9, 13, 2, 11, /* 4X */ - 18, 30, 1, 4, 6, 10, 21, 19, 28, 25, 20,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 5, 15, 12, 8, 0, 17, 14, 7, 3, 23, 16, 9, 13, 2, 11, /* 6X */ - 18, 30, 1, 4, 6, 10, 21, 19, 28, 25, 20,SYM,SYM,SYM,SYM,CTR, /* 7X */ - CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ - CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ - SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */ - SYM,SYM,SYM,SYM,SYM, 65,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */ - 41, 31, 37, 44, 22, 49, 50, 35, 32, 29, 48, 43, 57, 33, 47, 52, /* CX */ - 53, 39, 51, 34, 40, 55, 26,SYM, 38, 58, 46, 66, 24, 45, 67, 27, /* DX */ - 41, 31, 37, 44, 22, 49, 50, 35, 32, 29, 48, 43, 57, 33, 47, 52, /* EX */ - 53, 39, 51, 34, 40, 55, 26,SYM, 38, 58, 46, 68, 24, 45, 69, 56, /* FX */ -}; -/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ - - -/* Model Table: - * Total sequences: 1188 - * First 512 sequences: 0.9934041448127945 - * Next 512 sequences (512-1024): 0.006482829516922903 - * Rest: 0.0001130256702826099 - * Negative sequences: TODO - */ -static const PRUint8 GermanLangModel[] = -{ - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,2,3,3,0,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,2,3,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,1,2,2,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,0,2,2,3,3,2,3, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,0,0,3,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,0,3,0,3,3,2,2, - 3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,2,3,3,3,0,0,2,2, - 3,3,3,3,3,3,3,3,2,3,3,3,2,3,3,3,3,3,2,3,2,2,3,2,3,3,3,0,0,2,2, - 3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,2,3,2,2,3,2,3,3,2,0,0,2,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,2,2,2, - 3,3,3,3,3,3,3,3,3,3,2,2,3,3,3,3,3,3,3,3,3,3,2,2,2,2,0,3,3,3,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,0,3,0,3,3,1,2, - 3,3,2,3,2,3,3,3,2,3,3,3,3,2,2,2,3,2,2,2,2,2,2,2,1,3,2,0,1,2,3, - 3,3,2,3,3,3,3,2,3,3,3,3,3,3,2,3,2,3,3,2,2,2,3,2,3,3,3,0,0,2,2, - 3,3,3,3,3,3,3,3,2,3,3,3,2,3,3,2,3,2,2,2,3,2,3,2,3,3,2,0,2,2,1, - 3,3,3,3,3,3,3,3,2,3,3,3,2,2,3,3,2,2,2,2,2,2,3,2,3,3,3,0,0,2,0, - 3,3,3,3,3,3,3,3,2,3,3,3,1,3,2,2,3,3,3,2,2,2,3,2,3,3,3,0,1,2,1, - 3,3,3,3,3,3,3,2,2,3,3,3,2,3,3,2,3,3,2,2,2,2,3,2,3,2,3,0,0,2,0, - 3,3,2,3,3,3,3,3,3,3,3,3,2,2,2,2,2,3,3,2,2,2,3,2,2,2,2,0,0,2,0, - 3,3,3,3,3,3,2,2,2,2,3,3,1,2,2,2,2,2,2,2,2,2,3,3,3,2,3,0,0,0,0, - 3,2,2,3,3,3,3,2,2,3,3,3,2,3,2,3,2,2,2,3,3,2,2,2,3,3,3,0,0,2,2, - 3,2,2,3,2,3,2,0,2,2,2,3,1,2,2,2,2,2,2,2,2,2,2,1,0,2,3,0,0,2,1, - 2,3,3,3,3,2,3,3,3,3,3,2,3,3,3,2,2,3,2,0,2,2,0,0,0,0,0,2,0,0,2, - 3,2,2,3,2,3,2,2,2,2,3,3,2,2,2,1,2,1,2,0,2,0,3,2,3,2,2,0,0,2,0, - 2,3,3,0,3,1,3,3,3,3,0,0,3,2,3,3,2,2,2,1,1,0,0,0,0,0,0,2,0,0,0, - 3,3,3,2,3,3,2,2,2,3,2,3,3,3,2,2,3,2,3,2,2,2,0,2,2,2,1,0,0,1,0, - 2,3,3,2,3,0,3,3,2,3,0,1,3,3,3,2,2,3,2,2,2,2,0,0,0,0,1,3,1,0,0, - 3,2,2,3,2,2,3,2,1,2,2,2,0,2,2,3,2,2,2,2,2,2,0,0,0,0,0,0,0,0,0, - 3,1,2,3,1,3,3,2,1,2,2,2,2,0,0,2,2,2,3,2,0,2,0,0,0,2,0,0,2,2,0, - 2,3,2,0,2,2,2,2,2,2,2,2,2,2,2,3,2,2,2,1,2,2,0,2,0,0,0,0,0,0,2, - 0,1,0,2,0,2,0,0,0,0,3,2,0,0,0,0,0,1,0,2,0,0,0,0,0,0,0,0,0,0,0, -}; - - -const SequenceModel Windows_1252GermanModel = -{ - Windows_1252_CharToOrderMap, - GermanLangModel, - 31, - (float)0.9934041448127945, - PR_TRUE, - "WINDOWS-1252" -}; - -const SequenceModel Iso_8859_1GermanModel = -{ - Iso_8859_1_CharToOrderMap, - GermanLangModel, - 31, - (float)0.9934041448127945, - PR_TRUE, - "ISO-8859-1" -}; \ No newline at end of file diff --git a/PowerEditor/src/uchardet/LangModels/LangGreekModel.cpp b/PowerEditor/src/uchardet/LangModels/LangGreekModel.cpp deleted file mode 100644 index 499affe7..00000000 --- a/PowerEditor/src/uchardet/LangModels/LangGreekModel.cpp +++ /dev/null @@ -1,229 +0,0 @@ -/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 1.1/GPL 2.0/LGPL 2.1 - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is Mozilla Communicator client code. - * - * The Initial Developer of the Original Code is - * Netscape Communications Corporation. - * Portions created by the Initial Developer are Copyright (C) 1998 - * the Initial Developer. All Rights Reserved. - * - * Contributor(s): - * - * Alternatively, the contents of this file may be used under the terms of - * either the GNU General Public License Version 2 or later (the "GPL"), or - * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), - * in which case the provisions of the GPL or the LGPL are applicable instead - * of those above. If you wish to allow use of your version of this file only - * under the terms of either the GPL or the LGPL, and not to allow others to - * use your version of this file under the terms of the MPL, indicate your - * decision by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL or the LGPL. If you do not delete - * the provisions above, a recipient may use your version of this file under - * the terms of any one of the MPL, the GPL or the LGPL. - * - * ***** END LICENSE BLOCK ***** */ - -#include "../nsSBCharSetProber.h" - -/********* Language model for: Greek *********/ - -/** - * Generated by BuildLangModel.py - * On: 2016-05-25 15:21:50.073117 - **/ - -/* Character Mapping Table: - * ILL: illegal character. - * CTR: control character specific to the charset. - * RET: carriage/return. - * SYM: symbol (punctuation) that does not belong to word. - * NUM: 0 - 9. - * - * Other characters are ordered by probabilities - * (0 is the most common character in the language). - * - * Orders are generic to a language. So the codepoint with order X in - * CHARSET1 maps to the same character as the codepoint with the same - * order X in CHARSET2 for the same language. - * As such, it is possible to get missing order. For instance the - * ligature of 'o' and 'e' exists in ISO-8859-15 but not in ISO-8859-1 - * even though they are both used for French. Same for the euro sign. - */ -static const unsigned char Windows_1253_CharToOrderMap[] = -{ - CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */ - CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ - SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ - NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 32, 46, 41, 40, 30, 52, 48, 42, 33, 56, 49, 39, 44, 36, 34, /* 4X */ - 47, 59, 35, 38, 37, 43, 54, 50, 58, 53, 57,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 32, 46, 41, 40, 30, 52, 48, 42, 33, 56, 49, 39, 44, 36, 34, /* 6X */ - 47, 59, 35, 38, 37, 43, 54, 50, 58, 53, 57,SYM,SYM,SYM,SYM,CTR, /* 7X */ - SYM,ILL,SYM,SYM,SYM,SYM,SYM,SYM,ILL,SYM,ILL,SYM,ILL,ILL,ILL,ILL, /* 8X */ - ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,ILL,SYM,ILL,SYM,ILL,ILL,ILL,ILL, /* 9X */ - SYM,SYM, 17,SYM,SYM,SYM,SYM,SYM,SYM,SYM,ILL,SYM,SYM,SYM,SYM,SYM, /* AX */ - SYM,SYM,SYM,SYM,SYM, 62,SYM,SYM, 19, 22, 15,SYM, 16,SYM, 24, 28, /* BX */ - 55, 0, 25, 18, 20, 5, 29, 10, 26, 3, 8, 14, 13, 4, 31, 1, /* CX */ - 11, 6,ILL, 7, 2, 12, 27, 23, 45, 21, 51, 60, 17, 19, 22, 15, /* DX */ - 61, 0, 25, 18, 20, 5, 29, 10, 26, 3, 8, 14, 13, 4, 31, 1, /* EX */ - 11, 6, 9, 7, 2, 12, 27, 23, 45, 21, 51, 60, 16, 24, 28,ILL, /* FX */ -}; -/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ - -static const unsigned char Iso_8859_7_CharToOrderMap[] = -{ - CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */ - CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ - SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ - NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 32, 46, 41, 40, 30, 52, 48, 42, 33, 56, 49, 39, 44, 36, 34, /* 4X */ - 47, 59, 35, 38, 37, 43, 54, 50, 58, 53, 57,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 32, 46, 41, 40, 30, 52, 48, 42, 33, 56, 49, 39, 44, 36, 34, /* 6X */ - 47, 59, 35, 38, 37, 43, 54, 50, 58, 53, 57,SYM,SYM,SYM,SYM,CTR, /* 7X */ - CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ - CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ - SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,ILL,SYM, /* AX */ - SYM,SYM,SYM,SYM,SYM,SYM, 17,SYM, 19, 22, 15,SYM, 16,SYM, 24, 28, /* BX */ - 55, 0, 25, 18, 20, 5, 29, 10, 26, 3, 8, 14, 13, 4, 31, 1, /* CX */ - 11, 6,ILL, 7, 2, 12, 27, 23, 45, 21, 51, 60, 17, 19, 22, 15, /* DX */ - 61, 0, 25, 18, 20, 5, 29, 10, 26, 3, 8, 14, 13, 4, 31, 1, /* EX */ - 11, 6, 9, 7, 2, 12, 27, 23, 45, 21, 51, 60, 16, 24, 28,ILL, /* FX */ -}; -/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ - - -/* Model Table: - * Total sequences: 1579 - * First 512 sequences: 0.958419074626211 - * Next 512 sequences (512-1024): 0.03968891876305471 - * Rest: 0.0018920066107342773 - * Negative sequences: TODO - */ -static const PRUint8 GreekLangModel[] = -{ - 1,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,2,2,3,2,3,1,2, - 3,3,3,3,3,1,3,0,3,0,0,0,0,0,0,1,0,0,1,0,0,0,2, - 2,2,3,3,3,3,3,3,3,3,2,3,3,3,3,3,1,2,3,2,3,1,2, - 3,3,3,3,3,2,2,0,2,0,0,0,0,0,0,0,0,1,0,0,1,0,2, - 3,3,2,3,2,3,3,3,2,3,3,1,3,2,2,3,3,3,2,3,0,3,3, - 2,2,2,2,2,3,3,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,2,3,3,3,3,3,3,3,3,1,3,3,1,3,3,3,3,3,3,2, - 3,1,3,3,2,3,3,0,2,0,0,1,0,0,0,1,0,0,0,0,0,0,2, - 3,3,3,3,3,3,2,3,2,2,3,1,2,2,2,3,3,3,3,3,3,3,3, - 2,2,1,3,2,3,2,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0, - 2,3,3,3,3,2,3,3,3,3,2,3,3,3,3,3,2,2,3,1,3,3,1, - 3,3,3,3,3,2,2,0,3,0,0,0,0,0,0,1,0,0,0,0,0,0,2, - 3,3,3,3,3,3,3,3,3,2,3,2,3,3,3,3,3,3,3,3,3,3,3, - 3,3,2,3,2,3,2,0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,1, - 3,3,3,3,2,3,2,3,3,0,3,3,3,3,2,3,3,3,2,3,2,3,3, - 3,3,2,2,3,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,2,3,3,2,3,2,3,2,3,2,3,3,3,3,1,3,3,3,3, - 2,3,2,2,2,3,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0, - 1,1,0,1,1,1,0,1,1,0,2,1,0,1,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0, - 1,1,3,0,3,2,3,3,3,3,0,3,0,3,3,1,0,0,3,1,2,0,0, - 2,1,1,3,2,0,0,0,2,0,0,1,0,0,0,0,0,0,1,0,0,0,2, - 3,3,3,3,2,3,3,2,1,1,3,2,3,1,3,3,3,3,1,3,0,3,3, - 1,2,1,1,1,2,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0, - 3,2,3,2,3,2,3,3,3,3,2,3,0,3,3,2,2,3,3,2,3,1,2, - 3,0,3,3,2,1,3,0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,2, - 3,3,1,3,2,3,1,2,1,2,3,3,2,3,1,3,3,3,1,3,1,3,3, - 1,2,3,0,3,2,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,2, - 3,3,3,3,2,3,1,2,2,2,3,2,3,3,3,3,3,3,2,3,2,3,3, - 2,3,2,2,2,3,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,1, - 3,3,3,1,3,3,3,3,3,3,2,3,0,3,3,0,0,0,3,0,3,3,0, - 3,0,2,3,2,0,3,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,2, - 2,2,3,2,3,3,3,3,3,3,2,3,1,3,3,0,0,0,3,0,3,1,0, - 3,1,2,2,3,0,2,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,2, - 2,2,3,3,3,2,3,3,3,3,2,3,1,3,3,0,0,0,3,0,3,1,0, - 3,0,3,3,3,0,3,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,2, - 3,3,0,3,3,3,3,0,3,0,3,0,2,3,3,3,3,3,3,3,2,3,3, - 2,2,0,0,0,3,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,2,3,3,3,3,2,3,1,3,3,0,0,0,3,0,3,3,0, - 3,0,3,3,3,0,2,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,2, - 3,3,1,3,2,3,3,1,0,0,3,0,3,1,0,3,3,3,0,3,0,3,3, - 0,3,0,0,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 2,1,3,2,3,1,3,3,2,3,1,3,1,3,2,2,1,2,3,1,2,0,2, - 2,0,3,3,2,1,1,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,3,1,3,1,3,3,3,3,1,2,0,3,3,0,0,0,2,0,2,1,0, - 2,0,1,3,2,0,1,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,2, - 3,3,3,3,3,3,3,1,0,1,3,1,2,2,2,3,2,3,0,3,0,3,3, - 0,2,1,3,1,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 2,3,3,2,3,3,3,3,2,3,2,3,0,3,3,0,0,0,3,0,2,1,0, - 2,0,2,3,2,0,2,0,2,0,0,0,0,0,1,0,0,0,0,0,0,0,2, - 3,3,1,3,2,3,3,1,1,1,2,1,2,0,3,3,3,3,2,3,2,2,2, - 0,2,2,0,0,2,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,0,3,3,3,3,1,1,0,3,0,3,3,3,2,2,3,1,3,0,2,3, - 0,2,0,0,1,3,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,2,3,1,3,3,3,2,0,3,1,3,1,2,3,3,3,2,3,0,3,3, - 0,2,0,2,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 1,1,3,2,3,0,3,3,2,3,2,3,0,3,2,0,0,0,1,0,2,1,0, - 1,0,2,2,1,0,2,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,1,3,1,3,1,1,1,0,2,0,2,2,1,2,2,2,1,2,0,3,2, - 0,2,1,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,1,0,0,1,0,1,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,2,1,2,2,2,3,3,2,3,2,2,2,2,2,2,0, - 3,3,1,3,1,3,0,0,1,0,3,1,2,1,1,2,2,3,1,2,0,2,2, - 0,3,0,0,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,1,0,1,1,0,0,0,1,0,0,1,1,1,1,0,0,0,1,0,0,0,0, - 0,0,0,1,1,0,0,2,0,2,2,1,3,3,3,2,3,2,2,2,2,2,0, - 0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0, - 0,0,1,0,0,0,0,2,0,3,2,3,2,3,3,3,2,2,3,1,2,2,0, - 0,0,1,0,1,0,0,1,0,1,0,0,1,1,0,0,0,0,0,0,1,0,0, - 0,1,0,1,0,0,0,2,0,2,2,2,3,3,2,2,2,2,2,2,2,2,0, - 0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,1,0,0,0,3,0,3,3,3,2,2,2,2,2,2,2,1,2,2,0, - 0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0, - 0,0,0,0,0,0,0,3,0,3,2,2,1,2,2,2,2,3,2,1,2,1,0, - 1,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0, - 0,0,0,0,0,0,1,3,0,3,3,3,2,1,2,2,2,1,1,3,2,2,0, - 0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,2,0,2,2,2,1,1,3,2,2,1,2,2,2,2,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,3,0,3,3,2,1,1,2,2,2,2,1,1,2,2,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,3,0,2,2,2,2,1,1,2,2,1,2,1,2,1,0, - 0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,2,0,2,2,2,2,1,2,1,2,2,2,3,2,1,0, - 0,0,0,0,0,0,0,0,0,0,0,1,0,2,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,3,0,2,2,2,2,2,2,1,2,1,1,1,2,2,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,2,0,2,2,1,2,2,2,2,2,2,2,1,1,2,0, - 1,0,0,1,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0, - 0,0,0,0,0,0,0,3,0,2,2,2,1,1,1,2,2,1,1,1,2,2,0, - 2,2,0,2,0,3,0,0,0,0,3,0,2,0,0,2,1,1,0,1,0,1,2, - 0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -}; - - -const SequenceModel Windows_1253GreekModel = -{ - Windows_1253_CharToOrderMap, - GreekLangModel, - 46, - (float)0.958419074626211, - PR_FALSE, - "WINDOWS-1253" -}; - -const SequenceModel Iso_8859_7GreekModel = -{ - Iso_8859_7_CharToOrderMap, - GreekLangModel, - 46, - (float)0.958419074626211, - PR_FALSE, - "ISO-8859-7" -}; diff --git a/PowerEditor/src/uchardet/LangModels/LangHungarianModel.cpp b/PowerEditor/src/uchardet/LangModels/LangHungarianModel.cpp deleted file mode 100644 index 54708b2e..00000000 --- a/PowerEditor/src/uchardet/LangModels/LangHungarianModel.cpp +++ /dev/null @@ -1,169 +0,0 @@ -/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 1.1/GPL 2.0/LGPL 2.1 - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is Mozilla Communicator client code. - * - * The Initial Developer of the Original Code is - * Netscape Communications Corporation. - * Portions created by the Initial Developer are Copyright (C) 1998 - * the Initial Developer. All Rights Reserved. - * - * Contributor(s): - * - * Alternatively, the contents of this file may be used under the terms of - * either the GNU General Public License Version 2 or later (the "GPL"), or - * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), - * in which case the provisions of the GPL or the LGPL are applicable instead - * of those above. If you wish to allow use of your version of this file only - * under the terms of either the GPL or the LGPL, and not to allow others to - * use your version of this file under the terms of the MPL, indicate your - * decision by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL or the LGPL. If you do not delete - * the provisions above, a recipient may use your version of this file under - * the terms of any one of the MPL, the GPL or the LGPL. - * - * ***** END LICENSE BLOCK ***** */ - -#include "../nsSBCharSetProber.h" - -/********* Language model for: Hungarian *********/ - -/** - * Generated by BuildLangModel.py - * On: 2015-12-12 18:02:46.730481 - **/ - -/* Character Mapping Table: - * ILL: illegal character. - * CTR: control character specific to the charset. - * RET: carriage/return. - * SYM: symbol (punctuation) that does not belong to word. - * NUM: 0 - 9. - * - * Other characters are ordered by probabilities - * (0 is the most common character in the language). - * - * Orders are generic to a language. So the codepoint with order X in - * CHARSET1 maps to the same character as the codepoint with the same - * order X in CHARSET2 for the same language. - * As such, it is possible to get missing order. For instance the - * ligature of 'o' and 'e' exists in ISO-8859-15 but not in ISO-8859-1 - * even though they are both used for French. Same for the euro sign. - */ -static const unsigned char Iso_8859_2_CharToOrderMap[] = -{ - CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */ - CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ - SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ - NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 1, 15, 23, 16, 0, 24, 13, 20, 7, 22, 9, 4, 12, 6, 8, /* 4X */ - 21, 34, 5, 3, 2, 19, 17, 32, 33, 18, 10,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 1, 15, 23, 16, 0, 24, 13, 20, 7, 22, 9, 4, 12, 6, 8, /* 6X */ - 21, 34, 5, 3, 2, 19, 17, 32, 33, 18, 10,SYM,SYM,SYM,SYM,CTR, /* 7X */ - CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ - CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ - SYM, 55,SYM, 42,SYM, 56, 46,SYM,SYM, 37, 52, 57, 58,SYM, 48, 59, /* AX */ - SYM, 60,SYM, 42,SYM, 61, 46,SYM,SYM, 37, 52, 62, 63,SYM, 48, 64, /* BX */ - 65, 11, 40, 36, 35, 66, 38, 39, 41, 14, 50, 67, 53, 28, 45, 68, /* CX */ - 49, 43, 54, 26, 69, 27, 25,SYM, 44, 70, 30, 31, 29, 47, 51, 71, /* DX */ - 72, 11, 40, 36, 35, 73, 38, 39, 41, 14, 50, 74, 53, 28, 45, 75, /* EX */ - 49, 43, 54, 26, 76, 27, 25,SYM, 44, 77, 30, 31, 29, 47, 51,SYM, /* FX */ -}; -/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ - -static const unsigned char Windows_1250_CharToOrderMap[] = -{ - CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */ - CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ - SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ - NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 1, 15, 23, 16, 0, 24, 13, 20, 7, 22, 9, 4, 12, 6, 8, /* 4X */ - 21, 34, 5, 3, 2, 19, 17, 32, 33, 18, 10,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 1, 15, 23, 16, 0, 24, 13, 20, 7, 22, 9, 4, 12, 6, 8, /* 6X */ - 21, 34, 5, 3, 2, 19, 17, 32, 33, 18, 10,SYM,SYM,SYM,SYM,CTR, /* 7X */ - SYM,ILL,SYM,ILL,SYM,SYM,SYM,SYM,ILL,SYM, 37,SYM, 46, 78, 48, 79, /* 8X */ - ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,ILL,SYM, 37,SYM, 46, 80, 48, 81, /* 9X */ - SYM,SYM,SYM, 42,SYM, 82,SYM,SYM,SYM,SYM, 52,SYM,SYM,SYM,SYM, 83, /* AX */ - SYM,SYM,SYM, 42,SYM,SYM,SYM,SYM,SYM, 84, 52,SYM, 85,SYM, 86, 87, /* BX */ - 88, 11, 40, 36, 35, 89, 38, 39, 41, 14, 50, 90, 53, 28, 45, 91, /* CX */ - 49, 43, 54, 26, 92, 27, 25,SYM, 44, 93, 30, 31, 29, 47, 51, 94, /* DX */ - 95, 11, 40, 36, 35, 96, 38, 39, 41, 14, 50, 97, 53, 28, 45, 98, /* EX */ - 49, 43, 54, 26, 99, 27, 25,SYM, 44,100, 30, 31, 29, 47, 51,SYM, /* FX */ -}; -/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ - - -/* Model Table: - * Total sequences: 1084 - * First 512 sequences: 0.9748272224933486 - * Next 512 sequences (512-1024): 0.024983863604162403 - * Rest: 0.0001889139024889644 - * Negative sequences: TODO - */ -static const PRUint8 HungarianLangModel[] = -{ - 3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,3,3,3,3,3,3,3,3,3,3,3,1,0,2,2,0,0, - 3,2,3,3,3,3,3,3,2,3,3,2,3,3,2,3,3,3,3,3,3,3,3,3,3,0,0,2,2,1,2,1, - 3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,3,3,3,3,2,3,2,2,3,3,3,3,3,2,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,3,2,3,3,3,2,3,2,2,3,3,3,3,3,2, - 3,3,3,3,3,2,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,2,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,3,3,3,3,3,3,2, - 3,3,3,3,3,2,3,3,3,3,3,3,2,3,3,3,3,3,3,3,2,2,2,3,3,3,2,3,2,2,2,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,1,3,3,3,2,3,3,2,3,0,2,2,2,2, - 3,2,3,3,3,3,3,2,2,3,3,2,3,3,0,3,3,3,2,3,3,3,2,3,3,0,2,0,0,0,0,0, - 3,3,3,3,3,3,3,3,3,3,2,3,2,2,3,3,2,2,2,3,2,2,2,2,2,3,3,2,3,3,2,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,2,2,2,3,3,3,3,3,2,2, - 1,2,3,3,3,3,3,3,2,3,3,0,3,3,2,3,3,3,2,2,2,3,3,3,2,0,0,0,2,0,0,0, - 3,3,3,2,3,2,2,3,3,2,3,3,3,2,3,3,2,2,2,3,2,3,2,2,2,2,3,2,2,2,2,3, - 3,3,3,3,3,3,3,3,3,3,2,3,2,3,3,3,2,3,3,3,3,2,3,2,2,3,3,2,3,2,2,2, - 0,1,3,3,3,3,3,2,2,3,3,0,3,3,2,3,3,3,0,0,2,3,2,3,0,0,0,0,0,2,0,0, - 3,3,2,3,3,3,2,3,3,2,2,3,2,1,3,3,3,2,2,3,1,2,2,2,2,2,3,3,3,2,2,2, - 3,3,3,3,2,3,3,3,3,2,2,3,3,2,3,2,2,3,2,3,2,2,3,2,2,3,3,3,3,2,2,2, - 3,3,2,2,2,2,2,3,3,2,0,3,0,2,3,2,2,2,1,2,2,0,2,1,2,3,2,3,3,2,2,2, - 3,3,3,3,2,2,3,3,3,2,3,3,3,2,3,3,2,3,1,3,3,2,2,2,2,2,2,2,2,2,2,3, - 3,2,3,3,3,3,3,2,2,3,2,3,3,3,0,3,3,2,2,2,2,2,2,3,2,0,0,0,1,0,0,0, - 3,3,2,2,2,2,2,3,3,2,0,3,2,2,2,2,2,2,2,3,2,0,2,2,2,2,2,2,3,2,2,2, - 3,3,3,3,3,3,2,3,3,2,2,3,1,2,3,2,2,2,2,3,2,3,3,3,2,2,2,2,3,3,2,0, - 3,3,3,2,2,2,3,2,3,2,2,3,2,2,3,2,3,2,0,3,2,2,2,2,2,2,3,0,2,2,3,2, - 3,3,2,3,2,2,2,3,3,3,3,2,2,2,3,2,2,2,2,2,3,0,0,2,2,2,2,0,3,0,0,0, - 3,3,2,2,2,3,2,3,3,0,0,2,2,2,3,2,2,2,2,3,0,2,2,2,2,3,2,3,2,3,2,2, - 2,0,3,3,3,3,3,0,0,3,3,0,2,3,0,3,3,3,0,0,2,2,2,2,1,0,0,0,0,0,0,0, - 2,2,3,3,3,3,3,3,2,3,3,2,3,3,2,3,3,3,0,0,2,3,3,2,2,2,0,0,1,2,2,0, - 2,2,3,3,3,3,2,3,2,3,3,2,2,2,2,3,3,2,0,0,2,2,3,2,2,1,0,0,1,2,1,0, - 0,2,3,2,2,3,3,2,2,2,3,0,3,3,0,2,2,3,0,2,1,2,3,2,2,0,0,0,0,0,0,0, - 0,0,3,2,3,2,3,0,0,3,2,0,2,3,0,0,2,2,0,0,1,0,2,0,0,0,0,0,0,0,0,0, - 2,2,3,3,3,2,3,0,0,2,2,0,0,3,0,2,2,2,0,0,2,2,3,2,1,0,0,0,0,0,0,0, - 2,2,2,2,3,2,2,2,0,3,2,0,2,2,0,2,2,3,0,2,2,0,2,2,2,0,0,0,0,0,0,0, -}; - - -const SequenceModel Iso_8859_2HungarianModel = -{ - Iso_8859_2_CharToOrderMap, - HungarianLangModel, - 32, - (float)0.9748272224933486, - PR_FALSE, - "ISO-8859-2" -}; - -const SequenceModel Windows_1250HungarianModel = -{ - Windows_1250_CharToOrderMap, - HungarianLangModel, - 32, - (float)0.9748272224933486, - PR_FALSE, - "WINDOWS-1250" -}; \ No newline at end of file diff --git a/PowerEditor/src/uchardet/LangModels/LangSpanishModel.cpp b/PowerEditor/src/uchardet/LangModels/LangSpanishModel.cpp deleted file mode 100644 index 362bc5ea..00000000 --- a/PowerEditor/src/uchardet/LangModels/LangSpanishModel.cpp +++ /dev/null @@ -1,201 +0,0 @@ -/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 1.1/GPL 2.0/LGPL 2.1 - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is Mozilla Communicator client code. - * - * The Initial Developer of the Original Code is - * Netscape Communications Corporation. - * Portions created by the Initial Developer are Copyright (C) 1998 - * the Initial Developer. All Rights Reserved. - * - * Contributor(s): - * - * Alternatively, the contents of this file may be used under the terms of - * either the GNU General Public License Version 2 or later (the "GPL"), or - * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), - * in which case the provisions of the GPL or the LGPL are applicable instead - * of those above. If you wish to allow use of your version of this file only - * under the terms of either the GPL or the LGPL, and not to allow others to - * use your version of this file under the terms of the MPL, indicate your - * decision by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL or the LGPL. If you do not delete - * the provisions above, a recipient may use your version of this file under - * the terms of any one of the MPL, the GPL or the LGPL. - * - * ***** END LICENSE BLOCK ***** */ - -#include "../nsSBCharSetProber.h" - -/********* Language model for: Spanish *********/ - -/** - * Generated by BuildLangModel.py - * On: 2015-12-12 18:39:02.290370 - **/ - -/* Character Mapping Table: - * ILL: illegal character. - * CTR: control character specific to the charset. - * RET: carriage/return. - * SYM: symbol (punctuation) that does not belong to word. - * NUM: 0 - 9. - * - * Other characters are ordered by probabilities - * (0 is the most common character in the language). - * - * Orders are generic to a language. So the codepoint with order X in - * CHARSET1 maps to the same character as the codepoint with the same - * order X in CHARSET2 for the same language. - * As such, it is possible to get missing order. For instance the - * ligature of 'o' and 'e' exists in ISO-8859-15 but not in ISO-8859-1 - * even though they are both used for French. Same for the euro sign. - */ -static const unsigned char Iso_8859_1_CharToOrderMap[] = -{ - CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */ - CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ - SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ - NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 1, 14, 10, 8, 0, 16, 15, 20, 5, 23, 27, 7, 12, 3, 2, /* 4X */ - 13, 21, 6, 4, 9, 11, 18, 31, 28, 17, 24,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 1, 14, 10, 8, 0, 16, 15, 20, 5, 23, 27, 7, 12, 3, 2, /* 6X */ - 13, 21, 6, 4, 9, 11, 18, 31, 28, 17, 24,SYM,SYM,SYM,SYM,CTR, /* 7X */ - CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ - CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ - SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */ - SYM,SYM,SYM,SYM,SYM, 52,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */ - 33, 25, 39, 46, 37, 45, 47, 35, 36, 26, 48, 40, 53, 22, 41, 43, /* CX */ - 49, 29, 38, 19, 50, 54, 34,SYM, 44, 51, 30, 55, 32, 42, 56, 57, /* DX */ - 33, 25, 39, 46, 37, 45, 47, 35, 36, 26, 48, 40, 58, 22, 41, 43, /* EX */ - 49, 29, 38, 19, 50, 59, 34,SYM, 44, 51, 30, 60, 32, 42, 61, 62, /* FX */ -}; -/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ - -static const unsigned char Iso_8859_15_CharToOrderMap[] = -{ - CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */ - CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ - SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ - NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 1, 14, 10, 8, 0, 16, 15, 20, 5, 23, 27, 7, 12, 3, 2, /* 4X */ - 13, 21, 6, 4, 9, 11, 18, 31, 28, 17, 24,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 1, 14, 10, 8, 0, 16, 15, 20, 5, 23, 27, 7, 12, 3, 2, /* 6X */ - 13, 21, 6, 4, 9, 11, 18, 31, 28, 17, 24,SYM,SYM,SYM,SYM,CTR, /* 7X */ - CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ - CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ - SYM,SYM,SYM,SYM,SYM,SYM, 63,SYM, 64,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */ - SYM,SYM,SYM,SYM, 65, 66,SYM,SYM, 67,SYM,SYM,SYM, 68, 69, 70,SYM, /* BX */ - 33, 25, 39, 46, 37, 45, 47, 35, 36, 26, 48, 40, 71, 22, 41, 43, /* CX */ - 49, 29, 38, 19, 50, 72, 34,SYM, 44, 51, 30, 73, 32, 42, 74, 75, /* DX */ - 33, 25, 39, 46, 37, 45, 47, 35, 36, 26, 48, 40, 76, 22, 41, 43, /* EX */ - 49, 29, 38, 19, 50, 77, 34,SYM, 44, 51, 30, 78, 32, 42, 79, 80, /* FX */ -}; -/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ - -static const unsigned char Windows_1252_CharToOrderMap[] = -{ - CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */ - CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ - SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ - NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 1, 14, 10, 8, 0, 16, 15, 20, 5, 23, 27, 7, 12, 3, 2, /* 4X */ - 13, 21, 6, 4, 9, 11, 18, 31, 28, 17, 24,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 1, 14, 10, 8, 0, 16, 15, 20, 5, 23, 27, 7, 12, 3, 2, /* 6X */ - 13, 21, 6, 4, 9, 11, 18, 31, 28, 17, 24,SYM,SYM,SYM,SYM,CTR, /* 7X */ - SYM,ILL,SYM, 81,SYM,SYM,SYM,SYM,SYM,SYM, 82,SYM, 83,ILL, 84,ILL, /* 8X */ - ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 85,SYM, 86,ILL, 87, 88, /* 9X */ - SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */ - SYM,SYM,SYM,SYM,SYM, 89,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */ - 33, 25, 39, 46, 37, 45, 47, 35, 36, 26, 48, 40, 90, 22, 41, 43, /* CX */ - 49, 29, 38, 19, 50, 91, 34,SYM, 44, 51, 30, 92, 32, 42, 93, 94, /* DX */ - 33, 25, 39, 46, 37, 45, 47, 35, 36, 26, 48, 40, 95, 22, 41, 43, /* EX */ - 49, 29, 38, 19, 50, 96, 34,SYM, 44, 51, 30, 97, 32, 42, 98, 99, /* FX */ -}; -/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ - - -/* Model Table: - * Total sequences: 897 - * First 512 sequences: 0.9970385677528184 - * Next 512 sequences (512-1024): 0.0029614322471815486 - * Rest: 4.597017211338539e-17 - * Negative sequences: TODO - */ -static const PRUint8 SpanishLangModel[] = -{ - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,0,3,3,3,2,3,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,2,3,3,3,3,3,3,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,2,3,3,2,2,3,3,2,2,3,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,0,3,2,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,0,2,3,3,3,0,0,2,2,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,3,3,3,3,2,0,3,2,2, - 3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,2,3,3,2,2,0,2,2,0, - 3,3,3,2,3,3,3,3,2,2,2,3,3,2,2,3,2,3,3,3,3,2,3,2,2,3,3,2,0,0,2,2,2, - 3,3,3,3,3,3,3,3,2,3,3,3,2,2,3,2,2,3,2,3,3,0,3,2,2,3,3,0,0,0,2,2,2, - 3,3,3,3,3,3,3,3,2,3,3,3,2,2,2,2,2,3,0,3,3,2,3,0,2,3,3,3,0,0,2,0,0, - 3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,2,3,0,2,0, - 3,3,3,3,3,3,2,2,2,2,2,3,3,3,3,2,2,3,0,3,2,0,3,2,0,3,3,2,2,0,3,2,2, - 3,3,3,2,3,3,3,3,2,3,3,3,2,3,3,0,2,2,2,3,3,0,3,2,0,3,3,2,0,0,3,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,2,2,3,2,3,2,2,3,3,0,3,2,2,0,0,2,2,0, - 3,3,3,3,3,3,3,3,3,3,0,3,3,0,2,2,2,2,2,3,3,0,3,2,2,2,3,2,0,0,3,2,3, - 3,3,3,2,2,3,3,3,2,3,2,3,2,2,2,2,3,2,0,3,0,0,3,2,0,2,2,2,0,0,3,2,0, - 3,3,3,3,3,3,3,3,2,2,2,3,2,2,2,2,2,2,0,3,2,0,0,2,2,2,2,2,0,0,2,2,0, - 3,3,3,2,2,3,2,2,2,0,2,3,0,2,0,2,2,2,2,3,0,0,3,0,0,2,3,2,0,0,0,0,0, - 0,0,0,3,3,0,3,3,3,3,3,0,3,3,2,3,2,0,3,0,0,0,0,0,0,0,0,0,2,0,0,0,0, - 3,3,3,3,2,3,3,3,3,3,2,3,3,0,2,0,2,3,2,2,2,0,3,2,2,2,3,0,2,0,2,2,2, - 2,3,2,0,2,2,0,2,2,2,0,3,0,0,0,0,0,0,0,0,0,2,2,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,0,2,2,3,3,3,2,3,2,3,3,3,0,2,0,0,2,0,2,2,0,0,0,0,0,0,0,0, - 3,3,3,2,0,3,2,2,2,2,0,3,2,2,0,0,0,0,0,3,0,0,2,2,0,2,3,0,0,0,2,0,2, - 3,3,3,2,0,3,2,0,2,2,2,3,2,2,2,3,0,2,0,3,2,3,2,0,3,3,2,2,0,0,2,0,0, - 2,0,0,3,3,2,3,3,2,3,3,2,3,3,2,3,3,2,2,0,2,2,0,2,2,0,0,0,2,2,0,0,0, - 2,3,2,3,3,2,3,3,3,3,3,2,2,3,2,3,2,2,2,0,0,0,0,2,0,0,0,0,3,0,0,0,0, - 3,3,3,2,3,3,3,3,2,2,2,3,3,0,2,2,2,3,2,0,2,0,2,0,0,0,0,2,0,0,2,2,0, - 3,3,3,2,2,3,2,2,2,3,3,3,2,3,2,0,2,2,3,2,2,2,0,2,0,2,2,2,3,0,0,2,0, - 3,3,3,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,2,0,0,3,0,0,2,0,0,0,0,0,0,0, - 2,3,2,3,3,0,2,3,2,3,2,0,3,2,3,0,2,0,0,0,0,0,0,2,2,0,0,0,0,2,0,0,0, - 3,3,3,3,2,3,2,2,2,2,2,2,0,0,2,0,2,2,0,0,2,0,0,2,0,2,0,2,0,0,0,2,0, - 3,0,0,2,2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,2,0,0,0,0,0,0, -}; - - -const SequenceModel Iso_8859_1SpanishModel = -{ - Iso_8859_1_CharToOrderMap, - SpanishLangModel, - 33, - (float)0.9970385677528184, - PR_TRUE, - "ISO-8859-1" -}; - -const SequenceModel Iso_8859_15SpanishModel = -{ - Iso_8859_15_CharToOrderMap, - SpanishLangModel, - 33, - (float)0.9970385677528184, - PR_TRUE, - "ISO-8859-15" -}; - -const SequenceModel Windows_1252SpanishModel = -{ - Windows_1252_CharToOrderMap, - SpanishLangModel, - 33, - (float)0.9970385677528184, - PR_TRUE, - "WINDOWS-1252" -}; \ No newline at end of file diff --git a/PowerEditor/src/uchardet/LangModels/LangThaiModel.cpp b/PowerEditor/src/uchardet/LangModels/LangThaiModel.cpp deleted file mode 100644 index 8e90afbb..00000000 --- a/PowerEditor/src/uchardet/LangModels/LangThaiModel.cpp +++ /dev/null @@ -1,265 +0,0 @@ -/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 1.1/GPL 2.0/LGPL 2.1 - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is Mozilla Communicator client code. - * - * The Initial Developer of the Original Code is - * Netscape Communications Corporation. - * Portions created by the Initial Developer are Copyright (C) 1998 - * the Initial Developer. All Rights Reserved. - * - * Contributor(s): - * - * Alternatively, the contents of this file may be used under the terms of - * either the GNU General Public License Version 2 or later (the "GPL"), or - * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), - * in which case the provisions of the GPL or the LGPL are applicable instead - * of those above. If you wish to allow use of your version of this file only - * under the terms of either the GPL or the LGPL, and not to allow others to - * use your version of this file under the terms of the MPL, indicate your - * decision by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL or the LGPL. If you do not delete - * the provisions above, a recipient may use your version of this file under - * the terms of any one of the MPL, the GPL or the LGPL. - * - * ***** END LICENSE BLOCK ***** */ - -#include "../nsSBCharSetProber.h" - -/********* Language model for: Thai *********/ - -/** - * Generated by BuildLangModel.py - * On: 2015-12-04 03:05:06.182099 - **/ - -/* Character Mapping Table: - * ILL: illegal character. - * CTR: control character specific to the charset. - * RET: carriage/return. - * SYM: symbol (punctuation) that does not belong to word. - * NUM: 0 - 9. - * - * Other characters are ordered by probabilities - * (0 is the most common character in the language). - * - * Orders are generic to a language. So the codepoint with order X in - * CHARSET1 maps to the same character as the codepoint with the same - * order X in CHARSET2 for the same language. - * As such, it is possible to get missing order. For instance the - * ligature of 'o' and 'e' exists in ISO-8859-15 but not in ISO-8859-1 - * even though they are both used for French. Same for the euro sign. - */ -static const unsigned char Tis_620_CharToOrderMap[] = -{ - CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */ - CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ - SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ - NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 66, 70, 67, 80, 78, 87, 85, 73, 79, 93, 88, 84, 68, 77, 81, /* 4X */ - 75,101, 74, 61, 71, 86, 96, 90,103,100, 99,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 35, 64, 48, 52, 32, 60, 65, 54, 36, 97, 76, 46, 56, 41, 40, /* 6X */ - 59,104, 43, 45, 44, 55, 72, 82, 94, 57, 92,SYM,SYM,SYM,SYM,CTR, /* 7X */ - CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ - CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ - ILL, 3, 23,105, 15,106, 89, 5, 21, 63, 26, 31,102, 42, 69, 58, /* AX */ - 49, 91, 83, 34, 9, 17, 30, 12, 39, 1, 16, 19, 33, 62, 22, 47, /* BX */ - 38, 7, 10, 2, 50, 11,107, 8, 28, 37, 13, 18, 98, 4, 53, 95, /* CX */ - 14,SYM, 0, 29,SYM,SYM,SYM,SYM,SYM,SYM,SYM,ILL,ILL,ILL,ILL,SYM, /* DX */ - 6, 20, 27, 24, 25,108, 51,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,109, /* EX */ - NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,110,111,ILL,ILL,ILL,ILL, /* FX */ -}; -/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ - -static const unsigned char Iso_8859_11_CharToOrderMap[] = -{ - CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */ - CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ - SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ - NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 66, 70, 67, 80, 78, 87, 85, 73, 79, 93, 88, 84, 68, 77, 81, /* 4X */ - 75,101, 74, 61, 71, 86, 96, 90,103,100, 99,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 35, 64, 48, 52, 32, 60, 65, 54, 36, 97, 76, 46, 56, 41, 40, /* 6X */ - 59,104, 43, 45, 44, 55, 72, 82, 94, 57, 92,SYM,SYM,SYM,SYM,CTR, /* 7X */ - CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ - CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ - SYM, 3, 23,112, 15,113, 89, 5, 21, 63, 26, 31,102, 42, 69, 58, /* AX */ - 49, 91, 83, 34, 9, 17, 30, 12, 39, 1, 16, 19, 33, 62, 22, 47, /* BX */ - 38, 7, 10, 2, 50, 11,114, 8, 28, 37, 13, 18, 98, 4, 53, 95, /* CX */ - 14,SYM, 0, 29,SYM,SYM,SYM,SYM,SYM,SYM,SYM,ILL,ILL,ILL,ILL,SYM, /* DX */ - 6, 20, 27, 24, 25,115, 51,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,116, /* EX */ - NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,117,118,ILL,ILL,ILL,ILL, /* FX */ -}; -/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ - - -/* Model Table: - * Total sequences: 2324 - * First 512 sequences: 0.8815720594354438 - * Next 512 sequences (512-1024): 0.0920860122682917 - * Rest: 0.026341928296264486 - * Negative sequences: TODO - */ -static const PRUint8 ThaiLangModel[] = -{ - 0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,2,3, - 0,2,3,0,0,3,2,3,0,0,2,0,0,0,0,2,0,1,1,1,0,2,0,0,0,0,1,0,0,0,1,1, - 3,3,3,3,3,2,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3, - 0,3,0,0,0,1,3,3,0,0,1,0,0,0,0,2,0,2,1,2,0,1,0,0,0,0,0,0,0,0,2,1, - 3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,2,3,3,3,3,3,3,2,2,2,3,1,3,2, - 0,2,3,0,0,2,2,1,0,0,0,0,0,0,0,1,0,1,1,0,0,1,0,0,0,0,0,0,0,0,2,1, - 3,3,3,3,3,2,3,3,3,3,2,3,3,3,2,3,2,3,3,3,3,3,3,3,3,3,2,3,2,3,2,3, - 0,2,1,0,0,3,2,1,0,0,0,0,0,0,0,1,0,3,3,1,0,1,0,0,0,0,3,0,0,0,1,1, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,2,2,2,2,3,3,2,2,1,2,2,2, - 0,2,0,0,0,0,2,2,0,0,1,0,0,0,0,2,0,0,1,1,0,1,0,0,0,0,0,0,0,0,1,1, - 3,3,3,3,3,2,3,3,3,3,3,3,3,3,1,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,2, - 0,3,0,0,0,1,2,2,0,0,1,0,0,0,0,2,0,1,1,2,0,2,0,0,0,0,0,0,0,0,2,1, - 0,3,3,3,3,2,0,3,3,3,3,3,3,3,0,3,3,3,3,3,0,3,3,3,0,0,3,0,3,0,1,3, - 0,2,0,0,0,2,2,2,0,0,0,0,0,0,0,3,0,0,0,0,0,2,0,0,0,0,0,0,0,0,1,3, - 3,3,3,3,3,2,3,3,3,3,2,3,3,3,2,3,3,3,3,3,3,2,3,3,3,3,2,2,1,0,2,1, - 0,2,2,0,1,2,2,1,0,0,1,0,0,0,0,1,0,1,1,1,0,1,0,0,0,0,0,0,0,0,1,1, - 3,3,3,3,3,3,3,3,2,3,3,3,3,2,2,2,3,2,2,2,3,3,3,2,2,2,2,2,2,0,2,2, - 0,1,2,0,0,0,1,0,0,0,1,0,0,0,0,1,0,0,3,1,0,1,0,0,0,0,0,0,0,0,1,1, - 3,3,3,3,3,3,3,2,3,2,3,3,3,3,0,3,2,3,2,2,3,2,2,3,3,3,2,2,1,3,2,1, - 0,1,0,0,0,0,2,1,0,0,0,0,0,0,0,1,0,1,1,1,0,1,0,0,0,0,0,0,0,0,1,1, - 3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,2,1,2,2, - 0,2,0,0,0,0,3,1,0,0,1,0,0,0,0,1,0,1,1,1,0,1,0,0,0,0,0,0,0,0,1,1, - 3,3,2,3,3,3,3,3,3,3,2,3,3,3,3,2,2,3,2,2,2,2,1,3,2,2,2,2,1,3,1,2, - 0,1,0,0,0,0,1,1,0,0,1,0,0,0,0,1,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,1, - 3,3,3,1,2,1,2,1,2,3,3,1,1,2,2,3,2,1,2,1,1,1,2,1,1,1,1,1,3,3,0,1, - 0,0,0,0,0,1,1,3,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,2,3,3,3,3,3,3,2,3,2,2,2,2,3,3,3,2,2,1,1,1,2,2,1,2,1,3,3,2, - 0,1,0,0,0,0,2,2,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, - 0,3,3,3,3,1,3,3,3,3,3,2,3,3,0,3,3,3,3,3,3,3,3,2,3,3,3,3,2,0,2,2, - 0,2,1,0,0,0,2,2,0,0,1,0,0,0,0,1,0,1,1,0,0,2,0,0,0,0,1,0,0,0,1,1, - 3,3,3,1,3,2,2,3,3,2,2,3,1,1,2,2,1,2,1,2,1,3,1,1,1,1,1,2,0,3,0,1, - 0,0,2,0,0,0,1,1,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0, - 3,3,3,3,3,1,3,2,3,3,2,3,3,3,1,3,3,3,3,3,3,2,2,2,3,3,2,2,2,2,2,2, - 0,2,0,0,0,0,2,1,0,0,0,0,0,0,0,1,0,1,0,1,0,1,0,0,0,0,1,0,0,0,1,1, - 3,3,3,3,3,1,2,1,2,1,3,2,2,2,3,1,2,2,1,1,2,1,1,2,2,1,1,2,1,3,3,1, - 0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0, - 3,3,3,1,2,1,0,3,3,1,2,3,1,1,1,0,0,3,1,1,0,0,1,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,2,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,2,3,3,3,1,2,1,2,2,2,3,2,2,2,1,1,2,1,2,2,2,1,1,2,2,1,1,1,0,2,1, - 0,1,0,0,0,0,1,1,0,0,1,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,3,0,0,0,0,0, - 0,3,3,3,3,1,0,3,2,2,2,3,3,3,0,3,3,3,3,3,0,1,2,2,0,0,1,0,0,0,3,3, - 0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0, - 3,3,3,3,3,1,3,2,2,2,1,1,2,2,3,2,1,2,1,1,2,3,3,2,2,2,1,2,0,3,1,2, - 0,1,1,0,0,0,1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, - 3,1,3,2,3,1,2,2,3,2,3,3,3,2,0,1,3,1,1,1,2,2,1,2,1,1,1,1,1,1,1,0, - 0,1,1,0,0,0,1,1,0,0,1,0,0,0,0,1,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,1,1,3,0,1,1,2,1,2,1,2,1,0,1,1,2,1,1,1,1,1,1,1,1,1,1,0,1,1,1, - 0,0,3,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,3,0,3,0,0,0,0,0,2,1,0,0,2,0,1,1,3,3,1,0,3,0,0,0,0,3,0,0,0,0,0, - 0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,1,3,2,2,0,0,3,3,3,0,2,3,1,0,2,2,2,2,3,0,1,1,3,0,0,1,0,0,0,1,2, - 0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0, - 3,3,1,2,3,1,2,2,2,1,2,2,2,2,1,1,2,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1, - 0,0,0,0,0,1,0,1,0,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0, - 0,3,3,2,3,0,0,2,1,3,2,3,3,1,0,3,2,3,1,2,0,2,2,1,0,0,1,0,1,0,1,2, - 0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,2,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1, - 3,3,2,2,2,0,2,2,2,1,2,1,2,2,0,1,1,2,1,1,2,2,1,2,2,2,1,1,1,0,1,1, - 0,0,0,0,0,2,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,2,0,0,0,0,0,0,0,0,1,0, - 0,3,3,3,2,2,3,2,2,2,1,3,2,2,0,3,2,2,3,1,3,1,2,2,3,2,1,2,1,0,2,1, - 0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,0,1,0, - 3,2,1,1,2,1,2,2,2,1,1,2,2,1,1,1,2,1,1,1,2,1,1,1,2,1,1,1,1,0,1,0, - 0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0, - 3,3,1,1,3,2,2,1,1,1,1,2,1,0,1,1,1,2,0,1,1,0,0,0,0,1,1,1,0,0,0,1, - 0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0, - 2,0,0,2,2,0,0,0,2,3,0,3,2,3,3,0,2,0,0,0,2,0,1,2,2,1,0,2,2,1,0,0, - 1,2,0,1,0,1,1,1,1,1,2,3,0,1,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,1,2,2,1,1,1,1,1,1,1,1,2,2,3,1,1,1,2,1,1,1,1,1,1,1,1,1,1,0,1,1, - 0,0,0,0,0,0,2,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0, - 1,0,0,1,2,0,0,0,1,3,0,3,3,2,3,0,2,0,0,0,2,0,1,1,2,2,0,2,1,1,0,0, - 0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 2,0,0,3,1,0,0,0,3,3,0,2,3,3,2,0,3,0,0,0,2,0,1,1,2,0,0,1,1,0,0,0, - 3,1,1,2,1,0,1,1,1,1,2,0,2,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0,0,1,1, - 0,1,3,0,0,1,2,0,0,0,0,0,0,0,0,1,0,2,0,0,0,0,0,0,0,0,1,0,0,0,1,0, - 3,0,2,1,1,0,0,1,0,0,1,0,2,1,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,1,3,1,2,1,1,2,1,1,1,0,1,1,0,1,1,1,1,1,1,0,0,1,1,1,0,1,1,1,0,0, - 0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 2,0,0,1,1,0,0,0,1,3,0,3,2,2,2,0,2,0,0,0,2,0,1,2,2,1,0,2,3,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0, - 3,0,0,2,2,0,0,0,2,2,0,1,3,2,1,0,2,0,0,0,3,0,1,1,1,1,0,0,1,0,0,0, - 3,1,1,1,1,0,2,1,1,0,0,1,2,1,0,1,1,1,2,1,1,1,1,1,2,1,2,1,1,0,1,1, - 0,0,0,0,0,0,1,0,0,0,3,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,0,0,3,3,0,0,0,2,2,0,2,2,2,1,0,2,0,0,0,2,0,1,1,1,2,0,1,1,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,0,0,2,3,0,0,0,2,1,0,2,2,2,1,0,1,0,0,0,1,0,3,2,1,2,0,1,1,0,0,0, - 0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 2,0,0,1,2,0,0,0,2,1,0,1,3,2,1,0,2,0,0,0,1,0,2,1,1,1,0,1,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,0,0,2,2,0,0,0,2,2,0,0,1,1,2,0,1,0,0,0,1,0,1,1,1,1,0,1,1,0,0,0, - 1,1,3,2,2,0,2,1,1,1,1,2,1,1,0,1,1,2,1,0,1,1,1,1,1,1,1,1,0,0,0,1, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 2,0,0,2,2,0,0,0,2,0,0,1,2,1,1,0,1,0,0,0,0,0,2,1,0,1,0,0,0,0,0,0, - 3,1,1,1,2,0,1,2,1,0,0,0,1,2,0,1,2,1,1,1,1,0,0,0,1,1,0,1,1,0,0,1, - 0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0, - 0,0,0,3,0,0,0,0,0,2,0,0,1,0,0,1,0,2,2,0,0,1,0,0,0,0,0,0,2,0,1,0, - 0,0,0,0,0,3,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,1,1,0,1,1,1,0,0,0,1,0,0,1,0,1,0,0,1,0,1,1,1,1,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 2,0,0,2,2,0,0,0,2,0,0,1,0,1,1,0,1,0,0,0,1,0,1,1,1,2,0,0,2,0,0,0, - 2,1,1,0,2,0,2,1,1,1,1,2,1,1,1,0,0,1,0,1,0,0,0,0,0,0,1,1,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,0,0,2,2,0,0,0,2,1,0,1,1,1,1,0,1,0,0,0,0,0,0,1,0,1,0,0,1,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 1,0,0,1,1,0,0,0,0,2,0,2,2,2,2,0,2,0,0,0,2,0,1,0,1,1,0,1,1,1,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 2,0,0,2,2,0,0,0,2,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,1,2,0,1,0,0,0,0, - 0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0, - 1,0,0,1,1,0,0,0,1,1,0,0,1,2,1,0,1,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0, - 1,0,1,2,1,0,1,1,1,1,0,1,1,1,0,1,1,1,1,0,1,1,1,1,1,1,1,0,0,0,1,1, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 2,0,0,2,1,0,0,0,2,0,0,2,1,1,2,0,0,0,0,0,0,0,2,1,1,2,0,1,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 2,0,0,1,2,0,0,0,2,1,0,2,1,0,0,0,0,0,0,0,0,0,0,2,0,1,0,0,2,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 1,0,0,1,1,0,0,0,1,0,0,0,2,0,0,0,2,0,0,0,0,0,1,1,1,1,0,1,1,1,0,0, - 0,1,2,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 2,1,1,0,0,0,0,1,1,1,1,2,0,0,1,0,1,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -}; - - -const SequenceModel Tis_620ThaiModel = -{ - Tis_620_CharToOrderMap, - ThaiLangModel, - 64, - (float)0.8815720594354438, - PR_FALSE, - "TIS-620" -}; - -const SequenceModel Iso_8859_11ThaiModel = -{ - Iso_8859_11_CharToOrderMap, - ThaiLangModel, - 64, - (float)0.8815720594354438, - PR_FALSE, - "ISO-8859-11" -}; \ No newline at end of file diff --git a/PowerEditor/src/uchardet/LangModels/LangTurkishModel.cpp b/PowerEditor/src/uchardet/LangModels/LangTurkishModel.cpp deleted file mode 100644 index e68bcf6a..00000000 --- a/PowerEditor/src/uchardet/LangModels/LangTurkishModel.cpp +++ /dev/null @@ -1,173 +0,0 @@ -/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 1.1/GPL 2.0/LGPL 2.1 - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is Mozilla Communicator client code. - * - * The Initial Developer of the Original Code is - * Netscape Communications Corporation. - * Portions created by the Initial Developer are Copyright (C) 1998 - * the Initial Developer. All Rights Reserved. - * - * Contributor(s): - * - * Alternatively, the contents of this file may be used under the terms of - * either the GNU General Public License Version 2 or later (the "GPL"), or - * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), - * in which case the provisions of the GPL or the LGPL are applicable instead - * of those above. If you wish to allow use of your version of this file only - * under the terms of either the GPL or the LGPL, and not to allow others to - * use your version of this file under the terms of the MPL, indicate your - * decision by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL or the LGPL. If you do not delete - * the provisions above, a recipient may use your version of this file under - * the terms of any one of the MPL, the GPL or the LGPL. - * - * ***** END LICENSE BLOCK ***** */ - -#include "../nsSBCharSetProber.h" - -/********* Language model for: Turkish *********/ - -/** - * Generated by BuildLangModel.py - * On: 2015-12-04 02:24:44.730727 - **/ - -/* Character Mapping Table: - * ILL: illegal character. - * CTR: control character specific to the charset. - * RET: carriage/return. - * SYM: symbol (punctuation) that does not belong to word. - * NUM: 0 - 9. - * - * Other characters are ordered by probabilities - * (0 is the most common character in the language). - * - * Orders are generic to a language. So the codepoint with order X in - * CHARSET1 maps to the same character as the codepoint with the same - * order X in CHARSET2 for the same language. - * As such, it is possible to get missing order. For instance the - * ligature of 'o' and 'e' exists in ISO-8859-15 but not in ISO-8859-1 - * even though they are both used for French. Same for the euro sign. - */ -static const unsigned char Iso_8859_3_CharToOrderMap[] = -{ - CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */ - CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ - SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ - NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 0, 15, 21, 7, 1, 26, 22, 19, 6, 28, 9, 5, 11, 3, 14, /* 4X */ - 23, 34, 4, 10, 8, 12, 20, 29, 32, 13, 18,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 0, 15, 21, 7, 1, 26, 22, 19, 2, 28, 9, 5, 11, 3, 14, /* 6X */ - 23, 34, 4, 10, 8, 12, 20, 29, 32, 13, 18,SYM,SYM,SYM,SYM,CTR, /* 7X */ - CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ - CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ - SYM, 48,SYM,SYM,SYM,ILL, 49,SYM,SYM, 2, 17, 25, 50,SYM,ILL, 51, /* AX */ - SYM, 52,SYM,SYM,SYM,SYM, 53,SYM,SYM, 6, 17, 25, 54,SYM,ILL, 55, /* BX */ - 41, 36, 30,ILL, 39, 56, 57, 24, 42, 33, 58, 45, 59, 37, 31, 60, /* CX */ - ILL, 47, 61, 38, 62, 63, 27,SYM, 64, 65, 40, 35, 16, 66, 67, 68, /* DX */ - 41, 36, 30,ILL, 39, 69, 70, 24, 42, 33, 71, 45, 72, 37, 31, 73, /* EX */ - ILL, 47, 74, 38, 75, 76, 27,SYM, 77, 78, 40, 35, 16, 79, 80,SYM, /* FX */ -}; -/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ - -static const unsigned char Iso_8859_9_CharToOrderMap[] = -{ - CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */ - CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ - SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ - NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 0, 15, 21, 7, 1, 26, 22, 19, 6, 28, 9, 5, 11, 3, 14, /* 4X */ - 23, 34, 4, 10, 8, 12, 20, 29, 32, 13, 18,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 0, 15, 21, 7, 1, 26, 22, 19, 2, 28, 9, 5, 11, 3, 14, /* 6X */ - 23, 34, 4, 10, 8, 12, 20, 29, 32, 13, 18,SYM,SYM,SYM,SYM,CTR, /* 7X */ - CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ - CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ - SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */ - SYM,SYM,SYM,SYM,SYM, 81,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */ - 41, 36, 30, 44, 39, 82, 46, 24, 42, 33, 83, 45, 84, 37, 31, 85, /* CX */ - 25, 47, 86, 38, 87, 88, 27,SYM, 43, 89, 40, 35, 16, 2, 17, 90, /* DX */ - 41, 36, 30, 44, 39, 91, 46, 24, 42, 33, 92, 45, 93, 37, 31, 94, /* EX */ - 25, 47, 95, 38, 96, 97, 27,SYM, 43, 98, 40, 35, 16, 6, 17, 99, /* FX */ -}; -/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ - - -/* Model Table: - * Total sequences: 935 - * First 512 sequences: 0.991865243864388 - * Next 512 sequences (512-1024): 0.008134756135611957 - * Rest: 2.949029909160572e-17 - * Negative sequences: TODO - */ -static const PRUint8 TurkishLangModel[] = -{ - 3,2,3,3,3,3,2,3,3,3,3,3,3,3,2,3,0,3,3,3,3,3,3,3,3,3,3,0,3,3,0,2,2,2,2,0, - 3,3,3,3,3,3,0,3,3,3,3,3,3,3,3,3,0,3,3,3,3,3,3,3,3,3,3,0,3,3,2,0,3,0,2,0, - 3,3,3,3,3,3,0,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,0,2,2,2,0,2,0,2,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,3,0,3,2,2,2,2,2,2,0,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,2,3,2,2,2,2,2,2,2, - 3,3,3,2,2,3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,2,3,3,2,3,0,3,2,2,2,2,3,0,2,2,2, - 3,2,0,3,3,3,3,3,3,3,3,3,2,3,2,3,0,3,3,2,3,3,2,3,2,3,2,0,0,0,0,0,2,0,0,0, - 3,3,3,2,3,3,3,3,2,2,2,2,3,3,3,2,3,0,2,2,2,2,2,2,0,0,0,3,2,3,2,2,0,0,0,0, - 3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,0,2,3,3,2,2,2,3,0,2,3,2,2,3,2,2,0,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,2,3,2,2,2,2,3,0,2,3,2,2,3,0,0,0,0,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,2,3,3,3,2,3,3,0,2,3,0,2,2,0,0,2,2,2, - 3,3,3,2,3,3,3,3,2,2,3,3,3,3,3,3,3,2,3,3,0,3,2,3,2,0,2,2,0,2,3,2,2,2,2,2, - 3,3,3,3,3,3,0,3,3,3,3,3,2,3,2,3,0,3,3,3,3,3,3,3,3,3,2,0,2,2,0,0,2,2,0,0, - 3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,2,2,2,2,2,3,2,2,0,2,3,0,2,2,0,0,2,0,2, - 3,3,3,3,3,3,0,3,3,3,3,3,3,3,3,3,0,3,3,3,3,3,3,3,3,3,3,0,3,3,0,2,2,2,0,0, - 3,3,3,3,3,3,3,3,0,2,2,3,3,3,3,3,3,0,2,2,2,2,0,2,0,0,0,3,2,2,2,0,0,2,0,0, - 2,2,2,3,3,3,0,3,3,3,3,3,0,3,2,3,0,3,3,3,3,3,2,3,3,3,3,0,2,0,0,0,0,0,0,0, - 3,3,3,0,2,3,3,2,3,3,2,3,3,2,2,3,3,2,0,2,2,2,2,2,3,0,2,2,0,0,2,2,0,0,0,0, - 3,3,3,2,2,3,3,3,2,2,0,3,3,3,3,2,3,0,2,2,0,3,3,0,0,0,0,2,0,0,2,2,0,0,0,0, - 3,3,3,3,3,3,3,2,3,2,3,3,3,3,3,3,3,2,2,2,2,2,0,2,3,0,2,0,0,2,3,2,0,2,0,2, - 3,3,3,2,3,3,2,2,0,2,3,2,3,3,3,2,2,2,2,2,3,2,2,0,0,0,2,0,0,0,2,2,0,0,0,0, - 3,3,3,2,3,3,3,2,3,3,2,2,3,2,3,2,3,0,2,3,0,2,0,0,0,0,0,2,0,0,2,0,0,2,2,2, - 3,3,3,2,3,3,3,2,2,2,2,0,3,2,3,0,3,0,2,3,2,0,2,2,0,0,2,3,2,2,2,0,0,2,0,0, - 3,3,3,0,3,3,3,2,3,2,3,3,3,2,3,2,2,0,2,3,0,2,2,3,2,0,2,0,0,2,2,0,2,2,0,0, - 3,3,3,0,2,3,3,2,3,2,0,3,3,2,3,2,3,2,0,0,0,0,2,2,0,0,0,3,0,0,0,0,0,0,0,0, - 3,3,3,0,3,3,3,3,0,0,0,3,3,0,0,2,3,2,2,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,2,3,3,3,2,3,2,2,0,3,3,3,2,2,0,0,2,0,2,2,0,2,0,2,2,2,0,2,2,0,0,0,0, - 0,0,0,3,3,3,0,3,3,3,3,3,0,3,0,2,0,2,3,2,2,0,0,2,3,3,2,0,2,0,0,0,0,0,0,0, - 3,3,3,0,0,2,2,2,0,2,0,0,3,0,3,0,2,0,0,0,0,2,2,2,0,0,0,2,0,0,2,0,0,0,0,0, - 3,3,3,2,2,2,0,0,0,2,2,2,2,2,3,2,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,2,0,0, - 0,0,2,3,3,3,0,3,2,2,2,2,0,2,0,2,0,2,2,3,2,0,0,0,0,2,2,0,0,0,0,0,0,0,0,0, - 0,0,0,2,0,2,0,2,2,0,0,2,0,2,0,0,0,2,0,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0, - 3,2,2,0,0,0,2,0,2,0,0,0,0,2,2,0,0,0,0,0,2,0,0,2,0,0,2,0,0,2,0,0,0,0,0,0, - 2,0,2,2,2,2,0,2,2,0,2,2,2,0,0,2,0,0,2,0,0,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0, - 2,0,2,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,2,2,0,2,0,0,2,2,0,0,0,2,0,0,0,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0, -}; - - -const SequenceModel Iso_8859_3TurkishModel = -{ - Iso_8859_3_CharToOrderMap, - TurkishLangModel, - 36, - (float)0.991865243864388, - PR_FALSE, - "ISO-8859-3" -}; - -const SequenceModel Iso_8859_9TurkishModel = -{ - Iso_8859_9_CharToOrderMap, - TurkishLangModel, - 36, - (float)0.991865243864388, - PR_FALSE, - "ISO-8859-9" -}; \ No newline at end of file diff --git a/PowerEditor/src/uchardet/LangModels/LangVietnameseModel.cpp b/PowerEditor/src/uchardet/LangModels/LangVietnameseModel.cpp deleted file mode 100644 index dff4a795..00000000 --- a/PowerEditor/src/uchardet/LangModels/LangVietnameseModel.cpp +++ /dev/null @@ -1,247 +0,0 @@ -/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 1.1/GPL 2.0/LGPL 2.1 - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is Mozilla Communicator client code. - * - * The Initial Developer of the Original Code is - * Netscape Communications Corporation. - * Portions created by the Initial Developer are Copyright (C) 1998 - * the Initial Developer. All Rights Reserved. - * - * Contributor(s): - * - * Alternatively, the contents of this file may be used under the terms of - * either the GNU General Public License Version 2 or later (the "GPL"), or - * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), - * in which case the provisions of the GPL or the LGPL are applicable instead - * of those above. If you wish to allow use of your version of this file only - * under the terms of either the GPL or the LGPL, and not to allow others to - * use your version of this file under the terms of the MPL, indicate your - * decision by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL or the LGPL. If you do not delete - * the provisions above, a recipient may use your version of this file under - * the terms of any one of the MPL, the GPL or the LGPL. - * - * ***** END LICENSE BLOCK ***** */ - -#include "../nsSBCharSetProber.h" - -/********* Language model for: Vietnamese *********/ - -/** - * Generated by BuildLangModel.py - * On: 2016-02-13 03:42:06.561440 - **/ - -/* Character Mapping Table: - * ILL: illegal character. - * CTR: control character specific to the charset. - * RET: carriage/return. - * SYM: symbol (punctuation) that does not belong to word. - * NUM: 0 - 9. - * - * Other characters are ordered by probabilities - * (0 is the most common character in the language). - * - * Orders are generic to a language. So the codepoint with order X in - * CHARSET1 maps to the same character as the codepoint with the same - * order X in CHARSET2 for the same language. - * As such, it is possible to get missing order. For instance the - * ligature of 'o' and 'e' exists in ISO-8859-15 but not in ISO-8859-1 - * even though they are both used for French. Same for the euro sign. - */ -static const unsigned char Windows_1258_CharToOrderMap[] = -{ - CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */ - CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ - SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ - NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 6, 17, 3, 22, 21, 66, 5, 1, 4, 75, 24, 14, 8, 0, 9, /* 4X */ - 16, 36, 11, 19, 2, 7, 13, 69, 54, 20, 82,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 6, 17, 3, 22, 21, 66, 5, 1, 4, 75, 24, 14, 8, 0, 9, /* 6X */ - 16, 36, 11, 19, 2, 7, 13, 69, 54, 20, 82,SYM,SYM,SYM,SYM,CTR, /* 7X */ - SYM,ILL,SYM,101,SYM,SYM,SYM,SYM,SYM,SYM,ILL,SYM,100,ILL,ILL,ILL, /* 8X */ - ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,ILL,SYM,100,ILL,ILL,102, /* 9X */ - SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */ - SYM,SYM,SYM,SYM,SYM,103,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */ - 12, 15, 25, 51, 97,104, 98, 91, 90, 62, 27,105,SYM, 47,106,107, /* CX */ - 10,108,SYM, 33, 29, 46, 93,SYM, 94, 58, 67,109, 96, 18,SYM, 99, /* DX */ - 12, 15, 25, 51, 97,110, 98, 91, 90, 62, 27,111,SYM, 47,112,113, /* EX */ - 10,114,SYM, 33, 29, 46, 93,SYM, 94, 58, 67,115, 96, 18,116,117, /* FX */ -}; -/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ - -static const unsigned char Viscii_CharToOrderMap[] = -{ - CTR,CTR, 88,CTR,CTR, 95, 77,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */ - CTR,CTR,CTR,CTR, 80,CTR,CTR,CTR,CTR, 79,CTR,CTR,CTR,CTR, 92,CTR, /* 1X */ - SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ - NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 6, 17, 3, 22, 21, 66, 5, 1, 4, 75, 24, 14, 8, 0, 9, /* 4X */ - 16, 36, 11, 19, 2, 7, 13, 69, 54, 20, 82,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 6, 17, 3, 22, 21, 66, 5, 1, 4, 75, 24, 14, 8, 0, 9, /* 6X */ - 16, 36, 11, 19, 2, 7, 13, 69, 54, 20, 82,SYM,SYM,SYM,SYM,CTR, /* 7X */ - 30, 57, 71, 65, 41, 43, 78, 49, 83, 89, 23, 45, 39, 74, 28, 32, /* 8X */ - 53, 60, 84, 31, 37, 40, 38, 59, 42, 81, 44, 73, 35, 72, 48, 76, /* 9X */ - 86, 57, 71, 65, 41, 43, 78, 49, 83, 89, 23, 45, 39, 74, 28, 32, /* AX */ - 53, 60, 84, 87, 46, 31, 38, 59, 42, 56, 52, 55, 70, 46, 40, 18, /* BX */ - 12, 15, 25, 61, 34, 51, 88, 95, 90, 62, 27, 85, 50, 47, 64, 76, /* CX */ - 10, 52, 63, 33, 29, 30, 80, 55, 70, 58, 67, 79, 92, 68, 87, 18, /* DX */ - 12, 15, 25, 61, 34, 51, 26, 77, 90, 62, 27, 85, 50, 47, 64, 73, /* EX */ - 10, 56, 63, 33, 29, 86, 81, 44, 48, 58, 67, 72, 35, 68, 37, 26, /* FX */ -}; -/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ - - -/* Model Table: - * Total sequences: 1494 - * First 512 sequences: 0.9321889118082535 - * Next 512 sequences (512-1024): 0.06092051479986333 - * Rest: 0.0068905733918831966 - * Negative sequences: TODO - */ -static const PRUint8 VietnameseLangModel[] = -{ - 3,3,3,3,3,3,3,2,2,3,0,2,3,1,1,1,1,2,3,3,2,3,3,3,2,1,2, - 3,0,3,2,2,2,3,1,0,1,1,2,0,0,1,0,1,0,2,2,1,0,0,0,3,0,0,2, - 2,1,2,0,3,0,3,3,2,3,0,2,3,0,2,3,0,0,3,1,3,3,1,3,1,3,3, - 3,3,3,3,3,3,3,3,3,0,3,3,3,2,3,3,3,3,2,3,3,3,3,3,2,3,2,0, - 2,3,2,2,3,1,3,3,1,3,1,3,3,2,2,3,2,0,3,2,2,3,1,3,0,3,0, - 3,1,3,3,3,3,2,3,2,0,0,2,1,2,2,2,2,0,0,1,3,2,3,2,2,2,2,0, - 2,3,2,2,3,0,3,3,2,3,0,2,2,1,2,3,1,1,2,2,2,3,1,0,2,2,0, - 0,0,3,2,3,2,3,3,3,1,1,2,0,0,2,0,3,0,0,2,0,2,2,0,2,3,1,1, - 3,1,3,3,3,3,3,2,3,3,1,3,2,2,3,3,2,2,0,3,1,3,3,3,2,0,3, - 3,3,1,0,0,3,1,3,0,2,0,2,3,3,2,0,0,2,3,0,0,0,1,0,1,0,0,2, - 2,3,2,2,3,1,3,3,1,3,0,3,3,0,2,2,0,1,3,2,2,3,1,1,1,2,3, - 0,0,3,3,1,2,2,0,1,0,2,2,0,0,1,1,3,3,0,0,0,1,1,2,1,0,3,0, - 3,2,3,3,3,2,2,3,3,3,0,3,0,2,3,0,2,3,0,3,3,2,3,0,2,0,0, - 0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,2, - 3,1,3,2,3,2,3,1,3,2,0,3,1,2,3,2,2,2,0,3,3,3,2,2,2,3,0, - 2,1,3,1,3,3,0,2,0,0,0,1,0,1,3,0,3,0,0,2,2,0,3,0,2,0,3,1, - 2,1,0,2,3,0,3,3,2,3,0,0,3,0,2,3,2,2,3,2,2,3,2,0,0,1,0, - 0,2,3,3,3,2,2,1,0,0,0,2,0,3,3,0,1,2,2,0,0,3,2,2,1,2,1,1, - 3,2,3,2,3,2,3,3,3,2,0,3,3,2,3,3,2,3,0,3,2,2,3,0,2,0,0, - 0,0,0,3,0,0,1,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,2, - 0,0,0,0,3,0,3,2,0,3,0,1,3,0,0,3,0,1,3,0,0,1,0,3,0,3,0, - 2,3,3,3,3,3,3,3,2,0,1,3,3,1,3,3,3,3,3,2,2,0,1,2,2,3,3,0, - 3,2,3,2,3,2,3,3,2,3,0,3,2,2,3,2,1,2,3,3,3,3,3,0,2,1,2, - 3,1,2,2,3,2,0,2,0,0,2,2,1,0,3,3,2,3,0,1,2,2,2,3,3,1,2,0, - 3,0,0,0,3,0,0,2,3,3,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 2,0,0,0,3,0,3,3,0,2,0,1,3,0,1,1,0,0,2,1,1,3,1,1,0,2,1, - 2,1,2,1,0,1,0,0,0,0,2,1,0,3,2,3,3,1,3,0,3,2,3,3,3,0,0,0, - 0,2,2,1,3,2,3,3,2,3,0,0,3,2,3,2,2,2,3,2,2,3,2,1,1,2,1, - 3,2,2,3,3,2,1,0,0,0,3,2,0,3,2,3,2,1,0,1,2,2,3,0,2,0,0,1, - 3,0,3,3,3,1,0,2,3,3,0,1,0,0,1,0,3,0,0,1,3,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,3,2,0,3,0,3,2,1,3,0,3,0,0,2,0,2,1,0,2,2,3,1,0,0,0,0, - 2,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, - 2,1,0,2,3,1,3,3,0,3,0,3,3,0,3,3,0,3,1,2,2,3,1,1,1,0,0, - 2,1,0,2,3,3,2,3,0,0,0,1,0,2,2,3,2,0,1,0,2,1,2,3,0,2,3,0, - 3,0,1,1,2,0,3,3,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,3,3,0,3,0,0,0,0,0,3,0,0,0,0,0,0,0,0, - 1,3,3,3,3,1,3,3,2,3,0,1,2,0,2,3,2,2,2,3,2,3,2,0,2,2,0, - 0,0,2,1,0,3,2,2,0,1,1,1,1,1,1,0,0,0,0,2,0,1,0,0,1,2,1,0, - 2,0,1,2,1,0,2,2,1,2,0,2,0,0,1,1,2,1,0,2,0,2,1,3,1,0,0, - 3,2,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0, - 3,2,3,2,2,2,3,2,3,3,0,3,0,2,3,1,2,2,0,3,2,3,3,0,2,0,0, - 0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2, - 1,1,1,2,3,1,3,3,0,3,0,3,3,1,2,1,0,0,3,2,2,3,2,0,1,3,1, - 1,0,0,3,1,1,1,0,0,0,0,1,0,0,3,3,2,1,0,1,0,3,2,1,1,2,1,0, - 3,0,3,2,0,0,0,3,3,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 1,3,1,0,3,1,3,2,0,2,0,2,0,1,2,0,0,1,0,2,2,2,0,3,1,0,0, - 2,0,1,0,0,0,0,0,0,0,0,0,2,0,0,2,0,0,0,0,3,0,0,2,0,0,0,1, - 3,0,1,1,0,0,0,3,3,0,0,0,0,0,1,0,1,0,0,0,3,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,0,0,0,0,0,3,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,0,2,1,0,0,0,3,3,0,1,1,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0, - 0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,0,3,3,0,0,0,3,3,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,0,2,2,3,0,0,0,3,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0, - 0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,0,3,3,3,0,0,0,2,3,0,0,0,0,0,0,3,0,0,0,2,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,0,3,3,3,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,0,3,3,3,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,0,1,2,3,0,3,0,2,0,0,1,0,1,0,0,2,0,0,0,0,0,0,0,1,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,0,0,0,3,0,0,0,2,3,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 2,0,0,0,1,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,3,0,0,0,2,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,0,2,3,3,0,0,2,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,0,0,0,0,0,0,3,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,0,2,3,3,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,0,3,0,0,0,0,3,3,0,0,0,0,0,0,0,3,0,0,0,3,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,0,1,3,0,0,3,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,0,0,0,1,0,0,3,2,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,0,1,3,3,0,2,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,0,0,0,0,0,0,3,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,0,1,1,3,0,0,2,2,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,0,2,3,0,0,2,1,1,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,0,1,3,1,0,0,0,1,0,0,0,0,0,0,0,2,0,0,0,2,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,0,3,1,0,0,0,2,2,0,0,0,0,0,0,0,3,0,0,0,2,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,0,0,0,0,0,1,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,0,1,1,1,0,0,0,3,0,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,0,2,3,0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,0,0,0,2,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 1,1,2,1,2,0,3,3,0,1,0,0,0,2,0,3,1,2,2,0,1,3,0,2,0,2,0, - 2,0,2,1,1,0,1,2,0,0,0,1,0,0,1,0,0,0,0,1,2,0,0,1,1,2,0,2, -}; - - -const SequenceModel Windows_1258VietnameseModel = -{ - Windows_1258_CharToOrderMap, - VietnameseLangModel, - 55, - (float)0.9321889118082535, - PR_FALSE, - "WINDOWS-1258" -}; - -const SequenceModel VisciiVietnameseModel = -{ - Viscii_CharToOrderMap, - VietnameseLangModel, - 55, - (float)0.9321889118082535, - PR_FALSE, - "VISCII" -}; \ No newline at end of file diff --git a/PowerEditor/src/uchardet/LangThaiModel.cpp b/PowerEditor/src/uchardet/LangThaiModel.cpp new file mode 100644 index 00000000..11b8e75e --- /dev/null +++ b/PowerEditor/src/uchardet/LangThaiModel.cpp @@ -0,0 +1,220 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* ***** BEGIN LICENSE BLOCK ***** + * Version: MPL 1.1/GPL 2.0/LGPL 2.1 + * + * The contents of this file are subject to the Mozilla Public License Version + * 1.1 (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. + * + * The Original Code is Mozilla Communicator client code. + * + * The Initial Developer of the Original Code is + * Netscape Communications Corporation. + * Portions created by the Initial Developer are Copyright (C) 1998 + * the Initial Developer. All Rights Reserved. + * + * Contributor(s): + * + * Alternatively, the contents of this file may be used under the terms of + * either the GNU General Public License Version 2 or later (the "GPL"), or + * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), + * in which case the provisions of the GPL or the LGPL are applicable instead + * of those above. If you wish to allow use of your version of this file only + * under the terms of either the GPL or the LGPL, and not to allow others to + * use your version of this file under the terms of the MPL, indicate your + * decision by deleting the provisions above and replace them with the notice + * and other provisions required by the GPL or the LGPL. If you do not delete + * the provisions above, a recipient may use your version of this file under + * the terms of any one of the MPL, the GPL or the LGPL. + * + * ***** END LICENSE BLOCK ***** */ + +#include "nsSBCharSetProber.h" + + +/**************************************************************** +255: Control characters that usually does not exist in any text +254: Carriage/Return +253: symbol (punctuation) that does not belong to word +252: 0 - 9 + +*****************************************************************/ + +//The following result for thai was collected from a limited sample (1M). + +//Character Mapping Table: +static const unsigned char TIS620CharToOrderMap[] = +{ +255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, //00 +255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, //10 ++253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, //20 +252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, //30 +253,182,106,107,100,183,184,185,101, 94,186,187,108,109,110,111, //40 +188,189,190, 89, 95,112,113,191,192,193,194,253,253,253,253,253, //50 +253, 64, 72, 73,114, 74,115,116,102, 81,201,117, 90,103, 78, 82, //60 + 96,202, 91, 79, 84,104,105, 97, 98, 92,203,253,253,253,253,253, //70 +209,210,211,212,213, 88,214,215,216,217,218,219,220,118,221,222, +223,224, 99, 85, 83,225,226,227,228,229,230,231,232,233,234,235, +236, 5, 30,237, 24,238, 75, 8, 26, 52, 34, 51,119, 47, 58, 57, + 49, 53, 55, 43, 20, 19, 44, 14, 48, 3, 17, 25, 39, 62, 31, 54, + 45, 9, 16, 2, 61, 15,239, 12, 42, 46, 18, 21, 76, 4, 66, 63, + 22, 10, 1, 36, 23, 13, 40, 27, 32, 35, 86,240,241,242,243,244, + 11, 28, 41, 29, 33,245, 50, 37, 6, 7, 67, 77, 38, 93,246,247, + 68, 56, 59, 65, 69, 60, 70, 80, 71, 87,248,249,250,251,252,253, +}; + + + + +//Model Table: +//total sequences: 100% +//first 512 sequences: 92.6386% +//first 1024 sequences:7.3177% +//rest sequences: 1.0230% +//negative sequences: 0.0436% +static const PRUint8 ThaiLangModel[] = +{ +0,1,3,3,3,3,0,0,3,3,0,3,3,0,3,3,3,3,3,3,3,3,0,0,3,3,3,0,3,3,3,3, +0,3,3,0,0,0,1,3,0,3,3,2,3,3,0,1,2,3,3,3,3,0,2,0,2,0,0,3,2,1,2,2, +3,0,3,3,2,3,0,0,3,3,0,3,3,0,3,3,3,3,3,3,3,3,3,0,3,2,3,0,2,2,2,3, +0,2,3,0,0,0,0,1,0,1,2,3,1,1,3,2,2,0,1,1,0,0,1,0,0,0,0,0,0,0,1,1, +3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,2,2,2,3,3,2,3,2,3,3,2,2,2, +3,1,2,3,0,3,3,2,2,1,2,3,3,1,2,0,1,3,0,1,0,0,1,0,0,0,0,0,0,0,1,1, +3,3,2,2,3,3,3,3,1,2,3,3,3,3,3,2,2,2,2,3,3,2,2,3,3,2,2,3,2,3,2,2, +3,3,1,2,3,1,2,2,3,3,1,0,2,1,0,0,3,1,2,1,0,0,1,0,0,0,0,0,0,1,0,1, +3,3,3,3,3,3,2,2,3,3,3,3,2,3,2,2,3,3,2,2,3,2,2,2,2,1,1,3,1,2,1,1, +3,2,1,0,2,1,0,1,0,1,1,0,1,1,0,0,1,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0, +3,3,3,2,3,2,3,3,2,2,3,2,3,3,2,3,1,1,2,3,2,2,2,3,2,2,2,2,2,1,2,1, +2,2,1,1,3,3,2,1,0,1,2,2,0,1,3,0,0,0,1,1,0,0,0,0,0,2,3,0,0,2,1,1, +3,3,2,3,3,2,0,0,3,3,0,3,3,0,2,2,3,1,2,2,1,1,1,0,2,2,2,0,2,2,1,1, +0,2,1,0,2,0,0,2,0,1,0,0,1,0,0,0,1,1,1,1,0,0,0,0,0,0,0,0,0,0,1,0, +3,3,2,3,3,2,0,0,3,3,0,2,3,0,2,1,2,2,2,2,1,2,0,0,2,2,2,0,2,2,1,1, +0,2,1,0,2,0,0,2,0,1,1,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0, +3,3,2,3,2,3,2,0,2,2,1,3,2,1,3,2,1,2,3,2,2,3,0,2,3,2,2,1,2,2,2,2, +1,2,2,0,0,0,0,2,0,1,2,0,1,1,1,0,1,0,3,1,1,0,0,0,0,0,0,0,0,0,1,0, +3,3,2,3,3,2,3,2,2,2,3,2,2,3,2,2,1,2,3,2,2,3,1,3,2,2,2,3,2,2,2,3, +3,2,1,3,0,1,1,1,0,2,1,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,0,0,0,2,0,0, +1,0,0,3,0,3,3,3,3,3,0,0,3,0,2,2,3,3,3,3,3,0,0,0,1,1,3,0,0,0,0,2, +0,0,1,0,0,0,0,0,0,0,2,3,0,0,0,3,0,2,0,0,0,0,0,3,0,0,0,0,0,0,0,0, +2,0,3,3,3,3,0,0,2,3,0,0,3,0,3,3,2,3,3,3,3,3,0,0,3,3,3,0,0,0,3,3, +0,0,3,0,0,0,0,2,0,0,2,1,1,3,0,0,1,0,0,2,3,0,1,0,0,0,0,0,0,0,1,0, +3,3,3,3,2,3,3,3,3,3,3,3,1,2,1,3,3,2,2,1,2,2,2,3,1,1,2,0,2,1,2,1, +2,2,1,0,0,0,1,1,0,1,0,1,1,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0, +3,0,2,1,2,3,3,3,0,2,0,2,2,0,2,1,3,2,2,1,2,1,0,0,2,2,1,0,2,1,2,2, +0,1,1,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,2,1,3,3,1,1,3,0,2,3,1,1,3,2,1,1,2,0,2,2,3,2,1,1,1,1,1,2, +3,0,0,1,3,1,2,1,2,0,3,0,0,0,1,0,3,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0, +3,3,1,1,3,2,3,3,3,1,3,2,1,3,2,1,3,2,2,2,2,1,3,3,1,2,1,3,1,2,3,0, +2,1,1,3,2,2,2,1,2,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2, +3,3,2,3,2,3,3,2,3,2,3,2,3,3,2,1,0,3,2,2,2,1,2,2,2,1,2,2,1,2,1,1, +2,2,2,3,0,1,3,1,1,1,1,0,1,1,0,2,1,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,2,3,2,2,1,1,3,2,3,2,3,2,0,3,2,2,1,2,0,2,2,2,1,2,2,2,2,1, +3,2,1,2,2,1,0,2,0,1,0,0,1,1,0,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,1, +3,3,3,3,3,2,3,1,2,3,3,2,2,3,0,1,1,2,0,3,3,2,2,3,0,1,1,3,0,0,0,0, +3,1,0,3,3,0,2,0,2,1,0,0,3,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,2,3,2,3,3,0,1,3,1,1,2,1,2,1,1,3,1,1,0,2,3,1,1,1,1,1,1,1,1, +3,1,1,2,2,2,2,1,1,1,0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, +3,2,2,1,1,2,1,3,3,2,3,2,2,3,2,2,3,1,2,2,1,2,0,3,2,1,2,2,2,2,2,1, +3,2,1,2,2,2,1,1,1,1,0,0,1,1,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,3,3,1,3,3,0,2,1,0,3,2,0,0,3,1,0,1,1,0,1,0,0,0,0,0,1, +1,0,0,1,0,3,2,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,0,2,2,2,3,0,0,1,3,0,3,2,0,3,2,2,3,3,3,3,3,1,0,2,2,2,0,2,2,1,2, +0,2,3,0,0,0,0,1,0,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, +3,0,2,3,1,3,3,2,3,3,0,3,3,0,3,2,2,3,2,3,3,3,0,0,2,2,3,0,1,1,1,3, +0,0,3,0,0,0,2,2,0,1,3,0,1,2,2,2,3,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1, +3,2,3,3,2,0,3,3,2,2,3,1,3,2,1,3,2,0,1,2,2,0,2,3,2,1,0,3,0,0,0,0, +3,0,0,2,3,1,3,0,0,3,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,1,3,2,2,2,1,2,0,1,3,1,1,3,1,3,0,0,2,1,1,1,1,2,1,1,1,0,2,1,0,1, +1,2,0,0,0,3,1,1,0,0,0,0,1,0,1,0,0,1,0,1,0,0,0,0,0,3,1,0,0,0,1,0, +3,3,3,3,2,2,2,2,2,1,3,1,1,1,2,0,1,1,2,1,2,1,3,2,0,0,3,1,1,1,1,1, +3,1,0,2,3,0,0,0,3,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,2,3,0,3,3,0,2,0,0,0,0,0,0,0,3,0,0,1,0,0,0,0,0,0,0,0,0,0,0, +0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,2,3,1,3,0,0,1,2,0,0,2,0,3,3,2,3,3,3,2,3,0,0,2,2,2,0,0,0,2,2, +0,0,1,0,0,0,0,3,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0, +0,0,0,3,0,2,0,0,0,0,0,0,0,0,0,0,1,2,3,1,3,3,0,0,1,0,3,0,0,0,0,0, +0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,1,2,3,1,2,3,1,0,3,0,2,2,1,0,2,1,1,2,0,1,0,0,1,1,1,1,0,1,0,0, +1,0,0,0,0,1,1,0,3,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,2,1,0,1,1,1,3,1,2,2,2,2,2,2,1,1,1,1,0,3,1,0,1,3,1,1,1,1, +1,1,0,2,0,1,3,1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,2,0,1, +3,0,2,2,1,3,3,2,3,3,0,1,1,0,2,2,1,2,1,3,3,1,0,0,3,2,0,0,0,0,2,1, +0,1,0,0,0,0,1,2,0,1,1,3,1,1,2,2,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0, +0,0,3,0,0,1,0,0,0,3,0,0,3,0,3,1,0,1,1,1,3,2,0,0,0,3,0,0,0,0,2,0, +0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,2,0,0,0,0,0,0,0,0,0, +3,3,1,3,2,1,3,3,1,2,2,0,1,2,1,0,1,2,0,0,0,0,0,3,0,0,0,3,0,0,0,0, +3,0,0,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,0,1,2,0,3,3,3,2,2,0,1,1,0,1,3,0,0,0,2,2,0,0,0,0,3,1,0,1,0,0,0, +0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,0,2,3,1,2,0,0,2,1,0,3,1,0,1,2,0,1,1,1,1,3,0,0,3,1,1,0,2,2,1,1, +0,2,0,0,0,0,0,1,0,1,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,0,0,3,1,2,0,0,2,2,0,1,2,0,1,0,1,3,1,2,1,0,0,0,2,0,3,0,0,0,1,0, +0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,0,1,1,2,2,0,0,0,2,0,2,1,0,1,1,0,1,1,1,2,1,0,0,1,1,1,0,2,1,1,1, +0,1,1,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,1, +0,0,0,2,0,1,3,1,1,1,1,0,0,0,0,3,2,0,1,0,0,0,1,2,0,0,0,1,0,0,0,0, +0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,3,3,3,3,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +1,0,2,3,2,2,0,0,0,1,0,0,0,0,2,3,2,1,2,2,3,0,0,0,2,3,1,0,0,0,1,1, +0,0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0, +3,3,2,2,0,1,0,0,0,0,2,0,2,0,1,0,0,0,1,1,0,0,0,2,1,0,1,0,1,1,0,0, +0,1,0,2,0,0,1,0,3,0,1,0,0,0,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,1,0,0,1,0,0,0,0,0,1,1,2,0,0,0,0,1,0,0,1,3,1,0,0,0,0,1,1,0,0, +0,1,0,0,0,0,3,0,0,0,0,0,0,3,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0, +3,3,1,1,1,1,2,3,0,0,2,1,1,1,1,1,0,2,1,1,0,0,0,2,1,0,1,2,1,1,0,1, +2,1,0,3,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +1,3,1,0,0,0,0,0,0,0,3,0,0,0,3,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1, +0,0,0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,2,0,0,0,0,0,0,1,2,1,0,1,1,0,2,0,0,1,0,0,2,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,2,0,0,0,1,3,0,1,0,0,0,2,0,0,0,0,0,0,0,1,2,0,0,0,0,0, +3,3,0,0,1,1,2,0,0,1,2,1,0,1,1,1,0,1,1,0,0,2,1,1,0,1,0,0,1,1,1,0, +0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,2,2,1,0,0,0,0,1,0,0,0,0,3,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0, +2,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,3,0,0,1,1,0,0,0,2,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +1,1,0,1,2,0,1,2,0,0,1,1,0,2,0,1,0,0,1,0,0,0,0,1,0,0,0,2,0,0,0,0, +1,0,0,1,0,1,1,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,1,0,0,0,0,0,0,0,1,1,0,1,1,0,2,1,3,0,0,0,0,1,1,0,0,0,0,0,0,0,3, +1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,0,1,0,1,0,0,2,0,0,2,0,0,1,1,2,0,0,1,1,0,0,0,1,0,0,0,1,1,0,0,0, +1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0, +1,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,1,1,0,0,0, +2,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,0,0,0,0,2,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,3,0,0,0, +2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,1,0,0,0,0, +1,0,0,0,0,0,0,0,0,1,0,0,0,0,2,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,1,1,0,0,2,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +}; + + +const SequenceModel TIS620ThaiModel( + TIS620CharToOrderMap, + ThaiLangModel, + (float)0.926386, + PR_FALSE, + "TIS-620" +); diff --git a/PowerEditor/src/uchardet/README.TXT b/PowerEditor/src/uchardet/README.TXT new file mode 100644 index 00000000..ad75bd56 --- /dev/null +++ b/PowerEditor/src/uchardet/README.TXT @@ -0,0 +1,10 @@ +Uchardet is a C language binding of the original C++ implementation of the universal charset detection library by Mozilla. +The source code of universalchardet is available at https://github.com/BYVoid/uchardet + +uchardet is an encoding detector library, which takes a sequence of bytes in an unknown character encoding without any additional information, and attempts to determine the encoding of the text. + +The original code of universalchardet is available at http://lxr.mozilla.org/seamonkey/source/extensions/universalchardet/ + +Techniques used by universalchardet are described at http://www.mozilla.org/projects/intl/UniversalCharsetDetection.html + +Uchardet is licensed under Mozilla Public License Version 1.1 (http://www.mozilla.org/MPL/1.1/) diff --git a/PowerEditor/src/uchardet/README.md b/PowerEditor/src/uchardet/README.md deleted file mode 100644 index b6a2feae..00000000 --- a/PowerEditor/src/uchardet/README.md +++ /dev/null @@ -1,294 +0,0 @@ -# uchardet - -[uchardet](https://www.freedesktop.org/wiki/Software/uchardet/) is an encoding detector library, which takes a sequence of bytes in an unknown character encoding without any additional information, and attempts to determine the encoding of the text. Returned encoding names are [iconv](https://www.gnu.org/software/libiconv/)-compatible. - -uchardet started as a C language binding of the original C++ implementation of the universal charset detection library by Mozilla. It can now detect more charsets, and more reliably than the original implementation. - -The original code of universalchardet is available at http://lxr.mozilla.org/seamonkey/source/extensions/universalchardet/ - -Techniques used by universalchardet are described at http://www.mozilla.org/projects/intl/UniversalCharsetDetection.html - -## Supported Languages/Encodings - - * International (Unicode) - * UTF-8 - * UTF-16BE / UTF-16LE - * UTF-32BE / UTF-32LE / X-ISO-10646-UCS-4-34121 / X-ISO-10646-UCS-4-21431 - * Arabic - * ISO-8859-6 - * WINDOWS-1256 - * Bulgarian - * ISO-8859-5 - * WINDOWS-1251 - * Chinese - * ISO-2022-CN - * BIG5 - * EUC-TW - * GB18030 - * HZ-GB-2312 - * Croatian: - * ISO-8859-2 - * ISO-8859-13 - * ISO-8859-16 - * Windows-1250 - * IBM852 - * MAC-CENTRALEUROPE - * Czech - * Windows-1250 - * ISO-8859-2 - * IBM852 - * MAC-CENTRALEUROPE - * Danish - * ISO-8859-1 - * ISO-8859-15 - * WINDOWS-1252 - * English - * ASCII - * Esperanto - * ISO-8859-3 - * Estonian - * ISO-8859-4 - * ISO-8859-13 - * ISO-8859-13 - * Windows-1252 - * Windows-1257 - * Finnish - * ISO-8859-1 - * ISO-8859-4 - * ISO-8859-9 - * ISO-8859-13 - * ISO-8859-15 - * WINDOWS-1252 - * French - * ISO-8859-1 - * ISO-8859-15 - * WINDOWS-1252 - * German - * ISO-8859-1 - * WINDOWS-1252 - * Greek - * ISO-8859-7 - * WINDOWS-1253 - * Hebrew - * ISO-8859-8 - * WINDOWS-1255 - * Hungarian: - * ISO-8859-2 - * WINDOWS-1250 - * Irish Gaelic - * ISO-8859-1 - * ISO-8859-9 - * ISO-8859-15 - * WINDOWS-1252 - * Italian - * ISO-8859-1 - * ISO-8859-3 - * ISO-8859-9 - * ISO-8859-15 - * WINDOWS-1252 - * Japanese - * ISO-2022-JP - * SHIFT_JIS - * EUC-JP - * Korean - * ISO-2022-KR - * EUC-KR / UHC - * Lithuanian - * ISO-8859-4 - * ISO-8859-10 - * ISO-8859-13 - * Latvian - * ISO-8859-4 - * ISO-8859-10 - * ISO-8859-13 - * Maltese - * ISO-8859-3 - * Polish: - * ISO-8859-2 - * ISO-8859-13 - * ISO-8859-16 - * Windows-1250 - * IBM852 - * MAC-CENTRALEUROPE - * Portuguese - * ISO-8859-1 - * ISO-8859-9 - * ISO-8859-15 - * WINDOWS-1252 - * Romanian: - * ISO-8859-2 - * ISO-8859-16 - * Windows-1250 - * IBM852 - * Russian - * ISO-8859-5 - * KOI8-R - * WINDOWS-1251 - * MAC-CYRILLIC - * IBM866 - * IBM855 - * Slovak - * Windows-1250 - * ISO-8859-2 - * IBM852 - * MAC-CENTRALEUROPE - * Slovene - * ISO-8859-2 - * ISO-8859-16 - * Windows-1250 - * IBM852 - * MAC-CENTRALEUROPE - * Spanish - * ISO-8859-1 - * ISO-8859-15 - * WINDOWS-1252 - * Swedish - * ISO-8859-1 - * ISO-8859-4 - * ISO-8859-9 - * ISO-8859-15 - * WINDOWS-1252 - * Thai - * TIS-620 - * ISO-8859-11 - * Turkish: - * ISO-8859-3 - * ISO-8859-9 - * Vietnamese: - * VISCII - * Windows-1258 - * Others - * WINDOWS-1252 - -## Installation - -### Debian/Ubuntu/Mint - - apt-get install uchardet libuchardet-dev - -### Mageia - - urpmi libuchardet libuchardet-devel - -### Fedora - - dnf install uchardet uchardet-devel - -### Gentoo - - emerge uchardet - -### Mac - - brew install uchardet - -### Windows - -Binary packages are provided in Fedora repository. There may exist other -pre-built packages but I am not aware of them. -Nevertheless the library is very easily and quickly compilable under -Windows as well, so finding a binary package is not necessary. -Some did it successfully with the [CMake Windows -installer](https://cmake.org/download/) and MinGW. It should be possible -to use MinGW-w64 instead of MinGW, in particular to build both 32 and -64-bit DLL libraries). - -Note also that it is very easily cross-buildable (for instance from a -GNU/Linux machine). - -### Build from source - -Releases are available from: -https://www.freedesktop.org/software/uchardet/releases/ - -If you prefer a development version, clone the git repository: - - git clone git://anongit.freedesktop.org/uchardet/uchardet - -The source can be browsed at: https://cgit.freedesktop.org/uchardet/uchardet/ - - cmake . - make - make install - -### Build with flatpak-builder - -Here is a working "module" section to include in your Flatpak's json manifest: - -``` -"modules": [ - { - "name": "uchardet", - "buildsystem": "cmake", - "builddir": true, - "config-opts": [ "-DCMAKE_INSTALL_LIBDIR=lib" ], - "sources": [ - { - ... - } - ] - } -] -``` - -## Usage - -### Command Line - -``` -uchardet Command Line Tool -Version 0.0.6 - -Authors: BYVoid, Jehan -Bug Report: https://bugs.freedesktop.org/enter_bug.cgi?product=uchardet - -Usage: - uchardet [Options] [File]... - -Options: - -v, --version Print version and build information. - -h, --help Print this help. -``` - -### Library - -See [uchardet.h](https://cgit.freedesktop.org/uchardet/uchardet/tree/src/uchardet.h) - -## Related Projects - - * [python-chardet](https://github.com/chardet/chardet) Python port - * [ruby-rchardet](http://rubyforge.org/projects/chardet/) Ruby port - * [juniversalchardet](http://code.google.com/p/juniversalchardet/) Java port of universalchardet - * [jchardet](http://jchardet.sourceforge.net/) Java port of chardet - * [nuniversalchardet](http://code.google.com/p/nuniversalchardet/) C# port of universalchardet - * [nchardet](http://www.conceptdevelopment.net/Localization/NCharDet/) C# port of chardet - * [uchardet-enhanced](https://bitbucket.org/medoc/uchardet-enhanced) A fork of mozilla universalchardet - * [rust-uchardet](https://github.com/emk/rust-uchardet) Rust language binding of uchardet - * [libchardet](https://ftp.oops.org/pub/oops/libchardet/) Another C/C++ API wrapping Mozilla code. - -## Used by - -* [mpv](https://mpv.io/) for subtitle detection -* [Tepl](https://wiki.gnome.org/Projects/Tepl) -* [Nextcloud IOS app](https://github.com/nextcloud/ios) -* … - -## Licenses - -* [Mozilla Public License Version 1.1](http://www.mozilla.org/MPL/1.1/) -* [GNU General Public License, version 2.0](http://www.gnu.org/licenses/old-licenses/gpl-2.0.en.html) or later. -* [GNU Lesser General Public License, version 2.1](http://www.gnu.org/licenses/old-licenses/lgpl-2.1.en.html) or later. - -See the file `COPYING` for the complete text of these 3 licenses. - -## Code of Conduct - -The `uchardet` project is hosted by [freedesktop.org](https://www.freedesktop.org/) -and as such follows its code of conduct. In other words, it means we -will treat anyone with respect and expect anyone to do the same. - -Please read [freedesktop.org Code of Conduct](https://www.freedesktop.org/wiki/CodeOfConduct). - -In case of any problem regarding abusive behavior in uchardet project, -please contact the maintainer (Jehan) or create a bug report (possibly -private if needed). diff --git a/PowerEditor/src/uchardet/nsBig5Prober.h b/PowerEditor/src/uchardet/nsBig5Prober.h index 7d13be8c..5ae35764 100644 --- a/PowerEditor/src/uchardet/nsBig5Prober.h +++ b/PowerEditor/src/uchardet/nsBig5Prober.h @@ -50,7 +50,7 @@ public: Reset();} virtual ~nsBig5Prober(void){delete mCodingSM;} nsProbingState HandleData(const char* aBuf, PRUint32 aLen); - const char* GetCharSetName() {return "BIG5";} + const char* GetCharSetName() {return "Big5";} nsProbingState GetState(void) {return mState;} void Reset(void); float GetConfidence(void); diff --git a/PowerEditor/src/uchardet/nsCharSetProber.cpp b/PowerEditor/src/uchardet/nsCharSetProber.cpp index 6d31ef8b..5e45d2d8 100644 --- a/PowerEditor/src/uchardet/nsCharSetProber.cpp +++ b/PowerEditor/src/uchardet/nsCharSetProber.cpp @@ -35,7 +35,7 @@ * the terms of any one of the MPL, the GPL or the LGPL. * * ***** END LICENSE BLOCK ***** */ - + #include "nsCharSetProber.h" #include "prmem.h" @@ -74,7 +74,9 @@ PRBool nsCharSetProber::FilterWithoutEnglishLetters(const char* aBuf, PRUint32 a if (meetMSB && curPtr > prevPtr) while (prevPtr < curPtr) *newptr++ = *prevPtr++; - newLen = static_cast(newptr - *newBuf); + auto np = reinterpret_cast(newptr); + auto nb = reinterpret_cast(*newBuf); + newLen = static_cast(np - nb); return PR_TRUE; } @@ -119,7 +121,9 @@ PRBool nsCharSetProber::FilterWithEnglishLetters(const char* aBuf, PRUint32 aLen while (prevPtr < curPtr) *newptr++ = *prevPtr++; - newLen = static_cast(newptr - *newBuf); + auto np = reinterpret_cast(newptr); + auto nb = reinterpret_cast(*newBuf); + newLen = static_cast(np - nb); return PR_TRUE; } diff --git a/PowerEditor/src/uchardet/nsCodingStateMachine.h b/PowerEditor/src/uchardet/nsCodingStateMachine.h index 819f9ab0..07eadee9 100644 --- a/PowerEditor/src/uchardet/nsCodingStateMachine.h +++ b/PowerEditor/src/uchardet/nsCodingStateMachine.h @@ -1,104 +1,107 @@ -/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 1.1/GPL 2.0/LGPL 2.1 - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is mozilla.org code. - * - * The Initial Developer of the Original Code is - * Netscape Communications Corporation. - * Portions created by the Initial Developer are Copyright (C) 1998 - * the Initial Developer. All Rights Reserved. - * - * Contributor(s): - * - * Alternatively, the contents of this file may be used under the terms of - * either the GNU General Public License Version 2 or later (the "GPL"), or - * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), - * in which case the provisions of the GPL or the LGPL are applicable instead - * of those above. If you wish to allow use of your version of this file only - * under the terms of either the GPL or the LGPL, and not to allow others to - * use your version of this file under the terms of the MPL, indicate your - * decision by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL or the LGPL. If you do not delete - * the provisions above, a recipient may use your version of this file under - * the terms of any one of the MPL, the GPL or the LGPL. - * - * ***** END LICENSE BLOCK ***** */ -#ifndef nsCodingStateMachine_h__ -#define nsCodingStateMachine_h__ - -#include "nsPkgInt.h" - -typedef enum { - eStart = 0, - eError = 1, - eItsMe = 2 -} nsSMState; - -#define GETCLASS(c) GETFROMPCK(((unsigned char)(c)), mModel->classTable) - -//state machine model -typedef struct -{ - nsPkgInt classTable; - PRUint32 classFactor; - nsPkgInt stateTable; - const PRUint32* charLenTable; - const char* name; -} SMModel; - -class nsCodingStateMachine { -public: - nsCodingStateMachine(const SMModel* sm) : mModel(sm) { mCurrentState = eStart; } - nsSMState NextState(char c){ - //for each byte we get its class , if it is first byte, we also get byte length - PRUint32 byteCls = GETCLASS(c); - if (mCurrentState == eStart) - { - mCurrentBytePos = 0; - mCurrentCharLen = mModel->charLenTable[byteCls]; - } - //from byte's class and stateTable, we get its next state - mCurrentState=(nsSMState)GETFROMPCK(mCurrentState*(mModel->classFactor)+byteCls, - mModel->stateTable); - mCurrentBytePos++; - return mCurrentState; - } - PRUint32 GetCurrentCharLen(void) {return mCurrentCharLen;} - void Reset(void) {mCurrentState = eStart;} - const char * GetCodingStateMachine() {return mModel->name;} - -protected: - nsSMState mCurrentState; - PRUint32 mCurrentCharLen; - PRUint32 mCurrentBytePos; - - const SMModel *mModel; -}; - -extern const SMModel UTF8SMModel; -extern const SMModel Big5SMModel; -extern const SMModel EUCJPSMModel; -extern const SMModel EUCKRSMModel; -extern const SMModel EUCTWSMModel; -extern const SMModel GB18030SMModel; -extern const SMModel SJISSMModel; - - -extern const SMModel HZSMModel; -extern const SMModel ISO2022CNSMModel; -extern const SMModel ISO2022JPSMModel; -extern const SMModel ISO2022KRSMModel; - -#endif /* nsCodingStateMachine_h__ */ - +/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* ***** BEGIN LICENSE BLOCK ***** + * Version: MPL 1.1/GPL 2.0/LGPL 2.1 + * + * The contents of this file are subject to the Mozilla Public License Version + * 1.1 (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. + * + * The Original Code is mozilla.org code. + * + * The Initial Developer of the Original Code is + * Netscape Communications Corporation. + * Portions created by the Initial Developer are Copyright (C) 1998 + * the Initial Developer. All Rights Reserved. + * + * Contributor(s): + * + * Alternatively, the contents of this file may be used under the terms of + * either the GNU General Public License Version 2 or later (the "GPL"), or + * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), + * in which case the provisions of the GPL or the LGPL are applicable instead + * of those above. If you wish to allow use of your version of this file only + * under the terms of either the GPL or the LGPL, and not to allow others to + * use your version of this file under the terms of the MPL, indicate your + * decision by deleting the provisions above and replace them with the notice + * and other provisions required by the GPL or the LGPL. If you do not delete + * the provisions above, a recipient may use your version of this file under + * the terms of any one of the MPL, the GPL or the LGPL. + * + * ***** END LICENSE BLOCK ***** */ +#ifndef nsCodingStateMachine_h__ +#define nsCodingStateMachine_h__ + +#include "nsPkgInt.h" + +typedef enum { + eStart = 0, + eError = 1, + eItsMe = 2 +} nsSMState; + +#define GETCLASS(c) GETFROMPCK(((unsigned char)(c)), mModel->classTable) + +//state machine model +struct SMModel +{ + nsPkgInt classTable; + PRUint32 classFactor; + nsPkgInt stateTable; + const PRUint32* charLenTable; + const char* name; + SMModel(){}; + SMModel(nsPkgInt a,PRUint32 b,nsPkgInt c,const PRUint32* d, const char* e): + classTable(a), classFactor(b), stateTable(c), charLenTable(d), name(e){}; +} ; + +class nsCodingStateMachine { +public: + nsCodingStateMachine(const SMModel* sm) : mModel(sm) { mCurrentState = eStart; } + nsSMState NextState(char c){ + //for each byte we get its class , if it is first byte, we also get byte length + PRUint32 byteCls = GETCLASS(c); + if (mCurrentState == eStart) + { + mCurrentBytePos = 0; + mCurrentCharLen = mModel->charLenTable[byteCls]; + } + //from byte's class and stateTable, we get its next state + mCurrentState=(nsSMState)GETFROMPCK(mCurrentState*(mModel->classFactor)+byteCls, + mModel->stateTable); + mCurrentBytePos++; + return mCurrentState; + } + PRUint32 GetCurrentCharLen(void) {return mCurrentCharLen;} + void Reset(void) {mCurrentState = eStart;} + const char * GetCodingStateMachine() {return mModel->name;} + +protected: + nsSMState mCurrentState; + PRUint32 mCurrentCharLen; + PRUint32 mCurrentBytePos; + + const SMModel *mModel; +}; + +extern const SMModel UTF8SMModel; +extern const SMModel Big5SMModel; +extern const SMModel EUCJPSMModel; +extern const SMModel EUCKRSMModel; +extern const SMModel EUCTWSMModel; +extern const SMModel GB18030SMModel; +extern const SMModel SJISSMModel; + + +extern const SMModel HZSMModel; +extern const SMModel ISO2022CNSMModel; +extern const SMModel ISO2022JPSMModel; +extern const SMModel ISO2022KRSMModel; + +#endif /* nsCodingStateMachine_h__ */ + diff --git a/PowerEditor/src/uchardet/nsEUCTWProber.h b/PowerEditor/src/uchardet/nsEUCTWProber.h index ee6376e2..911d50b0 100644 --- a/PowerEditor/src/uchardet/nsEUCTWProber.h +++ b/PowerEditor/src/uchardet/nsEUCTWProber.h @@ -50,7 +50,7 @@ public: Reset();} virtual ~nsEUCTWProber(void){delete mCodingSM;} nsProbingState HandleData(const char* aBuf, PRUint32 aLen); - const char* GetCharSetName() {return "EUC-TW";} + const char* GetCharSetName() {return "x-euc-tw";} nsProbingState GetState(void) {return mState;} void Reset(void); float GetConfidence(void); diff --git a/PowerEditor/src/uchardet/nsEscCharsetProber.cpp b/PowerEditor/src/uchardet/nsEscCharsetProber.cpp index 464c7534..128f0a22 100644 --- a/PowerEditor/src/uchardet/nsEscCharsetProber.cpp +++ b/PowerEditor/src/uchardet/nsEscCharsetProber.cpp @@ -75,17 +75,13 @@ void nsEscCharSetProber::Reset(void) nsProbingState nsEscCharSetProber::HandleData(const char* aBuf, PRUint32 aLen) { - nsSMState codingState; - PRInt32 j; - PRUint32 i; - - for ( i = 0; i < aLen && mState == eDetecting; i++) + for (PRUint32 i = 0; i < aLen && mState == eDetecting; i++) { - for (j = mActiveSM-1; j>= 0; j--) + for (PRInt32 j = mActiveSM-1; j>= 0; j--) { if (mCodingSM[j]) { - codingState = mCodingSM[j]->NextState(aBuf[i]); + nsSMState codingState = mCodingSM[j]->NextState(aBuf[i]); if (codingState == eItsMe) { mState = eFoundIt; diff --git a/PowerEditor/src/uchardet/nsEscSM.cpp b/PowerEditor/src/uchardet/nsEscSM.cpp index eed1b7cf..7b1de390 100644 --- a/PowerEditor/src/uchardet/nsEscSM.cpp +++ b/PowerEditor/src/uchardet/nsEscSM.cpp @@ -34,6 +34,7 @@ * the terms of any one of the MPL, the GPL or the LGPL. * * ***** END LICENSE BLOCK ***** */ + #include "nsCodingStateMachine.h" static const PRUint32 HZ_cls[ 256 / 8 ] = { @@ -83,13 +84,12 @@ PCK4BITS( 4,eItsMe,eStart,eStart,eStart,eStart,eStart,eStart) //28-2f static const PRUint32 HZCharLenTable[] = {0, 0, 0, 0, 0, 0}; -const SMModel HZSMModel = { - {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, HZ_cls }, +const SMModel HZSMModel( + nsPkgInt(eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, HZ_cls), 6, - {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, HZ_st }, + nsPkgInt(eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, HZ_st), HZCharLenTable, - "HZ-GB-2312", -}; + "HZ-GB-2312"); static const PRUint32 ISO2022CN_cls [ 256 / 8 ] = { @@ -141,13 +141,12 @@ PCK4BITS(eError,eError,eError,eError,eError,eItsMe,eError,eStart) //38-3f static const PRUint32 ISO2022CNCharLenTable[] = {0, 0, 0, 0, 0, 0, 0, 0, 0}; -const SMModel ISO2022CNSMModel = { - {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, ISO2022CN_cls }, +const SMModel ISO2022CNSMModel( + nsPkgInt(eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, ISO2022CN_cls), 9, - {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, ISO2022CN_st }, + nsPkgInt(eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, ISO2022CN_st), ISO2022CNCharLenTable, - "ISO-2022-CN", -}; + "ISO-2022-CN"); static const PRUint32 ISO2022JP_cls [ 256 / 8 ] = { PCK4BITS(2,0,0,0,0,0,0,0), // 00 - 07 @@ -199,13 +198,12 @@ PCK4BITS(eError,eError,eError,eError,eItsMe,eError,eStart,eStart) //40-47 static const PRUint32 ISO2022JPCharLenTable[] = {0, 0, 0, 0, 0, 0, 0, 0}; -const SMModel ISO2022JPSMModel = { - {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, ISO2022JP_cls }, +const SMModel ISO2022JPSMModel( + nsPkgInt(eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, ISO2022JP_cls), 10, - {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, ISO2022JP_st }, + nsPkgInt(eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, ISO2022JP_st), ISO2022JPCharLenTable, - "ISO-2022-JP", -}; + "ISO-2022-JP"); static const PRUint32 ISO2022KR_cls [ 256 / 8 ] = { PCK4BITS(2,0,0,0,0,0,0,0), // 00 - 07 @@ -253,11 +251,10 @@ PCK4BITS(eError,eError,eError,eItsMe,eStart,eStart,eStart,eStart) //20-27 static const PRUint32 ISO2022KRCharLenTable[] = {0, 0, 0, 0, 0, 0}; -const SMModel ISO2022KRSMModel = { - {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, ISO2022KR_cls }, +const SMModel ISO2022KRSMModel( + nsPkgInt(eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, ISO2022KR_cls), 6, - {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, ISO2022KR_st }, + nsPkgInt(eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, ISO2022KR_st), ISO2022KRCharLenTable, - "ISO-2022-KR", -}; + "ISO-2022-KR"); diff --git a/PowerEditor/src/uchardet/nsGB2312Prober.h b/PowerEditor/src/uchardet/nsGB2312Prober.h index 26ebf844..4bdac3bb 100644 --- a/PowerEditor/src/uchardet/nsGB2312Prober.h +++ b/PowerEditor/src/uchardet/nsGB2312Prober.h @@ -42,7 +42,7 @@ #include "nsCodingStateMachine.h" #include "CharDistribution.h" -// We use GB18030 to replace GB2312, because 18030 is a superset. +// We use gb18030 to replace gb2312, because 18030 is a superset. class nsGB18030Prober: public nsCharSetProber { public: @@ -52,7 +52,7 @@ public: Reset();} virtual ~nsGB18030Prober(void){delete mCodingSM;} nsProbingState HandleData(const char* aBuf, PRUint32 aLen); - const char* GetCharSetName() {return "GB18030";} + const char* GetCharSetName() {return "gb18030";} nsProbingState GetState(void) {return mState;} void Reset(void); float GetConfidence(void); diff --git a/PowerEditor/src/uchardet/nsHebrewProber.cpp b/PowerEditor/src/uchardet/nsHebrewProber.cpp index c503617c..b148ce3f 100644 --- a/PowerEditor/src/uchardet/nsHebrewProber.cpp +++ b/PowerEditor/src/uchardet/nsHebrewProber.cpp @@ -59,7 +59,7 @@ #define MIN_MODEL_DISTANCE (0.01) #define VISUAL_HEBREW_NAME ("ISO-8859-8") -#define LOGICAL_HEBREW_NAME ("WINDOWS-1255") +#define LOGICAL_HEBREW_NAME ("windows-1255") PRBool nsHebrewProber::isFinal(char c) { diff --git a/PowerEditor/src/uchardet/nsLatin1Prober.h b/PowerEditor/src/uchardet/nsLatin1Prober.h index 59118a7a..5145e965 100644 --- a/PowerEditor/src/uchardet/nsLatin1Prober.h +++ b/PowerEditor/src/uchardet/nsLatin1Prober.h @@ -48,7 +48,7 @@ public: nsLatin1Prober(void){Reset();} virtual ~nsLatin1Prober(void){} nsProbingState HandleData(const char* aBuf, PRUint32 aLen); - const char* GetCharSetName() {return "WINDOWS-1252";} + const char* GetCharSetName() {return "windows-1252";} nsProbingState GetState(void) {return mState;} void Reset(void); float GetConfidence(void); diff --git a/PowerEditor/src/uchardet/nsMBCSGroupProber.cpp b/PowerEditor/src/uchardet/nsMBCSGroupProber.cpp index 057ddb11..4fafb134 100644 --- a/PowerEditor/src/uchardet/nsMBCSGroupProber.cpp +++ b/PowerEditor/src/uchardet/nsMBCSGroupProber.cpp @@ -36,6 +36,7 @@ * the terms of any one of the MPL, the GPL or the LGPL. * * ***** END LICENSE BLOCK ***** */ + #include #include "nsMBCSGroupProber.h" @@ -44,13 +45,13 @@ #if defined(DEBUG_chardet) || defined(DEBUG_jgmyers) const char *ProberName[] = { - "UTF-8", + "UTF8", "SJIS", - "EUC-JP", + "EUCJP", "GB18030", - "EUC-KR", + "EUCKR", "Big5", - "EUC-TW", + "EUCTW", }; #endif diff --git a/PowerEditor/src/uchardet/nsMBCSSM.cpp b/PowerEditor/src/uchardet/nsMBCSSM.cpp index 50700968..bedf2b76 100644 --- a/PowerEditor/src/uchardet/nsMBCSSM.cpp +++ b/PowerEditor/src/uchardet/nsMBCSSM.cpp @@ -1,513 +1,507 @@ -/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 1.1/GPL 2.0/LGPL 2.1 - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is mozilla.org code. - * - * The Initial Developer of the Original Code is - * Netscape Communications Corporation. - * Portions created by the Initial Developer are Copyright (C) 1998 - * the Initial Developer. All Rights Reserved. - * - * Contributor(s): - * - * Alternatively, the contents of this file may be used under the terms of - * either the GNU General Public License Version 2 or later (the "GPL"), or - * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), - * in which case the provisions of the GPL or the LGPL are applicable instead - * of those above. If you wish to allow use of your version of this file only - * under the terms of either the GPL or the LGPL, and not to allow others to - * use your version of this file under the terms of the MPL, indicate your - * decision by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL or the LGPL. If you do not delete - * the provisions above, a recipient may use your version of this file under - * the terms of any one of the MPL, the GPL or the LGPL. - * - * ***** END LICENSE BLOCK ***** */ -#include "nsCodingStateMachine.h" - -/* -Modification from frank tang's original work: -. 0x00 is allowed as a legal character. Since some web pages contains this char in - text stream. -*/ - -// BIG5 - -static const PRUint32 BIG5_cls [ 256 / 8 ] = { -//PCK4BITS(0,1,1,1,1,1,1,1), // 00 - 07 -PCK4BITS(1,1,1,1,1,1,1,1), // 00 - 07 //allow 0x00 as legal value -PCK4BITS(1,1,1,1,1,1,0,0), // 08 - 0f -PCK4BITS(1,1,1,1,1,1,1,1), // 10 - 17 -PCK4BITS(1,1,1,0,1,1,1,1), // 18 - 1f -PCK4BITS(1,1,1,1,1,1,1,1), // 20 - 27 -PCK4BITS(1,1,1,1,1,1,1,1), // 28 - 2f -PCK4BITS(1,1,1,1,1,1,1,1), // 30 - 37 -PCK4BITS(1,1,1,1,1,1,1,1), // 38 - 3f -PCK4BITS(2,2,2,2,2,2,2,2), // 40 - 47 -PCK4BITS(2,2,2,2,2,2,2,2), // 48 - 4f -PCK4BITS(2,2,2,2,2,2,2,2), // 50 - 57 -PCK4BITS(2,2,2,2,2,2,2,2), // 58 - 5f -PCK4BITS(2,2,2,2,2,2,2,2), // 60 - 67 -PCK4BITS(2,2,2,2,2,2,2,2), // 68 - 6f -PCK4BITS(2,2,2,2,2,2,2,2), // 70 - 77 -PCK4BITS(2,2,2,2,2,2,2,1), // 78 - 7f -PCK4BITS(4,4,4,4,4,4,4,4), // 80 - 87 -PCK4BITS(4,4,4,4,4,4,4,4), // 88 - 8f -PCK4BITS(4,4,4,4,4,4,4,4), // 90 - 97 -PCK4BITS(4,4,4,4,4,4,4,4), // 98 - 9f -PCK4BITS(4,3,3,3,3,3,3,3), // a0 - a7 -PCK4BITS(3,3,3,3,3,3,3,3), // a8 - af -PCK4BITS(3,3,3,3,3,3,3,3), // b0 - b7 -PCK4BITS(3,3,3,3,3,3,3,3), // b8 - bf -PCK4BITS(3,3,3,3,3,3,3,3), // c0 - c7 -PCK4BITS(3,3,3,3,3,3,3,3), // c8 - cf -PCK4BITS(3,3,3,3,3,3,3,3), // d0 - d7 -PCK4BITS(3,3,3,3,3,3,3,3), // d8 - df -PCK4BITS(3,3,3,3,3,3,3,3), // e0 - e7 -PCK4BITS(3,3,3,3,3,3,3,3), // e8 - ef -PCK4BITS(3,3,3,3,3,3,3,3), // f0 - f7 -PCK4BITS(3,3,3,3,3,3,3,0) // f8 - ff -}; - - -static const PRUint32 BIG5_st [ 3] = { -PCK4BITS(eError,eStart,eStart, 3,eError,eError,eError,eError),//00-07 -PCK4BITS(eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eError),//08-0f -PCK4BITS(eError,eStart,eStart,eStart,eStart,eStart,eStart,eStart) //10-17 -}; - -static const PRUint32 Big5CharLenTable[] = {0, 1, 1, 2, 0}; - -SMModel const Big5SMModel = { - {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, BIG5_cls }, - 5, - {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, BIG5_st }, - Big5CharLenTable, - "BIG5", -}; - -static const PRUint32 EUCJP_cls [ 256 / 8 ] = { -//PCK4BITS(5,4,4,4,4,4,4,4), // 00 - 07 -PCK4BITS(4,4,4,4,4,4,4,4), // 00 - 07 -PCK4BITS(4,4,4,4,4,4,5,5), // 08 - 0f -PCK4BITS(4,4,4,4,4,4,4,4), // 10 - 17 -PCK4BITS(4,4,4,5,4,4,4,4), // 18 - 1f -PCK4BITS(4,4,4,4,4,4,4,4), // 20 - 27 -PCK4BITS(4,4,4,4,4,4,4,4), // 28 - 2f -PCK4BITS(4,4,4,4,4,4,4,4), // 30 - 37 -PCK4BITS(4,4,4,4,4,4,4,4), // 38 - 3f -PCK4BITS(4,4,4,4,4,4,4,4), // 40 - 47 -PCK4BITS(4,4,4,4,4,4,4,4), // 48 - 4f -PCK4BITS(4,4,4,4,4,4,4,4), // 50 - 57 -PCK4BITS(4,4,4,4,4,4,4,4), // 58 - 5f -PCK4BITS(4,4,4,4,4,4,4,4), // 60 - 67 -PCK4BITS(4,4,4,4,4,4,4,4), // 68 - 6f -PCK4BITS(4,4,4,4,4,4,4,4), // 70 - 77 -PCK4BITS(4,4,4,4,4,4,4,4), // 78 - 7f -PCK4BITS(5,5,5,5,5,5,5,5), // 80 - 87 -PCK4BITS(5,5,5,5,5,5,1,3), // 88 - 8f -PCK4BITS(5,5,5,5,5,5,5,5), // 90 - 97 -PCK4BITS(5,5,5,5,5,5,5,5), // 98 - 9f -PCK4BITS(5,2,2,2,2,2,2,2), // a0 - a7 -PCK4BITS(2,2,2,2,2,2,2,2), // a8 - af -PCK4BITS(2,2,2,2,2,2,2,2), // b0 - b7 -PCK4BITS(2,2,2,2,2,2,2,2), // b8 - bf -PCK4BITS(2,2,2,2,2,2,2,2), // c0 - c7 -PCK4BITS(2,2,2,2,2,2,2,2), // c8 - cf -PCK4BITS(2,2,2,2,2,2,2,2), // d0 - d7 -PCK4BITS(2,2,2,2,2,2,2,2), // d8 - df -PCK4BITS(0,0,0,0,0,0,0,0), // e0 - e7 -PCK4BITS(0,0,0,0,0,0,0,0), // e8 - ef -PCK4BITS(0,0,0,0,0,0,0,0), // f0 - f7 -PCK4BITS(0,0,0,0,0,0,0,5) // f8 - ff -}; - - -static const PRUint32 EUCJP_st [ 5] = { -PCK4BITS( 3, 4, 3, 5,eStart,eError,eError,eError),//00-07 -PCK4BITS(eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe),//08-0f -PCK4BITS(eItsMe,eItsMe,eStart,eError,eStart,eError,eError,eError),//10-17 -PCK4BITS(eError,eError,eStart,eError,eError,eError, 3,eError),//18-1f -PCK4BITS( 3,eError,eError,eError,eStart,eStart,eStart,eStart) //20-27 -}; - -static const PRUint32 EUCJPCharLenTable[] = {2, 2, 2, 3, 1, 0}; - -const SMModel EUCJPSMModel = { - {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, EUCJP_cls }, - 6, - {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, EUCJP_st }, - EUCJPCharLenTable, - "EUC-JP", -}; - -static const PRUint32 EUCKR_cls [ 256 / 8 ] = { -//PCK4BITS(0,1,1,1,1,1,1,1), // 00 - 07 -PCK4BITS(1,1,1,1,1,1,1,1), // 00 - 07 -PCK4BITS(1,1,1,1,1,1,0,0), // 08 - 0f -PCK4BITS(1,1,1,1,1,1,1,1), // 10 - 17 -PCK4BITS(1,1,1,0,1,1,1,1), // 18 - 1f -PCK4BITS(1,1,1,1,1,1,1,1), // 20 - 27 -PCK4BITS(1,1,1,1,1,1,1,1), // 28 - 2f -PCK4BITS(1,1,1,1,1,1,1,1), // 30 - 37 -PCK4BITS(1,1,1,1,1,1,1,1), // 38 - 3f -PCK4BITS(1,1,1,1,1,1,1,1), // 40 - 47 -PCK4BITS(1,1,1,1,1,1,1,1), // 48 - 4f -PCK4BITS(1,1,1,1,1,1,1,1), // 50 - 57 -PCK4BITS(1,1,1,1,1,1,1,1), // 58 - 5f -PCK4BITS(1,1,1,1,1,1,1,1), // 60 - 67 -PCK4BITS(1,1,1,1,1,1,1,1), // 68 - 6f -PCK4BITS(1,1,1,1,1,1,1,1), // 70 - 77 -PCK4BITS(1,1,1,1,1,1,1,1), // 78 - 7f -PCK4BITS(0,0,0,0,0,0,0,0), // 80 - 87 -PCK4BITS(0,0,0,0,0,0,0,0), // 88 - 8f -PCK4BITS(0,0,0,0,0,0,0,0), // 90 - 97 -PCK4BITS(0,0,0,0,0,0,0,0), // 98 - 9f -PCK4BITS(0,2,2,2,2,2,2,2), // a0 - a7 -PCK4BITS(2,2,2,2,2,3,3,3), // a8 - af -PCK4BITS(2,2,2,2,2,2,2,2), // b0 - b7 -PCK4BITS(2,2,2,2,2,2,2,2), // b8 - bf -PCK4BITS(2,2,2,2,2,2,2,2), // c0 - c7 -PCK4BITS(2,3,2,2,2,2,2,2), // c8 - cf -PCK4BITS(2,2,2,2,2,2,2,2), // d0 - d7 -PCK4BITS(2,2,2,2,2,2,2,2), // d8 - df -PCK4BITS(2,2,2,2,2,2,2,2), // e0 - e7 -PCK4BITS(2,2,2,2,2,2,2,2), // e8 - ef -PCK4BITS(2,2,2,2,2,2,2,2), // f0 - f7 -PCK4BITS(2,2,2,2,2,2,2,0) // f8 - ff -}; - - -static const PRUint32 EUCKR_st [ 2] = { -PCK4BITS(eError,eStart, 3,eError,eError,eError,eError,eError),//00-07 -PCK4BITS(eItsMe,eItsMe,eItsMe,eItsMe,eError,eError,eStart,eStart) //08-0f -}; - -static const PRUint32 EUCKRCharLenTable[] = {0, 1, 2, 0}; - -const SMModel EUCKRSMModel = { - {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, EUCKR_cls }, - 4, - {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, EUCKR_st }, - EUCKRCharLenTable, - "EUC-KR", -}; - -static const PRUint32 EUCTW_cls [ 256 / 8 ] = { -//PCK4BITS(0,2,2,2,2,2,2,2), // 00 - 07 -PCK4BITS(2,2,2,2,2,2,2,2), // 00 - 07 -PCK4BITS(2,2,2,2,2,2,0,0), // 08 - 0f -PCK4BITS(2,2,2,2,2,2,2,2), // 10 - 17 -PCK4BITS(2,2,2,0,2,2,2,2), // 18 - 1f -PCK4BITS(2,2,2,2,2,2,2,2), // 20 - 27 -PCK4BITS(2,2,2,2,2,2,2,2), // 28 - 2f -PCK4BITS(2,2,2,2,2,2,2,2), // 30 - 37 -PCK4BITS(2,2,2,2,2,2,2,2), // 38 - 3f -PCK4BITS(2,2,2,2,2,2,2,2), // 40 - 47 -PCK4BITS(2,2,2,2,2,2,2,2), // 48 - 4f -PCK4BITS(2,2,2,2,2,2,2,2), // 50 - 57 -PCK4BITS(2,2,2,2,2,2,2,2), // 58 - 5f -PCK4BITS(2,2,2,2,2,2,2,2), // 60 - 67 -PCK4BITS(2,2,2,2,2,2,2,2), // 68 - 6f -PCK4BITS(2,2,2,2,2,2,2,2), // 70 - 77 -PCK4BITS(2,2,2,2,2,2,2,2), // 78 - 7f -PCK4BITS(0,0,0,0,0,0,0,0), // 80 - 87 -PCK4BITS(0,0,0,0,0,0,6,0), // 88 - 8f -PCK4BITS(0,0,0,0,0,0,0,0), // 90 - 97 -PCK4BITS(0,0,0,0,0,0,0,0), // 98 - 9f -PCK4BITS(0,3,4,4,4,4,4,4), // a0 - a7 -PCK4BITS(5,5,1,1,1,1,1,1), // a8 - af -PCK4BITS(1,1,1,1,1,1,1,1), // b0 - b7 -PCK4BITS(1,1,1,1,1,1,1,1), // b8 - bf -PCK4BITS(1,1,3,1,3,3,3,3), // c0 - c7 -PCK4BITS(3,3,3,3,3,3,3,3), // c8 - cf -PCK4BITS(3,3,3,3,3,3,3,3), // d0 - d7 -PCK4BITS(3,3,3,3,3,3,3,3), // d8 - df -PCK4BITS(3,3,3,3,3,3,3,3), // e0 - e7 -PCK4BITS(3,3,3,3,3,3,3,3), // e8 - ef -PCK4BITS(3,3,3,3,3,3,3,3), // f0 - f7 -PCK4BITS(3,3,3,3,3,3,3,0) // f8 - ff -}; - - -static const PRUint32 EUCTW_st [ 6] = { -PCK4BITS(eError,eError,eStart, 3, 3, 3, 4,eError),//00-07 -PCK4BITS(eError,eError,eError,eError,eError,eError,eItsMe,eItsMe),//08-0f -PCK4BITS(eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eError,eStart,eError),//10-17 -PCK4BITS(eStart,eStart,eStart,eError,eError,eError,eError,eError),//18-1f -PCK4BITS( 5,eError,eError,eError,eStart,eError,eStart,eStart),//20-27 -PCK4BITS(eStart,eError,eStart,eStart,eStart,eStart,eStart,eStart) //28-2f -}; - -static const PRUint32 EUCTWCharLenTable[] = {0, 0, 1, 2, 2, 2, 3}; - -const SMModel EUCTWSMModel = { - {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, EUCTW_cls }, - 7, - {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, EUCTW_st }, - EUCTWCharLenTable, - "EUC-TW", -}; - -/* obsolete GB2312 by GB18030 -static PRUint32 GB2312_cls [ 256 / 8 ] = { -//PCK4BITS(0,1,1,1,1,1,1,1), // 00 - 07 -PCK4BITS(1,1,1,1,1,1,1,1), // 00 - 07 -PCK4BITS(1,1,1,1,1,1,0,0), // 08 - 0f -PCK4BITS(1,1,1,1,1,1,1,1), // 10 - 17 -PCK4BITS(1,1,1,0,1,1,1,1), // 18 - 1f -PCK4BITS(1,1,1,1,1,1,1,1), // 20 - 27 -PCK4BITS(1,1,1,1,1,1,1,1), // 28 - 2f -PCK4BITS(1,1,1,1,1,1,1,1), // 30 - 37 -PCK4BITS(1,1,1,1,1,1,1,1), // 38 - 3f -PCK4BITS(1,1,1,1,1,1,1,1), // 40 - 47 -PCK4BITS(1,1,1,1,1,1,1,1), // 48 - 4f -PCK4BITS(1,1,1,1,1,1,1,1), // 50 - 57 -PCK4BITS(1,1,1,1,1,1,1,1), // 58 - 5f -PCK4BITS(1,1,1,1,1,1,1,1), // 60 - 67 -PCK4BITS(1,1,1,1,1,1,1,1), // 68 - 6f -PCK4BITS(1,1,1,1,1,1,1,1), // 70 - 77 -PCK4BITS(1,1,1,1,1,1,1,1), // 78 - 7f -PCK4BITS(1,0,0,0,0,0,0,0), // 80 - 87 -PCK4BITS(0,0,0,0,0,0,0,0), // 88 - 8f -PCK4BITS(0,0,0,0,0,0,0,0), // 90 - 97 -PCK4BITS(0,0,0,0,0,0,0,0), // 98 - 9f -PCK4BITS(0,2,2,2,2,2,2,2), // a0 - a7 -PCK4BITS(2,2,3,3,3,3,3,3), // a8 - af -PCK4BITS(2,2,2,2,2,2,2,2), // b0 - b7 -PCK4BITS(2,2,2,2,2,2,2,2), // b8 - bf -PCK4BITS(2,2,2,2,2,2,2,2), // c0 - c7 -PCK4BITS(2,2,2,2,2,2,2,2), // c8 - cf -PCK4BITS(2,2,2,2,2,2,2,2), // d0 - d7 -PCK4BITS(2,2,2,2,2,2,2,2), // d8 - df -PCK4BITS(2,2,2,2,2,2,2,2), // e0 - e7 -PCK4BITS(2,2,2,2,2,2,2,2), // e8 - ef -PCK4BITS(2,2,2,2,2,2,2,2), // f0 - f7 -PCK4BITS(2,2,2,2,2,2,2,0) // f8 - ff -}; - - -static PRUint32 GB2312_st [ 2] = { -PCK4BITS(eError,eStart, 3,eError,eError,eError,eError,eError),//00-07 -PCK4BITS(eItsMe,eItsMe,eItsMe,eItsMe,eError,eError,eStart,eStart) //08-0f -}; - -static const PRUint32 GB2312CharLenTable[] = {0, 1, 2, 0}; - -SMModel GB2312SMModel = { - {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, GB2312_cls }, - 4, - {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, GB2312_st }, - GB2312CharLenTable, - "GB2312", -}; -*/ - -// the following state machine data was created by perl script in -// intl/chardet/tools. It should be the same as in PSM detector. -static const PRUint32 GB18030_cls [ 256 / 8 ] = { -PCK4BITS(1,1,1,1,1,1,1,1), // 00 - 07 -PCK4BITS(1,1,1,1,1,1,0,0), // 08 - 0f -PCK4BITS(1,1,1,1,1,1,1,1), // 10 - 17 -PCK4BITS(1,1,1,0,1,1,1,1), // 18 - 1f -PCK4BITS(1,1,1,1,1,1,1,1), // 20 - 27 -PCK4BITS(1,1,1,1,1,1,1,1), // 28 - 2f -PCK4BITS(3,3,3,3,3,3,3,3), // 30 - 37 -PCK4BITS(3,3,1,1,1,1,1,1), // 38 - 3f -PCK4BITS(2,2,2,2,2,2,2,2), // 40 - 47 -PCK4BITS(2,2,2,2,2,2,2,2), // 48 - 4f -PCK4BITS(2,2,2,2,2,2,2,2), // 50 - 57 -PCK4BITS(2,2,2,2,2,2,2,2), // 58 - 5f -PCK4BITS(2,2,2,2,2,2,2,2), // 60 - 67 -PCK4BITS(2,2,2,2,2,2,2,2), // 68 - 6f -PCK4BITS(2,2,2,2,2,2,2,2), // 70 - 77 -PCK4BITS(2,2,2,2,2,2,2,4), // 78 - 7f -PCK4BITS(5,6,6,6,6,6,6,6), // 80 - 87 -PCK4BITS(6,6,6,6,6,6,6,6), // 88 - 8f -PCK4BITS(6,6,6,6,6,6,6,6), // 90 - 97 -PCK4BITS(6,6,6,6,6,6,6,6), // 98 - 9f -PCK4BITS(6,6,6,6,6,6,6,6), // a0 - a7 -PCK4BITS(6,6,6,6,6,6,6,6), // a8 - af -PCK4BITS(6,6,6,6,6,6,6,6), // b0 - b7 -PCK4BITS(6,6,6,6,6,6,6,6), // b8 - bf -PCK4BITS(6,6,6,6,6,6,6,6), // c0 - c7 -PCK4BITS(6,6,6,6,6,6,6,6), // c8 - cf -PCK4BITS(6,6,6,6,6,6,6,6), // d0 - d7 -PCK4BITS(6,6,6,6,6,6,6,6), // d8 - df -PCK4BITS(6,6,6,6,6,6,6,6), // e0 - e7 -PCK4BITS(6,6,6,6,6,6,6,6), // e8 - ef -PCK4BITS(6,6,6,6,6,6,6,6), // f0 - f7 -PCK4BITS(6,6,6,6,6,6,6,0) // f8 - ff -}; - - -static const PRUint32 GB18030_st [ 6] = { -PCK4BITS(eError,eStart,eStart,eStart,eStart,eStart, 3,eError),//00-07 -PCK4BITS(eError,eError,eError,eError,eError,eError,eItsMe,eItsMe),//08-0f -PCK4BITS(eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eError,eError,eStart),//10-17 -PCK4BITS( 4,eError,eStart,eStart,eError,eError,eError,eError),//18-1f -PCK4BITS(eError,eError, 5,eError,eError,eError,eItsMe,eError),//20-27 -PCK4BITS(eError,eError,eStart,eStart,eStart,eStart,eStart,eStart) //28-2f -}; - -// To be accurate, the length of class 6 can be either 2 or 4. -// But it is not necessary to discriminate between the two since -// it is used for frequency analysis only, and we are validing -// each code range there as well. So it is safe to set it to be -// 2 here. -static const PRUint32 GB18030CharLenTable[] = {0, 1, 1, 1, 1, 1, 2}; - -const SMModel GB18030SMModel = { - {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, GB18030_cls }, - 7, - {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, GB18030_st }, - GB18030CharLenTable, - "GB18030", -}; - -// sjis - -static const PRUint32 SJIS_cls [ 256 / 8 ] = { -//PCK4BITS(0,1,1,1,1,1,1,1), // 00 - 07 -PCK4BITS(1,1,1,1,1,1,1,1), // 00 - 07 -PCK4BITS(1,1,1,1,1,1,0,0), // 08 - 0f -PCK4BITS(1,1,1,1,1,1,1,1), // 10 - 17 -PCK4BITS(1,1,1,0,1,1,1,1), // 18 - 1f -PCK4BITS(1,1,1,1,1,1,1,1), // 20 - 27 -PCK4BITS(1,1,1,1,1,1,1,1), // 28 - 2f -PCK4BITS(1,1,1,1,1,1,1,1), // 30 - 37 -PCK4BITS(1,1,1,1,1,1,1,1), // 38 - 3f -PCK4BITS(2,2,2,2,2,2,2,2), // 40 - 47 -PCK4BITS(2,2,2,2,2,2,2,2), // 48 - 4f -PCK4BITS(2,2,2,2,2,2,2,2), // 50 - 57 -PCK4BITS(2,2,2,2,2,2,2,2), // 58 - 5f -PCK4BITS(2,2,2,2,2,2,2,2), // 60 - 67 -PCK4BITS(2,2,2,2,2,2,2,2), // 68 - 6f -PCK4BITS(2,2,2,2,2,2,2,2), // 70 - 77 -PCK4BITS(2,2,2,2,2,2,2,1), // 78 - 7f -PCK4BITS(3,3,3,3,3,3,3,3), // 80 - 87 -PCK4BITS(3,3,3,3,3,3,3,3), // 88 - 8f -PCK4BITS(3,3,3,3,3,3,3,3), // 90 - 97 -PCK4BITS(3,3,3,3,3,3,3,3), // 98 - 9f -//0xa0 is illegal in sjis encoding, but some pages does -//contain such byte. We need to be more error forgiven. -PCK4BITS(2,2,2,2,2,2,2,2), // a0 - a7 -PCK4BITS(2,2,2,2,2,2,2,2), // a8 - af -PCK4BITS(2,2,2,2,2,2,2,2), // b0 - b7 -PCK4BITS(2,2,2,2,2,2,2,2), // b8 - bf -PCK4BITS(2,2,2,2,2,2,2,2), // c0 - c7 -PCK4BITS(2,2,2,2,2,2,2,2), // c8 - cf -PCK4BITS(2,2,2,2,2,2,2,2), // d0 - d7 -PCK4BITS(2,2,2,2,2,2,2,2), // d8 - df -PCK4BITS(3,3,3,3,3,3,3,3), // e0 - e7 -PCK4BITS(3,3,3,3,3,4,4,4), // e8 - ef -PCK4BITS(4,4,4,4,4,4,4,4), // f0 - f7 -PCK4BITS(4,4,4,4,4,0,0,0) // f8 - ff -}; - - -static const PRUint32 SJIS_st [ 3] = { -PCK4BITS(eError,eStart,eStart, 3,eError,eError,eError,eError),//00-07 -PCK4BITS(eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe),//08-0f -PCK4BITS(eItsMe,eItsMe,eError,eError,eStart,eStart,eStart,eStart) //10-17 -}; - -static const PRUint32 SJISCharLenTable[] = {0, 1, 1, 2, 0, 0}; - -const SMModel SJISSMModel = { - {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, SJIS_cls }, - 6, - {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, SJIS_st }, - SJISCharLenTable, - "SHIFT_JIS", -}; - - -static const PRUint32 UTF8_cls [ 256 / 8 ] = { -//PCK4BITS(0,1,1,1,1,1,1,1), // 00 - 07 -PCK4BITS(1,1,1,1,1,1,1,1), // 00 - 07 //allow 0x00 as a legal value -PCK4BITS(1,1,1,1,1,1,0,0), // 08 - 0f -PCK4BITS(1,1,1,1,1,1,1,1), // 10 - 17 -PCK4BITS(1,1,1,0,1,1,1,1), // 18 - 1f -PCK4BITS(1,1,1,1,1,1,1,1), // 20 - 27 -PCK4BITS(1,1,1,1,1,1,1,1), // 28 - 2f -PCK4BITS(1,1,1,1,1,1,1,1), // 30 - 37 -PCK4BITS(1,1,1,1,1,1,1,1), // 38 - 3f -PCK4BITS(1,1,1,1,1,1,1,1), // 40 - 47 -PCK4BITS(1,1,1,1,1,1,1,1), // 48 - 4f -PCK4BITS(1,1,1,1,1,1,1,1), // 50 - 57 -PCK4BITS(1,1,1,1,1,1,1,1), // 58 - 5f -PCK4BITS(1,1,1,1,1,1,1,1), // 60 - 67 -PCK4BITS(1,1,1,1,1,1,1,1), // 68 - 6f -PCK4BITS(1,1,1,1,1,1,1,1), // 70 - 77 -PCK4BITS(1,1,1,1,1,1,1,1), // 78 - 7f -PCK4BITS(2,2,2,2,3,3,3,3), // 80 - 87 -PCK4BITS(4,4,4,4,4,4,4,4), // 88 - 8f -PCK4BITS(4,4,4,4,4,4,4,4), // 90 - 97 -PCK4BITS(4,4,4,4,4,4,4,4), // 98 - 9f -PCK4BITS(5,5,5,5,5,5,5,5), // a0 - a7 -PCK4BITS(5,5,5,5,5,5,5,5), // a8 - af -PCK4BITS(5,5,5,5,5,5,5,5), // b0 - b7 -PCK4BITS(5,5,5,5,5,5,5,5), // b8 - bf -PCK4BITS(0,0,6,6,6,6,6,6), // c0 - c7 -PCK4BITS(6,6,6,6,6,6,6,6), // c8 - cf -PCK4BITS(6,6,6,6,6,6,6,6), // d0 - d7 -PCK4BITS(6,6,6,6,6,6,6,6), // d8 - df -PCK4BITS(7,8,8,8,8,8,8,8), // e0 - e7 -PCK4BITS(8,8,8,8,8,9,8,8), // e8 - ef -PCK4BITS(10,11,11,11,11,11,11,11), // f0 - f7 -PCK4BITS(12,13,13,13,14,15,0,0) // f8 - ff -}; - - -static const PRUint32 UTF8_st [ 26] = { -PCK4BITS(eError,eStart,eError,eError,eError,eError, 12, 10),//00-07 -PCK4BITS( 9, 11, 8, 7, 6, 5, 4, 3),//08-0f -PCK4BITS(eError,eError,eError,eError,eError,eError,eError,eError),//10-17 -PCK4BITS(eError,eError,eError,eError,eError,eError,eError,eError),//18-1f -PCK4BITS(eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe),//20-27 -PCK4BITS(eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe),//28-2f -PCK4BITS(eError,eError, 5, 5, 5, 5,eError,eError),//30-37 -PCK4BITS(eError,eError,eError,eError,eError,eError,eError,eError),//38-3f -PCK4BITS(eError,eError,eError, 5, 5, 5,eError,eError),//40-47 -PCK4BITS(eError,eError,eError,eError,eError,eError,eError,eError),//48-4f -PCK4BITS(eError,eError, 7, 7, 7, 7,eError,eError),//50-57 -PCK4BITS(eError,eError,eError,eError,eError,eError,eError,eError),//58-5f -PCK4BITS(eError,eError,eError,eError, 7, 7,eError,eError),//60-67 -PCK4BITS(eError,eError,eError,eError,eError,eError,eError,eError),//68-6f -PCK4BITS(eError,eError, 9, 9, 9, 9,eError,eError),//70-77 -PCK4BITS(eError,eError,eError,eError,eError,eError,eError,eError),//78-7f -PCK4BITS(eError,eError,eError,eError,eError, 9,eError,eError),//80-87 -PCK4BITS(eError,eError,eError,eError,eError,eError,eError,eError),//88-8f -PCK4BITS(eError,eError, 12, 12, 12, 12,eError,eError),//90-97 -PCK4BITS(eError,eError,eError,eError,eError,eError,eError,eError),//98-9f -PCK4BITS(eError,eError,eError,eError,eError, 12,eError,eError),//a0-a7 -PCK4BITS(eError,eError,eError,eError,eError,eError,eError,eError),//a8-af -PCK4BITS(eError,eError, 12, 12, 12,eError,eError,eError),//b0-b7 -PCK4BITS(eError,eError,eError,eError,eError,eError,eError,eError),//b8-bf -PCK4BITS(eError,eError,eStart,eStart,eStart,eStart,eError,eError),//c0-c7 -PCK4BITS(eError,eError,eError,eError,eError,eError,eError,eError) //c8-cf -}; - -static const PRUint32 UTF8CharLenTable[] = {0, 1, 0, 0, 0, 0, 2, 3, - 3, 3, 4, 4, 5, 5, 6, 6 }; - -const SMModel UTF8SMModel = { - {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, UTF8_cls }, - 16, - {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, UTF8_st }, - UTF8CharLenTable, - "UTF-8", -}; - +/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* ***** BEGIN LICENSE BLOCK ***** + * Version: MPL 1.1/GPL 2.0/LGPL 2.1 + * + * The contents of this file are subject to the Mozilla Public License Version + * 1.1 (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. + * + * The Original Code is mozilla.org code. + * + * The Initial Developer of the Original Code is + * Netscape Communications Corporation. + * Portions created by the Initial Developer are Copyright (C) 1998 + * the Initial Developer. All Rights Reserved. + * + * Contributor(s): + * + * Alternatively, the contents of this file may be used under the terms of + * either the GNU General Public License Version 2 or later (the "GPL"), or + * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), + * in which case the provisions of the GPL or the LGPL are applicable instead + * of those above. If you wish to allow use of your version of this file only + * under the terms of either the GPL or the LGPL, and not to allow others to + * use your version of this file under the terms of the MPL, indicate your + * decision by deleting the provisions above and replace them with the notice + * and other provisions required by the GPL or the LGPL. If you do not delete + * the provisions above, a recipient may use your version of this file under + * the terms of any one of the MPL, the GPL or the LGPL. + * + * ***** END LICENSE BLOCK ***** */ + +#include "nsCodingStateMachine.h" + +/* +Modification from frank tang's original work: +. 0x00 is allowed as a legal character. Since some web pages contains this char in + text stream. +*/ + +// BIG5 + +static const PRUint32 BIG5_cls [ 256 / 8 ] = { +//PCK4BITS(0,1,1,1,1,1,1,1), // 00 - 07 +PCK4BITS(1,1,1,1,1,1,1,1), // 00 - 07 //allow 0x00 as legal value +PCK4BITS(1,1,1,1,1,1,0,0), // 08 - 0f +PCK4BITS(1,1,1,1,1,1,1,1), // 10 - 17 +PCK4BITS(1,1,1,0,1,1,1,1), // 18 - 1f +PCK4BITS(1,1,1,1,1,1,1,1), // 20 - 27 +PCK4BITS(1,1,1,1,1,1,1,1), // 28 - 2f +PCK4BITS(1,1,1,1,1,1,1,1), // 30 - 37 +PCK4BITS(1,1,1,1,1,1,1,1), // 38 - 3f +PCK4BITS(2,2,2,2,2,2,2,2), // 40 - 47 +PCK4BITS(2,2,2,2,2,2,2,2), // 48 - 4f +PCK4BITS(2,2,2,2,2,2,2,2), // 50 - 57 +PCK4BITS(2,2,2,2,2,2,2,2), // 58 - 5f +PCK4BITS(2,2,2,2,2,2,2,2), // 60 - 67 +PCK4BITS(2,2,2,2,2,2,2,2), // 68 - 6f +PCK4BITS(2,2,2,2,2,2,2,2), // 70 - 77 +PCK4BITS(2,2,2,2,2,2,2,1), // 78 - 7f +PCK4BITS(4,4,4,4,4,4,4,4), // 80 - 87 +PCK4BITS(4,4,4,4,4,4,4,4), // 88 - 8f +PCK4BITS(4,4,4,4,4,4,4,4), // 90 - 97 +PCK4BITS(4,4,4,4,4,4,4,4), // 98 - 9f +PCK4BITS(4,3,3,3,3,3,3,3), // a0 - a7 +PCK4BITS(3,3,3,3,3,3,3,3), // a8 - af +PCK4BITS(3,3,3,3,3,3,3,3), // b0 - b7 +PCK4BITS(3,3,3,3,3,3,3,3), // b8 - bf +PCK4BITS(3,3,3,3,3,3,3,3), // c0 - c7 +PCK4BITS(3,3,3,3,3,3,3,3), // c8 - cf +PCK4BITS(3,3,3,3,3,3,3,3), // d0 - d7 +PCK4BITS(3,3,3,3,3,3,3,3), // d8 - df +PCK4BITS(3,3,3,3,3,3,3,3), // e0 - e7 +PCK4BITS(3,3,3,3,3,3,3,3), // e8 - ef +PCK4BITS(3,3,3,3,3,3,3,3), // f0 - f7 +PCK4BITS(3,3,3,3,3,3,3,0) // f8 - ff +}; + + +static const PRUint32 BIG5_st [ 3] = { +PCK4BITS(eError,eStart,eStart, 3,eError,eError,eError,eError),//00-07 +PCK4BITS(eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eError),//08-0f +PCK4BITS(eError,eStart,eStart,eStart,eStart,eStart,eStart,eStart) //10-17 +}; + +static const PRUint32 Big5CharLenTable[] = {0, 1, 1, 2, 0}; + +const SMModel Big5SMModel( +nsPkgInt( eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, BIG5_cls ), + 5, +nsPkgInt( eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, BIG5_st ), + Big5CharLenTable, +"Big5"); + +static const PRUint32 EUCJP_cls [ 256 / 8 ] = { +//PCK4BITS(5,4,4,4,4,4,4,4), // 00 - 07 +PCK4BITS(4,4,4,4,4,4,4,4), // 00 - 07 +PCK4BITS(4,4,4,4,4,4,5,5), // 08 - 0f +PCK4BITS(4,4,4,4,4,4,4,4), // 10 - 17 +PCK4BITS(4,4,4,5,4,4,4,4), // 18 - 1f +PCK4BITS(4,4,4,4,4,4,4,4), // 20 - 27 +PCK4BITS(4,4,4,4,4,4,4,4), // 28 - 2f +PCK4BITS(4,4,4,4,4,4,4,4), // 30 - 37 +PCK4BITS(4,4,4,4,4,4,4,4), // 38 - 3f +PCK4BITS(4,4,4,4,4,4,4,4), // 40 - 47 +PCK4BITS(4,4,4,4,4,4,4,4), // 48 - 4f +PCK4BITS(4,4,4,4,4,4,4,4), // 50 - 57 +PCK4BITS(4,4,4,4,4,4,4,4), // 58 - 5f +PCK4BITS(4,4,4,4,4,4,4,4), // 60 - 67 +PCK4BITS(4,4,4,4,4,4,4,4), // 68 - 6f +PCK4BITS(4,4,4,4,4,4,4,4), // 70 - 77 +PCK4BITS(4,4,4,4,4,4,4,4), // 78 - 7f +PCK4BITS(5,5,5,5,5,5,5,5), // 80 - 87 +PCK4BITS(5,5,5,5,5,5,1,3), // 88 - 8f +PCK4BITS(5,5,5,5,5,5,5,5), // 90 - 97 +PCK4BITS(5,5,5,5,5,5,5,5), // 98 - 9f +PCK4BITS(5,2,2,2,2,2,2,2), // a0 - a7 +PCK4BITS(2,2,2,2,2,2,2,2), // a8 - af +PCK4BITS(2,2,2,2,2,2,2,2), // b0 - b7 +PCK4BITS(2,2,2,2,2,2,2,2), // b8 - bf +PCK4BITS(2,2,2,2,2,2,2,2), // c0 - c7 +PCK4BITS(2,2,2,2,2,2,2,2), // c8 - cf +PCK4BITS(2,2,2,2,2,2,2,2), // d0 - d7 +PCK4BITS(2,2,2,2,2,2,2,2), // d8 - df +PCK4BITS(0,0,0,0,0,0,0,0), // e0 - e7 +PCK4BITS(0,0,0,0,0,0,0,0), // e8 - ef +PCK4BITS(0,0,0,0,0,0,0,0), // f0 - f7 +PCK4BITS(0,0,0,0,0,0,0,5) // f8 - ff +}; + + +static const PRUint32 EUCJP_st [ 5] = { +PCK4BITS( 3, 4, 3, 5,eStart,eError,eError,eError),//00-07 +PCK4BITS(eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe),//08-0f +PCK4BITS(eItsMe,eItsMe,eStart,eError,eStart,eError,eError,eError),//10-17 +PCK4BITS(eError,eError,eStart,eError,eError,eError, 3,eError),//18-1f +PCK4BITS( 3,eError,eError,eError,eStart,eStart,eStart,eStart) //20-27 +}; + +static const PRUint32 EUCJPCharLenTable[] = {2, 2, 2, 3, 1, 0}; + +const SMModel EUCJPSMModel( + nsPkgInt(eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, EUCJP_cls), + 6, + nsPkgInt(eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, EUCJP_st), + EUCJPCharLenTable, + "EUC-JP"); + +static const PRUint32 EUCKR_cls [ 256 / 8 ] = { +//PCK4BITS(0,1,1,1,1,1,1,1), // 00 - 07 +PCK4BITS(1,1,1,1,1,1,1,1), // 00 - 07 +PCK4BITS(1,1,1,1,1,1,0,0), // 08 - 0f +PCK4BITS(1,1,1,1,1,1,1,1), // 10 - 17 +PCK4BITS(1,1,1,0,1,1,1,1), // 18 - 1f +PCK4BITS(1,1,1,1,1,1,1,1), // 20 - 27 +PCK4BITS(1,1,1,1,1,1,1,1), // 28 - 2f +PCK4BITS(1,1,1,1,1,1,1,1), // 30 - 37 +PCK4BITS(1,1,1,1,1,1,1,1), // 38 - 3f +PCK4BITS(1,1,1,1,1,1,1,1), // 40 - 47 +PCK4BITS(1,1,1,1,1,1,1,1), // 48 - 4f +PCK4BITS(1,1,1,1,1,1,1,1), // 50 - 57 +PCK4BITS(1,1,1,1,1,1,1,1), // 58 - 5f +PCK4BITS(1,1,1,1,1,1,1,1), // 60 - 67 +PCK4BITS(1,1,1,1,1,1,1,1), // 68 - 6f +PCK4BITS(1,1,1,1,1,1,1,1), // 70 - 77 +PCK4BITS(1,1,1,1,1,1,1,1), // 78 - 7f +PCK4BITS(0,0,0,0,0,0,0,0), // 80 - 87 +PCK4BITS(0,0,0,0,0,0,0,0), // 88 - 8f +PCK4BITS(0,0,0,0,0,0,0,0), // 90 - 97 +PCK4BITS(0,0,0,0,0,0,0,0), // 98 - 9f +PCK4BITS(0,2,2,2,2,2,2,2), // a0 - a7 +PCK4BITS(2,2,2,2,2,3,3,3), // a8 - af +PCK4BITS(2,2,2,2,2,2,2,2), // b0 - b7 +PCK4BITS(2,2,2,2,2,2,2,2), // b8 - bf +PCK4BITS(2,2,2,2,2,2,2,2), // c0 - c7 +PCK4BITS(2,3,2,2,2,2,2,2), // c8 - cf +PCK4BITS(2,2,2,2,2,2,2,2), // d0 - d7 +PCK4BITS(2,2,2,2,2,2,2,2), // d8 - df +PCK4BITS(2,2,2,2,2,2,2,2), // e0 - e7 +PCK4BITS(2,2,2,2,2,2,2,2), // e8 - ef +PCK4BITS(2,2,2,2,2,2,2,2), // f0 - f7 +PCK4BITS(2,2,2,2,2,2,2,0) // f8 - ff +}; + + +static const PRUint32 EUCKR_st [ 2] = { +PCK4BITS(eError,eStart, 3,eError,eError,eError,eError,eError),//00-07 +PCK4BITS(eItsMe,eItsMe,eItsMe,eItsMe,eError,eError,eStart,eStart) //08-0f +}; + +static const PRUint32 EUCKRCharLenTable[] = {0, 1, 2, 0}; + +const SMModel EUCKRSMModel ( + nsPkgInt(eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, EUCKR_cls), + 4, + nsPkgInt(eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, EUCKR_st), + EUCKRCharLenTable, + "EUC-KR"); + +static const PRUint32 EUCTW_cls [ 256 / 8 ] = { +//PCK4BITS(0,2,2,2,2,2,2,2), // 00 - 07 +PCK4BITS(2,2,2,2,2,2,2,2), // 00 - 07 +PCK4BITS(2,2,2,2,2,2,0,0), // 08 - 0f +PCK4BITS(2,2,2,2,2,2,2,2), // 10 - 17 +PCK4BITS(2,2,2,0,2,2,2,2), // 18 - 1f +PCK4BITS(2,2,2,2,2,2,2,2), // 20 - 27 +PCK4BITS(2,2,2,2,2,2,2,2), // 28 - 2f +PCK4BITS(2,2,2,2,2,2,2,2), // 30 - 37 +PCK4BITS(2,2,2,2,2,2,2,2), // 38 - 3f +PCK4BITS(2,2,2,2,2,2,2,2), // 40 - 47 +PCK4BITS(2,2,2,2,2,2,2,2), // 48 - 4f +PCK4BITS(2,2,2,2,2,2,2,2), // 50 - 57 +PCK4BITS(2,2,2,2,2,2,2,2), // 58 - 5f +PCK4BITS(2,2,2,2,2,2,2,2), // 60 - 67 +PCK4BITS(2,2,2,2,2,2,2,2), // 68 - 6f +PCK4BITS(2,2,2,2,2,2,2,2), // 70 - 77 +PCK4BITS(2,2,2,2,2,2,2,2), // 78 - 7f +PCK4BITS(0,0,0,0,0,0,0,0), // 80 - 87 +PCK4BITS(0,0,0,0,0,0,6,0), // 88 - 8f +PCK4BITS(0,0,0,0,0,0,0,0), // 90 - 97 +PCK4BITS(0,0,0,0,0,0,0,0), // 98 - 9f +PCK4BITS(0,3,4,4,4,4,4,4), // a0 - a7 +PCK4BITS(5,5,1,1,1,1,1,1), // a8 - af +PCK4BITS(1,1,1,1,1,1,1,1), // b0 - b7 +PCK4BITS(1,1,1,1,1,1,1,1), // b8 - bf +PCK4BITS(1,1,3,1,3,3,3,3), // c0 - c7 +PCK4BITS(3,3,3,3,3,3,3,3), // c8 - cf +PCK4BITS(3,3,3,3,3,3,3,3), // d0 - d7 +PCK4BITS(3,3,3,3,3,3,3,3), // d8 - df +PCK4BITS(3,3,3,3,3,3,3,3), // e0 - e7 +PCK4BITS(3,3,3,3,3,3,3,3), // e8 - ef +PCK4BITS(3,3,3,3,3,3,3,3), // f0 - f7 +PCK4BITS(3,3,3,3,3,3,3,0) // f8 - ff +}; + + +static const PRUint32 EUCTW_st [ 6] = { +PCK4BITS(eError,eError,eStart, 3, 3, 3, 4,eError),//00-07 +PCK4BITS(eError,eError,eError,eError,eError,eError,eItsMe,eItsMe),//08-0f +PCK4BITS(eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eError,eStart,eError),//10-17 +PCK4BITS(eStart,eStart,eStart,eError,eError,eError,eError,eError),//18-1f +PCK4BITS( 5,eError,eError,eError,eStart,eError,eStart,eStart),//20-27 +PCK4BITS(eStart,eError,eStart,eStart,eStart,eStart,eStart,eStart) //28-2f +}; + +static const PRUint32 EUCTWCharLenTable[] = {0, 0, 1, 2, 2, 2, 3}; + +const SMModel EUCTWSMModel( + nsPkgInt(eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, EUCTW_cls), + 7, + nsPkgInt(eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, EUCTW_st), + EUCTWCharLenTable, + "x-euc-tw"); + +/* obsolete GB2312 by gb18030 +static PRUint32 GB2312_cls [ 256 / 8 ] = { +//PCK4BITS(0,1,1,1,1,1,1,1), // 00 - 07 +PCK4BITS(1,1,1,1,1,1,1,1), // 00 - 07 +PCK4BITS(1,1,1,1,1,1,0,0), // 08 - 0f +PCK4BITS(1,1,1,1,1,1,1,1), // 10 - 17 +PCK4BITS(1,1,1,0,1,1,1,1), // 18 - 1f +PCK4BITS(1,1,1,1,1,1,1,1), // 20 - 27 +PCK4BITS(1,1,1,1,1,1,1,1), // 28 - 2f +PCK4BITS(1,1,1,1,1,1,1,1), // 30 - 37 +PCK4BITS(1,1,1,1,1,1,1,1), // 38 - 3f +PCK4BITS(1,1,1,1,1,1,1,1), // 40 - 47 +PCK4BITS(1,1,1,1,1,1,1,1), // 48 - 4f +PCK4BITS(1,1,1,1,1,1,1,1), // 50 - 57 +PCK4BITS(1,1,1,1,1,1,1,1), // 58 - 5f +PCK4BITS(1,1,1,1,1,1,1,1), // 60 - 67 +PCK4BITS(1,1,1,1,1,1,1,1), // 68 - 6f +PCK4BITS(1,1,1,1,1,1,1,1), // 70 - 77 +PCK4BITS(1,1,1,1,1,1,1,1), // 78 - 7f +PCK4BITS(1,0,0,0,0,0,0,0), // 80 - 87 +PCK4BITS(0,0,0,0,0,0,0,0), // 88 - 8f +PCK4BITS(0,0,0,0,0,0,0,0), // 90 - 97 +PCK4BITS(0,0,0,0,0,0,0,0), // 98 - 9f +PCK4BITS(0,2,2,2,2,2,2,2), // a0 - a7 +PCK4BITS(2,2,3,3,3,3,3,3), // a8 - af +PCK4BITS(2,2,2,2,2,2,2,2), // b0 - b7 +PCK4BITS(2,2,2,2,2,2,2,2), // b8 - bf +PCK4BITS(2,2,2,2,2,2,2,2), // c0 - c7 +PCK4BITS(2,2,2,2,2,2,2,2), // c8 - cf +PCK4BITS(2,2,2,2,2,2,2,2), // d0 - d7 +PCK4BITS(2,2,2,2,2,2,2,2), // d8 - df +PCK4BITS(2,2,2,2,2,2,2,2), // e0 - e7 +PCK4BITS(2,2,2,2,2,2,2,2), // e8 - ef +PCK4BITS(2,2,2,2,2,2,2,2), // f0 - f7 +PCK4BITS(2,2,2,2,2,2,2,0) // f8 - ff +}; + + +static PRUint32 GB2312_st [ 2] = { +PCK4BITS(eError,eStart, 3,eError,eError,eError,eError,eError),//00-07 +PCK4BITS(eItsMe,eItsMe,eItsMe,eItsMe,eError,eError,eStart,eStart) //08-0f +}; + +static const PRUint32 GB2312CharLenTable[] = {0, 1, 2, 0}; + +SMModel GB2312SMModel = { + {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, GB2312_cls }, + 4, + {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, GB2312_st }, + GB2312CharLenTable, + "GB2312", +}; +*/ + +// the following state machine data was created by perl script in +// intl/chardet/tools. It should be the same as in PSM detector. +static const PRUint32 GB18030_cls [ 256 / 8 ] = { +PCK4BITS(1,1,1,1,1,1,1,1), // 00 - 07 +PCK4BITS(1,1,1,1,1,1,0,0), // 08 - 0f +PCK4BITS(1,1,1,1,1,1,1,1), // 10 - 17 +PCK4BITS(1,1,1,0,1,1,1,1), // 18 - 1f +PCK4BITS(1,1,1,1,1,1,1,1), // 20 - 27 +PCK4BITS(1,1,1,1,1,1,1,1), // 28 - 2f +PCK4BITS(3,3,3,3,3,3,3,3), // 30 - 37 +PCK4BITS(3,3,1,1,1,1,1,1), // 38 - 3f +PCK4BITS(2,2,2,2,2,2,2,2), // 40 - 47 +PCK4BITS(2,2,2,2,2,2,2,2), // 48 - 4f +PCK4BITS(2,2,2,2,2,2,2,2), // 50 - 57 +PCK4BITS(2,2,2,2,2,2,2,2), // 58 - 5f +PCK4BITS(2,2,2,2,2,2,2,2), // 60 - 67 +PCK4BITS(2,2,2,2,2,2,2,2), // 68 - 6f +PCK4BITS(2,2,2,2,2,2,2,2), // 70 - 77 +PCK4BITS(2,2,2,2,2,2,2,4), // 78 - 7f +PCK4BITS(5,6,6,6,6,6,6,6), // 80 - 87 +PCK4BITS(6,6,6,6,6,6,6,6), // 88 - 8f +PCK4BITS(6,6,6,6,6,6,6,6), // 90 - 97 +PCK4BITS(6,6,6,6,6,6,6,6), // 98 - 9f +PCK4BITS(6,6,6,6,6,6,6,6), // a0 - a7 +PCK4BITS(6,6,6,6,6,6,6,6), // a8 - af +PCK4BITS(6,6,6,6,6,6,6,6), // b0 - b7 +PCK4BITS(6,6,6,6,6,6,6,6), // b8 - bf +PCK4BITS(6,6,6,6,6,6,6,6), // c0 - c7 +PCK4BITS(6,6,6,6,6,6,6,6), // c8 - cf +PCK4BITS(6,6,6,6,6,6,6,6), // d0 - d7 +PCK4BITS(6,6,6,6,6,6,6,6), // d8 - df +PCK4BITS(6,6,6,6,6,6,6,6), // e0 - e7 +PCK4BITS(6,6,6,6,6,6,6,6), // e8 - ef +PCK4BITS(6,6,6,6,6,6,6,6), // f0 - f7 +PCK4BITS(6,6,6,6,6,6,6,0) // f8 - ff +}; + + +static const PRUint32 GB18030_st [ 6] = { +PCK4BITS(eError,eStart,eStart,eStart,eStart,eStart, 3,eError),//00-07 +PCK4BITS(eError,eError,eError,eError,eError,eError,eItsMe,eItsMe),//08-0f +PCK4BITS(eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eError,eError,eStart),//10-17 +PCK4BITS( 4,eError,eStart,eStart,eError,eError,eError,eError),//18-1f +PCK4BITS(eError,eError, 5,eError,eError,eError,eItsMe,eError),//20-27 +PCK4BITS(eError,eError,eStart,eStart,eStart,eStart,eStart,eStart) //28-2f +}; + +// To be accurate, the length of class 6 can be either 2 or 4. +// But it is not necessary to discriminate between the two since +// it is used for frequency analysis only, and we are validing +// each code range there as well. So it is safe to set it to be +// 2 here. +static const PRUint32 GB18030CharLenTable[] = {0, 1, 1, 1, 1, 1, 2}; + +const SMModel GB18030SMModel( + nsPkgInt(eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, GB18030_cls ), + 7, + nsPkgInt(eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, GB18030_st ), + GB18030CharLenTable, + "GB18030"); + +// sjis + +static const PRUint32 SJIS_cls [ 256 / 8 ] = { +//PCK4BITS(0,1,1,1,1,1,1,1), // 00 - 07 +PCK4BITS(1,1,1,1,1,1,1,1), // 00 - 07 +PCK4BITS(1,1,1,1,1,1,0,0), // 08 - 0f +PCK4BITS(1,1,1,1,1,1,1,1), // 10 - 17 +PCK4BITS(1,1,1,0,1,1,1,1), // 18 - 1f +PCK4BITS(1,1,1,1,1,1,1,1), // 20 - 27 +PCK4BITS(1,1,1,1,1,1,1,1), // 28 - 2f +PCK4BITS(1,1,1,1,1,1,1,1), // 30 - 37 +PCK4BITS(1,1,1,1,1,1,1,1), // 38 - 3f +PCK4BITS(2,2,2,2,2,2,2,2), // 40 - 47 +PCK4BITS(2,2,2,2,2,2,2,2), // 48 - 4f +PCK4BITS(2,2,2,2,2,2,2,2), // 50 - 57 +PCK4BITS(2,2,2,2,2,2,2,2), // 58 - 5f +PCK4BITS(2,2,2,2,2,2,2,2), // 60 - 67 +PCK4BITS(2,2,2,2,2,2,2,2), // 68 - 6f +PCK4BITS(2,2,2,2,2,2,2,2), // 70 - 77 +PCK4BITS(2,2,2,2,2,2,2,1), // 78 - 7f +PCK4BITS(3,3,3,3,3,3,3,3), // 80 - 87 +PCK4BITS(3,3,3,3,3,3,3,3), // 88 - 8f +PCK4BITS(3,3,3,3,3,3,3,3), // 90 - 97 +PCK4BITS(3,3,3,3,3,3,3,3), // 98 - 9f +//0xa0 is illegal in sjis encoding, but some pages does +//contain such byte. We need to be more error forgiven. +PCK4BITS(2,2,2,2,2,2,2,2), // a0 - a7 +PCK4BITS(2,2,2,2,2,2,2,2), // a8 - af +PCK4BITS(2,2,2,2,2,2,2,2), // b0 - b7 +PCK4BITS(2,2,2,2,2,2,2,2), // b8 - bf +PCK4BITS(2,2,2,2,2,2,2,2), // c0 - c7 +PCK4BITS(2,2,2,2,2,2,2,2), // c8 - cf +PCK4BITS(2,2,2,2,2,2,2,2), // d0 - d7 +PCK4BITS(2,2,2,2,2,2,2,2), // d8 - df +PCK4BITS(3,3,3,3,3,3,3,3), // e0 - e7 +PCK4BITS(3,3,3,3,3,4,4,4), // e8 - ef +PCK4BITS(4,4,4,4,4,4,4,4), // f0 - f7 +PCK4BITS(4,4,4,4,4,0,0,0) // f8 - ff +}; + + +static const PRUint32 SJIS_st [ 3] = { +PCK4BITS(eError,eStart,eStart, 3,eError,eError,eError,eError),//00-07 +PCK4BITS(eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe),//08-0f +PCK4BITS(eItsMe,eItsMe,eError,eError,eStart,eStart,eStart,eStart) //10-17 +}; + +static const PRUint32 SJISCharLenTable[] = {0, 1, 1, 2, 0, 0}; + +const SMModel SJISSMModel( + nsPkgInt(eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, SJIS_cls), + 6, + nsPkgInt(eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, SJIS_st), + SJISCharLenTable, + "Shift_JIS"); + + +static const PRUint32 UTF8_cls [ 256 / 8 ] = { +//PCK4BITS(0,1,1,1,1,1,1,1), // 00 - 07 +PCK4BITS(1,1,1,1,1,1,1,1), // 00 - 07 //allow 0x00 as a legal value +PCK4BITS(1,1,1,1,1,1,0,0), // 08 - 0f +PCK4BITS(1,1,1,1,1,1,1,1), // 10 - 17 +PCK4BITS(1,1,1,0,1,1,1,1), // 18 - 1f +PCK4BITS(1,1,1,1,1,1,1,1), // 20 - 27 +PCK4BITS(1,1,1,1,1,1,1,1), // 28 - 2f +PCK4BITS(1,1,1,1,1,1,1,1), // 30 - 37 +PCK4BITS(1,1,1,1,1,1,1,1), // 38 - 3f +PCK4BITS(1,1,1,1,1,1,1,1), // 40 - 47 +PCK4BITS(1,1,1,1,1,1,1,1), // 48 - 4f +PCK4BITS(1,1,1,1,1,1,1,1), // 50 - 57 +PCK4BITS(1,1,1,1,1,1,1,1), // 58 - 5f +PCK4BITS(1,1,1,1,1,1,1,1), // 60 - 67 +PCK4BITS(1,1,1,1,1,1,1,1), // 68 - 6f +PCK4BITS(1,1,1,1,1,1,1,1), // 70 - 77 +PCK4BITS(1,1,1,1,1,1,1,1), // 78 - 7f +PCK4BITS(2,2,2,2,3,3,3,3), // 80 - 87 +PCK4BITS(4,4,4,4,4,4,4,4), // 88 - 8f +PCK4BITS(4,4,4,4,4,4,4,4), // 90 - 97 +PCK4BITS(4,4,4,4,4,4,4,4), // 98 - 9f +PCK4BITS(5,5,5,5,5,5,5,5), // a0 - a7 +PCK4BITS(5,5,5,5,5,5,5,5), // a8 - af +PCK4BITS(5,5,5,5,5,5,5,5), // b0 - b7 +PCK4BITS(5,5,5,5,5,5,5,5), // b8 - bf +PCK4BITS(0,0,6,6,6,6,6,6), // c0 - c7 +PCK4BITS(6,6,6,6,6,6,6,6), // c8 - cf +PCK4BITS(6,6,6,6,6,6,6,6), // d0 - d7 +PCK4BITS(6,6,6,6,6,6,6,6), // d8 - df +PCK4BITS(7,8,8,8,8,8,8,8), // e0 - e7 +PCK4BITS(8,8,8,8,8,9,8,8), // e8 - ef +PCK4BITS(10,11,11,11,11,11,11,11), // f0 - f7 +PCK4BITS(12,13,13,13,14,15,0,0) // f8 - ff +}; + + +static const PRUint32 UTF8_st [ 26] = { +PCK4BITS(eError,eStart,eError,eError,eError,eError, 12, 10),//00-07 +PCK4BITS( 9, 11, 8, 7, 6, 5, 4, 3),//08-0f +PCK4BITS(eError,eError,eError,eError,eError,eError,eError,eError),//10-17 +PCK4BITS(eError,eError,eError,eError,eError,eError,eError,eError),//18-1f +PCK4BITS(eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe),//20-27 +PCK4BITS(eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe),//28-2f +PCK4BITS(eError,eError, 5, 5, 5, 5,eError,eError),//30-37 +PCK4BITS(eError,eError,eError,eError,eError,eError,eError,eError),//38-3f +PCK4BITS(eError,eError,eError, 5, 5, 5,eError,eError),//40-47 +PCK4BITS(eError,eError,eError,eError,eError,eError,eError,eError),//48-4f +PCK4BITS(eError,eError, 7, 7, 7, 7,eError,eError),//50-57 +PCK4BITS(eError,eError,eError,eError,eError,eError,eError,eError),//58-5f +PCK4BITS(eError,eError,eError,eError, 7, 7,eError,eError),//60-67 +PCK4BITS(eError,eError,eError,eError,eError,eError,eError,eError),//68-6f +PCK4BITS(eError,eError, 9, 9, 9, 9,eError,eError),//70-77 +PCK4BITS(eError,eError,eError,eError,eError,eError,eError,eError),//78-7f +PCK4BITS(eError,eError,eError,eError,eError, 9,eError,eError),//80-87 +PCK4BITS(eError,eError,eError,eError,eError,eError,eError,eError),//88-8f +PCK4BITS(eError,eError, 12, 12, 12, 12,eError,eError),//90-97 +PCK4BITS(eError,eError,eError,eError,eError,eError,eError,eError),//98-9f +PCK4BITS(eError,eError,eError,eError,eError, 12,eError,eError),//a0-a7 +PCK4BITS(eError,eError,eError,eError,eError,eError,eError,eError),//a8-af +PCK4BITS(eError,eError, 12, 12, 12,eError,eError,eError),//b0-b7 +PCK4BITS(eError,eError,eError,eError,eError,eError,eError,eError),//b8-bf +PCK4BITS(eError,eError,eStart,eStart,eStart,eStart,eError,eError),//c0-c7 +PCK4BITS(eError,eError,eError,eError,eError,eError,eError,eError) //c8-cf +}; + +static const PRUint32 UTF8CharLenTable[] = {0, 1, 0, 0, 0, 0, 2, 3, + 3, 3, 4, 4, 5, 5, 6, 6 }; + +const SMModel UTF8SMModel( + nsPkgInt(eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, UTF8_cls), + 16, + nsPkgInt(eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, UTF8_st), + UTF8CharLenTable, + "UTF-8"); + diff --git a/PowerEditor/src/uchardet/nsPkgInt.h b/PowerEditor/src/uchardet/nsPkgInt.h index 3caa9122..b1e66785 100644 --- a/PowerEditor/src/uchardet/nsPkgInt.h +++ b/PowerEditor/src/uchardet/nsPkgInt.h @@ -1,89 +1,93 @@ -/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 1.1/GPL 2.0/LGPL 2.1 - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is mozilla.org code. - * - * The Initial Developer of the Original Code is - * Netscape Communications Corporation. - * Portions created by the Initial Developer are Copyright (C) 1998 - * the Initial Developer. All Rights Reserved. - * - * Contributor(s): - * - * Alternatively, the contents of this file may be used under the terms of - * either the GNU General Public License Version 2 or later (the "GPL"), or - * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), - * in which case the provisions of the GPL or the LGPL are applicable instead - * of those above. If you wish to allow use of your version of this file only - * under the terms of either the GPL or the LGPL, and not to allow others to - * use your version of this file under the terms of the MPL, indicate your - * decision by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL or the LGPL. If you do not delete - * the provisions above, a recipient may use your version of this file under - * the terms of any one of the MPL, the GPL or the LGPL. - * - * ***** END LICENSE BLOCK ***** */ - -#ifndef nsPkgInt_h__ -#define nsPkgInt_h__ -#include "nscore.h" - -typedef enum { - eIdxSft4bits = 3, - eIdxSft8bits = 2, - eIdxSft16bits = 1 -} nsIdxSft; - -typedef enum { - eSftMsk4bits = 7, - eSftMsk8bits = 3, - eSftMsk16bits = 1 -} nsSftMsk; - -typedef enum { - eBitSft4bits = 2, - eBitSft8bits = 3, - eBitSft16bits = 4 -} nsBitSft; - -typedef enum { - eUnitMsk4bits = 0x0000000FL, - eUnitMsk8bits = 0x000000FFL, - eUnitMsk16bits = 0x0000FFFFL -} nsUnitMsk; - -typedef struct nsPkgInt { - nsIdxSft idxsft; - nsSftMsk sftmsk; - nsBitSft bitsft; - nsUnitMsk unitmsk; - const PRUint32* const data; -} nsPkgInt; - - -#define PCK16BITS(a,b) ((PRUint32)(((b) << 16) | (a))) - -#define PCK8BITS(a,b,c,d) PCK16BITS( ((PRUint32)(((b) << 8) | (a))), \ - ((PRUint32)(((d) << 8) | (c)))) - -#define PCK4BITS(a,b,c,d,e,f,g,h) PCK8BITS( ((PRUint32)(((b) << 4) | (a))), \ - ((PRUint32)(((d) << 4) | (c))), \ - ((PRUint32)(((f) << 4) | (e))), \ - ((PRUint32)(((h) << 4) | (g))) ) - -#define GETFROMPCK(i, c) \ - (((((c).data)[(i)>>(c).idxsft])>>(((i)&(c).sftmsk)<<(c).bitsft))&(c).unitmsk) - -#endif /* nsPkgInt_h__ */ - +/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* ***** BEGIN LICENSE BLOCK ***** + * Version: MPL 1.1/GPL 2.0/LGPL 2.1 + * + * The contents of this file are subject to the Mozilla Public License Version + * 1.1 (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. + * + * The Original Code is mozilla.org code. + * + * The Initial Developer of the Original Code is + * Netscape Communications Corporation. + * Portions created by the Initial Developer are Copyright (C) 1998 + * the Initial Developer. All Rights Reserved. + * + * Contributor(s): + * + * Alternatively, the contents of this file may be used under the terms of + * either the GNU General Public License Version 2 or later (the "GPL"), or + * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), + * in which case the provisions of the GPL or the LGPL are applicable instead + * of those above. If you wish to allow use of your version of this file only + * under the terms of either the GPL or the LGPL, and not to allow others to + * use your version of this file under the terms of the MPL, indicate your + * decision by deleting the provisions above and replace them with the notice + * and other provisions required by the GPL or the LGPL. If you do not delete + * the provisions above, a recipient may use your version of this file under + * the terms of any one of the MPL, the GPL or the LGPL. + * + * ***** END LICENSE BLOCK ***** */ + +#ifndef nsPkgInt_h__ +#define nsPkgInt_h__ +#include "nscore.h" + +typedef enum { + eIdxSft4bits = 3, + eIdxSft8bits = 2, + eIdxSft16bits = 1 +} nsIdxSft; + +typedef enum { + eSftMsk4bits = 7, + eSftMsk8bits = 3, + eSftMsk16bits = 1 +} nsSftMsk; + +typedef enum { + eBitSft4bits = 2, + eBitSft8bits = 3, + eBitSft16bits = 4 +} nsBitSft; + +typedef enum { + eUnitMsk4bits = 0x0000000FL, + eUnitMsk8bits = 0x000000FFL, + eUnitMsk16bits = 0x0000FFFFL +} nsUnitMsk; + +struct nsPkgInt { + nsIdxSft idxsft; + nsSftMsk sftmsk; + nsBitSft bitsft; + nsUnitMsk unitmsk; + const PRUint32* const data; + nsPkgInt(nsIdxSft a,nsSftMsk b, nsBitSft c,nsUnitMsk d,const PRUint32* const e) + :idxsft(a), sftmsk(b), bitsft(c), unitmsk(d), data(e){} + nsPkgInt(); + nsPkgInt operator= (const nsPkgInt&); +}; + + +#define PCK16BITS(a,b) ((PRUint32)(((b) << 16) | (a))) + +#define PCK8BITS(a,b,c,d) PCK16BITS( ((PRUint32)(((b) << 8) | (a))), \ + ((PRUint32)(((d) << 8) | (c)))) + +#define PCK4BITS(a,b,c,d,e,f,g,h) PCK8BITS( ((PRUint32)(((b) << 4) | (a))), \ + ((PRUint32)(((d) << 4) | (c))), \ + ((PRUint32)(((f) << 4) | (e))), \ + ((PRUint32)(((h) << 4) | (g))) ) + +#define GETFROMPCK(i, c) \ + (((((c).data)[(i)>>(c).idxsft])>>(((i)&(c).sftmsk)<<(c).bitsft))&(c).unitmsk) + +#endif /* nsPkgInt_h__ */ + diff --git a/PowerEditor/src/uchardet/nsSBCSGroupProber.cpp b/PowerEditor/src/uchardet/nsSBCSGroupProber.cpp index b1a60cc0..d8fef879 100644 --- a/PowerEditor/src/uchardet/nsSBCSGroupProber.cpp +++ b/PowerEditor/src/uchardet/nsSBCSGroupProber.cpp @@ -46,70 +46,42 @@ nsSBCSGroupProber::nsSBCSGroupProber() { - mProbers[0] = new nsSingleByteCharSetProber(&Win1251RussianModel); - mProbers[1] = new nsSingleByteCharSetProber(&Koi8rRussianModel); - mProbers[2] = new nsSingleByteCharSetProber(&Latin5RussianModel); - mProbers[3] = new nsSingleByteCharSetProber(&MacCyrillicRussianModel); - mProbers[4] = new nsSingleByteCharSetProber(&Ibm866RussianModel); - mProbers[5] = new nsSingleByteCharSetProber(&Ibm855RussianModel); - - mProbers[6] = new nsSingleByteCharSetProber(&Iso_8859_7GreekModel); - mProbers[7] = new nsSingleByteCharSetProber(&Windows_1253GreekModel); - + mProbers[0] = new nsSingleByteCharSetProber(&Win1251Model); + mProbers[1] = new nsSingleByteCharSetProber(&Koi8rModel); + mProbers[2] = new nsSingleByteCharSetProber(&Latin5Model); + mProbers[3] = new nsSingleByteCharSetProber(&MacCyrillicModel); + mProbers[4] = new nsSingleByteCharSetProber(&Ibm866Model); + mProbers[5] = new nsSingleByteCharSetProber(&Ibm855Model); + mProbers[6] = new nsSingleByteCharSetProber(&Latin7Model); + mProbers[7] = new nsSingleByteCharSetProber(&Win1253Model); mProbers[8] = new nsSingleByteCharSetProber(&Latin5BulgarianModel); mProbers[9] = new nsSingleByteCharSetProber(&Win1251BulgarianModel); + mProbers[10] = new nsSingleByteCharSetProber(&TIS620ThaiModel); nsHebrewProber *hebprober = new nsHebrewProber(); // Notice: Any change in these indexes - 10,11,12 must be reflected // in the code below as well. - mProbers[10] = hebprober; - mProbers[11] = new nsSingleByteCharSetProber(&Win1255Model, PR_FALSE, hebprober); // Logical Hebrew - mProbers[12] = new nsSingleByteCharSetProber(&Win1255Model, PR_TRUE, hebprober); // Visual Hebrew + mProbers[11] = hebprober; + mProbers[12] = new nsSingleByteCharSetProber(&Win1255Model, PR_FALSE, hebprober); // Logical Hebrew + mProbers[13] = new nsSingleByteCharSetProber(&Win1255Model, PR_TRUE, hebprober); // Visual Hebrew // Tell the Hebrew prober about the logical and visual probers - if (mProbers[10] && mProbers[11] && mProbers[12]) // all are not null + if (mProbers[11] && mProbers[12] && mProbers[13]) // all are not null { - hebprober->SetModelProbers(mProbers[11], mProbers[12]); + hebprober->SetModelProbers(mProbers[12], mProbers[13]); } else // One or more is null. avoid any Hebrew probing, null them all { - for (PRUint32 i = 10; i <= 12; ++i) - { - delete mProbers[i]; - mProbers[i] = 0; + for (PRUint32 i = 11; i <= 13; ++i) + { + delete mProbers[i]; + mProbers[i] = 0; } } - mProbers[13] = new nsSingleByteCharSetProber(&Tis_620ThaiModel); - mProbers[14] = new nsSingleByteCharSetProber(&Iso_8859_11ThaiModel); - - mProbers[15] = new nsSingleByteCharSetProber(&Iso_8859_1FrenchModel); - mProbers[16] = new nsSingleByteCharSetProber(&Iso_8859_15FrenchModel); - mProbers[17] = new nsSingleByteCharSetProber(&Windows_1252FrenchModel); - - mProbers[18] = new nsSingleByteCharSetProber(&Iso_8859_1SpanishModel); - mProbers[19] = new nsSingleByteCharSetProber(&Iso_8859_15SpanishModel); - mProbers[20] = new nsSingleByteCharSetProber(&Windows_1252SpanishModel); - - mProbers[21] = new nsSingleByteCharSetProber(&Iso_8859_2HungarianModel); - mProbers[22] = new nsSingleByteCharSetProber(&Windows_1250HungarianModel); - - mProbers[23] = new nsSingleByteCharSetProber(&Iso_8859_1GermanModel); - mProbers[24] = new nsSingleByteCharSetProber(&Windows_1252GermanModel); - - mProbers[25] = new nsSingleByteCharSetProber(&Iso_8859_3EsperantoModel); - - mProbers[26] = new nsSingleByteCharSetProber(&Iso_8859_3TurkishModel); - mProbers[27] = new nsSingleByteCharSetProber(&Iso_8859_9TurkishModel); - - mProbers[28] = new nsSingleByteCharSetProber(&Iso_8859_6ArabicModel); - mProbers[29] = new nsSingleByteCharSetProber(&Windows_1256ArabicModel); - - mProbers[30] = new nsSingleByteCharSetProber(&VisciiVietnameseModel); - mProbers[31] = new nsSingleByteCharSetProber(&Windows_1258VietnameseModel); - - mProbers[32] = new nsSingleByteCharSetProber(&Iso_8859_15DanishModel); - mProbers[33] = new nsSingleByteCharSetProber(&Iso_8859_1DanishModel); - mProbers[34] = new nsSingleByteCharSetProber(&Windows_1252DanishModel); + // disable latin2 before latin1 is available, otherwise all latin1 + // will be detected as latin2 because of their similarity. + //mProbers[10] = new nsSingleByteCharSetProber(&Latin2HungarianModel); + //mProbers[11] = new nsSingleByteCharSetProber(&Win1250HungarianModel); Reset(); } diff --git a/PowerEditor/src/uchardet/nsSBCSGroupProber.h b/PowerEditor/src/uchardet/nsSBCSGroupProber.h index c1ea4a11..cfbf7e16 100644 --- a/PowerEditor/src/uchardet/nsSBCSGroupProber.h +++ b/PowerEditor/src/uchardet/nsSBCSGroupProber.h @@ -40,7 +40,7 @@ #define nsSBCSGroupProber_h__ -#define NUM_OF_SBCS_PROBERS 35 +#define NUM_OF_SBCS_PROBERS 14 class nsCharSetProber; class nsSBCSGroupProber: public nsCharSetProber { diff --git a/PowerEditor/src/uchardet/nsSBCharSetProber.cpp b/PowerEditor/src/uchardet/nsSBCharSetProber.cpp index 1f7f4731..3a88fdf3 100644 --- a/PowerEditor/src/uchardet/nsSBCharSetProber.cpp +++ b/PowerEditor/src/uchardet/nsSBCharSetProber.cpp @@ -35,6 +35,7 @@ * the terms of any one of the MPL, the GPL or the LGPL. * * ***** END LICENSE BLOCK ***** */ + #include #include "nsSBCharSetProber.h" @@ -47,31 +48,18 @@ nsProbingState nsSingleByteCharSetProber::HandleData(const char* aBuf, PRUint32 order = mModel->charToOrderMap[(unsigned char)aBuf[i]]; if (order < SYMBOL_CAT_ORDER) - { mTotalChar++; - } - else if (order == ILL) - { - /* When encountering an illegal codepoint, no need - * to continue analyzing data. */ - mState = eNotMe; - break; - } - else if (order == CTR) - { - mCtrlChar++; - } - if (order < mModel->freqCharCount) + if (order < SAMPLE_SIZE) { mFreqChar++; - if (mLastOrder < mModel->freqCharCount) + if (mLastOrder < SAMPLE_SIZE) { mTotalSeqs++; if (!mReversed) - ++(mSeqCounters[mModel->precedenceMatrix[mLastOrder*mModel->freqCharCount+order]]); + ++(mSeqCounters[mModel->precedenceMatrix[mLastOrder*SAMPLE_SIZE+order]]); else // reverse the order of the letters in the lookup - ++(mSeqCounters[mModel->precedenceMatrix[order*mModel->freqCharCount+mLastOrder]]); + ++(mSeqCounters[mModel->precedenceMatrix[order*SAMPLE_SIZE+mLastOrder]]); } } mLastOrder = order; @@ -98,7 +86,6 @@ void nsSingleByteCharSetProber::Reset(void) mSeqCounters[i] = 0; mTotalSeqs = 0; mTotalChar = 0; - mCtrlChar = 0; mFreqChar = 0; } @@ -116,19 +103,6 @@ float nsSingleByteCharSetProber::GetConfidence(void) if (mTotalSeqs > 0) { r = ((float)1.0) * mSeqCounters[POSITIVE_CAT] / mTotalSeqs / mModel->mTypicalPositiveRatio; - /* Multiply by a ratio of positive sequences per characters. - * This would help in particular to distinguish close winners. - * Indeed if you add a letter, you'd expect the positive sequence count - * to increase as well. If it doesn't, it may mean that this new codepoint - * may not have been a letter, but instead a symbol (or some other - * character). This could make the difference between very closely related - * charsets used for the same language. - */ - r = r * (mSeqCounters[POSITIVE_CAT] + (float) mSeqCounters[PROBABLE_CAT] / 4) / mTotalChar; - /* The more control characters (proportionnaly to the size of the text), the - * less confident we become in the current charset. - */ - r = r * (mTotalChar - mCtrlChar) / mTotalChar; r = r*mFreqChar/mTotalChar; if (r >= (float)1.00) r = (float)0.99; @@ -138,7 +112,7 @@ float nsSingleByteCharSetProber::GetConfidence(void) #endif } -const char* nsSingleByteCharSetProber::GetCharSetName() +const char* nsSingleByteCharSetProber::GetCharSetName() { if (!mNameProber) return mModel->charsetName; diff --git a/PowerEditor/src/uchardet/nsSBCharSetProber.h b/PowerEditor/src/uchardet/nsSBCharSetProber.h index 211846e4..f2d055f9 100644 --- a/PowerEditor/src/uchardet/nsSBCharSetProber.h +++ b/PowerEditor/src/uchardet/nsSBCharSetProber.h @@ -1,176 +1,130 @@ -/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 1.1/GPL 2.0/LGPL 2.1 - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is Mozilla Universal charset detector code. - * - * The Initial Developer of the Original Code is - * Netscape Communications Corporation. - * Portions created by the Initial Developer are Copyright (C) 2001 - * the Initial Developer. All Rights Reserved. - * - * Contributor(s): - * Shy Shalom - * - * Alternatively, the contents of this file may be used under the terms of - * either the GNU General Public License Version 2 or later (the "GPL"), or - * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), - * in which case the provisions of the GPL or the LGPL are applicable instead - * of those above. If you wish to allow use of your version of this file only - * under the terms of either the GPL or the LGPL, and not to allow others to - * use your version of this file under the terms of the MPL, indicate your - * decision by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL or the LGPL. If you do not delete - * the provisions above, a recipient may use your version of this file under - * the terms of any one of the MPL, the GPL or the LGPL. - * - * ***** END LICENSE BLOCK ***** */ -#ifndef nsSingleByteCharSetProber_h__ -#define nsSingleByteCharSetProber_h__ - -#include "nsCharSetProber.h" - -/** Codepoints **/ - -/* Illegal codepoints.*/ -#define ILL 255 -/* Control character. */ -#define CTR 254 -/* Symbols and punctuation that does not belong to words. */ -#define SYM 253 -/* Return/Line feeds. */ -#define RET 252 -/* Numbers 0-9. */ -#define NUM 251 - -#define SB_ENOUGH_REL_THRESHOLD 1024 -#define POSITIVE_SHORTCUT_THRESHOLD (float)0.95 -#define NEGATIVE_SHORTCUT_THRESHOLD (float)0.05 -#define SYMBOL_CAT_ORDER 250 - -#define NUMBER_OF_SEQ_CAT 4 -#define POSITIVE_CAT (NUMBER_OF_SEQ_CAT-1) -#define PROBABLE_CAT (NUMBER_OF_SEQ_CAT-2) -#define NEUTRAL_CAT (NUMBER_OF_SEQ_CAT-3) -#define NEGATIVE_CAT 0 - -typedef struct -{ - /* [256] table mapping codepoints to chararacter orders. */ - const unsigned char* const charToOrderMap; - /* freqCharCount x freqCharCount table of 2-char sequence's frequencies. */ - const PRUint8* const precedenceMatrix; - /* The count of frequent characters. */ - int freqCharCount; - float mTypicalPositiveRatio; // = freqSeqs / totalSeqs - PRBool keepEnglishLetter; // says if this script contains English characters (not implemented) - const char* const charsetName; -} SequenceModel; - - -class nsSingleByteCharSetProber : public nsCharSetProber{ -public: - nsSingleByteCharSetProber(const SequenceModel *model) - :mModel(model), mReversed(PR_FALSE), mNameProber(0) { Reset(); } - nsSingleByteCharSetProber(const SequenceModel *model, PRBool reversed, nsCharSetProber* nameProber) - :mModel(model), mReversed(reversed), mNameProber(nameProber) { Reset(); } - - virtual const char* GetCharSetName(); - virtual nsProbingState HandleData(const char* aBuf, PRUint32 aLen); - virtual nsProbingState GetState(void) {return mState;} - virtual void Reset(void); - virtual float GetConfidence(void); - virtual void SetOpion() {} - - // This feature is not implemented yet. any current language model - // contain this parameter as PR_FALSE. No one is looking at this - // parameter or calling this method. - // Moreover, the nsSBCSGroupProber which calls the HandleData of this - // prober has a hard-coded call to FilterWithoutEnglishLetters which gets rid - // of the English letters. - PRBool KeepEnglishLetters() {return mModel->keepEnglishLetter;} // (not implemented) - -#ifdef DEBUG_chardet - virtual void DumpStatus(); -#endif - -protected: - nsProbingState mState; - const SequenceModel* const mModel; - const PRBool mReversed; // PR_TRUE if we need to reverse every pair in the model lookup - - //char order of last character - unsigned char mLastOrder; - - PRUint32 mTotalSeqs; - PRUint32 mSeqCounters[NUMBER_OF_SEQ_CAT]; - - PRUint32 mTotalChar; - PRUint32 mCtrlChar; - //characters that fall in our sampling range - PRUint32 mFreqChar; - - // Optional auxiliary prober for name decision. created and destroyed by the GroupProber - nsCharSetProber* mNameProber; - -}; - -extern const SequenceModel Windows_1256ArabicModel; -extern const SequenceModel Iso_8859_6ArabicModel; - -extern const SequenceModel Koi8rRussianModel; -extern const SequenceModel Win1251RussianModel; -extern const SequenceModel Latin5RussianModel; -extern const SequenceModel MacCyrillicRussianModel; -extern const SequenceModel Ibm866RussianModel; -extern const SequenceModel Ibm855RussianModel; - -extern const SequenceModel Iso_8859_7GreekModel; -extern const SequenceModel Windows_1253GreekModel; - -extern const SequenceModel Latin5BulgarianModel; -extern const SequenceModel Win1251BulgarianModel; - -extern const SequenceModel Iso_8859_2HungarianModel; -extern const SequenceModel Windows_1250HungarianModel; - -extern const SequenceModel Win1255Model; - -extern const SequenceModel Tis_620ThaiModel; -extern const SequenceModel Iso_8859_11ThaiModel; - -extern const SequenceModel Iso_8859_15FrenchModel; -extern const SequenceModel Iso_8859_1FrenchModel; -extern const SequenceModel Windows_1252FrenchModel; - -extern const SequenceModel Iso_8859_15SpanishModel; -extern const SequenceModel Iso_8859_1SpanishModel; -extern const SequenceModel Windows_1252SpanishModel; - -extern const SequenceModel Iso_8859_1GermanModel; -extern const SequenceModel Windows_1252GermanModel; - -extern const SequenceModel Iso_8859_3EsperantoModel; - -extern const SequenceModel Iso_8859_3TurkishModel; -extern const SequenceModel Iso_8859_9TurkishModel; - -extern const SequenceModel VisciiVietnameseModel; -extern const SequenceModel Windows_1258VietnameseModel; - -extern const SequenceModel Iso_8859_15DanishModel; -extern const SequenceModel Iso_8859_1DanishModel; -extern const SequenceModel Windows_1252DanishModel; - -#endif /* nsSingleByteCharSetProber_h__ */ - +/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* ***** BEGIN LICENSE BLOCK ***** + * Version: MPL 1.1/GPL 2.0/LGPL 2.1 + * + * The contents of this file are subject to the Mozilla Public License Version + * 1.1 (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. + * + * The Original Code is Mozilla Universal charset detector code. + * + * The Initial Developer of the Original Code is + * Netscape Communications Corporation. + * Portions created by the Initial Developer are Copyright (C) 2001 + * the Initial Developer. All Rights Reserved. + * + * Contributor(s): + * Shy Shalom + * + * Alternatively, the contents of this file may be used under the terms of + * either the GNU General Public License Version 2 or later (the "GPL"), or + * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), + * in which case the provisions of the GPL or the LGPL are applicable instead + * of those above. If you wish to allow use of your version of this file only + * under the terms of either the GPL or the LGPL, and not to allow others to + * use your version of this file under the terms of the MPL, indicate your + * decision by deleting the provisions above and replace them with the notice + * and other provisions required by the GPL or the LGPL. If you do not delete + * the provisions above, a recipient may use your version of this file under + * the terms of any one of the MPL, the GPL or the LGPL. + * + * ***** END LICENSE BLOCK ***** */ +#ifndef nsSingleByteCharSetProber_h__ +#define nsSingleByteCharSetProber_h__ + +#include "nsCharSetProber.h" + +#define SAMPLE_SIZE 64 +#define SB_ENOUGH_REL_THRESHOLD 1024 +#define POSITIVE_SHORTCUT_THRESHOLD (float)0.95 +#define NEGATIVE_SHORTCUT_THRESHOLD (float)0.05 +#define SYMBOL_CAT_ORDER 250 +#define NUMBER_OF_SEQ_CAT 4 +#define POSITIVE_CAT (NUMBER_OF_SEQ_CAT-1) +#define NEGATIVE_CAT 0 + +struct SequenceModel +{ + const unsigned char* const charToOrderMap; // [256] table use to find a char's order + const PRUint8* const precedenceMatrix; // [SAMPLE_SIZE][SAMPLE_SIZE]; table to find a 2-char sequence's frequency + float mTypicalPositiveRatio; // = freqSeqs / totalSeqs + PRBool keepEnglishLetter; // says if this script contains English characters (not implemented) + const char* const charsetName; + SequenceModel(void); + SequenceModel(const unsigned char* const a, const PRUint8* const b,float c,PRBool d,const char* const e) + : charToOrderMap(a), precedenceMatrix(b), mTypicalPositiveRatio(c), keepEnglishLetter(d), charsetName(e){} + SequenceModel& operator=(const SequenceModel&); +} ; + + +class nsSingleByteCharSetProber : public nsCharSetProber{ +public: + nsSingleByteCharSetProber(const SequenceModel *model) + :mModel(model), mReversed(PR_FALSE), mNameProber(0) { Reset(); } + nsSingleByteCharSetProber(const SequenceModel *model, PRBool reversed, nsCharSetProber* nameProber) + :mModel(model), mReversed(reversed), mNameProber(nameProber) { Reset(); } + nsSingleByteCharSetProber(): mModel(0), mReversed(0){}; + virtual const char* GetCharSetName(); + virtual nsProbingState HandleData(const char* aBuf, PRUint32 aLen); + virtual nsProbingState GetState(void) {return mState;} + virtual void Reset(void); + virtual float GetConfidence(void); + virtual void SetOpion() {} + + // This feature is not implemented yet. any current language model + // contain this parameter as PR_FALSE. No one is looking at this + // parameter or calling this method. + // Moreover, the nsSBCSGroupProber which calls the HandleData of this + // prober has a hard-coded call to FilterWithoutEnglishLetters which gets rid + // of the English letters. + PRBool KeepEnglishLetters() {return mModel->keepEnglishLetter;} // (not implemented) + nsSingleByteCharSetProber operator=(const nsSingleByteCharSetProber&) = delete; + +#ifdef DEBUG_chardet + virtual void DumpStatus(); +#endif + +protected: + nsProbingState mState; + const SequenceModel* const mModel; + const PRBool mReversed; // PR_TRUE if we need to reverse every pair in the model lookup + + //char order of last character + unsigned char mLastOrder; + + PRUint32 mTotalSeqs; + PRUint32 mSeqCounters[NUMBER_OF_SEQ_CAT]; + + PRUint32 mTotalChar; + //characters that fall in our sampling range + PRUint32 mFreqChar; + + // Optional auxiliary prober for name decision. created and destroyed by the GroupProber + nsCharSetProber* mNameProber; + +}; + + +extern const SequenceModel Koi8rModel; +extern const SequenceModel Win1251Model; +extern const SequenceModel Latin5Model; +extern const SequenceModel MacCyrillicModel; +extern const SequenceModel Ibm866Model; +extern const SequenceModel Ibm855Model; +extern const SequenceModel Latin7Model; +extern const SequenceModel Win1253Model; +extern const SequenceModel Latin5BulgarianModel; +extern const SequenceModel Win1251BulgarianModel; +extern const SequenceModel Latin2HungarianModel; +extern const SequenceModel Win1250HungarianModel; +extern const SequenceModel Win1255Model; +extern const SequenceModel TIS620ThaiModel; + +#endif /* nsSingleByteCharSetProber_h__ */ + diff --git a/PowerEditor/src/uchardet/nsSJISProber.cpp b/PowerEditor/src/uchardet/nsSJISProber.cpp index c7842f6a..0b59e399 100644 --- a/PowerEditor/src/uchardet/nsSJISProber.cpp +++ b/PowerEditor/src/uchardet/nsSJISProber.cpp @@ -40,6 +40,7 @@ // 2, kana character often exist in group // 3, certain combination of kana is never used in japanese language + #include "nsSJISProber.h" void nsSJISProber::Reset(void) diff --git a/PowerEditor/src/uchardet/nsSJISProber.h b/PowerEditor/src/uchardet/nsSJISProber.h index f326ded2..1efb6e3d 100644 --- a/PowerEditor/src/uchardet/nsSJISProber.h +++ b/PowerEditor/src/uchardet/nsSJISProber.h @@ -57,7 +57,7 @@ public: Reset();} virtual ~nsSJISProber(void){delete mCodingSM;} nsProbingState HandleData(const char* aBuf, PRUint32 aLen); - const char* GetCharSetName() {return "SHIFT_JIS";} + const char* GetCharSetName() {return "Shift_JIS";} nsProbingState GetState(void) {return mState;} void Reset(void); float GetConfidence(void); diff --git a/PowerEditor/src/uchardet/nsUniversalDetector.cpp b/PowerEditor/src/uchardet/nsUniversalDetector.cpp index 8bcd8c58..dd74243c 100644 --- a/PowerEditor/src/uchardet/nsUniversalDetector.cpp +++ b/PowerEditor/src/uchardet/nsUniversalDetector.cpp @@ -47,7 +47,6 @@ nsUniversalDetector::nsUniversalDetector(PRUint32 aLanguageFilter) { - mNbspFound = PR_FALSE; mDone = PR_FALSE; mBestGuess = -1; //illegal value as signal mInTag = PR_FALSE; @@ -65,7 +64,7 @@ nsUniversalDetector::nsUniversalDetector(PRUint32 aLanguageFilter) mCharSetProbers[i] = nsnull; } -nsUniversalDetector::~nsUniversalDetector() +nsUniversalDetector::~nsUniversalDetector() { for (PRInt32 i = 0; i < NUM_OF_CHARSET_PROBERS; i++) delete mCharSetProbers[i]; @@ -73,10 +72,9 @@ nsUniversalDetector::~nsUniversalDetector() delete mEscCharSetProber; } -void +void nsUniversalDetector::Reset() { - mNbspFound = PR_FALSE; mDone = PR_FALSE; mBestGuess = -1; //illegal value as signal mInTag = PR_FALSE; @@ -98,17 +96,17 @@ nsUniversalDetector::Reset() //--------------------------------------------------------------------- #define SHORTCUT_THRESHOLD (float)0.95 -#define MINIMUM_THRESHOLD (float)0.20 +#define MINIMUM_THRESHOLD (float)0.60 nsresult nsUniversalDetector::HandleData(const char* aBuf, PRUint32 aLen) { - if (mDone) + if(mDone) return NS_OK; if (aLen > 0) mGotData = PR_TRUE; - /* If the data starts with BOM, we know it is UTF. */ + //If the data starts with BOM, we know it is UTF if (mStart) { mStart = PR_FALSE; @@ -117,42 +115,20 @@ nsresult nsUniversalDetector::HandleData(const char* aBuf, PRUint32 aLen) { case '\xEF': if (('\xBB' == aBuf[1]) && ('\xBF' == aBuf[2])) - /* EF BB BF: UTF-8 encoded BOM. */ + // EF BB BF UTF-8 encoded BOM mDetectedCharset = "UTF-8"; break; case '\xFE': if ('\xFF' == aBuf[1]) - /* FE FF: UTF-16, big endian BOM. */ + // FE FF UTF-16, big endian BOM mDetectedCharset = "UTF-16"; break; case '\xFF': if ('\xFE' == aBuf[1]) - { - if (aLen > 3 && - aBuf[2] == '\x00' && - aBuf[3] == '\x00') - { - /* FF FE 00 00: UTF-32 (LE). */ - mDetectedCharset = "UTF-32"; - } - else - { - /* FF FE: UTF-16, little endian BOM. */ - mDetectedCharset = "UTF-16"; - } - } - break; - case '\x00': - if (aLen > 3 && - aBuf[1] == '\x00' && - aBuf[2] == '\xFE' && - aBuf[3] == '\xFF') - { - /* 00 00 FE FF: UTF-32 (BE). */ - mDetectedCharset = "UTF-32"; - } - break; - } + // FF FE UTF-16, little endian BOM + mDetectedCharset = "UTF-16"; + break; + } // switch if (mDetectedCharset) { @@ -160,17 +136,14 @@ nsresult nsUniversalDetector::HandleData(const char* aBuf, PRUint32 aLen) return NS_OK; } } - + PRUint32 i; for (i = 0; i < aLen; i++) { - /* If every other character is ASCII or 0xA0, we don't run charset - * probers. - * 0xA0 (NBSP in a few charset) is apparently a rare exception - * of non-ASCII character often contained in nearly-ASCII text. */ - if (aBuf[i] & '\x80' && aBuf[i] != '\xA0') + //other than 0xa0, if every othe character is ascii, the page is ascii + if (aBuf[i] & '\x80' && aBuf[i] != '\xA0') //Since many Ascii only page contains NBSP { - /* We got a non-ASCII byte (high-byte) */ + //we got a non-ascii byte (high-byte) if (mInputState != eHighbyte) { //adjust state @@ -198,7 +171,7 @@ nsresult nsUniversalDetector::HandleData(const char* aBuf, PRUint32 aLen) } if (nsnull == mCharSetProbers[2]) { - mCharSetProbers[2] = new nsLatin1Prober; + mCharSetProbers[2] = new nsLatin1Prober; if (nsnull == mCharSetProbers[2]) return NS_ERROR_OUT_OF_MEMORY; } @@ -206,19 +179,11 @@ nsresult nsUniversalDetector::HandleData(const char* aBuf, PRUint32 aLen) } else { - /* Just pure ASCII or NBSP so far. */ - if (aBuf[i] == '\xA0') + //ok, just pure ascii so far + if ( ePureAscii == mInputState && + (aBuf[i] == '\033' || (aBuf[i] == '{' && mLastChar == '~')) ) { - /* ASCII with the only exception of NBSP seems quite common. - * I doubt it is really necessary to train a model here, so let's - * just make an exception. - */ - mNbspFound = PR_TRUE; - } - else if (mInputState == ePureAscii && - (aBuf[i] == '\033' || (aBuf[i] == '{' && mLastChar == '~'))) - { - /* We found an escape character or HZ "~{". */ + //found escape character or HZ "~{" mInputState = eEscAscii; } mLastChar = aBuf[i]; @@ -240,16 +205,6 @@ nsresult nsUniversalDetector::HandleData(const char* aBuf, PRUint32 aLen) mDone = PR_TRUE; mDetectedCharset = mEscCharSetProber->GetCharSetName(); } - else if (mNbspFound) - { - mDetectedCharset = "ISO-8859-1"; - } - else - { - /* ASCII with the ESC character (or the sequence "~{") is still - * ASCII until proven otherwise. */ - mDetectedCharset = "ASCII"; - } break; case eHighbyte: for (i = 0; i < NUM_OF_CHARSET_PROBERS; i++) @@ -257,29 +212,18 @@ nsresult nsUniversalDetector::HandleData(const char* aBuf, PRUint32 aLen) if (mCharSetProbers[i]) { st = mCharSetProbers[i]->HandleData(aBuf, aLen); - if (st == eFoundIt) + if (st == eFoundIt) { mDone = PR_TRUE; mDetectedCharset = mCharSetProbers[i]->GetCharSetName(); return NS_OK; } - } + } } break; - default: - if (mNbspFound) - { - /* ISO-8859-1 is a good result candidate for ASCII + NBSP. - * (though it could have been any ISO-8859 encoding). */ - mDetectedCharset = "ISO-8859-1"; - } - else - { - /* Pure ASCII */ - mDetectedCharset = "ASCII"; - } - break; + default: //pure ascii + ;//do nothing here } return NS_OK; } @@ -290,7 +234,7 @@ void nsUniversalDetector::DataEnd() { if (!mGotData) { - // we haven't got any data yet, return immediately + // we haven't got any data yet, return immediately // caller program sometimes call DataEnd before anything has been sent to detector return; } @@ -301,7 +245,7 @@ void nsUniversalDetector::DataEnd() Report(mDetectedCharset); return; } - + switch (mInputState) { case eHighbyte: diff --git a/PowerEditor/src/uchardet/nsUniversalDetector.h b/PowerEditor/src/uchardet/nsUniversalDetector.h index 9f0a4b18..525f7220 100644 --- a/PowerEditor/src/uchardet/nsUniversalDetector.h +++ b/PowerEditor/src/uchardet/nsUniversalDetector.h @@ -72,7 +72,6 @@ protected: virtual void Report(const char* aCharset) = 0; virtual void Reset(); nsInputState mInputState; - PRBool mNbspFound; PRBool mDone; PRBool mInTag; PRBool mStart; @@ -87,3 +86,4 @@ protected: }; #endif + diff --git a/PowerEditor/src/uchardet/uchardet.cpp b/PowerEditor/src/uchardet/uchardet.cpp index f1951d1a..35b84092 100644 --- a/PowerEditor/src/uchardet/uchardet.cpp +++ b/PowerEditor/src/uchardet/uchardet.cpp @@ -34,52 +34,47 @@ * the terms of any one of the MPL, the GPL or the LGPL. * * ***** END LICENSE BLOCK ***** */ + #include "uchardet.h" -#include -#include #include "nscore.h" #include "nsUniversalDetector.h" +#include + +using std::string; class HandleUniversalDetector : public nsUniversalDetector { protected: - char *m_charset; + string m_charset; public: HandleUniversalDetector() : nsUniversalDetector(NS_FILTER_ALL) - , m_charset(0) { + m_charset = ""; } virtual ~HandleUniversalDetector() - { - if (m_charset) - free(m_charset); - } + {} virtual void Report(const char* charset) { - if (m_charset) - free(m_charset); - m_charset = strdup(charset); + m_charset = charset; } virtual void Reset() { nsUniversalDetector::Reset(); - if (m_charset) - free(m_charset); - m_charset = strdup(""); + m_charset = ""; } const char* GetCharset() const { - return m_charset? m_charset : ""; + return m_charset.c_str(); } }; -uchardet_t uchardet_new(void) +uchardet_t uchardet_new() { return reinterpret_cast (new HandleUniversalDetector()); } diff --git a/PowerEditor/src/uchardet/uchardet.h b/PowerEditor/src/uchardet/uchardet.h index c1593eb2..533666aa 100644 --- a/PowerEditor/src/uchardet/uchardet.h +++ b/PowerEditor/src/uchardet/uchardet.h @@ -34,8 +34,8 @@ * the terms of any one of the MPL, the GPL or the LGPL. * * ***** END LICENSE BLOCK ***** */ -#ifndef UCHARDET_H___ -#define UCHARDET_H___ +#ifndef ___UCHARDET_H___ +#define ___UCHARDET_H___ #ifdef __cplusplus extern "C" { @@ -43,13 +43,13 @@ extern "C" { #include -typedef struct uchardet * uchardet_t; +typedef void * uchardet_t; /** * Create an encoding detector. * @return a handle of a instance of uchardet */ -uchardet_t uchardet_new(void); +uchardet_t uchardet_new(); /** * Delete an encoding detector. @@ -79,9 +79,9 @@ void uchardet_data_end(uchardet_t ud); void uchardet_reset(uchardet_t ud); /** - * Get an iconv-compatible name of the encoding that was detected. + * Get the name of encoding that was detected. * @param ud [in] handle of a instance of uchardet - * @return name of charset on success and "" on failure. + * @return name of charset on success and "" on failure or pure ascii. */ const char * uchardet_get_charset(uchardet_t ud); diff --git a/PowerEditor/visual.net/notepadPlus.vcxproj b/PowerEditor/visual.net/notepadPlus.vcxproj index 3a94d179..13f8b663 100755 --- a/PowerEditor/visual.net/notepadPlus.vcxproj +++ b/PowerEditor/visual.net/notepadPlus.vcxproj @@ -272,20 +272,6 @@ copy ..\src\contextMenu.xml ..\bin64\contextMenu.xml - - - - - - - - - - - - - - @@ -320,6 +306,12 @@ copy ..\src\contextMenu.xml ..\bin64\contextMenu.xml + + + + + +