Update uchardet to 0.0.6 to improve UTF-8 detection quality
This PR updates uchardet (https://www.freedesktop.org/wiki/Software/uchardet/) to the newest stable version (0.0.6). This version seems to improve UTF-8 detection quality, so updating this dependency is expected to squash numerous charset detection issues. Fixes #4878, Fixes #4767, Fixes #4428, Fixes #4246, Fixes #3705, Fixes #3588, Fixes #431, Closes #4925
This commit is contained in:
parent
ac09857656
commit
bfb7e863cc
@ -50,7 +50,7 @@ public:
|
|||||||
CharDistributionAnalysis() {Reset(PR_FALSE);}
|
CharDistributionAnalysis() {Reset(PR_FALSE);}
|
||||||
|
|
||||||
//feed a block of data and do distribution analysis
|
//feed a block of data and do distribution analysis
|
||||||
void HandleData(const char*, PRUint32) {}
|
void HandleData(const char* /*aBuf*/, PRUint32 /*aLen*/) {}
|
||||||
|
|
||||||
//Feed a character with known length
|
//Feed a character with known length
|
||||||
void HandleOneChar(const char* aStr, PRUint32 aCharLen)
|
void HandleOneChar(const char* aStr, PRUint32 aCharLen)
|
||||||
@ -96,7 +96,7 @@ protected:
|
|||||||
//we do not handle character base on its original encoding string, but
|
//we do not handle character base on its original encoding string, but
|
||||||
//convert this encoding string to a number, here called order.
|
//convert this encoding string to a number, here called order.
|
||||||
//This allow multiple encoding of a language to share one frequency table
|
//This allow multiple encoding of a language to share one frequency table
|
||||||
virtual PRInt32 GetOrder(const char* ) {return -1;}
|
virtual PRInt32 GetOrder(const char* /*str*/) {return -1;}
|
||||||
|
|
||||||
//If this flag is set to PR_TRUE, detection is done and conclusion has been made
|
//If this flag is set to PR_TRUE, detection is done and conclusion has been made
|
||||||
PRBool mDone;
|
PRBool mDone;
|
||||||
@ -128,12 +128,12 @@ public:
|
|||||||
EUCTWDistributionAnalysis();
|
EUCTWDistributionAnalysis();
|
||||||
protected:
|
protected:
|
||||||
|
|
||||||
//for euc-TW encoding, we are interested
|
//for EUC-TW encoding, we are interested
|
||||||
// first byte range: 0xc4 -- 0xfe
|
// first byte range: 0xc4 -- 0xfe
|
||||||
// second byte range: 0xa1 -- 0xfe
|
// second byte range: 0xa1 -- 0xfe
|
||||||
//no validation needed here. State machine has done that
|
//no validation needed here. State machine has done that
|
||||||
PRInt32 GetOrder(const char* str) {
|
PRInt32 GetOrder(const char* str)
|
||||||
if ((unsigned char)*str >= (unsigned char)0xc4)
|
{ if ((unsigned char)*str >= (unsigned char)0xc4)
|
||||||
return 94*((unsigned char)str[0]-(unsigned char)0xc4) + (unsigned char)str[1] - (unsigned char)0xa1;
|
return 94*((unsigned char)str[0]-(unsigned char)0xc4) + (unsigned char)str[1] - (unsigned char)0xa1;
|
||||||
else
|
else
|
||||||
return -1;
|
return -1;
|
||||||
|
@ -195,16 +195,16 @@ float JapaneseContextAnalysis::GetConfidence(void)
|
|||||||
PRInt32 SJISContextAnalysis::GetOrder(const char* str, PRUint32 *charLen)
|
PRInt32 SJISContextAnalysis::GetOrder(const char* str, PRUint32 *charLen)
|
||||||
{
|
{
|
||||||
//find out current char's byte length
|
//find out current char's byte length
|
||||||
if ((unsigned char)*str >= (unsigned char)0x81 && (unsigned char)*str <= (unsigned char)0x9f ||
|
if (((unsigned char)*str >= (unsigned char)0x81 && (unsigned char)*str <= (unsigned char)0x9f) ||
|
||||||
(unsigned char)*str >= (unsigned char)0xe0 && (unsigned char)*str <= (unsigned char)0xfc )
|
((unsigned char)*str >= (unsigned char)0xe0 && (unsigned char)*str <= (unsigned char)0xfc))
|
||||||
*charLen = 2;
|
*charLen = 2;
|
||||||
else
|
else
|
||||||
*charLen = 1;
|
*charLen = 1;
|
||||||
|
|
||||||
//return its order if it is hiragana
|
//return its order if it is hiragana
|
||||||
if (*str == '\202' &&
|
if (*str == '\202' &&
|
||||||
(unsigned char)*(str+1) >= (unsigned char)0x9f &&
|
(unsigned char)*(str+1) >= (unsigned char)0x9f &&
|
||||||
(unsigned char)*(str+1) <= (unsigned char)0xf1)
|
(unsigned char)*(str+1) <= (unsigned char)0xf1)
|
||||||
return (unsigned char)*(str+1) - (unsigned char)0x9f;
|
return (unsigned char)*(str+1) - (unsigned char)0x9f;
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
@ -213,13 +213,13 @@ PRInt32 EUCJPContextAnalysis::GetOrder(const char* str, PRUint32 *charLen)
|
|||||||
{
|
{
|
||||||
//find out current char's byte length
|
//find out current char's byte length
|
||||||
if ((unsigned char)*str == (unsigned char)0x8e ||
|
if ((unsigned char)*str == (unsigned char)0x8e ||
|
||||||
(unsigned char)*str >= (unsigned char)0xa1 &&
|
((unsigned char)*str >= (unsigned char)0xa1 &&
|
||||||
(unsigned char)*str <= (unsigned char)0xfe)
|
(unsigned char)*str <= (unsigned char)0xfe))
|
||||||
*charLen = 2;
|
*charLen = 2;
|
||||||
else if ((unsigned char)*str == (unsigned char)0x8f)
|
else if ((unsigned char)*str == (unsigned char)0x8f)
|
||||||
*charLen = 3;
|
*charLen = 3;
|
||||||
else
|
else
|
||||||
*charLen = 1;
|
*charLen = 1;
|
||||||
|
|
||||||
//return its order if it is hiragana
|
//return its order if it is hiragana
|
||||||
if ((unsigned char)*str == (unsigned char)0xa4 &&
|
if ((unsigned char)*str == (unsigned char)0xa4 &&
|
||||||
|
@ -1,242 +0,0 @@
|
|||||||
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
|
||||||
/* ***** BEGIN LICENSE BLOCK *****
|
|
||||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
|
||||||
*
|
|
||||||
* The contents of this file are subject to the Mozilla Public License Version
|
|
||||||
* 1.1 (the "License"); you may not use this file except in compliance with
|
|
||||||
* the License. You may obtain a copy of the License at
|
|
||||||
* http://www.mozilla.org/MPL/
|
|
||||||
*
|
|
||||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
|
||||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
|
||||||
* for the specific language governing rights and limitations under the
|
|
||||||
* License.
|
|
||||||
*
|
|
||||||
* The Original Code is Mozilla Communicator client code.
|
|
||||||
*
|
|
||||||
* The Initial Developer of the Original Code is
|
|
||||||
* Netscape Communications Corporation.
|
|
||||||
* Portions created by the Initial Developer are Copyright (C) 1998
|
|
||||||
* the Initial Developer. All Rights Reserved.
|
|
||||||
*
|
|
||||||
* Contributor(s):
|
|
||||||
*
|
|
||||||
* Alternatively, the contents of this file may be used under the terms of
|
|
||||||
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
|
||||||
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
|
||||||
* in which case the provisions of the GPL or the LGPL are applicable instead
|
|
||||||
* of those above. If you wish to allow use of your version of this file only
|
|
||||||
* under the terms of either the GPL or the LGPL, and not to allow others to
|
|
||||||
* use your version of this file under the terms of the MPL, indicate your
|
|
||||||
* decision by deleting the provisions above and replace them with the notice
|
|
||||||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
|
||||||
* the provisions above, a recipient may use your version of this file under
|
|
||||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
|
||||||
*
|
|
||||||
* ***** END LICENSE BLOCK ***** */
|
|
||||||
|
|
||||||
#include "nsSBCharSetProber.h"
|
|
||||||
/****************************************************************
|
|
||||||
255: Control characters that usually does not exist in any text
|
|
||||||
254: Carriage/Return
|
|
||||||
253: symbol (punctuation) that does not belong to word
|
|
||||||
252: 0 - 9
|
|
||||||
|
|
||||||
*****************************************************************/
|
|
||||||
|
|
||||||
//Character Mapping Table:
|
|
||||||
static const unsigned char Latin7_CharToOrderMap[] =
|
|
||||||
{
|
|
||||||
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, //00
|
|
||||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, //10
|
|
||||||
+253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, //20
|
|
||||||
252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, //30
|
|
||||||
253, 82,100,104, 94, 98,101,116,102,111,187,117, 92, 88,113, 85, //40
|
|
||||||
79,118,105, 83, 67,114,119, 95, 99,109,188,253,253,253,253,253, //50
|
|
||||||
253, 72, 70, 80, 81, 60, 96, 93, 89, 68,120, 97, 77, 86, 69, 55, //60
|
|
||||||
78,115, 65, 66, 58, 76,106,103, 87,107,112,253,253,253,253,253, //70
|
|
||||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, //80
|
|
||||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, //90
|
|
||||||
+253,233, 90,253,253,253,253,253,253,253,253,253,253, 74,253,253, //a0
|
|
||||||
253,253,253,253,247,248, 61, 36, 46, 71, 73,253, 54,253,108,123, //b0
|
|
||||||
110, 31, 51, 43, 41, 34, 91, 40, 52, 47, 44, 53, 38, 49, 59, 39, //c0
|
|
||||||
35, 48,250, 37, 33, 45, 56, 50, 84, 57,120,121, 17, 18, 22, 15, //d0
|
|
||||||
124, 1, 29, 20, 21, 3, 32, 13, 25, 5, 11, 16, 10, 6, 30, 4, //e0
|
|
||||||
9, 8, 14, 7, 2, 12, 28, 23, 42, 24, 64, 75, 19, 26, 27,253, //f0
|
|
||||||
};
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
static const unsigned char win1253_CharToOrderMap[] =
|
|
||||||
{
|
|
||||||
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, //00
|
|
||||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, //10
|
|
||||||
+253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, //20
|
|
||||||
252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, //30
|
|
||||||
253, 82,100,104, 94, 98,101,116,102,111,187,117, 92, 88,113, 85, //40
|
|
||||||
79,118,105, 83, 67,114,119, 95, 99,109,188,253,253,253,253,253, //50
|
|
||||||
253, 72, 70, 80, 81, 60, 96, 93, 89, 68,120, 97, 77, 86, 69, 55, //60
|
|
||||||
78,115, 65, 66, 58, 76,106,103, 87,107,112,253,253,253,253,253, //70
|
|
||||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, //80
|
|
||||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, //90
|
|
||||||
+253,233, 61,253,253,253,253,253,253,253,253,253,253, 74,253,253, //a0
|
|
||||||
253,253,253,253,247,253,253, 36, 46, 71, 73,253, 54,253,108,123, //b0
|
|
||||||
110, 31, 51, 43, 41, 34, 91, 40, 52, 47, 44, 53, 38, 49, 59, 39, //c0
|
|
||||||
35, 48,250, 37, 33, 45, 56, 50, 84, 57,120,121, 17, 18, 22, 15, //d0
|
|
||||||
124, 1, 29, 20, 21, 3, 32, 13, 25, 5, 11, 16, 10, 6, 30, 4, //e0
|
|
||||||
9, 8, 14, 7, 2, 12, 28, 23, 42, 24, 64, 75, 19, 26, 27,253, //f0
|
|
||||||
};
|
|
||||||
|
|
||||||
//Model Table:
|
|
||||||
//total sequences: 100%
|
|
||||||
//first 512 sequences: 98.2851%
|
|
||||||
//first 1024 sequences:1.7001%
|
|
||||||
//rest sequences: 0.0359%
|
|
||||||
//negative sequences: 0.0148%
|
|
||||||
static const PRUint8 GreekLangModel[] =
|
|
||||||
{
|
|
||||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
0,0,3,2,2,3,3,3,3,3,3,3,3,1,3,3,3,0,2,2,3,3,0,3,0,3,2,0,3,3,3,0,
|
|
||||||
3,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
0,3,3,3,3,3,0,3,3,0,3,2,3,3,0,3,2,3,3,3,0,0,3,0,3,0,3,3,2,0,0,0,
|
|
||||||
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,
|
|
||||||
0,2,3,2,2,3,3,3,3,3,3,3,3,0,3,3,3,3,0,2,3,3,0,3,3,3,3,2,3,3,3,0,
|
|
||||||
2,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
0,2,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,0,2,1,3,3,3,3,2,3,3,2,3,3,2,0,
|
|
||||||
0,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
0,3,3,3,3,0,3,3,3,3,3,3,0,3,3,0,3,3,3,3,3,3,3,3,3,3,0,3,2,3,3,0,
|
|
||||||
2,0,1,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,
|
|
||||||
0,3,3,3,3,3,2,3,0,0,0,0,3,3,0,3,1,3,3,3,0,3,3,0,3,3,3,3,0,0,0,0,
|
|
||||||
2,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
0,3,3,3,3,3,0,3,0,3,3,3,3,3,0,3,2,2,2,3,0,2,3,3,3,3,3,2,3,3,0,0,
|
|
||||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
0,3,3,3,3,3,3,2,2,2,3,3,3,3,0,3,1,3,3,3,3,2,3,3,3,3,3,3,3,2,2,0,
|
|
||||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
0,3,3,3,3,3,2,0,3,0,0,0,3,3,2,3,3,3,3,3,0,0,3,2,3,0,2,3,0,0,0,0,
|
|
||||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
0,3,0,3,3,3,3,0,0,3,3,0,2,3,0,3,0,3,3,3,0,0,3,0,3,0,2,2,3,3,0,0,
|
|
||||||
0,0,1,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
0,3,3,3,3,3,2,0,3,2,3,3,3,3,0,3,3,3,3,3,0,3,3,2,3,2,3,3,2,0,0,0,
|
|
||||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
0,3,3,2,3,2,3,3,3,3,3,3,0,2,3,2,3,2,2,2,3,2,3,3,2,3,0,2,2,2,3,0,
|
|
||||||
2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
0,0,3,0,0,0,3,3,3,2,3,3,0,0,3,0,3,0,0,0,3,2,0,3,0,3,0,0,2,0,2,0,
|
|
||||||
0,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
0,3,3,3,3,0,3,3,3,3,3,3,0,3,3,0,3,0,0,0,3,3,0,3,3,3,0,0,1,2,3,0,
|
|
||||||
3,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
0,3,3,3,3,3,2,0,0,3,2,2,3,3,0,3,3,3,3,3,2,1,3,0,3,2,3,3,2,1,0,0,
|
|
||||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
0,0,3,3,0,2,3,3,3,3,3,3,0,0,3,0,3,0,0,0,3,3,0,3,2,3,0,0,3,3,3,0,
|
|
||||||
3,0,0,0,2,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
0,3,3,3,3,0,3,3,3,3,3,3,0,0,3,0,3,0,0,0,3,2,0,3,2,3,0,0,3,2,3,0,
|
|
||||||
2,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
0,0,3,1,2,2,3,3,3,3,3,3,0,2,3,0,3,0,0,0,3,3,0,3,0,2,0,0,2,3,1,0,
|
|
||||||
2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
0,3,0,3,3,3,3,0,3,0,3,3,2,3,0,3,3,3,3,3,3,0,3,3,3,0,2,3,0,0,3,0,
|
|
||||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
0,3,0,3,3,3,0,0,3,0,0,0,3,3,0,3,0,2,3,3,0,0,3,0,3,0,3,3,0,0,0,0,
|
|
||||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
0,0,3,0,0,0,3,3,3,3,3,3,0,0,3,0,2,0,0,0,3,3,0,3,0,3,0,0,2,0,2,0,
|
|
||||||
0,0,0,0,1,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
0,3,3,3,3,3,3,0,3,0,2,0,3,2,0,3,2,3,2,3,0,0,3,2,3,2,3,3,0,0,0,0,
|
|
||||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
0,0,3,0,0,2,3,3,3,3,3,0,0,0,3,0,2,1,0,0,3,2,2,2,0,3,0,0,2,2,0,0,
|
|
||||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
0,3,0,3,3,3,2,0,3,0,3,0,3,3,0,2,1,2,3,3,0,0,3,0,3,0,3,3,0,0,0,0,
|
|
||||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
0,2,3,3,3,0,3,3,3,3,3,3,0,2,3,0,3,0,0,0,2,1,0,2,2,3,0,0,2,2,2,0,
|
|
||||||
0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
0,0,3,0,0,2,3,3,3,2,3,0,0,1,3,0,2,0,0,0,0,3,0,1,0,2,0,0,1,1,1,0,
|
|
||||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
0,3,3,3,3,3,1,0,3,0,0,0,3,2,0,3,2,3,3,3,0,0,3,0,3,2,2,2,1,0,0,0,
|
|
||||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
0,3,0,3,3,3,0,0,3,0,0,0,0,2,0,2,3,3,2,2,2,2,3,0,2,0,2,2,0,0,0,0,
|
|
||||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
0,3,3,3,3,2,0,0,0,0,0,0,2,3,0,2,0,2,3,2,0,0,3,0,3,0,3,1,0,0,0,0,
|
|
||||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
0,0,0,0,0,0,3,2,3,3,2,2,3,0,2,0,3,0,0,0,2,0,0,0,0,1,2,0,2,0,2,0,
|
|
||||||
0,2,0,2,0,2,2,0,0,1,0,2,2,2,0,2,2,2,0,2,2,2,0,0,2,0,0,1,0,0,0,0,
|
|
||||||
0,2,0,3,3,2,0,0,0,0,0,0,1,3,0,2,0,2,2,2,0,0,2,0,3,0,0,2,0,0,0,0,
|
|
||||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
0,3,0,2,3,2,0,2,2,0,2,0,2,2,0,2,0,2,2,2,0,0,0,0,0,0,2,3,0,0,0,2,
|
|
||||||
0,1,2,0,0,0,0,2,2,0,0,0,2,1,0,2,2,0,0,0,0,0,0,1,0,2,0,0,0,0,0,0,
|
|
||||||
0,0,2,1,0,2,3,2,2,3,2,3,2,0,0,3,3,3,0,0,3,2,0,0,0,1,1,0,2,0,2,2,
|
|
||||||
0,2,0,2,0,2,2,0,0,2,0,2,2,2,0,2,2,2,2,0,0,2,0,0,0,2,0,1,0,0,0,0,
|
|
||||||
0,3,0,3,3,2,2,0,3,0,0,0,2,2,0,2,2,2,1,2,0,0,1,2,2,0,0,3,0,0,0,2,
|
|
||||||
0,1,2,0,0,0,1,2,0,0,0,0,0,0,0,2,2,0,1,0,0,2,0,0,0,2,0,0,0,0,0,0,
|
|
||||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
0,2,3,3,2,2,0,0,0,2,0,2,3,3,0,2,0,0,0,0,0,0,2,2,2,0,2,2,0,2,0,2,
|
|
||||||
0,2,2,0,0,2,2,2,2,1,0,0,2,2,0,2,0,0,2,0,0,0,0,0,0,2,0,0,0,0,0,0,
|
|
||||||
0,2,0,3,2,3,0,0,0,3,0,0,2,2,0,2,0,2,2,2,0,0,2,0,0,0,0,0,0,0,0,2,
|
|
||||||
0,0,2,2,0,0,2,2,2,0,0,0,0,0,0,2,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
0,0,2,0,0,3,2,0,2,2,2,2,2,0,0,0,2,0,0,0,0,2,0,1,0,0,2,0,1,0,0,0,
|
|
||||||
0,2,2,2,0,2,2,0,1,2,0,2,2,2,0,2,2,2,2,1,2,2,0,0,2,0,0,0,0,0,0,0,
|
|
||||||
0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,
|
|
||||||
0,2,0,2,0,2,2,0,0,0,0,1,2,1,0,0,2,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
0,0,0,3,2,3,0,0,2,0,0,0,2,2,0,2,0,0,0,1,0,0,2,0,2,0,2,2,0,0,0,0,
|
|
||||||
0,0,2,0,0,0,0,2,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,
|
|
||||||
0,2,2,3,2,2,0,0,0,0,0,0,1,3,0,2,0,2,2,0,0,0,1,0,2,0,0,0,0,0,0,0,
|
|
||||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
0,2,0,2,0,3,2,0,2,0,0,0,0,0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
|
|
||||||
0,0,2,0,0,0,0,1,1,0,0,2,1,2,0,2,2,0,1,0,0,1,0,0,0,2,0,0,0,0,0,0,
|
|
||||||
0,3,0,2,2,2,0,0,2,0,0,0,2,0,0,0,2,3,0,2,0,0,0,0,0,0,2,2,0,0,0,2,
|
|
||||||
0,1,2,0,0,0,1,2,2,1,0,0,0,2,0,0,2,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,
|
|
||||||
0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
0,2,1,2,0,2,2,0,2,0,0,2,0,0,0,0,1,2,1,0,2,1,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
0,0,2,0,0,0,3,1,2,2,0,2,0,0,0,0,2,0,0,0,2,0,0,3,0,0,0,0,2,2,2,0,
|
|
||||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
0,2,1,0,2,0,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,1,0,0,0,0,0,0,2,
|
|
||||||
0,2,2,0,0,2,2,2,2,2,0,1,2,0,0,0,2,2,0,1,0,2,0,0,2,2,0,0,0,0,0,0,
|
|
||||||
0,0,0,0,1,0,0,0,0,0,0,0,3,0,0,2,0,0,0,0,0,0,0,0,2,0,2,0,0,0,0,2,
|
|
||||||
0,1,2,0,0,0,0,2,2,1,0,1,0,1,0,2,2,2,1,0,0,0,0,0,0,1,0,0,0,0,0,0,
|
|
||||||
0,2,0,1,2,0,0,0,0,0,0,0,0,0,0,2,0,0,2,2,0,0,0,0,1,0,0,0,0,0,0,2,
|
|
||||||
0,2,2,0,0,0,0,2,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,2,0,0,0,
|
|
||||||
0,2,2,2,2,0,0,0,3,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,2,0,0,0,0,0,0,1,
|
|
||||||
0,0,2,0,0,0,0,1,2,0,0,0,0,0,0,2,2,1,1,0,0,0,0,0,0,1,0,0,0,0,0,0,
|
|
||||||
0,2,0,2,2,2,0,0,2,0,0,0,0,0,0,0,2,2,2,0,0,0,2,0,0,0,0,0,0,0,0,2,
|
|
||||||
0,0,1,0,0,0,0,2,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
0,3,0,2,0,0,0,0,0,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,2,0,0,0,0,2,
|
|
||||||
0,0,2,0,0,0,0,2,2,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
0,2,0,2,2,1,0,0,0,0,0,0,2,0,0,2,0,2,2,2,0,0,0,0,0,0,2,0,0,0,0,2,
|
|
||||||
0,0,2,0,0,2,0,2,2,0,0,0,0,2,0,2,0,0,0,0,0,2,0,0,0,2,0,0,0,0,0,0,
|
|
||||||
0,0,3,0,0,0,2,2,0,2,2,0,0,0,0,0,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,
|
|
||||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,2,0,0,0,0,0,
|
|
||||||
0,2,2,2,2,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,1,
|
|
||||||
0,0,0,0,0,0,0,2,1,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
0,0,0,0,0,0,0,2,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,
|
|
||||||
0,2,0,0,0,2,0,0,0,0,0,1,0,0,0,0,2,2,0,0,0,1,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,2,0,0,0,
|
|
||||||
0,2,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,2,0,2,0,0,0,
|
|
||||||
0,0,0,0,0,0,0,0,2,1,0,0,0,0,0,0,2,0,0,0,1,2,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
};
|
|
||||||
|
|
||||||
const SequenceModel Latin7Model (
|
|
||||||
Latin7_CharToOrderMap,
|
|
||||||
GreekLangModel,
|
|
||||||
(float)0.982851,
|
|
||||||
PR_FALSE,
|
|
||||||
"ISO-8859-7"
|
|
||||||
);
|
|
||||||
|
|
||||||
const SequenceModel Win1253Model(
|
|
||||||
win1253_CharToOrderMap,
|
|
||||||
GreekLangModel,
|
|
||||||
(float)0.982851,
|
|
||||||
PR_FALSE,
|
|
||||||
"windows-1253"
|
|
||||||
);
|
|
@ -1,238 +0,0 @@
|
|||||||
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
|
||||||
/* ***** BEGIN LICENSE BLOCK *****
|
|
||||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
|
||||||
*
|
|
||||||
* The contents of this file are subject to the Mozilla Public License Version
|
|
||||||
* 1.1 (the "License"); you may not use this file except in compliance with
|
|
||||||
* the License. You may obtain a copy of the License at
|
|
||||||
* http://www.mozilla.org/MPL/
|
|
||||||
*
|
|
||||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
|
||||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
|
||||||
* for the specific language governing rights and limitations under the
|
|
||||||
* License.
|
|
||||||
*
|
|
||||||
* The Original Code is Mozilla Communicator client code.
|
|
||||||
*
|
|
||||||
* The Initial Developer of the Original Code is
|
|
||||||
* Netscape Communications Corporation.
|
|
||||||
* Portions created by the Initial Developer are Copyright (C) 1998
|
|
||||||
* the Initial Developer. All Rights Reserved.
|
|
||||||
*
|
|
||||||
* Contributor(s):
|
|
||||||
*
|
|
||||||
* Alternatively, the contents of this file may be used under the terms of
|
|
||||||
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
|
||||||
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
|
||||||
* in which case the provisions of the GPL or the LGPL are applicable instead
|
|
||||||
* of those above. If you wish to allow use of your version of this file only
|
|
||||||
* under the terms of either the GPL or the LGPL, and not to allow others to
|
|
||||||
* use your version of this file under the terms of the MPL, indicate your
|
|
||||||
* decision by deleting the provisions above and replace them with the notice
|
|
||||||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
|
||||||
* the provisions above, a recipient may use your version of this file under
|
|
||||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
|
||||||
*
|
|
||||||
* ***** END LICENSE BLOCK ***** */
|
|
||||||
|
|
||||||
#include "nsSBCharSetProber.h"
|
|
||||||
/****************************************************************
|
|
||||||
255: Control characters that usually does not exist in any text
|
|
||||||
254: Carriage/Return
|
|
||||||
253: symbol (punctuation) that does not belong to word
|
|
||||||
252: 0 - 9
|
|
||||||
|
|
||||||
*****************************************************************/
|
|
||||||
|
|
||||||
//Character Mapping Table:
|
|
||||||
static const unsigned char Latin2_HungarianCharToOrderMap[] =
|
|
||||||
{
|
|
||||||
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, //00
|
|
||||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, //10
|
|
||||||
+253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, //20
|
|
||||||
252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, //30
|
|
||||||
253, 28, 40, 54, 45, 32, 50, 49, 38, 39, 53, 36, 41, 34, 35, 47,
|
|
||||||
46, 71, 43, 33, 37, 57, 48, 64, 68, 55, 52,253,253,253,253,253,
|
|
||||||
253, 2, 18, 26, 17, 1, 27, 12, 20, 9, 22, 7, 6, 13, 4, 8,
|
|
||||||
23, 67, 10, 5, 3, 21, 19, 65, 62, 16, 11,253,253,253,253,253,
|
|
||||||
159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,
|
|
||||||
175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,
|
|
||||||
191,192,193,194,195,196,197, 75,198,199,200,201,202,203,204,205,
|
|
||||||
79,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,
|
|
||||||
221, 51, 81,222, 78,223,224,225,226, 44,227,228,229, 61,230,231,
|
|
||||||
232,233,234, 58,235, 66, 59,236,237,238, 60, 69, 63,239,240,241,
|
|
||||||
82, 14, 74,242, 70, 80,243, 72,244, 15, 83, 77, 84, 30, 76, 85,
|
|
||||||
245,246,247, 25, 73, 42, 24,248,249,250, 31, 56, 29,251,252,253,
|
|
||||||
};
|
|
||||||
|
|
||||||
static const unsigned char win1250HungarianCharToOrderMap[] =
|
|
||||||
{
|
|
||||||
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, //00
|
|
||||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, //10
|
|
||||||
+253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, //20
|
|
||||||
252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, //30
|
|
||||||
253, 28, 40, 54, 45, 32, 50, 49, 38, 39, 53, 36, 41, 34, 35, 47,
|
|
||||||
46, 72, 43, 33, 37, 57, 48, 64, 68, 55, 52,253,253,253,253,253,
|
|
||||||
253, 2, 18, 26, 17, 1, 27, 12, 20, 9, 22, 7, 6, 13, 4, 8,
|
|
||||||
23, 67, 10, 5, 3, 21, 19, 65, 62, 16, 11,253,253,253,253,253,
|
|
||||||
161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,
|
|
||||||
177,178,179,180, 78,181, 69,182,183,184,185,186,187,188,189,190,
|
|
||||||
191,192,193,194,195,196,197, 76,198,199,200,201,202,203,204,205,
|
|
||||||
81,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,
|
|
||||||
221, 51, 83,222, 80,223,224,225,226, 44,227,228,229, 61,230,231,
|
|
||||||
232,233,234, 58,235, 66, 59,236,237,238, 60, 70, 63,239,240,241,
|
|
||||||
84, 14, 75,242, 71, 82,243, 73,244, 15, 85, 79, 86, 30, 77, 87,
|
|
||||||
245,246,247, 25, 74, 42, 24,248,249,250, 31, 56, 29,251,252,253,
|
|
||||||
};
|
|
||||||
|
|
||||||
//Model Table:
|
|
||||||
//total sequences: 100%
|
|
||||||
//first 512 sequences: 94.7368%
|
|
||||||
//first 1024 sequences:5.2623%
|
|
||||||
//rest sequences: 0.8894%
|
|
||||||
//negative sequences: 0.0009%
|
|
||||||
static const PRUint8 HungarianLangModel[] =
|
|
||||||
{
|
|
||||||
0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
|
|
||||||
3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,2,2,3,3,1,1,2,2,2,2,2,1,2,
|
|
||||||
3,2,2,3,3,3,3,3,2,3,3,3,3,3,3,1,2,3,3,3,3,2,3,3,1,1,3,3,0,1,1,1,
|
|
||||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,
|
|
||||||
3,2,1,3,3,3,3,3,2,3,3,3,3,3,1,1,2,3,3,3,3,3,3,3,1,1,3,2,0,1,1,1,
|
|
||||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,
|
|
||||||
3,3,3,3,3,3,3,3,3,3,3,1,1,2,3,3,3,1,3,3,3,3,3,1,3,3,2,2,0,3,2,3,
|
|
||||||
0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,
|
|
||||||
3,3,3,3,3,3,2,3,3,3,2,3,3,2,3,3,3,3,3,2,3,3,2,2,3,2,3,2,0,3,2,2,
|
|
||||||
0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,
|
|
||||||
3,3,3,3,3,3,2,3,3,3,3,3,2,3,3,3,1,2,3,2,2,3,1,2,3,3,2,2,0,3,3,3,
|
|
||||||
0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
|
|
||||||
3,3,3,3,3,3,3,3,3,3,2,2,3,3,3,3,3,3,2,3,3,3,3,2,3,3,3,3,0,2,3,2,
|
|
||||||
0,0,0,1,1,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
|
|
||||||
3,3,3,3,3,3,3,3,3,3,3,1,1,1,3,3,2,1,3,2,2,3,2,1,3,2,2,1,0,3,3,1,
|
|
||||||
0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
|
|
||||||
3,2,2,3,3,3,3,3,1,2,3,3,3,3,1,2,1,3,3,3,3,2,2,3,1,1,3,2,0,1,1,1,
|
|
||||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,
|
|
||||||
3,3,3,3,3,3,3,3,2,2,3,3,3,3,3,2,1,3,3,3,3,3,2,2,1,3,3,3,0,1,1,2,
|
|
||||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,
|
|
||||||
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,3,2,3,3,3,2,0,3,2,3,
|
|
||||||
0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,1,0,
|
|
||||||
3,3,3,3,3,3,2,3,3,3,2,3,2,3,3,3,1,3,2,2,2,3,1,1,3,3,1,1,0,3,3,2,
|
|
||||||
0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
|
|
||||||
3,3,3,3,3,3,3,2,3,3,3,2,3,2,3,3,3,2,3,3,3,3,3,1,2,3,2,2,0,2,2,2,
|
|
||||||
0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
|
|
||||||
3,3,3,2,2,2,3,1,3,3,2,2,1,3,3,3,1,1,3,1,2,3,2,3,2,2,2,1,0,2,2,2,
|
|
||||||
0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,
|
|
||||||
3,1,1,3,3,3,3,3,1,2,3,3,3,3,1,2,1,3,3,3,2,2,3,2,1,0,3,2,0,1,1,0,
|
|
||||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
3,1,1,3,3,3,3,3,1,2,3,3,3,3,1,1,0,3,3,3,3,0,2,3,0,0,2,1,0,1,0,0,
|
|
||||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
3,3,3,3,3,3,2,2,3,3,2,2,2,2,3,3,0,1,2,3,2,3,2,2,3,2,1,2,0,2,2,2,
|
|
||||||
0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,
|
|
||||||
3,3,3,3,3,3,1,2,3,3,3,2,1,2,3,3,2,2,2,3,2,3,3,1,3,3,1,1,0,2,3,2,
|
|
||||||
0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
|
|
||||||
3,3,3,1,2,2,2,2,3,3,3,1,1,1,3,3,1,1,3,1,1,3,2,1,2,3,1,1,0,2,2,2,
|
|
||||||
0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
|
|
||||||
3,3,3,2,1,2,1,1,3,3,1,1,1,1,3,3,1,1,2,2,1,2,1,1,2,2,1,1,0,2,2,1,
|
|
||||||
0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
|
|
||||||
3,3,3,1,1,2,1,1,3,3,1,0,1,1,3,3,2,0,1,1,2,3,1,0,2,2,1,0,0,1,3,2,
|
|
||||||
0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
|
|
||||||
3,2,1,3,3,3,3,3,1,2,3,2,3,3,2,1,1,3,2,3,2,1,2,2,0,1,2,1,0,0,1,1,
|
|
||||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,
|
|
||||||
3,3,3,3,2,2,2,2,3,1,2,2,1,1,3,3,0,3,2,1,2,3,2,1,3,3,1,1,0,2,1,3,
|
|
||||||
0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
|
|
||||||
3,3,3,2,2,2,3,2,3,3,3,2,1,1,3,3,1,1,1,2,2,3,2,3,2,2,2,1,0,2,2,1,
|
|
||||||
0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
|
|
||||||
1,0,0,3,3,3,3,3,0,0,3,3,2,3,0,0,0,2,3,3,1,0,1,2,0,0,1,1,0,0,0,0,
|
|
||||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
3,1,2,3,3,3,3,3,1,2,3,3,2,2,1,1,0,3,3,2,2,1,2,2,1,0,2,2,0,1,1,1,
|
|
||||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
3,3,2,2,1,3,1,2,3,3,2,2,1,1,2,2,1,1,1,1,3,2,1,1,1,1,2,1,0,1,2,1,
|
|
||||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,
|
|
||||||
2,3,3,1,1,1,1,1,3,3,3,0,1,1,3,3,1,1,1,1,1,2,2,0,3,1,1,2,0,2,1,1,
|
|
||||||
0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
|
|
||||||
3,1,0,1,2,1,2,2,0,1,2,3,1,2,0,0,0,2,1,1,1,1,1,2,0,0,1,1,0,0,0,0,
|
|
||||||
1,2,1,2,2,2,1,2,1,2,0,2,0,2,2,1,1,2,1,1,2,1,1,1,0,1,0,0,0,1,1,0,
|
|
||||||
1,1,1,2,3,2,3,3,0,1,2,2,3,1,0,1,0,2,1,2,2,0,1,1,0,0,1,1,0,0,0,0,
|
|
||||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
1,0,0,3,3,2,2,1,0,0,3,2,3,2,0,0,0,1,1,3,0,0,1,1,0,0,2,1,0,0,0,0,
|
|
||||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
3,1,1,2,2,3,3,1,0,1,3,2,3,1,1,1,0,1,1,1,1,1,3,1,0,0,2,2,0,0,0,0,
|
|
||||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
3,1,1,1,2,2,2,1,0,1,2,3,3,2,0,0,0,2,1,1,1,2,1,1,1,0,1,1,1,0,0,0,
|
|
||||||
1,2,2,2,2,2,1,1,1,2,0,2,1,1,1,1,1,2,1,1,1,1,1,1,0,1,1,1,0,0,1,1,
|
|
||||||
3,2,2,1,0,0,1,1,2,2,0,3,0,1,2,1,1,0,0,1,1,1,0,1,1,1,1,0,2,1,1,1,
|
|
||||||
2,2,1,1,1,2,1,2,1,1,1,1,1,1,1,2,1,1,1,2,3,1,1,1,1,1,1,1,1,1,0,1,
|
|
||||||
2,3,3,0,1,0,0,0,3,3,1,0,0,1,2,2,1,0,0,0,0,2,0,0,1,1,1,0,2,1,1,1,
|
|
||||||
2,1,1,1,1,1,1,2,1,1,0,1,1,0,1,1,1,0,1,2,1,1,0,1,1,1,1,1,1,1,0,1,
|
|
||||||
2,3,3,0,1,0,0,0,2,2,0,0,0,0,1,2,2,0,0,0,0,1,0,0,1,1,0,0,2,0,1,0,
|
|
||||||
2,1,1,1,1,2,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,2,0,1,1,1,1,1,0,1,
|
|
||||||
3,2,2,0,1,0,1,0,2,3,2,0,0,1,2,2,1,0,0,1,1,1,0,0,2,1,0,1,2,2,1,1,
|
|
||||||
2,1,1,1,1,1,1,2,1,1,1,1,1,1,0,2,1,0,1,1,0,1,1,1,0,1,1,2,1,1,0,1,
|
|
||||||
2,2,2,0,0,1,0,0,2,2,1,1,0,0,2,1,1,0,0,0,1,2,0,0,2,1,0,0,2,1,1,1,
|
|
||||||
2,1,1,1,1,2,1,2,1,1,1,2,2,1,1,2,1,1,1,2,1,1,1,1,1,1,1,1,1,1,0,1,
|
|
||||||
1,2,3,0,0,0,1,0,3,2,1,0,0,1,2,1,1,0,0,0,0,2,1,0,1,1,0,0,2,1,2,1,
|
|
||||||
1,1,0,0,0,1,0,1,1,1,1,1,2,0,0,1,0,0,0,2,0,0,1,1,1,1,1,1,1,1,0,1,
|
|
||||||
3,0,0,2,1,2,2,1,0,0,2,1,2,2,0,0,0,2,1,1,1,0,1,1,0,0,1,1,2,0,0,0,
|
|
||||||
1,2,1,2,2,1,1,2,1,2,0,1,1,1,1,1,1,1,1,1,2,1,1,0,0,1,1,1,1,0,0,1,
|
|
||||||
1,3,2,0,0,0,1,0,2,2,2,0,0,0,2,2,1,0,0,0,0,3,1,1,1,1,0,0,2,1,1,1,
|
|
||||||
2,1,0,1,1,1,0,1,1,1,1,1,1,1,0,2,1,0,0,1,0,1,1,0,1,1,1,1,1,1,0,1,
|
|
||||||
2,3,2,0,0,0,1,0,2,2,0,0,0,0,2,1,1,0,0,0,0,2,1,0,1,1,0,0,2,1,1,0,
|
|
||||||
2,1,1,1,1,2,1,2,1,2,0,1,1,1,0,2,1,1,1,2,1,1,1,1,0,1,1,1,1,1,0,1,
|
|
||||||
3,1,1,2,2,2,3,2,1,1,2,2,1,1,0,1,0,2,2,1,1,1,1,1,0,0,1,1,0,1,1,0,
|
|
||||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
2,2,2,0,0,0,0,0,2,2,0,0,0,0,2,2,1,0,0,0,1,1,0,0,1,2,0,0,2,1,1,1,
|
|
||||||
2,2,1,1,1,2,1,2,1,1,0,1,1,1,1,2,1,1,1,2,1,1,1,1,0,1,2,1,1,1,0,1,
|
|
||||||
1,0,0,1,2,3,2,1,0,0,2,0,1,1,0,0,0,1,1,1,1,0,1,1,0,0,1,0,0,0,0,0,
|
|
||||||
1,2,1,2,1,2,1,1,1,2,0,2,1,1,1,0,1,2,0,0,1,1,1,0,0,0,0,0,0,0,0,0,
|
|
||||||
2,3,2,0,0,0,0,0,1,1,2,1,0,0,1,1,1,0,0,0,0,2,0,0,1,1,0,0,2,1,1,1,
|
|
||||||
2,1,1,1,1,1,1,2,1,0,1,1,1,1,0,2,1,1,1,1,1,1,0,1,0,1,1,1,1,1,0,1,
|
|
||||||
1,2,2,0,1,1,1,0,2,2,2,0,0,0,3,2,1,0,0,0,1,1,0,0,1,1,0,1,1,1,0,0,
|
|
||||||
1,1,0,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,2,1,1,1,0,0,1,1,1,0,1,0,1,
|
|
||||||
2,1,0,2,1,1,2,2,1,1,2,1,1,1,0,0,0,1,1,0,1,1,1,1,0,0,1,1,1,0,0,0,
|
|
||||||
1,2,2,2,2,2,1,1,1,2,0,2,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,0,0,0,1,0,
|
|
||||||
1,2,3,0,0,0,1,0,2,2,0,0,0,0,2,2,0,0,0,0,0,1,0,0,1,0,0,0,2,0,1,0,
|
|
||||||
2,1,1,1,1,1,0,2,0,0,0,1,2,1,1,1,1,0,1,2,0,1,0,1,0,1,1,1,0,1,0,1,
|
|
||||||
2,2,2,0,0,0,1,0,2,1,2,0,0,0,1,1,2,0,0,0,0,1,0,0,1,1,0,0,2,1,0,1,
|
|
||||||
2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,2,0,1,1,1,1,1,0,1,
|
|
||||||
1,2,2,0,0,0,1,0,2,2,2,0,0,0,1,1,0,0,0,0,0,1,1,0,2,0,0,1,1,1,0,1,
|
|
||||||
1,0,1,1,1,1,1,1,0,1,1,1,1,0,0,1,0,0,1,1,0,1,0,1,1,1,1,1,0,0,0,1,
|
|
||||||
1,0,0,1,0,1,2,1,0,0,1,1,1,2,0,0,0,1,1,0,1,0,1,1,0,0,1,0,0,0,0,0,
|
|
||||||
0,2,1,2,1,1,1,1,1,2,0,2,0,1,1,0,1,2,1,0,1,1,1,0,0,0,0,0,0,1,0,0,
|
|
||||||
2,1,1,0,1,2,0,0,1,1,1,0,0,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,2,1,0,1,
|
|
||||||
2,2,1,1,1,1,1,2,1,1,0,1,1,1,1,2,1,1,1,2,1,1,0,1,0,1,1,1,1,1,0,1,
|
|
||||||
1,2,2,0,0,0,0,0,1,1,0,0,0,0,2,1,0,0,0,0,0,2,0,0,2,2,0,0,2,0,0,1,
|
|
||||||
2,1,1,1,1,1,1,1,0,1,1,0,1,1,0,1,0,0,0,1,1,1,1,0,0,1,1,1,1,0,0,1,
|
|
||||||
1,1,2,0,0,3,1,0,2,1,1,1,0,0,1,1,1,0,0,0,1,1,0,0,0,1,0,0,1,0,1,0,
|
|
||||||
1,2,1,0,1,1,1,2,1,1,0,1,1,1,1,1,0,0,0,1,1,1,1,1,0,1,0,0,0,1,0,0,
|
|
||||||
2,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,0,0,2,0,0,0,
|
|
||||||
2,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,2,1,1,0,0,1,1,1,1,1,0,1,
|
|
||||||
2,1,1,1,2,1,1,1,0,1,1,2,1,0,0,0,0,1,1,1,1,0,1,0,0,0,0,1,0,0,0,0,
|
|
||||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
1,1,0,1,1,1,1,1,0,0,1,1,2,1,0,0,0,1,1,0,0,0,1,1,0,0,1,0,1,0,0,0,
|
|
||||||
1,2,1,1,1,1,1,1,1,1,0,1,0,1,1,1,1,1,1,0,1,1,1,0,0,0,0,0,0,1,0,0,
|
|
||||||
2,0,0,0,1,1,1,1,0,0,1,1,0,0,0,0,0,1,1,1,2,0,0,1,0,0,1,0,1,0,0,0,
|
|
||||||
0,1,1,1,1,1,1,1,1,2,0,1,1,1,1,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,0,
|
|
||||||
1,0,0,1,1,1,1,1,0,0,2,1,0,1,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,
|
|
||||||
0,1,1,1,1,1,1,0,1,1,0,1,0,1,1,0,1,1,0,0,1,1,1,0,0,0,0,0,0,0,0,0,
|
|
||||||
1,0,0,1,1,1,0,0,0,0,1,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,
|
|
||||||
0,1,1,1,1,1,0,0,1,1,0,1,0,1,0,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,0,
|
|
||||||
0,0,0,1,0,0,0,0,0,0,1,1,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
0,1,1,1,0,1,0,0,1,1,0,1,0,1,1,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,0,
|
|
||||||
2,1,1,1,1,1,1,1,1,1,1,0,0,1,1,1,0,0,1,0,0,1,0,1,0,1,1,1,0,0,1,0,
|
|
||||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
1,0,0,1,1,1,1,0,0,0,1,1,1,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
0,1,1,1,1,1,1,0,1,1,0,1,0,1,0,0,1,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
};
|
|
||||||
|
|
||||||
const SequenceModel Latin2HungarianModel(
|
|
||||||
Latin2_HungarianCharToOrderMap,
|
|
||||||
HungarianLangModel,
|
|
||||||
(float)0.947368,
|
|
||||||
PR_TRUE,
|
|
||||||
"ISO-8859-2");
|
|
||||||
|
|
||||||
const SequenceModel Win1250HungarianModel(
|
|
||||||
win1250HungarianCharToOrderMap,
|
|
||||||
HungarianLangModel,
|
|
||||||
(float)0.947368,
|
|
||||||
PR_TRUE,
|
|
||||||
"windows-1250");
|
|
265
PowerEditor/src/uchardet/LangModels/LangArabicModel.cpp
Normal file
265
PowerEditor/src/uchardet/LangModels/LangArabicModel.cpp
Normal file
@ -0,0 +1,265 @@
|
|||||||
|
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
||||||
|
/* ***** BEGIN LICENSE BLOCK *****
|
||||||
|
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||||
|
*
|
||||||
|
* The contents of this file are subject to the Mozilla Public License Version
|
||||||
|
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
* http://www.mozilla.org/MPL/
|
||||||
|
*
|
||||||
|
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||||
|
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||||
|
* for the specific language governing rights and limitations under the
|
||||||
|
* License.
|
||||||
|
*
|
||||||
|
* The Original Code is Mozilla Communicator client code.
|
||||||
|
*
|
||||||
|
* The Initial Developer of the Original Code is
|
||||||
|
* Netscape Communications Corporation.
|
||||||
|
* Portions created by the Initial Developer are Copyright (C) 1998
|
||||||
|
* the Initial Developer. All Rights Reserved.
|
||||||
|
*
|
||||||
|
* Contributor(s):
|
||||||
|
*
|
||||||
|
* Alternatively, the contents of this file may be used under the terms of
|
||||||
|
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||||
|
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||||
|
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||||
|
* of those above. If you wish to allow use of your version of this file only
|
||||||
|
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||||
|
* use your version of this file under the terms of the MPL, indicate your
|
||||||
|
* decision by deleting the provisions above and replace them with the notice
|
||||||
|
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||||
|
* the provisions above, a recipient may use your version of this file under
|
||||||
|
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||||
|
*
|
||||||
|
* ***** END LICENSE BLOCK ***** */
|
||||||
|
|
||||||
|
#include "../nsSBCharSetProber.h"
|
||||||
|
|
||||||
|
/********* Language model for: Arabic *********/
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Generated by BuildLangModel.py
|
||||||
|
* On: 2015-12-13 18:33:58.848027
|
||||||
|
**/
|
||||||
|
|
||||||
|
/* Character Mapping Table:
|
||||||
|
* ILL: illegal character.
|
||||||
|
* CTR: control character specific to the charset.
|
||||||
|
* RET: carriage/return.
|
||||||
|
* SYM: symbol (punctuation) that does not belong to word.
|
||||||
|
* NUM: 0 - 9.
|
||||||
|
*
|
||||||
|
* Other characters are ordered by probabilities
|
||||||
|
* (0 is the most common character in the language).
|
||||||
|
*
|
||||||
|
* Orders are generic to a language. So the codepoint with order X in
|
||||||
|
* CHARSET1 maps to the same character as the codepoint with the same
|
||||||
|
* order X in CHARSET2 for the same language.
|
||||||
|
* As such, it is possible to get missing order. For instance the
|
||||||
|
* ligature of 'o' and 'e' exists in ISO-8859-15 but not in ISO-8859-1
|
||||||
|
* even though they are both used for French. Same for the euro sign.
|
||||||
|
*/
|
||||||
|
static const unsigned char Iso_8859_6_CharToOrderMap[] =
|
||||||
|
{
|
||||||
|
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */
|
||||||
|
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
|
||||||
|
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
|
||||||
|
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
|
||||||
|
SYM, 52, 72, 61, 68, 74, 69, 59, 78, 60, 90, 86, 67, 65, 71, 75, /* 4X */
|
||||||
|
64, 85, 76, 55, 57, 79, 81, 70, 82, 87, 91,SYM,SYM,SYM,SYM,SYM, /* 5X */
|
||||||
|
SYM, 37, 58, 49, 47, 38, 54, 66, 46, 39, 88, 63, 45, 51, 43, 40, /* 6X */
|
||||||
|
62, 89, 42, 44, 41, 50, 77, 73, 83, 56, 80,SYM,SYM,SYM,SYM,CTR, /* 7X */
|
||||||
|
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */
|
||||||
|
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */
|
||||||
|
SYM,ILL,ILL,ILL,SYM,ILL,ILL,ILL,ILL,ILL,ILL,ILL,SYM,SYM,ILL,ILL, /* AX */
|
||||||
|
ILL,ILL,ILL,ILL,ILL,ILL,ILL,ILL,ILL,ILL,ILL,SYM,ILL,ILL,ILL,SYM, /* BX */
|
||||||
|
ILL, 32, 34, 15, 35, 22, 31, 0, 9, 8, 7, 27, 19, 18, 25, 11, /* CX */
|
||||||
|
30, 5, 26, 12, 21, 23, 28,SYM, 33, 10, 29,ILL,ILL,ILL,ILL,ILL, /* DX */
|
||||||
|
36, 13, 14, 17, 1, 3, 6, 16, 4, 24, 2,SYM,SYM,SYM,SYM,SYM, /* EX */
|
||||||
|
SYM,SYM,SYM,ILL,ILL,ILL,ILL,ILL,ILL,ILL,ILL,ILL,ILL,ILL,ILL,ILL, /* FX */
|
||||||
|
};
|
||||||
|
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
|
||||||
|
|
||||||
|
static const unsigned char Windows_1256_CharToOrderMap[] =
|
||||||
|
{
|
||||||
|
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */
|
||||||
|
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
|
||||||
|
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
|
||||||
|
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
|
||||||
|
SYM, 52, 72, 61, 68, 74, 69, 59, 78, 60, 90, 86, 67, 65, 71, 75, /* 4X */
|
||||||
|
64, 85, 76, 55, 57, 79, 81, 70, 82, 87, 91,SYM,SYM,SYM,SYM,SYM, /* 5X */
|
||||||
|
SYM, 37, 58, 49, 47, 38, 54, 66, 46, 39, 88, 63, 45, 51, 43, 40, /* 6X */
|
||||||
|
62, 89, 42, 44, 41, 50, 77, 73, 83, 56, 80,SYM,SYM,SYM,SYM,CTR, /* 7X */
|
||||||
|
SYM, 48,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 95,SYM, 96, 92, 97, 98, /* 8X */
|
||||||
|
53,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 84,SYM, 99,SYM,100,SYM,SYM,101, /* 9X */
|
||||||
|
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,102,SYM,SYM,SYM,SYM,SYM, /* AX */
|
||||||
|
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */
|
||||||
|
103, 32, 34, 15, 35, 22, 31, 0, 9, 8, 7, 27, 19, 18, 25, 11, /* CX */
|
||||||
|
30, 5, 26, 12, 21, 23, 28,SYM, 20, 33, 10, 29, 36, 13, 14, 17, /* DX */
|
||||||
|
104, 1, 93, 3, 6, 16, 4,105,106, 94,107,108, 24, 2,109,110, /* EX */
|
||||||
|
SYM,SYM,SYM,SYM,111,SYM,SYM,SYM,SYM,112,SYM,113,114,SYM,SYM,115, /* FX */
|
||||||
|
};
|
||||||
|
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
|
||||||
|
|
||||||
|
|
||||||
|
/* Model Table:
|
||||||
|
* Total sequences: 1479
|
||||||
|
* First 512 sequences: 0.9696025116913417
|
||||||
|
* Next 512 sequences (512-1024): 0.029166911858880054
|
||||||
|
* Rest: 0.0012305764497782395
|
||||||
|
* Negative sequences: TODO
|
||||||
|
*/
|
||||||
|
static const PRUint8 ArabicLangModel[] =
|
||||||
|
{
|
||||||
|
2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,3,3,3,3,3,3,1,3,1,3,3,3,3,2,2,3,
|
||||||
|
3,2,1,2,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,
|
||||||
|
1,2,3,2,2,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,0,3,1,3,3,3,3,2,2,3,
|
||||||
|
2,2,0,2,2,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,1,3,2,3,3,3,2,2,2,2,
|
||||||
|
0,2,1,3,1,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,
|
||||||
|
2,2,2,1,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,1,2,3,2,3,2,3,3,2,3,
|
||||||
|
1,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
3,2,3,3,3,2,2,3,3,3,3,3,3,3,3,1,3,2,3,3,3,3,0,3,2,2,3,2,2,2,3,2,
|
||||||
|
0,3,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,3,2,3,3,2,2,
|
||||||
|
0,3,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
2,0,0,0,0,0,0,0,0,1,3,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,2,3,3,2,2,1,2,2,2,2,2,2,2,
|
||||||
|
1,2,2,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
3,3,3,3,3,3,3,3,3,3,2,3,3,2,3,0,3,2,0,2,2,3,0,3,2,0,3,3,3,0,2,0,
|
||||||
|
0,3,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,2,2,0,2,0,0,3,3,2,3,0,2,0,2,
|
||||||
|
2,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,2,3,3,2,3,3,1,0,0,2,2,0,1,0,1,0,1,
|
||||||
|
0,0,0,2,1,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
3,3,3,2,3,3,3,3,3,2,3,2,3,2,3,2,3,2,2,2,2,2,2,2,2,2,2,1,3,2,2,2,
|
||||||
|
1,3,0,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,0,2,0,2,1,3,2,0,3,2,0,2,0,3,0,2,0,
|
||||||
|
0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
0,3,3,3,3,3,3,3,2,3,3,3,3,3,3,0,3,3,3,3,3,3,0,3,2,3,2,3,2,3,2,2,
|
||||||
|
0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
3,3,3,3,3,3,3,2,3,3,1,3,2,1,2,0,2,2,0,3,2,2,0,0,2,0,2,1,2,0,3,0,
|
||||||
|
0,1,0,2,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
3,3,3,3,3,3,3,3,3,3,2,2,3,2,2,2,2,2,2,2,2,2,2,1,0,2,3,3,0,1,3,0,
|
||||||
|
0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
3,3,3,3,3,3,2,3,3,2,1,3,3,3,3,0,2,3,0,3,2,2,0,3,2,0,3,2,3,0,2,0,
|
||||||
|
0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
3,3,3,3,3,3,3,3,3,3,3,3,2,2,0,2,3,1,2,1,0,1,0,0,1,0,3,2,0,2,2,2,
|
||||||
|
0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
3,3,3,2,3,3,3,2,3,3,2,2,3,2,3,2,2,0,2,1,2,1,1,0,2,1,0,0,0,1,0,2,
|
||||||
|
1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
3,2,3,3,3,3,2,3,2,3,3,2,1,2,2,2,3,3,2,2,2,0,0,0,2,3,1,0,0,2,1,2,
|
||||||
|
0,1,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
0,3,3,3,1,2,3,2,0,2,3,3,3,2,3,0,2,2,2,3,2,2,0,3,0,2,2,2,3,2,3,1,
|
||||||
|
0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
3,3,3,3,3,3,3,2,3,3,2,3,0,3,2,0,2,1,3,0,2,0,0,2,2,2,0,0,0,2,0,0,
|
||||||
|
0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,
|
||||||
|
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
3,3,3,3,3,3,2,3,2,3,2,3,2,2,0,0,2,0,0,1,3,2,0,3,0,1,2,0,2,0,2,0,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
3,3,3,3,3,3,2,2,3,3,2,2,0,2,2,1,2,2,2,2,0,0,0,0,1,2,2,0,0,1,0,2,
|
||||||
|
2,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
3,3,3,3,3,3,3,2,3,2,2,1,1,2,3,1,2,2,0,0,0,0,0,0,1,0,0,2,0,1,0,0,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
3,3,3,3,3,3,2,2,3,2,3,2,0,2,0,1,2,0,2,1,2,0,0,0,2,2,0,0,0,2,0,2,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
3,3,3,3,3,3,2,2,3,2,1,2,2,2,0,0,2,0,0,2,2,1,0,2,1,0,2,0,2,0,2,0,
|
||||||
|
0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
3,3,3,2,2,2,2,2,2,2,0,0,0,2,2,0,3,3,0,2,0,0,0,0,2,2,0,0,0,0,0,1,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
3,3,3,3,2,3,2,2,3,2,2,2,2,2,2,0,2,2,2,2,2,2,0,1,0,1,2,0,1,1,1,0,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
2,0,1,1,1,0,0,2,2,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
3,2,2,3,2,2,1,2,3,2,0,0,0,2,0,0,3,0,0,0,1,0,0,0,2,0,0,0,0,0,0,0,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
1,3,2,2,2,3,2,2,0,2,0,2,2,2,2,0,1,2,1,1,0,2,0,1,0,3,1,2,0,1,2,1,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
2,2,2,2,3,2,1,2,1,1,0,2,2,0,2,0,2,2,0,0,0,2,0,0,2,2,1,2,0,0,0,0,
|
||||||
|
0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
1,0,1,0,1,1,1,0,1,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,1,0,0,0,0,
|
||||||
|
0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
0,0,0,0,0,0,2,2,1,2,2,2,2,2,1,2,0,2,1,2,0,0,1,0,1,0,1,0,0,0,1,1,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,
|
||||||
|
0,0,0,0,0,2,2,1,1,2,2,2,2,2,0,2,0,2,1,2,0,0,1,0,0,0,2,0,0,0,1,2,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
0,0,0,0,0,2,2,1,2,2,2,2,2,2,0,2,0,2,1,2,0,0,1,0,0,0,1,0,0,0,1,1,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
0,0,0,0,0,1,0,1,2,2,2,2,2,1,1,2,0,2,2,2,0,0,2,0,0,0,1,0,0,0,2,2,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
0,0,0,0,0,2,2,2,2,2,2,1,2,2,2,1,0,1,1,1,0,0,0,0,2,0,2,0,0,0,1,0,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
0,0,0,0,0,2,2,2,2,2,1,2,2,2,0,1,0,2,1,2,0,0,0,0,2,0,1,0,0,0,0,2,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
0,0,0,0,0,2,2,2,2,2,0,1,2,1,1,2,0,2,1,0,0,0,1,0,1,0,0,0,0,0,0,1,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
0,0,0,0,0,2,2,2,1,2,0,0,2,1,2,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,1,0,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
0,0,0,0,0,2,2,2,2,1,0,0,1,2,0,2,0,0,1,0,0,0,1,1,1,0,1,0,0,0,0,0,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
0,0,0,0,0,2,2,2,2,2,2,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
0,0,0,0,0,2,2,2,1,1,1,1,1,0,1,1,0,0,1,1,0,0,0,0,1,0,0,0,0,0,0,0,
|
||||||
|
2,2,1,0,2,2,1,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
0,0,0,0,0,2,2,2,2,2,1,0,0,1,2,0,0,1,2,0,0,0,0,0,1,0,0,0,0,0,0,1,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
0,0,0,0,0,1,1,1,0,2,2,2,2,1,0,2,0,1,0,2,0,0,0,0,0,0,2,0,0,0,0,1,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
0,0,0,0,0,2,2,2,2,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,2,0,0,0,0,0,1,0,
|
||||||
|
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
0,0,0,0,0,0,0,0,0,2,2,2,1,1,0,0,0,1,0,0,0,0,0,0,0,1,1,0,1,0,0,1,
|
||||||
|
2,2,2,0,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
0,0,0,0,0,0,1,2,1,0,1,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
0,0,0,0,0,2,1,0,2,2,0,0,0,0,0,0,0,1,0,0,2,0,0,0,0,1,0,0,1,2,2,0,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
0,0,0,0,0,1,0,1,0,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
0,0,0,0,0,0,1,2,1,0,1,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
0,0,0,0,0,2,0,1,1,0,1,0,1,1,0,1,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,0,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
0,0,0,0,0,2,0,0,2,0,2,0,0,1,0,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
0,0,0,0,0,0,0,0,0,1,0,2,1,1,0,0,0,0,0,0,1,0,0,2,0,1,0,2,0,0,0,0,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
0,0,0,0,0,1,0,1,1,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
0,0,0,0,0,2,1,0,1,0,0,0,0,1,1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,1,0,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
0,0,0,0,0,0,1,1,1,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
const SequenceModel Iso_8859_6ArabicModel =
|
||||||
|
{
|
||||||
|
Iso_8859_6_CharToOrderMap,
|
||||||
|
ArabicLangModel,
|
||||||
|
64,
|
||||||
|
(float)0.9696025116913417,
|
||||||
|
PR_FALSE,
|
||||||
|
"ISO-8859-6"
|
||||||
|
};
|
||||||
|
|
||||||
|
const SequenceModel Windows_1256ArabicModel =
|
||||||
|
{
|
||||||
|
Windows_1256_CharToOrderMap,
|
||||||
|
ArabicLangModel,
|
||||||
|
64,
|
||||||
|
(float)0.9696025116913417,
|
||||||
|
PR_FALSE,
|
||||||
|
"WINDOWS-1256"
|
||||||
|
};
|
@ -35,12 +35,12 @@
|
|||||||
*
|
*
|
||||||
* ***** END LICENSE BLOCK ***** */
|
* ***** END LICENSE BLOCK ***** */
|
||||||
|
|
||||||
#include "nsSBCharSetProber.h"
|
#include "../nsSBCharSetProber.h"
|
||||||
/****************************************************************
|
/****************************************************************
|
||||||
255: Control characters that usually does not exist in any text
|
CTR: Control characters that usually does not exist in any text
|
||||||
254: Carriage/Return
|
RET: Carriage/Return
|
||||||
253: symbol (punctuation) that does not belong to word
|
SYM: symbol (punctuation) that does not belong to word
|
||||||
252: 0 - 9
|
NUM: 0 - 9
|
||||||
|
|
||||||
*****************************************************************/
|
*****************************************************************/
|
||||||
|
|
||||||
@ -50,14 +50,14 @@
|
|||||||
|
|
||||||
static const unsigned char Latin5_BulgarianCharToOrderMap[] =
|
static const unsigned char Latin5_BulgarianCharToOrderMap[] =
|
||||||
{
|
{
|
||||||
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, //00
|
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, //00
|
||||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, //10
|
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, //10
|
||||||
+253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, //20
|
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, //20
|
||||||
252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, //30
|
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, //30
|
||||||
253, 77, 90, 99,100, 72,109,107,101, 79,185, 81,102, 76, 94, 82, //40
|
SYM, 77, 90, 99,100, 72,109,107,101, 79,185, 81,102, 76, 94, 82, //40
|
||||||
110,186,108, 91, 74,119, 84, 96,111,187,115,253,253,253,253,253, //50
|
110,186,108, 91, 74,119, 84, 96,111,187,115,SYM,SYM,SYM,SYM,SYM, //50
|
||||||
253, 65, 69, 70, 66, 63, 68,112,103, 92,194,104, 95, 86, 87, 71, //60
|
SYM, 65, 69, 70, 66, 63, 68,112,103, 92,194,104, 95, 86, 87, 71, //60
|
||||||
116,195, 85, 93, 97,113,196,197,198,199,200,253,253,253,253,253, //70
|
116,195, 85, 93, 97,113,196,197,198,199,200,SYM,SYM,SYM,SYM,SYM, //70
|
||||||
194,195,196,197,198,199,200,201,202,203,204,205,206,207,208,209, //80
|
194,195,196,197,198,199,200,201,202,203,204,205,206,207,208,209, //80
|
||||||
210,211,212,213,214,215,216,217,218,219,220,221,222,223,224,225, //90
|
210,211,212,213,214,215,216,217,218,219,220,221,222,223,224,225, //90
|
||||||
81,226,227,228,229,230,105,231,232,233,234,235,236, 45,237,238, //a0
|
81,226,227,228,229,230,105,231,232,233,234,235,236, 45,237,238, //a0
|
||||||
@ -65,27 +65,27 @@ static const unsigned char Latin5_BulgarianCharToOrderMap[] =
|
|||||||
39, 28, 34, 51, 48, 49, 53, 50, 54, 57, 61,239, 67,240, 60, 56, //c0
|
39, 28, 34, 51, 48, 49, 53, 50, 54, 57, 61,239, 67,240, 60, 56, //c0
|
||||||
1, 18, 9, 20, 11, 3, 23, 15, 2, 26, 12, 10, 14, 6, 4, 13, //d0
|
1, 18, 9, 20, 11, 3, 23, 15, 2, 26, 12, 10, 14, 6, 4, 13, //d0
|
||||||
7, 8, 5, 19, 29, 25, 22, 21, 27, 24, 17, 75, 52,241, 42, 16, //e0
|
7, 8, 5, 19, 29, 25, 22, 21, 27, 24, 17, 75, 52,241, 42, 16, //e0
|
||||||
62,242,243,244, 58,245, 98,246,247,248,249,250,251, 91,252,253, //f0
|
62,242,243,244, 58,245, 98,246,247,248,249,250,251, 91,NUM,SYM, //f0
|
||||||
};
|
};
|
||||||
|
|
||||||
static const unsigned char win1251BulgarianCharToOrderMap[] =
|
static const unsigned char win1251BulgarianCharToOrderMap[] =
|
||||||
{
|
{
|
||||||
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, //00
|
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, //00
|
||||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, //10
|
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, //10
|
||||||
+253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, //20
|
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, //20
|
||||||
252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, //30
|
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, //30
|
||||||
253, 77, 90, 99,100, 72,109,107,101, 79,185, 81,102, 76, 94, 82, //40
|
SYM, 77, 90, 99,100, 72,109,107,101, 79,185, 81,102, 76, 94, 82, //40
|
||||||
110,186,108, 91, 74,119, 84, 96,111,187,115,253,253,253,253,253, //50
|
110,186,108, 91, 74,119, 84, 96,111,187,115,SYM,SYM,SYM,SYM,SYM, //50
|
||||||
253, 65, 69, 70, 66, 63, 68,112,103, 92,194,104, 95, 86, 87, 71, //60
|
SYM, 65, 69, 70, 66, 63, 68,112,103, 92,194,104, 95, 86, 87, 71, //60
|
||||||
116,195, 85, 93, 97,113,196,197,198,199,200,253,253,253,253,253, //70
|
116,195, 85, 93, 97,113,196,197,198,199,200,SYM,SYM,SYM,SYM,SYM, //70
|
||||||
206,207,208,209,210,211,212,213,120,214,215,216,217,218,219,220, //80
|
206,207,208,209,210,211,212,213,120,214,215,216,217,218,219,220, //80
|
||||||
221, 78, 64, 83,121, 98,117,105,222,223,224,225,226,227,228,229, //90
|
221, 78, 64, 83,121, 98,117,105,ILL,223,224,225,226,227,228,229, //90
|
||||||
88,230,231,232,233,122, 89,106,234,235,236,237,238, 45,239,240, //a0
|
88,230,231,232,233,122, 89,106,234,235,236,237,238, 45,239,240, //a0
|
||||||
73, 80,118,114,241,242,243,244,245, 62, 58,246,247,248,249,250, //b0
|
73, 80,118,114,241,242,243,244,245, 62, 58,246,247,248,249,250, //b0
|
||||||
31, 32, 35, 43, 37, 44, 55, 47, 40, 59, 33, 46, 38, 36, 41, 30, //c0
|
31, 32, 35, 43, 37, 44, 55, 47, 40, 59, 33, 46, 38, 36, 41, 30, //c0
|
||||||
39, 28, 34, 51, 48, 49, 53, 50, 54, 57, 61,251, 67,252, 60, 56, //d0
|
39, 28, 34, 51, 48, 49, 53, 50, 54, 57, 61,251, 67,NUM, 60, 56, //d0
|
||||||
1, 18, 9, 20, 11, 3, 23, 15, 2, 26, 12, 10, 14, 6, 4, 13, //e0
|
1, 18, 9, 20, 11, 3, 23, 15, 2, 26, 12, 10, 14, 6, 4, 13, //e0
|
||||||
7, 8, 5, 19, 29, 25, 22, 21, 27, 24, 17, 75, 52,253, 42, 16, //f0
|
7, 8, 5, 19, 29, 25, 22, 21, 27, 24, 17, 75, 52,SYM, 42, 16, //f0
|
||||||
};
|
};
|
||||||
|
|
||||||
//Model Table:
|
//Model Table:
|
||||||
@ -226,18 +226,22 @@ static const PRUint8 BulgarianLangModel[] =
|
|||||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
|
||||||
};
|
};
|
||||||
|
|
||||||
const SequenceModel Latin5BulgarianModel(
|
const SequenceModel Latin5BulgarianModel =
|
||||||
|
{
|
||||||
Latin5_BulgarianCharToOrderMap,
|
Latin5_BulgarianCharToOrderMap,
|
||||||
BulgarianLangModel,
|
BulgarianLangModel,
|
||||||
|
64,
|
||||||
(float)0.969392,
|
(float)0.969392,
|
||||||
PR_FALSE,
|
PR_FALSE,
|
||||||
"ISO-8859-5"
|
"ISO-8859-5"
|
||||||
);
|
};
|
||||||
|
|
||||||
const SequenceModel Win1251BulgarianModel(
|
const SequenceModel Win1251BulgarianModel =
|
||||||
|
{
|
||||||
win1251BulgarianCharToOrderMap,
|
win1251BulgarianCharToOrderMap,
|
||||||
BulgarianLangModel,
|
BulgarianLangModel,
|
||||||
|
64,
|
||||||
(float)0.969392,
|
(float)0.969392,
|
||||||
PR_FALSE,
|
PR_FALSE,
|
||||||
"windows-1251"
|
"WINDOWS-1251"
|
||||||
);
|
};
|
198
PowerEditor/src/uchardet/LangModels/LangDanishModel.cpp
Normal file
198
PowerEditor/src/uchardet/LangModels/LangDanishModel.cpp
Normal file
@ -0,0 +1,198 @@
|
|||||||
|
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
||||||
|
/* ***** BEGIN LICENSE BLOCK *****
|
||||||
|
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||||
|
*
|
||||||
|
* The contents of this file are subject to the Mozilla Public License Version
|
||||||
|
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
* http://www.mozilla.org/MPL/
|
||||||
|
*
|
||||||
|
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||||
|
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||||
|
* for the specific language governing rights and limitations under the
|
||||||
|
* License.
|
||||||
|
*
|
||||||
|
* The Original Code is Mozilla Communicator client code.
|
||||||
|
*
|
||||||
|
* The Initial Developer of the Original Code is
|
||||||
|
* Netscape Communications Corporation.
|
||||||
|
* Portions created by the Initial Developer are Copyright (C) 1998
|
||||||
|
* the Initial Developer. All Rights Reserved.
|
||||||
|
*
|
||||||
|
* Contributor(s):
|
||||||
|
*
|
||||||
|
* Alternatively, the contents of this file may be used under the terms of
|
||||||
|
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||||
|
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||||
|
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||||
|
* of those above. If you wish to allow use of your version of this file only
|
||||||
|
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||||
|
* use your version of this file under the terms of the MPL, indicate your
|
||||||
|
* decision by deleting the provisions above and replace them with the notice
|
||||||
|
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||||
|
* the provisions above, a recipient may use your version of this file under
|
||||||
|
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||||
|
*
|
||||||
|
* ***** END LICENSE BLOCK ***** */
|
||||||
|
|
||||||
|
#include "../nsSBCharSetProber.h"
|
||||||
|
|
||||||
|
/********* Language model for: Danish *********/
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Generated by BuildLangModel.py
|
||||||
|
* On: 2016-02-19 17:56:42.163975
|
||||||
|
**/
|
||||||
|
|
||||||
|
/* Character Mapping Table:
|
||||||
|
* ILL: illegal character.
|
||||||
|
* CTR: control character specific to the charset.
|
||||||
|
* RET: carriage/return.
|
||||||
|
* SYM: symbol (punctuation) that does not belong to word.
|
||||||
|
* NUM: 0 - 9.
|
||||||
|
*
|
||||||
|
* Other characters are ordered by probabilities
|
||||||
|
* (0 is the most common character in the language).
|
||||||
|
*
|
||||||
|
* Orders are generic to a language. So the codepoint with order X in
|
||||||
|
* CHARSET1 maps to the same character as the codepoint with the same
|
||||||
|
* order X in CHARSET2 for the same language.
|
||||||
|
* As such, it is possible to get missing order. For instance the
|
||||||
|
* ligature of 'o' and 'e' exists in ISO-8859-15 but not in ISO-8859-1
|
||||||
|
* even though they are both used for French. Same for the euro sign.
|
||||||
|
*/
|
||||||
|
static const unsigned char Iso_8859_15_CharToOrderMap[] =
|
||||||
|
{
|
||||||
|
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */
|
||||||
|
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
|
||||||
|
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
|
||||||
|
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
|
||||||
|
SYM, 4, 15, 24, 7, 0, 13, 10, 18, 5, 23, 11, 8, 12, 2, 9, /* 4X */
|
||||||
|
17, 29, 1, 6, 3, 16, 14, 25, 27, 20, 26,SYM,SYM,SYM,SYM,SYM, /* 5X */
|
||||||
|
SYM, 4, 15, 24, 7, 0, 13, 10, 18, 5, 23, 11, 8, 12, 2, 9, /* 6X */
|
||||||
|
17, 29, 1, 6, 3, 16, 14, 25, 27, 20, 26,SYM,SYM,SYM,SYM,CTR, /* 7X */
|
||||||
|
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */
|
||||||
|
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */
|
||||||
|
SYM,SYM,SYM,SYM,SYM,SYM, 39,SYM, 39,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */
|
||||||
|
SYM,SYM,SYM,SYM, 53, 42,SYM,SYM, 54,SYM,SYM,SYM, 55, 56, 57,SYM, /* BX */
|
||||||
|
58, 33, 40, 35, 32, 21, 22, 38, 41, 28, 49, 45, 59, 34, 60, 50, /* CX */
|
||||||
|
43, 47, 51, 36, 52, 61, 30,SYM, 19, 62, 37, 44, 31, 46, 63, 48, /* DX */
|
||||||
|
64, 33, 40, 35, 32, 21, 22, 38, 41, 28, 49, 45, 65, 34, 66, 50, /* EX */
|
||||||
|
43, 47, 51, 36, 52, 67, 30,SYM, 19, 68, 37, 44, 31, 46, 69, 70, /* FX */
|
||||||
|
};
|
||||||
|
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
|
||||||
|
|
||||||
|
static const unsigned char Iso_8859_1_CharToOrderMap[] =
|
||||||
|
{
|
||||||
|
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */
|
||||||
|
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
|
||||||
|
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
|
||||||
|
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
|
||||||
|
SYM, 4, 15, 24, 7, 0, 13, 10, 18, 5, 23, 11, 8, 12, 2, 9, /* 4X */
|
||||||
|
17, 29, 1, 6, 3, 16, 14, 25, 27, 20, 26,SYM,SYM,SYM,SYM,SYM, /* 5X */
|
||||||
|
SYM, 4, 15, 24, 7, 0, 13, 10, 18, 5, 23, 11, 8, 12, 2, 9, /* 6X */
|
||||||
|
17, 29, 1, 6, 3, 16, 14, 25, 27, 20, 26,SYM,SYM,SYM,SYM,CTR, /* 7X */
|
||||||
|
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */
|
||||||
|
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */
|
||||||
|
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */
|
||||||
|
SYM,SYM,SYM,SYM,SYM, 42,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */
|
||||||
|
71, 33, 40, 35, 32, 21, 22, 38, 41, 28, 49, 45, 72, 34, 73, 50, /* CX */
|
||||||
|
43, 47, 51, 36, 52, 74, 30,SYM, 19, 75, 37, 44, 31, 46, 76, 48, /* DX */
|
||||||
|
77, 33, 40, 35, 32, 21, 22, 38, 41, 28, 49, 45, 78, 34, 79, 50, /* EX */
|
||||||
|
43, 47, 51, 36, 52, 80, 30,SYM, 19, 81, 37, 44, 31, 46, 82, 83, /* FX */
|
||||||
|
};
|
||||||
|
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
|
||||||
|
|
||||||
|
static const unsigned char Windows_1252_CharToOrderMap[] =
|
||||||
|
{
|
||||||
|
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */
|
||||||
|
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
|
||||||
|
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
|
||||||
|
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
|
||||||
|
SYM, 4, 15, 24, 7, 0, 13, 10, 18, 5, 23, 11, 8, 12, 2, 9, /* 4X */
|
||||||
|
17, 29, 1, 6, 3, 16, 14, 25, 27, 20, 26,SYM,SYM,SYM,SYM,SYM, /* 5X */
|
||||||
|
SYM, 4, 15, 24, 7, 0, 13, 10, 18, 5, 23, 11, 8, 12, 2, 9, /* 6X */
|
||||||
|
17, 29, 1, 6, 3, 16, 14, 25, 27, 20, 26,SYM,SYM,SYM,SYM,CTR, /* 7X */
|
||||||
|
SYM,ILL,SYM, 84,SYM,SYM,SYM,SYM,SYM,SYM, 39,SYM, 85,ILL, 86,ILL, /* 8X */
|
||||||
|
ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 39,SYM, 87,ILL, 88, 89, /* 9X */
|
||||||
|
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */
|
||||||
|
SYM,SYM,SYM,SYM,SYM, 42,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */
|
||||||
|
90, 33, 40, 35, 32, 21, 22, 38, 41, 28, 49, 45, 91, 34, 92, 50, /* CX */
|
||||||
|
43, 47, 51, 36, 52, 93, 30,SYM, 19, 94, 37, 44, 31, 46, 95, 48, /* DX */
|
||||||
|
96, 33, 40, 35, 32, 21, 22, 38, 41, 28, 49, 45, 97, 34, 98, 50, /* EX */
|
||||||
|
43, 47, 51, 36, 52, 99, 30,SYM, 19,100, 37, 44, 31, 46,101,102, /* FX */
|
||||||
|
};
|
||||||
|
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
|
||||||
|
|
||||||
|
|
||||||
|
/* Model Table:
|
||||||
|
* Total sequences: 964
|
||||||
|
* First 512 sequences: 0.9968082796759031
|
||||||
|
* Next 512 sequences (512-1024): 0.0031917203240968304
|
||||||
|
* Rest: 3.903127820947816e-17
|
||||||
|
* Negative sequences: TODO
|
||||||
|
*/
|
||||||
|
static const PRUint8 DanishLangModel[] =
|
||||||
|
{
|
||||||
|
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,2,3,3,3,2,3,0,2,
|
||||||
|
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,2,2,2,2,
|
||||||
|
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,2,2,
|
||||||
|
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,0,
|
||||||
|
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,0,2,3,3,3,3,3,2,2,
|
||||||
|
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,0,2,3,3,2,3,3,2,2,
|
||||||
|
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,0,2,2,
|
||||||
|
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,0,2,2,
|
||||||
|
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,0,2,2,
|
||||||
|
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,2,3,3,3,2,2,0,0,
|
||||||
|
3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,2,3,3,3,3,3,3,2,2,2,2,3,2,
|
||||||
|
3,3,3,3,3,3,3,2,3,3,2,3,3,2,3,2,3,2,3,3,3,3,3,2,2,2,2,2,0,0,
|
||||||
|
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,2,2,3,0,
|
||||||
|
3,3,3,3,3,3,3,2,3,3,3,2,2,3,3,3,3,2,3,3,3,3,3,3,2,2,2,2,2,0,
|
||||||
|
3,3,3,3,3,3,3,3,3,3,2,2,2,2,2,2,3,2,2,2,2,3,3,3,2,2,0,0,2,0,
|
||||||
|
3,3,3,3,3,3,3,2,3,3,2,2,2,2,2,3,3,2,2,3,3,3,3,3,2,2,0,0,2,0,
|
||||||
|
3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,2,3,3,2,3,0,2,2,3,2,3,3,0,2,
|
||||||
|
3,3,3,3,3,3,3,3,3,3,3,2,2,3,3,3,3,3,3,3,2,3,3,2,2,0,2,0,2,0,
|
||||||
|
3,3,3,3,3,3,2,2,3,3,2,2,3,2,3,2,3,2,2,3,3,3,3,3,2,3,2,2,2,0,
|
||||||
|
3,3,3,3,2,2,3,3,3,2,3,3,3,2,3,3,0,2,2,2,2,0,0,3,0,0,2,0,0,0,
|
||||||
|
3,3,3,3,3,2,3,3,3,3,3,3,3,2,3,3,3,3,2,2,2,0,0,0,2,2,2,0,0,0,
|
||||||
|
3,3,3,3,2,0,3,3,3,2,3,3,2,2,3,3,0,2,2,2,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
2,3,3,3,0,3,3,3,3,2,3,3,3,3,3,3,2,2,2,0,0,0,0,0,2,0,0,0,0,0,
|
||||||
|
3,3,2,3,3,3,3,3,3,3,2,2,2,2,2,2,3,2,2,3,3,2,3,2,2,0,0,0,0,0,
|
||||||
|
3,3,2,3,3,3,2,2,3,3,2,3,2,2,0,2,3,2,3,0,3,0,0,2,3,2,2,0,2,2,
|
||||||
|
3,2,2,2,3,3,2,2,2,3,0,2,2,2,0,2,2,0,2,0,2,0,0,0,2,2,2,0,0,0,
|
||||||
|
3,2,2,2,3,3,2,2,0,3,0,2,2,0,0,2,2,2,2,2,2,0,0,2,2,0,2,0,0,0,
|
||||||
|
3,2,0,2,2,3,2,0,2,2,0,0,2,2,2,2,2,2,2,2,0,0,0,0,2,2,0,0,2,0,
|
||||||
|
2,3,2,2,2,0,2,2,2,2,2,2,2,0,2,2,0,2,0,0,0,0,0,0,2,0,0,0,0,0,
|
||||||
|
0,0,0,0,3,2,2,2,2,2,0,0,0,0,2,2,3,0,2,0,0,0,0,0,0,0,0,0,0,2,
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
const SequenceModel Iso_8859_15DanishModel =
|
||||||
|
{
|
||||||
|
Iso_8859_15_CharToOrderMap,
|
||||||
|
DanishLangModel,
|
||||||
|
30,
|
||||||
|
(float)0.9968082796759031,
|
||||||
|
PR_TRUE,
|
||||||
|
"ISO-8859-15"
|
||||||
|
};
|
||||||
|
|
||||||
|
const SequenceModel Iso_8859_1DanishModel =
|
||||||
|
{
|
||||||
|
Iso_8859_1_CharToOrderMap,
|
||||||
|
DanishLangModel,
|
||||||
|
30,
|
||||||
|
(float)0.9968082796759031,
|
||||||
|
PR_TRUE,
|
||||||
|
"ISO-8859-1"
|
||||||
|
};
|
||||||
|
|
||||||
|
const SequenceModel Windows_1252DanishModel =
|
||||||
|
{
|
||||||
|
Windows_1252_CharToOrderMap,
|
||||||
|
DanishLangModel,
|
||||||
|
30,
|
||||||
|
(float)0.9968082796759031,
|
||||||
|
PR_TRUE,
|
||||||
|
"WINDOWS-1252"
|
||||||
|
};
|
141
PowerEditor/src/uchardet/LangModels/LangEsperantoModel.cpp
Normal file
141
PowerEditor/src/uchardet/LangModels/LangEsperantoModel.cpp
Normal file
@ -0,0 +1,141 @@
|
|||||||
|
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
||||||
|
/* ***** BEGIN LICENSE BLOCK *****
|
||||||
|
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||||
|
*
|
||||||
|
* The contents of this file are subject to the Mozilla Public License Version
|
||||||
|
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
* http://www.mozilla.org/MPL/
|
||||||
|
*
|
||||||
|
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||||
|
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||||
|
* for the specific language governing rights and limitations under the
|
||||||
|
* License.
|
||||||
|
*
|
||||||
|
* The Original Code is Mozilla Communicator client code.
|
||||||
|
*
|
||||||
|
* The Initial Developer of the Original Code is
|
||||||
|
* Netscape Communications Corporation.
|
||||||
|
* Portions created by the Initial Developer are Copyright (C) 1998
|
||||||
|
* the Initial Developer. All Rights Reserved.
|
||||||
|
*
|
||||||
|
* Contributor(s):
|
||||||
|
*
|
||||||
|
* Alternatively, the contents of this file may be used under the terms of
|
||||||
|
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||||
|
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||||
|
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||||
|
* of those above. If you wish to allow use of your version of this file only
|
||||||
|
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||||
|
* use your version of this file under the terms of the MPL, indicate your
|
||||||
|
* decision by deleting the provisions above and replace them with the notice
|
||||||
|
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||||
|
* the provisions above, a recipient may use your version of this file under
|
||||||
|
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||||
|
*
|
||||||
|
* ***** END LICENSE BLOCK ***** */
|
||||||
|
|
||||||
|
#include "../nsSBCharSetProber.h"
|
||||||
|
|
||||||
|
/********* Language model for: Esperanto *********/
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Generated by BuildLangModel.py
|
||||||
|
* On: 2015-12-04 01:27:38.177516
|
||||||
|
**/
|
||||||
|
|
||||||
|
/* Character Mapping Table:
|
||||||
|
* ILL: illegal character.
|
||||||
|
* CTR: control character specific to the charset.
|
||||||
|
* RET: carriage/return.
|
||||||
|
* SYM: symbol (punctuation) that does not belong to word.
|
||||||
|
* NUM: 0 - 9.
|
||||||
|
*
|
||||||
|
* Other characters are ordered by probabilities
|
||||||
|
* (0 is the most common character in the language).
|
||||||
|
*
|
||||||
|
* Orders are generic to a language. So the codepoint with order X in
|
||||||
|
* CHARSET1 maps to the same character as the codepoint with the same
|
||||||
|
* order X in CHARSET2 for the same language.
|
||||||
|
* As such, it is possible to get missing order. For instance the
|
||||||
|
* ligature of 'o' and 'e' exists in ISO-8859-15 but not in ISO-8859-1
|
||||||
|
* even though they are both used for French. Same for the euro sign.
|
||||||
|
*/
|
||||||
|
static const unsigned char Iso_8859_3_CharToOrderMap[] =
|
||||||
|
{
|
||||||
|
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */
|
||||||
|
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
|
||||||
|
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
|
||||||
|
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
|
||||||
|
SYM, 0, 18, 17, 10, 2, 19, 15, 21, 3, 11, 9, 7, 13, 4, 1, /* 4X */
|
||||||
|
14, 32, 5, 8, 6, 12, 16, 27, 33, 25, 20,SYM,SYM,SYM,SYM,SYM, /* 5X */
|
||||||
|
SYM, 0, 18, 17, 10, 2, 19, 15, 21, 3, 11, 9, 7, 13, 4, 1, /* 6X */
|
||||||
|
14, 32, 5, 8, 6, 12, 16, 27, 33, 25, 20,SYM,SYM,SYM,SYM,CTR, /* 7X */
|
||||||
|
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */
|
||||||
|
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */
|
||||||
|
SYM, 56,SYM,SYM,SYM,ILL, 34,SYM,SYM, 57, 53, 58, 28,SYM,ILL, 40, /* AX */
|
||||||
|
SYM, 59,SYM,SYM,SYM,SYM, 34,SYM,SYM, 60, 53, 61, 28,SYM,ILL, 40, /* BX */
|
||||||
|
44, 29, 46,ILL, 43, 62, 24, 38, 41, 31, 48, 50, 54, 35, 49, 52, /* CX */
|
||||||
|
ILL, 42, 63, 30, 47, 64, 36,SYM, 22, 51, 39, 55, 37, 23, 26, 45, /* DX */
|
||||||
|
44, 29, 46,ILL, 43, 65, 24, 38, 41, 31, 48, 50, 54, 35, 49, 52, /* EX */
|
||||||
|
ILL, 42, 66, 30, 47, 67, 36,SYM, 22, 51, 39, 55, 37, 23, 26,SYM, /* FX */
|
||||||
|
};
|
||||||
|
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
|
||||||
|
|
||||||
|
|
||||||
|
/* Model Table:
|
||||||
|
* Total sequences: 989
|
||||||
|
* First 512 sequences: 0.9942980632768038
|
||||||
|
* Next 512 sequences (512-1024): 0.0057019367231962385
|
||||||
|
* Rest: -5.0306980803327406e-17
|
||||||
|
* Negative sequences: TODO
|
||||||
|
*/
|
||||||
|
static const PRUint8 EsperantoLangModel[] =
|
||||||
|
{
|
||||||
|
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,0,0,2,3,3,
|
||||||
|
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,3,2,0,0,0,2,2,2,
|
||||||
|
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,0,0,0,2,3,3,
|
||||||
|
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,2,3,2,2,2,3,0,2,2,2,
|
||||||
|
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,2,2,3,2,3,2,0,2,
|
||||||
|
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,3,0,3,3,3,2,2,2,
|
||||||
|
3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,3,2,3,3,3,3,0,0,2,3,2,2,2,3,3,2,0,2,0,
|
||||||
|
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,3,2,2,2,3,2,2,0,3,3,3,2,0,2,
|
||||||
|
3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,3,3,3,3,3,0,0,0,3,0,2,0,3,2,3,2,2,0,
|
||||||
|
3,3,3,3,3,3,3,3,3,2,3,2,3,3,3,2,3,3,2,3,3,3,0,0,0,3,2,0,2,3,2,2,0,0,0,
|
||||||
|
3,3,3,3,3,3,2,3,3,2,3,2,3,3,3,3,3,2,2,2,3,3,0,0,2,3,0,3,2,2,2,2,0,0,0,
|
||||||
|
3,3,3,3,3,3,3,3,3,2,3,2,3,3,2,2,0,2,2,2,2,2,2,0,0,0,0,0,0,3,3,2,0,2,0,
|
||||||
|
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,0,3,2,3,2,0,0,0,2,0,2,2,
|
||||||
|
3,3,3,3,3,3,3,3,3,3,2,2,3,3,3,2,2,3,3,3,2,0,0,0,2,3,2,2,0,3,2,2,0,0,0,
|
||||||
|
3,3,3,3,2,3,3,3,3,2,2,2,3,2,3,2,0,2,2,2,2,3,0,0,0,2,2,0,0,3,2,2,0,0,0,
|
||||||
|
3,3,3,3,3,3,2,3,3,2,3,0,3,3,2,2,3,2,2,2,2,3,0,2,2,3,2,2,2,2,2,3,0,2,0,
|
||||||
|
3,3,3,3,2,3,2,2,2,2,2,3,3,2,2,2,0,0,2,0,2,2,0,0,2,2,0,0,0,3,2,2,0,0,0,
|
||||||
|
3,3,3,3,0,3,3,3,3,3,2,0,3,2,2,2,0,3,2,2,3,3,0,0,0,3,0,0,0,2,2,2,2,2,2,
|
||||||
|
3,3,3,3,3,3,3,3,3,2,3,2,3,2,2,2,2,2,3,2,0,2,0,0,0,3,2,0,0,3,3,3,0,0,0,
|
||||||
|
3,3,3,3,0,3,3,3,2,2,2,2,3,3,2,3,2,0,2,3,0,0,0,0,0,2,0,0,0,0,0,2,0,3,0,
|
||||||
|
3,3,3,3,3,2,2,3,3,3,2,2,3,2,2,2,2,3,3,2,2,0,0,0,0,3,2,2,0,2,2,2,2,0,0,
|
||||||
|
3,3,3,3,3,3,3,3,2,2,2,0,3,3,2,0,2,0,2,2,0,2,0,0,0,2,0,2,0,2,2,2,0,2,0,
|
||||||
|
3,3,3,3,0,0,2,3,0,0,2,2,3,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
3,2,3,3,2,3,3,3,3,3,3,2,2,2,2,3,2,0,2,2,3,2,0,0,2,0,3,0,0,0,0,0,0,0,0,
|
||||||
|
3,3,3,3,0,0,2,2,0,2,3,2,3,3,0,0,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
3,3,3,3,3,3,2,3,3,2,3,2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,2,0,2,0,2,0,0,0,
|
||||||
|
3,3,3,3,2,2,3,2,0,2,0,2,3,2,2,0,3,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
3,3,3,3,2,2,2,2,3,2,0,0,2,0,0,0,0,0,0,2,0,2,0,0,0,2,0,3,0,0,2,0,0,0,0,
|
||||||
|
3,3,2,2,2,2,0,2,0,2,0,0,3,0,0,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
0,0,2,0,3,3,3,3,3,2,3,0,0,2,2,2,2,3,2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
0,0,0,0,3,3,2,2,2,2,2,2,0,0,2,2,2,0,2,2,3,0,0,0,0,0,0,3,0,0,0,0,0,0,0,
|
||||||
|
2,2,2,0,3,3,3,3,3,2,2,0,2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,2,0,0,
|
||||||
|
2,0,0,2,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
2,2,2,3,0,0,2,2,0,0,0,0,2,2,2,2,0,0,0,2,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,
|
||||||
|
3,3,3,2,2,0,2,0,0,0,2,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
const SequenceModel Iso_8859_3EsperantoModel =
|
||||||
|
{
|
||||||
|
Iso_8859_3_CharToOrderMap,
|
||||||
|
EsperantoLangModel,
|
||||||
|
35,
|
||||||
|
(float)0.9942980632768038,
|
||||||
|
PR_FALSE,
|
||||||
|
"ISO-8859-3"
|
||||||
|
};
|
206
PowerEditor/src/uchardet/LangModels/LangFrenchModel.cpp
Normal file
206
PowerEditor/src/uchardet/LangModels/LangFrenchModel.cpp
Normal file
@ -0,0 +1,206 @@
|
|||||||
|
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
||||||
|
/* ***** BEGIN LICENSE BLOCK *****
|
||||||
|
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||||
|
*
|
||||||
|
* The contents of this file are subject to the Mozilla Public License Version
|
||||||
|
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
* http://www.mozilla.org/MPL/
|
||||||
|
*
|
||||||
|
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||||
|
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||||
|
* for the specific language governing rights and limitations under the
|
||||||
|
* License.
|
||||||
|
*
|
||||||
|
* The Original Code is Mozilla Communicator client code.
|
||||||
|
*
|
||||||
|
* The Initial Developer of the Original Code is
|
||||||
|
* Netscape Communications Corporation.
|
||||||
|
* Portions created by the Initial Developer are Copyright (C) 1998
|
||||||
|
* the Initial Developer. All Rights Reserved.
|
||||||
|
*
|
||||||
|
* Contributor(s):
|
||||||
|
*
|
||||||
|
* Alternatively, the contents of this file may be used under the terms of
|
||||||
|
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||||
|
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||||
|
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||||
|
* of those above. If you wish to allow use of your version of this file only
|
||||||
|
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||||
|
* use your version of this file under the terms of the MPL, indicate your
|
||||||
|
* decision by deleting the provisions above and replace them with the notice
|
||||||
|
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||||
|
* the provisions above, a recipient may use your version of this file under
|
||||||
|
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||||
|
*
|
||||||
|
* ***** END LICENSE BLOCK ***** */
|
||||||
|
|
||||||
|
#include "../nsSBCharSetProber.h"
|
||||||
|
|
||||||
|
/********* Language model for: French *********/
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Generated by BuildLangModel.py
|
||||||
|
* On: 2015-12-03 21:10:27.685575
|
||||||
|
**/
|
||||||
|
|
||||||
|
/* Character Mapping Table:
|
||||||
|
* ILL: illegal character.
|
||||||
|
* CTR: control character specific to the charset.
|
||||||
|
* RET: carriage/return.
|
||||||
|
* SYM: symbol (punctuation) that does not belong to word.
|
||||||
|
* NUM: 0 - 9.
|
||||||
|
*
|
||||||
|
* Other characters are ordered by probabilities
|
||||||
|
* (0 is the most common character in the language).
|
||||||
|
*
|
||||||
|
* Orders are generic to a language. So the codepoint with order X in
|
||||||
|
* CHARSET1 maps to the same character as the codepoint with the same
|
||||||
|
* order X in CHARSET2 for the same language.
|
||||||
|
* As such, it is possible to get missing order. For instance the
|
||||||
|
* ligature of 'o' and 'e' exists in ISO-8859-15 but not in ISO-8859-1
|
||||||
|
* even though they are both used for French. Same for the euro sign.
|
||||||
|
*/
|
||||||
|
static const unsigned char Windows_1252_CharToOrderMap[] =
|
||||||
|
{
|
||||||
|
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */
|
||||||
|
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
|
||||||
|
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
|
||||||
|
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
|
||||||
|
SYM, 2, 18, 11, 10, 0, 17, 15, 19, 4, 25, 26, 7, 13, 3, 8, /* 4X */
|
||||||
|
12, 20, 5, 1, 6, 9, 16, 30, 21, 22, 29,SYM,SYM,SYM,SYM,SYM, /* 5X */
|
||||||
|
SYM, 2, 18, 11, 10, 0, 17, 15, 19, 4, 25, 26, 7, 13, 3, 8, /* 6X */
|
||||||
|
12, 20, 5, 1, 6, 9, 16, 30, 21, 22, 29,SYM,SYM,SYM,SYM,CTR, /* 7X */
|
||||||
|
SYM,ILL,SYM, 56,SYM,SYM,SYM,SYM,SYM,SYM, 51,SYM, 35,ILL, 57,ILL, /* 8X */
|
||||||
|
ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 51,SYM, 35,ILL, 58, 59, /* 9X */
|
||||||
|
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */
|
||||||
|
SYM,SYM,SYM,SYM,SYM, 60,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */
|
||||||
|
24, 38, 32, 46, 49, 61, 47, 27, 23, 14, 28, 41, 62, 39, 33, 36, /* CX */
|
||||||
|
48, 45, 54, 40, 31, 55, 42,SYM, 52, 37, 43, 34, 44, 53, 50, 63, /* DX */
|
||||||
|
24, 38, 32, 46, 49, 64, 47, 27, 23, 14, 28, 41, 65, 39, 33, 36, /* EX */
|
||||||
|
48, 45, 54, 40, 31, 55, 42,SYM, 52, 37, 43, 34, 44, 53, 50, 66, /* FX */
|
||||||
|
};
|
||||||
|
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
|
||||||
|
|
||||||
|
static const unsigned char Iso_8859_1_CharToOrderMap[] =
|
||||||
|
{
|
||||||
|
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */
|
||||||
|
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
|
||||||
|
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
|
||||||
|
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
|
||||||
|
SYM, 2, 18, 11, 10, 0, 17, 15, 19, 4, 25, 26, 7, 13, 3, 8, /* 4X */
|
||||||
|
12, 20, 5, 1, 6, 9, 16, 30, 21, 22, 29,SYM,SYM,SYM,SYM,SYM, /* 5X */
|
||||||
|
SYM, 2, 18, 11, 10, 0, 17, 15, 19, 4, 25, 26, 7, 13, 3, 8, /* 6X */
|
||||||
|
12, 20, 5, 1, 6, 9, 16, 30, 21, 22, 29,SYM,SYM,SYM,SYM,CTR, /* 7X */
|
||||||
|
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */
|
||||||
|
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */
|
||||||
|
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */
|
||||||
|
SYM,SYM,SYM,SYM,SYM, 67,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */
|
||||||
|
24, 38, 32, 46, 49, 68, 47, 27, 23, 14, 28, 41, 69, 39, 33, 36, /* CX */
|
||||||
|
48, 45, 54, 40, 31, 55, 42,SYM, 52, 37, 43, 34, 44, 53, 50, 70, /* DX */
|
||||||
|
24, 38, 32, 46, 49, 71, 47, 27, 23, 14, 28, 41, 72, 39, 33, 36, /* EX */
|
||||||
|
48, 45, 54, 40, 31, 55, 42,SYM, 52, 37, 43, 34, 44, 53, 50, 73, /* FX */
|
||||||
|
};
|
||||||
|
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
|
||||||
|
|
||||||
|
static const unsigned char Iso_8859_15_CharToOrderMap[] =
|
||||||
|
{
|
||||||
|
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */
|
||||||
|
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
|
||||||
|
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
|
||||||
|
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
|
||||||
|
SYM, 2, 18, 11, 10, 0, 17, 15, 19, 4, 25, 26, 7, 13, 3, 8, /* 4X */
|
||||||
|
12, 20, 5, 1, 6, 9, 16, 30, 21, 22, 29,SYM,SYM,SYM,SYM,SYM, /* 5X */
|
||||||
|
SYM, 2, 18, 11, 10, 0, 17, 15, 19, 4, 25, 26, 7, 13, 3, 8, /* 6X */
|
||||||
|
12, 20, 5, 1, 6, 9, 16, 30, 21, 22, 29,SYM,SYM,SYM,SYM,CTR, /* 7X */
|
||||||
|
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */
|
||||||
|
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */
|
||||||
|
SYM,SYM,SYM,SYM,SYM,SYM, 51,SYM, 51,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */
|
||||||
|
SYM,SYM,SYM,SYM, 74, 75,SYM,SYM, 76,SYM,SYM,SYM, 35, 35, 77,SYM, /* BX */
|
||||||
|
24, 38, 32, 46, 49, 78, 47, 27, 23, 14, 28, 41, 79, 39, 33, 36, /* CX */
|
||||||
|
48, 45, 54, 40, 31, 55, 42,SYM, 52, 37, 43, 34, 44, 53, 50, 80, /* DX */
|
||||||
|
24, 38, 32, 46, 49, 81, 47, 27, 23, 14, 28, 41, 82, 39, 33, 36, /* EX */
|
||||||
|
48, 45, 54, 40, 31, 55, 42,SYM, 52, 37, 43, 34, 44, 53, 50, 83, /* FX */
|
||||||
|
};
|
||||||
|
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
|
||||||
|
|
||||||
|
|
||||||
|
/* Model Table:
|
||||||
|
* Total sequences: 914
|
||||||
|
* First 512 sequences: 0.997057879992383
|
||||||
|
* Next 512 sequences (512-1024): 0.002942120007616917
|
||||||
|
* Rest: 3.8163916471489756e-17
|
||||||
|
* Negative sequences: TODO
|
||||||
|
*/
|
||||||
|
static const PRUint8 FrenchLangModel[] =
|
||||||
|
{
|
||||||
|
3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,3,3,3,3,3,0,0,3,3,3,0,3,3,0,0,0,2,0,2,0,
|
||||||
|
3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,2,2,3,3,3,3,0,3,3,0,0,3,0,0,2,3,0,0,0,2,2,0,0,
|
||||||
|
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,0,3,3,3,0,3,3,2,2,3,0,0,3,0,
|
||||||
|
3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,2,3,3,0,3,3,3,2,3,2,0,2,2,0,0,0,0,
|
||||||
|
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,0,2,3,0,2,3,2,0,0,0,0,0,0,0,
|
||||||
|
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,0,2,3,3,3,2,3,3,3,0,2,0,0,0,
|
||||||
|
3,3,3,2,3,3,3,3,3,3,2,3,3,3,3,2,2,2,3,3,2,2,3,3,2,0,2,0,3,3,2,3,2,0,0,0,0,0,
|
||||||
|
3,3,3,2,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,0,3,3,3,2,3,0,0,2,2,2,2,0,2,0,0,0,
|
||||||
|
3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,0,3,3,0,0,3,3,0,0,2,3,0,3,3,
|
||||||
|
3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,2,3,3,2,0,3,3,2,3,3,2,0,0,0,0,0,2,0,
|
||||||
|
3,3,3,2,3,3,3,2,3,3,3,2,2,3,3,3,2,2,2,3,0,0,3,3,0,3,0,0,2,2,3,2,2,2,3,0,0,0,
|
||||||
|
3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,2,2,2,2,3,3,0,3,3,0,0,3,0,2,2,2,3,2,0,0,2,0,0,
|
||||||
|
3,3,3,2,3,3,3,3,3,3,2,2,3,2,3,0,0,2,2,3,0,0,3,3,0,0,2,2,3,2,2,3,2,0,0,0,0,0,
|
||||||
|
3,3,3,3,3,2,3,2,3,3,2,3,3,3,3,2,0,2,3,2,0,0,3,3,0,2,2,0,3,0,2,2,3,0,2,2,0,0,
|
||||||
|
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,0,0,0,3,2,2,0,3,0,0,2,0,0,0,0,0,
|
||||||
|
3,3,3,3,3,3,3,3,3,3,2,2,2,3,3,3,2,2,3,3,0,2,3,3,0,0,0,0,2,0,2,0,2,0,0,0,0,0,
|
||||||
|
3,2,3,2,3,3,0,2,3,3,0,0,0,2,3,0,2,2,0,0,0,0,2,3,0,0,2,0,3,0,0,0,0,0,0,2,0,0,
|
||||||
|
3,3,3,2,3,3,3,3,3,3,2,2,2,3,3,2,0,3,0,0,0,0,0,3,0,2,0,0,3,0,0,0,0,0,2,2,0,0,
|
||||||
|
3,3,3,3,3,3,3,3,3,3,3,2,2,2,3,2,2,0,3,2,0,0,3,2,0,3,0,0,0,0,0,0,3,2,0,2,0,0,
|
||||||
|
3,3,3,3,3,3,3,3,3,3,0,2,0,3,3,0,0,2,2,0,0,0,3,3,0,2,2,0,2,2,2,3,3,0,0,2,0,0,
|
||||||
|
0,0,2,0,0,0,0,2,2,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
3,0,3,0,3,0,3,2,3,2,2,3,3,2,3,0,3,2,2,2,2,3,2,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,
|
||||||
|
3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,0,2,2,2,0,3,2,0,0,2,2,0,0,0,0,0,0,0,
|
||||||
|
0,3,0,3,0,3,3,3,0,0,3,3,2,3,0,3,3,2,3,0,3,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
3,2,3,2,3,2,2,2,3,3,2,2,2,2,3,0,0,0,0,0,0,0,0,0,3,2,0,0,0,0,0,0,2,0,0,0,0,0,
|
||||||
|
3,3,3,2,3,3,2,3,3,3,0,0,2,3,2,2,2,2,2,3,0,0,3,0,0,0,2,0,0,0,2,0,0,0,0,0,0,0,
|
||||||
|
0,0,3,0,0,0,0,0,3,3,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
0,0,0,2,0,0,3,2,0,0,0,3,0,3,0,0,2,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
3,0,3,2,3,2,0,2,3,3,0,2,0,2,2,2,0,0,2,2,2,0,3,0,0,0,2,0,0,3,2,0,0,0,0,0,0,0,
|
||||||
|
3,2,3,2,3,2,2,2,3,2,0,2,0,0,2,0,0,2,2,2,0,0,2,0,0,0,2,0,0,0,2,0,0,0,0,0,0,0,
|
||||||
|
0,2,0,3,0,0,3,3,0,0,0,0,2,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
0,2,0,2,2,0,3,3,0,0,0,3,2,2,0,3,0,0,2,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
0,0,0,3,0,0,3,3,0,0,0,2,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
0,0,0,0,0,2,3,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
0,2,0,0,2,0,2,2,0,3,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
2,2,2,2,0,2,2,3,0,0,2,2,0,2,0,2,0,2,2,0,2,0,0,0,0,0,2,0,0,0,2,0,0,0,0,0,0,0,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
const SequenceModel Windows_1252FrenchModel =
|
||||||
|
{
|
||||||
|
Windows_1252_CharToOrderMap,
|
||||||
|
FrenchLangModel,
|
||||||
|
38,
|
||||||
|
(float)0.997057879992383,
|
||||||
|
PR_TRUE,
|
||||||
|
"WINDOWS-1252"
|
||||||
|
};
|
||||||
|
|
||||||
|
const SequenceModel Iso_8859_1FrenchModel =
|
||||||
|
{
|
||||||
|
Iso_8859_1_CharToOrderMap,
|
||||||
|
FrenchLangModel,
|
||||||
|
38,
|
||||||
|
(float)0.997057879992383,
|
||||||
|
PR_TRUE,
|
||||||
|
"ISO-8859-1"
|
||||||
|
};
|
||||||
|
|
||||||
|
const SequenceModel Iso_8859_15FrenchModel =
|
||||||
|
{
|
||||||
|
Iso_8859_15_CharToOrderMap,
|
||||||
|
FrenchLangModel,
|
||||||
|
38,
|
||||||
|
(float)0.997057879992383,
|
||||||
|
PR_TRUE,
|
||||||
|
"ISO-8859-15"
|
||||||
|
};
|
168
PowerEditor/src/uchardet/LangModels/LangGermanModel.cpp
Normal file
168
PowerEditor/src/uchardet/LangModels/LangGermanModel.cpp
Normal file
@ -0,0 +1,168 @@
|
|||||||
|
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
||||||
|
/* ***** BEGIN LICENSE BLOCK *****
|
||||||
|
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||||
|
*
|
||||||
|
* The contents of this file are subject to the Mozilla Public License Version
|
||||||
|
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
* http://www.mozilla.org/MPL/
|
||||||
|
*
|
||||||
|
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||||
|
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||||
|
* for the specific language governing rights and limitations under the
|
||||||
|
* License.
|
||||||
|
*
|
||||||
|
* The Original Code is Mozilla Communicator client code.
|
||||||
|
*
|
||||||
|
* The Initial Developer of the Original Code is
|
||||||
|
* Netscape Communications Corporation.
|
||||||
|
* Portions created by the Initial Developer are Copyright (C) 1998
|
||||||
|
* the Initial Developer. All Rights Reserved.
|
||||||
|
*
|
||||||
|
* Contributor(s):
|
||||||
|
*
|
||||||
|
* Alternatively, the contents of this file may be used under the terms of
|
||||||
|
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||||
|
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||||
|
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||||
|
* of those above. If you wish to allow use of your version of this file only
|
||||||
|
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||||
|
* use your version of this file under the terms of the MPL, indicate your
|
||||||
|
* decision by deleting the provisions above and replace them with the notice
|
||||||
|
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||||
|
* the provisions above, a recipient may use your version of this file under
|
||||||
|
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||||
|
*
|
||||||
|
* ***** END LICENSE BLOCK ***** */
|
||||||
|
|
||||||
|
#include "../nsSBCharSetProber.h"
|
||||||
|
|
||||||
|
/********* Language model for: German *********/
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Generated by BuildLangModel.py
|
||||||
|
* On: 2015-12-03 22:50:46.518374
|
||||||
|
**/
|
||||||
|
|
||||||
|
/* Character Mapping Table:
|
||||||
|
* ILL: illegal character.
|
||||||
|
* CTR: control character specific to the charset.
|
||||||
|
* RET: carriage/return.
|
||||||
|
* SYM: symbol (punctuation) that does not belong to word.
|
||||||
|
* NUM: 0 - 9.
|
||||||
|
*
|
||||||
|
* Other characters are ordered by probabilities
|
||||||
|
* (0 is the most common character in the language).
|
||||||
|
*
|
||||||
|
* Orders are generic to a language. So the codepoint with order X in
|
||||||
|
* CHARSET1 maps to the same character as the codepoint with the same
|
||||||
|
* order X in CHARSET2 for the same language.
|
||||||
|
* As such, it is possible to get missing order. For instance the
|
||||||
|
* ligature of 'o' and 'e' exists in ISO-8859-15 but not in ISO-8859-1
|
||||||
|
* even though they are both used for French. Same for the euro sign.
|
||||||
|
*/
|
||||||
|
static const unsigned char Windows_1252_CharToOrderMap[] =
|
||||||
|
{
|
||||||
|
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */
|
||||||
|
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
|
||||||
|
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
|
||||||
|
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
|
||||||
|
SYM, 5, 15, 12, 8, 0, 17, 14, 7, 3, 23, 16, 9, 13, 2, 11, /* 4X */
|
||||||
|
18, 30, 1, 4, 6, 10, 21, 19, 28, 25, 20,SYM,SYM,SYM,SYM,SYM, /* 5X */
|
||||||
|
SYM, 5, 15, 12, 8, 0, 17, 14, 7, 3, 23, 16, 9, 13, 2, 11, /* 6X */
|
||||||
|
18, 30, 1, 4, 6, 10, 21, 19, 28, 25, 20,SYM,SYM,SYM,SYM,CTR, /* 7X */
|
||||||
|
SYM,ILL,SYM, 59,SYM,SYM,SYM,SYM,SYM,SYM, 36,SYM, 54,ILL, 42,ILL, /* 8X */
|
||||||
|
ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 36,SYM, 54,ILL, 42, 56, /* 9X */
|
||||||
|
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */
|
||||||
|
SYM,SYM,SYM,SYM,SYM, 60,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */
|
||||||
|
41, 31, 37, 44, 22, 49, 50, 35, 32, 29, 48, 43, 57, 33, 47, 52, /* CX */
|
||||||
|
53, 39, 51, 34, 40, 55, 26,SYM, 38, 58, 46, 61, 24, 45, 62, 27, /* DX */
|
||||||
|
41, 31, 37, 44, 22, 49, 50, 35, 32, 29, 48, 43, 57, 33, 47, 52, /* EX */
|
||||||
|
53, 39, 51, 34, 40, 55, 26,SYM, 38, 58, 46, 63, 24, 45, 64, 56, /* FX */
|
||||||
|
};
|
||||||
|
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
|
||||||
|
|
||||||
|
static const unsigned char Iso_8859_1_CharToOrderMap[] =
|
||||||
|
{
|
||||||
|
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */
|
||||||
|
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
|
||||||
|
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
|
||||||
|
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
|
||||||
|
SYM, 5, 15, 12, 8, 0, 17, 14, 7, 3, 23, 16, 9, 13, 2, 11, /* 4X */
|
||||||
|
18, 30, 1, 4, 6, 10, 21, 19, 28, 25, 20,SYM,SYM,SYM,SYM,SYM, /* 5X */
|
||||||
|
SYM, 5, 15, 12, 8, 0, 17, 14, 7, 3, 23, 16, 9, 13, 2, 11, /* 6X */
|
||||||
|
18, 30, 1, 4, 6, 10, 21, 19, 28, 25, 20,SYM,SYM,SYM,SYM,CTR, /* 7X */
|
||||||
|
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */
|
||||||
|
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */
|
||||||
|
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */
|
||||||
|
SYM,SYM,SYM,SYM,SYM, 65,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */
|
||||||
|
41, 31, 37, 44, 22, 49, 50, 35, 32, 29, 48, 43, 57, 33, 47, 52, /* CX */
|
||||||
|
53, 39, 51, 34, 40, 55, 26,SYM, 38, 58, 46, 66, 24, 45, 67, 27, /* DX */
|
||||||
|
41, 31, 37, 44, 22, 49, 50, 35, 32, 29, 48, 43, 57, 33, 47, 52, /* EX */
|
||||||
|
53, 39, 51, 34, 40, 55, 26,SYM, 38, 58, 46, 68, 24, 45, 69, 56, /* FX */
|
||||||
|
};
|
||||||
|
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
|
||||||
|
|
||||||
|
|
||||||
|
/* Model Table:
|
||||||
|
* Total sequences: 1188
|
||||||
|
* First 512 sequences: 0.9934041448127945
|
||||||
|
* Next 512 sequences (512-1024): 0.006482829516922903
|
||||||
|
* Rest: 0.0001130256702826099
|
||||||
|
* Negative sequences: TODO
|
||||||
|
*/
|
||||||
|
static const PRUint8 GermanLangModel[] =
|
||||||
|
{
|
||||||
|
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,2,3,3,0,2,
|
||||||
|
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,2,3,2,
|
||||||
|
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,1,2,2,2,
|
||||||
|
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,0,2,2,3,3,2,3,
|
||||||
|
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,0,0,3,2,
|
||||||
|
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,0,3,0,3,3,2,2,
|
||||||
|
3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,2,3,3,3,0,0,2,2,
|
||||||
|
3,3,3,3,3,3,3,3,2,3,3,3,2,3,3,3,3,3,2,3,2,2,3,2,3,3,3,0,0,2,2,
|
||||||
|
3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,2,3,2,2,3,2,3,3,2,0,0,2,2,
|
||||||
|
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,2,2,2,
|
||||||
|
3,3,3,3,3,3,3,3,3,3,2,2,3,3,3,3,3,3,3,3,3,3,2,2,2,2,0,3,3,3,2,
|
||||||
|
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,0,3,0,3,3,1,2,
|
||||||
|
3,3,2,3,2,3,3,3,2,3,3,3,3,2,2,2,3,2,2,2,2,2,2,2,1,3,2,0,1,2,3,
|
||||||
|
3,3,2,3,3,3,3,2,3,3,3,3,3,3,2,3,2,3,3,2,2,2,3,2,3,3,3,0,0,2,2,
|
||||||
|
3,3,3,3,3,3,3,3,2,3,3,3,2,3,3,2,3,2,2,2,3,2,3,2,3,3,2,0,2,2,1,
|
||||||
|
3,3,3,3,3,3,3,3,2,3,3,3,2,2,3,3,2,2,2,2,2,2,3,2,3,3,3,0,0,2,0,
|
||||||
|
3,3,3,3,3,3,3,3,2,3,3,3,1,3,2,2,3,3,3,2,2,2,3,2,3,3,3,0,1,2,1,
|
||||||
|
3,3,3,3,3,3,3,2,2,3,3,3,2,3,3,2,3,3,2,2,2,2,3,2,3,2,3,0,0,2,0,
|
||||||
|
3,3,2,3,3,3,3,3,3,3,3,3,2,2,2,2,2,3,3,2,2,2,3,2,2,2,2,0,0,2,0,
|
||||||
|
3,3,3,3,3,3,2,2,2,2,3,3,1,2,2,2,2,2,2,2,2,2,3,3,3,2,3,0,0,0,0,
|
||||||
|
3,2,2,3,3,3,3,2,2,3,3,3,2,3,2,3,2,2,2,3,3,2,2,2,3,3,3,0,0,2,2,
|
||||||
|
3,2,2,3,2,3,2,0,2,2,2,3,1,2,2,2,2,2,2,2,2,2,2,1,0,2,3,0,0,2,1,
|
||||||
|
2,3,3,3,3,2,3,3,3,3,3,2,3,3,3,2,2,3,2,0,2,2,0,0,0,0,0,2,0,0,2,
|
||||||
|
3,2,2,3,2,3,2,2,2,2,3,3,2,2,2,1,2,1,2,0,2,0,3,2,3,2,2,0,0,2,0,
|
||||||
|
2,3,3,0,3,1,3,3,3,3,0,0,3,2,3,3,2,2,2,1,1,0,0,0,0,0,0,2,0,0,0,
|
||||||
|
3,3,3,2,3,3,2,2,2,3,2,3,3,3,2,2,3,2,3,2,2,2,0,2,2,2,1,0,0,1,0,
|
||||||
|
2,3,3,2,3,0,3,3,2,3,0,1,3,3,3,2,2,3,2,2,2,2,0,0,0,0,1,3,1,0,0,
|
||||||
|
3,2,2,3,2,2,3,2,1,2,2,2,0,2,2,3,2,2,2,2,2,2,0,0,0,0,0,0,0,0,0,
|
||||||
|
3,1,2,3,1,3,3,2,1,2,2,2,2,0,0,2,2,2,3,2,0,2,0,0,0,2,0,0,2,2,0,
|
||||||
|
2,3,2,0,2,2,2,2,2,2,2,2,2,2,2,3,2,2,2,1,2,2,0,2,0,0,0,0,0,0,2,
|
||||||
|
0,1,0,2,0,2,0,0,0,0,3,2,0,0,0,0,0,1,0,2,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
const SequenceModel Windows_1252GermanModel =
|
||||||
|
{
|
||||||
|
Windows_1252_CharToOrderMap,
|
||||||
|
GermanLangModel,
|
||||||
|
31,
|
||||||
|
(float)0.9934041448127945,
|
||||||
|
PR_TRUE,
|
||||||
|
"WINDOWS-1252"
|
||||||
|
};
|
||||||
|
|
||||||
|
const SequenceModel Iso_8859_1GermanModel =
|
||||||
|
{
|
||||||
|
Iso_8859_1_CharToOrderMap,
|
||||||
|
GermanLangModel,
|
||||||
|
31,
|
||||||
|
(float)0.9934041448127945,
|
||||||
|
PR_TRUE,
|
||||||
|
"ISO-8859-1"
|
||||||
|
};
|
229
PowerEditor/src/uchardet/LangModels/LangGreekModel.cpp
Normal file
229
PowerEditor/src/uchardet/LangModels/LangGreekModel.cpp
Normal file
@ -0,0 +1,229 @@
|
|||||||
|
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
||||||
|
/* ***** BEGIN LICENSE BLOCK *****
|
||||||
|
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||||
|
*
|
||||||
|
* The contents of this file are subject to the Mozilla Public License Version
|
||||||
|
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
* http://www.mozilla.org/MPL/
|
||||||
|
*
|
||||||
|
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||||
|
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||||
|
* for the specific language governing rights and limitations under the
|
||||||
|
* License.
|
||||||
|
*
|
||||||
|
* The Original Code is Mozilla Communicator client code.
|
||||||
|
*
|
||||||
|
* The Initial Developer of the Original Code is
|
||||||
|
* Netscape Communications Corporation.
|
||||||
|
* Portions created by the Initial Developer are Copyright (C) 1998
|
||||||
|
* the Initial Developer. All Rights Reserved.
|
||||||
|
*
|
||||||
|
* Contributor(s):
|
||||||
|
*
|
||||||
|
* Alternatively, the contents of this file may be used under the terms of
|
||||||
|
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||||
|
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||||
|
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||||
|
* of those above. If you wish to allow use of your version of this file only
|
||||||
|
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||||
|
* use your version of this file under the terms of the MPL, indicate your
|
||||||
|
* decision by deleting the provisions above and replace them with the notice
|
||||||
|
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||||
|
* the provisions above, a recipient may use your version of this file under
|
||||||
|
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||||
|
*
|
||||||
|
* ***** END LICENSE BLOCK ***** */
|
||||||
|
|
||||||
|
#include "../nsSBCharSetProber.h"
|
||||||
|
|
||||||
|
/********* Language model for: Greek *********/
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Generated by BuildLangModel.py
|
||||||
|
* On: 2016-05-25 15:21:50.073117
|
||||||
|
**/
|
||||||
|
|
||||||
|
/* Character Mapping Table:
|
||||||
|
* ILL: illegal character.
|
||||||
|
* CTR: control character specific to the charset.
|
||||||
|
* RET: carriage/return.
|
||||||
|
* SYM: symbol (punctuation) that does not belong to word.
|
||||||
|
* NUM: 0 - 9.
|
||||||
|
*
|
||||||
|
* Other characters are ordered by probabilities
|
||||||
|
* (0 is the most common character in the language).
|
||||||
|
*
|
||||||
|
* Orders are generic to a language. So the codepoint with order X in
|
||||||
|
* CHARSET1 maps to the same character as the codepoint with the same
|
||||||
|
* order X in CHARSET2 for the same language.
|
||||||
|
* As such, it is possible to get missing order. For instance the
|
||||||
|
* ligature of 'o' and 'e' exists in ISO-8859-15 but not in ISO-8859-1
|
||||||
|
* even though they are both used for French. Same for the euro sign.
|
||||||
|
*/
|
||||||
|
static const unsigned char Windows_1253_CharToOrderMap[] =
|
||||||
|
{
|
||||||
|
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */
|
||||||
|
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
|
||||||
|
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
|
||||||
|
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
|
||||||
|
SYM, 32, 46, 41, 40, 30, 52, 48, 42, 33, 56, 49, 39, 44, 36, 34, /* 4X */
|
||||||
|
47, 59, 35, 38, 37, 43, 54, 50, 58, 53, 57,SYM,SYM,SYM,SYM,SYM, /* 5X */
|
||||||
|
SYM, 32, 46, 41, 40, 30, 52, 48, 42, 33, 56, 49, 39, 44, 36, 34, /* 6X */
|
||||||
|
47, 59, 35, 38, 37, 43, 54, 50, 58, 53, 57,SYM,SYM,SYM,SYM,CTR, /* 7X */
|
||||||
|
SYM,ILL,SYM,SYM,SYM,SYM,SYM,SYM,ILL,SYM,ILL,SYM,ILL,ILL,ILL,ILL, /* 8X */
|
||||||
|
ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,ILL,SYM,ILL,SYM,ILL,ILL,ILL,ILL, /* 9X */
|
||||||
|
SYM,SYM, 17,SYM,SYM,SYM,SYM,SYM,SYM,SYM,ILL,SYM,SYM,SYM,SYM,SYM, /* AX */
|
||||||
|
SYM,SYM,SYM,SYM,SYM, 62,SYM,SYM, 19, 22, 15,SYM, 16,SYM, 24, 28, /* BX */
|
||||||
|
55, 0, 25, 18, 20, 5, 29, 10, 26, 3, 8, 14, 13, 4, 31, 1, /* CX */
|
||||||
|
11, 6,ILL, 7, 2, 12, 27, 23, 45, 21, 51, 60, 17, 19, 22, 15, /* DX */
|
||||||
|
61, 0, 25, 18, 20, 5, 29, 10, 26, 3, 8, 14, 13, 4, 31, 1, /* EX */
|
||||||
|
11, 6, 9, 7, 2, 12, 27, 23, 45, 21, 51, 60, 16, 24, 28,ILL, /* FX */
|
||||||
|
};
|
||||||
|
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
|
||||||
|
|
||||||
|
static const unsigned char Iso_8859_7_CharToOrderMap[] =
|
||||||
|
{
|
||||||
|
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */
|
||||||
|
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
|
||||||
|
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
|
||||||
|
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
|
||||||
|
SYM, 32, 46, 41, 40, 30, 52, 48, 42, 33, 56, 49, 39, 44, 36, 34, /* 4X */
|
||||||
|
47, 59, 35, 38, 37, 43, 54, 50, 58, 53, 57,SYM,SYM,SYM,SYM,SYM, /* 5X */
|
||||||
|
SYM, 32, 46, 41, 40, 30, 52, 48, 42, 33, 56, 49, 39, 44, 36, 34, /* 6X */
|
||||||
|
47, 59, 35, 38, 37, 43, 54, 50, 58, 53, 57,SYM,SYM,SYM,SYM,CTR, /* 7X */
|
||||||
|
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */
|
||||||
|
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */
|
||||||
|
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,ILL,SYM, /* AX */
|
||||||
|
SYM,SYM,SYM,SYM,SYM,SYM, 17,SYM, 19, 22, 15,SYM, 16,SYM, 24, 28, /* BX */
|
||||||
|
55, 0, 25, 18, 20, 5, 29, 10, 26, 3, 8, 14, 13, 4, 31, 1, /* CX */
|
||||||
|
11, 6,ILL, 7, 2, 12, 27, 23, 45, 21, 51, 60, 17, 19, 22, 15, /* DX */
|
||||||
|
61, 0, 25, 18, 20, 5, 29, 10, 26, 3, 8, 14, 13, 4, 31, 1, /* EX */
|
||||||
|
11, 6, 9, 7, 2, 12, 27, 23, 45, 21, 51, 60, 16, 24, 28,ILL, /* FX */
|
||||||
|
};
|
||||||
|
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
|
||||||
|
|
||||||
|
|
||||||
|
/* Model Table:
|
||||||
|
* Total sequences: 1579
|
||||||
|
* First 512 sequences: 0.958419074626211
|
||||||
|
* Next 512 sequences (512-1024): 0.03968891876305471
|
||||||
|
* Rest: 0.0018920066107342773
|
||||||
|
* Negative sequences: TODO
|
||||||
|
*/
|
||||||
|
static const PRUint8 GreekLangModel[] =
|
||||||
|
{
|
||||||
|
1,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,2,2,3,2,3,1,2,
|
||||||
|
3,3,3,3,3,1,3,0,3,0,0,0,0,0,0,1,0,0,1,0,0,0,2,
|
||||||
|
2,2,3,3,3,3,3,3,3,3,2,3,3,3,3,3,1,2,3,2,3,1,2,
|
||||||
|
3,3,3,3,3,2,2,0,2,0,0,0,0,0,0,0,0,1,0,0,1,0,2,
|
||||||
|
3,3,2,3,2,3,3,3,2,3,3,1,3,2,2,3,3,3,2,3,0,3,3,
|
||||||
|
2,2,2,2,2,3,3,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
3,3,3,2,3,3,3,3,3,3,3,3,1,3,3,1,3,3,3,3,3,3,2,
|
||||||
|
3,1,3,3,2,3,3,0,2,0,0,1,0,0,0,1,0,0,0,0,0,0,2,
|
||||||
|
3,3,3,3,3,3,2,3,2,2,3,1,2,2,2,3,3,3,3,3,3,3,3,
|
||||||
|
2,2,1,3,2,3,2,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
2,3,3,3,3,2,3,3,3,3,2,3,3,3,3,3,2,2,3,1,3,3,1,
|
||||||
|
3,3,3,3,3,2,2,0,3,0,0,0,0,0,0,1,0,0,0,0,0,0,2,
|
||||||
|
3,3,3,3,3,3,3,3,3,2,3,2,3,3,3,3,3,3,3,3,3,3,3,
|
||||||
|
3,3,2,3,2,3,2,0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,1,
|
||||||
|
3,3,3,3,2,3,2,3,3,0,3,3,3,3,2,3,3,3,2,3,2,3,3,
|
||||||
|
3,3,2,2,3,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
3,3,3,3,2,3,3,2,3,2,3,2,3,2,3,3,3,3,1,3,3,3,3,
|
||||||
|
2,3,2,2,2,3,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,
|
||||||
|
1,1,0,1,1,1,0,1,1,0,2,1,0,1,0,0,0,0,0,0,0,0,0,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,
|
||||||
|
1,1,3,0,3,2,3,3,3,3,0,3,0,3,3,1,0,0,3,1,2,0,0,
|
||||||
|
2,1,1,3,2,0,0,0,2,0,0,1,0,0,0,0,0,0,1,0,0,0,2,
|
||||||
|
3,3,3,3,2,3,3,2,1,1,3,2,3,1,3,3,3,3,1,3,0,3,3,
|
||||||
|
1,2,1,1,1,2,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
3,2,3,2,3,2,3,3,3,3,2,3,0,3,3,2,2,3,3,2,3,1,2,
|
||||||
|
3,0,3,3,2,1,3,0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,2,
|
||||||
|
3,3,1,3,2,3,1,2,1,2,3,3,2,3,1,3,3,3,1,3,1,3,3,
|
||||||
|
1,2,3,0,3,2,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,2,
|
||||||
|
3,3,3,3,2,3,1,2,2,2,3,2,3,3,3,3,3,3,2,3,2,3,3,
|
||||||
|
2,3,2,2,2,3,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,1,
|
||||||
|
3,3,3,1,3,3,3,3,3,3,2,3,0,3,3,0,0,0,3,0,3,3,0,
|
||||||
|
3,0,2,3,2,0,3,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,2,
|
||||||
|
2,2,3,2,3,3,3,3,3,3,2,3,1,3,3,0,0,0,3,0,3,1,0,
|
||||||
|
3,1,2,2,3,0,2,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,2,
|
||||||
|
2,2,3,3,3,2,3,3,3,3,2,3,1,3,3,0,0,0,3,0,3,1,0,
|
||||||
|
3,0,3,3,3,0,3,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,2,
|
||||||
|
3,3,0,3,3,3,3,0,3,0,3,0,2,3,3,3,3,3,3,3,2,3,3,
|
||||||
|
2,2,0,0,0,3,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
3,3,3,3,3,2,3,3,3,3,2,3,1,3,3,0,0,0,3,0,3,3,0,
|
||||||
|
3,0,3,3,3,0,2,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,2,
|
||||||
|
3,3,1,3,2,3,3,1,0,0,3,0,3,1,0,3,3,3,0,3,0,3,3,
|
||||||
|
0,3,0,0,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
2,1,3,2,3,1,3,3,2,3,1,3,1,3,2,2,1,2,3,1,2,0,2,
|
||||||
|
2,0,3,3,2,1,1,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
0,0,3,1,3,1,3,3,3,3,1,2,0,3,3,0,0,0,2,0,2,1,0,
|
||||||
|
2,0,1,3,2,0,1,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,2,
|
||||||
|
3,3,3,3,3,3,3,1,0,1,3,1,2,2,2,3,2,3,0,3,0,3,3,
|
||||||
|
0,2,1,3,1,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
2,3,3,2,3,3,3,3,2,3,2,3,0,3,3,0,0,0,3,0,2,1,0,
|
||||||
|
2,0,2,3,2,0,2,0,2,0,0,0,0,0,1,0,0,0,0,0,0,0,2,
|
||||||
|
3,3,1,3,2,3,3,1,1,1,2,1,2,0,3,3,3,3,2,3,2,2,2,
|
||||||
|
0,2,2,0,0,2,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
3,3,0,3,3,3,3,1,1,0,3,0,3,3,3,2,2,3,1,3,0,2,3,
|
||||||
|
0,2,0,0,1,3,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
3,3,2,3,1,3,3,3,2,0,3,1,3,1,2,3,3,3,2,3,0,3,3,
|
||||||
|
0,2,0,2,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
1,1,3,2,3,0,3,3,2,3,2,3,0,3,2,0,0,0,1,0,2,1,0,
|
||||||
|
1,0,2,2,1,0,2,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
3,3,1,3,1,3,1,1,1,0,2,0,2,2,1,2,2,2,1,2,0,3,2,
|
||||||
|
0,2,1,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
0,1,0,0,1,0,1,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,
|
||||||
|
0,0,0,0,0,0,0,2,1,2,2,2,3,3,2,3,2,2,2,2,2,2,0,
|
||||||
|
3,3,1,3,1,3,0,0,1,0,3,1,2,1,1,2,2,3,1,2,0,2,2,
|
||||||
|
0,3,0,0,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
0,1,0,1,1,0,0,0,1,0,0,1,1,1,1,0,0,0,1,0,0,0,0,
|
||||||
|
0,0,0,1,1,0,0,2,0,2,2,1,3,3,3,2,3,2,2,2,2,2,0,
|
||||||
|
0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,
|
||||||
|
0,0,1,0,0,0,0,2,0,3,2,3,2,3,3,3,2,2,3,1,2,2,0,
|
||||||
|
0,0,1,0,1,0,0,1,0,1,0,0,1,1,0,0,0,0,0,0,1,0,0,
|
||||||
|
0,1,0,1,0,0,0,2,0,2,2,2,3,3,2,2,2,2,2,2,2,2,0,
|
||||||
|
0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
0,0,0,1,0,0,0,3,0,3,3,3,2,2,2,2,2,2,2,1,2,2,0,
|
||||||
|
0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,
|
||||||
|
0,0,0,0,0,0,0,3,0,3,2,2,1,2,2,2,2,3,2,1,2,1,0,
|
||||||
|
1,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,
|
||||||
|
0,0,0,0,0,0,1,3,0,3,3,3,2,1,2,2,2,1,1,3,2,2,0,
|
||||||
|
0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
0,0,0,0,0,0,0,2,0,2,2,2,1,1,3,2,2,1,2,2,2,2,0,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
0,0,0,0,0,0,0,3,0,3,3,2,1,1,2,2,2,2,1,1,2,2,0,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
0,0,0,0,0,0,0,3,0,2,2,2,2,1,1,2,2,1,2,1,2,1,0,
|
||||||
|
0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
0,0,0,0,0,0,0,2,0,2,2,2,2,1,2,1,2,2,2,3,2,1,0,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,1,0,2,0,0,0,0,0,0,0,0,0,
|
||||||
|
0,0,0,0,0,0,0,3,0,2,2,2,2,2,2,1,2,1,1,1,2,2,0,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
0,0,0,0,0,0,0,2,0,2,2,1,2,2,2,2,2,2,2,1,1,2,0,
|
||||||
|
1,0,0,1,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,
|
||||||
|
0,0,0,0,0,0,0,3,0,2,2,2,1,1,1,2,2,1,1,1,2,2,0,
|
||||||
|
2,2,0,2,0,3,0,0,0,0,3,0,2,0,0,2,1,1,0,1,0,1,2,
|
||||||
|
0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
const SequenceModel Windows_1253GreekModel =
|
||||||
|
{
|
||||||
|
Windows_1253_CharToOrderMap,
|
||||||
|
GreekLangModel,
|
||||||
|
46,
|
||||||
|
(float)0.958419074626211,
|
||||||
|
PR_FALSE,
|
||||||
|
"WINDOWS-1253"
|
||||||
|
};
|
||||||
|
|
||||||
|
const SequenceModel Iso_8859_7GreekModel =
|
||||||
|
{
|
||||||
|
Iso_8859_7_CharToOrderMap,
|
||||||
|
GreekLangModel,
|
||||||
|
46,
|
||||||
|
(float)0.958419074626211,
|
||||||
|
PR_FALSE,
|
||||||
|
"ISO-8859-7"
|
||||||
|
};
|
@ -37,14 +37,14 @@
|
|||||||
*
|
*
|
||||||
* ***** END LICENSE BLOCK ***** */
|
* ***** END LICENSE BLOCK ***** */
|
||||||
|
|
||||||
#include "nsSBCharSetProber.h"
|
#include "../nsSBCharSetProber.h"
|
||||||
|
|
||||||
|
|
||||||
/****************************************************************
|
/****************************************************************
|
||||||
255: Control characters that usually does not exist in any text
|
CTR: Control characters that usually does not exist in any text
|
||||||
254: Carriage/Return
|
RET: Carriage/Return
|
||||||
253: symbol (punctuation) that does not belong to word
|
SYM: symbol (punctuation) that does not belong to word
|
||||||
252: 0 - 9
|
NUM: 0 - 9
|
||||||
|
|
||||||
*****************************************************************/
|
*****************************************************************/
|
||||||
|
|
||||||
@ -52,22 +52,22 @@
|
|||||||
//Character Mapping Table:
|
//Character Mapping Table:
|
||||||
static const unsigned char win1255_CharToOrderMap[] =
|
static const unsigned char win1255_CharToOrderMap[] =
|
||||||
{
|
{
|
||||||
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, //00
|
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, //00
|
||||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, //10
|
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, //10
|
||||||
+253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, //20
|
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, //20
|
||||||
252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, //30
|
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, //30
|
||||||
253, 69, 91, 79, 80, 92, 89, 97, 90, 68,111,112, 82, 73, 95, 85, //40
|
SYM, 69, 91, 79, 80, 92, 89, 97, 90, 68,111,112, 82, 73, 95, 85, //40
|
||||||
78,121, 86, 71, 67,102,107, 84,114,103,115,253,253,253,253,253, //50
|
78,121, 86, 71, 67,102,107, 84,114,103,115,SYM,SYM,SYM,SYM,SYM, //50
|
||||||
253, 50, 74, 60, 61, 42, 76, 70, 64, 53,105, 93, 56, 65, 54, 49, //60
|
SYM, 50, 74, 60, 61, 42, 76, 70, 64, 53,105, 93, 56, 65, 54, 49, //60
|
||||||
66,110, 51, 43, 44, 63, 81, 77, 98, 75,108,253,253,253,253,253, //70
|
66,110, 51, 43, 44, 63, 81, 77, 98, 75,108,SYM,SYM,SYM,SYM,SYM, //70
|
||||||
124,202,203,204,205, 40, 58,206,207,208,209,210,211,212,213,214,
|
124,ILL,203,204,205, 40, 58,206,207,208,ILL,210,ILL,ILL,ILL,ILL,
|
||||||
215, 83, 52, 47, 46, 72, 32, 94,216,113,217,109,218,219,220,221,
|
ILL, 83, 52, 47, 46, 72, 32, 94,216,113,ILL,109,ILL,ILL,ILL,ILL,
|
||||||
34,116,222,118,100,223,224,117,119,104,125,225,226, 87, 99,227,
|
34,116,222,118,100,223,224,117,119,104,125,225,226, 87, 99,227,
|
||||||
106,122,123,228, 55,229,230,101,231,232,120,233, 48, 39, 57,234,
|
106,122,123,228, 55,229,230,101,231,232,120,233, 48, 39, 57,234,
|
||||||
30, 59, 41, 88, 33, 37, 36, 31, 29, 35,235, 62, 28,236,126,237,
|
30, 59, 41, 88, 33, 37, 36, 31, 29, 35,235, 62, 28,236,126,237,
|
||||||
238, 38, 45,239,240,241,242,243,127,244,245,246,247,248,249,250,
|
238, 38, 45,239,240,241,242,243,127,ILL,ILL,ILL,ILL,ILL,ILL,ILL,
|
||||||
9, 8, 20, 16, 3, 2, 24, 14, 22, 1, 25, 15, 4, 11, 6, 23,
|
9, 8, 20, 16, 3, 2, 24, 14, 22, 1, 25, 15, 4, 11, 6, 23,
|
||||||
12, 19, 13, 26, 18, 27, 21, 17, 7, 10, 5,251,252,128, 96,253,
|
12, 19, 13, 26, 18, 27, 21, 17, 7, 10, 5,ILL,ILL,128, 96,ILL,
|
||||||
};
|
};
|
||||||
|
|
||||||
//Model Table:
|
//Model Table:
|
||||||
@ -208,10 +208,13 @@ static const PRUint8 HebrewLangModel[] =
|
|||||||
0,0,0,0,0,0,0,0,0,0,1,2,1,0,0,0,0,0,1,1,1,1,1,0,1,0,0,0,1,1,0,0,
|
0,0,0,0,0,0,0,0,0,0,1,2,1,0,0,0,0,0,1,1,1,1,1,0,1,0,0,0,1,1,0,0,
|
||||||
};
|
};
|
||||||
|
|
||||||
const SequenceModel Win1255Model(
|
const SequenceModel Win1255Model =
|
||||||
|
{
|
||||||
win1255_CharToOrderMap,
|
win1255_CharToOrderMap,
|
||||||
HebrewLangModel,
|
HebrewLangModel,
|
||||||
|
64,
|
||||||
(float)0.984004,
|
(float)0.984004,
|
||||||
PR_FALSE,
|
PR_FALSE,
|
||||||
"windows-1255");
|
"WINDOWS-1255"
|
||||||
|
};
|
||||||
|
|
169
PowerEditor/src/uchardet/LangModels/LangHungarianModel.cpp
Normal file
169
PowerEditor/src/uchardet/LangModels/LangHungarianModel.cpp
Normal file
@ -0,0 +1,169 @@
|
|||||||
|
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
||||||
|
/* ***** BEGIN LICENSE BLOCK *****
|
||||||
|
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||||
|
*
|
||||||
|
* The contents of this file are subject to the Mozilla Public License Version
|
||||||
|
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
* http://www.mozilla.org/MPL/
|
||||||
|
*
|
||||||
|
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||||
|
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||||
|
* for the specific language governing rights and limitations under the
|
||||||
|
* License.
|
||||||
|
*
|
||||||
|
* The Original Code is Mozilla Communicator client code.
|
||||||
|
*
|
||||||
|
* The Initial Developer of the Original Code is
|
||||||
|
* Netscape Communications Corporation.
|
||||||
|
* Portions created by the Initial Developer are Copyright (C) 1998
|
||||||
|
* the Initial Developer. All Rights Reserved.
|
||||||
|
*
|
||||||
|
* Contributor(s):
|
||||||
|
*
|
||||||
|
* Alternatively, the contents of this file may be used under the terms of
|
||||||
|
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||||
|
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||||
|
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||||
|
* of those above. If you wish to allow use of your version of this file only
|
||||||
|
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||||
|
* use your version of this file under the terms of the MPL, indicate your
|
||||||
|
* decision by deleting the provisions above and replace them with the notice
|
||||||
|
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||||
|
* the provisions above, a recipient may use your version of this file under
|
||||||
|
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||||
|
*
|
||||||
|
* ***** END LICENSE BLOCK ***** */
|
||||||
|
|
||||||
|
#include "../nsSBCharSetProber.h"
|
||||||
|
|
||||||
|
/********* Language model for: Hungarian *********/
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Generated by BuildLangModel.py
|
||||||
|
* On: 2015-12-12 18:02:46.730481
|
||||||
|
**/
|
||||||
|
|
||||||
|
/* Character Mapping Table:
|
||||||
|
* ILL: illegal character.
|
||||||
|
* CTR: control character specific to the charset.
|
||||||
|
* RET: carriage/return.
|
||||||
|
* SYM: symbol (punctuation) that does not belong to word.
|
||||||
|
* NUM: 0 - 9.
|
||||||
|
*
|
||||||
|
* Other characters are ordered by probabilities
|
||||||
|
* (0 is the most common character in the language).
|
||||||
|
*
|
||||||
|
* Orders are generic to a language. So the codepoint with order X in
|
||||||
|
* CHARSET1 maps to the same character as the codepoint with the same
|
||||||
|
* order X in CHARSET2 for the same language.
|
||||||
|
* As such, it is possible to get missing order. For instance the
|
||||||
|
* ligature of 'o' and 'e' exists in ISO-8859-15 but not in ISO-8859-1
|
||||||
|
* even though they are both used for French. Same for the euro sign.
|
||||||
|
*/
|
||||||
|
static const unsigned char Iso_8859_2_CharToOrderMap[] =
|
||||||
|
{
|
||||||
|
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */
|
||||||
|
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
|
||||||
|
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
|
||||||
|
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
|
||||||
|
SYM, 1, 15, 23, 16, 0, 24, 13, 20, 7, 22, 9, 4, 12, 6, 8, /* 4X */
|
||||||
|
21, 34, 5, 3, 2, 19, 17, 32, 33, 18, 10,SYM,SYM,SYM,SYM,SYM, /* 5X */
|
||||||
|
SYM, 1, 15, 23, 16, 0, 24, 13, 20, 7, 22, 9, 4, 12, 6, 8, /* 6X */
|
||||||
|
21, 34, 5, 3, 2, 19, 17, 32, 33, 18, 10,SYM,SYM,SYM,SYM,CTR, /* 7X */
|
||||||
|
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */
|
||||||
|
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */
|
||||||
|
SYM, 55,SYM, 42,SYM, 56, 46,SYM,SYM, 37, 52, 57, 58,SYM, 48, 59, /* AX */
|
||||||
|
SYM, 60,SYM, 42,SYM, 61, 46,SYM,SYM, 37, 52, 62, 63,SYM, 48, 64, /* BX */
|
||||||
|
65, 11, 40, 36, 35, 66, 38, 39, 41, 14, 50, 67, 53, 28, 45, 68, /* CX */
|
||||||
|
49, 43, 54, 26, 69, 27, 25,SYM, 44, 70, 30, 31, 29, 47, 51, 71, /* DX */
|
||||||
|
72, 11, 40, 36, 35, 73, 38, 39, 41, 14, 50, 74, 53, 28, 45, 75, /* EX */
|
||||||
|
49, 43, 54, 26, 76, 27, 25,SYM, 44, 77, 30, 31, 29, 47, 51,SYM, /* FX */
|
||||||
|
};
|
||||||
|
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
|
||||||
|
|
||||||
|
static const unsigned char Windows_1250_CharToOrderMap[] =
|
||||||
|
{
|
||||||
|
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */
|
||||||
|
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
|
||||||
|
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
|
||||||
|
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
|
||||||
|
SYM, 1, 15, 23, 16, 0, 24, 13, 20, 7, 22, 9, 4, 12, 6, 8, /* 4X */
|
||||||
|
21, 34, 5, 3, 2, 19, 17, 32, 33, 18, 10,SYM,SYM,SYM,SYM,SYM, /* 5X */
|
||||||
|
SYM, 1, 15, 23, 16, 0, 24, 13, 20, 7, 22, 9, 4, 12, 6, 8, /* 6X */
|
||||||
|
21, 34, 5, 3, 2, 19, 17, 32, 33, 18, 10,SYM,SYM,SYM,SYM,CTR, /* 7X */
|
||||||
|
SYM,ILL,SYM,ILL,SYM,SYM,SYM,SYM,ILL,SYM, 37,SYM, 46, 78, 48, 79, /* 8X */
|
||||||
|
ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,ILL,SYM, 37,SYM, 46, 80, 48, 81, /* 9X */
|
||||||
|
SYM,SYM,SYM, 42,SYM, 82,SYM,SYM,SYM,SYM, 52,SYM,SYM,SYM,SYM, 83, /* AX */
|
||||||
|
SYM,SYM,SYM, 42,SYM,SYM,SYM,SYM,SYM, 84, 52,SYM, 85,SYM, 86, 87, /* BX */
|
||||||
|
88, 11, 40, 36, 35, 89, 38, 39, 41, 14, 50, 90, 53, 28, 45, 91, /* CX */
|
||||||
|
49, 43, 54, 26, 92, 27, 25,SYM, 44, 93, 30, 31, 29, 47, 51, 94, /* DX */
|
||||||
|
95, 11, 40, 36, 35, 96, 38, 39, 41, 14, 50, 97, 53, 28, 45, 98, /* EX */
|
||||||
|
49, 43, 54, 26, 99, 27, 25,SYM, 44,100, 30, 31, 29, 47, 51,SYM, /* FX */
|
||||||
|
};
|
||||||
|
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
|
||||||
|
|
||||||
|
|
||||||
|
/* Model Table:
|
||||||
|
* Total sequences: 1084
|
||||||
|
* First 512 sequences: 0.9748272224933486
|
||||||
|
* Next 512 sequences (512-1024): 0.024983863604162403
|
||||||
|
* Rest: 0.0001889139024889644
|
||||||
|
* Negative sequences: TODO
|
||||||
|
*/
|
||||||
|
static const PRUint8 HungarianLangModel[] =
|
||||||
|
{
|
||||||
|
3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,3,3,3,3,3,3,3,3,3,3,3,1,0,2,2,0,0,
|
||||||
|
3,2,3,3,3,3,3,3,2,3,3,2,3,3,2,3,3,3,3,3,3,3,3,3,3,0,0,2,2,1,2,1,
|
||||||
|
3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,3,3,3,3,2,3,2,2,3,3,3,3,3,2,2,
|
||||||
|
3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,3,2,3,3,3,2,3,2,2,3,3,3,3,3,2,
|
||||||
|
3,3,3,3,3,2,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,2,2,
|
||||||
|
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,3,3,3,3,3,3,2,
|
||||||
|
3,3,3,3,3,2,3,3,3,3,3,3,2,3,3,3,3,3,3,3,2,2,2,3,3,3,2,3,2,2,2,2,
|
||||||
|
3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,1,3,3,3,2,3,3,2,3,0,2,2,2,2,
|
||||||
|
3,2,3,3,3,3,3,2,2,3,3,2,3,3,0,3,3,3,2,3,3,3,2,3,3,0,2,0,0,0,0,0,
|
||||||
|
3,3,3,3,3,3,3,3,3,3,2,3,2,2,3,3,2,2,2,3,2,2,2,2,2,3,3,2,3,3,2,2,
|
||||||
|
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,2,2,2,3,3,3,3,3,2,2,
|
||||||
|
1,2,3,3,3,3,3,3,2,3,3,0,3,3,2,3,3,3,2,2,2,3,3,3,2,0,0,0,2,0,0,0,
|
||||||
|
3,3,3,2,3,2,2,3,3,2,3,3,3,2,3,3,2,2,2,3,2,3,2,2,2,2,3,2,2,2,2,3,
|
||||||
|
3,3,3,3,3,3,3,3,3,3,2,3,2,3,3,3,2,3,3,3,3,2,3,2,2,3,3,2,3,2,2,2,
|
||||||
|
0,1,3,3,3,3,3,2,2,3,3,0,3,3,2,3,3,3,0,0,2,3,2,3,0,0,0,0,0,2,0,0,
|
||||||
|
3,3,2,3,3,3,2,3,3,2,2,3,2,1,3,3,3,2,2,3,1,2,2,2,2,2,3,3,3,2,2,2,
|
||||||
|
3,3,3,3,2,3,3,3,3,2,2,3,3,2,3,2,2,3,2,3,2,2,3,2,2,3,3,3,3,2,2,2,
|
||||||
|
3,3,2,2,2,2,2,3,3,2,0,3,0,2,3,2,2,2,1,2,2,0,2,1,2,3,2,3,3,2,2,2,
|
||||||
|
3,3,3,3,2,2,3,3,3,2,3,3,3,2,3,3,2,3,1,3,3,2,2,2,2,2,2,2,2,2,2,3,
|
||||||
|
3,2,3,3,3,3,3,2,2,3,2,3,3,3,0,3,3,2,2,2,2,2,2,3,2,0,0,0,1,0,0,0,
|
||||||
|
3,3,2,2,2,2,2,3,3,2,0,3,2,2,2,2,2,2,2,3,2,0,2,2,2,2,2,2,3,2,2,2,
|
||||||
|
3,3,3,3,3,3,2,3,3,2,2,3,1,2,3,2,2,2,2,3,2,3,3,3,2,2,2,2,3,3,2,0,
|
||||||
|
3,3,3,2,2,2,3,2,3,2,2,3,2,2,3,2,3,2,0,3,2,2,2,2,2,2,3,0,2,2,3,2,
|
||||||
|
3,3,2,3,2,2,2,3,3,3,3,2,2,2,3,2,2,2,2,2,3,0,0,2,2,2,2,0,3,0,0,0,
|
||||||
|
3,3,2,2,2,3,2,3,3,0,0,2,2,2,3,2,2,2,2,3,0,2,2,2,2,3,2,3,2,3,2,2,
|
||||||
|
2,0,3,3,3,3,3,0,0,3,3,0,2,3,0,3,3,3,0,0,2,2,2,2,1,0,0,0,0,0,0,0,
|
||||||
|
2,2,3,3,3,3,3,3,2,3,3,2,3,3,2,3,3,3,0,0,2,3,3,2,2,2,0,0,1,2,2,0,
|
||||||
|
2,2,3,3,3,3,2,3,2,3,3,2,2,2,2,3,3,2,0,0,2,2,3,2,2,1,0,0,1,2,1,0,
|
||||||
|
0,2,3,2,2,3,3,2,2,2,3,0,3,3,0,2,2,3,0,2,1,2,3,2,2,0,0,0,0,0,0,0,
|
||||||
|
0,0,3,2,3,2,3,0,0,3,2,0,2,3,0,0,2,2,0,0,1,0,2,0,0,0,0,0,0,0,0,0,
|
||||||
|
2,2,3,3,3,2,3,0,0,2,2,0,0,3,0,2,2,2,0,0,2,2,3,2,1,0,0,0,0,0,0,0,
|
||||||
|
2,2,2,2,3,2,2,2,0,3,2,0,2,2,0,2,2,3,0,2,2,0,2,2,2,0,0,0,0,0,0,0,
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
const SequenceModel Iso_8859_2HungarianModel =
|
||||||
|
{
|
||||||
|
Iso_8859_2_CharToOrderMap,
|
||||||
|
HungarianLangModel,
|
||||||
|
32,
|
||||||
|
(float)0.9748272224933486,
|
||||||
|
PR_FALSE,
|
||||||
|
"ISO-8859-2"
|
||||||
|
};
|
||||||
|
|
||||||
|
const SequenceModel Windows_1250HungarianModel =
|
||||||
|
{
|
||||||
|
Windows_1250_CharToOrderMap,
|
||||||
|
HungarianLangModel,
|
||||||
|
32,
|
||||||
|
(float)0.9748272224933486,
|
||||||
|
PR_FALSE,
|
||||||
|
"WINDOWS-1250"
|
||||||
|
};
|
@ -35,7 +35,7 @@
|
|||||||
*
|
*
|
||||||
* ***** END LICENSE BLOCK ***** */
|
* ***** END LICENSE BLOCK ***** */
|
||||||
|
|
||||||
#include "nsSBCharSetProber.h"
|
#include "../nsSBCharSetProber.h"
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@ -43,18 +43,18 @@
|
|||||||
//Character Mapping Table:
|
//Character Mapping Table:
|
||||||
static const unsigned char KOI8R_CharToOrderMap[] =
|
static const unsigned char KOI8R_CharToOrderMap[] =
|
||||||
{
|
{
|
||||||
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, //00
|
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, //00
|
||||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, //10
|
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, //10
|
||||||
+253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, //20
|
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, //20
|
||||||
252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, //30
|
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, //30
|
||||||
253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, //40
|
SYM,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, //40
|
||||||
155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, //50
|
155,156,157,158,159,160,161,162,163,164,165,SYM,SYM,SYM,SYM,SYM, //50
|
||||||
253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, //60
|
SYM, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, //60
|
||||||
67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, //70
|
67,179, 78, 73,180,181, 79,182,183,184,185,SYM,SYM,SYM,SYM,SYM, //70
|
||||||
191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206, //80
|
191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206, //80
|
||||||
207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222, //90
|
207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222, //90
|
||||||
223,224,225, 68,226,227,228,229,230,231,232,233,234,235,236,237, //a0
|
223,224,225, 68,226,227,228,229,230,231,232,233,234,235,236,237, //a0
|
||||||
238,239,240,241,242,243,244,245,246,247,248,249,250,251,252,253, //b0
|
238,239,240,241,242,243,244,245,246,247,248,249,250,251,NUM,SYM, //b0
|
||||||
27, 3, 21, 28, 13, 2, 39, 19, 26, 4, 23, 11, 8, 12, 5, 1, //c0
|
27, 3, 21, 28, 13, 2, 39, 19, 26, 4, 23, 11, 8, 12, 5, 1, //c0
|
||||||
15, 16, 9, 7, 6, 14, 24, 10, 17, 18, 20, 25, 30, 29, 22, 54, //d0
|
15, 16, 9, 7, 6, 14, 24, 10, 17, 18, 20, 25, 30, 29, 22, 54, //d0
|
||||||
59, 37, 44, 58, 41, 48, 53, 46, 55, 42, 60, 36, 49, 38, 31, 34, //e0
|
59, 37, 44, 58, 41, 48, 53, 46, 55, 42, 60, 36, 49, 38, 31, 34, //e0
|
||||||
@ -63,18 +63,18 @@ static const unsigned char KOI8R_CharToOrderMap[] =
|
|||||||
|
|
||||||
static const unsigned char win1251_CharToOrderMap[] =
|
static const unsigned char win1251_CharToOrderMap[] =
|
||||||
{
|
{
|
||||||
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, //00
|
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, //00
|
||||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, //10
|
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, //10
|
||||||
+253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, //20
|
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, //20
|
||||||
252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, //30
|
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, //30
|
||||||
253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, //40
|
SYM,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, //40
|
||||||
155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, //50
|
155,156,157,158,159,160,161,162,163,164,165,SYM,SYM,SYM,SYM,SYM, //50
|
||||||
253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, //60
|
SYM, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, //60
|
||||||
67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, //70
|
67,179, 78, 73,180,181, 79,182,183,184,185,SYM,SYM,SYM,SYM,SYM, //70
|
||||||
191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,
|
191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,
|
||||||
207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,
|
207,208,209,210,211,212,213,214,ILL,216,217,218,219,220,221,222,
|
||||||
223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,
|
223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,
|
||||||
239,240,241,242,243,244,245,246, 68,247,248,249,250,251,252,253,
|
239,240,241,242,243,244,245,246, 68,247,248,249,250,251,NUM,SYM,
|
||||||
37, 44, 33, 46, 41, 48, 56, 51, 42, 60, 36, 49, 38, 31, 34, 35,
|
37, 44, 33, 46, 41, 48, 56, 51, 42, 60, 36, 49, 38, 31, 34, 35,
|
||||||
45, 32, 40, 52, 53, 55, 58, 50, 57, 63, 70, 62, 61, 47, 59, 43,
|
45, 32, 40, 52, 53, 55, 58, 50, 57, 63, 70, 62, 61, 47, 59, 43,
|
||||||
3, 21, 10, 19, 13, 2, 24, 20, 4, 23, 11, 8, 12, 5, 1, 15,
|
3, 21, 10, 19, 13, 2, 24, 20, 4, 23, 11, 8, 12, 5, 1, 15,
|
||||||
@ -83,14 +83,14 @@ static const unsigned char win1251_CharToOrderMap[] =
|
|||||||
|
|
||||||
static const unsigned char latin5_CharToOrderMap[] =
|
static const unsigned char latin5_CharToOrderMap[] =
|
||||||
{
|
{
|
||||||
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, //00
|
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, //00
|
||||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, //10
|
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, //10
|
||||||
+253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, //20
|
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, //20
|
||||||
252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, //30
|
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, //30
|
||||||
253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, //40
|
SYM,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, //40
|
||||||
155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, //50
|
155,156,157,158,159,160,161,162,163,164,165,SYM,SYM,SYM,SYM,SYM, //50
|
||||||
253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, //60
|
SYM, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, //60
|
||||||
67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, //70
|
67,179, 78, 73,180,181, 79,182,183,184,185,SYM,SYM,SYM,SYM,SYM, //70
|
||||||
191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,
|
191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,
|
||||||
207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,
|
207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,
|
||||||
223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,
|
223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,
|
||||||
@ -98,39 +98,39 @@ static const unsigned char latin5_CharToOrderMap[] =
|
|||||||
45, 32, 40, 52, 53, 55, 58, 50, 57, 63, 70, 62, 61, 47, 59, 43,
|
45, 32, 40, 52, 53, 55, 58, 50, 57, 63, 70, 62, 61, 47, 59, 43,
|
||||||
3, 21, 10, 19, 13, 2, 24, 20, 4, 23, 11, 8, 12, 5, 1, 15,
|
3, 21, 10, 19, 13, 2, 24, 20, 4, 23, 11, 8, 12, 5, 1, 15,
|
||||||
9, 7, 6, 14, 39, 26, 28, 22, 25, 29, 54, 18, 17, 30, 27, 16,
|
9, 7, 6, 14, 39, 26, 28, 22, 25, 29, 54, 18, 17, 30, 27, 16,
|
||||||
239, 68,240,241,242,243,244,245,246,247,248,249,250,251,252,255,
|
239, 68,240,241,242,243,244,245,246,247,248,249,250,251,NUM,CTR,
|
||||||
};
|
};
|
||||||
|
|
||||||
static const unsigned char macCyrillic_CharToOrderMap[] =
|
static const unsigned char macCyrillic_CharToOrderMap[] =
|
||||||
{
|
{
|
||||||
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, //00
|
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, //00
|
||||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, //10
|
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, //10
|
||||||
+253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, //20
|
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, //20
|
||||||
252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, //30
|
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, //30
|
||||||
253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, //40
|
SYM,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, //40
|
||||||
155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, //50
|
155,156,157,158,159,160,161,162,163,164,165,SYM,SYM,SYM,SYM,SYM, //50
|
||||||
253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, //60
|
SYM, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, //60
|
||||||
67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, //70
|
67,179, 78, 73,180,181, 79,182,183,184,185,SYM,SYM,SYM,SYM,SYM, //70
|
||||||
37, 44, 33, 46, 41, 48, 56, 51, 42, 60, 36, 49, 38, 31, 34, 35,
|
37, 44, 33, 46, 41, 48, 56, 51, 42, 60, 36, 49, 38, 31, 34, 35,
|
||||||
45, 32, 40, 52, 53, 55, 58, 50, 57, 63, 70, 62, 61, 47, 59, 43,
|
45, 32, 40, 52, 53, 55, 58, 50, 57, 63, 70, 62, 61, 47, 59, 43,
|
||||||
191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,
|
191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,
|
||||||
207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,
|
207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,
|
||||||
223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,
|
223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,
|
||||||
239,240,241,242,243,244,245,246,247,248,249,250,251,252, 68, 16,
|
239,240,241,242,243,244,245,246,247,248,249,250,251,NUM, 68, 16,
|
||||||
3, 21, 10, 19, 13, 2, 24, 20, 4, 23, 11, 8, 12, 5, 1, 15,
|
3, 21, 10, 19, 13, 2, 24, 20, 4, 23, 11, 8, 12, 5, 1, 15,
|
||||||
9, 7, 6, 14, 39, 26, 28, 22, 25, 29, 54, 18, 17, 30, 27,255,
|
9, 7, 6, 14, 39, 26, 28, 22, 25, 29, 54, 18, 17, 30, 27,CTR,
|
||||||
};
|
};
|
||||||
|
|
||||||
static const unsigned char IBM855_CharToOrderMap[] =
|
static const unsigned char IBM855_CharToOrderMap[] =
|
||||||
{
|
{
|
||||||
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, //00
|
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, //00
|
||||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, //10
|
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, //10
|
||||||
+253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, //20
|
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, //20
|
||||||
252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, //30
|
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, //30
|
||||||
253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, //40
|
SYM,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, //40
|
||||||
155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, //50
|
155,156,157,158,159,160,161,162,163,164,165,SYM,SYM,SYM,SYM,SYM, //50
|
||||||
253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, //60
|
SYM, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, //60
|
||||||
67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, //70
|
67,179, 78, 73,180,181, 79,182,183,184,185,SYM,SYM,SYM,SYM,SYM, //70
|
||||||
191,192,193,194, 68,195,196,197,198,199,200,201,202,203,204,205,
|
191,192,193,194, 68,195,196,197,198,199,200,201,202,203,204,205,
|
||||||
206,207,208,209,210,211,212,213,214,215,216,217, 27, 59, 54, 70,
|
206,207,208,209,210,211,212,213,214,215,216,217, 27, 59, 54, 70,
|
||||||
3, 37, 21, 44, 28, 58, 13, 41, 2, 48, 39, 53, 19, 46,218,219,
|
3, 37, 21, 44, 28, 58, 13, 41, 2, 48, 39, 53, 19, 46,218,219,
|
||||||
@ -138,19 +138,19 @@ static const unsigned char IBM855_CharToOrderMap[] =
|
|||||||
230,231,232,233,234,235, 11, 36,236,237,238,239,240,241,242,243,
|
230,231,232,233,234,235, 11, 36,236,237,238,239,240,241,242,243,
|
||||||
8, 49, 12, 38, 5, 31, 1, 34, 15,244,245,246,247, 35, 16,248,
|
8, 49, 12, 38, 5, 31, 1, 34, 15,244,245,246,247, 35, 16,248,
|
||||||
43, 9, 45, 7, 32, 6, 40, 14, 52, 24, 56, 10, 33, 17, 61,249,
|
43, 9, 45, 7, 32, 6, 40, 14, 52, 24, 56, 10, 33, 17, 61,249,
|
||||||
250, 18, 62, 20, 51, 25, 57, 30, 47, 29, 63, 22, 50,251,252,255,
|
250, 18, 62, 20, 51, 25, 57, 30, 47, 29, 63, 22, 50,251,NUM,CTR,
|
||||||
};
|
};
|
||||||
|
|
||||||
static const unsigned char IBM866_CharToOrderMap[] =
|
static const unsigned char IBM866_CharToOrderMap[] =
|
||||||
{
|
{
|
||||||
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, //00
|
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, //00
|
||||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, //10
|
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, //10
|
||||||
+253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, //20
|
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, //20
|
||||||
252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, //30
|
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, //30
|
||||||
253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, //40
|
SYM,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, //40
|
||||||
155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, //50
|
155,156,157,158,159,160,161,162,163,164,165,SYM,SYM,SYM,SYM,SYM, //50
|
||||||
253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, //60
|
SYM, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, //60
|
||||||
67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, //70
|
67,179, 78, 73,180,181, 79,182,183,184,185,SYM,SYM,SYM,SYM,SYM, //70
|
||||||
37, 44, 33, 46, 41, 48, 56, 51, 42, 60, 36, 49, 38, 31, 34, 35,
|
37, 44, 33, 46, 41, 48, 56, 51, 42, 60, 36, 49, 38, 31, 34, 35,
|
||||||
45, 32, 40, 52, 53, 55, 58, 50, 57, 63, 70, 62, 61, 47, 59, 43,
|
45, 32, 40, 52, 53, 55, 58, 50, 57, 63, 70, 62, 61, 47, 59, 43,
|
||||||
3, 21, 10, 19, 13, 2, 24, 20, 4, 23, 11, 8, 12, 5, 1, 15,
|
3, 21, 10, 19, 13, 2, 24, 20, 4, 23, 11, 8, 12, 5, 1, 15,
|
||||||
@ -158,7 +158,7 @@ static const unsigned char IBM866_CharToOrderMap[] =
|
|||||||
207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,
|
207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,
|
||||||
223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,
|
223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,
|
||||||
9, 7, 6, 14, 39, 26, 28, 22, 25, 29, 54, 18, 17, 30, 27, 16,
|
9, 7, 6, 14, 39, 26, 28, 22, 25, 29, 54, 18, 17, 30, 27, 16,
|
||||||
239, 68,240,241,242,243,244,245,246,247,248,249,250,251,252,255,
|
239, 68,240,241,242,243,244,245,246,247,248,249,250,251,NUM,CTR,
|
||||||
};
|
};
|
||||||
|
|
||||||
//Model Table:
|
//Model Table:
|
||||||
@ -300,50 +300,62 @@ static const PRUint8 RussianLangModel[] =
|
|||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
const SequenceModel Koi8rModel(
|
const SequenceModel Koi8rRussianModel =
|
||||||
|
{
|
||||||
KOI8R_CharToOrderMap,
|
KOI8R_CharToOrderMap,
|
||||||
RussianLangModel,
|
RussianLangModel,
|
||||||
|
64,
|
||||||
(float)0.976601,
|
(float)0.976601,
|
||||||
PR_FALSE,
|
PR_FALSE,
|
||||||
"KOI8-R"
|
"KOI8-R"
|
||||||
);
|
};
|
||||||
|
|
||||||
const SequenceModel Win1251Model(
|
const SequenceModel Win1251RussianModel =
|
||||||
|
{
|
||||||
win1251_CharToOrderMap,
|
win1251_CharToOrderMap,
|
||||||
RussianLangModel,
|
RussianLangModel,
|
||||||
|
64,
|
||||||
(float)0.976601,
|
(float)0.976601,
|
||||||
PR_FALSE,
|
PR_FALSE,
|
||||||
"windows-1251"
|
"WINDOWS-1251"
|
||||||
);
|
};
|
||||||
|
|
||||||
const SequenceModel Latin5Model(
|
const SequenceModel Latin5RussianModel =
|
||||||
|
{
|
||||||
latin5_CharToOrderMap,
|
latin5_CharToOrderMap,
|
||||||
RussianLangModel,
|
RussianLangModel,
|
||||||
|
64,
|
||||||
(float)0.976601,
|
(float)0.976601,
|
||||||
PR_FALSE,
|
PR_FALSE,
|
||||||
"ISO-8859-5"
|
"ISO-8859-5"
|
||||||
);
|
};
|
||||||
|
|
||||||
const SequenceModel MacCyrillicModel(
|
const SequenceModel MacCyrillicRussianModel =
|
||||||
|
{
|
||||||
macCyrillic_CharToOrderMap,
|
macCyrillic_CharToOrderMap,
|
||||||
RussianLangModel,
|
RussianLangModel,
|
||||||
|
64,
|
||||||
(float)0.976601,
|
(float)0.976601,
|
||||||
PR_FALSE,
|
PR_FALSE,
|
||||||
"x-mac-cyrillic"
|
"MAC-CYRILLIC"
|
||||||
);
|
};
|
||||||
|
|
||||||
const SequenceModel Ibm866Model(
|
const SequenceModel Ibm866RussianModel =
|
||||||
|
{
|
||||||
IBM866_CharToOrderMap,
|
IBM866_CharToOrderMap,
|
||||||
RussianLangModel,
|
RussianLangModel,
|
||||||
|
64,
|
||||||
(float)0.976601,
|
(float)0.976601,
|
||||||
PR_FALSE,
|
PR_FALSE,
|
||||||
"IBM866"
|
"IBM866"
|
||||||
);
|
};
|
||||||
|
|
||||||
const SequenceModel Ibm855Model(
|
const SequenceModel Ibm855RussianModel =
|
||||||
|
{
|
||||||
IBM855_CharToOrderMap,
|
IBM855_CharToOrderMap,
|
||||||
RussianLangModel,
|
RussianLangModel,
|
||||||
|
64,
|
||||||
(float)0.976601,
|
(float)0.976601,
|
||||||
PR_FALSE,
|
PR_FALSE,
|
||||||
"IBM855"
|
"IBM855"
|
||||||
);
|
};
|
201
PowerEditor/src/uchardet/LangModels/LangSpanishModel.cpp
Normal file
201
PowerEditor/src/uchardet/LangModels/LangSpanishModel.cpp
Normal file
@ -0,0 +1,201 @@
|
|||||||
|
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
||||||
|
/* ***** BEGIN LICENSE BLOCK *****
|
||||||
|
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||||
|
*
|
||||||
|
* The contents of this file are subject to the Mozilla Public License Version
|
||||||
|
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
* http://www.mozilla.org/MPL/
|
||||||
|
*
|
||||||
|
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||||
|
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||||
|
* for the specific language governing rights and limitations under the
|
||||||
|
* License.
|
||||||
|
*
|
||||||
|
* The Original Code is Mozilla Communicator client code.
|
||||||
|
*
|
||||||
|
* The Initial Developer of the Original Code is
|
||||||
|
* Netscape Communications Corporation.
|
||||||
|
* Portions created by the Initial Developer are Copyright (C) 1998
|
||||||
|
* the Initial Developer. All Rights Reserved.
|
||||||
|
*
|
||||||
|
* Contributor(s):
|
||||||
|
*
|
||||||
|
* Alternatively, the contents of this file may be used under the terms of
|
||||||
|
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||||
|
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||||
|
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||||
|
* of those above. If you wish to allow use of your version of this file only
|
||||||
|
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||||
|
* use your version of this file under the terms of the MPL, indicate your
|
||||||
|
* decision by deleting the provisions above and replace them with the notice
|
||||||
|
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||||
|
* the provisions above, a recipient may use your version of this file under
|
||||||
|
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||||
|
*
|
||||||
|
* ***** END LICENSE BLOCK ***** */
|
||||||
|
|
||||||
|
#include "../nsSBCharSetProber.h"
|
||||||
|
|
||||||
|
/********* Language model for: Spanish *********/
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Generated by BuildLangModel.py
|
||||||
|
* On: 2015-12-12 18:39:02.290370
|
||||||
|
**/
|
||||||
|
|
||||||
|
/* Character Mapping Table:
|
||||||
|
* ILL: illegal character.
|
||||||
|
* CTR: control character specific to the charset.
|
||||||
|
* RET: carriage/return.
|
||||||
|
* SYM: symbol (punctuation) that does not belong to word.
|
||||||
|
* NUM: 0 - 9.
|
||||||
|
*
|
||||||
|
* Other characters are ordered by probabilities
|
||||||
|
* (0 is the most common character in the language).
|
||||||
|
*
|
||||||
|
* Orders are generic to a language. So the codepoint with order X in
|
||||||
|
* CHARSET1 maps to the same character as the codepoint with the same
|
||||||
|
* order X in CHARSET2 for the same language.
|
||||||
|
* As such, it is possible to get missing order. For instance the
|
||||||
|
* ligature of 'o' and 'e' exists in ISO-8859-15 but not in ISO-8859-1
|
||||||
|
* even though they are both used for French. Same for the euro sign.
|
||||||
|
*/
|
||||||
|
static const unsigned char Iso_8859_1_CharToOrderMap[] =
|
||||||
|
{
|
||||||
|
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */
|
||||||
|
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
|
||||||
|
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
|
||||||
|
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
|
||||||
|
SYM, 1, 14, 10, 8, 0, 16, 15, 20, 5, 23, 27, 7, 12, 3, 2, /* 4X */
|
||||||
|
13, 21, 6, 4, 9, 11, 18, 31, 28, 17, 24,SYM,SYM,SYM,SYM,SYM, /* 5X */
|
||||||
|
SYM, 1, 14, 10, 8, 0, 16, 15, 20, 5, 23, 27, 7, 12, 3, 2, /* 6X */
|
||||||
|
13, 21, 6, 4, 9, 11, 18, 31, 28, 17, 24,SYM,SYM,SYM,SYM,CTR, /* 7X */
|
||||||
|
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */
|
||||||
|
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */
|
||||||
|
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */
|
||||||
|
SYM,SYM,SYM,SYM,SYM, 52,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */
|
||||||
|
33, 25, 39, 46, 37, 45, 47, 35, 36, 26, 48, 40, 53, 22, 41, 43, /* CX */
|
||||||
|
49, 29, 38, 19, 50, 54, 34,SYM, 44, 51, 30, 55, 32, 42, 56, 57, /* DX */
|
||||||
|
33, 25, 39, 46, 37, 45, 47, 35, 36, 26, 48, 40, 58, 22, 41, 43, /* EX */
|
||||||
|
49, 29, 38, 19, 50, 59, 34,SYM, 44, 51, 30, 60, 32, 42, 61, 62, /* FX */
|
||||||
|
};
|
||||||
|
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
|
||||||
|
|
||||||
|
static const unsigned char Iso_8859_15_CharToOrderMap[] =
|
||||||
|
{
|
||||||
|
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */
|
||||||
|
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
|
||||||
|
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
|
||||||
|
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
|
||||||
|
SYM, 1, 14, 10, 8, 0, 16, 15, 20, 5, 23, 27, 7, 12, 3, 2, /* 4X */
|
||||||
|
13, 21, 6, 4, 9, 11, 18, 31, 28, 17, 24,SYM,SYM,SYM,SYM,SYM, /* 5X */
|
||||||
|
SYM, 1, 14, 10, 8, 0, 16, 15, 20, 5, 23, 27, 7, 12, 3, 2, /* 6X */
|
||||||
|
13, 21, 6, 4, 9, 11, 18, 31, 28, 17, 24,SYM,SYM,SYM,SYM,CTR, /* 7X */
|
||||||
|
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */
|
||||||
|
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */
|
||||||
|
SYM,SYM,SYM,SYM,SYM,SYM, 63,SYM, 64,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */
|
||||||
|
SYM,SYM,SYM,SYM, 65, 66,SYM,SYM, 67,SYM,SYM,SYM, 68, 69, 70,SYM, /* BX */
|
||||||
|
33, 25, 39, 46, 37, 45, 47, 35, 36, 26, 48, 40, 71, 22, 41, 43, /* CX */
|
||||||
|
49, 29, 38, 19, 50, 72, 34,SYM, 44, 51, 30, 73, 32, 42, 74, 75, /* DX */
|
||||||
|
33, 25, 39, 46, 37, 45, 47, 35, 36, 26, 48, 40, 76, 22, 41, 43, /* EX */
|
||||||
|
49, 29, 38, 19, 50, 77, 34,SYM, 44, 51, 30, 78, 32, 42, 79, 80, /* FX */
|
||||||
|
};
|
||||||
|
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
|
||||||
|
|
||||||
|
static const unsigned char Windows_1252_CharToOrderMap[] =
|
||||||
|
{
|
||||||
|
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */
|
||||||
|
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
|
||||||
|
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
|
||||||
|
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
|
||||||
|
SYM, 1, 14, 10, 8, 0, 16, 15, 20, 5, 23, 27, 7, 12, 3, 2, /* 4X */
|
||||||
|
13, 21, 6, 4, 9, 11, 18, 31, 28, 17, 24,SYM,SYM,SYM,SYM,SYM, /* 5X */
|
||||||
|
SYM, 1, 14, 10, 8, 0, 16, 15, 20, 5, 23, 27, 7, 12, 3, 2, /* 6X */
|
||||||
|
13, 21, 6, 4, 9, 11, 18, 31, 28, 17, 24,SYM,SYM,SYM,SYM,CTR, /* 7X */
|
||||||
|
SYM,ILL,SYM, 81,SYM,SYM,SYM,SYM,SYM,SYM, 82,SYM, 83,ILL, 84,ILL, /* 8X */
|
||||||
|
ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 85,SYM, 86,ILL, 87, 88, /* 9X */
|
||||||
|
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */
|
||||||
|
SYM,SYM,SYM,SYM,SYM, 89,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */
|
||||||
|
33, 25, 39, 46, 37, 45, 47, 35, 36, 26, 48, 40, 90, 22, 41, 43, /* CX */
|
||||||
|
49, 29, 38, 19, 50, 91, 34,SYM, 44, 51, 30, 92, 32, 42, 93, 94, /* DX */
|
||||||
|
33, 25, 39, 46, 37, 45, 47, 35, 36, 26, 48, 40, 95, 22, 41, 43, /* EX */
|
||||||
|
49, 29, 38, 19, 50, 96, 34,SYM, 44, 51, 30, 97, 32, 42, 98, 99, /* FX */
|
||||||
|
};
|
||||||
|
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
|
||||||
|
|
||||||
|
|
||||||
|
/* Model Table:
|
||||||
|
* Total sequences: 897
|
||||||
|
* First 512 sequences: 0.9970385677528184
|
||||||
|
* Next 512 sequences (512-1024): 0.0029614322471815486
|
||||||
|
* Rest: 4.597017211338539e-17
|
||||||
|
* Negative sequences: TODO
|
||||||
|
*/
|
||||||
|
static const PRUint8 SpanishLangModel[] =
|
||||||
|
{
|
||||||
|
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,0,3,3,3,2,3,0,
|
||||||
|
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,2,3,3,3,3,3,3,0,
|
||||||
|
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,2,3,3,2,2,3,3,2,2,3,0,
|
||||||
|
3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,0,3,2,0,
|
||||||
|
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,0,2,3,3,3,0,0,2,2,0,
|
||||||
|
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,0,
|
||||||
|
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,3,3,3,3,2,0,3,2,2,
|
||||||
|
3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,2,3,3,2,2,0,2,2,0,
|
||||||
|
3,3,3,2,3,3,3,3,2,2,2,3,3,2,2,3,2,3,3,3,3,2,3,2,2,3,3,2,0,0,2,2,2,
|
||||||
|
3,3,3,3,3,3,3,3,2,3,3,3,2,2,3,2,2,3,2,3,3,0,3,2,2,3,3,0,0,0,2,2,2,
|
||||||
|
3,3,3,3,3,3,3,3,2,3,3,3,2,2,2,2,2,3,0,3,3,2,3,0,2,3,3,3,0,0,2,0,0,
|
||||||
|
3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,2,3,0,2,0,
|
||||||
|
3,3,3,3,3,3,2,2,2,2,2,3,3,3,3,2,2,3,0,3,2,0,3,2,0,3,3,2,2,0,3,2,2,
|
||||||
|
3,3,3,2,3,3,3,3,2,3,3,3,2,3,3,0,2,2,2,3,3,0,3,2,0,3,3,2,0,0,3,0,0,
|
||||||
|
3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,2,2,3,2,3,2,2,3,3,0,3,2,2,0,0,2,2,0,
|
||||||
|
3,3,3,3,3,3,3,3,3,3,0,3,3,0,2,2,2,2,2,3,3,0,3,2,2,2,3,2,0,0,3,2,3,
|
||||||
|
3,3,3,2,2,3,3,3,2,3,2,3,2,2,2,2,3,2,0,3,0,0,3,2,0,2,2,2,0,0,3,2,0,
|
||||||
|
3,3,3,3,3,3,3,3,2,2,2,3,2,2,2,2,2,2,0,3,2,0,0,2,2,2,2,2,0,0,2,2,0,
|
||||||
|
3,3,3,2,2,3,2,2,2,0,2,3,0,2,0,2,2,2,2,3,0,0,3,0,0,2,3,2,0,0,0,0,0,
|
||||||
|
0,0,0,3,3,0,3,3,3,3,3,0,3,3,2,3,2,0,3,0,0,0,0,0,0,0,0,0,2,0,0,0,0,
|
||||||
|
3,3,3,3,2,3,3,3,3,3,2,3,3,0,2,0,2,3,2,2,2,0,3,2,2,2,3,0,2,0,2,2,2,
|
||||||
|
2,3,2,0,2,2,0,2,2,2,0,3,0,0,0,0,0,0,0,0,0,2,2,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
3,3,3,3,3,0,2,2,3,3,3,2,3,2,3,3,3,0,2,0,0,2,0,2,2,0,0,0,0,0,0,0,0,
|
||||||
|
3,3,3,2,0,3,2,2,2,2,0,3,2,2,0,0,0,0,0,3,0,0,2,2,0,2,3,0,0,0,2,0,2,
|
||||||
|
3,3,3,2,0,3,2,0,2,2,2,3,2,2,2,3,0,2,0,3,2,3,2,0,3,3,2,2,0,0,2,0,0,
|
||||||
|
2,0,0,3,3,2,3,3,2,3,3,2,3,3,2,3,3,2,2,0,2,2,0,2,2,0,0,0,2,2,0,0,0,
|
||||||
|
2,3,2,3,3,2,3,3,3,3,3,2,2,3,2,3,2,2,2,0,0,0,0,2,0,0,0,0,3,0,0,0,0,
|
||||||
|
3,3,3,2,3,3,3,3,2,2,2,3,3,0,2,2,2,3,2,0,2,0,2,0,0,0,0,2,0,0,2,2,0,
|
||||||
|
3,3,3,2,2,3,2,2,2,3,3,3,2,3,2,0,2,2,3,2,2,2,0,2,0,2,2,2,3,0,0,2,0,
|
||||||
|
3,3,3,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,2,0,0,3,0,0,2,0,0,0,0,0,0,0,
|
||||||
|
2,3,2,3,3,0,2,3,2,3,2,0,3,2,3,0,2,0,0,0,0,0,0,2,2,0,0,0,0,2,0,0,0,
|
||||||
|
3,3,3,3,2,3,2,2,2,2,2,2,0,0,2,0,2,2,0,0,2,0,0,2,0,2,0,2,0,0,0,2,0,
|
||||||
|
3,0,0,2,2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,2,0,0,0,0,0,0,
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
const SequenceModel Iso_8859_1SpanishModel =
|
||||||
|
{
|
||||||
|
Iso_8859_1_CharToOrderMap,
|
||||||
|
SpanishLangModel,
|
||||||
|
33,
|
||||||
|
(float)0.9970385677528184,
|
||||||
|
PR_TRUE,
|
||||||
|
"ISO-8859-1"
|
||||||
|
};
|
||||||
|
|
||||||
|
const SequenceModel Iso_8859_15SpanishModel =
|
||||||
|
{
|
||||||
|
Iso_8859_15_CharToOrderMap,
|
||||||
|
SpanishLangModel,
|
||||||
|
33,
|
||||||
|
(float)0.9970385677528184,
|
||||||
|
PR_TRUE,
|
||||||
|
"ISO-8859-15"
|
||||||
|
};
|
||||||
|
|
||||||
|
const SequenceModel Windows_1252SpanishModel =
|
||||||
|
{
|
||||||
|
Windows_1252_CharToOrderMap,
|
||||||
|
SpanishLangModel,
|
||||||
|
33,
|
||||||
|
(float)0.9970385677528184,
|
||||||
|
PR_TRUE,
|
||||||
|
"WINDOWS-1252"
|
||||||
|
};
|
265
PowerEditor/src/uchardet/LangModels/LangThaiModel.cpp
Normal file
265
PowerEditor/src/uchardet/LangModels/LangThaiModel.cpp
Normal file
@ -0,0 +1,265 @@
|
|||||||
|
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
||||||
|
/* ***** BEGIN LICENSE BLOCK *****
|
||||||
|
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||||
|
*
|
||||||
|
* The contents of this file are subject to the Mozilla Public License Version
|
||||||
|
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
* http://www.mozilla.org/MPL/
|
||||||
|
*
|
||||||
|
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||||
|
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||||
|
* for the specific language governing rights and limitations under the
|
||||||
|
* License.
|
||||||
|
*
|
||||||
|
* The Original Code is Mozilla Communicator client code.
|
||||||
|
*
|
||||||
|
* The Initial Developer of the Original Code is
|
||||||
|
* Netscape Communications Corporation.
|
||||||
|
* Portions created by the Initial Developer are Copyright (C) 1998
|
||||||
|
* the Initial Developer. All Rights Reserved.
|
||||||
|
*
|
||||||
|
* Contributor(s):
|
||||||
|
*
|
||||||
|
* Alternatively, the contents of this file may be used under the terms of
|
||||||
|
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||||
|
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||||
|
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||||
|
* of those above. If you wish to allow use of your version of this file only
|
||||||
|
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||||
|
* use your version of this file under the terms of the MPL, indicate your
|
||||||
|
* decision by deleting the provisions above and replace them with the notice
|
||||||
|
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||||
|
* the provisions above, a recipient may use your version of this file under
|
||||||
|
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||||
|
*
|
||||||
|
* ***** END LICENSE BLOCK ***** */
|
||||||
|
|
||||||
|
#include "../nsSBCharSetProber.h"
|
||||||
|
|
||||||
|
/********* Language model for: Thai *********/
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Generated by BuildLangModel.py
|
||||||
|
* On: 2015-12-04 03:05:06.182099
|
||||||
|
**/
|
||||||
|
|
||||||
|
/* Character Mapping Table:
|
||||||
|
* ILL: illegal character.
|
||||||
|
* CTR: control character specific to the charset.
|
||||||
|
* RET: carriage/return.
|
||||||
|
* SYM: symbol (punctuation) that does not belong to word.
|
||||||
|
* NUM: 0 - 9.
|
||||||
|
*
|
||||||
|
* Other characters are ordered by probabilities
|
||||||
|
* (0 is the most common character in the language).
|
||||||
|
*
|
||||||
|
* Orders are generic to a language. So the codepoint with order X in
|
||||||
|
* CHARSET1 maps to the same character as the codepoint with the same
|
||||||
|
* order X in CHARSET2 for the same language.
|
||||||
|
* As such, it is possible to get missing order. For instance the
|
||||||
|
* ligature of 'o' and 'e' exists in ISO-8859-15 but not in ISO-8859-1
|
||||||
|
* even though they are both used for French. Same for the euro sign.
|
||||||
|
*/
|
||||||
|
static const unsigned char Tis_620_CharToOrderMap[] =
|
||||||
|
{
|
||||||
|
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */
|
||||||
|
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
|
||||||
|
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
|
||||||
|
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
|
||||||
|
SYM, 66, 70, 67, 80, 78, 87, 85, 73, 79, 93, 88, 84, 68, 77, 81, /* 4X */
|
||||||
|
75,101, 74, 61, 71, 86, 96, 90,103,100, 99,SYM,SYM,SYM,SYM,SYM, /* 5X */
|
||||||
|
SYM, 35, 64, 48, 52, 32, 60, 65, 54, 36, 97, 76, 46, 56, 41, 40, /* 6X */
|
||||||
|
59,104, 43, 45, 44, 55, 72, 82, 94, 57, 92,SYM,SYM,SYM,SYM,CTR, /* 7X */
|
||||||
|
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */
|
||||||
|
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */
|
||||||
|
ILL, 3, 23,105, 15,106, 89, 5, 21, 63, 26, 31,102, 42, 69, 58, /* AX */
|
||||||
|
49, 91, 83, 34, 9, 17, 30, 12, 39, 1, 16, 19, 33, 62, 22, 47, /* BX */
|
||||||
|
38, 7, 10, 2, 50, 11,107, 8, 28, 37, 13, 18, 98, 4, 53, 95, /* CX */
|
||||||
|
14,SYM, 0, 29,SYM,SYM,SYM,SYM,SYM,SYM,SYM,ILL,ILL,ILL,ILL,SYM, /* DX */
|
||||||
|
6, 20, 27, 24, 25,108, 51,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,109, /* EX */
|
||||||
|
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,110,111,ILL,ILL,ILL,ILL, /* FX */
|
||||||
|
};
|
||||||
|
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
|
||||||
|
|
||||||
|
static const unsigned char Iso_8859_11_CharToOrderMap[] =
|
||||||
|
{
|
||||||
|
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */
|
||||||
|
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
|
||||||
|
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
|
||||||
|
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
|
||||||
|
SYM, 66, 70, 67, 80, 78, 87, 85, 73, 79, 93, 88, 84, 68, 77, 81, /* 4X */
|
||||||
|
75,101, 74, 61, 71, 86, 96, 90,103,100, 99,SYM,SYM,SYM,SYM,SYM, /* 5X */
|
||||||
|
SYM, 35, 64, 48, 52, 32, 60, 65, 54, 36, 97, 76, 46, 56, 41, 40, /* 6X */
|
||||||
|
59,104, 43, 45, 44, 55, 72, 82, 94, 57, 92,SYM,SYM,SYM,SYM,CTR, /* 7X */
|
||||||
|
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */
|
||||||
|
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */
|
||||||
|
SYM, 3, 23,112, 15,113, 89, 5, 21, 63, 26, 31,102, 42, 69, 58, /* AX */
|
||||||
|
49, 91, 83, 34, 9, 17, 30, 12, 39, 1, 16, 19, 33, 62, 22, 47, /* BX */
|
||||||
|
38, 7, 10, 2, 50, 11,114, 8, 28, 37, 13, 18, 98, 4, 53, 95, /* CX */
|
||||||
|
14,SYM, 0, 29,SYM,SYM,SYM,SYM,SYM,SYM,SYM,ILL,ILL,ILL,ILL,SYM, /* DX */
|
||||||
|
6, 20, 27, 24, 25,115, 51,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,116, /* EX */
|
||||||
|
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,117,118,ILL,ILL,ILL,ILL, /* FX */
|
||||||
|
};
|
||||||
|
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
|
||||||
|
|
||||||
|
|
||||||
|
/* Model Table:
|
||||||
|
* Total sequences: 2324
|
||||||
|
* First 512 sequences: 0.8815720594354438
|
||||||
|
* Next 512 sequences (512-1024): 0.0920860122682917
|
||||||
|
* Rest: 0.026341928296264486
|
||||||
|
* Negative sequences: TODO
|
||||||
|
*/
|
||||||
|
static const PRUint8 ThaiLangModel[] =
|
||||||
|
{
|
||||||
|
0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,2,3,
|
||||||
|
0,2,3,0,0,3,2,3,0,0,2,0,0,0,0,2,0,1,1,1,0,2,0,0,0,0,1,0,0,0,1,1,
|
||||||
|
3,3,3,3,3,2,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,
|
||||||
|
0,3,0,0,0,1,3,3,0,0,1,0,0,0,0,2,0,2,1,2,0,1,0,0,0,0,0,0,0,0,2,1,
|
||||||
|
3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,2,3,3,3,3,3,3,2,2,2,3,1,3,2,
|
||||||
|
0,2,3,0,0,2,2,1,0,0,0,0,0,0,0,1,0,1,1,0,0,1,0,0,0,0,0,0,0,0,2,1,
|
||||||
|
3,3,3,3,3,2,3,3,3,3,2,3,3,3,2,3,2,3,3,3,3,3,3,3,3,3,2,3,2,3,2,3,
|
||||||
|
0,2,1,0,0,3,2,1,0,0,0,0,0,0,0,1,0,3,3,1,0,1,0,0,0,0,3,0,0,0,1,1,
|
||||||
|
3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,2,2,2,2,3,3,2,2,1,2,2,2,
|
||||||
|
0,2,0,0,0,0,2,2,0,0,1,0,0,0,0,2,0,0,1,1,0,1,0,0,0,0,0,0,0,0,1,1,
|
||||||
|
3,3,3,3,3,2,3,3,3,3,3,3,3,3,1,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,2,
|
||||||
|
0,3,0,0,0,1,2,2,0,0,1,0,0,0,0,2,0,1,1,2,0,2,0,0,0,0,0,0,0,0,2,1,
|
||||||
|
0,3,3,3,3,2,0,3,3,3,3,3,3,3,0,3,3,3,3,3,0,3,3,3,0,0,3,0,3,0,1,3,
|
||||||
|
0,2,0,0,0,2,2,2,0,0,0,0,0,0,0,3,0,0,0,0,0,2,0,0,0,0,0,0,0,0,1,3,
|
||||||
|
3,3,3,3,3,2,3,3,3,3,2,3,3,3,2,3,3,3,3,3,3,2,3,3,3,3,2,2,1,0,2,1,
|
||||||
|
0,2,2,0,1,2,2,1,0,0,1,0,0,0,0,1,0,1,1,1,0,1,0,0,0,0,0,0,0,0,1,1,
|
||||||
|
3,3,3,3,3,3,3,3,2,3,3,3,3,2,2,2,3,2,2,2,3,3,3,2,2,2,2,2,2,0,2,2,
|
||||||
|
0,1,2,0,0,0,1,0,0,0,1,0,0,0,0,1,0,0,3,1,0,1,0,0,0,0,0,0,0,0,1,1,
|
||||||
|
3,3,3,3,3,3,3,2,3,2,3,3,3,3,0,3,2,3,2,2,3,2,2,3,3,3,2,2,1,3,2,1,
|
||||||
|
0,1,0,0,0,0,2,1,0,0,0,0,0,0,0,1,0,1,1,1,0,1,0,0,0,0,0,0,0,0,1,1,
|
||||||
|
3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,2,1,2,2,
|
||||||
|
0,2,0,0,0,0,3,1,0,0,1,0,0,0,0,1,0,1,1,1,0,1,0,0,0,0,0,0,0,0,1,1,
|
||||||
|
3,3,2,3,3,3,3,3,3,3,2,3,3,3,3,2,2,3,2,2,2,2,1,3,2,2,2,2,1,3,1,2,
|
||||||
|
0,1,0,0,0,0,1,1,0,0,1,0,0,0,0,1,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,1,
|
||||||
|
3,3,3,1,2,1,2,1,2,3,3,1,1,2,2,3,2,1,2,1,1,1,2,1,1,1,1,1,3,3,0,1,
|
||||||
|
0,0,0,0,0,1,1,3,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
3,3,3,2,3,3,3,3,3,3,2,3,2,2,2,2,3,3,3,2,2,1,1,1,2,2,1,2,1,3,3,2,
|
||||||
|
0,1,0,0,0,0,2,2,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
|
||||||
|
0,3,3,3,3,1,3,3,3,3,3,2,3,3,0,3,3,3,3,3,3,3,3,2,3,3,3,3,2,0,2,2,
|
||||||
|
0,2,1,0,0,0,2,2,0,0,1,0,0,0,0,1,0,1,1,0,0,2,0,0,0,0,1,0,0,0,1,1,
|
||||||
|
3,3,3,1,3,2,2,3,3,2,2,3,1,1,2,2,1,2,1,2,1,3,1,1,1,1,1,2,0,3,0,1,
|
||||||
|
0,0,2,0,0,0,1,1,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,
|
||||||
|
3,3,3,3,3,1,3,2,3,3,2,3,3,3,1,3,3,3,3,3,3,2,2,2,3,3,2,2,2,2,2,2,
|
||||||
|
0,2,0,0,0,0,2,1,0,0,0,0,0,0,0,1,0,1,0,1,0,1,0,0,0,0,1,0,0,0,1,1,
|
||||||
|
3,3,3,3,3,1,2,1,2,1,3,2,2,2,3,1,2,2,1,1,2,1,1,2,2,1,1,2,1,3,3,1,
|
||||||
|
0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,
|
||||||
|
3,3,3,1,2,1,0,3,3,1,2,3,1,1,1,0,0,3,1,1,0,0,1,0,0,0,0,0,0,0,0,0,
|
||||||
|
0,0,0,0,0,0,2,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
3,2,3,3,3,1,2,1,2,2,2,3,2,2,2,1,1,2,1,2,2,2,1,1,2,2,1,1,1,0,2,1,
|
||||||
|
0,1,0,0,0,0,1,1,0,0,1,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,3,0,0,0,0,0,
|
||||||
|
0,3,3,3,3,1,0,3,2,2,2,3,3,3,0,3,3,3,3,3,0,1,2,2,0,0,1,0,0,0,3,3,
|
||||||
|
0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,
|
||||||
|
3,3,3,3,3,1,3,2,2,2,1,1,2,2,3,2,1,2,1,1,2,3,3,2,2,2,1,2,0,3,1,2,
|
||||||
|
0,1,1,0,0,0,1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
|
||||||
|
3,1,3,2,3,1,2,2,3,2,3,3,3,2,0,1,3,1,1,1,2,2,1,2,1,1,1,1,1,1,1,0,
|
||||||
|
0,1,1,0,0,0,1,1,0,0,1,0,0,0,0,1,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
3,3,1,1,3,0,1,1,2,1,2,1,2,1,0,1,1,2,1,1,1,1,1,1,1,1,1,1,0,1,1,1,
|
||||||
|
0,0,3,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
0,3,0,3,0,0,0,0,0,2,1,0,0,2,0,1,1,3,3,1,0,3,0,0,0,0,3,0,0,0,0,0,
|
||||||
|
0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
0,1,3,2,2,0,0,3,3,3,0,2,3,1,0,2,2,2,2,3,0,1,1,3,0,0,1,0,0,0,1,2,
|
||||||
|
0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
3,3,1,2,3,1,2,2,2,1,2,2,2,2,1,1,2,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,
|
||||||
|
0,0,0,0,0,1,0,1,0,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
0,3,3,2,3,0,0,2,1,3,2,3,3,1,0,3,2,3,1,2,0,2,2,1,0,0,1,0,1,0,1,2,
|
||||||
|
0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,2,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,
|
||||||
|
3,3,2,2,2,0,2,2,2,1,2,1,2,2,0,1,1,2,1,1,2,2,1,2,2,2,1,1,1,0,1,1,
|
||||||
|
0,0,0,0,0,2,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,2,0,0,0,0,0,0,0,0,1,0,
|
||||||
|
0,3,3,3,2,2,3,2,2,2,1,3,2,2,0,3,2,2,3,1,3,1,2,2,3,2,1,2,1,0,2,1,
|
||||||
|
0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,0,1,0,
|
||||||
|
3,2,1,1,2,1,2,2,2,1,1,2,2,1,1,1,2,1,1,1,2,1,1,1,2,1,1,1,1,0,1,0,
|
||||||
|
0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,
|
||||||
|
3,3,1,1,3,2,2,1,1,1,1,2,1,0,1,1,1,2,0,1,1,0,0,0,0,1,1,1,0,0,0,1,
|
||||||
|
0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,
|
||||||
|
2,0,0,2,2,0,0,0,2,3,0,3,2,3,3,0,2,0,0,0,2,0,1,2,2,1,0,2,2,1,0,0,
|
||||||
|
1,2,0,1,0,1,1,1,1,1,2,3,0,1,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
3,1,2,2,1,1,1,1,1,1,1,1,2,2,3,1,1,1,2,1,1,1,1,1,1,1,1,1,1,0,1,1,
|
||||||
|
0,0,0,0,0,0,2,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
1,0,0,1,2,0,0,0,1,3,0,3,3,2,3,0,2,0,0,0,2,0,1,1,2,2,0,2,1,1,0,0,
|
||||||
|
0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
2,0,0,3,1,0,0,0,3,3,0,2,3,3,2,0,3,0,0,0,2,0,1,1,2,0,0,1,1,0,0,0,
|
||||||
|
3,1,1,2,1,0,1,1,1,1,2,0,2,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0,0,1,1,
|
||||||
|
0,1,3,0,0,1,2,0,0,0,0,0,0,0,0,1,0,2,0,0,0,0,0,0,0,0,1,0,0,0,1,0,
|
||||||
|
3,0,2,1,1,0,0,1,0,0,1,0,2,1,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
3,1,3,1,2,1,1,2,1,1,1,0,1,1,0,1,1,1,1,1,1,0,0,1,1,1,0,1,1,1,0,0,
|
||||||
|
0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
2,0,0,1,1,0,0,0,1,3,0,3,2,2,2,0,2,0,0,0,2,0,1,2,2,1,0,2,3,0,0,0,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
3,0,0,2,2,0,0,0,2,2,0,1,3,2,1,0,2,0,0,0,3,0,1,1,1,1,0,0,1,0,0,0,
|
||||||
|
3,1,1,1,1,0,2,1,1,0,0,1,2,1,0,1,1,1,2,1,1,1,1,1,2,1,2,1,1,0,1,1,
|
||||||
|
0,0,0,0,0,0,1,0,0,0,3,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
3,0,0,3,3,0,0,0,2,2,0,2,2,2,1,0,2,0,0,0,2,0,1,1,1,2,0,1,1,0,0,0,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
3,0,0,2,3,0,0,0,2,1,0,2,2,2,1,0,1,0,0,0,1,0,3,2,1,2,0,1,1,0,0,0,
|
||||||
|
0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
2,0,0,1,2,0,0,0,2,1,0,1,3,2,1,0,2,0,0,0,1,0,2,1,1,1,0,1,0,0,0,0,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
3,0,0,2,2,0,0,0,2,2,0,0,1,1,2,0,1,0,0,0,1,0,1,1,1,1,0,1,1,0,0,0,
|
||||||
|
1,1,3,2,2,0,2,1,1,1,1,2,1,1,0,1,1,2,1,0,1,1,1,1,1,1,1,1,0,0,0,1,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
2,0,0,2,2,0,0,0,2,0,0,1,2,1,1,0,1,0,0,0,0,0,2,1,0,1,0,0,0,0,0,0,
|
||||||
|
3,1,1,1,2,0,1,2,1,0,0,0,1,2,0,1,2,1,1,1,1,0,0,0,1,1,0,1,1,0,0,1,
|
||||||
|
0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
0,0,0,3,0,0,0,0,0,2,0,0,1,0,0,1,0,2,2,0,0,1,0,0,0,0,0,0,2,0,1,0,
|
||||||
|
0,0,0,0,0,3,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
0,0,0,1,1,0,1,1,1,0,0,0,1,0,0,1,0,1,0,0,1,0,1,1,1,1,0,0,0,0,0,0,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
2,0,0,2,2,0,0,0,2,0,0,1,0,1,1,0,1,0,0,0,1,0,1,1,1,2,0,0,2,0,0,0,
|
||||||
|
2,1,1,0,2,0,2,1,1,1,1,2,1,1,1,0,0,1,0,1,0,0,0,0,0,0,1,1,0,0,0,0,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
3,0,0,2,2,0,0,0,2,1,0,1,1,1,1,0,1,0,0,0,0,0,0,1,0,1,0,0,1,0,0,0,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
1,0,0,1,1,0,0,0,0,2,0,2,2,2,2,0,2,0,0,0,2,0,1,0,1,1,0,1,1,1,0,0,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
2,0,0,2,2,0,0,0,2,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,1,2,0,1,0,0,0,0,
|
||||||
|
0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
1,0,0,1,1,0,0,0,1,1,0,0,1,2,1,0,1,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,
|
||||||
|
1,0,1,2,1,0,1,1,1,1,0,1,1,1,0,1,1,1,1,0,1,1,1,1,1,1,1,0,0,0,1,1,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
2,0,0,2,1,0,0,0,2,0,0,2,1,1,2,0,0,0,0,0,0,0,2,1,1,2,0,1,0,0,0,0,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
2,0,0,1,2,0,0,0,2,1,0,2,1,0,0,0,0,0,0,0,0,0,0,2,0,1,0,0,2,0,0,0,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
1,0,0,1,1,0,0,0,1,0,0,0,2,0,0,0,2,0,0,0,0,0,1,1,1,1,0,1,1,1,0,0,
|
||||||
|
0,1,2,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
2,1,1,0,0,0,0,1,1,1,1,2,0,0,1,0,1,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
const SequenceModel Tis_620ThaiModel =
|
||||||
|
{
|
||||||
|
Tis_620_CharToOrderMap,
|
||||||
|
ThaiLangModel,
|
||||||
|
64,
|
||||||
|
(float)0.8815720594354438,
|
||||||
|
PR_FALSE,
|
||||||
|
"TIS-620"
|
||||||
|
};
|
||||||
|
|
||||||
|
const SequenceModel Iso_8859_11ThaiModel =
|
||||||
|
{
|
||||||
|
Iso_8859_11_CharToOrderMap,
|
||||||
|
ThaiLangModel,
|
||||||
|
64,
|
||||||
|
(float)0.8815720594354438,
|
||||||
|
PR_FALSE,
|
||||||
|
"ISO-8859-11"
|
||||||
|
};
|
173
PowerEditor/src/uchardet/LangModels/LangTurkishModel.cpp
Normal file
173
PowerEditor/src/uchardet/LangModels/LangTurkishModel.cpp
Normal file
@ -0,0 +1,173 @@
|
|||||||
|
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
||||||
|
/* ***** BEGIN LICENSE BLOCK *****
|
||||||
|
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||||
|
*
|
||||||
|
* The contents of this file are subject to the Mozilla Public License Version
|
||||||
|
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
* http://www.mozilla.org/MPL/
|
||||||
|
*
|
||||||
|
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||||
|
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||||
|
* for the specific language governing rights and limitations under the
|
||||||
|
* License.
|
||||||
|
*
|
||||||
|
* The Original Code is Mozilla Communicator client code.
|
||||||
|
*
|
||||||
|
* The Initial Developer of the Original Code is
|
||||||
|
* Netscape Communications Corporation.
|
||||||
|
* Portions created by the Initial Developer are Copyright (C) 1998
|
||||||
|
* the Initial Developer. All Rights Reserved.
|
||||||
|
*
|
||||||
|
* Contributor(s):
|
||||||
|
*
|
||||||
|
* Alternatively, the contents of this file may be used under the terms of
|
||||||
|
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||||
|
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||||
|
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||||
|
* of those above. If you wish to allow use of your version of this file only
|
||||||
|
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||||
|
* use your version of this file under the terms of the MPL, indicate your
|
||||||
|
* decision by deleting the provisions above and replace them with the notice
|
||||||
|
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||||
|
* the provisions above, a recipient may use your version of this file under
|
||||||
|
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||||
|
*
|
||||||
|
* ***** END LICENSE BLOCK ***** */
|
||||||
|
|
||||||
|
#include "../nsSBCharSetProber.h"
|
||||||
|
|
||||||
|
/********* Language model for: Turkish *********/
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Generated by BuildLangModel.py
|
||||||
|
* On: 2015-12-04 02:24:44.730727
|
||||||
|
**/
|
||||||
|
|
||||||
|
/* Character Mapping Table:
|
||||||
|
* ILL: illegal character.
|
||||||
|
* CTR: control character specific to the charset.
|
||||||
|
* RET: carriage/return.
|
||||||
|
* SYM: symbol (punctuation) that does not belong to word.
|
||||||
|
* NUM: 0 - 9.
|
||||||
|
*
|
||||||
|
* Other characters are ordered by probabilities
|
||||||
|
* (0 is the most common character in the language).
|
||||||
|
*
|
||||||
|
* Orders are generic to a language. So the codepoint with order X in
|
||||||
|
* CHARSET1 maps to the same character as the codepoint with the same
|
||||||
|
* order X in CHARSET2 for the same language.
|
||||||
|
* As such, it is possible to get missing order. For instance the
|
||||||
|
* ligature of 'o' and 'e' exists in ISO-8859-15 but not in ISO-8859-1
|
||||||
|
* even though they are both used for French. Same for the euro sign.
|
||||||
|
*/
|
||||||
|
static const unsigned char Iso_8859_3_CharToOrderMap[] =
|
||||||
|
{
|
||||||
|
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */
|
||||||
|
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
|
||||||
|
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
|
||||||
|
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
|
||||||
|
SYM, 0, 15, 21, 7, 1, 26, 22, 19, 6, 28, 9, 5, 11, 3, 14, /* 4X */
|
||||||
|
23, 34, 4, 10, 8, 12, 20, 29, 32, 13, 18,SYM,SYM,SYM,SYM,SYM, /* 5X */
|
||||||
|
SYM, 0, 15, 21, 7, 1, 26, 22, 19, 2, 28, 9, 5, 11, 3, 14, /* 6X */
|
||||||
|
23, 34, 4, 10, 8, 12, 20, 29, 32, 13, 18,SYM,SYM,SYM,SYM,CTR, /* 7X */
|
||||||
|
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */
|
||||||
|
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */
|
||||||
|
SYM, 48,SYM,SYM,SYM,ILL, 49,SYM,SYM, 2, 17, 25, 50,SYM,ILL, 51, /* AX */
|
||||||
|
SYM, 52,SYM,SYM,SYM,SYM, 53,SYM,SYM, 6, 17, 25, 54,SYM,ILL, 55, /* BX */
|
||||||
|
41, 36, 30,ILL, 39, 56, 57, 24, 42, 33, 58, 45, 59, 37, 31, 60, /* CX */
|
||||||
|
ILL, 47, 61, 38, 62, 63, 27,SYM, 64, 65, 40, 35, 16, 66, 67, 68, /* DX */
|
||||||
|
41, 36, 30,ILL, 39, 69, 70, 24, 42, 33, 71, 45, 72, 37, 31, 73, /* EX */
|
||||||
|
ILL, 47, 74, 38, 75, 76, 27,SYM, 77, 78, 40, 35, 16, 79, 80,SYM, /* FX */
|
||||||
|
};
|
||||||
|
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
|
||||||
|
|
||||||
|
static const unsigned char Iso_8859_9_CharToOrderMap[] =
|
||||||
|
{
|
||||||
|
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */
|
||||||
|
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
|
||||||
|
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
|
||||||
|
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
|
||||||
|
SYM, 0, 15, 21, 7, 1, 26, 22, 19, 6, 28, 9, 5, 11, 3, 14, /* 4X */
|
||||||
|
23, 34, 4, 10, 8, 12, 20, 29, 32, 13, 18,SYM,SYM,SYM,SYM,SYM, /* 5X */
|
||||||
|
SYM, 0, 15, 21, 7, 1, 26, 22, 19, 2, 28, 9, 5, 11, 3, 14, /* 6X */
|
||||||
|
23, 34, 4, 10, 8, 12, 20, 29, 32, 13, 18,SYM,SYM,SYM,SYM,CTR, /* 7X */
|
||||||
|
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */
|
||||||
|
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */
|
||||||
|
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */
|
||||||
|
SYM,SYM,SYM,SYM,SYM, 81,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */
|
||||||
|
41, 36, 30, 44, 39, 82, 46, 24, 42, 33, 83, 45, 84, 37, 31, 85, /* CX */
|
||||||
|
25, 47, 86, 38, 87, 88, 27,SYM, 43, 89, 40, 35, 16, 2, 17, 90, /* DX */
|
||||||
|
41, 36, 30, 44, 39, 91, 46, 24, 42, 33, 92, 45, 93, 37, 31, 94, /* EX */
|
||||||
|
25, 47, 95, 38, 96, 97, 27,SYM, 43, 98, 40, 35, 16, 6, 17, 99, /* FX */
|
||||||
|
};
|
||||||
|
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
|
||||||
|
|
||||||
|
|
||||||
|
/* Model Table:
|
||||||
|
* Total sequences: 935
|
||||||
|
* First 512 sequences: 0.991865243864388
|
||||||
|
* Next 512 sequences (512-1024): 0.008134756135611957
|
||||||
|
* Rest: 2.949029909160572e-17
|
||||||
|
* Negative sequences: TODO
|
||||||
|
*/
|
||||||
|
static const PRUint8 TurkishLangModel[] =
|
||||||
|
{
|
||||||
|
3,2,3,3,3,3,2,3,3,3,3,3,3,3,2,3,0,3,3,3,3,3,3,3,3,3,3,0,3,3,0,2,2,2,2,0,
|
||||||
|
3,3,3,3,3,3,0,3,3,3,3,3,3,3,3,3,0,3,3,3,3,3,3,3,3,3,3,0,3,3,2,0,3,0,2,0,
|
||||||
|
3,3,3,3,3,3,0,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,0,2,2,2,0,2,0,2,0,
|
||||||
|
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,3,0,3,2,2,2,2,2,2,0,0,0,
|
||||||
|
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,2,3,2,2,2,2,2,2,2,
|
||||||
|
3,3,3,2,2,3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,2,3,3,2,3,0,3,2,2,2,2,3,0,2,2,2,
|
||||||
|
3,2,0,3,3,3,3,3,3,3,3,3,2,3,2,3,0,3,3,2,3,3,2,3,2,3,2,0,0,0,0,0,2,0,0,0,
|
||||||
|
3,3,3,2,3,3,3,3,2,2,2,2,3,3,3,2,3,0,2,2,2,2,2,2,0,0,0,3,2,3,2,2,0,0,0,0,
|
||||||
|
3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,0,2,3,3,2,2,2,3,0,2,3,2,2,3,2,2,0,0,0,
|
||||||
|
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,2,3,2,2,2,2,3,0,2,3,2,2,3,0,0,0,0,2,
|
||||||
|
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,2,3,3,3,2,3,3,0,2,3,0,2,2,0,0,2,2,2,
|
||||||
|
3,3,3,2,3,3,3,3,2,2,3,3,3,3,3,3,3,2,3,3,0,3,2,3,2,0,2,2,0,2,3,2,2,2,2,2,
|
||||||
|
3,3,3,3,3,3,0,3,3,3,3,3,2,3,2,3,0,3,3,3,3,3,3,3,3,3,2,0,2,2,0,0,2,2,0,0,
|
||||||
|
3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,2,2,2,2,2,3,2,2,0,2,3,0,2,2,0,0,2,0,2,
|
||||||
|
3,3,3,3,3,3,0,3,3,3,3,3,3,3,3,3,0,3,3,3,3,3,3,3,3,3,3,0,3,3,0,2,2,2,0,0,
|
||||||
|
3,3,3,3,3,3,3,3,0,2,2,3,3,3,3,3,3,0,2,2,2,2,0,2,0,0,0,3,2,2,2,0,0,2,0,0,
|
||||||
|
2,2,2,3,3,3,0,3,3,3,3,3,0,3,2,3,0,3,3,3,3,3,2,3,3,3,3,0,2,0,0,0,0,0,0,0,
|
||||||
|
3,3,3,0,2,3,3,2,3,3,2,3,3,2,2,3,3,2,0,2,2,2,2,2,3,0,2,2,0,0,2,2,0,0,0,0,
|
||||||
|
3,3,3,2,2,3,3,3,2,2,0,3,3,3,3,2,3,0,2,2,0,3,3,0,0,0,0,2,0,0,2,2,0,0,0,0,
|
||||||
|
3,3,3,3,3,3,3,2,3,2,3,3,3,3,3,3,3,2,2,2,2,2,0,2,3,0,2,0,0,2,3,2,0,2,0,2,
|
||||||
|
3,3,3,2,3,3,2,2,0,2,3,2,3,3,3,2,2,2,2,2,3,2,2,0,0,0,2,0,0,0,2,2,0,0,0,0,
|
||||||
|
3,3,3,2,3,3,3,2,3,3,2,2,3,2,3,2,3,0,2,3,0,2,0,0,0,0,0,2,0,0,2,0,0,2,2,2,
|
||||||
|
3,3,3,2,3,3,3,2,2,2,2,0,3,2,3,0,3,0,2,3,2,0,2,2,0,0,2,3,2,2,2,0,0,2,0,0,
|
||||||
|
3,3,3,0,3,3,3,2,3,2,3,3,3,2,3,2,2,0,2,3,0,2,2,3,2,0,2,0,0,2,2,0,2,2,0,0,
|
||||||
|
3,3,3,0,2,3,3,2,3,2,0,3,3,2,3,2,3,2,0,0,0,0,2,2,0,0,0,3,0,0,0,0,0,0,0,0,
|
||||||
|
3,3,3,0,3,3,3,3,0,0,0,3,3,0,0,2,3,2,2,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
3,3,3,2,3,3,3,2,3,2,2,0,3,3,3,2,2,0,0,2,0,2,2,0,2,0,2,2,2,0,2,2,0,0,0,0,
|
||||||
|
0,0,0,3,3,3,0,3,3,3,3,3,0,3,0,2,0,2,3,2,2,0,0,2,3,3,2,0,2,0,0,0,0,0,0,0,
|
||||||
|
3,3,3,0,0,2,2,2,0,2,0,0,3,0,3,0,2,0,0,0,0,2,2,2,0,0,0,2,0,0,2,0,0,0,0,0,
|
||||||
|
3,3,3,2,2,2,0,0,0,2,2,2,2,2,3,2,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,2,0,0,
|
||||||
|
0,0,2,3,3,3,0,3,2,2,2,2,0,2,0,2,0,2,2,3,2,0,0,0,0,2,2,0,0,0,0,0,0,0,0,0,
|
||||||
|
0,0,0,2,0,2,0,2,2,0,0,2,0,2,0,0,0,2,0,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,
|
||||||
|
3,2,2,0,0,0,2,0,2,0,0,0,0,2,2,0,0,0,0,0,2,0,0,2,0,0,2,0,0,2,0,0,0,0,0,0,
|
||||||
|
2,0,2,2,2,2,0,2,2,0,2,2,2,0,0,2,0,0,2,0,0,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,
|
||||||
|
2,0,2,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
0,0,0,0,2,2,0,2,0,0,2,2,0,0,0,2,0,0,0,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
const SequenceModel Iso_8859_3TurkishModel =
|
||||||
|
{
|
||||||
|
Iso_8859_3_CharToOrderMap,
|
||||||
|
TurkishLangModel,
|
||||||
|
36,
|
||||||
|
(float)0.991865243864388,
|
||||||
|
PR_FALSE,
|
||||||
|
"ISO-8859-3"
|
||||||
|
};
|
||||||
|
|
||||||
|
const SequenceModel Iso_8859_9TurkishModel =
|
||||||
|
{
|
||||||
|
Iso_8859_9_CharToOrderMap,
|
||||||
|
TurkishLangModel,
|
||||||
|
36,
|
||||||
|
(float)0.991865243864388,
|
||||||
|
PR_FALSE,
|
||||||
|
"ISO-8859-9"
|
||||||
|
};
|
247
PowerEditor/src/uchardet/LangModels/LangVietnameseModel.cpp
Normal file
247
PowerEditor/src/uchardet/LangModels/LangVietnameseModel.cpp
Normal file
@ -0,0 +1,247 @@
|
|||||||
|
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
||||||
|
/* ***** BEGIN LICENSE BLOCK *****
|
||||||
|
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||||
|
*
|
||||||
|
* The contents of this file are subject to the Mozilla Public License Version
|
||||||
|
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
* http://www.mozilla.org/MPL/
|
||||||
|
*
|
||||||
|
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||||
|
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||||
|
* for the specific language governing rights and limitations under the
|
||||||
|
* License.
|
||||||
|
*
|
||||||
|
* The Original Code is Mozilla Communicator client code.
|
||||||
|
*
|
||||||
|
* The Initial Developer of the Original Code is
|
||||||
|
* Netscape Communications Corporation.
|
||||||
|
* Portions created by the Initial Developer are Copyright (C) 1998
|
||||||
|
* the Initial Developer. All Rights Reserved.
|
||||||
|
*
|
||||||
|
* Contributor(s):
|
||||||
|
*
|
||||||
|
* Alternatively, the contents of this file may be used under the terms of
|
||||||
|
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||||
|
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||||
|
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||||
|
* of those above. If you wish to allow use of your version of this file only
|
||||||
|
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||||
|
* use your version of this file under the terms of the MPL, indicate your
|
||||||
|
* decision by deleting the provisions above and replace them with the notice
|
||||||
|
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||||
|
* the provisions above, a recipient may use your version of this file under
|
||||||
|
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||||
|
*
|
||||||
|
* ***** END LICENSE BLOCK ***** */
|
||||||
|
|
||||||
|
#include "../nsSBCharSetProber.h"
|
||||||
|
|
||||||
|
/********* Language model for: Vietnamese *********/
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Generated by BuildLangModel.py
|
||||||
|
* On: 2016-02-13 03:42:06.561440
|
||||||
|
**/
|
||||||
|
|
||||||
|
/* Character Mapping Table:
|
||||||
|
* ILL: illegal character.
|
||||||
|
* CTR: control character specific to the charset.
|
||||||
|
* RET: carriage/return.
|
||||||
|
* SYM: symbol (punctuation) that does not belong to word.
|
||||||
|
* NUM: 0 - 9.
|
||||||
|
*
|
||||||
|
* Other characters are ordered by probabilities
|
||||||
|
* (0 is the most common character in the language).
|
||||||
|
*
|
||||||
|
* Orders are generic to a language. So the codepoint with order X in
|
||||||
|
* CHARSET1 maps to the same character as the codepoint with the same
|
||||||
|
* order X in CHARSET2 for the same language.
|
||||||
|
* As such, it is possible to get missing order. For instance the
|
||||||
|
* ligature of 'o' and 'e' exists in ISO-8859-15 but not in ISO-8859-1
|
||||||
|
* even though they are both used for French. Same for the euro sign.
|
||||||
|
*/
|
||||||
|
static const unsigned char Windows_1258_CharToOrderMap[] =
|
||||||
|
{
|
||||||
|
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */
|
||||||
|
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
|
||||||
|
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
|
||||||
|
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
|
||||||
|
SYM, 6, 17, 3, 22, 21, 66, 5, 1, 4, 75, 24, 14, 8, 0, 9, /* 4X */
|
||||||
|
16, 36, 11, 19, 2, 7, 13, 69, 54, 20, 82,SYM,SYM,SYM,SYM,SYM, /* 5X */
|
||||||
|
SYM, 6, 17, 3, 22, 21, 66, 5, 1, 4, 75, 24, 14, 8, 0, 9, /* 6X */
|
||||||
|
16, 36, 11, 19, 2, 7, 13, 69, 54, 20, 82,SYM,SYM,SYM,SYM,CTR, /* 7X */
|
||||||
|
SYM,ILL,SYM,101,SYM,SYM,SYM,SYM,SYM,SYM,ILL,SYM,100,ILL,ILL,ILL, /* 8X */
|
||||||
|
ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,ILL,SYM,100,ILL,ILL,102, /* 9X */
|
||||||
|
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */
|
||||||
|
SYM,SYM,SYM,SYM,SYM,103,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */
|
||||||
|
12, 15, 25, 51, 97,104, 98, 91, 90, 62, 27,105,SYM, 47,106,107, /* CX */
|
||||||
|
10,108,SYM, 33, 29, 46, 93,SYM, 94, 58, 67,109, 96, 18,SYM, 99, /* DX */
|
||||||
|
12, 15, 25, 51, 97,110, 98, 91, 90, 62, 27,111,SYM, 47,112,113, /* EX */
|
||||||
|
10,114,SYM, 33, 29, 46, 93,SYM, 94, 58, 67,115, 96, 18,116,117, /* FX */
|
||||||
|
};
|
||||||
|
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
|
||||||
|
|
||||||
|
static const unsigned char Viscii_CharToOrderMap[] =
|
||||||
|
{
|
||||||
|
CTR,CTR, 88,CTR,CTR, 95, 77,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */
|
||||||
|
CTR,CTR,CTR,CTR, 80,CTR,CTR,CTR,CTR, 79,CTR,CTR,CTR,CTR, 92,CTR, /* 1X */
|
||||||
|
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
|
||||||
|
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
|
||||||
|
SYM, 6, 17, 3, 22, 21, 66, 5, 1, 4, 75, 24, 14, 8, 0, 9, /* 4X */
|
||||||
|
16, 36, 11, 19, 2, 7, 13, 69, 54, 20, 82,SYM,SYM,SYM,SYM,SYM, /* 5X */
|
||||||
|
SYM, 6, 17, 3, 22, 21, 66, 5, 1, 4, 75, 24, 14, 8, 0, 9, /* 6X */
|
||||||
|
16, 36, 11, 19, 2, 7, 13, 69, 54, 20, 82,SYM,SYM,SYM,SYM,CTR, /* 7X */
|
||||||
|
30, 57, 71, 65, 41, 43, 78, 49, 83, 89, 23, 45, 39, 74, 28, 32, /* 8X */
|
||||||
|
53, 60, 84, 31, 37, 40, 38, 59, 42, 81, 44, 73, 35, 72, 48, 76, /* 9X */
|
||||||
|
86, 57, 71, 65, 41, 43, 78, 49, 83, 89, 23, 45, 39, 74, 28, 32, /* AX */
|
||||||
|
53, 60, 84, 87, 46, 31, 38, 59, 42, 56, 52, 55, 70, 46, 40, 18, /* BX */
|
||||||
|
12, 15, 25, 61, 34, 51, 88, 95, 90, 62, 27, 85, 50, 47, 64, 76, /* CX */
|
||||||
|
10, 52, 63, 33, 29, 30, 80, 55, 70, 58, 67, 79, 92, 68, 87, 18, /* DX */
|
||||||
|
12, 15, 25, 61, 34, 51, 26, 77, 90, 62, 27, 85, 50, 47, 64, 73, /* EX */
|
||||||
|
10, 56, 63, 33, 29, 86, 81, 44, 48, 58, 67, 72, 35, 68, 37, 26, /* FX */
|
||||||
|
};
|
||||||
|
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
|
||||||
|
|
||||||
|
|
||||||
|
/* Model Table:
|
||||||
|
* Total sequences: 1494
|
||||||
|
* First 512 sequences: 0.9321889118082535
|
||||||
|
* Next 512 sequences (512-1024): 0.06092051479986333
|
||||||
|
* Rest: 0.0068905733918831966
|
||||||
|
* Negative sequences: TODO
|
||||||
|
*/
|
||||||
|
static const PRUint8 VietnameseLangModel[] =
|
||||||
|
{
|
||||||
|
3,3,3,3,3,3,3,2,2,3,0,2,3,1,1,1,1,2,3,3,2,3,3,3,2,1,2,
|
||||||
|
3,0,3,2,2,2,3,1,0,1,1,2,0,0,1,0,1,0,2,2,1,0,0,0,3,0,0,2,
|
||||||
|
2,1,2,0,3,0,3,3,2,3,0,2,3,0,2,3,0,0,3,1,3,3,1,3,1,3,3,
|
||||||
|
3,3,3,3,3,3,3,3,3,0,3,3,3,2,3,3,3,3,2,3,3,3,3,3,2,3,2,0,
|
||||||
|
2,3,2,2,3,1,3,3,1,3,1,3,3,2,2,3,2,0,3,2,2,3,1,3,0,3,0,
|
||||||
|
3,1,3,3,3,3,2,3,2,0,0,2,1,2,2,2,2,0,0,1,3,2,3,2,2,2,2,0,
|
||||||
|
2,3,2,2,3,0,3,3,2,3,0,2,2,1,2,3,1,1,2,2,2,3,1,0,2,2,0,
|
||||||
|
0,0,3,2,3,2,3,3,3,1,1,2,0,0,2,0,3,0,0,2,0,2,2,0,2,3,1,1,
|
||||||
|
3,1,3,3,3,3,3,2,3,3,1,3,2,2,3,3,2,2,0,3,1,3,3,3,2,0,3,
|
||||||
|
3,3,1,0,0,3,1,3,0,2,0,2,3,3,2,0,0,2,3,0,0,0,1,0,1,0,0,2,
|
||||||
|
2,3,2,2,3,1,3,3,1,3,0,3,3,0,2,2,0,1,3,2,2,3,1,1,1,2,3,
|
||||||
|
0,0,3,3,1,2,2,0,1,0,2,2,0,0,1,1,3,3,0,0,0,1,1,2,1,0,3,0,
|
||||||
|
3,2,3,3,3,2,2,3,3,3,0,3,0,2,3,0,2,3,0,3,3,2,3,0,2,0,0,
|
||||||
|
0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,2,
|
||||||
|
3,1,3,2,3,2,3,1,3,2,0,3,1,2,3,2,2,2,0,3,3,3,2,2,2,3,0,
|
||||||
|
2,1,3,1,3,3,0,2,0,0,0,1,0,1,3,0,3,0,0,2,2,0,3,0,2,0,3,1,
|
||||||
|
2,1,0,2,3,0,3,3,2,3,0,0,3,0,2,3,2,2,3,2,2,3,2,0,0,1,0,
|
||||||
|
0,2,3,3,3,2,2,1,0,0,0,2,0,3,3,0,1,2,2,0,0,3,2,2,1,2,1,1,
|
||||||
|
3,2,3,2,3,2,3,3,3,2,0,3,3,2,3,3,2,3,0,3,2,2,3,0,2,0,0,
|
||||||
|
0,0,0,3,0,0,1,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,2,
|
||||||
|
0,0,0,0,3,0,3,2,0,3,0,1,3,0,0,3,0,1,3,0,0,1,0,3,0,3,0,
|
||||||
|
2,3,3,3,3,3,3,3,2,0,1,3,3,1,3,3,3,3,3,2,2,0,1,2,2,3,3,0,
|
||||||
|
3,2,3,2,3,2,3,3,2,3,0,3,2,2,3,2,1,2,3,3,3,3,3,0,2,1,2,
|
||||||
|
3,1,2,2,3,2,0,2,0,0,2,2,1,0,3,3,2,3,0,1,2,2,2,3,3,1,2,0,
|
||||||
|
3,0,0,0,3,0,0,2,3,3,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
2,0,0,0,3,0,3,3,0,2,0,1,3,0,1,1,0,0,2,1,1,3,1,1,0,2,1,
|
||||||
|
2,1,2,1,0,1,0,0,0,0,2,1,0,3,2,3,3,1,3,0,3,2,3,3,3,0,0,0,
|
||||||
|
0,2,2,1,3,2,3,3,2,3,0,0,3,2,3,2,2,2,3,2,2,3,2,1,1,2,1,
|
||||||
|
3,2,2,3,3,2,1,0,0,0,3,2,0,3,2,3,2,1,0,1,2,2,3,0,2,0,0,1,
|
||||||
|
3,0,3,3,3,1,0,2,3,3,0,1,0,0,1,0,3,0,0,1,3,0,0,0,0,0,0,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
0,3,2,0,3,0,3,2,1,3,0,3,0,0,2,0,2,1,0,2,2,3,1,0,0,0,0,
|
||||||
|
2,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
|
||||||
|
2,1,0,2,3,1,3,3,0,3,0,3,3,0,3,3,0,3,1,2,2,3,1,1,1,0,0,
|
||||||
|
2,1,0,2,3,3,2,3,0,0,0,1,0,2,2,3,2,0,1,0,2,1,2,3,0,2,3,0,
|
||||||
|
3,0,1,1,2,0,3,3,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,3,3,0,3,0,0,0,0,0,3,0,0,0,0,0,0,0,0,
|
||||||
|
1,3,3,3,3,1,3,3,2,3,0,1,2,0,2,3,2,2,2,3,2,3,2,0,2,2,0,
|
||||||
|
0,0,2,1,0,3,2,2,0,1,1,1,1,1,1,0,0,0,0,2,0,1,0,0,1,2,1,0,
|
||||||
|
2,0,1,2,1,0,2,2,1,2,0,2,0,0,1,1,2,1,0,2,0,2,1,3,1,0,0,
|
||||||
|
3,2,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,
|
||||||
|
3,2,3,2,2,2,3,2,3,3,0,3,0,2,3,1,2,2,0,3,2,3,3,0,2,0,0,
|
||||||
|
0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,
|
||||||
|
1,1,1,2,3,1,3,3,0,3,0,3,3,1,2,1,0,0,3,2,2,3,2,0,1,3,1,
|
||||||
|
1,0,0,3,1,1,1,0,0,0,0,1,0,0,3,3,2,1,0,1,0,3,2,1,1,2,1,0,
|
||||||
|
3,0,3,2,0,0,0,3,3,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
1,3,1,0,3,1,3,2,0,2,0,2,0,1,2,0,0,1,0,2,2,2,0,3,1,0,0,
|
||||||
|
2,0,1,0,0,0,0,0,0,0,0,0,2,0,0,2,0,0,0,0,3,0,0,2,0,0,0,1,
|
||||||
|
3,0,1,1,0,0,0,3,3,0,0,0,0,0,1,0,1,0,0,0,3,0,0,0,0,0,0,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
3,0,0,0,0,0,3,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
3,0,2,1,0,0,0,3,3,0,1,1,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,
|
||||||
|
0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
3,0,3,3,0,0,0,3,3,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
3,0,2,2,3,0,0,0,3,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,
|
||||||
|
0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
3,0,3,3,3,0,0,0,2,3,0,0,0,0,0,0,3,0,0,0,2,0,0,0,0,0,0,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
3,0,3,3,3,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
3,0,3,3,3,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
3,0,1,2,3,0,3,0,2,0,0,1,0,1,0,0,2,0,0,0,0,0,0,0,1,0,0,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
3,0,0,0,3,0,0,0,2,3,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
2,0,0,0,1,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
0,0,0,0,0,0,0,3,0,0,0,2,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
3,0,2,3,3,0,0,2,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
3,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
3,0,0,0,0,0,0,3,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
3,0,2,3,3,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
3,0,3,0,0,0,0,3,3,0,0,0,0,0,0,0,3,0,0,0,3,0,0,0,0,0,0,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
3,0,1,3,0,0,3,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
3,0,0,0,1,0,0,3,2,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
3,0,1,3,3,0,2,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
3,0,0,0,0,0,0,3,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
3,0,1,1,3,0,0,2,2,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
3,0,2,3,0,0,2,1,1,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
3,0,1,3,1,0,0,0,1,0,0,0,0,0,0,0,2,0,0,0,2,0,0,0,0,0,0,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
3,0,3,1,0,0,0,2,2,0,0,0,0,0,0,0,3,0,0,0,2,0,0,0,0,0,0,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
3,0,0,0,0,0,1,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
3,0,1,1,1,0,0,0,3,0,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
3,0,2,3,0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
3,0,0,0,2,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
1,1,2,1,2,0,3,3,0,1,0,0,0,2,0,3,1,2,2,0,1,3,0,2,0,2,0,
|
||||||
|
2,0,2,1,1,0,1,2,0,0,0,1,0,0,1,0,0,0,0,1,2,0,0,1,1,2,0,2,
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
const SequenceModel Windows_1258VietnameseModel =
|
||||||
|
{
|
||||||
|
Windows_1258_CharToOrderMap,
|
||||||
|
VietnameseLangModel,
|
||||||
|
55,
|
||||||
|
(float)0.9321889118082535,
|
||||||
|
PR_FALSE,
|
||||||
|
"WINDOWS-1258"
|
||||||
|
};
|
||||||
|
|
||||||
|
const SequenceModel VisciiVietnameseModel =
|
||||||
|
{
|
||||||
|
Viscii_CharToOrderMap,
|
||||||
|
VietnameseLangModel,
|
||||||
|
55,
|
||||||
|
(float)0.9321889118082535,
|
||||||
|
PR_FALSE,
|
||||||
|
"VISCII"
|
||||||
|
};
|
@ -1,220 +0,0 @@
|
|||||||
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
|
||||||
/* ***** BEGIN LICENSE BLOCK *****
|
|
||||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
|
||||||
*
|
|
||||||
* The contents of this file are subject to the Mozilla Public License Version
|
|
||||||
* 1.1 (the "License"); you may not use this file except in compliance with
|
|
||||||
* the License. You may obtain a copy of the License at
|
|
||||||
* http://www.mozilla.org/MPL/
|
|
||||||
*
|
|
||||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
|
||||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
|
||||||
* for the specific language governing rights and limitations under the
|
|
||||||
* License.
|
|
||||||
*
|
|
||||||
* The Original Code is Mozilla Communicator client code.
|
|
||||||
*
|
|
||||||
* The Initial Developer of the Original Code is
|
|
||||||
* Netscape Communications Corporation.
|
|
||||||
* Portions created by the Initial Developer are Copyright (C) 1998
|
|
||||||
* the Initial Developer. All Rights Reserved.
|
|
||||||
*
|
|
||||||
* Contributor(s):
|
|
||||||
*
|
|
||||||
* Alternatively, the contents of this file may be used under the terms of
|
|
||||||
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
|
||||||
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
|
||||||
* in which case the provisions of the GPL or the LGPL are applicable instead
|
|
||||||
* of those above. If you wish to allow use of your version of this file only
|
|
||||||
* under the terms of either the GPL or the LGPL, and not to allow others to
|
|
||||||
* use your version of this file under the terms of the MPL, indicate your
|
|
||||||
* decision by deleting the provisions above and replace them with the notice
|
|
||||||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
|
||||||
* the provisions above, a recipient may use your version of this file under
|
|
||||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
|
||||||
*
|
|
||||||
* ***** END LICENSE BLOCK ***** */
|
|
||||||
|
|
||||||
#include "nsSBCharSetProber.h"
|
|
||||||
|
|
||||||
|
|
||||||
/****************************************************************
|
|
||||||
255: Control characters that usually does not exist in any text
|
|
||||||
254: Carriage/Return
|
|
||||||
253: symbol (punctuation) that does not belong to word
|
|
||||||
252: 0 - 9
|
|
||||||
|
|
||||||
*****************************************************************/
|
|
||||||
|
|
||||||
//The following result for thai was collected from a limited sample (1M).
|
|
||||||
|
|
||||||
//Character Mapping Table:
|
|
||||||
static const unsigned char TIS620CharToOrderMap[] =
|
|
||||||
{
|
|
||||||
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, //00
|
|
||||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, //10
|
|
||||||
+253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, //20
|
|
||||||
252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, //30
|
|
||||||
253,182,106,107,100,183,184,185,101, 94,186,187,108,109,110,111, //40
|
|
||||||
188,189,190, 89, 95,112,113,191,192,193,194,253,253,253,253,253, //50
|
|
||||||
253, 64, 72, 73,114, 74,115,116,102, 81,201,117, 90,103, 78, 82, //60
|
|
||||||
96,202, 91, 79, 84,104,105, 97, 98, 92,203,253,253,253,253,253, //70
|
|
||||||
209,210,211,212,213, 88,214,215,216,217,218,219,220,118,221,222,
|
|
||||||
223,224, 99, 85, 83,225,226,227,228,229,230,231,232,233,234,235,
|
|
||||||
236, 5, 30,237, 24,238, 75, 8, 26, 52, 34, 51,119, 47, 58, 57,
|
|
||||||
49, 53, 55, 43, 20, 19, 44, 14, 48, 3, 17, 25, 39, 62, 31, 54,
|
|
||||||
45, 9, 16, 2, 61, 15,239, 12, 42, 46, 18, 21, 76, 4, 66, 63,
|
|
||||||
22, 10, 1, 36, 23, 13, 40, 27, 32, 35, 86,240,241,242,243,244,
|
|
||||||
11, 28, 41, 29, 33,245, 50, 37, 6, 7, 67, 77, 38, 93,246,247,
|
|
||||||
68, 56, 59, 65, 69, 60, 70, 80, 71, 87,248,249,250,251,252,253,
|
|
||||||
};
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
//Model Table:
|
|
||||||
//total sequences: 100%
|
|
||||||
//first 512 sequences: 92.6386%
|
|
||||||
//first 1024 sequences:7.3177%
|
|
||||||
//rest sequences: 1.0230%
|
|
||||||
//negative sequences: 0.0436%
|
|
||||||
static const PRUint8 ThaiLangModel[] =
|
|
||||||
{
|
|
||||||
0,1,3,3,3,3,0,0,3,3,0,3,3,0,3,3,3,3,3,3,3,3,0,0,3,3,3,0,3,3,3,3,
|
|
||||||
0,3,3,0,0,0,1,3,0,3,3,2,3,3,0,1,2,3,3,3,3,0,2,0,2,0,0,3,2,1,2,2,
|
|
||||||
3,0,3,3,2,3,0,0,3,3,0,3,3,0,3,3,3,3,3,3,3,3,3,0,3,2,3,0,2,2,2,3,
|
|
||||||
0,2,3,0,0,0,0,1,0,1,2,3,1,1,3,2,2,0,1,1,0,0,1,0,0,0,0,0,0,0,1,1,
|
|
||||||
3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,2,2,2,3,3,2,3,2,3,3,2,2,2,
|
|
||||||
3,1,2,3,0,3,3,2,2,1,2,3,3,1,2,0,1,3,0,1,0,0,1,0,0,0,0,0,0,0,1,1,
|
|
||||||
3,3,2,2,3,3,3,3,1,2,3,3,3,3,3,2,2,2,2,3,3,2,2,3,3,2,2,3,2,3,2,2,
|
|
||||||
3,3,1,2,3,1,2,2,3,3,1,0,2,1,0,0,3,1,2,1,0,0,1,0,0,0,0,0,0,1,0,1,
|
|
||||||
3,3,3,3,3,3,2,2,3,3,3,3,2,3,2,2,3,3,2,2,3,2,2,2,2,1,1,3,1,2,1,1,
|
|
||||||
3,2,1,0,2,1,0,1,0,1,1,0,1,1,0,0,1,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,
|
|
||||||
3,3,3,2,3,2,3,3,2,2,3,2,3,3,2,3,1,1,2,3,2,2,2,3,2,2,2,2,2,1,2,1,
|
|
||||||
2,2,1,1,3,3,2,1,0,1,2,2,0,1,3,0,0,0,1,1,0,0,0,0,0,2,3,0,0,2,1,1,
|
|
||||||
3,3,2,3,3,2,0,0,3,3,0,3,3,0,2,2,3,1,2,2,1,1,1,0,2,2,2,0,2,2,1,1,
|
|
||||||
0,2,1,0,2,0,0,2,0,1,0,0,1,0,0,0,1,1,1,1,0,0,0,0,0,0,0,0,0,0,1,0,
|
|
||||||
3,3,2,3,3,2,0,0,3,3,0,2,3,0,2,1,2,2,2,2,1,2,0,0,2,2,2,0,2,2,1,1,
|
|
||||||
0,2,1,0,2,0,0,2,0,1,1,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,
|
|
||||||
3,3,2,3,2,3,2,0,2,2,1,3,2,1,3,2,1,2,3,2,2,3,0,2,3,2,2,1,2,2,2,2,
|
|
||||||
1,2,2,0,0,0,0,2,0,1,2,0,1,1,1,0,1,0,3,1,1,0,0,0,0,0,0,0,0,0,1,0,
|
|
||||||
3,3,2,3,3,2,3,2,2,2,3,2,2,3,2,2,1,2,3,2,2,3,1,3,2,2,2,3,2,2,2,3,
|
|
||||||
3,2,1,3,0,1,1,1,0,2,1,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,0,0,0,2,0,0,
|
|
||||||
1,0,0,3,0,3,3,3,3,3,0,0,3,0,2,2,3,3,3,3,3,0,0,0,1,1,3,0,0,0,0,2,
|
|
||||||
0,0,1,0,0,0,0,0,0,0,2,3,0,0,0,3,0,2,0,0,0,0,0,3,0,0,0,0,0,0,0,0,
|
|
||||||
2,0,3,3,3,3,0,0,2,3,0,0,3,0,3,3,2,3,3,3,3,3,0,0,3,3,3,0,0,0,3,3,
|
|
||||||
0,0,3,0,0,0,0,2,0,0,2,1,1,3,0,0,1,0,0,2,3,0,1,0,0,0,0,0,0,0,1,0,
|
|
||||||
3,3,3,3,2,3,3,3,3,3,3,3,1,2,1,3,3,2,2,1,2,2,2,3,1,1,2,0,2,1,2,1,
|
|
||||||
2,2,1,0,0,0,1,1,0,1,0,1,1,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,
|
|
||||||
3,0,2,1,2,3,3,3,0,2,0,2,2,0,2,1,3,2,2,1,2,1,0,0,2,2,1,0,2,1,2,2,
|
|
||||||
0,1,1,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
3,3,3,3,2,1,3,3,1,1,3,0,2,3,1,1,3,2,1,1,2,0,2,2,3,2,1,1,1,1,1,2,
|
|
||||||
3,0,0,1,3,1,2,1,2,0,3,0,0,0,1,0,3,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,
|
|
||||||
3,3,1,1,3,2,3,3,3,1,3,2,1,3,2,1,3,2,2,2,2,1,3,3,1,2,1,3,1,2,3,0,
|
|
||||||
2,1,1,3,2,2,2,1,2,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,
|
|
||||||
3,3,2,3,2,3,3,2,3,2,3,2,3,3,2,1,0,3,2,2,2,1,2,2,2,1,2,2,1,2,1,1,
|
|
||||||
2,2,2,3,0,1,3,1,1,1,1,0,1,1,0,2,1,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
3,3,3,3,2,3,2,2,1,1,3,2,3,2,3,2,0,3,2,2,1,2,0,2,2,2,1,2,2,2,2,1,
|
|
||||||
3,2,1,2,2,1,0,2,0,1,0,0,1,1,0,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,1,
|
|
||||||
3,3,3,3,3,2,3,1,2,3,3,2,2,3,0,1,1,2,0,3,3,2,2,3,0,1,1,3,0,0,0,0,
|
|
||||||
3,1,0,3,3,0,2,0,2,1,0,0,3,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
3,3,3,2,3,2,3,3,0,1,3,1,1,2,1,2,1,1,3,1,1,0,2,3,1,1,1,1,1,1,1,1,
|
|
||||||
3,1,1,2,2,2,2,1,1,1,0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
|
|
||||||
3,2,2,1,1,2,1,3,3,2,3,2,2,3,2,2,3,1,2,2,1,2,0,3,2,1,2,2,2,2,2,1,
|
|
||||||
3,2,1,2,2,2,1,1,1,1,0,0,1,1,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
3,3,3,3,3,3,3,3,1,3,3,0,2,1,0,3,2,0,0,3,1,0,1,1,0,1,0,0,0,0,0,1,
|
|
||||||
1,0,0,1,0,3,2,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
3,0,2,2,2,3,0,0,1,3,0,3,2,0,3,2,2,3,3,3,3,3,1,0,2,2,2,0,2,2,1,2,
|
|
||||||
0,2,3,0,0,0,0,1,0,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
|
|
||||||
3,0,2,3,1,3,3,2,3,3,0,3,3,0,3,2,2,3,2,3,3,3,0,0,2,2,3,0,1,1,1,3,
|
|
||||||
0,0,3,0,0,0,2,2,0,1,3,0,1,2,2,2,3,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,
|
|
||||||
3,2,3,3,2,0,3,3,2,2,3,1,3,2,1,3,2,0,1,2,2,0,2,3,2,1,0,3,0,0,0,0,
|
|
||||||
3,0,0,2,3,1,3,0,0,3,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
3,1,3,2,2,2,1,2,0,1,3,1,1,3,1,3,0,0,2,1,1,1,1,2,1,1,1,0,2,1,0,1,
|
|
||||||
1,2,0,0,0,3,1,1,0,0,0,0,1,0,1,0,0,1,0,1,0,0,0,0,0,3,1,0,0,0,1,0,
|
|
||||||
3,3,3,3,2,2,2,2,2,1,3,1,1,1,2,0,1,1,2,1,2,1,3,2,0,0,3,1,1,1,1,1,
|
|
||||||
3,1,0,2,3,0,0,0,3,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
0,0,0,2,3,0,3,3,0,2,0,0,0,0,0,0,0,3,0,0,1,0,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
0,0,2,3,1,3,0,0,1,2,0,0,2,0,3,3,2,3,3,3,2,3,0,0,2,2,2,0,0,0,2,2,
|
|
||||||
0,0,1,0,0,0,0,3,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,
|
|
||||||
0,0,0,3,0,2,0,0,0,0,0,0,0,0,0,0,1,2,3,1,3,3,0,0,1,0,3,0,0,0,0,0,
|
|
||||||
0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
3,3,1,2,3,1,2,3,1,0,3,0,2,2,1,0,2,1,1,2,0,1,0,0,1,1,1,1,0,1,0,0,
|
|
||||||
1,0,0,0,0,1,1,0,3,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
3,3,3,3,2,1,0,1,1,1,3,1,2,2,2,2,2,2,1,1,1,1,0,3,1,0,1,3,1,1,1,1,
|
|
||||||
1,1,0,2,0,1,3,1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,2,0,1,
|
|
||||||
3,0,2,2,1,3,3,2,3,3,0,1,1,0,2,2,1,2,1,3,3,1,0,0,3,2,0,0,0,0,2,1,
|
|
||||||
0,1,0,0,0,0,1,2,0,1,1,3,1,1,2,2,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,
|
|
||||||
0,0,3,0,0,1,0,0,0,3,0,0,3,0,3,1,0,1,1,1,3,2,0,0,0,3,0,0,0,0,2,0,
|
|
||||||
0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,2,0,0,0,0,0,0,0,0,0,
|
|
||||||
3,3,1,3,2,1,3,3,1,2,2,0,1,2,1,0,1,2,0,0,0,0,0,3,0,0,0,3,0,0,0,0,
|
|
||||||
3,0,0,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
3,0,1,2,0,3,3,3,2,2,0,1,1,0,1,3,0,0,0,2,2,0,0,0,0,3,1,0,1,0,0,0,
|
|
||||||
0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
3,0,2,3,1,2,0,0,2,1,0,3,1,0,1,2,0,1,1,1,1,3,0,0,3,1,1,0,2,2,1,1,
|
|
||||||
0,2,0,0,0,0,0,1,0,1,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
3,0,0,3,1,2,0,0,2,2,0,1,2,0,1,0,1,3,1,2,1,0,0,0,2,0,3,0,0,0,1,0,
|
|
||||||
0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
3,0,1,1,2,2,0,0,0,2,0,2,1,0,1,1,0,1,1,1,2,1,0,0,1,1,1,0,2,1,1,1,
|
|
||||||
0,1,1,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,1,
|
|
||||||
0,0,0,2,0,1,3,1,1,1,1,0,0,0,0,3,2,0,1,0,0,0,1,2,0,0,0,1,0,0,0,0,
|
|
||||||
0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
0,0,0,0,0,3,3,3,3,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
1,0,2,3,2,2,0,0,0,1,0,0,0,0,2,3,2,1,2,2,3,0,0,0,2,3,1,0,0,0,1,1,
|
|
||||||
0,0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,
|
|
||||||
3,3,2,2,0,1,0,0,0,0,2,0,2,0,1,0,0,0,1,1,0,0,0,2,1,0,1,0,1,1,0,0,
|
|
||||||
0,1,0,2,0,0,1,0,3,0,1,0,0,0,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
3,3,1,0,0,1,0,0,0,0,0,1,1,2,0,0,0,0,1,0,0,1,3,1,0,0,0,0,1,1,0,0,
|
|
||||||
0,1,0,0,0,0,3,0,0,0,0,0,0,3,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
3,3,1,1,1,1,2,3,0,0,2,1,1,1,1,1,0,2,1,1,0,0,0,2,1,0,1,2,1,1,0,1,
|
|
||||||
2,1,0,3,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
1,3,1,0,0,0,0,0,0,0,3,0,0,0,3,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,
|
|
||||||
0,0,0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
3,3,2,0,0,0,0,0,0,1,2,1,0,1,1,0,2,0,0,1,0,0,2,0,0,0,0,0,0,0,0,0,
|
|
||||||
0,0,0,0,0,0,2,0,0,0,1,3,0,1,0,0,0,2,0,0,0,0,0,0,0,1,2,0,0,0,0,0,
|
|
||||||
3,3,0,0,1,1,2,0,0,1,2,1,0,1,1,1,0,1,1,0,0,2,1,1,0,1,0,0,1,1,1,0,
|
|
||||||
0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
2,2,2,1,0,0,0,0,1,0,0,0,0,3,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,
|
|
||||||
2,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
2,3,0,0,1,1,0,0,0,2,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
1,1,0,1,2,0,1,2,0,0,1,1,0,2,0,1,0,0,1,0,0,0,0,1,0,0,0,2,0,0,0,0,
|
|
||||||
1,0,0,1,0,1,1,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
0,1,0,0,0,0,0,0,0,1,1,0,1,1,0,2,1,3,0,0,0,0,1,1,0,0,0,0,0,0,0,3,
|
|
||||||
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,
|
|
||||||
0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
2,0,1,0,1,0,0,2,0,0,2,0,0,1,1,2,0,0,1,1,0,0,0,1,0,0,0,1,1,0,0,0,
|
|
||||||
1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,
|
|
||||||
1,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,
|
|
||||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,1,1,0,0,0,
|
|
||||||
2,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,
|
|
||||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
2,0,0,0,0,2,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,
|
|
||||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,3,0,0,0,
|
|
||||||
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,1,0,0,0,0,
|
|
||||||
1,0,0,0,0,0,0,0,0,1,0,0,0,0,2,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
0,0,1,1,0,0,2,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
};
|
|
||||||
|
|
||||||
|
|
||||||
const SequenceModel TIS620ThaiModel(
|
|
||||||
TIS620CharToOrderMap,
|
|
||||||
ThaiLangModel,
|
|
||||||
(float)0.926386,
|
|
||||||
PR_FALSE,
|
|
||||||
"TIS-620"
|
|
||||||
);
|
|
@ -1,10 +0,0 @@
|
|||||||
Uchardet is a C language binding of the original C++ implementation of the universal charset detection library by Mozilla.
|
|
||||||
The source code of universalchardet is available at https://github.com/BYVoid/uchardet
|
|
||||||
|
|
||||||
uchardet is an encoding detector library, which takes a sequence of bytes in an unknown character encoding without any additional information, and attempts to determine the encoding of the text.
|
|
||||||
|
|
||||||
The original code of universalchardet is available at http://lxr.mozilla.org/seamonkey/source/extensions/universalchardet/
|
|
||||||
|
|
||||||
Techniques used by universalchardet are described at http://www.mozilla.org/projects/intl/UniversalCharsetDetection.html
|
|
||||||
|
|
||||||
Uchardet is licensed under Mozilla Public License Version 1.1 (http://www.mozilla.org/MPL/1.1/)
|
|
294
PowerEditor/src/uchardet/README.md
Normal file
294
PowerEditor/src/uchardet/README.md
Normal file
@ -0,0 +1,294 @@
|
|||||||
|
# uchardet
|
||||||
|
|
||||||
|
[uchardet](https://www.freedesktop.org/wiki/Software/uchardet/) is an encoding detector library, which takes a sequence of bytes in an unknown character encoding without any additional information, and attempts to determine the encoding of the text. Returned encoding names are [iconv](https://www.gnu.org/software/libiconv/)-compatible.
|
||||||
|
|
||||||
|
uchardet started as a C language binding of the original C++ implementation of the universal charset detection library by Mozilla. It can now detect more charsets, and more reliably than the original implementation.
|
||||||
|
|
||||||
|
The original code of universalchardet is available at http://lxr.mozilla.org/seamonkey/source/extensions/universalchardet/
|
||||||
|
|
||||||
|
Techniques used by universalchardet are described at http://www.mozilla.org/projects/intl/UniversalCharsetDetection.html
|
||||||
|
|
||||||
|
## Supported Languages/Encodings
|
||||||
|
|
||||||
|
* International (Unicode)
|
||||||
|
* UTF-8
|
||||||
|
* UTF-16BE / UTF-16LE
|
||||||
|
* UTF-32BE / UTF-32LE / X-ISO-10646-UCS-4-34121 / X-ISO-10646-UCS-4-21431
|
||||||
|
* Arabic
|
||||||
|
* ISO-8859-6
|
||||||
|
* WINDOWS-1256
|
||||||
|
* Bulgarian
|
||||||
|
* ISO-8859-5
|
||||||
|
* WINDOWS-1251
|
||||||
|
* Chinese
|
||||||
|
* ISO-2022-CN
|
||||||
|
* BIG5
|
||||||
|
* EUC-TW
|
||||||
|
* GB18030
|
||||||
|
* HZ-GB-2312
|
||||||
|
* Croatian:
|
||||||
|
* ISO-8859-2
|
||||||
|
* ISO-8859-13
|
||||||
|
* ISO-8859-16
|
||||||
|
* Windows-1250
|
||||||
|
* IBM852
|
||||||
|
* MAC-CENTRALEUROPE
|
||||||
|
* Czech
|
||||||
|
* Windows-1250
|
||||||
|
* ISO-8859-2
|
||||||
|
* IBM852
|
||||||
|
* MAC-CENTRALEUROPE
|
||||||
|
* Danish
|
||||||
|
* ISO-8859-1
|
||||||
|
* ISO-8859-15
|
||||||
|
* WINDOWS-1252
|
||||||
|
* English
|
||||||
|
* ASCII
|
||||||
|
* Esperanto
|
||||||
|
* ISO-8859-3
|
||||||
|
* Estonian
|
||||||
|
* ISO-8859-4
|
||||||
|
* ISO-8859-13
|
||||||
|
* ISO-8859-13
|
||||||
|
* Windows-1252
|
||||||
|
* Windows-1257
|
||||||
|
* Finnish
|
||||||
|
* ISO-8859-1
|
||||||
|
* ISO-8859-4
|
||||||
|
* ISO-8859-9
|
||||||
|
* ISO-8859-13
|
||||||
|
* ISO-8859-15
|
||||||
|
* WINDOWS-1252
|
||||||
|
* French
|
||||||
|
* ISO-8859-1
|
||||||
|
* ISO-8859-15
|
||||||
|
* WINDOWS-1252
|
||||||
|
* German
|
||||||
|
* ISO-8859-1
|
||||||
|
* WINDOWS-1252
|
||||||
|
* Greek
|
||||||
|
* ISO-8859-7
|
||||||
|
* WINDOWS-1253
|
||||||
|
* Hebrew
|
||||||
|
* ISO-8859-8
|
||||||
|
* WINDOWS-1255
|
||||||
|
* Hungarian:
|
||||||
|
* ISO-8859-2
|
||||||
|
* WINDOWS-1250
|
||||||
|
* Irish Gaelic
|
||||||
|
* ISO-8859-1
|
||||||
|
* ISO-8859-9
|
||||||
|
* ISO-8859-15
|
||||||
|
* WINDOWS-1252
|
||||||
|
* Italian
|
||||||
|
* ISO-8859-1
|
||||||
|
* ISO-8859-3
|
||||||
|
* ISO-8859-9
|
||||||
|
* ISO-8859-15
|
||||||
|
* WINDOWS-1252
|
||||||
|
* Japanese
|
||||||
|
* ISO-2022-JP
|
||||||
|
* SHIFT_JIS
|
||||||
|
* EUC-JP
|
||||||
|
* Korean
|
||||||
|
* ISO-2022-KR
|
||||||
|
* EUC-KR / UHC
|
||||||
|
* Lithuanian
|
||||||
|
* ISO-8859-4
|
||||||
|
* ISO-8859-10
|
||||||
|
* ISO-8859-13
|
||||||
|
* Latvian
|
||||||
|
* ISO-8859-4
|
||||||
|
* ISO-8859-10
|
||||||
|
* ISO-8859-13
|
||||||
|
* Maltese
|
||||||
|
* ISO-8859-3
|
||||||
|
* Polish:
|
||||||
|
* ISO-8859-2
|
||||||
|
* ISO-8859-13
|
||||||
|
* ISO-8859-16
|
||||||
|
* Windows-1250
|
||||||
|
* IBM852
|
||||||
|
* MAC-CENTRALEUROPE
|
||||||
|
* Portuguese
|
||||||
|
* ISO-8859-1
|
||||||
|
* ISO-8859-9
|
||||||
|
* ISO-8859-15
|
||||||
|
* WINDOWS-1252
|
||||||
|
* Romanian:
|
||||||
|
* ISO-8859-2
|
||||||
|
* ISO-8859-16
|
||||||
|
* Windows-1250
|
||||||
|
* IBM852
|
||||||
|
* Russian
|
||||||
|
* ISO-8859-5
|
||||||
|
* KOI8-R
|
||||||
|
* WINDOWS-1251
|
||||||
|
* MAC-CYRILLIC
|
||||||
|
* IBM866
|
||||||
|
* IBM855
|
||||||
|
* Slovak
|
||||||
|
* Windows-1250
|
||||||
|
* ISO-8859-2
|
||||||
|
* IBM852
|
||||||
|
* MAC-CENTRALEUROPE
|
||||||
|
* Slovene
|
||||||
|
* ISO-8859-2
|
||||||
|
* ISO-8859-16
|
||||||
|
* Windows-1250
|
||||||
|
* IBM852
|
||||||
|
* MAC-CENTRALEUROPE
|
||||||
|
* Spanish
|
||||||
|
* ISO-8859-1
|
||||||
|
* ISO-8859-15
|
||||||
|
* WINDOWS-1252
|
||||||
|
* Swedish
|
||||||
|
* ISO-8859-1
|
||||||
|
* ISO-8859-4
|
||||||
|
* ISO-8859-9
|
||||||
|
* ISO-8859-15
|
||||||
|
* WINDOWS-1252
|
||||||
|
* Thai
|
||||||
|
* TIS-620
|
||||||
|
* ISO-8859-11
|
||||||
|
* Turkish:
|
||||||
|
* ISO-8859-3
|
||||||
|
* ISO-8859-9
|
||||||
|
* Vietnamese:
|
||||||
|
* VISCII
|
||||||
|
* Windows-1258
|
||||||
|
* Others
|
||||||
|
* WINDOWS-1252
|
||||||
|
|
||||||
|
## Installation
|
||||||
|
|
||||||
|
### Debian/Ubuntu/Mint
|
||||||
|
|
||||||
|
apt-get install uchardet libuchardet-dev
|
||||||
|
|
||||||
|
### Mageia
|
||||||
|
|
||||||
|
urpmi libuchardet libuchardet-devel
|
||||||
|
|
||||||
|
### Fedora
|
||||||
|
|
||||||
|
dnf install uchardet uchardet-devel
|
||||||
|
|
||||||
|
### Gentoo
|
||||||
|
|
||||||
|
emerge uchardet
|
||||||
|
|
||||||
|
### Mac
|
||||||
|
|
||||||
|
brew install uchardet
|
||||||
|
|
||||||
|
### Windows
|
||||||
|
|
||||||
|
Binary packages are provided in Fedora repository. There may exist other
|
||||||
|
pre-built packages but I am not aware of them.
|
||||||
|
Nevertheless the library is very easily and quickly compilable under
|
||||||
|
Windows as well, so finding a binary package is not necessary.
|
||||||
|
Some did it successfully with the [CMake Windows
|
||||||
|
installer](https://cmake.org/download/) and MinGW. It should be possible
|
||||||
|
to use MinGW-w64 instead of MinGW, in particular to build both 32 and
|
||||||
|
64-bit DLL libraries).
|
||||||
|
|
||||||
|
Note also that it is very easily cross-buildable (for instance from a
|
||||||
|
GNU/Linux machine).
|
||||||
|
|
||||||
|
### Build from source
|
||||||
|
|
||||||
|
Releases are available from:
|
||||||
|
https://www.freedesktop.org/software/uchardet/releases/
|
||||||
|
|
||||||
|
If you prefer a development version, clone the git repository:
|
||||||
|
|
||||||
|
git clone git://anongit.freedesktop.org/uchardet/uchardet
|
||||||
|
|
||||||
|
The source can be browsed at: https://cgit.freedesktop.org/uchardet/uchardet/
|
||||||
|
|
||||||
|
cmake .
|
||||||
|
make
|
||||||
|
make install
|
||||||
|
|
||||||
|
### Build with flatpak-builder
|
||||||
|
|
||||||
|
Here is a working "module" section to include in your Flatpak's json manifest:
|
||||||
|
|
||||||
|
```
|
||||||
|
"modules": [
|
||||||
|
{
|
||||||
|
"name": "uchardet",
|
||||||
|
"buildsystem": "cmake",
|
||||||
|
"builddir": true,
|
||||||
|
"config-opts": [ "-DCMAKE_INSTALL_LIBDIR=lib" ],
|
||||||
|
"sources": [
|
||||||
|
{
|
||||||
|
...
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
```
|
||||||
|
|
||||||
|
## Usage
|
||||||
|
|
||||||
|
### Command Line
|
||||||
|
|
||||||
|
```
|
||||||
|
uchardet Command Line Tool
|
||||||
|
Version 0.0.6
|
||||||
|
|
||||||
|
Authors: BYVoid, Jehan
|
||||||
|
Bug Report: https://bugs.freedesktop.org/enter_bug.cgi?product=uchardet
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
uchardet [Options] [File]...
|
||||||
|
|
||||||
|
Options:
|
||||||
|
-v, --version Print version and build information.
|
||||||
|
-h, --help Print this help.
|
||||||
|
```
|
||||||
|
|
||||||
|
### Library
|
||||||
|
|
||||||
|
See [uchardet.h](https://cgit.freedesktop.org/uchardet/uchardet/tree/src/uchardet.h)
|
||||||
|
|
||||||
|
## Related Projects
|
||||||
|
|
||||||
|
* [python-chardet](https://github.com/chardet/chardet) Python port
|
||||||
|
* [ruby-rchardet](http://rubyforge.org/projects/chardet/) Ruby port
|
||||||
|
* [juniversalchardet](http://code.google.com/p/juniversalchardet/) Java port of universalchardet
|
||||||
|
* [jchardet](http://jchardet.sourceforge.net/) Java port of chardet
|
||||||
|
* [nuniversalchardet](http://code.google.com/p/nuniversalchardet/) C# port of universalchardet
|
||||||
|
* [nchardet](http://www.conceptdevelopment.net/Localization/NCharDet/) C# port of chardet
|
||||||
|
* [uchardet-enhanced](https://bitbucket.org/medoc/uchardet-enhanced) A fork of mozilla universalchardet
|
||||||
|
* [rust-uchardet](https://github.com/emk/rust-uchardet) Rust language binding of uchardet
|
||||||
|
* [libchardet](https://ftp.oops.org/pub/oops/libchardet/) Another C/C++ API wrapping Mozilla code.
|
||||||
|
|
||||||
|
## Used by
|
||||||
|
|
||||||
|
* [mpv](https://mpv.io/) for subtitle detection
|
||||||
|
* [Tepl](https://wiki.gnome.org/Projects/Tepl)
|
||||||
|
* [Nextcloud IOS app](https://github.com/nextcloud/ios)
|
||||||
|
* …
|
||||||
|
|
||||||
|
## Licenses
|
||||||
|
|
||||||
|
* [Mozilla Public License Version 1.1](http://www.mozilla.org/MPL/1.1/)
|
||||||
|
* [GNU General Public License, version 2.0](http://www.gnu.org/licenses/old-licenses/gpl-2.0.en.html) or later.
|
||||||
|
* [GNU Lesser General Public License, version 2.1](http://www.gnu.org/licenses/old-licenses/lgpl-2.1.en.html) or later.
|
||||||
|
|
||||||
|
See the file `COPYING` for the complete text of these 3 licenses.
|
||||||
|
|
||||||
|
## Code of Conduct
|
||||||
|
|
||||||
|
The `uchardet` project is hosted by [freedesktop.org](https://www.freedesktop.org/)
|
||||||
|
and as such follows its code of conduct. In other words, it means we
|
||||||
|
will treat anyone with respect and expect anyone to do the same.
|
||||||
|
|
||||||
|
Please read [freedesktop.org Code of Conduct](https://www.freedesktop.org/wiki/CodeOfConduct).
|
||||||
|
|
||||||
|
In case of any problem regarding abusive behavior in uchardet project,
|
||||||
|
please contact the maintainer (Jehan) or create a bug report (possibly
|
||||||
|
private if needed).
|
@ -50,7 +50,7 @@ public:
|
|||||||
Reset();}
|
Reset();}
|
||||||
virtual ~nsBig5Prober(void){delete mCodingSM;}
|
virtual ~nsBig5Prober(void){delete mCodingSM;}
|
||||||
nsProbingState HandleData(const char* aBuf, PRUint32 aLen);
|
nsProbingState HandleData(const char* aBuf, PRUint32 aLen);
|
||||||
const char* GetCharSetName() {return "Big5";}
|
const char* GetCharSetName() {return "BIG5";}
|
||||||
nsProbingState GetState(void) {return mState;}
|
nsProbingState GetState(void) {return mState;}
|
||||||
void Reset(void);
|
void Reset(void);
|
||||||
float GetConfidence(void);
|
float GetConfidence(void);
|
||||||
|
@ -74,9 +74,7 @@ PRBool nsCharSetProber::FilterWithoutEnglishLetters(const char* aBuf, PRUint32 a
|
|||||||
if (meetMSB && curPtr > prevPtr)
|
if (meetMSB && curPtr > prevPtr)
|
||||||
while (prevPtr < curPtr) *newptr++ = *prevPtr++;
|
while (prevPtr < curPtr) *newptr++ = *prevPtr++;
|
||||||
|
|
||||||
auto np = reinterpret_cast<uintptr_t>(newptr);
|
newLen = static_cast<PRUint32>(newptr - *newBuf);
|
||||||
auto nb = reinterpret_cast<uintptr_t>(*newBuf);
|
|
||||||
newLen = static_cast<PRUint32>(np - nb);
|
|
||||||
|
|
||||||
return PR_TRUE;
|
return PR_TRUE;
|
||||||
}
|
}
|
||||||
@ -121,9 +119,7 @@ PRBool nsCharSetProber::FilterWithEnglishLetters(const char* aBuf, PRUint32 aLen
|
|||||||
while (prevPtr < curPtr)
|
while (prevPtr < curPtr)
|
||||||
*newptr++ = *prevPtr++;
|
*newptr++ = *prevPtr++;
|
||||||
|
|
||||||
auto np = reinterpret_cast<uintptr_t>(newptr);
|
newLen = static_cast<PRUint32>(newptr - *newBuf);
|
||||||
auto nb = reinterpret_cast<uintptr_t>(*newBuf);
|
|
||||||
newLen = static_cast<PRUint32>(np - nb);
|
|
||||||
|
|
||||||
return PR_TRUE;
|
return PR_TRUE;
|
||||||
}
|
}
|
||||||
|
@ -48,17 +48,14 @@ typedef enum {
|
|||||||
#define GETCLASS(c) GETFROMPCK(((unsigned char)(c)), mModel->classTable)
|
#define GETCLASS(c) GETFROMPCK(((unsigned char)(c)), mModel->classTable)
|
||||||
|
|
||||||
//state machine model
|
//state machine model
|
||||||
struct SMModel
|
typedef struct
|
||||||
{
|
{
|
||||||
nsPkgInt classTable;
|
nsPkgInt classTable;
|
||||||
PRUint32 classFactor;
|
PRUint32 classFactor;
|
||||||
nsPkgInt stateTable;
|
nsPkgInt stateTable;
|
||||||
const PRUint32* charLenTable;
|
const PRUint32* charLenTable;
|
||||||
const char* name;
|
const char* name;
|
||||||
SMModel(){};
|
} SMModel;
|
||||||
SMModel(nsPkgInt a,PRUint32 b,nsPkgInt c,const PRUint32* d, const char* e):
|
|
||||||
classTable(a), classFactor(b), stateTable(c), charLenTable(d), name(e){};
|
|
||||||
} ;
|
|
||||||
|
|
||||||
class nsCodingStateMachine {
|
class nsCodingStateMachine {
|
||||||
public:
|
public:
|
||||||
|
@ -50,7 +50,7 @@ public:
|
|||||||
Reset();}
|
Reset();}
|
||||||
virtual ~nsEUCTWProber(void){delete mCodingSM;}
|
virtual ~nsEUCTWProber(void){delete mCodingSM;}
|
||||||
nsProbingState HandleData(const char* aBuf, PRUint32 aLen);
|
nsProbingState HandleData(const char* aBuf, PRUint32 aLen);
|
||||||
const char* GetCharSetName() {return "x-euc-tw";}
|
const char* GetCharSetName() {return "EUC-TW";}
|
||||||
nsProbingState GetState(void) {return mState;}
|
nsProbingState GetState(void) {return mState;}
|
||||||
void Reset(void);
|
void Reset(void);
|
||||||
float GetConfidence(void);
|
float GetConfidence(void);
|
||||||
|
@ -75,13 +75,17 @@ void nsEscCharSetProber::Reset(void)
|
|||||||
|
|
||||||
nsProbingState nsEscCharSetProber::HandleData(const char* aBuf, PRUint32 aLen)
|
nsProbingState nsEscCharSetProber::HandleData(const char* aBuf, PRUint32 aLen)
|
||||||
{
|
{
|
||||||
for (PRUint32 i = 0; i < aLen && mState == eDetecting; i++)
|
nsSMState codingState;
|
||||||
|
PRInt32 j;
|
||||||
|
PRUint32 i;
|
||||||
|
|
||||||
|
for ( i = 0; i < aLen && mState == eDetecting; i++)
|
||||||
{
|
{
|
||||||
for (PRInt32 j = mActiveSM-1; j>= 0; j--)
|
for (j = mActiveSM-1; j>= 0; j--)
|
||||||
{
|
{
|
||||||
if (mCodingSM[j])
|
if (mCodingSM[j])
|
||||||
{
|
{
|
||||||
nsSMState codingState = mCodingSM[j]->NextState(aBuf[i]);
|
codingState = mCodingSM[j]->NextState(aBuf[i]);
|
||||||
if (codingState == eItsMe)
|
if (codingState == eItsMe)
|
||||||
{
|
{
|
||||||
mState = eFoundIt;
|
mState = eFoundIt;
|
||||||
|
@ -34,7 +34,6 @@
|
|||||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||||
*
|
*
|
||||||
* ***** END LICENSE BLOCK ***** */
|
* ***** END LICENSE BLOCK ***** */
|
||||||
|
|
||||||
#include "nsCodingStateMachine.h"
|
#include "nsCodingStateMachine.h"
|
||||||
|
|
||||||
static const PRUint32 HZ_cls[ 256 / 8 ] = {
|
static const PRUint32 HZ_cls[ 256 / 8 ] = {
|
||||||
@ -84,12 +83,13 @@ PCK4BITS( 4,eItsMe,eStart,eStart,eStart,eStart,eStart,eStart) //28-2f
|
|||||||
|
|
||||||
static const PRUint32 HZCharLenTable[] = {0, 0, 0, 0, 0, 0};
|
static const PRUint32 HZCharLenTable[] = {0, 0, 0, 0, 0, 0};
|
||||||
|
|
||||||
const SMModel HZSMModel(
|
const SMModel HZSMModel = {
|
||||||
nsPkgInt(eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, HZ_cls),
|
{eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, HZ_cls },
|
||||||
6,
|
6,
|
||||||
nsPkgInt(eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, HZ_st),
|
{eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, HZ_st },
|
||||||
HZCharLenTable,
|
HZCharLenTable,
|
||||||
"HZ-GB-2312");
|
"HZ-GB-2312",
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
static const PRUint32 ISO2022CN_cls [ 256 / 8 ] = {
|
static const PRUint32 ISO2022CN_cls [ 256 / 8 ] = {
|
||||||
@ -141,12 +141,13 @@ PCK4BITS(eError,eError,eError,eError,eError,eItsMe,eError,eStart) //38-3f
|
|||||||
|
|
||||||
static const PRUint32 ISO2022CNCharLenTable[] = {0, 0, 0, 0, 0, 0, 0, 0, 0};
|
static const PRUint32 ISO2022CNCharLenTable[] = {0, 0, 0, 0, 0, 0, 0, 0, 0};
|
||||||
|
|
||||||
const SMModel ISO2022CNSMModel(
|
const SMModel ISO2022CNSMModel = {
|
||||||
nsPkgInt(eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, ISO2022CN_cls),
|
{eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, ISO2022CN_cls },
|
||||||
9,
|
9,
|
||||||
nsPkgInt(eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, ISO2022CN_st),
|
{eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, ISO2022CN_st },
|
||||||
ISO2022CNCharLenTable,
|
ISO2022CNCharLenTable,
|
||||||
"ISO-2022-CN");
|
"ISO-2022-CN",
|
||||||
|
};
|
||||||
|
|
||||||
static const PRUint32 ISO2022JP_cls [ 256 / 8 ] = {
|
static const PRUint32 ISO2022JP_cls [ 256 / 8 ] = {
|
||||||
PCK4BITS(2,0,0,0,0,0,0,0), // 00 - 07
|
PCK4BITS(2,0,0,0,0,0,0,0), // 00 - 07
|
||||||
@ -198,12 +199,13 @@ PCK4BITS(eError,eError,eError,eError,eItsMe,eError,eStart,eStart) //40-47
|
|||||||
|
|
||||||
static const PRUint32 ISO2022JPCharLenTable[] = {0, 0, 0, 0, 0, 0, 0, 0};
|
static const PRUint32 ISO2022JPCharLenTable[] = {0, 0, 0, 0, 0, 0, 0, 0};
|
||||||
|
|
||||||
const SMModel ISO2022JPSMModel(
|
const SMModel ISO2022JPSMModel = {
|
||||||
nsPkgInt(eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, ISO2022JP_cls),
|
{eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, ISO2022JP_cls },
|
||||||
10,
|
10,
|
||||||
nsPkgInt(eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, ISO2022JP_st),
|
{eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, ISO2022JP_st },
|
||||||
ISO2022JPCharLenTable,
|
ISO2022JPCharLenTable,
|
||||||
"ISO-2022-JP");
|
"ISO-2022-JP",
|
||||||
|
};
|
||||||
|
|
||||||
static const PRUint32 ISO2022KR_cls [ 256 / 8 ] = {
|
static const PRUint32 ISO2022KR_cls [ 256 / 8 ] = {
|
||||||
PCK4BITS(2,0,0,0,0,0,0,0), // 00 - 07
|
PCK4BITS(2,0,0,0,0,0,0,0), // 00 - 07
|
||||||
@ -251,10 +253,11 @@ PCK4BITS(eError,eError,eError,eItsMe,eStart,eStart,eStart,eStart) //20-27
|
|||||||
|
|
||||||
static const PRUint32 ISO2022KRCharLenTable[] = {0, 0, 0, 0, 0, 0};
|
static const PRUint32 ISO2022KRCharLenTable[] = {0, 0, 0, 0, 0, 0};
|
||||||
|
|
||||||
const SMModel ISO2022KRSMModel(
|
const SMModel ISO2022KRSMModel = {
|
||||||
nsPkgInt(eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, ISO2022KR_cls),
|
{eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, ISO2022KR_cls },
|
||||||
6,
|
6,
|
||||||
nsPkgInt(eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, ISO2022KR_st),
|
{eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, ISO2022KR_st },
|
||||||
ISO2022KRCharLenTable,
|
ISO2022KRCharLenTable,
|
||||||
"ISO-2022-KR");
|
"ISO-2022-KR",
|
||||||
|
};
|
||||||
|
|
||||||
|
@ -42,7 +42,7 @@
|
|||||||
#include "nsCodingStateMachine.h"
|
#include "nsCodingStateMachine.h"
|
||||||
#include "CharDistribution.h"
|
#include "CharDistribution.h"
|
||||||
|
|
||||||
// We use gb18030 to replace gb2312, because 18030 is a superset.
|
// We use GB18030 to replace GB2312, because 18030 is a superset.
|
||||||
|
|
||||||
class nsGB18030Prober: public nsCharSetProber {
|
class nsGB18030Prober: public nsCharSetProber {
|
||||||
public:
|
public:
|
||||||
@ -52,7 +52,7 @@ public:
|
|||||||
Reset();}
|
Reset();}
|
||||||
virtual ~nsGB18030Prober(void){delete mCodingSM;}
|
virtual ~nsGB18030Prober(void){delete mCodingSM;}
|
||||||
nsProbingState HandleData(const char* aBuf, PRUint32 aLen);
|
nsProbingState HandleData(const char* aBuf, PRUint32 aLen);
|
||||||
const char* GetCharSetName() {return "gb18030";}
|
const char* GetCharSetName() {return "GB18030";}
|
||||||
nsProbingState GetState(void) {return mState;}
|
nsProbingState GetState(void) {return mState;}
|
||||||
void Reset(void);
|
void Reset(void);
|
||||||
float GetConfidence(void);
|
float GetConfidence(void);
|
||||||
|
@ -59,7 +59,7 @@
|
|||||||
#define MIN_MODEL_DISTANCE (0.01)
|
#define MIN_MODEL_DISTANCE (0.01)
|
||||||
|
|
||||||
#define VISUAL_HEBREW_NAME ("ISO-8859-8")
|
#define VISUAL_HEBREW_NAME ("ISO-8859-8")
|
||||||
#define LOGICAL_HEBREW_NAME ("windows-1255")
|
#define LOGICAL_HEBREW_NAME ("WINDOWS-1255")
|
||||||
|
|
||||||
PRBool nsHebrewProber::isFinal(char c)
|
PRBool nsHebrewProber::isFinal(char c)
|
||||||
{
|
{
|
||||||
|
@ -48,7 +48,7 @@ public:
|
|||||||
nsLatin1Prober(void){Reset();}
|
nsLatin1Prober(void){Reset();}
|
||||||
virtual ~nsLatin1Prober(void){}
|
virtual ~nsLatin1Prober(void){}
|
||||||
nsProbingState HandleData(const char* aBuf, PRUint32 aLen);
|
nsProbingState HandleData(const char* aBuf, PRUint32 aLen);
|
||||||
const char* GetCharSetName() {return "windows-1252";}
|
const char* GetCharSetName() {return "WINDOWS-1252";}
|
||||||
nsProbingState GetState(void) {return mState;}
|
nsProbingState GetState(void) {return mState;}
|
||||||
void Reset(void);
|
void Reset(void);
|
||||||
float GetConfidence(void);
|
float GetConfidence(void);
|
||||||
|
@ -36,7 +36,6 @@
|
|||||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||||
*
|
*
|
||||||
* ***** END LICENSE BLOCK ***** */
|
* ***** END LICENSE BLOCK ***** */
|
||||||
|
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
|
|
||||||
#include "nsMBCSGroupProber.h"
|
#include "nsMBCSGroupProber.h"
|
||||||
@ -45,13 +44,13 @@
|
|||||||
#if defined(DEBUG_chardet) || defined(DEBUG_jgmyers)
|
#if defined(DEBUG_chardet) || defined(DEBUG_jgmyers)
|
||||||
const char *ProberName[] =
|
const char *ProberName[] =
|
||||||
{
|
{
|
||||||
"UTF8",
|
"UTF-8",
|
||||||
"SJIS",
|
"SJIS",
|
||||||
"EUCJP",
|
"EUC-JP",
|
||||||
"GB18030",
|
"GB18030",
|
||||||
"EUCKR",
|
"EUC-KR",
|
||||||
"Big5",
|
"Big5",
|
||||||
"EUCTW",
|
"EUC-TW",
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -34,7 +34,6 @@
|
|||||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||||
*
|
*
|
||||||
* ***** END LICENSE BLOCK ***** */
|
* ***** END LICENSE BLOCK ***** */
|
||||||
|
|
||||||
#include "nsCodingStateMachine.h"
|
#include "nsCodingStateMachine.h"
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -90,12 +89,13 @@ PCK4BITS(eError,eStart,eStart,eStart,eStart,eStart,eStart,eStart) //10-17
|
|||||||
|
|
||||||
static const PRUint32 Big5CharLenTable[] = {0, 1, 1, 2, 0};
|
static const PRUint32 Big5CharLenTable[] = {0, 1, 1, 2, 0};
|
||||||
|
|
||||||
const SMModel Big5SMModel(
|
SMModel const Big5SMModel = {
|
||||||
nsPkgInt( eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, BIG5_cls ),
|
{eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, BIG5_cls },
|
||||||
5,
|
5,
|
||||||
nsPkgInt( eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, BIG5_st ),
|
{eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, BIG5_st },
|
||||||
Big5CharLenTable,
|
Big5CharLenTable,
|
||||||
"Big5");
|
"BIG5",
|
||||||
|
};
|
||||||
|
|
||||||
static const PRUint32 EUCJP_cls [ 256 / 8 ] = {
|
static const PRUint32 EUCJP_cls [ 256 / 8 ] = {
|
||||||
//PCK4BITS(5,4,4,4,4,4,4,4), // 00 - 07
|
//PCK4BITS(5,4,4,4,4,4,4,4), // 00 - 07
|
||||||
@ -144,12 +144,13 @@ PCK4BITS( 3,eError,eError,eError,eStart,eStart,eStart,eStart) //20-27
|
|||||||
|
|
||||||
static const PRUint32 EUCJPCharLenTable[] = {2, 2, 2, 3, 1, 0};
|
static const PRUint32 EUCJPCharLenTable[] = {2, 2, 2, 3, 1, 0};
|
||||||
|
|
||||||
const SMModel EUCJPSMModel(
|
const SMModel EUCJPSMModel = {
|
||||||
nsPkgInt(eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, EUCJP_cls),
|
{eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, EUCJP_cls },
|
||||||
6,
|
6,
|
||||||
nsPkgInt(eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, EUCJP_st),
|
{eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, EUCJP_st },
|
||||||
EUCJPCharLenTable,
|
EUCJPCharLenTable,
|
||||||
"EUC-JP");
|
"EUC-JP",
|
||||||
|
};
|
||||||
|
|
||||||
static const PRUint32 EUCKR_cls [ 256 / 8 ] = {
|
static const PRUint32 EUCKR_cls [ 256 / 8 ] = {
|
||||||
//PCK4BITS(0,1,1,1,1,1,1,1), // 00 - 07
|
//PCK4BITS(0,1,1,1,1,1,1,1), // 00 - 07
|
||||||
@ -195,12 +196,13 @@ PCK4BITS(eItsMe,eItsMe,eItsMe,eItsMe,eError,eError,eStart,eStart) //08-0f
|
|||||||
|
|
||||||
static const PRUint32 EUCKRCharLenTable[] = {0, 1, 2, 0};
|
static const PRUint32 EUCKRCharLenTable[] = {0, 1, 2, 0};
|
||||||
|
|
||||||
const SMModel EUCKRSMModel (
|
const SMModel EUCKRSMModel = {
|
||||||
nsPkgInt(eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, EUCKR_cls),
|
{eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, EUCKR_cls },
|
||||||
4,
|
4,
|
||||||
nsPkgInt(eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, EUCKR_st),
|
{eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, EUCKR_st },
|
||||||
EUCKRCharLenTable,
|
EUCKRCharLenTable,
|
||||||
"EUC-KR");
|
"EUC-KR",
|
||||||
|
};
|
||||||
|
|
||||||
static const PRUint32 EUCTW_cls [ 256 / 8 ] = {
|
static const PRUint32 EUCTW_cls [ 256 / 8 ] = {
|
||||||
//PCK4BITS(0,2,2,2,2,2,2,2), // 00 - 07
|
//PCK4BITS(0,2,2,2,2,2,2,2), // 00 - 07
|
||||||
@ -250,14 +252,15 @@ PCK4BITS(eStart,eError,eStart,eStart,eStart,eStart,eStart,eStart) //28-2f
|
|||||||
|
|
||||||
static const PRUint32 EUCTWCharLenTable[] = {0, 0, 1, 2, 2, 2, 3};
|
static const PRUint32 EUCTWCharLenTable[] = {0, 0, 1, 2, 2, 2, 3};
|
||||||
|
|
||||||
const SMModel EUCTWSMModel(
|
const SMModel EUCTWSMModel = {
|
||||||
nsPkgInt(eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, EUCTW_cls),
|
{eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, EUCTW_cls },
|
||||||
7,
|
7,
|
||||||
nsPkgInt(eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, EUCTW_st),
|
{eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, EUCTW_st },
|
||||||
EUCTWCharLenTable,
|
EUCTWCharLenTable,
|
||||||
"x-euc-tw");
|
"EUC-TW",
|
||||||
|
};
|
||||||
|
|
||||||
/* obsolete GB2312 by gb18030
|
/* obsolete GB2312 by GB18030
|
||||||
static PRUint32 GB2312_cls [ 256 / 8 ] = {
|
static PRUint32 GB2312_cls [ 256 / 8 ] = {
|
||||||
//PCK4BITS(0,1,1,1,1,1,1,1), // 00 - 07
|
//PCK4BITS(0,1,1,1,1,1,1,1), // 00 - 07
|
||||||
PCK4BITS(1,1,1,1,1,1,1,1), // 00 - 07
|
PCK4BITS(1,1,1,1,1,1,1,1), // 00 - 07
|
||||||
@ -365,12 +368,13 @@ PCK4BITS(eError,eError,eStart,eStart,eStart,eStart,eStart,eStart) //28-2f
|
|||||||
// 2 here.
|
// 2 here.
|
||||||
static const PRUint32 GB18030CharLenTable[] = {0, 1, 1, 1, 1, 1, 2};
|
static const PRUint32 GB18030CharLenTable[] = {0, 1, 1, 1, 1, 1, 2};
|
||||||
|
|
||||||
const SMModel GB18030SMModel(
|
const SMModel GB18030SMModel = {
|
||||||
nsPkgInt(eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, GB18030_cls ),
|
{eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, GB18030_cls },
|
||||||
7,
|
7,
|
||||||
nsPkgInt(eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, GB18030_st ),
|
{eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, GB18030_st },
|
||||||
GB18030CharLenTable,
|
GB18030CharLenTable,
|
||||||
"GB18030");
|
"GB18030",
|
||||||
|
};
|
||||||
|
|
||||||
// sjis
|
// sjis
|
||||||
|
|
||||||
@ -421,12 +425,13 @@ PCK4BITS(eItsMe,eItsMe,eError,eError,eStart,eStart,eStart,eStart) //10-17
|
|||||||
|
|
||||||
static const PRUint32 SJISCharLenTable[] = {0, 1, 1, 2, 0, 0};
|
static const PRUint32 SJISCharLenTable[] = {0, 1, 1, 2, 0, 0};
|
||||||
|
|
||||||
const SMModel SJISSMModel(
|
const SMModel SJISSMModel = {
|
||||||
nsPkgInt(eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, SJIS_cls),
|
{eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, SJIS_cls },
|
||||||
6,
|
6,
|
||||||
nsPkgInt(eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, SJIS_st),
|
{eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, SJIS_st },
|
||||||
SJISCharLenTable,
|
SJISCharLenTable,
|
||||||
"Shift_JIS");
|
"SHIFT_JIS",
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
static const PRUint32 UTF8_cls [ 256 / 8 ] = {
|
static const PRUint32 UTF8_cls [ 256 / 8 ] = {
|
||||||
@ -498,10 +503,11 @@ PCK4BITS(eError,eError,eError,eError,eError,eError,eError,eError) //c8-cf
|
|||||||
static const PRUint32 UTF8CharLenTable[] = {0, 1, 0, 0, 0, 0, 2, 3,
|
static const PRUint32 UTF8CharLenTable[] = {0, 1, 0, 0, 0, 0, 2, 3,
|
||||||
3, 3, 4, 4, 5, 5, 6, 6 };
|
3, 3, 4, 4, 5, 5, 6, 6 };
|
||||||
|
|
||||||
const SMModel UTF8SMModel(
|
const SMModel UTF8SMModel = {
|
||||||
nsPkgInt(eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, UTF8_cls),
|
{eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, UTF8_cls },
|
||||||
16,
|
16,
|
||||||
nsPkgInt(eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, UTF8_st),
|
{eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, UTF8_st },
|
||||||
UTF8CharLenTable,
|
UTF8CharLenTable,
|
||||||
"UTF-8");
|
"UTF-8",
|
||||||
|
};
|
||||||
|
|
||||||
|
@ -63,17 +63,13 @@ typedef enum {
|
|||||||
eUnitMsk16bits = 0x0000FFFFL
|
eUnitMsk16bits = 0x0000FFFFL
|
||||||
} nsUnitMsk;
|
} nsUnitMsk;
|
||||||
|
|
||||||
struct nsPkgInt {
|
typedef struct nsPkgInt {
|
||||||
nsIdxSft idxsft;
|
nsIdxSft idxsft;
|
||||||
nsSftMsk sftmsk;
|
nsSftMsk sftmsk;
|
||||||
nsBitSft bitsft;
|
nsBitSft bitsft;
|
||||||
nsUnitMsk unitmsk;
|
nsUnitMsk unitmsk;
|
||||||
const PRUint32* const data;
|
const PRUint32* const data;
|
||||||
nsPkgInt(nsIdxSft a,nsSftMsk b, nsBitSft c,nsUnitMsk d,const PRUint32* const e)
|
} nsPkgInt;
|
||||||
:idxsft(a), sftmsk(b), bitsft(c), unitmsk(d), data(e){}
|
|
||||||
nsPkgInt();
|
|
||||||
nsPkgInt operator= (const nsPkgInt&);
|
|
||||||
};
|
|
||||||
|
|
||||||
|
|
||||||
#define PCK16BITS(a,b) ((PRUint32)(((b) << 16) | (a)))
|
#define PCK16BITS(a,b) ((PRUint32)(((b) << 16) | (a)))
|
||||||
|
@ -46,42 +46,70 @@
|
|||||||
|
|
||||||
nsSBCSGroupProber::nsSBCSGroupProber()
|
nsSBCSGroupProber::nsSBCSGroupProber()
|
||||||
{
|
{
|
||||||
mProbers[0] = new nsSingleByteCharSetProber(&Win1251Model);
|
mProbers[0] = new nsSingleByteCharSetProber(&Win1251RussianModel);
|
||||||
mProbers[1] = new nsSingleByteCharSetProber(&Koi8rModel);
|
mProbers[1] = new nsSingleByteCharSetProber(&Koi8rRussianModel);
|
||||||
mProbers[2] = new nsSingleByteCharSetProber(&Latin5Model);
|
mProbers[2] = new nsSingleByteCharSetProber(&Latin5RussianModel);
|
||||||
mProbers[3] = new nsSingleByteCharSetProber(&MacCyrillicModel);
|
mProbers[3] = new nsSingleByteCharSetProber(&MacCyrillicRussianModel);
|
||||||
mProbers[4] = new nsSingleByteCharSetProber(&Ibm866Model);
|
mProbers[4] = new nsSingleByteCharSetProber(&Ibm866RussianModel);
|
||||||
mProbers[5] = new nsSingleByteCharSetProber(&Ibm855Model);
|
mProbers[5] = new nsSingleByteCharSetProber(&Ibm855RussianModel);
|
||||||
mProbers[6] = new nsSingleByteCharSetProber(&Latin7Model);
|
|
||||||
mProbers[7] = new nsSingleByteCharSetProber(&Win1253Model);
|
mProbers[6] = new nsSingleByteCharSetProber(&Iso_8859_7GreekModel);
|
||||||
|
mProbers[7] = new nsSingleByteCharSetProber(&Windows_1253GreekModel);
|
||||||
|
|
||||||
mProbers[8] = new nsSingleByteCharSetProber(&Latin5BulgarianModel);
|
mProbers[8] = new nsSingleByteCharSetProber(&Latin5BulgarianModel);
|
||||||
mProbers[9] = new nsSingleByteCharSetProber(&Win1251BulgarianModel);
|
mProbers[9] = new nsSingleByteCharSetProber(&Win1251BulgarianModel);
|
||||||
mProbers[10] = new nsSingleByteCharSetProber(&TIS620ThaiModel);
|
|
||||||
|
|
||||||
nsHebrewProber *hebprober = new nsHebrewProber();
|
nsHebrewProber *hebprober = new nsHebrewProber();
|
||||||
// Notice: Any change in these indexes - 10,11,12 must be reflected
|
// Notice: Any change in these indexes - 10,11,12 must be reflected
|
||||||
// in the code below as well.
|
// in the code below as well.
|
||||||
mProbers[11] = hebprober;
|
mProbers[10] = hebprober;
|
||||||
mProbers[12] = new nsSingleByteCharSetProber(&Win1255Model, PR_FALSE, hebprober); // Logical Hebrew
|
mProbers[11] = new nsSingleByteCharSetProber(&Win1255Model, PR_FALSE, hebprober); // Logical Hebrew
|
||||||
mProbers[13] = new nsSingleByteCharSetProber(&Win1255Model, PR_TRUE, hebprober); // Visual Hebrew
|
mProbers[12] = new nsSingleByteCharSetProber(&Win1255Model, PR_TRUE, hebprober); // Visual Hebrew
|
||||||
// Tell the Hebrew prober about the logical and visual probers
|
// Tell the Hebrew prober about the logical and visual probers
|
||||||
if (mProbers[11] && mProbers[12] && mProbers[13]) // all are not null
|
if (mProbers[10] && mProbers[11] && mProbers[12]) // all are not null
|
||||||
{
|
{
|
||||||
hebprober->SetModelProbers(mProbers[12], mProbers[13]);
|
hebprober->SetModelProbers(mProbers[11], mProbers[12]);
|
||||||
}
|
}
|
||||||
else // One or more is null. avoid any Hebrew probing, null them all
|
else // One or more is null. avoid any Hebrew probing, null them all
|
||||||
{
|
{
|
||||||
for (PRUint32 i = 11; i <= 13; ++i)
|
for (PRUint32 i = 10; i <= 12; ++i)
|
||||||
{
|
{
|
||||||
delete mProbers[i];
|
delete mProbers[i];
|
||||||
mProbers[i] = 0;
|
mProbers[i] = 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// disable latin2 before latin1 is available, otherwise all latin1
|
mProbers[13] = new nsSingleByteCharSetProber(&Tis_620ThaiModel);
|
||||||
// will be detected as latin2 because of their similarity.
|
mProbers[14] = new nsSingleByteCharSetProber(&Iso_8859_11ThaiModel);
|
||||||
//mProbers[10] = new nsSingleByteCharSetProber(&Latin2HungarianModel);
|
|
||||||
//mProbers[11] = new nsSingleByteCharSetProber(&Win1250HungarianModel);
|
mProbers[15] = new nsSingleByteCharSetProber(&Iso_8859_1FrenchModel);
|
||||||
|
mProbers[16] = new nsSingleByteCharSetProber(&Iso_8859_15FrenchModel);
|
||||||
|
mProbers[17] = new nsSingleByteCharSetProber(&Windows_1252FrenchModel);
|
||||||
|
|
||||||
|
mProbers[18] = new nsSingleByteCharSetProber(&Iso_8859_1SpanishModel);
|
||||||
|
mProbers[19] = new nsSingleByteCharSetProber(&Iso_8859_15SpanishModel);
|
||||||
|
mProbers[20] = new nsSingleByteCharSetProber(&Windows_1252SpanishModel);
|
||||||
|
|
||||||
|
mProbers[21] = new nsSingleByteCharSetProber(&Iso_8859_2HungarianModel);
|
||||||
|
mProbers[22] = new nsSingleByteCharSetProber(&Windows_1250HungarianModel);
|
||||||
|
|
||||||
|
mProbers[23] = new nsSingleByteCharSetProber(&Iso_8859_1GermanModel);
|
||||||
|
mProbers[24] = new nsSingleByteCharSetProber(&Windows_1252GermanModel);
|
||||||
|
|
||||||
|
mProbers[25] = new nsSingleByteCharSetProber(&Iso_8859_3EsperantoModel);
|
||||||
|
|
||||||
|
mProbers[26] = new nsSingleByteCharSetProber(&Iso_8859_3TurkishModel);
|
||||||
|
mProbers[27] = new nsSingleByteCharSetProber(&Iso_8859_9TurkishModel);
|
||||||
|
|
||||||
|
mProbers[28] = new nsSingleByteCharSetProber(&Iso_8859_6ArabicModel);
|
||||||
|
mProbers[29] = new nsSingleByteCharSetProber(&Windows_1256ArabicModel);
|
||||||
|
|
||||||
|
mProbers[30] = new nsSingleByteCharSetProber(&VisciiVietnameseModel);
|
||||||
|
mProbers[31] = new nsSingleByteCharSetProber(&Windows_1258VietnameseModel);
|
||||||
|
|
||||||
|
mProbers[32] = new nsSingleByteCharSetProber(&Iso_8859_15DanishModel);
|
||||||
|
mProbers[33] = new nsSingleByteCharSetProber(&Iso_8859_1DanishModel);
|
||||||
|
mProbers[34] = new nsSingleByteCharSetProber(&Windows_1252DanishModel);
|
||||||
|
|
||||||
Reset();
|
Reset();
|
||||||
}
|
}
|
||||||
|
@ -40,7 +40,7 @@
|
|||||||
#define nsSBCSGroupProber_h__
|
#define nsSBCSGroupProber_h__
|
||||||
|
|
||||||
|
|
||||||
#define NUM_OF_SBCS_PROBERS 14
|
#define NUM_OF_SBCS_PROBERS 35
|
||||||
|
|
||||||
class nsCharSetProber;
|
class nsCharSetProber;
|
||||||
class nsSBCSGroupProber: public nsCharSetProber {
|
class nsSBCSGroupProber: public nsCharSetProber {
|
||||||
|
@ -35,7 +35,6 @@
|
|||||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||||
*
|
*
|
||||||
* ***** END LICENSE BLOCK ***** */
|
* ***** END LICENSE BLOCK ***** */
|
||||||
|
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include "nsSBCharSetProber.h"
|
#include "nsSBCharSetProber.h"
|
||||||
|
|
||||||
@ -48,18 +47,31 @@ nsProbingState nsSingleByteCharSetProber::HandleData(const char* aBuf, PRUint32
|
|||||||
order = mModel->charToOrderMap[(unsigned char)aBuf[i]];
|
order = mModel->charToOrderMap[(unsigned char)aBuf[i]];
|
||||||
|
|
||||||
if (order < SYMBOL_CAT_ORDER)
|
if (order < SYMBOL_CAT_ORDER)
|
||||||
|
{
|
||||||
mTotalChar++;
|
mTotalChar++;
|
||||||
if (order < SAMPLE_SIZE)
|
}
|
||||||
|
else if (order == ILL)
|
||||||
|
{
|
||||||
|
/* When encountering an illegal codepoint, no need
|
||||||
|
* to continue analyzing data. */
|
||||||
|
mState = eNotMe;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
else if (order == CTR)
|
||||||
|
{
|
||||||
|
mCtrlChar++;
|
||||||
|
}
|
||||||
|
if (order < mModel->freqCharCount)
|
||||||
{
|
{
|
||||||
mFreqChar++;
|
mFreqChar++;
|
||||||
|
|
||||||
if (mLastOrder < SAMPLE_SIZE)
|
if (mLastOrder < mModel->freqCharCount)
|
||||||
{
|
{
|
||||||
mTotalSeqs++;
|
mTotalSeqs++;
|
||||||
if (!mReversed)
|
if (!mReversed)
|
||||||
++(mSeqCounters[mModel->precedenceMatrix[mLastOrder*SAMPLE_SIZE+order]]);
|
++(mSeqCounters[mModel->precedenceMatrix[mLastOrder*mModel->freqCharCount+order]]);
|
||||||
else // reverse the order of the letters in the lookup
|
else // reverse the order of the letters in the lookup
|
||||||
++(mSeqCounters[mModel->precedenceMatrix[order*SAMPLE_SIZE+mLastOrder]]);
|
++(mSeqCounters[mModel->precedenceMatrix[order*mModel->freqCharCount+mLastOrder]]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
mLastOrder = order;
|
mLastOrder = order;
|
||||||
@ -86,6 +98,7 @@ void nsSingleByteCharSetProber::Reset(void)
|
|||||||
mSeqCounters[i] = 0;
|
mSeqCounters[i] = 0;
|
||||||
mTotalSeqs = 0;
|
mTotalSeqs = 0;
|
||||||
mTotalChar = 0;
|
mTotalChar = 0;
|
||||||
|
mCtrlChar = 0;
|
||||||
mFreqChar = 0;
|
mFreqChar = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -103,6 +116,19 @@ float nsSingleByteCharSetProber::GetConfidence(void)
|
|||||||
|
|
||||||
if (mTotalSeqs > 0) {
|
if (mTotalSeqs > 0) {
|
||||||
r = ((float)1.0) * mSeqCounters[POSITIVE_CAT] / mTotalSeqs / mModel->mTypicalPositiveRatio;
|
r = ((float)1.0) * mSeqCounters[POSITIVE_CAT] / mTotalSeqs / mModel->mTypicalPositiveRatio;
|
||||||
|
/* Multiply by a ratio of positive sequences per characters.
|
||||||
|
* This would help in particular to distinguish close winners.
|
||||||
|
* Indeed if you add a letter, you'd expect the positive sequence count
|
||||||
|
* to increase as well. If it doesn't, it may mean that this new codepoint
|
||||||
|
* may not have been a letter, but instead a symbol (or some other
|
||||||
|
* character). This could make the difference between very closely related
|
||||||
|
* charsets used for the same language.
|
||||||
|
*/
|
||||||
|
r = r * (mSeqCounters[POSITIVE_CAT] + (float) mSeqCounters[PROBABLE_CAT] / 4) / mTotalChar;
|
||||||
|
/* The more control characters (proportionnaly to the size of the text), the
|
||||||
|
* less confident we become in the current charset.
|
||||||
|
*/
|
||||||
|
r = r * (mTotalChar - mCtrlChar) / mTotalChar;
|
||||||
r = r*mFreqChar/mTotalChar;
|
r = r*mFreqChar/mTotalChar;
|
||||||
if (r >= (float)1.00)
|
if (r >= (float)1.00)
|
||||||
r = (float)0.99;
|
r = (float)0.99;
|
||||||
|
@ -40,27 +40,42 @@
|
|||||||
|
|
||||||
#include "nsCharSetProber.h"
|
#include "nsCharSetProber.h"
|
||||||
|
|
||||||
#define SAMPLE_SIZE 64
|
/** Codepoints **/
|
||||||
|
|
||||||
|
/* Illegal codepoints.*/
|
||||||
|
#define ILL 255
|
||||||
|
/* Control character. */
|
||||||
|
#define CTR 254
|
||||||
|
/* Symbols and punctuation that does not belong to words. */
|
||||||
|
#define SYM 253
|
||||||
|
/* Return/Line feeds. */
|
||||||
|
#define RET 252
|
||||||
|
/* Numbers 0-9. */
|
||||||
|
#define NUM 251
|
||||||
|
|
||||||
#define SB_ENOUGH_REL_THRESHOLD 1024
|
#define SB_ENOUGH_REL_THRESHOLD 1024
|
||||||
#define POSITIVE_SHORTCUT_THRESHOLD (float)0.95
|
#define POSITIVE_SHORTCUT_THRESHOLD (float)0.95
|
||||||
#define NEGATIVE_SHORTCUT_THRESHOLD (float)0.05
|
#define NEGATIVE_SHORTCUT_THRESHOLD (float)0.05
|
||||||
#define SYMBOL_CAT_ORDER 250
|
#define SYMBOL_CAT_ORDER 250
|
||||||
|
|
||||||
#define NUMBER_OF_SEQ_CAT 4
|
#define NUMBER_OF_SEQ_CAT 4
|
||||||
#define POSITIVE_CAT (NUMBER_OF_SEQ_CAT-1)
|
#define POSITIVE_CAT (NUMBER_OF_SEQ_CAT-1)
|
||||||
|
#define PROBABLE_CAT (NUMBER_OF_SEQ_CAT-2)
|
||||||
|
#define NEUTRAL_CAT (NUMBER_OF_SEQ_CAT-3)
|
||||||
#define NEGATIVE_CAT 0
|
#define NEGATIVE_CAT 0
|
||||||
|
|
||||||
struct SequenceModel
|
typedef struct
|
||||||
{
|
{
|
||||||
const unsigned char* const charToOrderMap; // [256] table use to find a char's order
|
/* [256] table mapping codepoints to chararacter orders. */
|
||||||
const PRUint8* const precedenceMatrix; // [SAMPLE_SIZE][SAMPLE_SIZE]; table to find a 2-char sequence's frequency
|
const unsigned char* const charToOrderMap;
|
||||||
|
/* freqCharCount x freqCharCount table of 2-char sequence's frequencies. */
|
||||||
|
const PRUint8* const precedenceMatrix;
|
||||||
|
/* The count of frequent characters. */
|
||||||
|
int freqCharCount;
|
||||||
float mTypicalPositiveRatio; // = freqSeqs / totalSeqs
|
float mTypicalPositiveRatio; // = freqSeqs / totalSeqs
|
||||||
PRBool keepEnglishLetter; // says if this script contains English characters (not implemented)
|
PRBool keepEnglishLetter; // says if this script contains English characters (not implemented)
|
||||||
const char* const charsetName;
|
const char* const charsetName;
|
||||||
SequenceModel(void);
|
} SequenceModel;
|
||||||
SequenceModel(const unsigned char* const a, const PRUint8* const b,float c,PRBool d,const char* const e)
|
|
||||||
: charToOrderMap(a), precedenceMatrix(b), mTypicalPositiveRatio(c), keepEnglishLetter(d), charsetName(e){}
|
|
||||||
SequenceModel& operator=(const SequenceModel&);
|
|
||||||
} ;
|
|
||||||
|
|
||||||
|
|
||||||
class nsSingleByteCharSetProber : public nsCharSetProber{
|
class nsSingleByteCharSetProber : public nsCharSetProber{
|
||||||
@ -69,7 +84,7 @@ public:
|
|||||||
:mModel(model), mReversed(PR_FALSE), mNameProber(0) { Reset(); }
|
:mModel(model), mReversed(PR_FALSE), mNameProber(0) { Reset(); }
|
||||||
nsSingleByteCharSetProber(const SequenceModel *model, PRBool reversed, nsCharSetProber* nameProber)
|
nsSingleByteCharSetProber(const SequenceModel *model, PRBool reversed, nsCharSetProber* nameProber)
|
||||||
:mModel(model), mReversed(reversed), mNameProber(nameProber) { Reset(); }
|
:mModel(model), mReversed(reversed), mNameProber(nameProber) { Reset(); }
|
||||||
nsSingleByteCharSetProber(): mModel(0), mReversed(0){};
|
|
||||||
virtual const char* GetCharSetName();
|
virtual const char* GetCharSetName();
|
||||||
virtual nsProbingState HandleData(const char* aBuf, PRUint32 aLen);
|
virtual nsProbingState HandleData(const char* aBuf, PRUint32 aLen);
|
||||||
virtual nsProbingState GetState(void) {return mState;}
|
virtual nsProbingState GetState(void) {return mState;}
|
||||||
@ -84,7 +99,6 @@ public:
|
|||||||
// prober has a hard-coded call to FilterWithoutEnglishLetters which gets rid
|
// prober has a hard-coded call to FilterWithoutEnglishLetters which gets rid
|
||||||
// of the English letters.
|
// of the English letters.
|
||||||
PRBool KeepEnglishLetters() {return mModel->keepEnglishLetter;} // (not implemented)
|
PRBool KeepEnglishLetters() {return mModel->keepEnglishLetter;} // (not implemented)
|
||||||
nsSingleByteCharSetProber operator=(const nsSingleByteCharSetProber&) = delete;
|
|
||||||
|
|
||||||
#ifdef DEBUG_chardet
|
#ifdef DEBUG_chardet
|
||||||
virtual void DumpStatus();
|
virtual void DumpStatus();
|
||||||
@ -102,6 +116,7 @@ protected:
|
|||||||
PRUint32 mSeqCounters[NUMBER_OF_SEQ_CAT];
|
PRUint32 mSeqCounters[NUMBER_OF_SEQ_CAT];
|
||||||
|
|
||||||
PRUint32 mTotalChar;
|
PRUint32 mTotalChar;
|
||||||
|
PRUint32 mCtrlChar;
|
||||||
//characters that fall in our sampling range
|
//characters that fall in our sampling range
|
||||||
PRUint32 mFreqChar;
|
PRUint32 mFreqChar;
|
||||||
|
|
||||||
@ -110,21 +125,52 @@ protected:
|
|||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
|
extern const SequenceModel Windows_1256ArabicModel;
|
||||||
|
extern const SequenceModel Iso_8859_6ArabicModel;
|
||||||
|
|
||||||
|
extern const SequenceModel Koi8rRussianModel;
|
||||||
|
extern const SequenceModel Win1251RussianModel;
|
||||||
|
extern const SequenceModel Latin5RussianModel;
|
||||||
|
extern const SequenceModel MacCyrillicRussianModel;
|
||||||
|
extern const SequenceModel Ibm866RussianModel;
|
||||||
|
extern const SequenceModel Ibm855RussianModel;
|
||||||
|
|
||||||
|
extern const SequenceModel Iso_8859_7GreekModel;
|
||||||
|
extern const SequenceModel Windows_1253GreekModel;
|
||||||
|
|
||||||
extern const SequenceModel Koi8rModel;
|
|
||||||
extern const SequenceModel Win1251Model;
|
|
||||||
extern const SequenceModel Latin5Model;
|
|
||||||
extern const SequenceModel MacCyrillicModel;
|
|
||||||
extern const SequenceModel Ibm866Model;
|
|
||||||
extern const SequenceModel Ibm855Model;
|
|
||||||
extern const SequenceModel Latin7Model;
|
|
||||||
extern const SequenceModel Win1253Model;
|
|
||||||
extern const SequenceModel Latin5BulgarianModel;
|
extern const SequenceModel Latin5BulgarianModel;
|
||||||
extern const SequenceModel Win1251BulgarianModel;
|
extern const SequenceModel Win1251BulgarianModel;
|
||||||
extern const SequenceModel Latin2HungarianModel;
|
|
||||||
extern const SequenceModel Win1250HungarianModel;
|
extern const SequenceModel Iso_8859_2HungarianModel;
|
||||||
|
extern const SequenceModel Windows_1250HungarianModel;
|
||||||
|
|
||||||
extern const SequenceModel Win1255Model;
|
extern const SequenceModel Win1255Model;
|
||||||
extern const SequenceModel TIS620ThaiModel;
|
|
||||||
|
extern const SequenceModel Tis_620ThaiModel;
|
||||||
|
extern const SequenceModel Iso_8859_11ThaiModel;
|
||||||
|
|
||||||
|
extern const SequenceModel Iso_8859_15FrenchModel;
|
||||||
|
extern const SequenceModel Iso_8859_1FrenchModel;
|
||||||
|
extern const SequenceModel Windows_1252FrenchModel;
|
||||||
|
|
||||||
|
extern const SequenceModel Iso_8859_15SpanishModel;
|
||||||
|
extern const SequenceModel Iso_8859_1SpanishModel;
|
||||||
|
extern const SequenceModel Windows_1252SpanishModel;
|
||||||
|
|
||||||
|
extern const SequenceModel Iso_8859_1GermanModel;
|
||||||
|
extern const SequenceModel Windows_1252GermanModel;
|
||||||
|
|
||||||
|
extern const SequenceModel Iso_8859_3EsperantoModel;
|
||||||
|
|
||||||
|
extern const SequenceModel Iso_8859_3TurkishModel;
|
||||||
|
extern const SequenceModel Iso_8859_9TurkishModel;
|
||||||
|
|
||||||
|
extern const SequenceModel VisciiVietnameseModel;
|
||||||
|
extern const SequenceModel Windows_1258VietnameseModel;
|
||||||
|
|
||||||
|
extern const SequenceModel Iso_8859_15DanishModel;
|
||||||
|
extern const SequenceModel Iso_8859_1DanishModel;
|
||||||
|
extern const SequenceModel Windows_1252DanishModel;
|
||||||
|
|
||||||
#endif /* nsSingleByteCharSetProber_h__ */
|
#endif /* nsSingleByteCharSetProber_h__ */
|
||||||
|
|
||||||
|
@ -40,7 +40,6 @@
|
|||||||
// 2, kana character often exist in group
|
// 2, kana character often exist in group
|
||||||
// 3, certain combination of kana is never used in japanese language
|
// 3, certain combination of kana is never used in japanese language
|
||||||
|
|
||||||
|
|
||||||
#include "nsSJISProber.h"
|
#include "nsSJISProber.h"
|
||||||
|
|
||||||
void nsSJISProber::Reset(void)
|
void nsSJISProber::Reset(void)
|
||||||
|
@ -57,7 +57,7 @@ public:
|
|||||||
Reset();}
|
Reset();}
|
||||||
virtual ~nsSJISProber(void){delete mCodingSM;}
|
virtual ~nsSJISProber(void){delete mCodingSM;}
|
||||||
nsProbingState HandleData(const char* aBuf, PRUint32 aLen);
|
nsProbingState HandleData(const char* aBuf, PRUint32 aLen);
|
||||||
const char* GetCharSetName() {return "Shift_JIS";}
|
const char* GetCharSetName() {return "SHIFT_JIS";}
|
||||||
nsProbingState GetState(void) {return mState;}
|
nsProbingState GetState(void) {return mState;}
|
||||||
void Reset(void);
|
void Reset(void);
|
||||||
float GetConfidence(void);
|
float GetConfidence(void);
|
||||||
|
@ -47,6 +47,7 @@
|
|||||||
|
|
||||||
nsUniversalDetector::nsUniversalDetector(PRUint32 aLanguageFilter)
|
nsUniversalDetector::nsUniversalDetector(PRUint32 aLanguageFilter)
|
||||||
{
|
{
|
||||||
|
mNbspFound = PR_FALSE;
|
||||||
mDone = PR_FALSE;
|
mDone = PR_FALSE;
|
||||||
mBestGuess = -1; //illegal value as signal
|
mBestGuess = -1; //illegal value as signal
|
||||||
mInTag = PR_FALSE;
|
mInTag = PR_FALSE;
|
||||||
@ -75,6 +76,7 @@ nsUniversalDetector::~nsUniversalDetector()
|
|||||||
void
|
void
|
||||||
nsUniversalDetector::Reset()
|
nsUniversalDetector::Reset()
|
||||||
{
|
{
|
||||||
|
mNbspFound = PR_FALSE;
|
||||||
mDone = PR_FALSE;
|
mDone = PR_FALSE;
|
||||||
mBestGuess = -1; //illegal value as signal
|
mBestGuess = -1; //illegal value as signal
|
||||||
mInTag = PR_FALSE;
|
mInTag = PR_FALSE;
|
||||||
@ -96,7 +98,7 @@ nsUniversalDetector::Reset()
|
|||||||
|
|
||||||
//---------------------------------------------------------------------
|
//---------------------------------------------------------------------
|
||||||
#define SHORTCUT_THRESHOLD (float)0.95
|
#define SHORTCUT_THRESHOLD (float)0.95
|
||||||
#define MINIMUM_THRESHOLD (float)0.60
|
#define MINIMUM_THRESHOLD (float)0.20
|
||||||
|
|
||||||
nsresult nsUniversalDetector::HandleData(const char* aBuf, PRUint32 aLen)
|
nsresult nsUniversalDetector::HandleData(const char* aBuf, PRUint32 aLen)
|
||||||
{
|
{
|
||||||
@ -106,7 +108,7 @@ nsresult nsUniversalDetector::HandleData(const char* aBuf, PRUint32 aLen)
|
|||||||
if (aLen > 0)
|
if (aLen > 0)
|
||||||
mGotData = PR_TRUE;
|
mGotData = PR_TRUE;
|
||||||
|
|
||||||
//If the data starts with BOM, we know it is UTF
|
/* If the data starts with BOM, we know it is UTF. */
|
||||||
if (mStart)
|
if (mStart)
|
||||||
{
|
{
|
||||||
mStart = PR_FALSE;
|
mStart = PR_FALSE;
|
||||||
@ -115,20 +117,42 @@ nsresult nsUniversalDetector::HandleData(const char* aBuf, PRUint32 aLen)
|
|||||||
{
|
{
|
||||||
case '\xEF':
|
case '\xEF':
|
||||||
if (('\xBB' == aBuf[1]) && ('\xBF' == aBuf[2]))
|
if (('\xBB' == aBuf[1]) && ('\xBF' == aBuf[2]))
|
||||||
// EF BB BF UTF-8 encoded BOM
|
/* EF BB BF: UTF-8 encoded BOM. */
|
||||||
mDetectedCharset = "UTF-8";
|
mDetectedCharset = "UTF-8";
|
||||||
break;
|
break;
|
||||||
case '\xFE':
|
case '\xFE':
|
||||||
if ('\xFF' == aBuf[1])
|
if ('\xFF' == aBuf[1])
|
||||||
// FE FF UTF-16, big endian BOM
|
/* FE FF: UTF-16, big endian BOM. */
|
||||||
mDetectedCharset = "UTF-16";
|
mDetectedCharset = "UTF-16";
|
||||||
break;
|
break;
|
||||||
case '\xFF':
|
case '\xFF':
|
||||||
if ('\xFE' == aBuf[1])
|
if ('\xFE' == aBuf[1])
|
||||||
// FF FE UTF-16, little endian BOM
|
{
|
||||||
mDetectedCharset = "UTF-16";
|
if (aLen > 3 &&
|
||||||
break;
|
aBuf[2] == '\x00' &&
|
||||||
} // switch
|
aBuf[3] == '\x00')
|
||||||
|
{
|
||||||
|
/* FF FE 00 00: UTF-32 (LE). */
|
||||||
|
mDetectedCharset = "UTF-32";
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
/* FF FE: UTF-16, little endian BOM. */
|
||||||
|
mDetectedCharset = "UTF-16";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case '\x00':
|
||||||
|
if (aLen > 3 &&
|
||||||
|
aBuf[1] == '\x00' &&
|
||||||
|
aBuf[2] == '\xFE' &&
|
||||||
|
aBuf[3] == '\xFF')
|
||||||
|
{
|
||||||
|
/* 00 00 FE FF: UTF-32 (BE). */
|
||||||
|
mDetectedCharset = "UTF-32";
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
if (mDetectedCharset)
|
if (mDetectedCharset)
|
||||||
{
|
{
|
||||||
@ -140,10 +164,13 @@ nsresult nsUniversalDetector::HandleData(const char* aBuf, PRUint32 aLen)
|
|||||||
PRUint32 i;
|
PRUint32 i;
|
||||||
for (i = 0; i < aLen; i++)
|
for (i = 0; i < aLen; i++)
|
||||||
{
|
{
|
||||||
//other than 0xa0, if every othe character is ascii, the page is ascii
|
/* If every other character is ASCII or 0xA0, we don't run charset
|
||||||
if (aBuf[i] & '\x80' && aBuf[i] != '\xA0') //Since many Ascii only page contains NBSP
|
* probers.
|
||||||
|
* 0xA0 (NBSP in a few charset) is apparently a rare exception
|
||||||
|
* of non-ASCII character often contained in nearly-ASCII text. */
|
||||||
|
if (aBuf[i] & '\x80' && aBuf[i] != '\xA0')
|
||||||
{
|
{
|
||||||
//we got a non-ascii byte (high-byte)
|
/* We got a non-ASCII byte (high-byte) */
|
||||||
if (mInputState != eHighbyte)
|
if (mInputState != eHighbyte)
|
||||||
{
|
{
|
||||||
//adjust state
|
//adjust state
|
||||||
@ -179,11 +206,19 @@ nsresult nsUniversalDetector::HandleData(const char* aBuf, PRUint32 aLen)
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
//ok, just pure ascii so far
|
/* Just pure ASCII or NBSP so far. */
|
||||||
if ( ePureAscii == mInputState &&
|
if (aBuf[i] == '\xA0')
|
||||||
(aBuf[i] == '\033' || (aBuf[i] == '{' && mLastChar == '~')) )
|
|
||||||
{
|
{
|
||||||
//found escape character or HZ "~{"
|
/* ASCII with the only exception of NBSP seems quite common.
|
||||||
|
* I doubt it is really necessary to train a model here, so let's
|
||||||
|
* just make an exception.
|
||||||
|
*/
|
||||||
|
mNbspFound = PR_TRUE;
|
||||||
|
}
|
||||||
|
else if (mInputState == ePureAscii &&
|
||||||
|
(aBuf[i] == '\033' || (aBuf[i] == '{' && mLastChar == '~')))
|
||||||
|
{
|
||||||
|
/* We found an escape character or HZ "~{". */
|
||||||
mInputState = eEscAscii;
|
mInputState = eEscAscii;
|
||||||
}
|
}
|
||||||
mLastChar = aBuf[i];
|
mLastChar = aBuf[i];
|
||||||
@ -205,6 +240,16 @@ nsresult nsUniversalDetector::HandleData(const char* aBuf, PRUint32 aLen)
|
|||||||
mDone = PR_TRUE;
|
mDone = PR_TRUE;
|
||||||
mDetectedCharset = mEscCharSetProber->GetCharSetName();
|
mDetectedCharset = mEscCharSetProber->GetCharSetName();
|
||||||
}
|
}
|
||||||
|
else if (mNbspFound)
|
||||||
|
{
|
||||||
|
mDetectedCharset = "ISO-8859-1";
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
/* ASCII with the ESC character (or the sequence "~{") is still
|
||||||
|
* ASCII until proven otherwise. */
|
||||||
|
mDetectedCharset = "ASCII";
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
case eHighbyte:
|
case eHighbyte:
|
||||||
for (i = 0; i < NUM_OF_CHARSET_PROBERS; i++)
|
for (i = 0; i < NUM_OF_CHARSET_PROBERS; i++)
|
||||||
@ -222,8 +267,19 @@ nsresult nsUniversalDetector::HandleData(const char* aBuf, PRUint32 aLen)
|
|||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
default: //pure ascii
|
default:
|
||||||
;//do nothing here
|
if (mNbspFound)
|
||||||
|
{
|
||||||
|
/* ISO-8859-1 is a good result candidate for ASCII + NBSP.
|
||||||
|
* (though it could have been any ISO-8859 encoding). */
|
||||||
|
mDetectedCharset = "ISO-8859-1";
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
/* Pure ASCII */
|
||||||
|
mDetectedCharset = "ASCII";
|
||||||
|
}
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
return NS_OK;
|
return NS_OK;
|
||||||
}
|
}
|
||||||
|
@ -72,6 +72,7 @@ protected:
|
|||||||
virtual void Report(const char* aCharset) = 0;
|
virtual void Report(const char* aCharset) = 0;
|
||||||
virtual void Reset();
|
virtual void Reset();
|
||||||
nsInputState mInputState;
|
nsInputState mInputState;
|
||||||
|
PRBool mNbspFound;
|
||||||
PRBool mDone;
|
PRBool mDone;
|
||||||
PRBool mInTag;
|
PRBool mInTag;
|
||||||
PRBool mStart;
|
PRBool mStart;
|
||||||
@ -86,4 +87,3 @@ protected:
|
|||||||
};
|
};
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -34,47 +34,52 @@
|
|||||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||||
*
|
*
|
||||||
* ***** END LICENSE BLOCK ***** */
|
* ***** END LICENSE BLOCK ***** */
|
||||||
|
|
||||||
#include "uchardet.h"
|
#include "uchardet.h"
|
||||||
|
#include <string.h>
|
||||||
|
#include <stdlib.h>
|
||||||
#include "nscore.h"
|
#include "nscore.h"
|
||||||
#include "nsUniversalDetector.h"
|
#include "nsUniversalDetector.h"
|
||||||
#include <string>
|
|
||||||
|
|
||||||
using std::string;
|
|
||||||
|
|
||||||
class HandleUniversalDetector : public nsUniversalDetector
|
class HandleUniversalDetector : public nsUniversalDetector
|
||||||
{
|
{
|
||||||
protected:
|
protected:
|
||||||
string m_charset;
|
char *m_charset;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
HandleUniversalDetector()
|
HandleUniversalDetector()
|
||||||
: nsUniversalDetector(NS_FILTER_ALL)
|
: nsUniversalDetector(NS_FILTER_ALL)
|
||||||
|
, m_charset(0)
|
||||||
{
|
{
|
||||||
m_charset = "";
|
|
||||||
}
|
}
|
||||||
|
|
||||||
virtual ~HandleUniversalDetector()
|
virtual ~HandleUniversalDetector()
|
||||||
{}
|
{
|
||||||
|
if (m_charset)
|
||||||
|
free(m_charset);
|
||||||
|
}
|
||||||
|
|
||||||
virtual void Report(const char* charset)
|
virtual void Report(const char* charset)
|
||||||
{
|
{
|
||||||
m_charset = charset;
|
if (m_charset)
|
||||||
|
free(m_charset);
|
||||||
|
m_charset = strdup(charset);
|
||||||
}
|
}
|
||||||
|
|
||||||
virtual void Reset()
|
virtual void Reset()
|
||||||
{
|
{
|
||||||
nsUniversalDetector::Reset();
|
nsUniversalDetector::Reset();
|
||||||
m_charset = "";
|
if (m_charset)
|
||||||
|
free(m_charset);
|
||||||
|
m_charset = strdup("");
|
||||||
}
|
}
|
||||||
|
|
||||||
const char* GetCharset() const
|
const char* GetCharset() const
|
||||||
{
|
{
|
||||||
return m_charset.c_str();
|
return m_charset? m_charset : "";
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
uchardet_t uchardet_new()
|
uchardet_t uchardet_new(void)
|
||||||
{
|
{
|
||||||
return reinterpret_cast<uchardet_t> (new HandleUniversalDetector());
|
return reinterpret_cast<uchardet_t> (new HandleUniversalDetector());
|
||||||
}
|
}
|
||||||
|
@ -34,8 +34,8 @@
|
|||||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||||
*
|
*
|
||||||
* ***** END LICENSE BLOCK ***** */
|
* ***** END LICENSE BLOCK ***** */
|
||||||
#ifndef ___UCHARDET_H___
|
#ifndef UCHARDET_H___
|
||||||
#define ___UCHARDET_H___
|
#define UCHARDET_H___
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
extern "C" {
|
extern "C" {
|
||||||
@ -43,13 +43,13 @@ extern "C" {
|
|||||||
|
|
||||||
#include <stddef.h>
|
#include <stddef.h>
|
||||||
|
|
||||||
typedef void * uchardet_t;
|
typedef struct uchardet * uchardet_t;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Create an encoding detector.
|
* Create an encoding detector.
|
||||||
* @return a handle of a instance of uchardet
|
* @return a handle of a instance of uchardet
|
||||||
*/
|
*/
|
||||||
uchardet_t uchardet_new();
|
uchardet_t uchardet_new(void);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Delete an encoding detector.
|
* Delete an encoding detector.
|
||||||
@ -79,9 +79,9 @@ void uchardet_data_end(uchardet_t ud);
|
|||||||
void uchardet_reset(uchardet_t ud);
|
void uchardet_reset(uchardet_t ud);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Get the name of encoding that was detected.
|
* Get an iconv-compatible name of the encoding that was detected.
|
||||||
* @param ud [in] handle of a instance of uchardet
|
* @param ud [in] handle of a instance of uchardet
|
||||||
* @return name of charset on success and "" on failure or pure ascii.
|
* @return name of charset on success and "" on failure.
|
||||||
*/
|
*/
|
||||||
const char * uchardet_get_charset(uchardet_t ud);
|
const char * uchardet_get_charset(uchardet_t ud);
|
||||||
|
|
||||||
|
@ -272,6 +272,20 @@ copy ..\src\contextMenu.xml ..\bin64\contextMenu.xml
|
|||||||
<ClCompile Include="..\src\MISC\Common\verifySignedfile.cpp" />
|
<ClCompile Include="..\src\MISC\Common\verifySignedfile.cpp" />
|
||||||
<ClCompile Include="..\src\MISC\md5\md5Dlgs.cpp" />
|
<ClCompile Include="..\src\MISC\md5\md5Dlgs.cpp" />
|
||||||
<ClCompile Include="..\src\MISC\sha2\sha-256.cpp" />
|
<ClCompile Include="..\src\MISC\sha2\sha-256.cpp" />
|
||||||
|
<ClCompile Include="..\src\uchardet\LangModels\LangArabicModel.cpp" />
|
||||||
|
<ClCompile Include="..\src\uchardet\LangModels\LangBulgarianModel.cpp" />
|
||||||
|
<ClCompile Include="..\src\uchardet\LangModels\LangDanishModel.cpp" />
|
||||||
|
<ClCompile Include="..\src\uchardet\LangModels\LangEsperantoModel.cpp" />
|
||||||
|
<ClCompile Include="..\src\uchardet\LangModels\LangFrenchModel.cpp" />
|
||||||
|
<ClCompile Include="..\src\uchardet\LangModels\LangGermanModel.cpp" />
|
||||||
|
<ClCompile Include="..\src\uchardet\LangModels\LangGreekModel.cpp" />
|
||||||
|
<ClCompile Include="..\src\uchardet\LangModels\LangHebrewModel.cpp" />
|
||||||
|
<ClCompile Include="..\src\uchardet\LangModels\LangHungarianModel.cpp" />
|
||||||
|
<ClCompile Include="..\src\uchardet\LangModels\LangRussianModel.cpp" />
|
||||||
|
<ClCompile Include="..\src\uchardet\LangModels\LangSpanishModel.cpp" />
|
||||||
|
<ClCompile Include="..\src\uchardet\LangModels\LangThaiModel.cpp" />
|
||||||
|
<ClCompile Include="..\src\uchardet\LangModels\LangTurkishModel.cpp" />
|
||||||
|
<ClCompile Include="..\src\uchardet\LangModels\LangVietnameseModel.cpp" />
|
||||||
<ClCompile Include="..\src\WinControls\AboutDlg\AboutDlg.cpp" />
|
<ClCompile Include="..\src\WinControls\AboutDlg\AboutDlg.cpp" />
|
||||||
<ClCompile Include="..\src\WinControls\AnsiCharPanel\ansiCharPanel.cpp" />
|
<ClCompile Include="..\src\WinControls\AnsiCharPanel\ansiCharPanel.cpp" />
|
||||||
<ClCompile Include="..\src\ScitillaComponent\AutoCompletion.cpp" />
|
<ClCompile Include="..\src\ScitillaComponent\AutoCompletion.cpp" />
|
||||||
@ -306,12 +320,6 @@ copy ..\src\contextMenu.xml ..\bin64\contextMenu.xml
|
|||||||
<ClCompile Include="..\src\MISC\PluginsManager\IDAllocator.cpp" />
|
<ClCompile Include="..\src\MISC\PluginsManager\IDAllocator.cpp" />
|
||||||
<ClCompile Include="..\src\WinControls\ImageListSet\ImageListSet.cpp" />
|
<ClCompile Include="..\src\WinControls\ImageListSet\ImageListSet.cpp" />
|
||||||
<ClCompile Include="..\src\uchardet\JpCntx.cpp" />
|
<ClCompile Include="..\src\uchardet\JpCntx.cpp" />
|
||||||
<ClCompile Include="..\src\uchardet\LangBulgarianModel.cpp" />
|
|
||||||
<ClCompile Include="..\src\uchardet\LangCyrillicModel.cpp" />
|
|
||||||
<ClCompile Include="..\src\uchardet\LangGreekModel.cpp" />
|
|
||||||
<ClCompile Include="..\src\uchardet\LangHebrewModel.cpp" />
|
|
||||||
<ClCompile Include="..\src\uchardet\LangHungarianModel.cpp" />
|
|
||||||
<ClCompile Include="..\src\uchardet\LangThaiModel.cpp" />
|
|
||||||
<ClCompile Include="..\src\lastRecentFileList.cpp" />
|
<ClCompile Include="..\src\lastRecentFileList.cpp" />
|
||||||
<ClCompile Include="..\src\lesDlgs.cpp" />
|
<ClCompile Include="..\src\lesDlgs.cpp" />
|
||||||
<ClCompile Include="..\src\WinControls\AnsiCharPanel\ListView.cpp" />
|
<ClCompile Include="..\src\WinControls\AnsiCharPanel\ListView.cpp" />
|
||||||
|
Loading…
Reference in New Issue
Block a user