[BUG_FIXED] (Author: François-R Boyer) Fix DBCS encodings file saving corruption bug.

[BUG_FIXED] (Author: François-R Boyer) Fix file containing NULL character loading bug. [ENHANCEMENT] (Author: François-R Boyer) Improve getCurrentDocCharCount() method performance. git-svn-id: svn://svn.tuxfamily.org/svnroot/notepadplus/repository/trunk@651 f5eea248-9336-0410-98b8-ebc06183d4e3
2010-08-16 16:52:03 +00:00 · 2010-08-16 16:52:03 +00:00 · 948f281eb0
commit 948f281eb0
parent 33de57fe1d
8 changed files with 235 additions and 121 deletions
--- a/PowerEditor/src/MISC/Common/Common.cpp
+++ b/PowerEditor/src/MISC/Common/Common.cpp
@ -16,6 +16,7 @@
 //Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 #include "precompiledHeaders.h"
 #include "../Utf8.h"
 WcharMbcsConvertor * WcharMbcsConvertor::_pSelf = new WcharMbcsConvertor;
@ -217,28 +218,60 @@ generic_string purgeMenuItemString(const TCHAR * menuItemStr, bool keepAmpersand
 	return cleanedName;
 };
-const wchar_t * WcharMbcsConvertor::char2wchar(const char * mbcs2Convert, UINT codepage)
+const wchar_t * WcharMbcsConvertor::char2wchar(const char * mbcs2Convert, UINT codepage, int lenMbcs, int *pLenWc, int *pBytesNotProcessed)
 {
-	if (!_wideCharStr)
+	// Do not process empty strings
 	if (lenMbcs == 0 || lenMbcs == -1 && mbcs2Convert[0] == 0) { _wideCharStr.empty(); return _wideCharStr;	}
 	int bytesNotProcessed = 0;
 	int lenWc = 0;
 	// If length not specified, simply convert without checking
 	if (lenMbcs == -1)
 	{
-		_wideCharStr = new wchar_t[initSize];
+		lenWc = MultiByteToWideChar(codepage, 0, mbcs2Convert, lenMbcs, NULL, 0);
-		_wideCharAllocLen = initSize;
+	}
 	// Otherwise, test if we are cutting a multi-byte character at end of buffer
 	else if(lenMbcs != -1 && codepage == CP_UTF8) // For UTF-8, we know how to test it
 	{
 		int indexOfLastChar = Utf8::characterStart(mbcs2Convert, lenMbcs-1); // get index of last character
 		if (indexOfLastChar != 0 && !Utf8::isValid(mbcs2Convert+indexOfLastChar, lenMbcs-indexOfLastChar)) // if it is not valid we do not process it right now (unless its the only character in string, to ensure that we always progress, e.g. that bytesNotProcessed < lenMbcs)
 		{
 			bytesNotProcessed = lenMbcs-indexOfLastChar;
 		}
 		lenWc = MultiByteToWideChar(codepage, 0, mbcs2Convert, lenMbcs-bytesNotProcessed, NULL, 0);
 	}
 	else // For other encodings, ask system if there are any invalid characters; note that it will not correctly know if last character is cut when there are invalid characters inside the text
 	{
 		lenWc = MultiByteToWideChar(codepage, (lenMbcs == -1) ? 0 : MB_ERR_INVALID_CHARS, mbcs2Convert, lenMbcs, NULL, 0);
 		if (lenWc == 0 && GetLastError() == ERROR_NO_UNICODE_TRANSLATION)
 		{
 			// Test without last byte
 			if (lenMbcs > 1) lenWc = MultiByteToWideChar(codepage, MB_ERR_INVALID_CHARS, mbcs2Convert, lenMbcs-1, NULL, 0);
 			if (lenWc == 0) // don't have to check that the error is still ERROR_NO_UNICODE_TRANSLATION, since only the length parameter changed
 			{
 				// TODO: should warn user about incorrect loading due to invalid characters
 				// We still load the file, but the system will either strip or replace invalid characters (including the last character, if cut in half)
 				lenWc = MultiByteToWideChar(codepage, 0, mbcs2Convert, lenMbcs, NULL, 0);
 			}
 			else
 			{
 				// We found a valid text by removing one byte.
 				bytesNotProcessed = 1;
 			}
 		}
 	}
-	int len = MultiByteToWideChar(codepage, 0, mbcs2Convert, -1, _wideCharStr, 0);
+	if (lenWc > 0)
 	if (len > 0)
 	{
-		if ((size_t)len > _wideCharAllocLen)
+		_wideCharStr.sizeTo(lenWc);
-		{
+		MultiByteToWideChar(codepage, 0, mbcs2Convert, lenMbcs-bytesNotProcessed, _wideCharStr, lenWc);
 			delete [] _wideCharStr;
 			_wideCharAllocLen = len;
 			_wideCharStr = new wchar_t[_wideCharAllocLen];
 		}
 		MultiByteToWideChar(codepage, 0, mbcs2Convert, -1, _wideCharStr, len);
 	}
 	else
-		_wideCharStr[0] = 0;
+		_wideCharStr.empty();
 	if(pLenWc) *pLenWc = lenWc;
 	if(pBytesNotProcessed) *pBytesNotProcessed = bytesNotProcessed;
 	return _wideCharStr;
 }
@ -246,21 +279,10 @@ const wchar_t * WcharMbcsConvertor::char2wchar(const char * mbcs2Convert, UINT c
 // which are converted to the corresponding indexes in the returned wchar_t string.
 const wchar_t * WcharMbcsConvertor::char2wchar(const char * mbcs2Convert, UINT codepage, int *mstart, int *mend)
 {
-	if (!_wideCharStr)
+	int len = MultiByteToWideChar(codepage, 0, mbcs2Convert, -1, NULL, 0);
 	{
 		_wideCharStr = new wchar_t[initSize];
 		_wideCharAllocLen = initSize;
 	}
 	int len = MultiByteToWideChar(codepage, 0, mbcs2Convert, -1, _wideCharStr, 0);
 	if (len > 0)
 	{
-		if (len > int(_wideCharAllocLen))
+		_wideCharStr.sizeTo(len);
 		{
 			delete [] _wideCharStr;
 			_wideCharAllocLen = len;
 			_wideCharStr = new wchar_t[_wideCharAllocLen];
 		}
 		len = MultiByteToWideChar(codepage, 0, mbcs2Convert, -1, _wideCharStr, len);
 		if ((size_t)*mstart < strlen(mbcs2Convert) && (size_t)*mend <= strlen(mbcs2Convert))
@ -276,61 +298,40 @@ const wchar_t * WcharMbcsConvertor::char2wchar(const char * mbcs2Convert, UINT c
 	}
 	else
 	{
-		_wideCharStr[0] = 0;
+		_wideCharStr.empty();
 		*mstart = 0;
 		*mend = 0;
 	}
 	return _wideCharStr;
 } 
-const char * WcharMbcsConvertor::wchar2char(const wchar_t * wcharStr2Convert, UINT codepage) 
+const char * WcharMbcsConvertor::wchar2char(const wchar_t * wcharStr2Convert, UINT codepage, int lenWc, int *pLenMbcs) 
 {
-	if (!_multiByteStr)
+	int lenMbcs = WideCharToMultiByte(codepage, 0, wcharStr2Convert, lenWc, NULL, 0, NULL, NULL);
 	if (lenMbcs > 0)
 	{
-		_multiByteStr = new char[initSize];
+		_multiByteStr.sizeTo(lenMbcs);
-		_multiByteAllocLen = initSize;
+		WideCharToMultiByte(codepage, 0, wcharStr2Convert, lenWc, _multiByteStr, lenMbcs, NULL, NULL);
 	}
 	int len = WideCharToMultiByte(codepage, 0, wcharStr2Convert, -1, _multiByteStr, 0, NULL, NULL);
 	if (len > 0)
 	{
 		if ((size_t)len > _multiByteAllocLen)
 		{
 			delete [] _multiByteStr;
 			_multiByteAllocLen = len;
 			_multiByteStr = new char[_multiByteAllocLen];
 		}
 		WideCharToMultiByte(codepage, 0, wcharStr2Convert, -1, _multiByteStr, len, NULL, NULL);
 	}
 	else
-		_multiByteStr[0] = 0;
+		_multiByteStr.empty();
 	if(pLenMbcs) *pLenMbcs = lenMbcs;
 	return _multiByteStr;
 }
 const char * WcharMbcsConvertor::wchar2char(const wchar_t * wcharStr2Convert, UINT codepage, long *mstart, long *mend) 
 {
-	if (!_multiByteStr)
+	int len = WideCharToMultiByte(codepage, 0, wcharStr2Convert, -1, NULL, 0, NULL, NULL);
 	{
 		_multiByteStr = new char[initSize];
 		_multiByteAllocLen = initSize;
 	}
 	int len = WideCharToMultiByte(codepage, 0, wcharStr2Convert, -1, _multiByteStr, 0, NULL, NULL);
 	if (len > 0)
 	{
-		if ((size_t)len > _multiByteAllocLen)
+		_multiByteStr.sizeTo(len);
 		{
 			delete [] _multiByteStr;
 			_multiByteAllocLen = len;
 			_multiByteStr = new char[_multiByteAllocLen];
 		}
 		len = WideCharToMultiByte(codepage, 0, wcharStr2Convert, -1, _multiByteStr, len, NULL, NULL); // not needed?
        if ((int)*mstart < lstrlenW(wcharStr2Convert) && (int)*mend < lstrlenW(wcharStr2Convert))
        {
-			*mstart = WideCharToMultiByte(codepage, 0, wcharStr2Convert, *mstart, _multiByteStr, 0, NULL, NULL);
+			*mstart = WideCharToMultiByte(codepage, 0, wcharStr2Convert, *mstart, NULL, 0, NULL, NULL);
-			*mend = WideCharToMultiByte(codepage, 0, wcharStr2Convert, *mend, _multiByteStr, 0, NULL, NULL);
+			*mend = WideCharToMultiByte(codepage, 0, wcharStr2Convert, *mend, NULL, 0, NULL, NULL);
 			if (*mstart >= len || *mend >= len)
 			{
 				*mstart = 0;
@ -339,7 +340,7 @@ const char * WcharMbcsConvertor::wchar2char(const wchar_t * wcharStr2Convert, UI
 		}
 	}
 	else
-		_multiByteStr[0] = 0;
+		_multiByteStr.empty();
 	return _multiByteStr;
 }
--- a/PowerEditor/src/MISC/Common/Common.h
+++ b/PowerEditor/src/MISC/Common/Common.h
@ -94,32 +94,56 @@ public:
 	static WcharMbcsConvertor * getInstance() {return _pSelf;};
 	static void destroyInstance() {delete _pSelf;};
-	const wchar_t * char2wchar(const char *mbStr, UINT codepage);
+	const wchar_t * char2wchar(const char *mbStr, UINT codepage, int lenIn=-1, int *pLenOut=NULL, int *pBytesNotProcessed=NULL);
 	const wchar_t * char2wchar(const char *mbcs2Convert, UINT codepage, int *mstart, int *mend);
-	const char * wchar2char(const wchar_t *wcStr, UINT codepage);
+	const char * wchar2char(const wchar_t *wcStr, UINT codepage, int lenIn=-1, int *pLenOut=NULL);
 	const char * wchar2char(const wchar_t *wcStr, UINT codepage, long *mstart, long *mend);
-	const char * encode(UINT fromCodepage, UINT toCodepage, const char *txt2Encode) {
+	const char * encode(UINT fromCodepage, UINT toCodepage, const char *txt2Encode, int lenIn=-1, int *pLenOut=NULL, int *pBytesNotProcessed=NULL) {
-        const wchar_t * strW = char2wchar(txt2Encode, fromCodepage);
+		int lenWc = 0;
-        return wchar2char(strW, toCodepage);
+        const wchar_t * strW = char2wchar(txt2Encode, fromCodepage, lenIn, &lenWc, pBytesNotProcessed);
        return wchar2char(strW, toCodepage, lenWc, pLenOut);
    };
 protected:
-	WcharMbcsConvertor() : _multiByteStr(NULL), _wideCharStr(NULL), _multiByteAllocLen(0), _wideCharAllocLen(0), initSize(1024) {
+	WcharMbcsConvertor() {
 	};
 	~WcharMbcsConvertor() {
 		if (_multiByteStr)
 			delete [] _multiByteStr;
 		if (_wideCharStr)
 			delete [] _wideCharStr;
 	};
 	static WcharMbcsConvertor * _pSelf;
-	const int initSize;
+	template <class T>
-	char *_multiByteStr;
+	class StringBuffer {
-	size_t _multiByteAllocLen;
+	public:
-	wchar_t *_wideCharStr;
+		StringBuffer() : _str(0), _allocLen(0) { }
-	size_t _wideCharAllocLen;
+		// Releases the buffer only when sizeTo() really allocated it (_allocLen != 0).
+		// Testing _str here would be wrong: empty() makes _str point to a static null character while nothing is allocated, and that storage must never be passed to delete[].
+		~StringBuffer() { if(_allocLen) delete [] _str; }
 		// Makes sure the buffer can hold at least `size` elements of T.
 		// The buffer never shrinks; when it has to grow, the old storage is released and the previous contents are NOT copied to the new one.
 		void sizeTo(size_t size) {
 			if(size <= _allocLen)
 				return; // current allocation is already large enough
 			if(_allocLen != 0)
 				delete[] _str; // only storage obtained from new[] below is released here
 			_allocLen = max(size, initSize); // never allocate less than initSize elements
 			_str = new T[_allocLen];
 		}
 		// Makes the buffer represent an empty, null-terminated string without allocating memory.
 		void empty() {
 			// Terminator handed out while no storage has been allocated yet, so that the conversion operator still yields a valid empty string.
 			static T nullStr = 0;
 			if(_allocLen != 0)
 			{
 				// Storage exists: simply terminate it at the first element.
 				_str[0] = 0;
 				return;
 			}
 			_str = &nullStr;
 		}
 		// Implicit conversion to the raw pointer currently held in _str, so the buffer can be used wherever a T* is expected.
 		operator T*() { return _str; }
 	protected:
 		static const int initSize = 1024;
 		size_t _allocLen;
 		T* _str;
 	};
 	StringBuffer<char> _multiByteStr;
 	StringBuffer<wchar_t> _wideCharStr;
 private:
 	// Since there's no public ctor, we need to void the default assignment operator.
--- a/PowerEditor/src/Notepad_plus.cpp
+++ b/PowerEditor/src/Notepad_plus.cpp
@ -31,16 +31,15 @@
 #include "xmlMatchedTagsHighlighter.h"
 #include "EncodingMapper.h"
 enum tb_stat {tb_saved, tb_unsaved, tb_ro};
 #define DIR_LEFT true
 #define DIR_RIGHT false
 int docTabIconIDs[] = {IDI_SAVED_ICON, IDI_UNSAVED_ICON, IDI_READONLY_ICON};
 ToolBarButtonUnit toolBarIcons[] = {
 	{IDM_FILE_NEW,		IDI_NEW_OFF_ICON,		IDI_NEW_ON_ICON,		IDI_NEW_OFF_ICON, IDR_FILENEW},
-	{IDM_FILE_OPEN,		IDI_OPEN_OFF_ICON,		IDI_OPEN_ON_ICON,		IDI_NEW_OFF_ICON, IDR_FILEOPEN},
+	{IDM_FILE_OPEN,		IDI_OPEN_OFF_ICON,		IDI_OPEN_ON_ICON,		IDI_OPEN_OFF_ICON, IDR_FILEOPEN},
 	{IDM_FILE_SAVE,		IDI_SAVE_OFF_ICON,		IDI_SAVE_ON_ICON,		IDI_SAVE_DISABLE_ICON, IDR_FILESAVE},
 	{IDM_FILE_SAVEALL,	IDI_SAVEALL_OFF_ICON,	IDI_SAVEALL_ON_ICON,	IDI_SAVEALL_DISABLE_ICON, IDR_SAVEALL},
 	{IDM_FILE_CLOSE,	IDI_CLOSE_OFF_ICON,		IDI_CLOSE_ON_ICON,		IDI_CLOSE_OFF_ICON, IDR_CLOSEFILE},
@ -2311,7 +2310,26 @@ size_t Notepad_plus::getSelectedCharNumber(UniMode u)
 	}
 	return result;
 }
-/*
+
 #ifdef _OPENMP
 #include <omp.h>
 #endif
 // Counts the characters found in buf[pos .. endpos), treating the bytes as UTF-8.
 // Continuation bytes met where a character should start, and the end-of-line bytes '\r' and '\n', are not counted.
 // NOTE(review): utflen is defined elsewhere in the file; it presumably gives the number of continuation bytes to skip after a lead byte -- confirm.
 static inline size_t countUtf8Characters(unsigned char *buf, int pos, int endpos)
 {
 	size_t count = 0;
 	while(pos < endpos)
 	{
 		const unsigned char c = buf[pos];
 		++pos;
 		// an unexpected continuation byte: happens when the range starts in the middle of a multi-byte character
 		const bool isStrayContinuation = (c & 0xc0) == 0x80;
 		const bool isEndOfLine = (c == '\n' || c == '\r');
 		if (!isStrayContinuation && !isEndOfLine)
 		{
 			// lead byte of a multi-byte character: jump over the bytes that follow it (this may move pos past endpos)
 			if (c >= 0xc0) pos += utflen[(c & 0x30) >>  4];
 			++count;
 		}
 	}
 	return count;
 }
 size_t Notepad_plus::getCurrentDocCharCount(size_t numLines, UniMode u)
 {
 	if (u != uniUTF8 && u != uniCookie)
@ -2322,23 +2340,39 @@ size_t Notepad_plus::getCurrentDocCharCount(size_t numLines, UniMode u)
 		result -= lines;
 		return ((int)result < 0)?0:result;
 	}
-	else
+ 	else
-	{
+ 	{
 		// Note that counting is not well defined for invalid UTF-8 characters.
 		// This method is O(filelength) regardless of the number of characters we count (due to SCI_GETCHARACTERPOINTER);
 		// it would not be appropriate for counting characters in a small selection.
 		size_t result = 0;
-		for (size_t line=0; line<numLines; line++)
+
 		size_t endpos = _pEditView->execute(SCI_GETLENGTH);
 		unsigned char* buf = (unsigned char*)_pEditView->execute(SCI_GETCHARACTERPOINTER); // Scintilla doc sais the pointer can be invalidated by any other "execute"
 #ifdef _OPENMP // parallel counting of characters with OpenMP
 		if(endpos > 50000) // starting threads takes time; for small files it is better to simply count in one thread
 		{
-			size_t endpos = _pEditView->execute(SCI_GETLINEENDPOSITION, line);
+			#pragma omp parallel reduction(+: result)
 			for (size_t pos = _pEditView->execute(SCI_POSITIONFROMLINE, line); pos < endpos; pos++)
 			{
-				unsigned char c = 0xf0 & (unsigned char)_pEditView->execute(SCI_GETCHARAT, pos);
+				// split in chunks of same size (except last chunk if it's not evenly divisible)
-				if (c >= 0xc0) pos += utflen[(c & 0x30) >>  4];
+				unsigned int num_threads = omp_get_num_threads();
-				result++;
+				unsigned int thread_num = omp_get_thread_num();
 				size_t chunk_size = endpos/num_threads;
 				size_t pos = chunk_size*thread_num;
 				size_t endpos_local = (thread_num == num_threads-1) ? endpos : pos+chunk_size;
 				result = countUtf8Characters(buf, pos, endpos_local);
 			}
 		}
-		return result;
+		else
-	}
+#endif
 		{
 			result = countUtf8Characters(buf, 0, endpos);
 		}
 		return result;
 	}
 }
-*/
+
 bool Notepad_plus::isFormatUnicode(UniMode u)
 {
@ -2377,6 +2411,8 @@ size_t Notepad_plus::getSelectedBytes()
 void Notepad_plus::updateStatusBar() 
 {
 	if(!NppParameters::getInstance()->getNppGUI()._statusBarShow) return; // do not update if status bar not shown
 	UniMode u = _pEditView->getCurrentBuffer()->getUnicodeMode();
    TCHAR strLnCol[64];
--- a/PowerEditor/src/Notepad_plus.h
+++ b/PowerEditor/src/Notepad_plus.h
@ -541,7 +541,7 @@ private:
 	void updateStatusBar();
 	size_t getSelectedCharNumber(UniMode);
-	//size_t getCurrentDocCharCount(size_t numLines, UniMode u);
+	size_t getCurrentDocCharCount(size_t numLines, UniMode u);
 	int getSelectedAreas();
 	int _numSel;
 	size_t getSelectedBytes();
--- a/PowerEditor/src/NppBigSwitch.cpp
+++ b/PowerEditor/src/NppBigSwitch.cpp
@ -1666,7 +1666,8 @@ LRESULT Notepad_plus::process(HWND hwnd, UINT Message, WPARAM wParam, LPARAM lPa
 			_pPublicInterface->getClientRect(rc);
 			nppGUI._statusBarShow = show;
-            _statusBar.display(nppGUI._statusBarShow);
+			if(show)
 				_statusBar.display(nppGUI._statusBarShow);
            ::SendMessage(_pPublicInterface->getHSelf(), WM_SIZE, SIZE_RESTORED, MAKELONG(rc.bottom, rc.right));
            return oldVal;
        }
--- a/PowerEditor/src/ScitillaComponent/Buffer.cpp
+++ b/PowerEditor/src/ScitillaComponent/Buffer.cpp
@ -564,9 +564,10 @@ bool FileManager::saveBuffer(BufferID id, const TCHAR * filename, bool isCopy) {
 		char data[blockSize + 1];
 		int lengthDoc = _pscratchTilla->getCurrentDocLen();
-		for (int i = 0; i < lengthDoc; i += blockSize)
+		int grabSize;
 		for (int i = 0; i < lengthDoc; i += grabSize)
 		{
-			int grabSize = lengthDoc - i;
+			grabSize = lengthDoc - i;
 			if (grabSize > blockSize) 
 				grabSize = blockSize;
@ -574,8 +575,11 @@ bool FileManager::saveBuffer(BufferID id, const TCHAR * filename, bool isCopy) {
 			if (encoding != -1)
 			{
 				WcharMbcsConvertor *wmc = WcharMbcsConvertor::getInstance();
-				const char *newData = wmc->encode(SC_CP_UTF8, encoding, data);
+				int newDataLen = 0;
-				UnicodeConvertor.fwrite(newData, strlen(newData));
+				int incompleteMultibyteChar = 0;
 				const char *newData = wmc->encode(SC_CP_UTF8, encoding, data, grabSize, &newDataLen, &incompleteMultibyteChar);
 				grabSize -= incompleteMultibyteChar;
 				UnicodeConvertor.fwrite(newData, newDataLen);
 			}
 			else
 			{
@ -692,26 +696,11 @@ bool FileManager::loadFileData(Document doc, const TCHAR * filename, Utf8_16_Rea
 		size_t lenFile = 0;
 		size_t lenConvert = 0;	//just in case conversion results in 0, but file not empty
 		bool isFirstTime = true;
-		int incompleteMultibyteChar = 0; //we do not want to call SCI_APPENDTEXT with an incomplete character if the buffer ends in the middle of one
+		int incompleteMultibyteChar = 0;
 		char incompleteMultibyteChar_first = 0;
 		do {
 			lenFile = fread(data+incompleteMultibyteChar, 1, blockSize-incompleteMultibyteChar, fp) + incompleteMultibyteChar;
 			// we might not know yet the encoding; we ensure that valid UTF-8 characters will not be cut in the middle, without causing problems if it's not UTF-8
 			// TODO: all expressions for testing UTF chars should be put in inline functions, not directly in the code
 			if(lenFile == blockSize && (data[blockSize-1]&0x80) != 0) // possible multi-byte character that could be cut due to blockSize
 			{
 				incompleteMultibyteChar = 1;
 				while(incompleteMultibyteChar < 6 // longest "defined" UTF-8 code (including restricted codes not yet defined by Unicode)
 					&& (data[blockSize-incompleteMultibyteChar]&0xC0) == 0x80) // is possibly a continuation byte in a multi-byte character
 					++incompleteMultibyteChar;
 				// leave for the next buffer all bytes that could potentially be multi-byte UTF-8 at the end of current buffer
 				lenFile -= incompleteMultibyteChar;
 				incompleteMultibyteChar_first = data[lenFile]; // this byte can be erased by following code to put a null terminator
 			}
 			else incompleteMultibyteChar = 0;
            // check if file contain any BOM
            if (isFirstTime) 
            {
@ -726,10 +715,19 @@ bool FileManager::loadFileData(Document doc, const TCHAR * filename, Utf8_16_Rea
 			if (encoding != -1)
 			{
-				data[lenFile] = '\0';
+				if (encoding == SC_CP_UTF8)
-				WcharMbcsConvertor *wmc = WcharMbcsConvertor::getInstance();
+				{
-				const char *newData = wmc->encode(encoding, SC_CP_UTF8, data);
+					// Pass through UTF-8 (this does not check validity of characters, thus inserting a multi-byte character in two halfs is working)
-				_pscratchTilla->execute(SCI_APPENDTEXT, strlen(newData), (LPARAM)newData);
+					_pscratchTilla->execute(SCI_APPENDTEXT, lenFile, (LPARAM)data);
 				}
 				else
 				{
 					WcharMbcsConvertor *wmc = WcharMbcsConvertor::getInstance();
 					int newDataLen = 0;
 					const char *newData = wmc->encode(encoding, SC_CP_UTF8, data, lenFile, &newDataLen, &incompleteMultibyteChar);
 					_pscratchTilla->execute(SCI_APPENDTEXT, newDataLen, (LPARAM)newData);
 				}
 				if (format == -1)
 					format = getEOLFormatForm(data);
 			}
@ -743,7 +741,6 @@ bool FileManager::loadFileData(Document doc, const TCHAR * filename, Utf8_16_Rea
 			{
 				// copy bytes to next buffer
 				memcpy(data, data+blockSize-incompleteMultibyteChar, incompleteMultibyteChar);
 				data[0] = incompleteMultibyteChar_first;
 			}
 		} while (lenFile > 0);
--- a/PowerEditor/src/Utf8.h
+++ b/PowerEditor/src/Utf8.h
@ -0,0 +1,55 @@
 // Simple functions to test UTF-8 characters.
 // Copyright (C)2010 Francois-R.Boyer@PolyMtl.ca
 // First version 2010-08
 //
 // Written for notepad++, and distributed under same license:
 // This program is free software; you can redistribute it and/or
 // modify it under the terms of the GNU General Public License
 // as published by the Free Software Foundation; either
 // version 2 of the License, or (at your option) any later version.
 // This program is distributed in the hope that it will be useful,
 // but WITHOUT ANY WARRANTY; without even the implied warranty of
 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 // GNU General Public License for more details.
 // You should have received a copy of the GNU General Public License
 // along with this program; if not, write to the Free Software
 // Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 #ifndef UTF8_H
 #define UTF8_H
 // Helpers to classify UTF-8 bytes and to locate / validate one character inside a byte buffer.
 // The include guard is needed because all functions are defined in this header: including it twice in the same translation unit would otherwise redefine them.
 namespace Utf8 { // could be a static class, instead of a namespace, if it needs private members
 	// basic classification of UTF-8 bytes
 	inline static bool isSingleByte(UCHAR c)       { return c < 0x80; }
 	inline static bool isPartOfMultibyte(UCHAR c)  { return c >= 0x80; }
 	inline static bool isFirstOfMultibyte(UCHAR c) { return c >= 0xC2 && c < 0xF5; } // 0xF5 to 0xFD are defined by UTF-8, but are not currently valid Unicode
 	inline static bool isContinuation(UCHAR c)     { return (c & 0xC0) == 0x80; }
 	inline static bool isValid(UCHAR c)            { return c < 0xC0 || isFirstOfMultibyte(c); }	// validates a byte, out of context

 	// number of continuation bytes for a given valid first character (0 for single byte characters)
 	// The table is indexed by bits 5-4 of the lead byte: 0xC0-0xDF -> 1, 0xE0-0xEF -> 2, 0xF0-0xFF -> 3.
 	inline static int  continuationBytes(UCHAR c)  {
 		static const char _len[] = { 1,1,2,3 };
 		return (c < 0xC0) ? 0 : _len[(c & 0x30) >>  4];
 	}

 	// validates a full character
 	// buf points to the first byte of the character, buflen is the number of bytes available from buf.
 	// Returns false when the character is malformed or does not fit entirely in the buflen bytes.
 	inline static bool isValid(const char* buf, int buflen) {
 		if(buflen < 1) return false; // nothing to read: do not touch buf[0], no character fits in an empty buffer
 		if(isSingleByte(buf[0])) return true; // single byte is valid
 		if(!isFirstOfMultibyte(buf[0])) return false; // not single byte, nor valid multi-byte first byte
 		int charContinuationBytes = continuationBytes(buf[0]);
 		if(buflen < charContinuationBytes+1) return false; // character does not fit in buffer
 		for(int i = charContinuationBytes; i>0; --i)
 			if(!isContinuation(*(++buf))) return false; // not enough continuation bytes
 		return true;  // the character is valid (if there are too many continuation bytes, it is the next character that will be invalid)
 	}

 	// rewinds to the first byte of a multi-byte character for any valid UTF-8 (and will not rewind too much on any other input)
 	// Returns the index, in buf, of the byte where the character containing buf[startingIndex] starts.
 	inline static int characterStart(const char* buf, int startingIndex) {
 		int charContinuationBytes = 0;
 		while(charContinuationBytes < startingIndex	// rewind past start of buffer?
 			&& charContinuationBytes < 5	// UTF-8 support up to 5 continuation bytes (but valid sequences currently do not have more than 3)
 			&& isContinuation(buf[startingIndex-charContinuationBytes])
 			)
 			++charContinuationBytes;
 		return startingIndex-charContinuationBytes;
 	}
 } // namespace Utf8
 #endif // UTF8_H
--- a/PowerEditor/src/resource.h
+++ b/PowerEditor/src/resource.h
@ -18,12 +18,12 @@
 #ifndef RESOURCE_H
 #define RESOURCE_H
-#define NOTEPAD_PLUS_VERSION TEXT("Notepad++ v5.7")
+#define NOTEPAD_PLUS_VERSION TEXT("Notepad++ v5.7.1")
 // should be X.Y : i.e. if VERSION_DIGITALVALUE == 4, 7, 1, 0, then X = 4, Y = 71
 // ex : #define VERSION_VALUE TEXT("5.63\0")
-#define VERSION_VALUE TEXT("5.7\0")
+#define VERSION_VALUE TEXT("5.71\0")
-#define VERSION_DIGITALVALUE 5, 7, 0, 0
+#define VERSION_DIGITALVALUE 5, 7, 1, 0
 #ifdef UNICODE
 #define UNICODE_ANSI_MODE TEXT("(UNICODE)")