notepad-plus-plus-legacy/scintilla/src/UniConversion.h

// Scintilla source code edit control
/** @file UniConversion.h
 ** Functions to handle UTF-8 and UTF-16 strings.
 **/
// Copyright 1998-2001 by Neil Hodgson <neilh@scintilla.org>
// The License.txt file describes the conditions under which this software may be distributed.

#ifndef UNICONVERSION_H
#define UNICONVERSION_H

namespace Scintilla {

// NOTE(review): presumably the longest UTF-8 sequence handled, in bytes; it matches
// the widest (four byte) case decoded by UnicodeFromUTF8 - confirm against callers.
const int UTF8MaxBytes = 4;

// U+FFFD, the Unicode REPLACEMENT CHARACTER code point.
// Where it gets substituted is decided by code outside this header.
const int unicodeReplacementChar = 0xFFFD;

// Conversion and measuring routines for UTF-8, UTF-16 and UTF-32 text.
// Only the declarations are visible in this header; the definitions are elsewhere.
// NOTE(review): len/tlen presumably state the capacity of the destination buffer,
// counted in elements of that buffer's type - confirm against the implementation.
size_t UTF8Length(std::wstring_view wsv) noexcept;
size_t UTF8PositionFromUTF16Position(std::string_view u8Text, size_t positionUTF16) noexcept;
void UTF8FromUTF16(std::wstring_view wsv, char *putf, size_t len);
void UTF8FromUTF32Character(int uch, char *putf) noexcept;
size_t UTF16Length(std::string_view svu8) noexcept;
size_t UTF16FromUTF8(std::string_view svu8, wchar_t *tbuf, size_t tlen);
size_t UTF32Length(std::string_view svu8) noexcept;
size_t UTF32FromUTF8(std::string_view svu8, unsigned int *tbuf, size_t tlen);
// WStringFromUTF8 does the right thing when wchar_t is 2 or 4 bytes so
// works on both Windows and Unix.
std::wstring WStringFromUTF8(std::string_view svu8);
unsigned int UTF16FromUTF32Character(unsigned int val, wchar_t *tbuf) noexcept;
bool UTF8IsValid(std::string_view svu8) noexcept;
std::string FixInvalidUTF8(const std::string &text);

// Table with one entry per possible byte value (256 entries). UnicodeFromUTF8 uses
// the entry for a lead byte to choose how many bytes to decode.
// Defined outside this header.
extern const unsigned char UTF8BytesOfLead[256];

// Decode the character starting at us into a single integer value.
// The UTF8BytesOfLead entry for the first byte selects the sequence width:
// 1, 2 or 3 bytes are decoded as such, any other entry is decoded as four bytes.
// Neither the trail bytes nor the available length are checked here, so the
// caller must supply as many readable bytes as the lead byte calls for.
inline int UnicodeFromUTF8(const unsigned char *us) noexcept {
	const unsigned char lead = us[0];
	const int width = UTF8BytesOfLead[lead];
	if (width == 1) {
		// Single byte: the value is the byte itself.
		return lead;
	}
	if (width == 2) {
		// 5 payload bits from the lead, 6 from the trail.
		return ((lead & 0x1F) << 6) | (us[1] & 0x3F);
	}
	if (width == 3) {
		// 4 payload bits from the lead, 6 from each trail.
		return ((lead & 0xF) << 12) | ((us[1] & 0x3F) << 6) | (us[2] & 0x3F);
	}
	// Everything else: 3 payload bits from the lead, 6 from each of three trails.
	return ((lead & 0x7) << 18) | ((us[1] & 0x3F) << 12) | ((us[2] & 0x3F) << 6) | (us[3] & 0x3F);
}

// True for bytes in the range 0x80..0xBF, i.e. those whose two top bits are 10.
inline constexpr bool UTF8IsTrailByte(unsigned char ch) noexcept {
	return (ch & 0xC0) == 0x80;
}

// True when ch is below 0x80. The argument is a signed int, so negative
// values also yield true.
inline constexpr bool UTF8IsAscii(int ch) noexcept {
	return !(ch >= 0x80);
}

// NOTE(review): presumably masks for the int returned by UTF8Classify -
// UTF8MaskWidth (low three bits) for the byte width and UTF8MaskInvalid (bit 3)
// for the invalid flag; confirm against the implementation.
enum { UTF8MaskWidth=0x7, UTF8MaskInvalid=0x8 };
// Pointer-and-length form; defined outside this header.
int UTF8Classify(const unsigned char *us, size_t len) noexcept;
// Convenience overload: forwards the view's bytes, reinterpreted as
// unsigned char, together with its length to the pointer-and-length form.
inline int UTF8Classify(std::string_view sv) noexcept {
	const unsigned char *bytes = reinterpret_cast<const unsigned char *>(sv.data());
	const size_t byteCount = sv.size();
	return UTF8Classify(bytes, byteCount);
}

// Similar to UTF8Classify but returns a length of 1 for invalid bytes
// instead of setting the invalid flag.
// Note that len is an int here, whereas UTF8Classify takes a size_t.
// Defined outside this header.
int UTF8DrawBytes(const unsigned char *us, int len) noexcept;

// U+2028 (line separator) encodes as \xe2\x80\xa8
// U+2029 (paragraph separator) encodes as \xe2\x80\xa9
const int UTF8SeparatorLength = 3;
// True when the bytes at us spell one of the two separators above.
// Bytes are examined in order and a later byte is only read once the
// earlier ones have matched.
inline bool UTF8IsSeparator(const unsigned char *us) noexcept {
	if (us[0] != 0xe2) {
		return false;
	}
	if (us[1] != 0x80) {
		return false;
	}
	const unsigned char last = us[2];
	return (last == 0xa8) || (last == 0xa9);
}

// U+0085 (NEL) encodes as \xc2\x85
const int UTF8NELLength = 2;
// True when the two bytes at us are exactly \xc2\x85.
// The second byte is only read when the first one matches.
inline bool UTF8IsNEL(const unsigned char *us) noexcept {
	if (us[0] != 0xc2) {
		return false;
	}
	return us[1] == 0x85;
}

// UTF-16 surrogate boundaries: lead units span 0xD800..0xDBFF and
// trail units span 0xDC00..0xDFFF.
enum { SURROGATE_LEAD_FIRST = 0xD800 };
enum { SURROGATE_LEAD_LAST = 0xDBFF };
enum { SURROGATE_TRAIL_FIRST = 0xDC00 };
enum { SURROGATE_TRAIL_LAST = 0xDFFF };
// First code point above the Basic Multilingual Plane.
enum { SUPPLEMENTAL_PLANE_FIRST = 0x10000 };

// Returns 2 when uch lies in the lead surrogate range, otherwise 1.
inline constexpr unsigned int UTF16CharLength(wchar_t uch) noexcept {
	return ((uch < SURROGATE_LEAD_FIRST) || (uch > SURROGATE_LEAD_LAST)) ? 1 : 2;
}

// Maps a UTF-8 byte count to a UTF-16 length: counts of four or more
// give 2, anything smaller gives 1.
inline constexpr unsigned int UTF16LengthFromUTF8ByteCount(unsigned int byteCount) noexcept {
	return (byteCount >= 4) ? 2 : 1;
}

}

#endif
Moved scintilla in trunk remotely git-svn-id: svn://svn.tuxfamily.org/svnroot/notepadplus/repository/trunk@461 f5eea248-9336-0410-98b8-ebc06183d4e3 2009-04-24 23:35:41 +00:00			`// Scintilla source code edit control`
			`/** @file UniConversion.h`
[UPDATE] Update Scintilla from 2.0.1 to 2.1.2 git-svn-id: svn://svn.tuxfamily.org/svnroot/notepadplus/repository/trunk@640 f5eea248-9336-0410-98b8-ebc06183d4e3 2010-07-12 22:19:51 +00:00			`** Functions to handle UTF-8 and UTF-16 strings.`
Moved scintilla in trunk remotely git-svn-id: svn://svn.tuxfamily.org/svnroot/notepadplus/repository/trunk@461 f5eea248-9336-0410-98b8-ebc06183d4e3 2009-04-24 23:35:41 +00:00			`**/`
			`// Copyright 1998-2001 by Neil Hodgson <neilh@scintilla.org>`
			`// The License.txt file describes the conditions under which this software may be distributed.`

[UPGRADE] Upgrade Scintilla from v3.34 to v3.56. 2015-06-07 21:19:26 +00:00			`#ifndef UNICONVERSION_H`
			`#define UNICONVERSION_H`

			`namespace Scintilla {`

[UPDATE] Update Scintilla to 3.3.4 [BUG_FIXED] (Author: Dave Brotherstone) Fix scintilla crash bug while closing a document. git-svn-id: svn://svn.tuxfamily.org/svnroot/notepadplus/repository/trunk@1104 f5eea248-9336-0410-98b8-ebc06183d4e3 2013-08-28 00:44:27 +00:00			`const int UTF8MaxBytes = 4;`

[UPGRADE] Upgrade Scintilla from v3.34 to v3.56. 2015-06-07 21:19:26 +00:00			`const int unicodeReplacementChar = 0xFFFD;`

Upgrade Scintilla from 4.1.4 to 4.2.0 Fix #5822 2019-07-21 13:26:02 +00:00			`size_t UTF8Length(std::wstring_view wsv) noexcept;`
Upgrade Scintilla from v3.56 to v4.14 2019-05-04 18:14:48 +00:00			`size_t UTF8PositionFromUTF16Position(std::string_view u8Text, size_t positionUTF16) noexcept;`
			`void UTF8FromUTF16(std::wstring_view wsv, char *putf, size_t len);`
Upgrade Scintilla from 4.1.4 to 4.2.0 Fix #5822 2019-07-21 13:26:02 +00:00			`void UTF8FromUTF32Character(int uch, char *putf) noexcept;`
			`size_t UTF16Length(std::string_view svu8) noexcept;`
			`size_t UTF16FromUTF8(std::string_view svu8, wchar_t *tbuf, size_t tlen);`
			`size_t UTF32Length(std::string_view svu8) noexcept;`
			`size_t UTF32FromUTF8(std::string_view svu8, unsigned int *tbuf, size_t tlen);`
			`// WStringFromUTF8 does the right thing when wchar_t is 2 or 4 bytes so`
			`// works on both Windows and Unix.`
			`std::wstring WStringFromUTF8(std::string_view svu8);`
Upgrade Scintilla from v3.56 to v4.14 2019-05-04 18:14:48 +00:00			`unsigned int UTF16FromUTF32Character(unsigned int val, wchar_t *tbuf) noexcept;`
Upgrade Scintilla from 4.1.4 to 4.2.0 Fix #5822 2019-07-21 13:26:02 +00:00			`bool UTF8IsValid(std::string_view svu8) noexcept;`
Upgrade Scintilla from v3.56 to v4.14 2019-05-04 18:14:48 +00:00			`std::string FixInvalidUTF8(const std::string &text);`

			`extern const unsigned char UTF8BytesOfLead[256];`
Moved scintilla in trunk remotely git-svn-id: svn://svn.tuxfamily.org/svnroot/notepadplus/repository/trunk@461 f5eea248-9336-0410-98b8-ebc06183d4e3 2009-04-24 23:35:41 +00:00
Upgrade Scintilla from v3.56 to v4.14 2019-05-04 18:14:48 +00:00			`inline int UnicodeFromUTF8(const unsigned char *us) noexcept {`
			`switch (UTF8BytesOfLead[us[0]]) {`
			`case 1:`
			`return us[0];`
			`case 2:`
			`return ((us[0] & 0x1F) << 6) + (us[1] & 0x3F);`
			`case 3:`
			`return ((us[0] & 0xF) << 12) + ((us[1] & 0x3F) << 6) + (us[2] & 0x3F);`
			`default:`
			`return ((us[0] & 0x7) << 18) + ((us[1] & 0x3F) << 12) + ((us[2] & 0x3F) << 6) + (us[3] & 0x3F);`
			`}`
			`}`
[UPDATE] Update Scintilla to 3.3.4 [BUG_FIXED] (Author: Dave Brotherstone) Fix scintilla crash bug while closing a document. git-svn-id: svn://svn.tuxfamily.org/svnroot/notepadplus/repository/trunk@1104 f5eea248-9336-0410-98b8-ebc06183d4e3 2013-08-28 00:44:27 +00:00
Upgrade Scintilla from v3.56 to v4.14 2019-05-04 18:14:48 +00:00			`inline constexpr bool UTF8IsTrailByte(unsigned char ch) noexcept {`
[UPDATE] Update Scintilla to 3.3.4 [BUG_FIXED] (Author: Dave Brotherstone) Fix scintilla crash bug while closing a document. git-svn-id: svn://svn.tuxfamily.org/svnroot/notepadplus/repository/trunk@1104 f5eea248-9336-0410-98b8-ebc06183d4e3 2013-08-28 00:44:27 +00:00			`return (ch >= 0x80) && (ch < 0xc0);`
			`}`

Upgrade Scintilla from v3.56 to v4.14 2019-05-04 18:14:48 +00:00			`inline constexpr bool UTF8IsAscii(int ch) noexcept {`
[UPDATE] Update Scintilla to 3.3.4 [BUG_FIXED] (Author: Dave Brotherstone) Fix scintilla crash bug while closing a document. git-svn-id: svn://svn.tuxfamily.org/svnroot/notepadplus/repository/trunk@1104 f5eea248-9336-0410-98b8-ebc06183d4e3 2013-08-28 00:44:27 +00:00			`return ch < 0x80;`
			`}`

			`enum { UTF8MaskWidth=0x7, UTF8MaskInvalid=0x8 };`
Upgrade Scintilla from v3.56 to v4.14 2019-05-04 18:14:48 +00:00			`int UTF8Classify(const unsigned char *us, size_t len) noexcept;`
			`inline int UTF8Classify(std::string_view sv) noexcept {`
			`return UTF8Classify(reinterpret_cast<const unsigned char *>(sv.data()), sv.length());`
			`}`
[UPDATE] Update Scintilla to 3.3.4 [BUG_FIXED] (Author: Dave Brotherstone) Fix scintilla crash bug while closing a document. git-svn-id: svn://svn.tuxfamily.org/svnroot/notepadplus/repository/trunk@1104 f5eea248-9336-0410-98b8-ebc06183d4e3 2013-08-28 00:44:27 +00:00
[UPGRADE] Upgrade Scintilla from v3.34 to v3.56. 2015-06-07 21:19:26 +00:00			`// Similar to UTF8Classify but returns a length of 1 for invalid bytes`
			`// instead of setting the invalid flag`
Upgrade Scintilla from v3.56 to v4.14 2019-05-04 18:14:48 +00:00			`int UTF8DrawBytes(const unsigned char *us, int len) noexcept;`
[UPGRADE] Upgrade Scintilla from v3.34 to v3.56. 2015-06-07 21:19:26 +00:00
[UPDATE] Update Scintilla to 3.3.4 [BUG_FIXED] (Author: Dave Brotherstone) Fix scintilla crash bug while closing a document. git-svn-id: svn://svn.tuxfamily.org/svnroot/notepadplus/repository/trunk@1104 f5eea248-9336-0410-98b8-ebc06183d4e3 2013-08-28 00:44:27 +00:00			`// Line separator is U+2028 \xe2\x80\xa8`
			`// Paragraph separator is U+2029 \xe2\x80\xa9`
			`const int UTF8SeparatorLength = 3;`
Upgrade Scintilla from v3.56 to v4.14 2019-05-04 18:14:48 +00:00			`inline bool UTF8IsSeparator(const unsigned char *us) noexcept {`
[UPDATE] Update Scintilla to 3.3.4 [BUG_FIXED] (Author: Dave Brotherstone) Fix scintilla crash bug while closing a document. git-svn-id: svn://svn.tuxfamily.org/svnroot/notepadplus/repository/trunk@1104 f5eea248-9336-0410-98b8-ebc06183d4e3 2013-08-28 00:44:27 +00:00			`return (us[0] == 0xe2) && (us[1] == 0x80) && ((us[2] == 0xa8) \|\| (us[2] == 0xa9));`
			`}`

			`// NEL is U+0085 \xc2\x85`
			`const int UTF8NELLength = 2;`
Upgrade Scintilla from v3.56 to v4.14 2019-05-04 18:14:48 +00:00			`inline bool UTF8IsNEL(const unsigned char *us) noexcept {`
[UPDATE] Update Scintilla to 3.3.4 [BUG_FIXED] (Author: Dave Brotherstone) Fix scintilla crash bug while closing a document. git-svn-id: svn://svn.tuxfamily.org/svnroot/notepadplus/repository/trunk@1104 f5eea248-9336-0410-98b8-ebc06183d4e3 2013-08-28 00:44:27 +00:00			`return (us[0] == 0xc2) && (us[1] == 0x85);`
			`}`
[UPGRADE] Upgrade Scintilla from v3.34 to v3.56. 2015-06-07 21:19:26 +00:00
			`enum { SURROGATE_LEAD_FIRST = 0xD800 };`
			`enum { SURROGATE_LEAD_LAST = 0xDBFF };`
Upgrade Scintilla from v3.56 to v4.14 2019-05-04 18:14:48 +00:00			`enum { SURROGATE_TRAIL_FIRST = 0xDC00 };`
			`enum { SURROGATE_TRAIL_LAST = 0xDFFF };`
			`enum { SUPPLEMENTAL_PLANE_FIRST = 0x10000 };`

			`inline constexpr unsigned int UTF16CharLength(wchar_t uch) noexcept {`
[UPGRADE] Upgrade Scintilla from v3.34 to v3.56. 2015-06-07 21:19:26 +00:00			`return ((uch >= SURROGATE_LEAD_FIRST) && (uch <= SURROGATE_LEAD_LAST)) ? 2 : 1;`
			`}`

Upgrade Scintilla from v3.56 to v4.14 2019-05-04 18:14:48 +00:00			`inline constexpr unsigned int UTF16LengthFromUTF8ByteCount(unsigned int byteCount) noexcept {`
			`return (byteCount < 4) ? 1 : 2;`
			`}`

[UPGRADE] Upgrade Scintilla from v3.34 to v3.56. 2015-06-07 21:19:26 +00:00			`}`

			`#endif`