Scintilla: Buffer: fixed invalid read via strlen when loading a file

When loading a file via `FileManager::loadFileData`, a fixed-length buffer is filled via `fread`. Then, in some cases, a conversion is done with the help of `Utf8_16_Read`. However, the method `Utf8_16_Read::convert` performs a call to `strlen` on this buffer. This is obviously wrong: `\0` char should be accepted (even if a bit strange) and the buffer is not zero-terminated. The changes merely consist in adding an additional parameter `length` to not have to guess the size of the buffer.
2015-05-30 19:47:44 +02:00 · 2015-05-30 19:47:44 +02:00 · d09947d22d
commit d09947d22d
parent 17e8ca3c15
4 changed files with 30 additions and 23 deletions
--- a/PowerEditor/src/ScitillaComponent/Buffer.cpp
+++ b/PowerEditor/src/ScitillaComponent/Buffer.cpp
@ -508,11 +508,10 @@ BufferID FileManager::loadFile(const TCHAR * filename, Document doc, int encodin
 		if (encoding == -1)
 		{
 			// 3 formats : WIN_FORMAT, UNIX_FORMAT and MAC_FORMAT
-			if (UnicodeConvertor.getNewBuf()) 
+			if (nullptr != UnicodeConvertor.getNewBuf()) 
 			{
-				int format = getEOLFormatForm(UnicodeConvertor.getNewBuf());
+				int format = getEOLFormatForm(UnicodeConvertor.getNewBuf(), UnicodeConvertor.getNewSize());
 				buf->setFormat(format == -1?WIN_FORMAT:(formatType)format);
 			}
 			else
 			{
@ -569,7 +568,7 @@ bool FileManager::reloadBuffer(BufferID id)
 		{
 			if (UnicodeConvertor.getNewBuf()) 
 			{
-				int format = getEOLFormatForm(UnicodeConvertor.getNewBuf());
+				int format = getEOLFormatForm(UnicodeConvertor.getNewBuf(), UnicodeConvertor.getNewSize());
 				buf->setFormat(format == -1?WIN_FORMAT:(formatType)format);
 			}
 			else
@ -1241,7 +1240,7 @@ bool FileManager::loadFileData(Document doc, const TCHAR * filename, Utf8_16_Rea
 				}
 				if (format == -1)
-					format = getEOLFormatForm(data);
+					format = getEOLFormatForm(data, lenFile);
 			}
 			else
 			{
@ -1323,14 +1322,15 @@ int FileManager::docLength(Buffer * buffer) const
 	return docLen;
 }
-int FileManager::getEOLFormatForm(const char *data) const
+int FileManager::getEOLFormatForm(const char* const data, size_t length) const
 {
-	size_t len = strlen(data);
+	assert(data != nullptr && "invalid buffer for getEOLFormatForm()");
-	for (size_t i = 0 ; i < len ; i++)
+
 	for (size_t i = 0; i != length; ++i)
 	{
 		if (data[i] == CR)
 		{
-			if (i+1 < len &&  data[i+1] == LF)
+			if (i+1 < length && data[i+1] == LF)
 			{
 				return int(WIN_FORMAT);
 			}
--- a/PowerEditor/src/ScitillaComponent/Buffer.h
+++ b/PowerEditor/src/ScitillaComponent/Buffer.h
@ -104,7 +104,7 @@ public:
 	void destroyInstance() { delete _pSelf; };
 	int getFileNameFromBuffer(BufferID id, TCHAR * fn2copy);
 	int docLength(Buffer * buffer) const;
-	int getEOLFormatForm(const char *data) const;
+	int getEOLFormatForm(const char* const data, size_t length) const;
 	size_t nextUntitledNewNumber() const;
 private:
@ -384,6 +384,9 @@ private :
 		if (_canNotify)
 			_pManager->beNotifiedOfBufferChange(this, mask); 
 	};
 	Buffer(const Buffer&) { assert(false);  }
 	Buffer& operator = (const Buffer&) { assert(false);  return *this; }
 };
 #endif //BUFFER_H
--- a/PowerEditor/src/Utf8_16.cpp
+++ b/PowerEditor/src/Utf8_16.cpp
@ -31,7 +31,8 @@ const Utf8_16::utf8 Utf8_16::k_Boms[][3] = {
 Utf8_16_Read::Utf8_16_Read() {
 	m_eEncoding		= uni8Bit;
-	m_nBufSize		= 0;
+	m_nAllocatedBufSize = 0;
 	m_nNewBufSize   = 0;
 	m_pNewBuf		= NULL;
 	m_bFirstRead	= true;
 }
@ -113,10 +114,9 @@ size_t Utf8_16_Read::convert(char* buf, size_t len)
 	// bugfix by Jens Lorenz
 	static	size_t nSkip = 0;
    size_t  ret = 0;
 	m_pBuf = (ubyte*)buf;
 	m_nLen = len;
 	m_nNewBufSize = 0;
 	if (m_bFirstRead == true)
    {
@ -131,16 +131,16 @@ size_t Utf8_16_Read::convert(char* buf, size_t len)
        case uni8Bit:
        case uniCookie: {
            // Do nothing, pass through
-            m_nBufSize = 0;
+			m_nAllocatedBufSize = 0;
            m_pNewBuf = m_pBuf;
-            ret = len;
+			m_nNewBufSize = len;
            break;
        }
        case uniUTF8: {
            // Pass through after BOM
-            m_nBufSize = 0;
+			m_nAllocatedBufSize = 0;
            m_pNewBuf = m_pBuf + nSkip;
-            ret = len - nSkip;
+			m_nNewBufSize = len - nSkip;
            break;
        }    
        case uni16BE_NoBOM:
@ -149,13 +149,13 @@ size_t Utf8_16_Read::convert(char* buf, size_t len)
        case uni16LE: {
            size_t newSize = len + len / 2 + 1;
-            if (m_nBufSize != newSize)
+			if (m_nAllocatedBufSize != newSize)
            {
 				if (m_pNewBuf)
 					delete [] m_pNewBuf;
                m_pNewBuf  = NULL;
                m_pNewBuf  = new ubyte[newSize];
-                m_nBufSize = newSize;
+				m_nAllocatedBufSize = newSize;
            }
            ubyte* pCur = m_pNewBuf;
@ -166,7 +166,7 @@ size_t Utf8_16_Read::convert(char* buf, size_t len)
            {
                *pCur++ = m_Iter16.get();
            }
-            ret = pCur - m_pNewBuf;
+			m_nNewBufSize = pCur - m_pNewBuf;
            break;
        }
        default:
@ -176,7 +176,7 @@ size_t Utf8_16_Read::convert(char* buf, size_t len)
 	// necessary for second calls and more
 	nSkip = 0;
-	return ret;
+	return m_nNewBufSize;
 }
--- a/PowerEditor/src/Utf8_16.h
+++ b/PowerEditor/src/Utf8_16.h
@ -112,7 +112,8 @@ public:
 	~Utf8_16_Read();
 	size_t convert(char* buf, size_t len);
-	char* getNewBuf() { return reinterpret_cast<char *>(m_pNewBuf); }
+	const char* getNewBuf() const { return (const char*) m_pNewBuf; }
 	size_t getNewSize() const { return m_nNewBufSize; }
 	UniMode getEncoding() const { return m_eEncoding; }
 	size_t calcCurPos(size_t pos);
@ -126,7 +127,10 @@ private:
 	UniMode    m_eEncoding;
 	ubyte*          m_pBuf;
 	ubyte*          m_pNewBuf;
-	size_t          m_nBufSize;
+	// size of the new buffer
 	size_t          m_nNewBufSize;
 	// size of the previously allocated buffer (if != 0)
 	size_t          m_nAllocatedBufSize;
 	size_t			m_nSkip;
 	bool            m_bFirstRead;
 	size_t          m_nLen;