Improve URL parser: fix the apostrophe-in-URL issue

Also improve the test tool.

Fix #9031, close #9090
This commit is contained in:
Udo Hoffmann 2020-11-01 03:28:18 +01:00 committed by Don HO
parent 47419910f6
commit 2aac88e3b1
No known key found for this signature in database
GPG Key ID: 6C429F1D8D84F46E
7 changed files with 454 additions and 216 deletions

View File

@ -3,37 +3,34 @@ local testFiles = {"verifyUrlDetection_1a",
local URL_INDIC = 8
local timerInterval = 10
local function verifyUrlDetection()
local curPos = 0
local task = -1
local uFrom = 0
local uTo = 0
local mFrom = 0
local mTo = 0
local OKorKO = "OK"
local nFile = 1
local testResults = {}
local outFile = nil
local curPos = 0
local task = -1
local uFrom = 0
local uTo = 0
local mFrom = 0
local mTo = 0
local OKorKO = "OK"
local nFile = 1
local testResults = {}
local outFile = nil
local function Summary()
local function Summary()
local resLine = ""
local i = 1
while testFiles[i] ~= nil do
if testResults[i] == nil then
testResults[i] = 'KO'
testResults[i] = "KO"
end
print(testFiles[i] .. ": " .. testResults[i])
i = i + 1
end
print(resLine)
if endNppAfterUrlTest ~= nil then
print("good bye")
npp:MenuCommand(IDM_FILE_EXIT)
end
end
end
local function nextFile()
local function nextFile()
local fileAvail = false
if outFile ~= nil then
io.close(outFile)
@ -67,9 +64,9 @@ local function verifyUrlDetection()
end
end
return fileAvail
end
end
local function scrollToNextURL()
local function scrollToNextURL()
editor.TargetStart = curPos
editor.TargetEnd = editor.Length
editor.SearchFlags = SCFIND_REGEXP
@ -102,9 +99,9 @@ local function verifyUrlDetection()
else
return 0
end
end
end
local function verifyURL()
local function verifyURL()
local mMsk = editor:textrange(mFrom, mTo)
editor:GotoPos(uFrom + 2)
local uMsk = "m "
@ -129,6 +126,9 @@ local function verifyUrlDetection()
outFile:write("KO", "\t", editor:textrange(uFrom, uTo), "\n")
outFile:write("ok", "\t", mMsk, "\n")
outFile:write("ko", "\t", uMsk, "\n")
print("KO", "\t", editor:textrange(uFrom, uTo))
print("ok", "\t", mMsk)
print("ko", "\t", uMsk)
OKorKO = "KO"
Res = 1
end
@ -138,9 +138,9 @@ local function verifyUrlDetection()
OKorKO = "KO"
end
return Res
end
end
local function goForward(timer)
local function goForward(timer)
if task < 0 then
task = task + 1
if task == 0 then
@ -183,15 +183,12 @@ local function verifyUrlDetection()
end
else
npp.stopTimer(timer)
print("KO---", "Internal impossibility")
print("KO", "Internal impossibility")
print()
Summary()
end
end
npp.StartTimer(timerInterval, goForward)
end
npp.ClearConsole()
verifyUrlDetection()
npp.StartTimer(timerInterval, goForward)

View File

@ -1,26 +1,30 @@
try {
if (Test-Path -Path '..\..\Bin\plugins' -PathType Container)
$binDir = '..\..\Bin'
$pluginsDir = $binDir + '\plugins'
$pluginsSaveDir = $binDir + '\plugins_save'
if (Test-Path -Path $pluginsDir -PathType Container)
{
if (Test-Path -Path '..\..\Bin\plugins_save' -PathType Container)
if (Test-Path -Path $pluginsSaveDir -PathType Container)
{
"Backup for plugins directory already exists"
exit -1
}
"Backing up plugin directory ..."
Move-Item ..\..\Bin\plugins ..\..\bin\plugins_save
Move-Item $pluginsDir $pluginsSaveDir
}
"Installing Lua plugin for testing ..."
Copy-Item -Path .\plugins -Destination ..\..\bin -Recurse
Copy-Item -Path .\plugins -Destination $binDir -Recurse
"Testing ..."
..\..\bin\notepad++.exe | Out-Null
Invoke-Expression ($binDir + "\notepad++.exe | Out-Null")
if (Test-Path -Path '..\..\Bin\plugins_save' -PathType Container)
if (Test-Path -Path $pluginsSaveDir -PathType Container)
{
"Removing Lua plugin ..."
Remove-Item -Path ..\..\Bin\plugins -Recurse -Force
Remove-Item -Path $pluginsDir -Recurse -Force
"Restoring plugin directory ..."
Move-Item ..\..\Bin\plugins_save ..\..\bin\plugins
Move-Item $pluginsSaveDir $pluginsDir
}
$expectedRes = Get-Content .\verifyUrlDetection_1a.expected.result

View File

@ -169,6 +169,9 @@ Unwanted trailing character removal:
u (https://github.com/notepad-plus-plus/notepad-plus-plus) u
m 01111111111111111111111111111111111111111111111111111110 m
u [https://github.com/notepad-plus-plus/notepad-plus-plus] u
m 01111111111111111111111111111111111111111111111111111110 m
u https://github.com/notepad-plus-plus/notepad-plus-plus; u
m 1111111111111111111111111111111111111111111111111111110 m
@ -193,6 +196,9 @@ m 01111111111111111111111111111111111111111111110 m
u (https://en.wikipedia.org/wiki/Saw_2003_film) u
m 011111111111111111111111111111111111111111110 m
u [https://en.wikipedia.org/wiki/Saw_[2003_film]] u
m 01111111111111111111111111111111111111111111110 m
International characters:
@ -238,7 +244,224 @@ m 0000001111111111111111111100000000000 m
u \href{https://ig.com/?query=c761&vars={"id":"0815","first":100}}{click me} u
m 00000011111111111111111111111111111111111111111111111111111111100000000000 m
========
Quotation mark
- forbidden in name and path (delimiter)
- parsed in query part as quoting character,
overriding all other quoting characters
u http://xxx.xxx/xxx"xxx" u
m 11111111111111111100000 m
u http://xxx.xxx/?q="A"+"B"" u
m 11111111111111111111111110 m
u http://xxx.xxx/?q="A'+'B{}`'"" u
m 111111111111111111111111111110 m
========
Apostrophe
- allowed unrestricted in name and path
- parsed in query part as quoting character,
overriding all other quoting characters
u https://en.wikipedia.org/wiki/Murphy's_law u
m 111111111111111111111111111111111111111111 m
u http://xxx.xxx/xxx'xxx' u
m 11111111111111111111111 m
u http://xxx.xxx/xxx'xxx'' u
m 111111111111111111111111 m
u http://xxx.xxx/?q='A'+'B'' u
m 11111111111111111111111110 m
u http://xxx.xxx/?q='A'+'B'' u
m 11111111111111111111111110 m
u http://xxx.xxx/?q='A'+'B"{}`'' u
m 111111111111111111111111111110 m
========
Grave accent
- allowed unrestricted in name and path
- parsed in query part as quoting character,
overriding all other quoting characters
u http://xxx.xxx/Tom`s_sisters u
m 1111111111111111111111111111 m
u http://xxx.xxx/Tom`s%20sisters` u
m 1111111111111111111111111111111 m
u http://xxx.xxx/Tom`s%20sisters`` u
m 11111111111111111111111111111111 m
u http://xxx.xxx/?q=`A`+`B` u
m 1111111111111111111111111 m
u http://xxx.xxx/?q=`A`+`B`` u
m 11111111111111111111111110 m
u http://xxx.xxx/?q=`A"{}()'`` u
m 1111111111111111111111111110 m
========
Parentheses
- allowed in name and path
- closing parenthesis at end of path is removed,
if there are no other parentheses in path,
except pairing parentheses
- parsed in query part as quoting character,
overriding all other quoting characters
- no other parentheses in path, remove last closing parenthesis
u http://xxx.xxx/xxx) u
m 1111111111111111110 m
- pairing parentheses in path: remove last closing unpaired parenthesis
u http://xxx.xxx/xxx(xxx)) u
m 111111111111111111111110 m
- pairing parentheses in path: remove last closing unpaired parenthesis
u http://xxx.xxx/xxx((xxx))) u
m 11111111111111111111111110 m
- pairing parenthesis in path: keep last closing paired parenthesis
u http://xxx.xxx/xxx(xxx) u
m 11111111111111111111111 m
- pairing parentheses in path: remove last closing unpaired parenthesis
u http://xxx.xxx/xxx()xxx) u
m 111111111111111111111110 m
- arbitrary parentheses in path: keep last closing parenthesis
u http://xxx.xxx/xxx)) u
m 11111111111111111111 m
- arbitrary parentheses in path: keep last closing parenthesis
u http://xxx.xxx/xxx)(xxx) u
m 111111111111111111111111 m
- arbitrary parentheses in path: keep last closing parenthesis
u http://xxx.xxx/xxx)(xxx)) u
m 1111111111111111111111111 m
- arbitrary parentheses in path: keep last closing parenthesis
u http://xxx.xxx/xxx((xxx) u
m 111111111111111111111111 m
- arbitrary parentheses in path: keep last closing parenthesis
u http://xxx.xxx/xxx)((xxx) u
m 1111111111111111111111111 m
- parentheses in query part: end after last closing quote of query part
u http://xxx.xxx/xxx?q=(xxx)) u
m 111111111111111111111111110 m
- parentheses in query part: end after last closing quote of query part
u http://xxx.xxx/xxx?q=(xxx)( u
m 111111111111111111111111110 m
- parentheses in query part: end after last closing quote of query part
u http://xxx.xxx/xxx?q=(xxx)( u
m 111111111111111111111111110 m
- parentheses in query part: end after last closing quote of query part
u http://xxx.xxx/xxx?q=(xxx)&(xxx)( u
m 111111111111111111111111111111110 m
========
Square brackets
- allowed in name and path
- closing square bracket at end of path is removed,
if there are no other square brackets in path,
except pairing square brackets
- parsed in query part as quoting characters,
overriding all other quoting characters
- no other square brackets in path, remove last closing square bracket
u http://xxx.xxx/xxx] u
m 1111111111111111110 m
- pairing square brackets in path: remove last closing unpaired square bracket
u http://xxx.xxx/xxx[xxx]] u
m 111111111111111111111110 m
- pairing square brackets in path: remove last closing unpaired square bracket
u http://xxx.xxx/xxx[[xxx]]] u
m 11111111111111111111111110 m
- pairing square brackets in path: keep last closing paired square bracket
u http://xxx.xxx/xxx[xxx] u
m 11111111111111111111111 m
- pairing square brackets in path: remove last closing unpaired square bracket
u http://xxx.xxx/xxx[]xxx] u
m 111111111111111111111110 m
- arbitrary square brackets in path: keep last closing square bracket
u http://xxx.xxx/xxx]] u
m 11111111111111111111 m
- arbitrary square brackets in path: keep last closing square bracket
u http://xxx.xxx/xxx][xxx] u
m 111111111111111111111111 m
- arbitrary square brackets in path: keep last closing square bracket
u http://xxx.xxx/xxx][xxx]] u
m 1111111111111111111111111 m
- arbitrary square brackets in path: keep last closing square bracket
u http://xxx.xxx/xxx[[xxx] u
m 111111111111111111111111 m
- arbitrary square brackets in path: keep last closing square bracket
u http://xxx.xxx/xxx][[xxx] u
m 1111111111111111111111111 m
- square brackets in query part: end after last closing quote of query part
u http://xxx.xxx/xxx?q=[xxx]] u
m 111111111111111111111111110 m
- square brackets in query part: end after last closing quote of query part
u http://xxx.xxx/xxx?q=[xxx][ u
m 111111111111111111111111110 m
- square brackets in query part: end after last closing quote of query part
u )http://xxx.xxx/xxx?q=[xxx][ u
m 0111111111111111111111111110 m
- square brackets in query part: end after last closing quote of query part
u http://xxx.xxx/xxx?q=[xxx]&[xxx][ u
m 111111111111111111111111111111110 m
========
Curly brackets
- forbidden in name and path, because of LaTeX
- parsed in query part as quoting characters,
overriding all other quoting characters
u http://xxx.xxx/xxx{xxx}} u
m 111111111111111111000000 m
u http://xxx.xxx/xxx{xxx} u
m 11111111111111111100000 m
u http://xxx.xxx/xxx?q={xxx}} u
m 111111111111111111111111110 m
u http://xxx.xxx/xxx?q={xxx};{"[]()''`}} u
m 11111111111111111111111111111111111110 m
========
Mail:

View File

@ -49,6 +49,7 @@ OK u "http://github.com/notepad-plus-plus/notepad-plus-plus" u
OK u "https://github.com /notepad-plus-plus/notepad-plus-plus" u
OK u "https://github.com/notepad plus plus/notepad-plus-plus" u
OK u (https://github.com/notepad-plus-plus/notepad-plus-plus) u
OK u [https://github.com/notepad-plus-plus/notepad-plus-plus] u
OK u https://github.com/notepad-plus-plus/notepad-plus-plus; u
OK u https://github.com/notepad-plus-plus/notepad-plus-plus? u
OK u https://github.com/notepad-plus-plus/notepad-plus-plus! u
@ -57,6 +58,7 @@ OK u http://github.com/notepad-plus-plus/notepad-plus-plus#fragment u
OK u (e.g., https://en.wikipedia.org/wiki/Saw_(2003_film))? u
OK u (https://en.wikipedia.org/wiki/Saw_(2003_film)) u
OK u (https://en.wikipedia.org/wiki/Saw_2003_film) u
OK u [https://en.wikipedia.org/wiki/Saw_[2003_film]] u
OK u https://apache-windows.ru/как-установить-сервер-apache-c-php-mysql-и-phpmyadmin-на-windows/ u
OK u https://www.rnids.rs/национални-домени/регистрација-националних-домена u
OK u https://www.morfix.co.il/שלום u
@ -68,6 +70,51 @@ OK u [https://ig.com/?query=c761&vars={"id":"0815","first":100}] u
OK u "https://ig.com/?query=c761&vars={"id":"0815","first":100}" u
OK u \href{https://weblink.com/}{click me} u
OK u \href{https://ig.com/?query=c761&vars={"id":"0815","first":100}}{click me} u
OK u http://xxx.xxx/xxx"xxx" u
OK u http://xxx.xxx/?q="A"+"B"" u
OK u http://xxx.xxx/?q="A'+'B{}`'"" u
OK u https://en.wikipedia.org/wiki/Murphy's_law u
OK u http://xxx.xxx/xxx'xxx' u
OK u http://xxx.xxx/xxx'xxx'' u
OK u http://xxx.xxx/?q='A'+'B'' u
OK u http://xxx.xxx/?q='A'+'B'' u
OK u http://xxx.xxx/?q='A'+'B"{}`'' u
OK u http://xxx.xxx/Tom`s_sisters u
OK u http://xxx.xxx/Tom`s%20sisters` u
OK u http://xxx.xxx/Tom`s%20sisters`` u
OK u http://xxx.xxx/?q=`A`+`B` u
OK u http://xxx.xxx/?q=`A`+`B`` u
OK u http://xxx.xxx/?q=`A"{}()'`` u
OK u http://xxx.xxx/xxx) u
OK u http://xxx.xxx/xxx(xxx)) u
OK u http://xxx.xxx/xxx(xxx) u
OK u http://xxx.xxx/xxx()xxx) u
OK u http://xxx.xxx/xxx)) u
OK u http://xxx.xxx/xxx)(xxx) u
OK u http://xxx.xxx/xxx)(xxx)) u
OK u http://xxx.xxx/xxx((xxx) u
OK u http://xxx.xxx/xxx)((xxx) u
OK u http://xxx.xxx/xxx?q=(xxx)) u
OK u http://xxx.xxx/xxx?q=(xxx)( u
OK u http://xxx.xxx/xxx?q=(xxx)( u
OK u http://xxx.xxx/xxx?q=(xxx)&(xxx)( u
OK u http://xxx.xxx/xxx] u
OK u http://xxx.xxx/xxx[xxx]] u
OK u http://xxx.xxx/xxx[xxx] u
OK u http://xxx.xxx/xxx[]xxx] u
OK u http://xxx.xxx/xxx]] u
OK u http://xxx.xxx/xxx][xxx] u
OK u http://xxx.xxx/xxx][xxx]] u
OK u http://xxx.xxx/xxx[[xxx] u
OK u http://xxx.xxx/xxx][[xxx] u
OK u http://xxx.xxx/xxx?q=[xxx]] u
OK u http://xxx.xxx/xxx?q=[xxx][ u
OK u )http://xxx.xxx/xxx?q=[xxx][ u
OK u http://xxx.xxx/xxx?q=[xxx]&[xxx][ u
OK u http://xxx.xxx/xxx{xxx}} u
OK u http://xxx.xxx/xxx{xxx} u
OK u http://xxx.xxx/xxx?q={xxx}} u
OK u http://xxx.xxx/xxx?q={xxx};{"[]()''`}} u
OK u mailto:don.h@free.fr u
OK u <don.h@free.fr> u
OK u <mailto:don.h@free.fr> u

View File

@ -8,18 +8,3 @@ m 0000000000000000 m
u домhttp://test.com u
m 000000000000000000 m
Apostrophes:
u https://en.wikipedia.org/wiki/Murphy's_law u
m 111111111111111111111111111111111111111111 m
u http://xxx.xxx/Tom's%20sisters'%20careers u
m 11111111111111111111111111111111111111111 m
u http://xxx.xxx/Tom's%20sisters' u
m 1111111111111111111111111111111 m
u http://xxx.xxx/Tom's%20sisters'' u
m 11111111111111111111111111111111 m

View File

@ -4,15 +4,3 @@ ko m 0111111111111111 m
KO u домhttp://test.com u
ok m 000000000000000000 m
ko m 000111111111111111 m
KO u https://en.wikipedia.org/wiki/Murphy's_law u
ok m 111111111111111111111111111111111111111111 m
ko m 111111111111111111111111111111111111000000 m
KO u http://xxx.xxx/Tom's%20sisters'%20careers u
ok m 11111111111111111111111111111111111111111 m
ko m 11111111111111111100000000000000000000000 m
KO u http://xxx.xxx/Tom's%20sisters' u
ok m 1111111111111111111111111111111 m
ko m 1111111111111111110000000000000 m
KO u http://xxx.xxx/Tom's%20sisters'' u
ok m 11111111111111111111111111111111 m
ko m 11111111111111111100000000000000 m

View File

@ -2600,7 +2600,6 @@ bool isUrlTextChar(TCHAR const c)
{
case '"':
case '#':
case '\'':
case '<':
case '>':
case '{':
@ -2731,11 +2730,21 @@ void scanToUrlEnd(TCHAR *text, int textLen, int start, int* distance)
break;
case sQueryAfterDelimiter:
if ((text [p] == '\'') || (text [p] == '"'))
if ((text [p] == '\'') || (text [p] == '"') || (text [p] == '`'))
{
q = text [p];
s = sQueryQuotes;
}
else if (text [p] == '(')
{
q = ')';
s = sQueryQuotes;
}
else if (text [p] == '[')
{
q = ']';
s = sQueryQuotes;
}
else if (text [p] == '{')
{
q = '}';
@ -2799,43 +2808,28 @@ bool removeUnwantedTrailingCharFromUrl (TCHAR const *text, int* length)
}
}
{ // remove unwanted closing parenthesis
const TCHAR *closingParenthesis = L")]>";
const TCHAR *openingParenthesis = L"([<";
const TCHAR *closingParenthesis = L")]";
const TCHAR *openingParenthesis = L"([";
for (int i = 0; closingParenthesis [i]; i++)
if (text [l] == closingParenthesis [i])
{
int count = 1;
int count = 0;
for (int j = l - 1; j >= 0; j--)
{
if (text [j] == closingParenthesis [i])
count++;
if (text [j] == openingParenthesis [i])
if (count > 0)
count--;
else
return false;
}
if (count == 0)
if (count != 0)
return false;
*length = l;
return true;
}
}
{ // remove unwanted quotes
const TCHAR *quotes = L"\"'`";
for (int i = 0; quotes [i]; i++)
{
if (text [l] == quotes [i])
{
int count = 0;
for (int j = l - 1; j >= 0; j--)
if (text [j] == quotes [i])
count++;
if (count & 1)
return false;
*length = l;
return true;
}
}
}
return false;
}