Fix Sort Lines as Integers issue: use Natural Sort algorithm

This changes the line operations "Sort as Integers Ascending" and "Sort as Integers Descending" to sort by Natural Sort Order, in which consecutive numerals are considered as one character. This causes "2" < "10", just like in the old Integer sort, but also "foo 2" < "foo 10", which was not previously available functionality. In cases where every line is a single integer, Natural Sort functions exactly the same as Integer Sort; when every line begins with a single integer, it is a valid Integer Sort.

Close #4413, fix #2025
This commit is contained in:
nrecker 2018-04-14 15:10:34 -04:00 committed by Don HO
parent 2e4b01cd3d
commit e65b4c37bf
2 changed files with 120 additions and 1 deletions

View File

@ -114,6 +114,125 @@ public:
}
};
// Treat consecutive numerals as one number
// Otherwise it is a lexicographic sort
class NaturalSorter : public ISorter
{
public:
NaturalSorter(bool isDescending, size_t fromColumn, size_t toColumn) : ISorter(isDescending, fromColumn, toColumn) { };
std::vector<generic_string> sort(std::vector<generic_string> lines) override
{
// Note that both branches here are equivalent in the sense that they give always give the same answer.
// However, if we are *not* sorting specific columns, then we get a 40% speed improvement by not calling
// getSortKey() so many times.
if (isSortingSpecificColumns())
{
std::sort(lines.begin(), lines.end(), [this](generic_string aIn, generic_string bIn)
{
generic_string a = getSortKey(aIn);
generic_string b = getSortKey(bIn);
long long compareResult = 0;
size_t i = 0;
while (compareResult == 0)
{
if (i >= a.length() || i >= b.length())
{
compareResult = a.compare(min(i, a.length()), generic_string::npos, b, min(i, b.length()), generic_string::npos);
break;
}
bool aChunkIsNum = a[i] >= L'0' && a[i] <= L'9';
bool bChunkIsNum = b[i] >= L'0' && b[i] <= L'9';
// One is number and one is string
if (aChunkIsNum != bChunkIsNum)
{
compareResult = a[i] - b[i];
// No need to update i; compareResult != 0
}
// Both are numbers
else if (aChunkIsNum)
{
size_t delta = 0;
compareResult = std::stoll(a.substr(i)) - std::stoll(b.substr(i), &delta);
i += delta;
}
// Both are strings
else
{
size_t aChunkEnd = a.find_first_of(L"1234567890", i);
size_t bChunkEnd = b.find_first_of(L"1234567890", i);
compareResult = a.compare(i, aChunkEnd - i, b, i, bChunkEnd - i);
i = aChunkEnd;
}
}
if (isDescending())
{
return compareResult > 0;
}
else
{
return compareResult < 0;
}
});
}
else
{
std::sort(lines.begin(), lines.end(), [this](generic_string a, generic_string b)
{
long long compareResult = 0;
size_t i = 0;
while (compareResult == 0)
{
if (i >= a.length() || i >= b.length())
{
compareResult = a.compare(min(i,a.length()), generic_string::npos, b, min(i,b.length()), generic_string::npos);
break;
}
bool aChunkIsNum = a[i] >= L'0' && a[i] <= L'9';
bool bChunkIsNum = b[i] >= L'0' && b[i] <= L'9';
// One is number and one is string
if (aChunkIsNum != bChunkIsNum)
{
compareResult = a[i] - b[i];
// No need to update i; compareResult != 0
}
// Both are numbers
else if (aChunkIsNum)
{
size_t delta = 0;
compareResult = std::stoll(a.substr(i)) - std::stoll(b.substr(i), &delta);
i += delta;
}
// Both are strings
else
{
size_t aChunkEnd = a.find_first_of(L"1234567890", i);
size_t bChunkEnd = b.find_first_of(L"1234567890", i);
compareResult = a.compare(i, aChunkEnd-i, b, i, bChunkEnd-i);
i = aChunkEnd;
}
}
if (isDescending())
{
return compareResult > 0;
}
else
{
return compareResult < 0;
}
});
}
return lines;
}
};
// Convert each line to a number and then sort.
// The conversion must be implemented in classes which inherit from this, see prepareStringForConversion and convertStringToNumber.
template<typename T_Num>

View File

@ -612,7 +612,7 @@ void Notepad_plus::command(int id)
}
else if (id == IDM_EDIT_SORTLINES_INTEGER_DESCENDING || id == IDM_EDIT_SORTLINES_INTEGER_ASCENDING)
{
pSorter = std::unique_ptr<ISorter>(new IntegerSorter(isDescending, fromColumn, toColumn));
pSorter = std::unique_ptr<ISorter>(new NaturalSorter(isDescending, fromColumn, toColumn));
}
else if (id == IDM_EDIT_SORTLINES_DECIMALCOMMA_DESCENDING || id == IDM_EDIT_SORTLINES_DECIMALCOMMA_ASCENDING)
{