Fix Sort Lines as Integers issue: use Natural Sort algorithm
This changes the line operations "Sort as Integers Ascending" and "Sort as Integers Descending" to sort by Natural Sort Order, in which consecutive numerals are considered as one character. This causes "2" < "10", just like in the old Integer sort, but also "foo 2" < "foo 10", which was not previously available functionality. In cases where every line is a single integer, Natural Sort functions exactly the same as Integer Sort; when every line begins with a single integer, it is a valid Integer Sort. Close #4413, fix #2025
This commit is contained in:
parent
2e4b01cd3d
commit
e65b4c37bf
@ -114,6 +114,125 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
// Treat consecutive numerals as one number
|
||||
// Otherwise it is a lexicographic sort
|
||||
class NaturalSorter : public ISorter
|
||||
{
|
||||
public:
|
||||
NaturalSorter(bool isDescending, size_t fromColumn, size_t toColumn) : ISorter(isDescending, fromColumn, toColumn) { };
|
||||
|
||||
std::vector<generic_string> sort(std::vector<generic_string> lines) override
|
||||
{
|
||||
// Note that both branches here are equivalent in the sense that they give always give the same answer.
|
||||
// However, if we are *not* sorting specific columns, then we get a 40% speed improvement by not calling
|
||||
// getSortKey() so many times.
|
||||
if (isSortingSpecificColumns())
|
||||
{
|
||||
std::sort(lines.begin(), lines.end(), [this](generic_string aIn, generic_string bIn)
|
||||
{
|
||||
generic_string a = getSortKey(aIn);
|
||||
generic_string b = getSortKey(bIn);
|
||||
|
||||
long long compareResult = 0;
|
||||
size_t i = 0;
|
||||
while (compareResult == 0)
|
||||
{
|
||||
if (i >= a.length() || i >= b.length())
|
||||
{
|
||||
compareResult = a.compare(min(i, a.length()), generic_string::npos, b, min(i, b.length()), generic_string::npos);
|
||||
break;
|
||||
}
|
||||
|
||||
bool aChunkIsNum = a[i] >= L'0' && a[i] <= L'9';
|
||||
bool bChunkIsNum = b[i] >= L'0' && b[i] <= L'9';
|
||||
|
||||
// One is number and one is string
|
||||
if (aChunkIsNum != bChunkIsNum)
|
||||
{
|
||||
compareResult = a[i] - b[i];
|
||||
// No need to update i; compareResult != 0
|
||||
}
|
||||
// Both are numbers
|
||||
else if (aChunkIsNum)
|
||||
{
|
||||
size_t delta = 0;
|
||||
compareResult = std::stoll(a.substr(i)) - std::stoll(b.substr(i), &delta);
|
||||
i += delta;
|
||||
}
|
||||
// Both are strings
|
||||
else
|
||||
{
|
||||
size_t aChunkEnd = a.find_first_of(L"1234567890", i);
|
||||
size_t bChunkEnd = b.find_first_of(L"1234567890", i);
|
||||
compareResult = a.compare(i, aChunkEnd - i, b, i, bChunkEnd - i);
|
||||
i = aChunkEnd;
|
||||
}
|
||||
}
|
||||
|
||||
if (isDescending())
|
||||
{
|
||||
return compareResult > 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
return compareResult < 0;
|
||||
}
|
||||
});
|
||||
}
|
||||
else
|
||||
{
|
||||
std::sort(lines.begin(), lines.end(), [this](generic_string a, generic_string b)
|
||||
{
|
||||
long long compareResult = 0;
|
||||
size_t i = 0;
|
||||
while (compareResult == 0)
|
||||
{
|
||||
if (i >= a.length() || i >= b.length())
|
||||
{
|
||||
compareResult = a.compare(min(i,a.length()), generic_string::npos, b, min(i,b.length()), generic_string::npos);
|
||||
break;
|
||||
}
|
||||
|
||||
bool aChunkIsNum = a[i] >= L'0' && a[i] <= L'9';
|
||||
bool bChunkIsNum = b[i] >= L'0' && b[i] <= L'9';
|
||||
|
||||
// One is number and one is string
|
||||
if (aChunkIsNum != bChunkIsNum)
|
||||
{
|
||||
compareResult = a[i] - b[i];
|
||||
// No need to update i; compareResult != 0
|
||||
}
|
||||
// Both are numbers
|
||||
else if (aChunkIsNum)
|
||||
{
|
||||
size_t delta = 0;
|
||||
compareResult = std::stoll(a.substr(i)) - std::stoll(b.substr(i), &delta);
|
||||
i += delta;
|
||||
}
|
||||
// Both are strings
|
||||
else
|
||||
{
|
||||
size_t aChunkEnd = a.find_first_of(L"1234567890", i);
|
||||
size_t bChunkEnd = b.find_first_of(L"1234567890", i);
|
||||
compareResult = a.compare(i, aChunkEnd-i, b, i, bChunkEnd-i);
|
||||
i = aChunkEnd;
|
||||
}
|
||||
}
|
||||
|
||||
if (isDescending())
|
||||
{
|
||||
return compareResult > 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
return compareResult < 0;
|
||||
}
|
||||
});
|
||||
}
|
||||
return lines;
|
||||
}
|
||||
};
|
||||
|
||||
// Convert each line to a number and then sort.
|
||||
// The conversion must be implemented in classes which inherit from this, see prepareStringForConversion and convertStringToNumber.
|
||||
template<typename T_Num>
|
||||
|
@ -612,7 +612,7 @@ void Notepad_plus::command(int id)
|
||||
}
|
||||
else if (id == IDM_EDIT_SORTLINES_INTEGER_DESCENDING || id == IDM_EDIT_SORTLINES_INTEGER_ASCENDING)
|
||||
{
|
||||
pSorter = std::unique_ptr<ISorter>(new IntegerSorter(isDescending, fromColumn, toColumn));
|
||||
pSorter = std::unique_ptr<ISorter>(new NaturalSorter(isDescending, fromColumn, toColumn));
|
||||
}
|
||||
else if (id == IDM_EDIT_SORTLINES_DECIMALCOMMA_DESCENDING || id == IDM_EDIT_SORTLINES_DECIMALCOMMA_ASCENDING)
|
||||
{
|
||||
|
Loading…
Reference in New Issue
Block a user