From e65b4c37bfe64e48302424a81547dfb0c29bc362 Mon Sep 17 00:00:00 2001 From: nrecker Date: Sat, 14 Apr 2018 15:10:34 -0400 Subject: [PATCH] Fix Sort Lines as Integers issue: use Natural Sort algorithm This changes the line operations "Sort as Integers Ascending" and "Sort as Integers Descending" to sort by Natural Sort Order, in which consecutive numerals are considered as one character. This causes "2" < "10", just like in the old Integer sort, but also "foo 2" < "foo 10", which was not previously available functionality. In cases where every line is a single integer, Natural Sort functions exactly the same as Integer Sort; when every line begins with a single integer, it is a valid Integer Sort. Close #4413, fix #2025 --- PowerEditor/src/MISC/Common/Sorters.h | 119 ++++++++++++++++++++++++++ PowerEditor/src/NppCommands.cpp | 2 +- 2 files changed, 120 insertions(+), 1 deletion(-) diff --git a/PowerEditor/src/MISC/Common/Sorters.h b/PowerEditor/src/MISC/Common/Sorters.h index b0d335f6..7191c19c 100644 --- a/PowerEditor/src/MISC/Common/Sorters.h +++ b/PowerEditor/src/MISC/Common/Sorters.h @@ -114,6 +114,125 @@ public: } }; +// Treat consecutive numerals as one number +// Otherwise it is a lexicographic sort +class NaturalSorter : public ISorter +{ +public: + NaturalSorter(bool isDescending, size_t fromColumn, size_t toColumn) : ISorter(isDescending, fromColumn, toColumn) { }; + + std::vector sort(std::vector lines) override + { + // Note that both branches here are equivalent in the sense that they give always give the same answer. + // However, if we are *not* sorting specific columns, then we get a 40% speed improvement by not calling + // getSortKey() so many times. + if (isSortingSpecificColumns()) + { + std::sort(lines.begin(), lines.end(), [this](generic_string aIn, generic_string bIn) + { + generic_string a = getSortKey(aIn); + generic_string b = getSortKey(bIn); + + long long compareResult = 0; + size_t i = 0; + while (compareResult == 0) + { + if (i >= a.length() || i >= b.length()) + { + compareResult = a.compare(min(i, a.length()), generic_string::npos, b, min(i, b.length()), generic_string::npos); + break; + } + + bool aChunkIsNum = a[i] >= L'0' && a[i] <= L'9'; + bool bChunkIsNum = b[i] >= L'0' && b[i] <= L'9'; + + // One is number and one is string + if (aChunkIsNum != bChunkIsNum) + { + compareResult = a[i] - b[i]; + // No need to update i; compareResult != 0 + } + // Both are numbers + else if (aChunkIsNum) + { + size_t delta = 0; + compareResult = std::stoll(a.substr(i)) - std::stoll(b.substr(i), &delta); + i += delta; + } + // Both are strings + else + { + size_t aChunkEnd = a.find_first_of(L"1234567890", i); + size_t bChunkEnd = b.find_first_of(L"1234567890", i); + compareResult = a.compare(i, aChunkEnd - i, b, i, bChunkEnd - i); + i = aChunkEnd; + } + } + + if (isDescending()) + { + return compareResult > 0; + } + else + { + return compareResult < 0; + } + }); + } + else + { + std::sort(lines.begin(), lines.end(), [this](generic_string a, generic_string b) + { + long long compareResult = 0; + size_t i = 0; + while (compareResult == 0) + { + if (i >= a.length() || i >= b.length()) + { + compareResult = a.compare(min(i,a.length()), generic_string::npos, b, min(i,b.length()), generic_string::npos); + break; + } + + bool aChunkIsNum = a[i] >= L'0' && a[i] <= L'9'; + bool bChunkIsNum = b[i] >= L'0' && b[i] <= L'9'; + + // One is number and one is string + if (aChunkIsNum != bChunkIsNum) + { + compareResult = a[i] - b[i]; + // No need to update i; compareResult != 0 + } + // Both are numbers + else if (aChunkIsNum) + { + size_t delta = 0; + compareResult = std::stoll(a.substr(i)) - std::stoll(b.substr(i), &delta); + i += delta; + } + // Both are strings + else + { + size_t aChunkEnd = a.find_first_of(L"1234567890", i); + size_t bChunkEnd = b.find_first_of(L"1234567890", i); + compareResult = a.compare(i, aChunkEnd-i, b, i, bChunkEnd-i); + i = aChunkEnd; + } + } + + if (isDescending()) + { + return compareResult > 0; + } + else + { + return compareResult < 0; + } + }); + } + return lines; + } +}; + // Convert each line to a number and then sort. // The conversion must be implemented in classes which inherit from this, see prepareStringForConversion and convertStringToNumber. template diff --git a/PowerEditor/src/NppCommands.cpp b/PowerEditor/src/NppCommands.cpp index 292107d7..5956fe85 100644 --- a/PowerEditor/src/NppCommands.cpp +++ b/PowerEditor/src/NppCommands.cpp @@ -612,7 +612,7 @@ void Notepad_plus::command(int id) } else if (id == IDM_EDIT_SORTLINES_INTEGER_DESCENDING || id == IDM_EDIT_SORTLINES_INTEGER_ASCENDING) { - pSorter = std::unique_ptr(new IntegerSorter(isDescending, fromColumn, toColumn)); + pSorter = std::unique_ptr(new NaturalSorter(isDescending, fromColumn, toColumn)); } else if (id == IDM_EDIT_SORTLINES_DECIMALCOMMA_DESCENDING || id == IDM_EDIT_SORTLINES_DECIMALCOMMA_ASCENDING) {