diff --git a/PowerEditor/src/MISC/Common/Sorters.h b/PowerEditor/src/MISC/Common/Sorters.h index b0d335f6..7191c19c 100644 --- a/PowerEditor/src/MISC/Common/Sorters.h +++ b/PowerEditor/src/MISC/Common/Sorters.h @@ -114,6 +114,125 @@ public: } }; +// Treat consecutive numerals as one number +// Otherwise it is a lexicographic sort +class NaturalSorter : public ISorter +{ +public: + NaturalSorter(bool isDescending, size_t fromColumn, size_t toColumn) : ISorter(isDescending, fromColumn, toColumn) { }; + + std::vector sort(std::vector lines) override + { + // Note that both branches here are equivalent in the sense that they give always give the same answer. + // However, if we are *not* sorting specific columns, then we get a 40% speed improvement by not calling + // getSortKey() so many times. + if (isSortingSpecificColumns()) + { + std::sort(lines.begin(), lines.end(), [this](generic_string aIn, generic_string bIn) + { + generic_string a = getSortKey(aIn); + generic_string b = getSortKey(bIn); + + long long compareResult = 0; + size_t i = 0; + while (compareResult == 0) + { + if (i >= a.length() || i >= b.length()) + { + compareResult = a.compare(min(i, a.length()), generic_string::npos, b, min(i, b.length()), generic_string::npos); + break; + } + + bool aChunkIsNum = a[i] >= L'0' && a[i] <= L'9'; + bool bChunkIsNum = b[i] >= L'0' && b[i] <= L'9'; + + // One is number and one is string + if (aChunkIsNum != bChunkIsNum) + { + compareResult = a[i] - b[i]; + // No need to update i; compareResult != 0 + } + // Both are numbers + else if (aChunkIsNum) + { + size_t delta = 0; + compareResult = std::stoll(a.substr(i)) - std::stoll(b.substr(i), &delta); + i += delta; + } + // Both are strings + else + { + size_t aChunkEnd = a.find_first_of(L"1234567890", i); + size_t bChunkEnd = b.find_first_of(L"1234567890", i); + compareResult = a.compare(i, aChunkEnd - i, b, i, bChunkEnd - i); + i = aChunkEnd; + } + } + + if (isDescending()) + { + return compareResult > 0; + } + else + { + return compareResult < 0; + } + }); + } + else + { + std::sort(lines.begin(), lines.end(), [this](generic_string a, generic_string b) + { + long long compareResult = 0; + size_t i = 0; + while (compareResult == 0) + { + if (i >= a.length() || i >= b.length()) + { + compareResult = a.compare(min(i,a.length()), generic_string::npos, b, min(i,b.length()), generic_string::npos); + break; + } + + bool aChunkIsNum = a[i] >= L'0' && a[i] <= L'9'; + bool bChunkIsNum = b[i] >= L'0' && b[i] <= L'9'; + + // One is number and one is string + if (aChunkIsNum != bChunkIsNum) + { + compareResult = a[i] - b[i]; + // No need to update i; compareResult != 0 + } + // Both are numbers + else if (aChunkIsNum) + { + size_t delta = 0; + compareResult = std::stoll(a.substr(i)) - std::stoll(b.substr(i), &delta); + i += delta; + } + // Both are strings + else + { + size_t aChunkEnd = a.find_first_of(L"1234567890", i); + size_t bChunkEnd = b.find_first_of(L"1234567890", i); + compareResult = a.compare(i, aChunkEnd-i, b, i, bChunkEnd-i); + i = aChunkEnd; + } + } + + if (isDescending()) + { + return compareResult > 0; + } + else + { + return compareResult < 0; + } + }); + } + return lines; + } +}; + // Convert each line to a number and then sort. // The conversion must be implemented in classes which inherit from this, see prepareStringForConversion and convertStringToNumber. template diff --git a/PowerEditor/src/NppCommands.cpp b/PowerEditor/src/NppCommands.cpp index 292107d7..5956fe85 100644 --- a/PowerEditor/src/NppCommands.cpp +++ b/PowerEditor/src/NppCommands.cpp @@ -612,7 +612,7 @@ void Notepad_plus::command(int id) } else if (id == IDM_EDIT_SORTLINES_INTEGER_DESCENDING || id == IDM_EDIT_SORTLINES_INTEGER_ASCENDING) { - pSorter = std::unique_ptr(new IntegerSorter(isDescending, fromColumn, toColumn)); + pSorter = std::unique_ptr(new NaturalSorter(isDescending, fromColumn, toColumn)); } else if (id == IDM_EDIT_SORTLINES_DECIMALCOMMA_DESCENDING || id == IDM_EDIT_SORTLINES_DECIMALCOMMA_ASCENDING) {