[Base] Don't use reverse iterator for UTF-8.

[Base] Don't use reverse iterator for UTF-8. Fixes #1760.
This commit is contained in:
gibbed 2021-05-01 15:12:21 -05:00 committed by Rick Gibbed
parent b5fc1efad5
commit dbd980150b

View file

@@ -19,9 +19,7 @@
namespace utfcpp = utf8; namespace utfcpp = utf8;
using citer = std::string_view::const_iterator; using citer = std::string_view::const_iterator;
using criter = std::string_view::const_reverse_iterator;
using utf8_citer = utfcpp::iterator<std::string_view::const_iterator>; using utf8_citer = utfcpp::iterator<std::string_view::const_iterator>;
using utf8_criter = utfcpp::iterator<std::string_view::const_reverse_iterator>;
namespace xe::utf8 { namespace xe::utf8 {
@@ -54,25 +52,10 @@ std::pair<utf8_citer, utf8_citer> make_citer(const utf8_citer begin,
utf8_citer(end.base(), begin.base(), end.base())}; utf8_citer(end.base(), begin.base(), end.base())};
} }
std::pair<utf8_criter, utf8_criter> make_criter(const std::string_view view) {
return {utf8_criter(view.crbegin(), view.crbegin(), view.crend()),
utf8_criter(view.crend(), view.crbegin(), view.crend())};
}
std::pair<utf8_criter, utf8_criter> make_criter(const utf8_criter begin,
const utf8_criter end) {
return {utf8_criter(begin.base(), begin.base(), end.base()),
utf8_criter(end.base(), begin.base(), end.base())};
}
size_t byte_length(utf8_citer begin, utf8_citer end) { size_t byte_length(utf8_citer begin, utf8_citer end) {
return size_t(std::distance(begin.base(), end.base())); return size_t(std::distance(begin.base(), end.base()));
} }
size_t byte_length(utf8_criter begin, utf8_criter end) {
return size_t(std::distance(begin.base(), end.base()));
}
size_t count(const std::string_view view) { size_t count(const std::string_view view) {
return size_t(utfcpp::distance(view.cbegin(), view.cend())); return size_t(utfcpp::distance(view.cbegin(), view.cend()));
} }
@@ -435,21 +418,23 @@ bool ends_with(const std::string_view haystack, const std::string_view needle) {
return false; return false;
} }
auto [haystack_begin, haystack_end] = make_criter(haystack); auto [haystack_begin, haystack_end] = make_citer(haystack);
auto [needle_begin, needle_end] = make_criter(needle); auto [needle_begin, needle_end] = make_citer(needle);
auto needle_count = count(needle); auto needle_count = count(needle);
auto it = haystack_begin; auto it = haystack_end;
auto end = it; auto end = it;
for (size_t i = 0; i < needle_count; ++i) { --it;
if (end == haystack_end) {
for (size_t i = 1; i < needle_count; ++i) {
if (it == haystack_begin) {
// not enough room in target for search // not enough room in target for search
return false; return false;
} }
++end; --it;
} }
auto [sub_start, sub_end] = make_criter(it, end); auto [sub_start, sub_end] = make_citer(it, end);
return std::equal(needle_begin, needle_end, sub_start, sub_end); return std::equal(needle_begin, needle_end, sub_start, sub_end);
} }
@@ -461,21 +446,23 @@ bool ends_with_case(const std::string_view haystack,
return false; return false;
} }
auto [haystack_begin, haystack_end] = make_criter(haystack); auto [haystack_begin, haystack_end] = make_citer(haystack);
auto [needle_begin, needle_end] = make_criter(needle); auto [needle_begin, needle_end] = make_citer(needle);
auto needle_count = count(needle); auto needle_count = count(needle);
auto it = haystack_begin; auto it = haystack_end;
auto end = it; auto end = it;
--it;
for (size_t i = 0; i < needle_count; ++i) { for (size_t i = 0; i < needle_count; ++i) {
if (end == haystack_end) { if (it == haystack_begin) {
// not enough room in target for search // not enough room in target for search
return false; return false;
} }
++end; --it;
} }
auto [sub_start, sub_end] = make_criter(it, end); auto [sub_start, sub_end] = make_citer(it, end);
return std::equal(needle_begin, needle_end, sub_start, sub_end, return std::equal(needle_begin, needle_end, sub_start, sub_end,
equal_ascii_case); equal_ascii_case);
} }
@@ -492,7 +479,9 @@ std::string join_paths(const std::string_view left_path,
return std::string(left_path); return std::string(left_path);
} }
auto [it, end] = make_criter(left_path); utf8_citer it;
std::tie(std::ignore, it) = make_citer(left_path);
--it;
std::string result = std::string(left_path); std::string result = std::string(left_path);
if (*it != static_cast<uint32_t>(separator)) { if (*it != static_cast<uint32_t>(separator)) {
@@ -563,25 +552,38 @@ std::string find_name_from_path(const std::string_view path,
return std::string(); return std::string();
} }
auto [begin, end] = make_criter(path); auto [begin, end] = make_citer(path);
auto it = begin; auto it = end;
--it;
// path is padded with separator
size_t padding = 0; size_t padding = 0;
if (*it == uint32_t(separator)) { if (*it == uint32_t(separator)) {
++it; --it;
--end;
padding = 1; padding = 1;
} }
if (it == end) { // path is just separator
if (it == begin) {
return std::string(); return std::string();
} }
it = std::find(it, end, uint32_t(separator)); // search for separator
if (it == end) { while (it != begin) {
if (*it == uint32_t(separator)) {
break;
}
--it;
}
// no separator -- copy entire string (except trailing separator)
if (it == begin) {
return std::string(path.substr(0, path.size() - padding)); return std::string(path.substr(0, path.size() - padding));
} }
auto length = byte_length(begin, it); auto length = byte_length(std::next(it), end);
auto offset = path.length() - length; auto offset = path.length() - length;
return std::string(path.substr(offset, length - padding)); return std::string(path.substr(offset, length - padding));
} }
@@ -593,20 +595,25 @@ std::string find_base_name_from_path(const std::string_view path,
return std::string(); return std::string();
} }
auto [begin, end] = make_criter(name); auto [begin, end] = make_citer(name);
auto it = std::find(begin, end, uint32_t('.')); auto it = end;
if (it == end) { --it;
while (it != begin) {
if (*it == uint32_t('.')) {
break;
}
--it;
}
if (it == begin) {
return name; return name;
} }
it++; auto length = byte_length(it, end);
if (it == end) { auto offset = name.length() - length;
return std::string(); return std::string(name.substr(0, offset));
}
auto length = name.length() - byte_length(begin, it);
return std::string(name.substr(0, length));
} }
std::string find_base_path(const std::string_view path, char32_t separator) { std::string find_base_path(const std::string_view path, char32_t separator) {
@@ -614,25 +621,29 @@ std::string find_base_path(const std::string_view path, char32_t separator) {
return std::string(); return std::string();
} }
auto [begin, end] = make_criter(path); auto [begin, end] = make_citer(path);
auto it = end;
--it;
auto it = begin;
if (*it == uint32_t(separator)) { if (*it == uint32_t(separator)) {
++it; --it;
} }
it = std::find(it, end, uint32_t(separator)); while (it != begin) {
if (it == end) { if (*it == uint32_t(separator)) {
break;
}
--it;
}
if (it == begin) {
return std::string(); return std::string();
} }
++it; auto length = byte_length(it, end);
if (it == end) { auto offset = path.length() - length;
return std::string(); return std::string(path.substr(0, offset));
}
auto length = path.length() - byte_length(begin, it);
return std::string(path.substr(0, length));
} }
std::string canonicalize_path(const std::string_view path, char32_t separator) { std::string canonicalize_path(const std::string_view path, char32_t separator) {