From c78113df56df6aea73e27c334b9d54e2ec0227a3 Mon Sep 17 00:00:00 2001 From: Zach Laine Date: Sun, 10 Dec 2023 16:13:13 -0600 Subject: [PATCH] Add the latest version of Boost.Text components used here. This comes from 2c9d55a8 of the Boost.Text clang_support branch. Fixes #24. --- include/boost/parser/detail/printing.hpp | 4 +- include/boost/parser/detail/printing_impl.hpp | 19 +- .../boost/parser/detail/text/algorithm.hpp | 6 +- include/boost/parser/detail/text/concepts.hpp | 140 +- include/boost/parser/detail/text/config.hpp | 56 +- include/boost/parser/detail/text/dangling.hpp | 23 - .../parser/detail/text/detail/algorithm.hpp | 132 +- .../parser/detail/text/detail/unpack.hpp | 362 -- .../parser/detail/text/in_out_result.hpp | 47 + include/boost/parser/detail/text/subrange.hpp | 4 +- .../detail/text/transcode_algorithm.hpp | 319 +- .../parser/detail/text/transcode_iterator.hpp | 3477 +++++++---------- .../detail/text/transcode_iterator_fwd.hpp | 101 + .../parser/detail/text/transcode_view.hpp | 1259 +++--- include/boost/parser/detail/text/trie.hpp | 3 +- include/boost/parser/detail/text/unpack.hpp | 274 ++ include/boost/parser/detail/text/utf.hpp | 28 +- include/boost/parser/error_handling.hpp | 6 +- include/boost/parser/error_handling_fwd.hpp | 6 +- include/boost/parser/parser.hpp | 98 +- 20 files changed, 3013 insertions(+), 3351 deletions(-) delete mode 100644 include/boost/parser/detail/text/dangling.hpp delete mode 100644 include/boost/parser/detail/text/detail/unpack.hpp create mode 100644 include/boost/parser/detail/text/in_out_result.hpp create mode 100644 include/boost/parser/detail/text/transcode_iterator_fwd.hpp create mode 100644 include/boost/parser/detail/text/unpack.hpp diff --git a/include/boost/parser/detail/printing.hpp b/include/boost/parser/detail/printing.hpp index d1babb9b..5bf56489 100644 --- a/include/boost/parser/detail/printing.hpp +++ b/include/boost/parser/detail/printing.hpp @@ -306,7 +306,7 @@ namespace boost { namespace parser { 
namespace detail { static_assert( std::is_integral>{}, ""); static_assert(SizeofValueType == 4, ""); - auto utf8 = text::as_utf8(first_, last_); + auto utf8 = BOOST_PARSER_DETAIL_TEXT_SUBRANGE(first_, last_) | text::as_utf8; auto first = utf8.begin(); auto last = utf8.end(); if (quote) @@ -330,7 +330,7 @@ namespace boost { namespace parser { namespace detail { bool quote, int64_t trace_input_cps) { - auto utf32 = text::as_utf32(first_, last_); + auto utf32 = BOOST_PARSER_DETAIL_TEXT_SUBRANGE(first_, last_) | text::as_utf32; auto first = utf32.begin(); auto const last = utf32.end(); static_assert(sizeof(*first_) == 1); diff --git a/include/boost/parser/detail/printing_impl.hpp b/include/boost/parser/detail/printing_impl.hpp index 119b82f3..327d4be0 100644 --- a/include/boost/parser/detail/printing_impl.hpp +++ b/include/boost/parser/detail/printing_impl.hpp @@ -426,7 +426,7 @@ namespace boost { namespace parser { namespace detail { static void call(Context const & context, std::ostream & os, Expected expected) { - std::array cps = {{(uint32_t)expected}}; + std::array cps = {{(char32_t)expected}}; auto const r = text::as_utf8(cps); os << "'"; for (auto c : r) { @@ -579,9 +579,13 @@ namespace boost { namespace parser { namespace detail { int components) { os << "string(\""; - for (auto c : - text::as_utf8(parser.expected_first_, parser.expected_last_)) { - detail::print_char(os, c); + auto r = BOOST_PARSER_DETAIL_TEXT_SUBRANGE( + parser.expected_first_, parser.expected_last_) | + text::as_utf8; + auto it = r.begin(); + for (; it != r.end(); + ++it) { + detail::print_char(os, *it); } os << "\")"; } @@ -594,9 +598,10 @@ namespace boost { namespace parser { namespace detail { int components) { os << "\""; - for (auto c : text::as_utf8( - parser.parser_.expected_first_, - parser.parser_.expected_last_)) { + for (auto c : BOOST_PARSER_DETAIL_TEXT_SUBRANGE( + parser.parser_.expected_first_, + parser.parser_.expected_last_) | + text::as_utf8) { detail::print_char(os, c); } 
os << "\""; diff --git a/include/boost/parser/detail/text/algorithm.hpp b/include/boost/parser/detail/text/algorithm.hpp index bec75fac..9e7aef99 100644 --- a/include/boost/parser/detail/text/algorithm.hpp +++ b/include/boost/parser/detail/text/algorithm.hpp @@ -321,9 +321,9 @@ namespace boost::parser::detail { namespace text { if (first1 == last1 || first2 == last2) return {first1, first1}; - if (std::next(first2) == last2) { + if (detail::next(first2) == last2) { auto const it = parser::detail::text::find(first1, last1, *first2); - return {it, std::next(it)}; + return {it, detail::next(it)}; } auto it = first1; @@ -333,7 +333,7 @@ namespace boost::parser::detail { namespace text { if (first1 == last1) return {first1, first1}; - auto it2 = std::next(first2); + auto it2 = detail::next(first2); it = first1; if (++it == last1) return {it, it}; diff --git a/include/boost/parser/detail/text/concepts.hpp b/include/boost/parser/detail/text/concepts.hpp index 5ec4ac69..01e80d86 100644 --- a/include/boost/parser/detail/text/concepts.hpp +++ b/include/boost/parser/detail/text/concepts.hpp @@ -8,18 +8,30 @@ #include #include +#include -#if defined(BOOST_TEXT_DOXYGEN) || BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS +#if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS #include +#include namespace boost::parser::detail { namespace text { BOOST_PARSER_DETAIL_TEXT_NAMESPACE_V2 { //[ concepts_concepts +#ifdef _MSC_VER + inline constexpr format wchar_t_format = format::utf16; +#else + inline constexpr format wchar_t_format = format::utf32; +#endif + template - concept code_unit = std::integral && sizeof(T) == (int)F; + concept code_unit = (std::same_as && F == format::utf8) || + (std::same_as && F == format::utf16) || + (std::same_as && F == format::utf32) || + (std::same_as && F == format::utf8) || + (std::same_as && F == wchar_t_format); template concept utf8_code_unit = code_unit; @@ -32,14 +44,19 @@ namespace boost::parser::detail { namespace text { BOOST_PARSER_DETAIL_TEXT_NAME template 
concept code_unit_iter = - std::bidirectional_iterator && code_unit, F>; + std::input_iterator && code_unit, F>; + + template + concept utf_code_unit = + utf8_code_unit || utf16_code_unit || utf32_code_unit; + template concept code_unit_pointer = std::is_pointer_v && code_unit, F>; template - concept code_unit_range = std::ranges::bidirectional_range && + concept code_unit_range = std::ranges::input_range && code_unit, F>; template @@ -82,11 +99,19 @@ namespace boost::parser::detail { namespace text { BOOST_PARSER_DETAIL_TEXT_NAME template concept code_point_range = utf32_range; + template + concept utf_iter = utf8_iter || utf16_iter || utf32_iter; + template + concept utf_pointer = + utf8_pointer || utf16_pointer || utf32_pointer; + template + concept utf_range = utf8_range || utf16_range || utf32_range; + template concept grapheme_iter = // clang-format off - std::bidirectional_iterator && + std::input_iterator && code_point_range> && requires(T t) { { t.base() } -> code_point_iter; @@ -94,7 +119,7 @@ namespace boost::parser::detail { namespace text { BOOST_PARSER_DETAIL_TEXT_NAME }; template - concept grapheme_range = std::ranges::bidirectional_range && + concept grapheme_range = std::ranges::input_range && grapheme_iter>; template @@ -118,63 +143,62 @@ namespace boost::parser::detail { namespace text { BOOST_PARSER_DETAIL_TEXT_NAME namespace dtl { - template - concept eraseable_sized_bidi_range = + template + concept eraseable_insertable_sized_bidi_range = // clang-format off std::ranges::sized_range && - std::ranges::bidirectional_range && requires(T t) { - { t.erase(t.begin(), t.end()) } -> - std::same_as>; - // clang-format on - }; + std::ranges::input_range && + requires(T t, CodeUnit const * it) { + { t.erase(t.begin(), t.end()) } -> + std::same_as>; + { t.insert(t.end(), it, it) } -> + std::same_as>; + }; + // clang-format on } template concept utf8_string = // clang-format off - dtl::eraseable_sized_bidi_range && utf8_code_unit> && - requires(T t, char 
const * it) { - { t.insert(t.end(), it, it) } -> - std::same_as>; + dtl::eraseable_insertable_sized_bidi_range< + T, std::ranges::range_value_t>; // clang-format on - }; template concept utf16_string = // clang-format off - dtl::eraseable_sized_bidi_range && utf16_code_unit> && - requires(T t, uint16_t const * it) { - { t.insert(t.end(), it, it) } -> - std::same_as>; + dtl::eraseable_insertable_sized_bidi_range< + T, std::ranges::range_value_t>; // clang-format on - }; template concept utf_string = utf8_string || utf16_string; template // clang-format off - concept transcoding_error_handler = requires(T t) { - { t("") } -> code_point; + concept transcoding_error_handler = requires(T t, std::string_view msg) { + { t(msg) } -> std::same_as; // clang-format on }; - template - concept utf_iter = utf8_iter || utf16_iter || utf32_iter; - template // clang-format off concept utf_range_like = - utf8_range> || - utf16_range> || - utf32_range> || - utf8_pointer> || - utf16_pointer> || - utf32_pointer>; + utf_range> || + utf_pointer>; // clang-format on + template + concept utf8_range_like = utf8_code_unit> || + utf8_pointer>; + template + concept utf16_range_like = utf16_code_unit> || + utf16_pointer>; + template + concept utf32_range_like = utf32_code_unit> || + utf32_pointer>; //] // Clang 13 defines __cpp_lib_concepts but not std::indirectly copyable. 
@@ -194,4 +218,52 @@ namespace boost::parser::detail { namespace text { BOOST_PARSER_DETAIL_TEXT_NAME #endif +namespace boost::parser::detail { namespace text { namespace detail { + +#if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS + + template + using iterator_t = std::ranges::iterator_t; + template + using sentinel_t = std::ranges::sentinel_t; + template + using iter_value_t = std::iter_value_t; + template + using iter_reference_t = std::iter_reference_t; + template + using range_value_t = std::ranges::range_value_t; + template + using range_reference_t = std::ranges::range_reference_t; + template + using range_difference_t = std::ranges::range_difference_t; + +#else + + template + using iterator_t = decltype(detail::begin(std::declval())); + template + using sentinel_t = decltype(detail::end(std::declval())); + template + using iter_value_t = typename std::iterator_traits::value_type; + template + using iter_reference_t = decltype(*std::declval()); + template + using range_value_t = iter_value_t>; + template + using range_reference_t = iter_reference_t>; + template + using range_difference_t = std::ptrdiff_t; + + template + constexpr bool code_unit_v = +#if defined(__cpp_char8_t) + std::is_same_v || +#endif + std::is_same_v || std::is_same_v || + std::is_same_v || std::is_same_v; + +#endif + +}}} + #endif diff --git a/include/boost/parser/detail/text/config.hpp b/include/boost/parser/detail/text/config.hpp index f5ff0eee..de87a79c 100644 --- a/include/boost/parser/detail/text/config.hpp +++ b/include/boost/parser/detail/text/config.hpp @@ -8,17 +8,54 @@ #include +// Included for definition of __cpp_lib_concepts. +#include +#if 202002L <= __cplusplus && defined(__cpp_impl_coroutine) && __has_include() +#include +#endif + + +// The contents of in libstdc++ is incomplete (e.g. no owning_view) +// before GCC 12. #if !BOOST_PARSER_USE_CONCEPTS # define BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS 0 #else -// This is now hard-coded to use the pre-C++20 code path. 
There are a bunch -// of really odd compile errorswith Clang+libstdc++ I can't be bothered to -// address right now. (The latest version of Boost.Text might fix these -// errors, but there's also no pre-C++20 code path in that version of Text.) -# define BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS 0 +# define BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS 1 +#endif + +// GCC 12 claims to support 201907L <= __cpp_deduction_guides, but does not. +#if defined(__cpp_deduction_guides) && 201907L <= __cpp_deduction_guides && \ + (!defined(__GNUC__) || 13 <= __GNUC__) +#define BOOST_PARSER_DETAIL_TEXT_USE_ALIAS_CTAD 1 +#else +#define BOOST_PARSER_DETAIL_TEXT_USE_ALIAS_CTAD 0 +#endif + +#if defined(__cpp_lib_ranges) +namespace boost::parser::detail { namespace text { namespace detail { + inline constexpr auto begin = std::ranges::begin; + inline constexpr auto end = std::ranges::end; +}}} +#else +#include #endif +#if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS +# define BOOST_PARSER_DETAIL_TEXT_SUBRANGE std::ranges::subrange +#else +# include +# define BOOST_PARSER_DETAIL_TEXT_SUBRANGE boost::parser::detail::text::subrange +#endif + +namespace boost::parser::detail { namespace text { +#if defined(__cpp_char8_t) + using char8_type = char8_t; +#else + using char8_type = char; +#endif +}} + // The inline namespaces v1 and v2 represent pre- and post-C++20. v1 is // inline for standards before C++20, and v2 is inline for C++20 and later. 
// Note that this only applies to code for which a v2 namespace alternative @@ -32,13 +69,4 @@ # define BOOST_PARSER_DETAIL_TEXT_NAMESPACE_V2 namespace v2 #endif -#if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS -namespace boost::parser::detail { namespace text { namespace detail { - inline constexpr auto begin = std::ranges::begin; - inline constexpr auto end = std::ranges::end; -}}} -#else -#include -#endif - #endif diff --git a/include/boost/parser/detail/text/dangling.hpp b/include/boost/parser/detail/text/dangling.hpp deleted file mode 100644 index 7135d564..00000000 --- a/include/boost/parser/detail/text/dangling.hpp +++ /dev/null @@ -1,23 +0,0 @@ -// Copyright (C) 2020 T. Zachary Laine -// -// Distributed under the Boost Software License, Version 1.0. (See -// accompanying file LICENSE_1_0.txt or copy at -// http://www.boost.org/LICENSE_1_0.txt) -#ifndef BOOST_PARSER_DETAIL_TEXT_DANGLING_HPP -#define BOOST_PARSER_DETAIL_TEXT_DANGLING_HPP - -#include - - -#if defined(BOOST_TEXT_DOXYGEN) || BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS - -namespace boost::parser::detail::text { - /** An alias template analogous to `std::ranges::borrowed_subrange_t`. 
*/ - template - using borrowed_view_t = std:: - conditional_t, V, std::ranges::dangling>; -} - -#endif - -#endif diff --git a/include/boost/parser/detail/text/detail/algorithm.hpp b/include/boost/parser/detail/text/detail/algorithm.hpp index 377f0bbd..07e1c4c4 100644 --- a/include/boost/parser/detail/text/detail/algorithm.hpp +++ b/include/boost/parser/detail/text/detail/algorithm.hpp @@ -6,7 +6,6 @@ #ifndef BOOST_PARSER_DETAIL_TEXT_DETAIL_ALGORITHM_HPP #define BOOST_PARSER_DETAIL_TEXT_DETAIL_ALGORITHM_HPP -#include #include #include #include @@ -14,29 +13,39 @@ #include #include #include +#if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS +#include +#endif #include namespace boost::parser::detail { namespace text { namespace detail { + template + auto prev(I it) + { +#if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS + return std::ranges::prev(it); +#else + return std::prev(it); +#endif + } + template + auto next(I it) + { +#if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS + return std::ranges::next(it); +#else + return std::next(it); +#endif + } + template using remove_cv_ref_t = typename std::remove_cv::type>::type; #if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS - - template - using iterator_t = std::ranges::iterator_t; - template - using sentinel_t = std::ranges::sentinel_t; - template - using iter_value_t = std::iter_value_t; - template - using iter_reference_t = std::iter_reference_t; - template - using range_value_t = std::ranges::range_value_t; - // A grapheme_range that has a sentinel type that is not an iterator, but // that is comparable with T's interator type. 
template @@ -59,17 +68,6 @@ namespace boost::parser::detail { namespace text { namespace detail { #else - template - using iterator_t = decltype(detail::begin(std::declval())); - template - using sentinel_t = decltype(detail::end(std::declval())); - template - using iter_value_t = typename std::iterator_traits::value_type; - template - using iter_reference_t = decltype(*std::declval()); - template - using range_value_t = iter_value_t>; - template using has_base = decltype(std::declval().base()); template @@ -98,6 +96,41 @@ namespace boost::parser::detail { namespace text { namespace detail { #endif +#if 0 // TODO + template + struct void_ + { + using type = void; + static constexpr bool value = true; + }; + + template + using void_t = typename void_::type; + + template + struct fixup_ptr + { + using type = T; + }; + + template + using remove_v_t = typename std::remove_volatile::type; + + template + struct fixup_ptr + { + using type = remove_v_t const *; + }; + + template + using fixup_ptr_t = typename fixup_ptr::type; + + template + using remove_cv_ref_t = + typename std::remove_cv::type>::type; +#endif + + template using has_begin = decltype(*detail::begin(std::declval())); template @@ -881,72 +914,39 @@ namespace boost::parser::detail { namespace text { namespace detail { return detail::hash_combine_(retval, cps); } - template - constexpr bool is_cu8_v = std::integral_constant< - bool, - std::is_same_v -#if defined(__cpp_char8_t) - || std::is_same_v -#endif - >{}; - template using char_value_expr = std::integral_constant< bool, - is_cu8_v::value_type>>; + std::is_integral< + typename std::iterator_traits::value_type>::value && + sizeof(typename std::iterator_traits::value_type) == 1>; template constexpr bool is_char_ptr_v = std::is_pointer::value && detected_or_t::value; - template - constexpr bool is_cu16_v = std::integral_constant< - bool, -#if defined(_MSC_VER) - std::is_same_v || -#endif - std::is_same_v>{}; - template using _16_value_expr = 
std::integral_constant< bool, - is_cu16_v::value_type>>; + std::is_integral< + typename std::iterator_traits::value_type>::value && + sizeof(typename std::iterator_traits::value_type) == 2>; template constexpr bool is_16_ptr_v = std::is_pointer::value && detected_or_t::value; - template - constexpr bool is_cp_v = std::integral_constant< - bool, -#if !defined(_MSC_VER) - std::is_same_v || -#endif - std::is_same_v>{}; - template using cp_value_expr = std::integral_constant< bool, - is_cp_v::value_type>>; + std::is_integral< + typename std::iterator_traits::value_type>::value && + sizeof(typename std::iterator_traits::value_type) == 4>; template constexpr bool is_cp_ptr_v = std::is_pointer::value && detected_or_t::value; - template - using iter_traits_value_expr = - typename std::iterator_traits::value_type; - - template - using iter_traits_value_t = - detected_or_t>; - - template - constexpr bool is_utf_ptr_v = std::is_pointer_v> && - (is_cu8_v> || - is_cu16_v> || - is_cp_v>); - }}} #endif diff --git a/include/boost/parser/detail/text/detail/unpack.hpp b/include/boost/parser/detail/text/detail/unpack.hpp deleted file mode 100644 index 9ad894fe..00000000 --- a/include/boost/parser/detail/text/detail/unpack.hpp +++ /dev/null @@ -1,362 +0,0 @@ -// Copyright (C) 2020 T. Zachary Laine -// -// Distributed under the Boost Software License, Version 1.0. (See -// accompanying file LICENSE_1_0.txt or copy at -// http://www.boost.org/LICENSE_1_0.txt) -#ifndef BOOST_PARSER_DETAIL_TEXT_DETAIL_UNPACK_HPP -#define BOOST_PARSER_DETAIL_TEXT_DETAIL_UNPACK_HPP - -#include - - -namespace boost::parser::detail { namespace text { namespace detail { - - struct no_op_repack - { - template - T operator()(T x) const - { - return x; - } - }; - - // Using this custom template is quite a bit faster than using lambdas. - // Unexpected. 
- template - struct repacker_t - { - auto operator()(I it) const - { - return then_(Iterator(first_, it, last_)); - } - - [[no_unique_address]] I first_; - [[no_unique_address]] S last_; - [[no_unique_address]] Then then_; - }; - template - auto repacker(I first, S last, Then then) - { - return repacker_t{first, last, then}; - } - - struct utf8_tag - {}; - struct utf16_tag - {}; - struct utf32_tag - {}; - - template< - typename Tag, - typename Iter, - typename Sentinel = Iter, - typename Repack = no_op_repack> - struct tagged_range - { - Iter f_; - [[no_unique_address]] Sentinel l_; - Tag tag_; - Repack repack_; - }; - - template - auto make_tagged_range(Tag tag, Iter f, Sentinel l, Repack repack) - { - return tagged_range{f, l, tag, repack}; - } - - template< - typename Iter, - typename Sentinel, - typename Repack, - bool UTF8 = is_char_iter_v, - bool UTF16 = is_16_iter_v, - bool UTF32 = is_cp_iter_v> - struct unpack_iterator_and_sentinel_impl - {}; - - template - struct unpack_iterator_and_sentinel_impl< - Iter, - Sentinel, - Repack, - true, - false, - false> - { - static constexpr auto - call(Iter first, Sentinel last, Repack repack) - { - return detail::make_tagged_range(utf8_tag{}, first, last, repack); - } - }; - template - struct unpack_iterator_and_sentinel_impl< - Iter, - Sentinel, - Repack, - false, - true, - false> - { - static constexpr auto - call(Iter first, Sentinel last, Repack repack) - { - return detail::make_tagged_range(utf16_tag{}, first, last, repack); - } - }; - template - struct unpack_iterator_and_sentinel_impl< - Iter, - Sentinel, - Repack, - false, - false, - true> - { - static constexpr auto - call(Iter first, Sentinel last, Repack repack) - { - return detail::make_tagged_range(utf32_tag{}, first, last, repack); - } - }; - - template - constexpr auto unpack_iterator_and_sentinel( - Iter first, Sentinel last, Repack repack = no_op_repack{}) - -> decltype(unpack_iterator_and_sentinel_impl< - std::remove_cv_t, - std::remove_cv_t, - 
Repack>::call(first, last, repack)) - { - using iterator = std::remove_cv_t; - using sentinel = std::remove_cv_t; - return detail:: - unpack_iterator_and_sentinel_impl::call( - first, last, repack); - } - - // 8 -> 32 - template - constexpr auto unpack_iterator_and_sentinel( - utf_8_to_32_iterator first, - utf_8_to_32_iterator last, - Repack repack = no_op_repack{}); - template - constexpr auto unpack_iterator_and_sentinel( - utf_8_to_32_iterator first, - Sentinel last, - Repack repack = no_op_repack{}); - // 32 -> 8 - template - constexpr auto unpack_iterator_and_sentinel( - utf_32_to_8_iterator first, - utf_32_to_8_iterator last, - Repack repack = no_op_repack{}); - template - constexpr auto unpack_iterator_and_sentinel( - utf_32_to_8_iterator first, - Sentinel last, - Repack repack = no_op_repack{}); - // 16 -> 32 - template - constexpr auto unpack_iterator_and_sentinel( - utf_16_to_32_iterator first, - utf_16_to_32_iterator last, - Repack repack = no_op_repack{}); - template - constexpr auto unpack_iterator_and_sentinel( - utf_16_to_32_iterator first, - Sentinel last, - Repack repack = no_op_repack{}); - // 32 -> 16 - template - constexpr auto unpack_iterator_and_sentinel( - utf_32_to_16_iterator first, - utf_32_to_16_iterator last, - Repack repack = no_op_repack{}); - template - constexpr auto unpack_iterator_and_sentinel( - utf_32_to_16_iterator first, - Sentinel last, - Repack repack = no_op_repack{}); - // 8 -> 16 - template - constexpr auto unpack_iterator_and_sentinel( - utf_8_to_16_iterator first, - utf_8_to_16_iterator last, - Repack repack = no_op_repack{}); - template - constexpr auto unpack_iterator_and_sentinel( - utf_8_to_16_iterator first, - Sentinel last, - Repack repack = no_op_repack{}); - // 16 -> 8 - template - constexpr auto unpack_iterator_and_sentinel( - utf_16_to_8_iterator first, - utf_16_to_8_iterator last, - Repack repack = no_op_repack{}); - template - constexpr auto unpack_iterator_and_sentinel( - utf_16_to_8_iterator first, - 
Sentinel last, - Repack repack = no_op_repack{}); - - // 8 -> 32 - template - constexpr auto unpack_iterator_and_sentinel( - utf_8_to_32_iterator first, - utf_8_to_32_iterator last, - Repack repack) - { - return detail::unpack_iterator_and_sentinel( - first.base(), - last.base(), - detail::repacker>( - first.begin(), first.end(), repack)); - } - template - constexpr auto unpack_iterator_and_sentinel( - utf_8_to_32_iterator first, - Sentinel last, - Repack repack) - { - return detail::unpack_iterator_and_sentinel( - first.base(), - last, - detail::repacker>( - first.begin(), first.end(), repack)); - } - // 32 -> 8 - template - constexpr auto unpack_iterator_and_sentinel( - utf_32_to_8_iterator first, - utf_32_to_8_iterator last, - Repack repack) - { - return detail::unpack_iterator_and_sentinel( - first.base(), - last.base(), - detail::repacker>( - first.begin(), first.end(), repack)); - } - template - constexpr auto unpack_iterator_and_sentinel( - utf_32_to_8_iterator first, - Sentinel last, - Repack repack) - { - return detail::unpack_iterator_and_sentinel( - first.base(), - last, - detail::repacker>( - first.begin(), first.end(), repack)); - } - - // 16 -> 32 - template - constexpr auto unpack_iterator_and_sentinel( - utf_16_to_32_iterator first, - utf_16_to_32_iterator last, - Repack repack) - { - return detail::unpack_iterator_and_sentinel( - first.base(), - last.base(), - detail::repacker>( - first.begin(), first.end(), repack)); - } - template - constexpr auto unpack_iterator_and_sentinel( - utf_16_to_32_iterator first, - Sentinel last, - Repack repack) - { - return detail::unpack_iterator_and_sentinel( - first.base(), - last, - detail::repacker>( - first.begin(), first.end(), repack)); - } - // 32 -> 16 - template - constexpr auto unpack_iterator_and_sentinel( - utf_32_to_16_iterator first, - utf_32_to_16_iterator last, - Repack repack) - { - return detail::unpack_iterator_and_sentinel( - first.base(), - last.base(), - detail::repacker>( - first.begin(), 
first.end(), repack)); - } - template - constexpr auto unpack_iterator_and_sentinel( - utf_32_to_16_iterator first, - Sentinel last, - Repack repack) - { - return detail::unpack_iterator_and_sentinel( - first.base(), - last, - detail::repacker>( - first.begin(), first.end(), repack)); - } - - // 8 -> 16 - template - constexpr auto unpack_iterator_and_sentinel( - utf_8_to_16_iterator first, - utf_8_to_16_iterator last, - Repack repack) - { - return detail::unpack_iterator_and_sentinel( - first.base(), - last.base(), - detail::repacker>( - first.begin(), first.end(), repack)); - } - template - constexpr auto unpack_iterator_and_sentinel( - utf_8_to_16_iterator first, - Sentinel last, - Repack repack) - { - return detail::unpack_iterator_and_sentinel( - first.base(), - last, - detail::repacker>( - first.begin(), first.end(), repack)); - } - // 16 -> 8 - template - constexpr auto unpack_iterator_and_sentinel( - utf_16_to_8_iterator first, - utf_16_to_8_iterator last, - Repack repack) - { - return detail::unpack_iterator_and_sentinel( - first.base(), - last.base(), - detail::repacker>( - first.begin(), first.end(), repack)); - } - template - constexpr auto unpack_iterator_and_sentinel( - utf_16_to_8_iterator first, - Sentinel last, - Repack repack) - { - return detail::unpack_iterator_and_sentinel( - first.base(), - last, - detail::repacker>( - first.begin(), first.end(), repack)); - } - -}}} - -#endif diff --git a/include/boost/parser/detail/text/in_out_result.hpp b/include/boost/parser/detail/text/in_out_result.hpp new file mode 100644 index 00000000..f84a6fa8 --- /dev/null +++ b/include/boost/parser/detail/text/in_out_result.hpp @@ -0,0 +1,47 @@ +// Copyright (C) 2020 T. Zachary Laine +// +// Distributed under the Boost Software License, Version 1.0. 
(See +// accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt) +#ifndef BOOST_PARSER_DETAIL_TEXT_IN_OUT_RESULT_HPP +#define BOOST_PARSER_DETAIL_TEXT_IN_OUT_RESULT_HPP + +#include + + +namespace boost::parser::detail { namespace text { + + /** A replacement for C++20's `std::ranges::in_out_result` for use in + pre-C++20 build modes. */ + template + struct in_out_result + { + [[no_unique_address]] I in; + [[no_unique_address]] O out; + }; + +}} + +#if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS + +#include + +namespace boost::parser::detail { namespace text { BOOST_PARSER_DETAIL_TEXT_NAMESPACE_V2 { + + namespace dtl { + template + std::ranges::borrowed_iterator_t result_iterator(R &&); + template + requires std::is_pointer_v> + Ptr result_iterator(Ptr &&); + + template + using uc_result_iterator = + decltype(dtl::result_iterator(std::declval())); + } + +}}} + +#endif + +#endif diff --git a/include/boost/parser/detail/text/subrange.hpp b/include/boost/parser/detail/text/subrange.hpp index 59018143..6f191b51 100644 --- a/include/boost/parser/detail/text/subrange.hpp +++ b/include/boost/parser/detail/text/subrange.hpp @@ -33,11 +33,11 @@ namespace boost::parser::detail { namespace text { [[nodiscard]] constexpr subrange next(std::ptrdiff_t n = 1) const { - return subrange{std::next(first_), last_}; + return subrange{detail::next(first_), last_}; } [[nodiscard]] constexpr subrange prev(std::ptrdiff_t n = 1) const { - return subrange{std::prev(first_), last_}; + return subrange{detail::prev(first_), last_}; } constexpr subrange & advance(std::ptrdiff_t n) diff --git a/include/boost/parser/detail/text/transcode_algorithm.hpp b/include/boost/parser/detail/text/transcode_algorithm.hpp index ce1def78..a29697d0 100644 --- a/include/boost/parser/detail/text/transcode_algorithm.hpp +++ b/include/boost/parser/detail/text/transcode_algorithm.hpp @@ -7,13 +7,15 @@ #ifndef BOOST_PARSER_DETAIL_TEXT_TRANSCODE_ALGORITHM_HPP #define 
BOOST_PARSER_DETAIL_TEXT_TRANSCODE_ALGORITHM_HPP +#include #include -#include - +#include #if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS #include #endif +#include + namespace boost::parser::detail { namespace text { @@ -46,14 +48,10 @@ namespace boost::parser::detail { namespace text { using utf_range_like_iterator_t = typename utf_range_like_iterator::type; - /** The result returned from some variations of the transcode - algorithms. */ + /** An alias for `in_out_result` returned by algorithms that perform a + transcoding copy. */ template - struct transcode_result - { - Iter in; - OutIter out; - }; + using transcode_result = in_out_result; namespace detail { template @@ -181,9 +179,17 @@ namespace boost::parser::detail { namespace text { first, last, n, out, std::input_iterator_tag{}); } + template + struct tag_t + {}; + template transcode_result transcode_to_8( - utf8_tag, Iter first, Sentinel last, std::ptrdiff_t n, OutIter out) + tag_t, + Iter first, + Sentinel last, + std::ptrdiff_t n, + OutIter out) { for (; first != last && (!UseN || n); ++first, ++out) { *out = *first; @@ -194,7 +200,11 @@ namespace boost::parser::detail { namespace text { template transcode_result transcode_to_16( - utf8_tag, Iter first, Sentinel last, std::ptrdiff_t n, OutIter out) + tag_t, + Iter first, + Sentinel last, + std::ptrdiff_t n, + OutIter out) { return detail::transcode_utf_8_to_16( first, @@ -206,7 +216,11 @@ namespace boost::parser::detail { namespace text { template transcode_result transcode_to_32( - utf8_tag, Iter first, Sentinel last, std::ptrdiff_t n, OutIter out) + tag_t, + Iter first, + Sentinel last, + std::ptrdiff_t n, + OutIter out) { return detail::transcode_utf_8_to_32( first, @@ -218,7 +232,11 @@ namespace boost::parser::detail { namespace text { template transcode_result transcode_to_8( - utf16_tag, Iter first, Sentinel last, std::ptrdiff_t n, OutIter out) + tag_t, + Iter first, + Sentinel last, + std::ptrdiff_t n, + OutIter out) { uint32_t const high_surrogate_max 
= 0xdbff; uint16_t const high_surrogate_base = 0xd7c0; @@ -230,7 +248,7 @@ namespace boost::parser::detail { namespace text { if (hi <= high_surrogate_max) { ++first; if (first == last) { - uint32_t const cp = replacement_character(); + uint32_t const cp = replacement_character; out = detail::read_into_utf8_iter(cp, out); ++out; return {first, out}; @@ -245,7 +263,7 @@ namespace boost::parser::detail { namespace text { } } out = detail::read_into_utf8_iter( - replacement_character(), out); + replacement_character, out); } else { out = detail::read_into_utf8_iter(hi, out); } @@ -256,7 +274,11 @@ namespace boost::parser::detail { namespace text { template transcode_result transcode_to_16( - utf16_tag, Iter first, Sentinel last, std::ptrdiff_t n, OutIter out) + tag_t, + Iter first, + Sentinel last, + std::ptrdiff_t n, + OutIter out) { for (; first != last && (!UseN || n); ++first, ++out, --n) { *out = *first; @@ -266,7 +288,11 @@ namespace boost::parser::detail { namespace text { template transcode_result transcode_to_32( - utf16_tag, Iter first, Sentinel last, std::ptrdiff_t n, OutIter out) + tag_t, + Iter first, + Sentinel last, + std::ptrdiff_t n, + OutIter out) { uint32_t const high_surrogate_max = 0xdbff; uint16_t const high_surrogate_base = 0xd7c0; @@ -278,7 +304,7 @@ namespace boost::parser::detail { namespace text { if (hi <= high_surrogate_max) { ++first; if (first == last) { - *out = replacement_character(); + *out = replacement_character; ++out; return {first, out}; } @@ -292,7 +318,7 @@ namespace boost::parser::detail { namespace text { continue; } } - *out = replacement_character(); + *out = replacement_character; ++out; } else { *out = hi; @@ -305,7 +331,11 @@ namespace boost::parser::detail { namespace text { template transcode_result transcode_to_8( - utf32_tag, Iter first, Sentinel last, std::ptrdiff_t n, OutIter out) + tag_t, + Iter first, + Sentinel last, + std::ptrdiff_t n, + OutIter out) { for (; first != last && (!UseN || n); ++first, --n) { out 
= detail::read_into_utf8_iter(*first, out); @@ -315,7 +345,11 @@ namespace boost::parser::detail { namespace text { template transcode_result transcode_to_16( - utf32_tag, Iter first, Sentinel last, std::ptrdiff_t n, OutIter out) + tag_t, + Iter first, + Sentinel last, + std::ptrdiff_t n, + OutIter out) { for (; first != last && (!UseN || n); ++first, --n) { out = detail::read_into_utf16_iter(*first, out); @@ -325,7 +359,11 @@ namespace boost::parser::detail { namespace text { template transcode_result transcode_to_32( - utf32_tag, Iter first, Sentinel last, std::ptrdiff_t n, OutIter out) + tag_t, + Iter first, + Sentinel last, + std::ptrdiff_t n, + OutIter out) { for (; first != last && (!UseN || n); ++first, ++out, --n) { *out = *first; @@ -342,7 +380,8 @@ namespace boost::parser::detail { namespace text { InputIter first, Sentinel last, std::ptrdiff_t n, OutIter out) { auto const r = detail::unpack_iterator_and_sentinel(first, last); - return detail::transcode_to_32(r.tag_, r.f_, r.l_, n, out); + return detail::transcode_to_32( + detail::tag_t{}, r.first, r.last, n, out); } /** Copies the first `n` code points in the range [first, last) to out, @@ -352,7 +391,8 @@ namespace boost::parser::detail { namespace text { InputIter first, Sentinel last, std::ptrdiff_t n, OutIter out) { auto const r = detail::unpack_iterator_and_sentinel(first, last); - return detail::transcode_to_32(r.tag_, r.f_, r.l_, n, out); + return detail::transcode_to_32( + detail::tag_t{}, r.first, r.last, n, out); } /** Copies the first `n` code points in the range [first, last) to out, @@ -414,26 +454,26 @@ namespace boost::parser::detail { namespace text { BOOST_PARSER_DETAIL_TEXT_NAME template< std::input_iterator I, std::sentinel_for S, - std::output_iterator O> + std::output_iterator O> requires (utf8_code_unit> || utf32_code_unit>) transcode_result transcode_to_utf16(I first, S last, O out); /** Copies the code points in the range `[p, null_sentinel)` to `out`, changing the encoding to 
UTF-16. */ - template O> + template O> requires (utf8_pointer || utf32_pointer) transcode_result transcode_to_utf16(Ptr p, O out); /** Copies the code points in the array `arr` to `out`, changing the encoding to UTF-16. */ - template O> + template O> requires (utf8_code_unit || utf32_code_unit) transcode_result transcode_to_utf16(Char (&arr)[N], O out); /** Copies the code points in the range `r` to `out`, changing the encoding to UTF-16. */ - template O> + template O> requires (utf8_code_unit> || utf32_code_unit>) transcode_result, O> @@ -487,11 +527,11 @@ namespace boost::parser::detail { namespace text { BOOST_PARSER_DETAIL_TEXT_NAME call(Range && r, std::ptrdiff_t n, OutIter out) -> transcode_result { - auto const u = detail::unpack_iterator_and_sentinel( + auto const u = text::unpack_iterator_and_sentinel( detail::begin(r), detail::end(r)); - auto unpacked = - detail::transcode_to_8(u.tag_, u.f_, u.l_, n, out); - return {u.repack_(unpacked.in), unpacked.out}; + auto unpacked = detail::transcode_to_8( + detail::tag_t{}, u.first, u.last, n, out); + return {u.repack(unpacked.in), unpacked.out}; } }; @@ -502,7 +542,7 @@ namespace boost::parser::detail { namespace text { BOOST_PARSER_DETAIL_TEXT_NAME call(Ptr p, std::ptrdiff_t n, OutIter out) { return detail::transcode_to_8( - detail::utf16_tag{}, p, null_sentinel, n, out); + detail::tag_t{}, p, null_sentinel, n, out); } }; @@ -513,7 +553,7 @@ namespace boost::parser::detail { namespace text { BOOST_PARSER_DETAIL_TEXT_NAME call(Ptr p, std::ptrdiff_t n, OutIter out) { return detail::transcode_to_8( - detail::utf32_tag{}, p, null_sentinel, n, out); + detail::tag_t{}, p, null_sentinel, n, out); } }; @@ -529,11 +569,11 @@ namespace boost::parser::detail { namespace text { BOOST_PARSER_DETAIL_TEXT_NAME call(Range && r, std::ptrdiff_t n, OutIter out) -> transcode_result { - auto const u = detail::unpack_iterator_and_sentinel( + auto const u = text::unpack_iterator_and_sentinel( detail::begin(r), detail::end(r)); - auto 
unpacked = - detail::transcode_to_16(u.tag_, u.f_, u.l_, n, out); - return {u.repack_(unpacked.in), unpacked.out}; + auto unpacked = detail::transcode_to_16( + detail::tag_t{}, u.first, u.last, n, out); + return {u.repack(unpacked.in), unpacked.out}; } }; @@ -544,7 +584,7 @@ namespace boost::parser::detail { namespace text { BOOST_PARSER_DETAIL_TEXT_NAME call(Ptr p, std::ptrdiff_t n, OutIter out) { return detail::transcode_to_16( - detail::utf8_tag{}, p, null_sentinel, n, out); + detail::tag_t{}, p, null_sentinel, n, out); } }; @@ -555,7 +595,7 @@ namespace boost::parser::detail { namespace text { BOOST_PARSER_DETAIL_TEXT_NAME call(Ptr p, std::ptrdiff_t n, OutIter out) { return detail::transcode_to_16( - detail::utf32_tag{}, p, null_sentinel, n, out); + detail::tag_t{}, p, null_sentinel, n, out); } }; @@ -571,11 +611,11 @@ namespace boost::parser::detail { namespace text { BOOST_PARSER_DETAIL_TEXT_NAME call(Range && r, std::ptrdiff_t n, OutIter out) -> transcode_result { - auto const u = detail::unpack_iterator_and_sentinel( + auto const u = text::unpack_iterator_and_sentinel( detail::begin(r), detail::end(r)); - auto unpacked = - detail::transcode_to_32(u.tag_, u.f_, u.l_, n, out); - return {u.repack_(unpacked.in), unpacked.out}; + auto unpacked = detail::transcode_to_32( + detail::tag_t{}, u.first, u.last, n, out); + return {u.repack(unpacked.in), unpacked.out}; } }; @@ -586,7 +626,7 @@ namespace boost::parser::detail { namespace text { BOOST_PARSER_DETAIL_TEXT_NAME call(Ptr p, std::ptrdiff_t n, OutIter out) { return detail::transcode_to_32( - detail::utf8_tag{}, p, null_sentinel, n, out); + detail::tag_t{}, p, null_sentinel, n, out); } }; @@ -597,7 +637,7 @@ namespace boost::parser::detail { namespace text { BOOST_PARSER_DETAIL_TEXT_NAME call(Ptr p, std::ptrdiff_t n, OutIter out) { return detail::transcode_to_32( - detail::utf16_tag{}, p, null_sentinel, n, out); + detail::tag_t{}, p, null_sentinel, n, out); } }; } @@ -606,10 +646,10 @@ namespace 
boost::parser::detail { namespace text { BOOST_PARSER_DETAIL_TEXT_NAME transcode_result transcode_to_utf8( Iter first, Sentinel last, OutIter out) { - auto const r = detail::unpack_iterator_and_sentinel(first, last); - auto unpacked = - detail::transcode_to_8(r.tag_, r.f_, r.l_, -1, out); - return {r.repack_(unpacked.in), unpacked.out}; + auto const r = text::unpack_iterator_and_sentinel(first, last); + auto unpacked = detail::transcode_to_8( + detail::tag_t{}, r.first, r.last, -1, out); + return {r.repack(unpacked.in), unpacked.out}; } template @@ -624,10 +664,10 @@ namespace boost::parser::detail { namespace text { BOOST_PARSER_DETAIL_TEXT_NAME transcode_result transcode_to_utf16( Iter first, Sentinel last, OutIter out) { - auto const r = detail::unpack_iterator_and_sentinel(first, last); - auto unpacked = - detail::transcode_to_16(r.tag_, r.f_, r.l_, -1, out); - return {r.repack_(unpacked.in), unpacked.out}; + auto const r = text::unpack_iterator_and_sentinel(first, last); + auto unpacked = detail::transcode_to_16( + detail::tag_t{}, r.first, r.last, -1, out); + return {r.repack(unpacked.in), unpacked.out}; } template @@ -642,10 +682,10 @@ namespace boost::parser::detail { namespace text { BOOST_PARSER_DETAIL_TEXT_NAME transcode_result transcode_to_utf32( Iter first, Sentinel last, OutIter out) { - auto const r = detail::unpack_iterator_and_sentinel(first, last); - auto unpacked = - detail::transcode_to_32(r.tag_, r.f_, r.l_, -1, out); - return {r.repack_(unpacked.in), unpacked.out}; + auto const r = text::unpack_iterator_and_sentinel(first, last); + auto unpacked = detail::transcode_to_32( + detail::tag_t{}, r.first, r.last, -1, out); + return {r.repack(unpacked.in), unpacked.out}; } template @@ -658,7 +698,7 @@ namespace boost::parser::detail { namespace text { BOOST_PARSER_DETAIL_TEXT_NAME }}} -#if defined(BOOST_TEXT_DOXYGEN) || BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS +#if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS namespace boost::parser::detail { namespace text { 
BOOST_PARSER_DETAIL_TEXT_NAMESPACE_V2 { @@ -668,47 +708,28 @@ namespace boost::parser::detail { namespace text { BOOST_PARSER_DETAIL_TEXT_NAME std::input_iterator I, std::sentinel_for S, std::output_iterator O> - // clang-format off - requires (utf16_code_unit> || - utf32_code_unit>) + requires( + utf16_code_unit> || + utf32_code_unit>) transcode_result transcode_to_utf8(I first, S last, O out) - // clang-format on { - auto const r = detail::unpack_iterator_and_sentinel(first, last); - auto unpacked = - detail::transcode_to_8(r.tag_, r.f_, r.l_, -1, out); - return {r.repack_(unpacked.in), unpacked.out}; + auto const r = text::unpack_iterator_and_sentinel(first, last); + auto unpacked = detail::transcode_to_8( + detail::tag_t{}, r.first, r.last, -1, out); + return {r.repack(unpacked.in), unpacked.out}; } - template O> - // clang-format off - requires (utf16_pointer || utf32_pointer) - transcode_result transcode_to_utf8(Ptr p, O out) + template O> + requires(utf16_range_like || utf32_range_like) + transcode_result, O> transcode_to_utf8( + R && r, O out) { - // clang-format on - return text::transcode_to_utf8(p, null_sentinel, out); - } - - template O> - // clang-format off - requires (utf16_code_unit || utf32_code_unit) - transcode_result transcode_to_utf8(Char (&arr)[N], O out) - { - // clang-format on - return text::transcode_to_utf8( - std::ranges::begin(arr), std::ranges::end(arr), out); - } - - template O> - // clang-format off - requires (utf16_code_unit> || - utf32_code_unit>) - transcode_result, O> - transcode_to_utf8(R && r, O out) - { - // clang-format on - return text::transcode_to_utf8( - std::ranges::begin(r), std::ranges::end(r), out); + if constexpr (std::is_pointer_v>) { + return text::transcode_to_utf8(r, null_sentinel, out); + } else { + return text::transcode_to_utf8( + std::ranges::begin(r), std::ranges::end(r), out); + } } @@ -717,48 +738,29 @@ namespace boost::parser::detail { namespace text { BOOST_PARSER_DETAIL_TEXT_NAME template< 
std::input_iterator I, std::sentinel_for S, - std::output_iterator O> - // clang-format off - requires (utf8_code_unit> || - utf32_code_unit>) + std::output_iterator O> + requires( + utf8_code_unit> || + utf32_code_unit>) transcode_result transcode_to_utf16(I first, S last, O out) - // clang-format on - { - auto const r = detail::unpack_iterator_and_sentinel(first, last); - auto unpacked = - detail::transcode_to_16(r.tag_, r.f_, r.l_, -1, out); - return {r.repack_(unpacked.in), unpacked.out}; - } - - template O> - // clang-format off - requires (utf8_pointer || utf32_pointer) - transcode_result transcode_to_utf16(Ptr p, O out) { - // clang-format on - return text::transcode_to_utf16(p, null_sentinel, out); + auto const r = text::unpack_iterator_and_sentinel(first, last); + auto unpacked = detail::transcode_to_16( + detail::tag_t{}, r.first, r.last, -1, out); + return {r.repack(unpacked.in), unpacked.out}; } - template O> - // clang-format off - requires (utf8_code_unit || utf32_code_unit) - transcode_result transcode_to_utf16(Char (&arr)[N], O out) + template O> + requires(utf8_range_like || utf32_range_like) + transcode_result, O> transcode_to_utf16( + R && r, O out) { - // clang-format on - return text::transcode_to_utf16( - std::ranges::begin(arr), std::ranges::end(arr), out); - } - - template O> - // clang-format off - requires (utf8_code_unit> || - utf32_code_unit>) - transcode_result, O> - transcode_to_utf16(R && r, O out) - { - // clang-format on - return text::transcode_to_utf16( - std::ranges::begin(r), std::ranges::end(r), out); + if constexpr (std::is_pointer_v>) { + return text::transcode_to_utf16(r, null_sentinel, out); + } else { + return text::transcode_to_utf16( + std::ranges::begin(r), std::ranges::end(r), out); + } } @@ -768,47 +770,28 @@ namespace boost::parser::detail { namespace text { BOOST_PARSER_DETAIL_TEXT_NAME std::input_iterator I, std::sentinel_for S, std::output_iterator O> - // clang-format off - requires (utf8_code_unit> || - 
utf16_code_unit>) + requires( + utf8_code_unit> || + utf16_code_unit>) transcode_result transcode_to_utf32(I first, S last, O out) - // clang-format on { - auto const r = detail::unpack_iterator_and_sentinel(first, last); - auto unpacked = - detail::transcode_to_32(r.tag_, r.f_, r.l_, -1, out); - return {r.repack_(unpacked.in), unpacked.out}; + auto const r = text::unpack_iterator_and_sentinel(first, last); + auto unpacked = detail::transcode_to_32( + detail::tag_t{}, r.first, r.last, -1, out); + return {r.repack(unpacked.in), unpacked.out}; } - template O> - // clang-format off - requires (utf8_pointer || utf16_pointer) - transcode_result transcode_to_utf32(Ptr p, O out) + template O> + requires(utf8_range_like || utf16_range_like) + transcode_result, O> transcode_to_utf32( + R && r, O out) { - // clang-format on - return text::transcode_to_utf32(p, null_sentinel, out); - } - - template O> - // clang-format off - requires (utf8_code_unit || utf16_code_unit) - transcode_result transcode_to_utf32(Char (&arr)[N], O out) - { - // clang-format on - return text::transcode_to_utf32( - std::ranges::begin(arr), std::ranges::end(arr), out); - } - - template O> - // clang-format off - requires (utf8_code_unit> || - utf16_code_unit>) - transcode_result, O> - transcode_to_utf32(R && r, O out) - { - // clang-format on - return text::transcode_to_utf32( - std::ranges::begin(r), std::ranges::end(r), out); + if constexpr (std::is_pointer_v>) { + return text::transcode_to_utf32(r, null_sentinel, out); + } else { + return text::transcode_to_utf32( + std::ranges::begin(r), std::ranges::end(r), out); + } } }}} diff --git a/include/boost/parser/detail/text/transcode_iterator.hpp b/include/boost/parser/detail/text/transcode_iterator.hpp index e053a693..424341c2 100644 --- a/include/boost/parser/detail/text/transcode_iterator.hpp +++ b/include/boost/parser/detail/text/transcode_iterator.hpp @@ -7,8 +7,7 @@ #define BOOST_PARSER_DETAIL_TEXT_TRANSCODE_ITERATOR_HPP #include - -#include 
+#include #include #include #include @@ -19,17 +18,18 @@ #include #include #include - +#include namespace boost::parser::detail { namespace text { namespace { - constexpr uint16_t high_surrogate_base = 0xd7c0; - constexpr uint16_t low_surrogate_base = 0xdc00; + constexpr char16_t high_surrogate_base = 0xd7c0; + constexpr char16_t low_surrogate_base = 0xdc00; constexpr char32_t high_surrogate_min = 0xd800; constexpr char32_t high_surrogate_max = 0xdbff; constexpr char32_t low_surrogate_min = 0xdc00; constexpr char32_t low_surrogate_max = 0xdfff; + constexpr char32_t replacement_character = 0xfffd; } namespace detail { @@ -82,18 +82,18 @@ namespace boost::parser::detail { namespace text { constexpr OutIter write_cp_utf16(char32_t cp, OutIter out) { if (cp < 0x10000) { - *out = static_cast(cp); + *out = static_cast(cp); ++out; } else { - *out = static_cast(cp >> 10) + high_surrogate_base; + *out = static_cast(cp >> 10) + high_surrogate_base; ++out; - *out = static_cast(cp & 0x3ff) + low_surrogate_base; + *out = static_cast(cp & 0x3ff) + low_surrogate_base; ++out; } return out; } - inline constexpr char32_t surrogates_to_cp(uint16_t hi, uint16_t lo) + inline constexpr char32_t surrogates_to_cp(char16_t hi, char16_t lo) { return char32_t((hi - high_surrogate_base) << 10) + (lo - low_surrogate_base); @@ -108,13 +108,34 @@ namespace boost::parser::detail { namespace text { using enable_utf16_cp = std::enable_if, U>; template using enable_utf16_cp_t = typename enable_utf16_cp::type; - } - /** The replacement character used to mark invalid portions of a Unicode - sequence when converting between two encodings. 
+ template + auto bidirectional_at_most() + { +#if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS + if constexpr (std::bidirectional_iterator) { + return std::bidirectional_iterator_tag{}; + } else if constexpr (std::forward_iterator) { + return std::forward_iterator_tag{}; + } else if constexpr (std::input_iterator) { + return std::input_iterator_tag{}; + } +#else + using category = + typename std::iterator_traits::iterator_category; + if constexpr (std::is_base_of_v< + std::bidirectional_iterator_tag, + category>) { + return std::bidirectional_iterator_tag{}; + } else { + return category{}; + } +#endif + } - \see Unicode 3.2/C10 */ - constexpr char32_t replacement_character() { return 0xfffd; } + template + using bidirectional_at_most_t = decltype(bidirectional_at_most()); + } /** Returns true iff `c` is a Unicode surrogate. */ inline constexpr bool surrogate(char32_t c) @@ -164,20 +185,21 @@ namespace boost::parser::detail { namespace text { /** Returns true iff `c` is a UTF-8 lead code unit (which must be followed by 1-3 following units). */ - constexpr bool lead_code_unit(unsigned char c) + constexpr bool lead_code_unit(char8_type c) { - return uint8_t(c - 0xc2) <= 0x32; + return uint8_t((unsigned char)c - 0xc2) <= 0x32; } /** Returns true iff `c` is a UTF-8 continuation code unit. */ - constexpr bool continuation(unsigned char c) { return (int8_t)c < -0x40; } + constexpr bool continuation(char8_type c) { return (int8_t)c < -0x40; } /** Given the first (and possibly only) code unit of a UTF-8-encoded code point, returns the number of bytes occupied by that code point (in the range `[1, 4]`). Returns a value < 0 if `first_unit` is not a valid initial UTF-8 code unit. */ - inline constexpr int utf8_code_units(unsigned char first_unit) + inline constexpr int utf8_code_units(char8_type first_unit_) { + auto first_unit = (unsigned int)first_unit_; return first_unit <= 0x7f ? 1 : boost::parser::detail::text::lead_code_unit(first_unit) ? 
int(0xe0 <= first_unit) + int(0xf0 <= first_unit) + 2 @@ -188,7 +210,7 @@ namespace boost::parser::detail { namespace text { point, returns the number of code units occupied by that code point (in the range `[1, 2]`). Returns a negative value if `first_unit` is not a valid initial UTF-16 code unit. */ - inline constexpr int utf16_code_units(uint16_t first_unit) + inline constexpr int utf16_code_units(char16_t first_unit) { if (boost::parser::detail::text::low_surrogate(first_unit)) return -1; @@ -357,7 +379,7 @@ namespace boost::parser::detail { namespace text { Iter retval = it; int backup = 0; - while (backup < 4 && it != first && + while (backup < 4 && retval != first && boost::parser::detail::text::continuation(*--retval)) { ++backup; } @@ -538,10 +560,10 @@ namespace boost::parser::detail { namespace text { char_class const class_ = octet_classes[cu]; state = transitions[state + class_]; if (state == err) - return replacement_character(); + return replacement_character; ++first; } else { - return replacement_character(); + return replacement_character; } } @@ -562,25 +584,28 @@ namespace boost::parser::detail { namespace text { using reference = void; using iterator_category = std::output_iterator_tag; - trans_ins_iter() {} - trans_ins_iter(Iter it) : it_(it) {} - Derived & operator*() { return derived(); } - Derived & operator++() { return derived(); } - Derived operator++(int) { return derived(); } - Iter base() const { return it_; } + constexpr trans_ins_iter() {} + constexpr trans_ins_iter(Iter it) : it_(it) {} + constexpr Derived & operator*() { return derived(); } + constexpr Derived & operator++() { return derived(); } + constexpr Derived operator++(int) { return derived(); } + constexpr Iter base() const { return it_; } protected: - Iter & iter() { return it_; } + constexpr Iter & iter() { return it_; } private: - Derived & derived() { return static_cast(*this); } + constexpr Derived & derived() + { + return static_cast(*this); + } Iter it_; }; - 
template + template using trans_iter = stl_interfaces::iterator_interface< Derived, - std::bidirectional_iterator_tag, + bidirectional_at_most_t, ValueType, ValueType>; } @@ -591,51 +616,55 @@ namespace boost::parser::detail { namespace text { BOOST_PARSER_DETAIL_TEXT_NAME #if defined(BOOST_TEXT_DOXYGEN) - /** Returns the first code unit in `[first, last)` that is not properly - UTF-8 encoded, or `last` if no such code unit is found. */ - template - requires std::random_access_iterator - constexpr I find_invalid_encoding(I first, I last); - - /** Returns the first code unit in `[first, last)` that is not properly - UTF-16 encoded, or `last` if no such code unit is found. */ - template - requires std::random_access_iterator - constexpr I find_invalid_encoding(I first, I last); - - /** Returns true iff `[first, last)` is properly UTF-8 encoded. */ - template - requires std::random_access_iterator - constexpr bool encoded(I first, I last); - - /** Returns true iff `[first, last)` is properly UTF-16 encoded */ - template - requires std::random_access_iterator - constexpr bool encoded(I first, I last); - - /** Returns true iff `[first, last)` is empty or the initial UTF-8 code - units in `[first, last)` form a valid Unicode code point. */ - template - requires std::random_access_iterator - constexpr bool starts_encoded(I first, I last); - - /** Returns true iff `[first, last)` is empty or the initial UTF-16 code - units in `[first, last)` form a valid Unicode code point. */ - template - requires std::random_access_iterator - constexpr bool starts_encoded(I first, I last); - - /** Returns true iff `[first, last)` is empty or the final UTF-8 code - units in `[first, last)` form a valid Unicode code point. */ - template - requires std::random_access_iterator - constexpr bool ends_encoded(I first, I last); - - /** Returns true iff `[first, last)` is empty or the final UTF-16 code - units in `[first, last)` form a valid Unicode code point. 
*/ - template - requires std::random_access_iterator - constexpr bool ends_encoded(I first, I last); + /** Returns the first code unit in `[r.begin(), r.end())` that is not + properly UTF-8 encoded, or `r.begin() + std::distance(r)` if no such + code unit is found. */ + template + requires std::ranges::forward_range + constexpr std::ranges::borrowed_iterator_t find_invalid_encoding(R && r); + + /** Returns the first code unit in `[r.begin(), r.end())` that is not + properly UTF-16 encoded, or `r.begin() + std::distance(r)` if no such + code unit is found. */ + template + requires std::ranges::forward_range + constexpr std::ranges::borrowed_iterator_t find_invalid_encoding(R && r); + + /** Returns true iff `r` is properly UTF-8 encoded. */ + template + requires std::ranges::forward_range + constexpr bool encoded(R && r); + + /** Returns true iff `r` is properly UTF-16 encoded */ + template + requires std::ranges::forward_range + constexpr bool encoded(R && r); + + /** Returns true iff `r` is empty or the initial UTF-8 code units in `r` + form a valid Unicode code point. */ + template + requires std::ranges::forward_range + constexpr bool starts_encoded(R && r); + + /** Returns true iff `r` is empty or the initial UTF-16 code units in `r` + form a valid Unicode code point. */ + template + requires std::ranges::forward_range + constexpr bool starts_encoded(R && r); + + /** Returns true iff `r` is empty or the final UTF-8 code units in `r` + form a valid Unicode code point. */ + template + requires std::ranges::bidirectional_range && + std::ranges::common_range + constexpr bool ends_encoded(R && r); + + /** Returns true iff `r` is empty or the final UTF-16 code units in `r` + form a valid Unicode code point. 
*/ + template + requires std::ranges::bidirectional_range && + std::ranges::common_range + constexpr bool ends_encoded(R && r); #endif @@ -751,12 +780,13 @@ namespace boost::parser::detail { namespace text { BOOST_PARSER_DETAIL_TEXT_NAME namespace boost::parser::detail { namespace text { BOOST_PARSER_DETAIL_TEXT_NAMESPACE_V2 { - template - // clang-format off - requires std::random_access_iterator - constexpr I find_invalid_encoding(I first, I last) - // clang-format on + template + requires std::ranges::forward_range + constexpr std::ranges::borrowed_iterator_t find_invalid_encoding(R && r) { + auto first = std::ranges::begin(r); + auto last = std::ranges::end(r); + while (first != last) { int const cp_bytes = boost::parser::detail::text::utf8_code_units(*first); if (cp_bytes == -1 || last - first < cp_bytes) @@ -768,15 +798,20 @@ namespace boost::parser::detail { namespace text { BOOST_PARSER_DETAIL_TEXT_NAME first += cp_bytes; } - return last; + if constexpr (std::ranges::borrowed_range) { + return last; + } else { + return std::ranges::dangling{}; + } } - template - // clang-format off - requires std::random_access_iterator - constexpr I find_invalid_encoding(I first, I last) - // clang-format on + template + requires std::ranges::forward_range + constexpr std::ranges::borrowed_iterator_t find_invalid_encoding(R && r) { + auto first = std::ranges::begin(r); + auto last = std::ranges::end(r); + while (first != last) { int const cp_units = boost::parser::detail::text::utf16_code_units(*first); if (cp_units == -1 || last - first < cp_units) @@ -788,33 +823,36 @@ namespace boost::parser::detail { namespace text { BOOST_PARSER_DETAIL_TEXT_NAME first += cp_units; } - return last; + if constexpr (std::ranges::borrowed_range) { + return last; + } else { + return std::ranges::dangling{}; + } } - template - // clang-format off - requires std::random_access_iterator - constexpr bool encoded(I first, I last) - // clang-format on + template + requires 
std::ranges::forward_range + constexpr bool encoded(R && r) { - return boost::parser::detail::text::find_invalid_encoding(first, last) == last; + return boost::parser::detail::text::v1::find_invalid_encoding(r.begin(), r.end()) == + r.end(); } - template - // clang-format off - requires std::random_access_iterator - constexpr bool encoded(I first, I last) - // clang-format on + template + requires std::ranges::forward_range + constexpr bool encoded(R && r) { - return boost::parser::detail::text::find_invalid_encoding(first, last) == last; + return boost::parser::detail::text::v1::find_invalid_encoding(r.begin(), r.end()) == + r.end(); } - template - // clang-format off - requires std::random_access_iterator - constexpr bool starts_encoded(I first, I last) - // clang-format on + template + requires std::ranges::forward_range + constexpr bool starts_encoded(R && r) { + auto first = std::ranges::begin(r); + auto last = std::ranges::end(r); + if (first == last) return true; @@ -825,12 +863,13 @@ namespace boost::parser::detail { namespace text { BOOST_PARSER_DETAIL_TEXT_NAME return !detail::end_of_invalid_utf8(first); } - template - // clang-format off - requires std::random_access_iterator - constexpr bool starts_encoded(I first, I last) - // clang-format on + template + requires std::ranges::forward_range + constexpr bool starts_encoded(R && r) { + auto first = std::ranges::begin(r); + auto last = std::ranges::end(r); + if (first == last) return true; @@ -841,12 +880,14 @@ namespace boost::parser::detail { namespace text { BOOST_PARSER_DETAIL_TEXT_NAME return cp_units == 1 || boost::parser::detail::text::low_surrogate(*(first + 1)); } - template - // clang-format off - requires std::random_access_iterator - constexpr bool ends_encoded(I first, I last) - // clang-format on + template + requires std::ranges::bidirectional_range && + std::ranges::common_range + constexpr bool ends_encoded(R && r) { + auto first = std::ranges::begin(r); + auto last = std::ranges::end(r); 
+ if (first == last) return true; @@ -857,12 +898,14 @@ namespace boost::parser::detail { namespace text { BOOST_PARSER_DETAIL_TEXT_NAME return boost::parser::detail::text::starts_encoded(it, last); } - template - // clang-format off - requires std::random_access_iterator - constexpr bool ends_encoded(I first, I last) - // clang-format on + template + requires std::ranges::bidirectional_range && + std::ranges::common_range + constexpr bool ends_encoded(R && r) { + auto first = std::ranges::begin(r); + auto last = std::ranges::end(r); + if (first == last) return true; @@ -883,387 +926,64 @@ namespace boost::parser::detail { namespace text { provides the Unicode replacement character on errors. */ struct use_replacement_character { - constexpr char32_t operator()(char const *) const + constexpr char32_t operator()(std::string_view) const noexcept { - return replacement_character(); + return replacement_character; } }; - /** A sentinel type that compares equal to a pointer to a 1-, 2-, or 4-byte integral value, iff the pointer is null. 
*/ struct null_sentinel_t { - constexpr null_sentinel_t base() const { return {}; } - }; - -#if defined(__cpp_inline_variables) - inline constexpr null_sentinel_t null_sentinel; -#else - namespace { - constexpr null_sentinel_t null_sentinel; - } -#endif - -#if defined(BOOST_TEXT_DOXYGEN) - - template - requires utf8_code_unit || utf16_code_unit || utf32_code_unit - constexpr auto operator==(T * p, null_sentinel_t); - - template - requires utf8_code_unit || utf16_code_unit || utf32_code_unit - constexpr auto operator!=(T * p, null_sentinel_t); - - template - requires utf8_code_unit || utf16_code_unit || utf32_code_unit - constexpr auto operator==(null_sentinel_t, T * p); - - template - requires utf8_code_unit || utf16_code_unit || utf32_code_unit - constexpr auto operator!=(null_sentinel_t, T * p); - -#else - -#if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS - - template - // clang-format off - requires utf8_code_unit || utf16_code_unit || utf32_code_unit - constexpr auto operator==(T * p, null_sentinel_t) - // clang-format on - { - return *p == 0; - } -#if 1 // TODO: This should not be necessary, one better support for op== - // rewriting is widely supported. 
- template - // clang-format off - requires utf8_code_unit || utf16_code_unit || utf32_code_unit - constexpr auto operator!=(T * p, null_sentinel_t) - // clang-format on - { - return *p != 0; - } - template - // clang-format off - requires utf8_code_unit || utf16_code_unit || utf32_code_unit - constexpr auto operator==(null_sentinel_t, T * p) - // clang-format on - { - return *p == 0; - } - template - // clang-format off - requires utf8_code_unit || utf16_code_unit || utf32_code_unit - constexpr auto operator!=(null_sentinel_t, T * p) - // clang-format on - { - return *p != 0; - } -#endif - -#else - - namespace detail { - template> - struct null_sent_eq_dispatch - {}; - - template - struct null_sent_eq_dispatch - { - static constexpr bool call(Ptr p) { return *p == 0; } - }; - } - - template - constexpr auto operator==(T * p, null_sentinel_t) - -> decltype(detail::null_sent_eq_dispatch::call(p)) - { - return detail::null_sent_eq_dispatch::call(p); - } - template - constexpr auto operator!=(T * p, null_sentinel_t) - -> decltype(detail::null_sent_eq_dispatch::call(p)) - { - return !detail::null_sent_eq_dispatch::call(p); - } - template - constexpr auto operator==(null_sentinel_t, T * p) - -> decltype(detail::null_sent_eq_dispatch::call(p)) - { - return detail::null_sent_eq_dispatch::call(p); - } - template - constexpr auto operator!=(null_sentinel_t, T * p) - -> decltype(detail::null_sent_eq_dispatch::call(p)) - { - return !detail::null_sent_eq_dispatch::call(p); - } - -#endif - -#endif - - - /** A UTF-8 to UTF-16 converting iterator. */ -#if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS - template< - utf8_iter I, - std::sentinel_for S = I, - transcoding_error_handler ErrorHandler = use_replacement_character> -#else - template< - typename I, - typename Sentinel = I, - typename ErrorHandler = use_replacement_character> -#endif - struct utf_8_to_16_iterator; - - - /** A UTF-32 to UTF-8 converting iterator. 
*/ #if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS - template< - utf32_iter I, - std::sentinel_for S = I, - transcoding_error_handler ErrorHandler = use_replacement_character> + template + requires std::default_initializable> && + std::equality_comparable_with, std::iter_value_t> #else - template< - typename I, - typename S = I, - typename ErrorHandler = use_replacement_character> -#endif - struct utf_32_to_8_iterator - : detail::trans_iter, char> - { -#if !BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS - static_assert( - std::is_same< - typename std::iterator_traits::iterator_category, - std::bidirectional_iterator_tag>::value || - std::is_same< - typename std::iterator_traits::iterator_category, - std::random_access_iterator_tag>::value, - "utf_32_to_8_iterator requires its I parameter to be at least " - "bidirectional."); - static_assert( - sizeof(typename std::iterator_traits::value_type) == 4, - "utf_32_to_8_iterator requires its I parameter to produce a " - "4-byte value_type."); + template #endif - - constexpr utf_32_to_8_iterator() : - first_(), it_(), last_(), index_(4), buf_() - {} - explicit constexpr utf_32_to_8_iterator(I first, I it, S last) : - first_(first), it_(it), last_(last), index_(0), buf_() - { - if (it_ != last_) - read_into_buf(); - } -#if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS - template - // clang-format off - requires std::convertible_to && std::convertible_to -#else - template< - typename I2, - typename S2, - typename Enable = std::enable_if_t< - std::is_convertible::value && - std::is_convertible::value>> -#endif - constexpr utf_32_to_8_iterator( - utf_32_to_8_iterator const & other) : - // clang-format on - first_(other.first_), - it_(other.it_), - last_(other.last_), - index_(other.index_), - buf_(other.buf_) - {} - - constexpr I begin() const { return first_; } - constexpr S end() const { return last_; } - - constexpr char operator*() const - { - return buf_[index_]; - } - - constexpr I base() const { return it_; } - - constexpr 
utf_32_to_8_iterator & operator++() - { - ++index_; - if (at_buf_end()) { - BOOST_PARSER_DEBUG_ASSERT(it_ != last_); - ++it_; - index_ = 0; - if (it_ != last_) - read_into_buf(); - } - return *this; - } - - constexpr utf_32_to_8_iterator & operator--() - { - if (0 < index_) { - --index_; - } else { - BOOST_PARSER_DEBUG_ASSERT(it_ != first_); - --it_; - auto out = read_into_buf(); - index_ = out - buf_.data() - 1; - } - return *this; - } - - template< - typename I1, - typename S1, - typename I2, - typename S2, - typename ErrorHandler2> - friend constexpr auto operator==( - utf_32_to_8_iterator const & lhs, - utf_32_to_8_iterator const & rhs) - -> decltype(lhs.base() == rhs.base()); - - friend bool - operator==(utf_32_to_8_iterator lhs, utf_32_to_8_iterator rhs) + friend constexpr auto operator==(I it, null_sentinel_t) { - return lhs.base() == rhs.base() && lhs.index_ == rhs.index_; + return *it == detail::iter_value_t{}; } - - using base_type = - detail::trans_iter, char>; - using base_type::operator++; - using base_type::operator--; - -#ifndef BOOST_TEXT_DOXYGEN - private: - constexpr bool buf_empty() const { return index_ == 4; } - - constexpr bool at_buf_end() const { return buf_[index_] == '\0'; } - - constexpr char * read_into_buf() +#if !BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS + template + friend constexpr auto operator!=(I it, null_sentinel_t) { - char32_t cp = static_cast(*it_); - index_ = 0; - char * retval = detail::read_into_buf(cp, buf_.data()); - *retval = 0; - return retval; + return *it != detail::iter_value_t{}; } - - I first_; - I it_; - S last_; - int index_; - std::array buf_; - -#if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS - template< - utf32_iter I2, - std::sentinel_for S2, - transcoding_error_handler ErrorHandler2> -#else - template -#endif - friend struct utf_32_to_8_iterator; - #endif }; - template - constexpr auto operator==( - utf_32_to_8_iterator const & lhs, - Sentinel rhs) -> decltype(lhs.base() == rhs) - { - return lhs.base() == rhs; - } - - 
template - constexpr auto operator==( - Sentinel lhs, - utf_32_to_8_iterator const & rhs) - -> decltype(rhs.base() == lhs) - { - return rhs.base() == lhs; - } - - template - constexpr auto operator!=( - utf_32_to_8_iterator const & lhs, - Sentinel rhs) -> decltype(lhs.base() != rhs) - { - return lhs.base() != rhs; - } - - template - constexpr auto operator!=( - Sentinel lhs, - utf_32_to_8_iterator const & rhs) - -> decltype(rhs.base() != lhs) - { - return rhs.base() != lhs; - } - - template< - typename Iter1, - typename Sentinel1, - typename Iter2, - typename Sentinel2, - typename ErrorHandler> - constexpr auto operator==( - utf_32_to_8_iterator const & lhs, - utf_32_to_8_iterator const & - rhs) -> decltype(lhs.base() == rhs.base()) - { - return lhs.base() == rhs.base() && rhs.index_ == lhs.index_; - } - - template< - typename Iter1, - typename Sentinel1, - typename Iter2, - typename Sentinel2, - typename ErrorHandler> - constexpr auto operator!=( - utf_32_to_8_iterator const & lhs, - utf_32_to_8_iterator const & - rhs) -> decltype(!(lhs == rhs)) - { - return !(lhs == rhs); +#if defined(__cpp_inline_variables) + inline constexpr null_sentinel_t null_sentinel; +#else + namespace { + constexpr null_sentinel_t null_sentinel; } - +#endif /** An out iterator that converts UTF-32 to UTF-8. 
*/ #if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS - template Iter> + template Iter> #else template #endif struct utf_32_to_8_out_iterator : detail::trans_ins_iter, Iter> { - utf_32_to_8_out_iterator() {} - explicit utf_32_to_8_out_iterator(Iter it) : + constexpr utf_32_to_8_out_iterator() {} + explicit constexpr utf_32_to_8_out_iterator(Iter it) : detail::trans_ins_iter, Iter>(it) {} - utf_32_to_8_out_iterator & operator=(char32_t cp) + constexpr utf_32_to_8_out_iterator & operator=(char32_t cp) { auto & out = this->iter(); out = detail::write_cp_utf8(cp, out); return *this; } - - Iter base() const - { - return const_cast(this)->iter(); - } }; /** An insert-iterator analogous to std::insert_iterator, that also @@ -1273,15 +993,15 @@ namespace boost::parser::detail { namespace text { utf_32_to_8_insert_iterator, std::insert_iterator> { - utf_32_to_8_insert_iterator() {} - utf_32_to_8_insert_iterator( + constexpr utf_32_to_8_insert_iterator() {} + constexpr utf_32_to_8_insert_iterator( Cont & c, typename Cont::iterator it) : detail::trans_ins_iter< utf_32_to_8_insert_iterator, std::insert_iterator>(std::insert_iterator(c, it)) {} - utf_32_to_8_insert_iterator & operator=(char32_t cp) + constexpr utf_32_to_8_insert_iterator & operator=(char32_t cp) { auto & out = this->iter(); out = detail::write_cp_utf8(cp, out); @@ -1297,15 +1017,15 @@ namespace boost::parser::detail { namespace text { utf_32_to_8_front_insert_iterator, std::front_insert_iterator> { - utf_32_to_8_front_insert_iterator() {} - explicit utf_32_to_8_front_insert_iterator(Cont & c) : + constexpr utf_32_to_8_front_insert_iterator() {} + explicit constexpr utf_32_to_8_front_insert_iterator(Cont & c) : detail::trans_ins_iter< utf_32_to_8_front_insert_iterator, std::front_insert_iterator>( std::front_insert_iterator(c)) {} - utf_32_to_8_front_insert_iterator & operator=(char32_t cp) + constexpr utf_32_to_8_front_insert_iterator & operator=(char32_t cp) { auto & out = this->iter(); out = detail::write_cp_utf8(cp, 
out); @@ -1321,15 +1041,15 @@ namespace boost::parser::detail { namespace text { utf_32_to_8_back_insert_iterator, std::back_insert_iterator> { - utf_32_to_8_back_insert_iterator() {} - explicit utf_32_to_8_back_insert_iterator(Cont & c) : + constexpr utf_32_to_8_back_insert_iterator() {} + explicit constexpr utf_32_to_8_back_insert_iterator(Cont & c) : detail::trans_ins_iter< utf_32_to_8_back_insert_iterator, std::back_insert_iterator>( std::back_insert_iterator(c)) {} - utf_32_to_8_back_insert_iterator & operator=(char32_t cp) + constexpr utf_32_to_8_back_insert_iterator & operator=(char32_t cp) { auto & out = this->iter(); out = detail::write_cp_utf8(cp, out); @@ -1338,472 +1058,63 @@ namespace boost::parser::detail { namespace text { }; - /** A UTF-8 to UTF-32 converting iterator. */ + namespace detail { + template + OutIter assign_8_to_32_insert( + unsigned char cu, char32_t & cp, int & state, OutIter out) + { + auto write = [&] { + *out = cp; + ++out; + state = invalid_table_state; + }; + auto start_cp = [&] { + first_cu const info = first_cus[cu]; + state = info.next; + cp = info.initial_octet; + if (state == bgn) + write(); + }; + if (state == invalid_table_state) { + start_cp(); + } else { + cp = (cp << 6) | (cu & 0x3f); + char_class const class_ = octet_classes[cu]; + state = transitions[state + class_]; + if (state == bgn) { + write(); + } else if (state == err) { + *out = replacement_character; + ++out; + start_cp(); + } + } + return out; + } + } + + /** An out iterator that converts UTF-8 to UTF-32. 
*/ #if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS - template< - utf8_iter I, - std::sentinel_for S = I, - transcoding_error_handler ErrorHandler = use_replacement_character> + template Iter> #else - template< - typename I, - typename S = I, - typename ErrorHandler = use_replacement_character> -#endif - struct utf_8_to_32_iterator - : detail::trans_iter, char32_t> - { - constexpr utf_8_to_32_iterator() : first_(), it_(), last_() {} - explicit constexpr utf_8_to_32_iterator(I first, I it, S last) : - first_(first), it_(it), last_(last) - {} -#if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS - template - // clang-format off - requires std::convertible_to && std::convertible_to -#else - template< - typename I2, - typename S2, - typename Enable = std::enable_if_t< - std::is_convertible::value && - std::is_convertible::value>> -#endif - constexpr utf_8_to_32_iterator( - utf_8_to_32_iterator const & other) : - // clang-format on - first_(other.first_), - it_(other.it_), - last_(other.last_) - {} - - constexpr I begin() const { return first_; } - constexpr S end() const { return last_; } - - constexpr char32_t operator*() const - { - BOOST_PARSER_DEBUG_ASSERT(!at_end(it_)); - unsigned char curr_c = *it_; - if (curr_c < 0x80) - return curr_c; - return get_value().value_; - } - - constexpr I base() const { return it_; } - - constexpr utf_8_to_32_iterator & operator++() - { - BOOST_PARSER_DEBUG_ASSERT(it_ != last_); - it_ = increment(); - return *this; - } - - constexpr utf_8_to_32_iterator & operator--() - { - BOOST_PARSER_DEBUG_ASSERT(it_ != first_); - it_ = detail::decrement(first_, it_); - return *this; - } - - friend bool - operator==(utf_8_to_32_iterator lhs, utf_8_to_32_iterator rhs) - { - return lhs.base() == rhs.base(); - } - - using base_type = detail:: - trans_iter, char32_t>; - using base_type::operator++; - using base_type::operator--; - -#ifndef BOOST_TEXT_DOXYGEN - private: - struct get_value_result - { - char32_t value_; - I it_; - }; - - constexpr bool check_continuation( 
- unsigned char c, - unsigned char lo = 0x80, - unsigned char hi = 0xbf) const - { - if (detail::in(lo, c, hi)) { - return true; - } else { - ErrorHandler{}( - "Invalid UTF-8 sequence; an expected continuation " - "code unit is missing."); - return false; - } - } - - constexpr bool at_end(I it) const - { - if (it == last_) { - ErrorHandler{}( - "Invalid UTF-8 sequence; expected another code unit " - "before the end of string."); - return true; - } else { - return false; - } - } - - constexpr get_value_result get_value() const - { - // It turns out that this naive implementation is faster than the - // table implementation for the converting iterators. -#if 1 - /* - Unicode 3.9/D92 - Table 3-7. Well-Formed UTF-8 Byte Sequences - - Code Points First Byte Second Byte Third Byte Fourth Byte - =========== ========== =========== ========== =========== - U+0000..U+007F 00..7F - U+0080..U+07FF C2..DF 80..BF - U+0800..U+0FFF E0 A0..BF 80..BF - U+1000..U+CFFF E1..EC 80..BF 80..BF - U+D000..U+D7FF ED 80..9F 80..BF - U+E000..U+FFFF EE..EF 80..BF 80..BF - U+10000..U+3FFFF F0 90..BF 80..BF 80..BF - U+40000..U+FFFFF F1..F3 80..BF 80..BF 80..BF - U+100000..U+10FFFF F4 80..8F 80..BF 80..BF - */ - - char32_t value = 0; - I next = it_; - unsigned char curr_c = *next; - - // One-byte case handled by caller - - // Two-byte - if (detail::in(0xc2, curr_c, 0xdf)) { - value = curr_c & 0b00011111; - ++next; - if (at_end(next)) - return get_value_result{replacement_character(), next}; - curr_c = *next; - if (!check_continuation(curr_c)) - return get_value_result{replacement_character(), next}; - value = (value << 6) + (curr_c & 0b00111111); - ++next; - // Three-byte - } else if (curr_c == 0xe0) { - value = curr_c & 0b00001111; - ++next; - if (at_end(next)) - return get_value_result{replacement_character(), next}; - curr_c = *next; - if (!check_continuation(curr_c, 0xa0, 0xbf)) - return get_value_result{replacement_character(), next}; - value = (value << 6) + (curr_c & 0b00111111); - ++next; 
- if (at_end(next)) - return get_value_result{replacement_character(), next}; - curr_c = *next; - if (!check_continuation(curr_c)) - return get_value_result{replacement_character(), next}; - value = (value << 6) + (curr_c & 0b00111111); - ++next; - } else if (detail::in(0xe1, curr_c, 0xec)) { - value = curr_c & 0b00001111; - ++next; - if (at_end(next)) - return get_value_result{replacement_character(), next}; - curr_c = *next; - if (!check_continuation(curr_c)) - return get_value_result{replacement_character(), next}; - value = (value << 6) + (curr_c & 0b00111111); - ++next; - if (at_end(next)) - return get_value_result{replacement_character(), next}; - curr_c = *next; - if (!check_continuation(curr_c)) - return get_value_result{replacement_character(), next}; - value = (value << 6) + (curr_c & 0b00111111); - ++next; - } else if (curr_c == 0xed) { - value = curr_c & 0b00001111; - ++next; - if (at_end(next)) - return get_value_result{replacement_character(), next}; - curr_c = *next; - if (!check_continuation(curr_c, 0x80, 0x9f)) - return get_value_result{replacement_character(), next}; - value = (value << 6) + (curr_c & 0b00111111); - ++next; - if (at_end(next)) - return get_value_result{replacement_character(), next}; - curr_c = *next; - if (!check_continuation(curr_c)) - return get_value_result{replacement_character(), next}; - value = (value << 6) + (curr_c & 0b00111111); - ++next; - } else if (detail::in(0xed, curr_c, 0xef)) { - value = curr_c & 0b00001111; - ++next; - if (at_end(next)) - return get_value_result{replacement_character(), next}; - curr_c = *next; - if (!check_continuation(curr_c)) - return get_value_result{replacement_character(), next}; - value = (value << 6) + (curr_c & 0b00111111); - ++next; - if (at_end(next)) - return get_value_result{replacement_character(), next}; - curr_c = *next; - if (!check_continuation(curr_c)) - return get_value_result{replacement_character(), next}; - value = (value << 6) + (curr_c & 0b00111111); - ++next; - // 
Four-byte - } else if (curr_c == 0xf0) { - value = curr_c & 0b00000111; - ++next; - if (at_end(next)) - return get_value_result{replacement_character(), next}; - curr_c = *next; - if (!check_continuation(curr_c, 0x90, 0xbf)) - return get_value_result{replacement_character(), next}; - value = (value << 6) + (curr_c & 0b00111111); - ++next; - if (at_end(next)) - return get_value_result{replacement_character(), next}; - curr_c = *next; - if (!check_continuation(curr_c)) - return get_value_result{replacement_character(), next}; - value = (value << 6) + (curr_c & 0b00111111); - ++next; - if (at_end(next)) - return get_value_result{replacement_character(), next}; - curr_c = *next; - if (!check_continuation(curr_c)) - return get_value_result{replacement_character(), next}; - value = (value << 6) + (curr_c & 0b00111111); - ++next; - } else if (detail::in(0xf1, curr_c, 0xf3)) { - value = curr_c & 0b00000111; - ++next; - if (at_end(next)) - return get_value_result{replacement_character(), next}; - curr_c = *next; - if (!check_continuation(curr_c)) - return get_value_result{replacement_character(), next}; - value = (value << 6) + (curr_c & 0b00111111); - ++next; - if (at_end(next)) - return get_value_result{replacement_character(), next}; - curr_c = *next; - if (!check_continuation(curr_c)) - return get_value_result{replacement_character(), next}; - value = (value << 6) + (curr_c & 0b00111111); - ++next; - if (at_end(next)) - return get_value_result{replacement_character(), next}; - curr_c = *next; - if (!check_continuation(curr_c)) - return get_value_result{replacement_character(), next}; - value = (value << 6) + (curr_c & 0b00111111); - ++next; - } else if (curr_c == 0xf4) { - value = curr_c & 0b00000111; - ++next; - if (at_end(next)) - return get_value_result{replacement_character(), next}; - curr_c = *next; - if (!check_continuation(curr_c, 0x80, 0x8f)) - return get_value_result{replacement_character(), next}; - value = (value << 6) + (curr_c & 0b00111111); - ++next; - if 
(at_end(next)) - return get_value_result{replacement_character(), next}; - curr_c = *next; - if (!check_continuation(curr_c)) - return get_value_result{replacement_character(), next}; - value = (value << 6) + (curr_c & 0b00111111); - ++next; - if (at_end(next)) - return get_value_result{replacement_character(), next}; - curr_c = *next; - if (!check_continuation(curr_c)) - return get_value_result{replacement_character(), next}; - value = (value << 6) + (curr_c & 0b00111111); - ++next; - } else { - value = ErrorHandler{}("Invalid initial UTF-8 code unit."); - ++next; - } - return get_value_result{value, next}; -#else - I next = it_; - char32_t const value = detail::advance(next, last_); - return get_value_result{value, next}; -#endif - } - - constexpr I increment() const - { - unsigned char curr_c = *it_; - if (curr_c < 0x80) - return std::next(it_); - return get_value().it_; - } - - I first_; - I it_; - S last_; - -#if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS - template< - utf8_iter I2, - std::sentinel_for S2, - transcoding_error_handler ErrorHandler2> -#else - template -#endif - friend struct utf_8_to_16_iterator; - -#if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS - template< - utf8_iter I2, - std::sentinel_for S2, - transcoding_error_handler ErrorHandler2> -#else - template -#endif - friend struct utf_8_to_32_iterator; - -#endif - }; - - template - constexpr auto operator==( - utf_8_to_32_iterator const & lhs, - Sentinel rhs) -> decltype(lhs.base() == rhs) - { - return lhs.base() == rhs; - } - - template - constexpr auto operator==( - Sentinel lhs, - utf_8_to_32_iterator const & rhs) - -> decltype(rhs.base() == lhs) - { - return rhs.base() == lhs; - } - - template - constexpr auto operator!=( - utf_8_to_32_iterator const & lhs, - Sentinel rhs) -> decltype(lhs.base() != rhs) - { - return lhs.base() != rhs; - } - - template - constexpr auto operator!=( - Sentinel lhs, - utf_8_to_32_iterator const & rhs) - -> decltype(rhs.base() != lhs) - { - return rhs.base() != lhs; - } - - 
template< - typename Iter1, - typename Sentinel1, - typename Iter2, - typename Sentinel2, - typename ErrorHandler> - constexpr auto operator==( - utf_8_to_32_iterator const & lhs, - utf_8_to_32_iterator const & - rhs) -> decltype(lhs.base() == rhs.base()) - { - return lhs.base() == rhs.base(); - } - - template< - typename Iter1, - typename Sentinel1, - typename Iter2, - typename Sentinel2, - typename ErrorHandler> - constexpr auto operator!=( - utf_8_to_32_iterator const & lhs, - utf_8_to_32_iterator const & - rhs) -> decltype(!(lhs == rhs)) - { - return !(lhs == rhs); - } - - - namespace detail { - template - OutIter assign_8_to_32_insert( - unsigned char cu, char32_t & cp, int & state, OutIter out) - { - auto write = [&] { - *out = cp; - ++out; - state = invalid_table_state; - }; - auto start_cp = [&] { - first_cu const info = first_cus[cu]; - state = info.next; - cp = info.initial_octet; - if (state == bgn) - write(); - }; - if (state == invalid_table_state) { - start_cp(); - } else { - cp = (cp << 6) | (cu & 0x3f); - char_class const class_ = octet_classes[cu]; - state = transitions[state + class_]; - if (state == bgn) { - write(); - } else if (state == err) { - *out = replacement_character(); - ++out; - start_cp(); - } - } - return out; - } - } - - /** An out iterator that converts UTF-8 to UTF-32. 
*/ -#if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS - template Iter> -#else - template + template #endif struct utf_8_to_32_out_iterator : detail::trans_ins_iter, Iter> { - utf_8_to_32_out_iterator() {} - explicit utf_8_to_32_out_iterator(Iter it) : + constexpr utf_8_to_32_out_iterator() {} + explicit constexpr utf_8_to_32_out_iterator(Iter it) : detail::trans_ins_iter, Iter>(it), state_(detail::invalid_table_state) {} - utf_8_to_32_out_iterator & operator=(uint16_t cu) + constexpr utf_8_to_32_out_iterator & operator=(char8_type cu) { auto & out = this->iter(); out = detail::assign_8_to_32_insert(cu, cp_, state_, out); return *this; } - Iter base() const - { - return const_cast(this)->iter(); - } - #ifndef BOOST_TEXT_DOXYGEN private: int state_; @@ -1818,8 +1129,8 @@ namespace boost::parser::detail { namespace text { utf_8_to_32_insert_iterator, std::insert_iterator> { - utf_8_to_32_insert_iterator() {} - utf_8_to_32_insert_iterator( + constexpr utf_8_to_32_insert_iterator() {} + constexpr utf_8_to_32_insert_iterator( Cont & c, typename Cont::iterator it) : detail::trans_ins_iter< utf_8_to_32_insert_iterator, @@ -1827,7 +1138,7 @@ namespace boost::parser::detail { namespace text { state_(detail::invalid_table_state) {} - utf_8_to_32_insert_iterator & operator=(uint16_t cu) + constexpr utf_8_to_32_insert_iterator & operator=(char16_t cu) { auto & out = this->iter(); out = detail::assign_8_to_32_insert(cu, cp_, state_, out); @@ -1849,8 +1160,8 @@ namespace boost::parser::detail { namespace text { utf_8_to_32_front_insert_iterator, std::front_insert_iterator> { - utf_8_to_32_front_insert_iterator() {} - explicit utf_8_to_32_front_insert_iterator(Cont & c) : + constexpr utf_8_to_32_front_insert_iterator() {} + explicit constexpr utf_8_to_32_front_insert_iterator(Cont & c) : detail::trans_ins_iter< utf_8_to_32_front_insert_iterator, std::front_insert_iterator>( @@ -1858,7 +1169,7 @@ namespace boost::parser::detail { namespace text { state_(detail::invalid_table_state) {} - 
utf_8_to_32_front_insert_iterator & operator=(uint16_t cu) + constexpr utf_8_to_32_front_insert_iterator & operator=(char16_t cu) { auto & out = this->iter(); out = detail::assign_8_to_32_insert(cu, cp_, state_, out); @@ -1880,8 +1191,8 @@ namespace boost::parser::detail { namespace text { utf_8_to_32_back_insert_iterator, std::back_insert_iterator> { - utf_8_to_32_back_insert_iterator() {} - explicit utf_8_to_32_back_insert_iterator(Cont & c) : + constexpr utf_8_to_32_back_insert_iterator() {} + explicit constexpr utf_8_to_32_back_insert_iterator(Cont & c) : detail::trans_ins_iter< utf_8_to_32_back_insert_iterator, std::back_insert_iterator>( @@ -1889,7 +1200,7 @@ namespace boost::parser::detail { namespace text { state_(detail::invalid_table_state) {} - utf_8_to_32_back_insert_iterator & operator=(uint16_t cu) + constexpr utf_8_to_32_back_insert_iterator & operator=(char16_t cu) { auto & out = this->iter(); out = detail::assign_8_to_32_insert(cu, cp_, state_, out); @@ -1904,246 +1215,26 @@ namespace boost::parser::detail { namespace text { }; - /** A UTF-32 to UTF-16 converting iterator. 
*/ -#if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS - template< - utf32_iter I, - std::sentinel_for S = I, - transcoding_error_handler ErrorHandler = use_replacement_character> -#else - template< - typename I, - typename S = I, - typename ErrorHandler = use_replacement_character> -#endif - struct utf_32_to_16_iterator - : detail:: - trans_iter, uint16_t> - { -#if !BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS - static_assert( - std::is_same< - typename std::iterator_traits::iterator_category, - std::bidirectional_iterator_tag>::value || - std::is_same< - typename std::iterator_traits::iterator_category, - std::random_access_iterator_tag>::value, - "utf_32_to_16_iterator requires its I parameter to be at " - "least " - "bidirectional."); - static_assert( - sizeof(typename std::iterator_traits::value_type) == 4, - "utf_32_to_16_iterator requires its I parameter to produce a " - "4-byte value_type."); -#endif - - constexpr utf_32_to_16_iterator() : - first_(), it_(), last_(), index_(2), buf_() - {} - explicit constexpr utf_32_to_16_iterator(I first, I it, S last) : - first_(first), it_(it), last_(last), index_(0), buf_() - { - if (it_ != last_) - read_into_buf(); - } -#if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS - template - // clang-format off - requires std::convertible_to && std::convertible_to -#else - template< - typename I2, - typename S2, - typename Enable = std::enable_if_t< - std::is_convertible::value && - std::is_convertible::value>> -#endif - constexpr utf_32_to_16_iterator( - utf_32_to_16_iterator const & other) - // clang-format on - : - first_(other.first_), - it_(other.it_), - last_(other.last_), - index_(other.index_), - buf_(other.buf_) - {} - - constexpr I begin() const { return first_; } - constexpr S end() const { return last_; } - - constexpr uint16_t operator*() const - { - return buf_[index_]; - } - - constexpr I base() const { return it_; } - - constexpr utf_32_to_16_iterator & operator++() - { - ++index_; - if (at_buf_end()) { - BOOST_PARSER_DEBUG_ASSERT(it_ 
!= last_); - ++it_; - index_ = 0; - if (it_ != last_) - read_into_buf(); - } - return *this; - } - - constexpr utf_32_to_16_iterator & operator--() - { - if (0 < index_) { - --index_; - } else { - BOOST_PARSER_DEBUG_ASSERT(it_ != first_); - --it_; - auto out = read_into_buf(); - index_ = out - buf_.data() - 1; - } - return *this; - } - - template< - typename I1, - typename S1, - typename I2, - typename S2, - typename ErrorHandler2> - friend constexpr auto operator==( - utf_32_to_16_iterator const & lhs, - utf_32_to_16_iterator const & rhs) - -> decltype(lhs.base() == rhs.base()); - - friend bool operator==( - utf_32_to_16_iterator lhs, utf_32_to_16_iterator rhs) - { - return lhs.base() == rhs.base() && lhs.index_ == rhs.index_; - } - - using base_type = detail:: - trans_iter, uint16_t>; - using base_type::operator++; - using base_type::operator--; - -#ifndef BOOST_TEXT_DOXYGEN - private: - constexpr bool at_buf_end() const { return buf_[index_] == 0; } - - constexpr uint16_t * read_into_buf() - { - auto const last = detail::write_cp_utf16(*it_, buf_.data()); - *last = 0; - return last; - } - - I first_; - I it_; - S last_; - int index_; - std::array buf_; - -#if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS - template< - utf32_iter I2, - std::sentinel_for S2, - transcoding_error_handler ErrorHandler2> -#else - template -#endif - friend struct utf_32_to_16_iterator; -#endif - }; - - template - constexpr auto operator==( - utf_32_to_16_iterator const & lhs, - Sentinel rhs) -> decltype(lhs.base() == rhs) - { - return lhs.base() == rhs; - } - - template - constexpr auto operator==( - Sentinel lhs, - utf_32_to_16_iterator const & - rhs) -> decltype(rhs.base() == lhs) - { - return rhs.base() == lhs; - } - - template - constexpr auto operator!=( - utf_32_to_16_iterator const & lhs, - Sentinel rhs) -> decltype(lhs.base() != rhs) - { - return lhs.base() != rhs; - } - - template - constexpr auto operator!=( - Sentinel lhs, - utf_32_to_16_iterator const & - rhs) -> 
decltype(rhs.base() != lhs) - { - return rhs.base() != lhs; - } - - template< - typename Iter1, - typename Sentinel1, - typename Iter2, - typename Sentinel2, - typename ErrorHandler> - constexpr auto operator==( - utf_32_to_16_iterator const & lhs, - utf_32_to_16_iterator const & - rhs) -> decltype(lhs.base() == rhs.base()) - { - return lhs.base() == rhs.base() && rhs.index_ == lhs.index_; - } - - template< - typename Iter1, - typename Sentinel1, - typename Iter2, - typename Sentinel2, - typename ErrorHandler> - constexpr auto operator!=( - utf_32_to_16_iterator const & lhs, - utf_32_to_16_iterator const & - rhs) -> decltype(!(lhs == rhs)) - { - return !(lhs == rhs); - } - - /** An out iterator that converts UTF-8 to UTF-16. */ #if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS - template Iter> + template Iter> #else template #endif struct utf_32_to_16_out_iterator : detail::trans_ins_iter, Iter> { - utf_32_to_16_out_iterator() {} - explicit utf_32_to_16_out_iterator(Iter it) : + constexpr utf_32_to_16_out_iterator() {} + explicit constexpr utf_32_to_16_out_iterator(Iter it) : detail::trans_ins_iter, Iter>(it) {} - utf_32_to_16_out_iterator & operator=(char32_t cp) + constexpr utf_32_to_16_out_iterator & operator=(char32_t cp) { auto & out = this->iter(); out = detail::write_cp_utf16(cp, out); return *this; } - - Iter base() const - { - return const_cast(this)->iter(); - } }; /** An insert-iterator analogous to std::insert_iterator, that also @@ -2154,15 +1245,15 @@ namespace boost::parser::detail { namespace text { utf_32_to_16_insert_iterator, std::insert_iterator> { - utf_32_to_16_insert_iterator() {} - utf_32_to_16_insert_iterator( + constexpr utf_32_to_16_insert_iterator() {} + constexpr utf_32_to_16_insert_iterator( Cont & c, typename Cont::iterator it) : detail::trans_ins_iter< utf_32_to_16_insert_iterator, std::insert_iterator>(std::insert_iterator(c, it)) {} - utf_32_to_16_insert_iterator & operator=(char32_t cp) + constexpr utf_32_to_16_insert_iterator & 
operator=(char32_t cp) { auto & out = this->iter(); out = detail::write_cp_utf16(cp, out); @@ -2178,15 +1269,15 @@ namespace boost::parser::detail { namespace text { utf_32_to_16_front_insert_iterator, std::front_insert_iterator> { - utf_32_to_16_front_insert_iterator() {} - explicit utf_32_to_16_front_insert_iterator(Cont & c) : + constexpr utf_32_to_16_front_insert_iterator() {} + explicit constexpr utf_32_to_16_front_insert_iterator(Cont & c) : detail::trans_ins_iter< utf_32_to_16_front_insert_iterator, std::front_insert_iterator>( std::front_insert_iterator(c)) {} - utf_32_to_16_front_insert_iterator & operator=(char32_t cp) + constexpr utf_32_to_16_front_insert_iterator & operator=(char32_t cp) { auto & out = this->iter(); out = detail::write_cp_utf16(cp, out); @@ -2202,15 +1293,15 @@ namespace boost::parser::detail { namespace text { utf_32_to_16_back_insert_iterator, std::back_insert_iterator> { - utf_32_to_16_back_insert_iterator() {} - explicit utf_32_to_16_back_insert_iterator(Cont & c) : + constexpr utf_32_to_16_back_insert_iterator() {} + explicit constexpr utf_32_to_16_back_insert_iterator(Cont & c) : detail::trans_ins_iter< utf_32_to_16_back_insert_iterator, std::back_insert_iterator>( std::back_insert_iterator(c)) {} - utf_32_to_16_back_insert_iterator & operator=(char32_t cp) + constexpr utf_32_to_16_back_insert_iterator & operator=(char32_t cp) { auto & out = this->iter(); out = detail::write_cp_utf16(cp, out); @@ -2219,256 +1310,14 @@ namespace boost::parser::detail { namespace text { }; - /** A UTF-16 to UTF-32 converting iterator. 
*/ -#if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS - template< - utf16_iter I, - std::sentinel_for S = I, - transcoding_error_handler ErrorHandler = use_replacement_character> -#else - template< - typename I, - typename S = I, - typename ErrorHandler = use_replacement_character> -#endif - struct utf_16_to_32_iterator - : detail:: - trans_iter, char32_t> - { -#if !BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS - static_assert( - std::is_same< - typename std::iterator_traits::iterator_category, - std::bidirectional_iterator_tag>::value || - std::is_same< - typename std::iterator_traits::iterator_category, - std::random_access_iterator_tag>::value, - "utf_16_to_32_iterator requires its I parameter to be at " - "least " - "bidirectional."); - static_assert( - sizeof(typename std::iterator_traits::value_type) == 2, - "utf_16_to_32_iterator requires its I parameter to produce a " - "2-byte value_type."); -#endif - - constexpr utf_16_to_32_iterator() : first_(), it_(), last_() {} - explicit constexpr utf_16_to_32_iterator(I first, I it, S last) : - first_(first), it_(it), last_(last) - {} -#if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS - template - // clang-format off - requires std::convertible_to && std::convertible_to -#else - template< - typename I2, - typename S2, - typename Enable = std::enable_if_t< - std::is_convertible::value && - std::is_convertible::value>> -#endif - constexpr utf_16_to_32_iterator( - utf_16_to_32_iterator const & other) - : - // clang-format off - first_(other.first_), it_(other.it_), last_(other.last_) - {} - - constexpr I begin() const { return first_; } - constexpr S end() const { return last_; } - - constexpr char32_t operator*() const - { - BOOST_PARSER_DEBUG_ASSERT(!at_end(it_)); - return get_value(*it_).value_; - } - - constexpr I base() const { return it_; } - - constexpr utf_16_to_32_iterator & operator++() - { - BOOST_PARSER_DEBUG_ASSERT(it_ != last_); - it_ = increment(); - return *this; - } - - constexpr utf_16_to_32_iterator & operator--() - { - 
BOOST_PARSER_DEBUG_ASSERT(it_ != first_); - if (boost::parser::detail::text::low_surrogate(*--it_)) { - if (it_ != first_ && - boost::parser::detail::text::high_surrogate(*std::prev(it_))) - --it_; - } - return *this; - } - - friend bool operator==( - utf_16_to_32_iterator lhs, utf_16_to_32_iterator rhs) - { - return lhs.base() == rhs.base(); - } - - using base_type = detail:: - trans_iter, char32_t>; - using base_type::operator++; - using base_type::operator--; - -#ifndef BOOST_TEXT_DOXYGEN - private: - struct get_value_result - { - char32_t value_; - I it_; - }; - - constexpr bool at_end(I it) const - { - if (it == last_) { - ErrorHandler{}( - "Invalid UTF-16 sequence; expected another code unit " - "before the end of string."); - return true; - } else { - return false; - } - } - - constexpr get_value_result get_value(uint16_t curr) const - { - char32_t value = 0; - I next = std::next(it_); - - if (high_surrogate(curr)) { - value = (curr - high_surrogate_base) << 10; - if (at_end(next)) { - return get_value_result{replacement_character(), next}; - } - curr = *next++; - if (!low_surrogate(curr)) { - return get_value_result{replacement_character(), next}; - } - value += curr - low_surrogate_base; - } else if (low_surrogate(curr)) { - value = ErrorHandler{}("Invalid initial UTF-16 code unit."); - return get_value_result{replacement_character(), next}; - } else { - value = curr; - } - - if (!unreserved_scalar_value(value)) { - value = ErrorHandler{}( - "UTF-16 sequence results in a non-scalar value, or a " - "reserved scalar value UTF-32 code point."); - } - - return get_value_result{value, next}; - } - - constexpr I increment() const - { - return get_value(*it_).it_; - } - - I first_; - I it_; - S last_; - -#if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS - template< - utf32_iter I2, - std::sentinel_for S2, - transcoding_error_handler ErrorHandler2> -#else - template -#endif - friend struct utf_32_to_16_iterator; - -#if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS - template< - 
utf16_iter I2, - std::sentinel_for S2, - transcoding_error_handler ErrorHandler2> -#else - template -#endif - friend struct utf_16_to_32_iterator; - -#endif - }; - - template - constexpr auto operator==( - utf_16_to_32_iterator const & lhs, - Sentinel rhs) -> decltype(lhs.base() == rhs) - { - return lhs.base() == rhs; - } - - template - constexpr auto operator==( - Sentinel lhs, - utf_16_to_32_iterator const & - rhs) -> decltype(rhs.base() == lhs) - { - return rhs.base() == lhs; - } - - template - constexpr auto operator!=( - utf_16_to_32_iterator const & lhs, - Sentinel rhs) -> decltype(lhs.base() != rhs) - { - return lhs.base() != rhs; - } - - template - constexpr auto operator!=( - Sentinel lhs, - utf_16_to_32_iterator const & - rhs) -> decltype(rhs.base() != lhs) - { - return rhs.base() != lhs; - } - - template< - typename Iter1, - typename Sentinel1, - typename Iter2, - typename Sentinel2, - typename ErrorHandler> - constexpr auto operator==( - utf_16_to_32_iterator const & lhs, - utf_16_to_32_iterator const & - rhs) -> decltype(lhs.base() == rhs.base()) - { - return lhs.base() == rhs.base(); - } - - template< - typename Iter1, - typename Sentinel1, - typename Iter2, - typename Sentinel2, - typename ErrorHandler> - constexpr auto operator!=( - utf_16_to_32_iterator const & lhs, - utf_16_to_32_iterator const & - rhs) -> decltype(!(lhs == rhs)) - { - return !(lhs == rhs); - } - - namespace detail { template OutIter - assign_16_to_32_insert(uint16_t & prev_cu, uint16_t cu, OutIter out) + assign_16_to_32_insert(char16_t & prev_cu, char16_t cu, OutIter out) { if (high_surrogate(cu)) { if (prev_cu) { - *out = replacement_character(); + *out = replacement_character; ++out; } prev_cu = cu; @@ -2477,13 +1326,13 @@ namespace boost::parser::detail { namespace text { *out = detail::surrogates_to_cp(prev_cu, cu); ++out; } else { - *out = replacement_character(); + *out = replacement_character; ++out; } prev_cu = 0; } else { if (prev_cu) { - *out = replacement_character(); 
+ *out = replacement_character; ++out; } *out = cu; @@ -2503,27 +1352,22 @@ namespace boost::parser::detail { namespace text { struct utf_16_to_32_out_iterator : detail::trans_ins_iter, Iter> { - utf_16_to_32_out_iterator() {} - explicit utf_16_to_32_out_iterator(Iter it) : + constexpr utf_16_to_32_out_iterator() {} + explicit constexpr utf_16_to_32_out_iterator(Iter it) : detail::trans_ins_iter, Iter>(it), prev_cu_(0) {} - utf_16_to_32_out_iterator & operator=(uint16_t cu) + constexpr utf_16_to_32_out_iterator & operator=(char16_t cu) { auto & out = this->iter(); out = detail::assign_16_to_32_insert(prev_cu_, cu, out); return *this; } - Iter base() const - { - return const_cast(this)->iter(); - } - #ifndef BOOST_TEXT_DOXYGEN private: - uint16_t prev_cu_; + char16_t prev_cu_; #endif }; @@ -2535,8 +1379,8 @@ namespace boost::parser::detail { namespace text { utf_16_to_32_insert_iterator, std::insert_iterator> { - utf_16_to_32_insert_iterator() {} - utf_16_to_32_insert_iterator( + constexpr utf_16_to_32_insert_iterator() {} + constexpr utf_16_to_32_insert_iterator( Cont & c, typename Cont::iterator it) : detail::trans_ins_iter< utf_16_to_32_insert_iterator, @@ -2544,7 +1388,7 @@ namespace boost::parser::detail { namespace text { prev_cu_(0) {} - utf_16_to_32_insert_iterator & operator=(uint16_t cu) + constexpr utf_16_to_32_insert_iterator & operator=(char16_t cu) { auto & out = this->iter(); out = detail::assign_16_to_32_insert(prev_cu_, cu, out); @@ -2553,7 +1397,7 @@ namespace boost::parser::detail { namespace text { #ifndef BOOST_TEXT_DOXYGEN private: - uint16_t prev_cu_; + char16_t prev_cu_; #endif }; @@ -2565,8 +1409,8 @@ namespace boost::parser::detail { namespace text { utf_16_to_32_front_insert_iterator, std::front_insert_iterator> { - utf_16_to_32_front_insert_iterator() {} - explicit utf_16_to_32_front_insert_iterator(Cont & c) : + constexpr utf_16_to_32_front_insert_iterator() {} + explicit constexpr utf_16_to_32_front_insert_iterator(Cont & c) : 
detail::trans_ins_iter< utf_16_to_32_front_insert_iterator, std::front_insert_iterator>( @@ -2574,7 +1418,7 @@ namespace boost::parser::detail { namespace text { prev_cu_(0) {} - utf_16_to_32_front_insert_iterator & operator=(uint16_t cu) + constexpr utf_16_to_32_front_insert_iterator & operator=(char16_t cu) { auto & out = this->iter(); out = detail::assign_16_to_32_insert(prev_cu_, cu, out); @@ -2583,7 +1427,7 @@ namespace boost::parser::detail { namespace text { #ifndef BOOST_TEXT_DOXYGEN private: - uint16_t prev_cu_; + char16_t prev_cu_; #endif }; @@ -2595,8 +1439,8 @@ namespace boost::parser::detail { namespace text { utf_16_to_32_back_insert_iterator, std::back_insert_iterator> { - utf_16_to_32_back_insert_iterator() {} - explicit utf_16_to_32_back_insert_iterator(Cont & c) : + constexpr utf_16_to_32_back_insert_iterator() {} + explicit constexpr utf_16_to_32_back_insert_iterator(Cont & c) : detail::trans_ins_iter< utf_16_to_32_back_insert_iterator, std::back_insert_iterator>( @@ -2604,7 +1448,7 @@ namespace boost::parser::detail { namespace text { prev_cu_(0) {} - utf_16_to_32_back_insert_iterator & operator=(uint16_t cu) + constexpr utf_16_to_32_back_insert_iterator & operator=(char16_t cu) { auto & out = this->iter(); out = detail::assign_16_to_32_insert(prev_cu_, cu, out); @@ -2613,301 +1457,31 @@ namespace boost::parser::detail { namespace text { #ifndef BOOST_TEXT_DOXYGEN private: - uint16_t prev_cu_; -#endif - }; - - - /** A UTF-16 to UTF-8 converting iterator. 
*/ -#if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS - template< - utf16_iter I, - std::sentinel_for S = I, - transcoding_error_handler ErrorHandler = use_replacement_character> -#else - template< - typename I, - typename S = I, - typename ErrorHandler = use_replacement_character> -#endif - struct utf_16_to_8_iterator - : detail::trans_iter, char> - { -#if !BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS - static_assert( - std::is_same< - typename std::iterator_traits::iterator_category, - std::bidirectional_iterator_tag>::value || - std::is_same< - typename std::iterator_traits::iterator_category, - std::random_access_iterator_tag>::value, - "utf_16_to_8_iterator requires its I parameter to be at least " - "bidirectional."); - static_assert( - sizeof(typename std::iterator_traits::value_type) == 2, - "utf_16_to_8_iterator requires its I parameter to produce a " - "2-byte value_type."); -#endif - - constexpr utf_16_to_8_iterator() : - first_(), it_(), last_(), index_(4), buf_() - {} - explicit constexpr - utf_16_to_8_iterator(I first, I it, S last) : - first_(first), it_(it), last_(last), index_(0), buf_() - { - if (it_ != last_) - read_into_buf(); - } -#if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS - template - // clang-format off - requires std::convertible_to && std::convertible_to -#else - template< - typename I2, - typename S2, - typename Enable = std::enable_if_t< - std::is_convertible::value && - std::is_convertible::value>> -#endif - constexpr utf_16_to_8_iterator( - utf_16_to_8_iterator const & other) : - // clang-format on - first_(other.first_), - it_(other.it_), - last_(other.last_), - index_(other.index_), - buf_(other.buf_) - {} - - constexpr I begin() const { return first_; } - constexpr S end() const { return last_; } - - constexpr char operator*() const { return buf_[index_]; } - - constexpr I base() const { return it_; } - - constexpr utf_16_to_8_iterator & operator++() - { - ++index_; - if (at_buf_end()) { - BOOST_PARSER_DEBUG_ASSERT(it_ != last_); - increment(); - 
index_ = 0; - if (it_ != last_) - read_into_buf(); - } - return *this; - } - - constexpr utf_16_to_8_iterator & operator--() - { - if (0 < index_) { - --index_; - } else { - BOOST_PARSER_DEBUG_ASSERT(it_ != first_); - decrement(); - auto out = read_into_buf(); - index_ = out - buf_.data() - 1; - } - return *this; - } - - template< - typename I1, - typename S1, - typename I2, - typename S2, - typename ErrorHandler2> - friend constexpr auto operator==( - utf_16_to_8_iterator const & lhs, - utf_16_to_8_iterator const & rhs) - -> decltype(lhs.base() == rhs.base()); - - friend bool - operator==(utf_16_to_8_iterator lhs, utf_16_to_8_iterator rhs) - { - return lhs.base() == rhs.base() && lhs.index_ == rhs.index_; - } - - using base_type = - detail::trans_iter, char>; - using base_type::operator++; - using base_type::operator--; - -#ifndef BOOST_TEXT_DOXYGEN - private: - constexpr bool at_end(I it) const - { - if (it == last_) { - ErrorHandler{}( - "Invalid UTF-16 sequence; expected another code unit " - "before the end of string."); - return true; - } else { - return false; - } - } - - constexpr bool at_buf_end() const { return buf_[index_] == '\0'; } - - constexpr char * read_into_buf() - { - I next = it_; - - char32_t first = static_cast(*next); - char32_t second = 0; - char32_t cp = first; - if (boost::parser::detail::text::high_surrogate(first)) { - if (at_end(++next)) - cp = replacement_character(); - else { - second = static_cast(*next); - if (!boost::parser::detail::text::low_surrogate(second)) { - ErrorHandler{}( - "Invalid UTF-16 sequence; expected low surrogate " - "after high surrogate."); - cp = replacement_character(); - } else { - cp = (first << 10) + second + surrogate_offset; - } - } - } else if (boost::parser::detail::text::surrogate(first)) { - ErrorHandler{}("Invalid initial UTF-16 code unit."); - cp = replacement_character(); - } - - char * retval = detail::read_into_buf(cp, buf_.data()); - *retval = 0; - return retval; - } - - constexpr void 
increment() - { - if (boost::parser::detail::text::high_surrogate(*it_)) { - ++it_; - if (it_ != last_ && boost::parser::detail::text::low_surrogate(*it_)) - ++it_; - } else { - ++it_; - } - } - - constexpr void decrement() - { - if (boost::parser::detail::text::low_surrogate(*--it_)) { - if (it_ != first_) - --it_; - } - } - - I first_; - I it_; - S last_; - int index_; - std::array buf_; - - // Unicode 3.8/D71-D74 - - static char32_t const surrogate_offset = - 0x10000 - (high_surrogate_min << 10) - low_surrogate_min; - -#if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS - template< - utf16_iter I2, - std::sentinel_for S2, - transcoding_error_handler ErrorHandler2> -#else - template -#endif - friend struct utf_16_to_8_iterator; + char16_t prev_cu_; #endif }; - template - constexpr auto operator==( - utf_16_to_8_iterator const & lhs, - Sentinel rhs) -> decltype(lhs.base() == rhs) - { - return lhs.base() == rhs; - } - - template - constexpr auto operator==( - Sentinel lhs, - utf_16_to_8_iterator const & rhs) - -> decltype(rhs.base() == lhs) - { - return rhs.base() == lhs; - } - - template - constexpr auto operator!=( - utf_16_to_8_iterator const & lhs, - Sentinel rhs) -> decltype(lhs.base() != rhs) - { - return lhs.base() != rhs; - } - - template - constexpr auto operator!=( - Sentinel lhs, - utf_16_to_8_iterator const & rhs) - -> decltype(rhs.base() != lhs) - { - return rhs.base() != lhs; - } - - template< - typename Iter1, - typename Sentinel1, - typename Iter2, - typename Sentinel2, - typename ErrorHandler> - constexpr auto operator==( - utf_16_to_8_iterator const & lhs, - utf_16_to_8_iterator const & - rhs) -> decltype(lhs.base() == rhs.base()) - { - return lhs.base() == rhs.base() && rhs.index_ == lhs.index_; - } - - template< - typename Iter1, - typename Sentinel1, - typename Iter2, - typename Sentinel2, - typename ErrorHandler> - constexpr auto operator!=( - utf_16_to_8_iterator const & lhs, - utf_16_to_8_iterator const & - rhs) -> decltype(!(lhs == rhs)) - { - return 
!(lhs == rhs); - } - namespace detail { template OutIter - assign_16_to_8_insert(uint16_t & prev_cu, uint16_t cu, OutIter out) + assign_16_to_8_insert(char16_t & prev_cu, char16_t cu, OutIter out) { if (high_surrogate(cu)) { if (prev_cu) - out = detail::write_cp_utf8(replacement_character(), out); + out = detail::write_cp_utf8(replacement_character, out); prev_cu = cu; } else if (low_surrogate(cu)) { if (prev_cu) { auto const cp = detail::surrogates_to_cp(prev_cu, cu); out = detail::write_cp_utf8(cp, out); } else { - out = detail::write_cp_utf8(replacement_character(), out); + out = detail::write_cp_utf8(replacement_character, out); } prev_cu = 0; } else { if (prev_cu) - out = detail::write_cp_utf8(replacement_character(), out); + out = detail::write_cp_utf8(replacement_character, out); out = detail::write_cp_utf8(cu, out); prev_cu = 0; } @@ -2917,34 +1491,29 @@ namespace boost::parser::detail { namespace text { /** An out iterator that converts UTF-16 to UTF-8. */ #if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS - template Iter> + template Iter> #else template #endif struct utf_16_to_8_out_iterator : detail::trans_ins_iter, Iter> { - utf_16_to_8_out_iterator() {} - explicit utf_16_to_8_out_iterator(Iter it) : + constexpr utf_16_to_8_out_iterator() {} + explicit constexpr utf_16_to_8_out_iterator(Iter it) : detail::trans_ins_iter, Iter>(it), prev_cu_(0) {} - utf_16_to_8_out_iterator & operator=(uint16_t cu) + constexpr utf_16_to_8_out_iterator & operator=(char16_t cu) { auto & out = this->iter(); out = detail::assign_16_to_8_insert(prev_cu_, cu, out); return *this; } - Iter base() const - { - return const_cast(this)->iter(); - } - #ifndef BOOST_TEXT_DOXYGEN private: - uint16_t prev_cu_; + char16_t prev_cu_; #endif }; @@ -2955,8 +1524,8 @@ namespace boost::parser::detail { namespace text { utf_16_to_8_insert_iterator, std::insert_iterator> { - utf_16_to_8_insert_iterator() {} - utf_16_to_8_insert_iterator( + constexpr utf_16_to_8_insert_iterator() {} + constexpr 
utf_16_to_8_insert_iterator( Cont & c, typename Cont::iterator it) : detail::trans_ins_iter< utf_16_to_8_insert_iterator, @@ -2964,7 +1533,7 @@ namespace boost::parser::detail { namespace text { prev_cu_(0) {} - utf_16_to_8_insert_iterator & operator=(uint16_t cu) + constexpr utf_16_to_8_insert_iterator & operator=(char16_t cu) { auto & out = this->iter(); out = detail::assign_16_to_8_insert(prev_cu_, cu, out); @@ -2973,7 +1542,7 @@ namespace boost::parser::detail { namespace text { #ifndef BOOST_TEXT_DOXYGEN private: - uint16_t prev_cu_; + char16_t prev_cu_; #endif }; @@ -2985,8 +1554,8 @@ namespace boost::parser::detail { namespace text { utf_16_to_8_front_insert_iterator, std::front_insert_iterator> { - utf_16_to_8_front_insert_iterator() {} - explicit utf_16_to_8_front_insert_iterator(Cont & c) : + constexpr utf_16_to_8_front_insert_iterator() {} + explicit constexpr utf_16_to_8_front_insert_iterator(Cont & c) : detail::trans_ins_iter< utf_16_to_8_front_insert_iterator, std::front_insert_iterator>( @@ -2994,7 +1563,7 @@ namespace boost::parser::detail { namespace text { prev_cu_(0) {} - utf_16_to_8_front_insert_iterator & operator=(uint16_t cu) + constexpr utf_16_to_8_front_insert_iterator & operator=(char16_t cu) { auto & out = this->iter(); out = detail::assign_16_to_8_insert(prev_cu_, cu, out); @@ -3003,7 +1572,7 @@ namespace boost::parser::detail { namespace text { #ifndef BOOST_TEXT_DOXYGEN private: - uint16_t prev_cu_; + char16_t prev_cu_; #endif }; @@ -3015,8 +1584,8 @@ namespace boost::parser::detail { namespace text { utf_16_to_8_back_insert_iterator, std::back_insert_iterator> { - utf_16_to_8_back_insert_iterator() {} - explicit utf_16_to_8_back_insert_iterator(Cont & c) : + constexpr utf_16_to_8_back_insert_iterator() {} + explicit constexpr utf_16_to_8_back_insert_iterator(Cont & c) : detail::trans_ins_iter< utf_16_to_8_back_insert_iterator, std::back_insert_iterator>( @@ -3024,7 +1593,7 @@ namespace boost::parser::detail { namespace text { 
prev_cu_(0) {} - utf_16_to_8_back_insert_iterator & operator=(uint16_t cu) + constexpr utf_16_to_8_back_insert_iterator & operator=(char16_t cu) { auto & out = this->iter(); out = detail::assign_16_to_8_insert(prev_cu_, cu, out); @@ -3033,260 +1602,66 @@ namespace boost::parser::detail { namespace text { #ifndef BOOST_TEXT_DOXYGEN private: - uint16_t prev_cu_; + char16_t prev_cu_; #endif }; -#if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS - template< - utf8_iter I, - std::sentinel_for S, - transcoding_error_handler ErrorHandler> -#else - template -#endif - struct utf_8_to_16_iterator - : detail::trans_iter, uint16_t> - { - constexpr utf_8_to_16_iterator() : it_(), index_(2), buf_() {} - explicit constexpr utf_8_to_16_iterator(I first, I it, S last) : - it_(first, it, last), index_(0), buf_() + namespace detail { + template + OutIter assign_8_to_16_insert( + unsigned char cu, char32_t & cp, int & state, OutIter out) { - if (it_.it_ != it_.last_) - read_into_buf(); - } -#if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS - template - // clang-format off - requires std::convertible_to && std::convertible_to -#else - template< - typename I2, - typename S2, - typename Enable = std::enable_if_t< - std::is_convertible::value && - std::is_convertible::value>> -#endif - constexpr utf_8_to_16_iterator( - utf_8_to_16_iterator const & other) : - // clang-format on - it_(other.it_), - index_(other.index_), - buf_(other.buf_) - {} - - constexpr I begin() const { return it_.begin(); } - constexpr S end() const { return it_.end(); } - - constexpr uint16_t operator*() const - { - return buf_[index_]; - } - - constexpr I base() const - { - return it_.base(); - } - - constexpr utf_8_to_16_iterator & operator++() - { - ++index_; - if (at_buf_end()) { - BOOST_PARSER_DEBUG_ASSERT(it_.it_ != it_.last_); - ++it_; - index_ = 0; - if (it_.it_ != it_.last_) - read_into_buf(); - } - return *this; - } - - constexpr utf_8_to_16_iterator & operator--() - { - if (0 < index_) { - --index_; - } else { - 
BOOST_PARSER_DEBUG_ASSERT(it_.it_ != it_.first_); - --it_; - auto out = read_into_buf(); - index_ = out - buf_.data() - 1; - } - return *this; - } - - template< - typename I1, - typename S1, - typename I2, - typename S2, - typename ErrorHandler2> - friend constexpr auto operator==( - utf_8_to_16_iterator const & lhs, - utf_8_to_16_iterator const & rhs) - -> decltype(lhs.base() == rhs.base()); - - friend bool - operator==(utf_8_to_16_iterator lhs, utf_8_to_16_iterator rhs) - { - return lhs.base() == rhs.base() && lhs.index_ == rhs.index_; - } - - using base_type = detail:: - trans_iter, uint16_t>; - using base_type::operator++; - using base_type::operator--; - -#ifndef BOOST_TEXT_DOXYGEN - private: - constexpr bool at_buf_end() const { return buf_[index_] == 0; } - - constexpr uint16_t * read_into_buf() - { - auto const last = detail::write_cp_utf16(*it_, buf_.data()); - *last = 0; - return last; - } - - utf_8_to_32_iterator it_; - int index_; - std::array buf_; - -#if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS - template< - utf8_iter I2, - std::sentinel_for S2, - transcoding_error_handler ErrorHandler2> -#else - template -#endif - friend struct utf_8_to_16_iterator; -#endif - }; - - template - constexpr auto operator==( - utf_8_to_16_iterator const & lhs, - Sentinel rhs) -> decltype(lhs.base() == rhs) - { - return lhs.base() == rhs; - } - - template - constexpr auto operator==( - Sentinel lhs, - utf_8_to_16_iterator const & rhs) - -> decltype(rhs.base() == lhs) - { - return rhs.base() == lhs; - } - - template - constexpr auto operator!=( - utf_8_to_16_iterator const & lhs, - Sentinel rhs) -> decltype(lhs.base() != rhs) - { - return lhs.base() != rhs; - } - - template - constexpr auto operator!=( - Sentinel lhs, - utf_8_to_16_iterator const & rhs) - -> decltype(rhs.base() != lhs) - { - return rhs.base() != lhs; - } - - template< - typename Iter1, - typename Sentinel1, - typename Iter2, - typename Sentinel2, - typename ErrorHandler> - constexpr auto operator==( - 
utf_8_to_16_iterator const & lhs, - utf_8_to_16_iterator const & rhs) - -> decltype(lhs.base() == rhs.base()) - { - return lhs.base() == rhs.base() && rhs.index_ == lhs.index_; - } - - template< - typename Iter1, - typename Sentinel1, - typename Iter2, - typename Sentinel2, - typename ErrorHandler> - constexpr auto operator!=( - utf_8_to_16_iterator const & lhs, - utf_8_to_16_iterator const & rhs) - -> decltype(!(lhs == rhs)) - { - return !(lhs == rhs); - } - - - namespace detail { - template - OutIter assign_8_to_16_insert( - unsigned char cu, char32_t & cp, int & state, OutIter out) - { - auto write = [&] { - out = detail::write_cp_utf16(cp, out); - state = invalid_table_state; - }; - auto start_cp = [&] { - first_cu const info = first_cus[cu]; - state = info.next; - cp = info.initial_octet; - if (state == bgn) - write(); - }; - if (state == invalid_table_state) { - start_cp(); - } else { - cp = (cp << 6) | (cu & 0x3f); - char_class const class_ = octet_classes[cu]; - state = transitions[state + class_]; - if (state == bgn) { - write(); - } else if (state == err) { - out = detail::write_cp_utf16(replacement_character(), out); - start_cp(); - } - } - return out; + auto write = [&] { + out = detail::write_cp_utf16(cp, out); + state = invalid_table_state; + }; + auto start_cp = [&] { + first_cu const info = first_cus[cu]; + state = info.next; + cp = info.initial_octet; + if (state == bgn) + write(); + }; + if (state == invalid_table_state) { + start_cp(); + } else { + cp = (cp << 6) | (cu & 0x3f); + char_class const class_ = octet_classes[cu]; + state = transitions[state + class_]; + if (state == bgn) { + write(); + } else if (state == err) { + out = detail::write_cp_utf16(replacement_character, out); + start_cp(); + } + } + return out; } } /** An out iterator that converts UTF-8 to UTF-16. 
*/ #if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS - template Iter> + template Iter> #else template #endif struct utf_8_to_16_out_iterator : detail::trans_ins_iter, Iter> { - utf_8_to_16_out_iterator() {} - explicit utf_8_to_16_out_iterator(Iter it) : + constexpr utf_8_to_16_out_iterator() {} + explicit constexpr utf_8_to_16_out_iterator(Iter it) : detail::trans_ins_iter, Iter>(it), state_(detail::invalid_table_state) {} - utf_8_to_16_out_iterator & operator=(uint16_t cu) + constexpr utf_8_to_16_out_iterator & operator=(char8_type cu) { auto & out = this->iter(); out = detail::assign_8_to_16_insert(cu, cp_, state_, out); return *this; } - Iter base() const - { - return const_cast(this)->iter(); - } - #ifndef BOOST_TEXT_DOXYGEN private: int state_; @@ -3301,8 +1676,8 @@ namespace boost::parser::detail { namespace text { utf_8_to_16_insert_iterator, std::insert_iterator> { - utf_8_to_16_insert_iterator() {} - utf_8_to_16_insert_iterator( + constexpr utf_8_to_16_insert_iterator() {} + constexpr utf_8_to_16_insert_iterator( Cont & c, typename Cont::iterator it) : detail::trans_ins_iter< utf_8_to_16_insert_iterator, @@ -3310,7 +1685,7 @@ namespace boost::parser::detail { namespace text { state_(detail::invalid_table_state) {} - utf_8_to_16_insert_iterator & operator=(uint16_t cu) + constexpr utf_8_to_16_insert_iterator & operator=(char16_t cu) { auto & out = this->iter(); out = detail::assign_8_to_16_insert(cu, cp_, state_, out); @@ -3332,8 +1707,8 @@ namespace boost::parser::detail { namespace text { utf_8_to_16_front_insert_iterator, std::front_insert_iterator> { - utf_8_to_16_front_insert_iterator() {} - explicit utf_8_to_16_front_insert_iterator(Cont & c) : + constexpr utf_8_to_16_front_insert_iterator() {} + explicit constexpr utf_8_to_16_front_insert_iterator(Cont & c) : detail::trans_ins_iter< utf_8_to_16_front_insert_iterator, std::front_insert_iterator>( @@ -3341,7 +1716,7 @@ namespace boost::parser::detail { namespace text { state_(detail::invalid_table_state) {} - 
utf_8_to_16_front_insert_iterator & operator=(uint16_t cu) + constexpr utf_8_to_16_front_insert_iterator & operator=(char16_t cu) { auto & out = this->iter(); out = detail::assign_8_to_16_insert(cu, cp_, state_, out); @@ -3363,8 +1738,8 @@ namespace boost::parser::detail { namespace text { utf_8_to_16_back_insert_iterator, std::back_insert_iterator> { - utf_8_to_16_back_insert_iterator() {} - explicit utf_8_to_16_back_insert_iterator(Cont & c) : + constexpr utf_8_to_16_back_insert_iterator() {} + explicit constexpr utf_8_to_16_back_insert_iterator(Cont & c) : detail::trans_ins_iter< utf_8_to_16_back_insert_iterator, std::back_insert_iterator>( @@ -3372,7 +1747,7 @@ namespace boost::parser::detail { namespace text { state_(detail::invalid_table_state) {} - utf_8_to_16_back_insert_iterator & operator=(uint16_t cu) + constexpr utf_8_to_16_back_insert_iterator & operator=(char16_t cu) { auto & out = this->iter(); out = detail::assign_8_to_16_insert(cu, cp_, state_, out); @@ -3388,15 +1763,15 @@ namespace boost::parser::detail { namespace text { }} -#include +#include namespace boost::parser::detail { namespace text { namespace detail { - template + template struct make_utf8_dispatch; template<> - struct make_utf8_dispatch + struct make_utf8_dispatch { template static constexpr Iter call(Iter first, Iter it, Sentinel last) @@ -3406,43 +1781,55 @@ namespace boost::parser::detail { namespace text { namespace detail { }; template<> - struct make_utf8_dispatch + struct make_utf8_dispatch { template - static constexpr utf_16_to_8_iterator + static constexpr utf_iterator< + format::utf16, + format::utf8, + Iter, + Sentinel> call(Iter first, Iter it, Sentinel last) { - return utf_16_to_8_iterator(first, it, last); + return {first, it, last}; } }; template<> - struct make_utf8_dispatch + struct make_utf8_dispatch { template - static constexpr utf_32_to_8_iterator + static constexpr utf_iterator< + format::utf32, + format::utf8, + Iter, + Sentinel> call(Iter first, Iter it, 
Sentinel last) { - return utf_32_to_8_iterator(first, it, last); + return {first, it, last}; } }; - template + template struct make_utf16_dispatch; template<> - struct make_utf16_dispatch + struct make_utf16_dispatch { template - static constexpr utf_8_to_16_iterator + static constexpr utf_iterator< + format::utf8, + format::utf16, + Iter, + Sentinel> call(Iter first, Iter it, Sentinel last) { - return utf_8_to_16_iterator(first, it, last); + return {first, it, last}; } }; template<> - struct make_utf16_dispatch + struct make_utf16_dispatch { template static constexpr Iter call(Iter first, Iter it, Sentinel last) @@ -3452,43 +1839,55 @@ namespace boost::parser::detail { namespace text { namespace detail { }; template<> - struct make_utf16_dispatch + struct make_utf16_dispatch { template - static constexpr utf_32_to_16_iterator + static constexpr utf_iterator< + format::utf32, + format::utf16, + Iter, + Sentinel> call(Iter first, Iter it, Sentinel last) { - return utf_32_to_16_iterator(first, it, last); + return {first, it, last}; } }; - template + template struct make_utf32_dispatch; template<> - struct make_utf32_dispatch + struct make_utf32_dispatch { template - static constexpr utf_8_to_32_iterator + static constexpr utf_iterator< + format::utf8, + format::utf32, + Iter, + Sentinel> call(Iter first, Iter it, Sentinel last) { - return utf_8_to_32_iterator(first, it, last); + return {first, it, last}; } }; template<> - struct make_utf32_dispatch + struct make_utf32_dispatch { template - static constexpr utf_16_to_32_iterator + static constexpr utf_iterator< + format::utf16, + format::utf32, + Iter, + Sentinel> call(Iter first, Iter it, Sentinel last) { - return utf_16_to_32_iterator(first, it, last); + return {first, it, last}; } }; template<> - struct make_utf32_dispatch + struct make_utf32_dispatch { template static constexpr Iter call(Iter first, Iter it, Sentinel last) @@ -3586,7 +1985,7 @@ namespace boost::parser::detail { namespace text { 
BOOST_PARSER_DETAIL_TEXT_NAME /** Returns a `utf_32_to_8_out_iterator` constructed from the given iterator. */ - template O> + template O> utf_32_to_8_out_iterator utf_32_to_8_out(O it); /** Returns a `utf_8_to_32_out_iterator` constructed from the given @@ -3596,7 +1995,7 @@ namespace boost::parser::detail { namespace text { BOOST_PARSER_DETAIL_TEXT_NAME /** Returns a `utf_32_to_16_out_iterator` constructed from the given iterator. */ - template O> + template O> utf_32_to_16_out_iterator utf_32_to_16_out(O it); /** Returns a `utf_16_to_32_out_iterator` constructed from the given @@ -3606,27 +2005,27 @@ namespace boost::parser::detail { namespace text { BOOST_PARSER_DETAIL_TEXT_NAME /** Returns a `utf_16_to_8_out_iterator` constructed from the given iterator. */ - template O> + template O> utf_16_to_8_out_iterator utf_16_to_8_out(O it); /** Returns a `utf_8_to_16_out_iterator` constructed from the given iterator. */ - template O> + template O> utf_8_to_16_out_iterator utf_8_to_16_out(O it); /** Returns an iterator equivalent to `it` that transcodes `[first, last)` to UTF-8. */ - template S> + template S> auto utf8_iterator(I first, I it, S last); /** Returns an iterator equivalent to `it` that transcodes `[first, last)` to UTF-16. */ - template S> + template S> auto utf16_iterator(I first, I it, S last); /** Returns an iterator equivalent to `it` that transcodes `[first, last)` to UTF-32. 
*/ - template S> + template S> auto utf32_iterator(I first, I it, S last); /** Returns a inserting iterator that transcodes from UTF-8 to UTF-8, @@ -3751,34 +2150,34 @@ namespace boost::parser::detail { namespace text { BOOST_PARSER_DETAIL_TEXT_NAME template auto utf8_iterator(Iter first, Iter it, Sentinel last) { - auto const unpacked = detail::unpack_iterator_and_sentinel(first, last); + auto const unpacked = text::unpack_iterator_and_sentinel(first, last); auto const unpacked_it = - detail::unpack_iterator_and_sentinel(it, last).f_; - using tag_type = decltype(unpacked.tag_); - return detail::make_utf8_dispatch::call( - unpacked.f_, unpacked_it, unpacked.l_); + text::unpack_iterator_and_sentinel(it, last).first; + constexpr format tag = unpacked.format_tag; + return detail::make_utf8_dispatch::call( + unpacked.first, unpacked_it, unpacked.last); } template auto utf16_iterator(Iter first, Iter it, Sentinel last) { - auto const unpacked = detail::unpack_iterator_and_sentinel(first, last); + auto const unpacked = text::unpack_iterator_and_sentinel(first, last); auto const unpacked_it = - detail::unpack_iterator_and_sentinel(it, last).f_; - using tag_type = decltype(unpacked.tag_); - return detail::make_utf16_dispatch::call( - unpacked.f_, unpacked_it, unpacked.l_); + text::unpack_iterator_and_sentinel(it, last).first; + constexpr format tag = unpacked.format_tag; + return detail::make_utf16_dispatch::call( + unpacked.first, unpacked_it, unpacked.last); } template auto utf32_iterator(Iter first, Iter it, Sentinel last) { - auto const unpacked = detail::unpack_iterator_and_sentinel(first, last); + auto const unpacked = text::unpack_iterator_and_sentinel(first, last); auto const unpacked_it = - detail::unpack_iterator_and_sentinel(it, last).f_; - using tag_type = decltype(unpacked.tag_); - return detail::make_utf32_dispatch::call( - unpacked.f_, unpacked_it, unpacked.l_); + text::unpack_iterator_and_sentinel(it, last).first; + constexpr format tag = 
unpacked.format_tag; + return detail::make_utf32_dispatch::call( + unpacked.first, unpacked_it, unpacked.last); } template @@ -3886,56 +2285,56 @@ namespace boost::parser::detail { namespace text { BOOST_PARSER_DETAIL_TEXT_NAME namespace boost::parser::detail { namespace text { BOOST_PARSER_DETAIL_TEXT_NAMESPACE_V2 { - template O> - utf_32_to_8_out_iterator utf_32_to_8_out(O it) + template O> + constexpr utf_32_to_8_out_iterator utf_32_to_8_out(O it) { return utf_32_to_8_out_iterator(it); } template O> - utf_8_to_32_out_iterator utf_8_to_32_out(O it) + constexpr utf_8_to_32_out_iterator utf_8_to_32_out(O it) { return utf_8_to_32_out_iterator(it); } - template O> - utf_32_to_16_out_iterator utf_32_to_16_out(O it) + template O> + constexpr utf_32_to_16_out_iterator utf_32_to_16_out(O it) { return utf_32_to_16_out_iterator(it); } template O> - utf_16_to_32_out_iterator utf_16_to_32_out(O it) + constexpr utf_16_to_32_out_iterator utf_16_to_32_out(O it) { return utf_16_to_32_out_iterator(it); } - template O> - utf_16_to_8_out_iterator utf_16_to_8_out(O it) + template O> + constexpr utf_16_to_8_out_iterator utf_16_to_8_out(O it) { return utf_16_to_8_out_iterator(it); } - template O> - utf_8_to_16_out_iterator utf_8_to_16_out(O it) + template O> + constexpr utf_8_to_16_out_iterator utf_8_to_16_out(O it) { return utf_8_to_16_out_iterator(it); } - template S> - auto utf8_iterator(I first, I it, S last) + template S> + constexpr auto utf8_iterator(I first, I it, S last) { return v1::utf8_iterator(first, it, last); } - template S> - auto utf16_iterator(I first, I it, S last) + template S> + constexpr auto utf16_iterator(I first, I it, S last) { return v1::utf16_iterator(first, it, last); } - template S> - auto utf32_iterator(I first, I it, S last) + template S> + constexpr auto utf32_iterator(I first, I it, S last) { return v1::utf32_iterator(first, it, last); } @@ -3943,8 +2342,8 @@ namespace boost::parser::detail { namespace text { BOOST_PARSER_DETAIL_TEXT_NAME template // 
clang-format off requires requires { typename Cont::value_type; } && - std::is_integral_v - auto from_utf8_inserter(Cont & c, typename Cont::iterator it) + utf_code_unit + constexpr auto from_utf8_inserter(Cont & c, typename Cont::iterator it) // clang-format on { if constexpr (sizeof(typename Cont::value_type) == 1) { @@ -3959,8 +2358,8 @@ namespace boost::parser::detail { namespace text { BOOST_PARSER_DETAIL_TEXT_NAME template // clang-format off requires requires { typename Cont::value_type; } && - std::is_integral_v - auto from_utf16_inserter(Cont & c, typename Cont::iterator it) + utf_code_unit + constexpr auto from_utf16_inserter(Cont & c, typename Cont::iterator it) // clang-format on { if constexpr (sizeof(typename Cont::value_type) == 1) { @@ -3975,8 +2374,8 @@ namespace boost::parser::detail { namespace text { BOOST_PARSER_DETAIL_TEXT_NAME template // clang-format off requires requires { typename Cont::value_type; } && - std::is_integral_v - auto from_utf32_inserter(Cont & c, typename Cont::iterator it) + utf_code_unit + constexpr auto from_utf32_inserter(Cont & c, typename Cont::iterator it) // clang-format on { if constexpr (sizeof(typename Cont::value_type) == 1) { @@ -3991,8 +2390,8 @@ namespace boost::parser::detail { namespace text { BOOST_PARSER_DETAIL_TEXT_NAME template // clang-format off requires requires { typename Cont::value_type; } && - std::is_integral_v - auto from_utf8_back_inserter(Cont & c) + utf_code_unit + constexpr auto from_utf8_back_inserter(Cont & c) // clang-format on { if constexpr (sizeof(typename Cont::value_type) == 1) { @@ -4007,8 +2406,8 @@ namespace boost::parser::detail { namespace text { BOOST_PARSER_DETAIL_TEXT_NAME template // clang-format off requires requires { typename Cont::value_type; } && - std::is_integral_v - auto from_utf16_back_inserter(Cont & c) + utf_code_unit + constexpr auto from_utf16_back_inserter(Cont & c) // clang-format on { if constexpr (sizeof(typename Cont::value_type) == 1) { @@ -4023,8 +2422,8 
@@ namespace boost::parser::detail { namespace text { BOOST_PARSER_DETAIL_TEXT_NAME template // clang-format off requires requires { typename Cont::value_type; } && - std::is_integral_v - auto from_utf32_back_inserter(Cont & c) + utf_code_unit + constexpr auto from_utf32_back_inserter(Cont & c) // clang-format on { if constexpr (sizeof(typename Cont::value_type) == 1) { @@ -4039,8 +2438,8 @@ namespace boost::parser::detail { namespace text { BOOST_PARSER_DETAIL_TEXT_NAME template // clang-format off requires requires { typename Cont::value_type; } && - std::is_integral_v - auto from_utf8_front_inserter(Cont & c) + utf_code_unit + constexpr auto from_utf8_front_inserter(Cont & c) // clang-format on { if constexpr (sizeof(typename Cont::value_type) == 1) { @@ -4055,8 +2454,8 @@ namespace boost::parser::detail { namespace text { BOOST_PARSER_DETAIL_TEXT_NAME template // clang-format off requires requires { typename Cont::value_type; } && - std::is_integral_v - auto from_utf16_front_inserter(Cont & c) + utf_code_unit + constexpr auto from_utf16_front_inserter(Cont & c) // clang-format on { if constexpr (sizeof(typename Cont::value_type) == 1) { @@ -4071,8 +2470,8 @@ namespace boost::parser::detail { namespace text { BOOST_PARSER_DETAIL_TEXT_NAME template // clang-format off requires requires { typename Cont::value_type; } && - std::is_integral_v - auto from_utf32_front_inserter(Cont & c) + utf_code_unit + constexpr auto from_utf32_front_inserter(Cont & c) // clang-format on { if constexpr (sizeof(typename Cont::value_type) == 1) { @@ -4088,4 +2487,988 @@ namespace boost::parser::detail { namespace text { BOOST_PARSER_DETAIL_TEXT_NAME #endif +namespace boost::parser::detail { namespace text { + namespace detail { + template + constexpr auto format_to_type() + { + if constexpr (Format == format::utf8) { + return char8_type{}; + } else if constexpr (Format == format::utf16) { + return char16_t{}; + } else { + return char32_t{}; + } + } + + template + constexpr bool 
is_bidi = +#if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS + std::bidirectional_iterator +#else + std::is_base_of_v< + std::bidirectional_iterator_tag, + typename std::iterator_traits::iterator_category> +#endif + ; + + template> + struct first_and_curr + { + first_and_curr() = default; + first_and_curr(I curr) : curr{curr} {} + first_and_curr(const first_and_curr & other) = default; + template< + class I2 +#if !BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS + , + typename Enable = std::enable_if_t> +#endif + > +#if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS // TODO + requires std::convertible_to +#endif + first_and_curr(const first_and_curr & other) : curr{other.curr} + {} + + I curr; + }; + template + struct first_and_curr + { + first_and_curr() = default; + first_and_curr(I first, I curr) : first{first}, curr{curr} {} + first_and_curr(const first_and_curr & other) = default; + template< + class I2 +#if !BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS + , + typename Enable = std::enable_if_t> +#endif + > +#if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS + requires std::convertible_to +#endif + first_and_curr(const first_and_curr & other) : + first{other.first}, curr{other.curr} + {} + + I first; + I curr; + }; + } + +#if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS + template< + format FromFormat, + format ToFormat, + std::input_iterator I, + std::sentinel_for S, + transcoding_error_handler ErrorHandler> + requires std::convertible_to, detail::format_to_type_t> +#else + template< + format FromFormat, + format ToFormat, + typename I, + typename S, + typename ErrorHandler> +#endif + class utf_iterator + : public stl_interfaces::iterator_interface< + utf_iterator, + detail::bidirectional_at_most_t, + detail::format_to_type_t, + detail::format_to_type_t> + { + static_assert( + FromFormat == format::utf8 || FromFormat == format::utf16 || + FromFormat == format::utf32); + static_assert( + ToFormat == format::utf8 || ToFormat == format::utf16 || + ToFormat == format::utf32); + +#if 
!BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS + template + constexpr static bool is_bidirectional = std::is_base_of_v< + std::bidirectional_iterator_tag, + detail::bidirectional_at_most_t>; + template + constexpr static bool is_forward = std::is_base_of_v< + std::forward_iterator_tag, + detail::bidirectional_at_most_t>; + template + constexpr static bool is_input = !is_bidirectional && !is_forward; +#endif + + static_assert( +#if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS + std::forward_iterator +#else + is_forward +#endif + || noexcept(ErrorHandler{}(""))); + + public: + using value_type = detail::format_to_type_t; + + constexpr utf_iterator() = default; + +#if !BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS + template< + typename J = I, + typename Enable = std::enable_if_t>> +#endif + constexpr utf_iterator(I first, I it, S last) +#if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS + requires std::bidirectional_iterator +#endif + : first_and_curr_{first, it}, last_(last) + { + if (curr() != last_) + read(); + } +#if !BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS + template< + typename J = I, + typename Enable = std::enable_if_t>> +#endif + constexpr utf_iterator(I it, S last) +#if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS + requires(!std::bidirectional_iterator) +#endif + : + first_and_curr_{it}, last_(last) + { + if (curr() != last_) + read(); + } + + template< + class I2, + class S2 +#if !BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS + , + typename Enable = std::enable_if_t< + std::is_convertible_v && std::is_convertible_v> +#endif + > +#if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS + requires std::convertible_to && std::convertible_to +#endif + constexpr utf_iterator( + utf_iterator const & + other) : + buf_(other.buf_), + first_and_curr_(other.first_and_curr_), + buf_index_(other.buf_index_), + buf_last_(other.buf_last_), + last_(other.last_) + {} + +#if !BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS + template< + typename J = I, + typename Enable = std::enable_if_t>> +#endif + constexpr I begin() const +#if 
BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS + requires std::bidirectional_iterator +#endif + { + return first(); + } + constexpr S end() const { return last_; } + +#if !BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS + template< + typename J = I, + typename Enable = std::enable_if_t>> +#endif + constexpr I base() const +#if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS + requires std::forward_iterator +#endif + { + return curr(); + } + + constexpr value_type operator*() const + { + BOOST_PARSER_DEBUG_ASSERT(buf_index_ < buf_last_); + return buf_[buf_index_]; + } + + constexpr utf_iterator & operator++() + { + BOOST_PARSER_DEBUG_ASSERT(buf_index_ != buf_last_ || curr() != last_); + if (buf_index_ + 1 == buf_last_ && curr() != last_) { + if constexpr ( +#if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS + std::forward_iterator +#else + is_forward +#endif + ) { + std::advance(curr(), to_increment_); + } + if (curr() == last_) + buf_index_ = 0; + else + read(); + } else if (buf_index_ + 1 <= buf_last_) { + ++buf_index_; + } + return *this; + } + +#if !BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS + template< + typename J = I, + typename Enable = std::enable_if_t>> +#endif + constexpr utf_iterator & operator--() +#if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS + requires std::bidirectional_iterator +#endif + { + BOOST_PARSER_DEBUG_ASSERT(buf_index_ || curr() != first()); + if (!buf_index_ && curr() != first()) + read_reverse(); + else if (buf_index_) + --buf_index_; + return *this; + } + + friend constexpr bool operator==( +#if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS + utf_iterator +#else + std::enable_if_t, utf_iterator> +#endif + lhs, utf_iterator rhs) +#if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS + requires std::forward_iterator || requires(I i) { i == i; } +#endif + { + if constexpr ( +#if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS + std::forward_iterator +#else + is_forward +#endif + ) { + return lhs.curr() == rhs.curr() && lhs.buf_index_ == rhs.buf_index_; + } else { + if (lhs.curr() != rhs.curr()) + return false; + + 
if (lhs.buf_index_ == rhs.buf_index_ && + lhs.buf_last_ == rhs.buf_last_) { + return true; + } + + return lhs.buf_index_ == lhs.buf_last_ && + rhs.buf_index_ == rhs.buf_last_; + } + } + +#if !BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS + friend constexpr bool operator!=( + std::enable_if_t, utf_iterator> lhs, utf_iterator rhs) + { return !(lhs == rhs); } +#endif + + friend constexpr bool operator==(utf_iterator lhs, S rhs) + { + if constexpr ( +#if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS + std::forward_iterator +#else + is_forward +#endif + ) { + return lhs.curr() == rhs; + } else { + return lhs.curr() == rhs && lhs.buf_index_ == lhs.buf_last_; + } + } + +#if !BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS + friend constexpr bool operator!=(utf_iterator lhs, S rhs) + { return !(lhs == rhs); } +#endif + + // exposition only + using base_type = stl_interfaces::iterator_interface< + utf_iterator, + detail::bidirectional_at_most_t, + value_type, + value_type>; + using base_type::operator++; + using base_type::operator--; + + private: + constexpr char32_t decode_code_point() + { + if constexpr (FromFormat == format::utf8) { + char32_t cp = *curr(); + ++curr(); + to_increment_ = 1; + if (cp < 0x80) + return cp; + + // clang-format off + + // It turns out that this naive implementation is faster than + // the table implementation for the converting iterators. + + /* + Unicode 3.9/D92 + Table 3-7. 
Well-Formed UTF-8 Byte Sequences + + Code Points First Byte Second Byte Third Byte Fourth Byte + =========== ========== =========== ========== =========== + U+0000..U+007F 00..7F + U+0080..U+07FF C2..DF 80..BF + U+0800..U+0FFF E0 A0..BF 80..BF + U+1000..U+CFFF E1..EC 80..BF 80..BF + U+D000..U+D7FF ED 80..9F 80..BF + U+E000..U+FFFF EE..EF 80..BF 80..BF + U+10000..U+3FFFF F0 90..BF 80..BF 80..BF + U+40000..U+FFFFF F1..F3 80..BF 80..BF 80..BF + U+100000..U+10FFFF F4 80..8F 80..BF 80..BF + */ + // clang-format on + + char8_type curr_c = cp; + + auto error = [&]() { + return ErrorHandler{}("Ill-formed UTF-8."); + }; + auto next = [&]() { + ++curr(); + ++to_increment_; + }; + + // One-byte case handled above + + // Two-byte + if (detail::in(0xc2, curr_c, 0xdf)) { + cp = curr_c & 0b00011111; + if (curr() == last_) + return error(); + curr_c = *curr(); + if (!detail::in(0x80, curr_c, 0xbf)) + return error(); + cp = (cp << 6) + (curr_c & 0b00111111); + next(); + // Three-byte + } else if (curr_c == 0xe0) { + cp = curr_c & 0b00001111; + if (curr() == last_) + return error(); + curr_c = *curr(); + if (!detail::in(0xa0, curr_c, 0xbf)) + return error(); + cp = (cp << 6) + (curr_c & 0b00111111); + next(); + if (curr() == last_) + return error(); + curr_c = *curr(); + if (!detail::in(0x80, curr_c, 0xbf)) + return error(); + cp = (cp << 6) + (curr_c & 0b00111111); + next(); + } else if (detail::in(0xe1, curr_c, 0xec)) { + cp = curr_c & 0b00001111; + if (curr() == last_) + return error(); + curr_c = *curr(); + if (!detail::in(0x80, curr_c, 0xbf)) + return error(); + cp = (cp << 6) + (curr_c & 0b00111111); + next(); + if (curr() == last_) + return error(); + curr_c = *curr(); + if (!detail::in(0x80, curr_c, 0xbf)) + return error(); + cp = (cp << 6) + (curr_c & 0b00111111); + next(); + } else if (curr_c == 0xed) { + cp = curr_c & 0b00001111; + if (curr() == last_) + return error(); + curr_c = *curr(); + if (!detail::in(0x80, curr_c, 0x9f)) + return error(); + cp = (cp << 6) + (curr_c 
& 0b00111111); + next(); + if (curr() == last_) + return error(); + curr_c = *curr(); + if (!detail::in(0x80, curr_c, 0xbf)) + return error(); + cp = (cp << 6) + (curr_c & 0b00111111); + next(); + } else if (detail::in(0xee, curr_c, 0xef)) { + cp = curr_c & 0b00001111; + if (curr() == last_) + return error(); + curr_c = *curr(); + if (!detail::in(0x80, curr_c, 0xbf)) + return error(); + cp = (cp << 6) + (curr_c & 0b00111111); + next(); + if (curr() == last_) + return error(); + curr_c = *curr(); + if (!detail::in(0x80, curr_c, 0xbf)) + return error(); + cp = (cp << 6) + (curr_c & 0b00111111); + next(); + // Four-byte + } else if (curr_c == 0xf0) { + cp = curr_c & 0b00000111; + if (curr() == last_) + return error(); + curr_c = *curr(); + if (!detail::in(0x90, curr_c, 0xbf)) + return error(); + cp = (cp << 6) + (curr_c & 0b00111111); + next(); + if (curr() == last_) + return error(); + curr_c = *curr(); + if (!detail::in(0x80, curr_c, 0xbf)) + return error(); + cp = (cp << 6) + (curr_c & 0b00111111); + next(); + if (curr() == last_) + return error(); + curr_c = *curr(); + if (!detail::in(0x80, curr_c, 0xbf)) + return error(); + cp = (cp << 6) + (curr_c & 0b00111111); + next(); + } else if (detail::in(0xf1, curr_c, 0xf3)) { + cp = curr_c & 0b00000111; + if (curr() == last_) + return error(); + curr_c = *curr(); + if (!detail::in(0x80, curr_c, 0xbf)) + return error(); + cp = (cp << 6) + (curr_c & 0b00111111); + next(); + if (curr() == last_) + return error(); + curr_c = *curr(); + if (!detail::in(0x80, curr_c, 0xbf)) + return error(); + cp = (cp << 6) + (curr_c & 0b00111111); + next(); + if (curr() == last_) + return error(); + curr_c = *curr(); + if (!detail::in(0x80, curr_c, 0xbf)) + return error(); + cp = (cp << 6) + (curr_c & 0b00111111); + next(); + } else if (curr_c == 0xf4) { + cp = curr_c & 0b00000111; + if (curr() == last_) + return error(); + curr_c = *curr(); + if (!detail::in(0x80, curr_c, 0x8f)) + return error(); + cp = (cp << 6) + (curr_c & 0b00111111); + 
next(); + if (curr() == last_) + return error(); + curr_c = *curr(); + if (!detail::in(0x80, curr_c, 0xbf)) + return error(); + cp = (cp << 6) + (curr_c & 0b00111111); + next(); + if (curr() == last_) + return error(); + curr_c = *curr(); + if (!detail::in(0x80, curr_c, 0xbf)) + return error(); + cp = (cp << 6) + (curr_c & 0b00111111); + next(); + } else { + return error(); + } + return cp; + } else if constexpr (FromFormat == format::utf16) { + char16_t hi = *curr(); + ++curr(); + to_increment_ = 1; + if (!boost::parser::detail::text::surrogate(hi)) + return hi; + + if (boost::parser::detail::text::low_surrogate(hi)) { + return ErrorHandler{}( + "Invalid UTF-16 sequence; lone trailing surrogate."); + } + + // high surrogate + if (curr() == last_) { + return ErrorHandler{}( + "Invalid UTF-16 sequence; lone leading surrogate."); + } + + char16_t lo = *curr(); + ++curr(); + ++to_increment_; + if (!boost::parser::detail::text::low_surrogate(lo)) { + return ErrorHandler{}( + "Invalid UTF-16 sequence; lone leading surrogate."); + } + + return char32_t((hi - high_surrogate_base) << 10) + + (lo - low_surrogate_base); + } else { + char32_t retval = *curr(); + ++curr(); + to_increment_ = 1; + return retval; + } + } + +#if !BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS + template< + typename J = I, + typename Enable = std::enable_if_t>> +#endif + constexpr char32_t decode_code_point_reverse() +#if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS + requires std::bidirectional_iterator +#endif + { + if constexpr (FromFormat == format::utf8) { + curr() = detail::decrement(first(), curr()); + auto initial = curr(); + char32_t cp = decode_code_point(); + curr() = initial; + return cp; + } else if constexpr (FromFormat == format::utf16) { + char16_t lo = *--curr(); + if (!boost::parser::detail::text::surrogate(lo)) + return lo; + + if (boost::parser::detail::text::high_surrogate(lo)) { + return ErrorHandler{}( + "Invalid UTF-16 sequence; lone leading surrogate."); + } + + // low surrogate + if 
(curr() == first()) { + return ErrorHandler{}( + "Invalid UTF-16 sequence; lone trailing surrogate."); + } + + char16_t hi = *detail::prev(curr()); + if (!boost::parser::detail::text::high_surrogate(hi)) { + return ErrorHandler{}( + "Invalid UTF-16 sequence; lone trailing surrogate."); + } + --curr(); + + return char32_t((hi - high_surrogate_base) << 10) + + (lo - low_surrogate_base); + } else { + return *--curr(); + } + } + + template + static constexpr Out encode_code_point(char32_t cp, Out out) + { + if constexpr (ToFormat == format::utf8) { + if (cp < 0x80) { + *out++ = static_cast(cp); + } else if (cp < 0x800) { + *out++ = static_cast(0xC0 + (cp >> 6)); + *out++ = static_cast(0x80 + (cp & 0x3f)); + } else if (cp < 0x10000) { + *out++ = static_cast(0xe0 + (cp >> 12)); + *out++ = static_cast(0x80 + ((cp >> 6) & 0x3f)); + *out++ = static_cast(0x80 + (cp & 0x3f)); + } else { + *out++ = static_cast(0xf0 + (cp >> 18)); + *out++ = static_cast(0x80 + ((cp >> 12) & 0x3f)); + *out++ = static_cast(0x80 + ((cp >> 6) & 0x3f)); + *out++ = static_cast(0x80 + (cp & 0x3f)); + } + } else if constexpr (ToFormat == format::utf16) { + if (cp < 0x10000) { + *out++ = static_cast(cp); + } else { + *out++ = + static_cast(cp >> 10) + high_surrogate_base; + *out++ = + static_cast(cp & 0x3ff) + low_surrogate_base; + } + } else { + *out++ = cp; + } + return out; + } + + constexpr void read() + { + I initial; + if constexpr ( +#if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS + std::forward_iterator +#else + is_forward +#endif + ) { + initial = curr(); + } + if constexpr (noexcept(ErrorHandler{}(""))) { + char32_t cp = decode_code_point(); + auto it = encode_code_point(cp, buf_.begin()); + buf_index_ = 0; + buf_last_ = it - buf_.begin(); + } else { + auto buf = buf_; +#if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS + try { +#endif + char32_t cp = decode_code_point(); + auto it = encode_code_point(cp, buf_.begin()); + buf_index_ = 0; + buf_last_ = it - buf_.begin(); +#if 
BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS + } catch (...) { + buf_ = buf; + curr() = initial; + throw; + } +#endif + } + if constexpr ( +#if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS + std::forward_iterator +#else + is_forward +#endif + ) { + curr() = initial; + } + } + + constexpr void read_reverse() + { + auto initial = curr(); + if constexpr (noexcept(ErrorHandler{}(""))) { + char32_t cp = decode_code_point_reverse(); + auto it = encode_code_point(cp, buf_.begin()); + buf_last_ = it - buf_.begin(); + buf_index_ = buf_last_ - 1; + to_increment_ = std::distance(curr(), initial); + } else { + auto buf = buf_; +#if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS + try { +#endif + char32_t cp = decode_code_point_reverse(); + auto it = encode_code_point(cp, buf_.begin()); + buf_last_ = it - buf_.begin(); + buf_index_ = buf_last_ - 1; + to_increment_ = std::distance(curr(), initial); +#if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS + } catch (...) { + buf_ = buf; + curr() = initial; + throw; + } +#endif + } + } + +#if !BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS + template< + typename J = I, + typename Enable = std::enable_if_t>> +#endif + constexpr I first() const +#if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS + requires std::bidirectional_iterator +#endif + { + return first_and_curr_.first; + } + constexpr I & curr() { return first_and_curr_.curr; } + constexpr I curr() const { return first_and_curr_.curr; } + + std::array(ToFormat)> buf_; + + detail::first_and_curr first_and_curr_; + + uint8_t buf_index_ = 0; + uint8_t buf_last_ = 0; + uint8_t to_increment_ = 0; + + [[no_unique_address]] S last_; + +#if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS + template< + format FromFormat2, + format ToFormat2, + std::input_iterator I2, + std::sentinel_for S2, + transcoding_error_handler ErrorHandler2> + requires std::convertible_to, detail::format_to_type_t> +#else + template< + format FromFormat2, + format ToFormat2, + typename I2, + typename S2, + typename ErrorHandler2> +#endif + friend class utf_iterator; + }; + 
+}} + +namespace boost::parser::detail { namespace text { namespace detail { + + template + constexpr bool is_utf_iter = false; + template< + format FromFormat, + format ToFormat, + class I, + class S, + class ErrorHandler> + constexpr bool + is_utf_iter> = + true; + + // These are here because so many downstream views that use + // utf_iterator use them. + +#if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS + + template + constexpr bool common_range_v = std::ranges::common_range; + template + constexpr bool forward_range_v = std::ranges::forward_range; + template + constexpr bool bidirectional_range_v = std::ranges::bidirectional_range; + template + constexpr bool default_initializable_v = std::default_initializable; + + template + constexpr bool utf32_range_v = utf32_range; + +#else + + template + using range_expr = + decltype(detail::begin(std::declval()) == detail::end(std::declval())); + template + constexpr bool is_range_v = is_detected_v; + + template + constexpr bool common_range_v = + is_range_v && std::is_same_v, sentinel_t>; + template + constexpr bool input_range_v = is_range_v && std::is_base_of_v< + std::input_iterator_tag, + typename std::iterator_traits>::iterator_category>; + template + constexpr bool forward_range_v = is_range_v && std::is_base_of_v< + std::forward_iterator_tag, + typename std::iterator_traits>::iterator_category>; + template + constexpr bool bidirectional_range_v = is_range_v && std::is_base_of_v< + std::bidirectional_iterator_tag, + typename std::iterator_traits>::iterator_category>; + template + constexpr bool default_initializable_v = std::is_default_constructible_v; + + template + constexpr bool utf_range_v = is_range_v && code_unit_v>; + + template + constexpr bool + utf32_range_v = is_range_v && + ( +#if !defined(_MSC_VER) + std::is_same_v, wchar_t> || +#endif + std::is_same_v, char32_t>); + +#endif + + template + constexpr bool random_access_iterator_v = std::is_base_of_v< + std::random_access_iterator_tag, + typename 
std::iterator_traits::iterator_category>; + template + constexpr bool bidirectional_iterator_v = std::is_base_of_v< + std::bidirectional_iterator_tag, + typename std::iterator_traits::iterator_category>; + template + constexpr bool forward_iterator_v = std::is_base_of_v< + std::forward_iterator_tag, + typename std::iterator_traits::iterator_category>; + + template< + class V, + bool StoreFirst = !is_utf_iter> && common_range_v && + bidirectional_range_v, + bool StoreLast = !is_utf_iter>> + struct first_last_storage + { +#if !BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS + template< + typename Enable = std::enable_if_t< + default_initializable_v> && + default_initializable_v>>> +#endif + constexpr first_last_storage() +#if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS + requires default_initializable_v> && + default_initializable_v> +#endif + {} + constexpr first_last_storage(V & base) : + first_{detail::begin(base)}, last_{detail::end(base)} + {} + + constexpr auto begin(iterator_t & it) const { return first_; } + constexpr auto end(iterator_t & it) const { return last_; } + + iterator_t first_; + sentinel_t last_; + }; + + template + using trinary_iter_ctor = decltype(I( + std::declval().begin(), + std::declval().end(), + std::declval().end())); + + template + struct first_last_storage + { +#if !BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS + template< + typename Enable = + std::enable_if_t>>> +#endif + constexpr first_last_storage() +#if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS + requires default_initializable_v> +#endif + {} + constexpr first_last_storage(V & base) : first_{detail::begin(base)} {} + + constexpr auto begin(iterator_t & it) const { return first_; } + constexpr auto end(iterator_t & it) const { + if constexpr ( +#if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS + requires { iterator_t(it.begin(), it.end(), it.end()); } +#else + is_detected_v> +#endif + ) { + return iterator_t(it.begin(), it.end(), it.end()); + } else { + return it.end(); + } + } + + iterator_t first_; + }; + + 
template + struct first_last_storage + { +#if !BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS + template< + typename Enable = + std::enable_if_t>>> +#endif + constexpr first_last_storage() +#if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS + requires default_initializable_v> +#endif + {} + constexpr first_last_storage(V & base) : last_{detail::end(base)} {} + + constexpr auto begin(iterator_t & it) const { + if constexpr (is_utf_iter>) { + return iterator_t(it.begin(), it.begin(), it.end()); + } else { + return; + } + } + constexpr auto end(iterator_t & it) const { return last_; } + + sentinel_t last_; + }; + + template + struct first_last_storage + { + constexpr first_last_storage() = default; + constexpr first_last_storage(V & base) {} + + constexpr auto begin(iterator_t & it) const { + if constexpr (is_utf_iter>) { + return iterator_t(it.begin(), it.begin(), it.end()); + } else { + return; + } + } + constexpr auto end(iterator_t & it) const { + if constexpr ( +#if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS + requires { iterator_t(it.begin(), it.end(), it.end()); } +#else + is_detected_v> +#endif + ) { + return iterator_t(it.begin(), it.end(), it.end()); + } else { + return it.end(); + } + } + }; + + + template + constexpr auto uc_view_category() { + if constexpr (common_range_v && bidirectional_range_v) { + return std::bidirectional_iterator_tag{}; + } else { + return std::forward_iterator_tag{}; + } + } + + template + using uc_view_category_t = decltype(uc_view_category()); + + template + using maybe_const = std::conditional_t; + + template + constexpr bool is_empty_view = false; +#if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS + template + constexpr bool is_empty_view> = true; +#endif + +}}} + #endif diff --git a/include/boost/parser/detail/text/transcode_iterator_fwd.hpp b/include/boost/parser/detail/text/transcode_iterator_fwd.hpp new file mode 100644 index 00000000..54721d61 --- /dev/null +++ b/include/boost/parser/detail/text/transcode_iterator_fwd.hpp @@ -0,0 +1,101 @@ +// Copyright 
(C) 2023 T. Zachary Laine +// +// Distributed under the Boost Software License, Version 1.0. (See +// accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt) +#ifndef BOOST_PARSER_DETAIL_TEXT_TRANSCODE_ITERATOR_FWD_HPP +#define BOOST_PARSER_DETAIL_TEXT_TRANSCODE_ITERATOR_FWD_HPP + +#include + + +namespace boost::parser::detail { namespace text { + + struct use_replacement_character; + + namespace detail { + template< + typename RepackedIterator, + typename I, + typename S, + typename Then> + struct bidi_repacker; + } +}} + +namespace boost::parser::detail { namespace text { + + namespace detail { + template + constexpr auto format_to_type(); + + template + using format_to_type_t = decltype(format_to_type()); + } + +#if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS + template< + format FromFormat, + format ToFormat, + std::input_iterator I, + std::sentinel_for S = I, + transcoding_error_handler ErrorHandler = use_replacement_character> + requires std::convertible_to, detail::format_to_type_t> +#else + template< + format FromFormat, + format ToFormat, + typename I, + typename S = I, + typename ErrorHandler = use_replacement_character> +#endif + class utf_iterator; + +#if BOOST_PARSER_DETAIL_TEXT_USE_ALIAS_CTAD + + template< + utf8_iter I, + std::sentinel_for S = I, + transcoding_error_handler ErrorHandler = use_replacement_character> + using utf_8_to_16_iterator = + utf_iterator; + template< + utf16_iter I, + std::sentinel_for S = I, + transcoding_error_handler ErrorHandler = use_replacement_character> + using utf_16_to_8_iterator = + utf_iterator; + + + template< + utf8_iter I, + std::sentinel_for S = I, + transcoding_error_handler ErrorHandler = use_replacement_character> + using utf_8_to_32_iterator = + utf_iterator; + template< + utf32_iter I, + std::sentinel_for S = I, + transcoding_error_handler ErrorHandler = use_replacement_character> + using utf_32_to_8_iterator = + utf_iterator; + + + template< + utf16_iter I, + std::sentinel_for S = 
I, + transcoding_error_handler ErrorHandler = use_replacement_character> + using utf_16_to_32_iterator = + utf_iterator; + template< + utf32_iter I, + std::sentinel_for S = I, + transcoding_error_handler ErrorHandler = use_replacement_character> + using utf_32_to_16_iterator = + utf_iterator; + +#endif + +}} + +#endif diff --git a/include/boost/parser/detail/text/transcode_view.hpp b/include/boost/parser/detail/text/transcode_view.hpp index a0c0164a..78604ec2 100644 --- a/include/boost/parser/detail/text/transcode_view.hpp +++ b/include/boost/parser/detail/text/transcode_view.hpp @@ -6,671 +6,842 @@ #ifndef BOOST_PARSER_DETAIL_TEXT_TRANSCODE_VIEW_HPP #define BOOST_PARSER_DETAIL_TEXT_TRANSCODE_VIEW_HPP -#include +#include #include -#include -#include -#include +#include #include +#include + +#include namespace boost::parser::detail { namespace text { namespace detail { - - // UTF-8 - template - constexpr auto make_utf8_range_(utf8_tag, Iter f, Sentinel l) - { - return tagged_range{f, l}; - } - template - constexpr auto make_utf8_range_(utf16_tag, Iter f_, Sentinel l) - { - auto f = utf_16_to_8_iterator(f_, f_, l); - return tagged_range{f, l}; - } - template - constexpr auto make_utf8_range_(utf16_tag, Iter f_, Iter l_) - { - auto f = utf_16_to_8_iterator(f_, f_, l_); - auto l = utf_16_to_8_iterator(f_, l_, l_); - return tagged_range{f, l}; - } - template - constexpr auto make_utf8_range_(utf32_tag, Iter f_, Sentinel l) - { - auto f = utf_32_to_8_iterator(f_, f_, l); - return tagged_range{f, l}; - } - template - constexpr auto make_utf8_range_(utf32_tag, Iter f_, Iter l_) + template + constexpr auto iterator_to_tag() { - auto f = utf_32_to_8_iterator(f_, f_, l_); - auto l = utf_32_to_8_iterator(f_, l_, l_); - return tagged_range{f, l}; - } - - // UTF-16 - template - constexpr auto make_utf16_range_(utf8_tag, Iter f_, Sentinel l) - { - auto f = utf_8_to_16_iterator(f_, f_, l); - return tagged_range{f, l}; - } - template - constexpr auto make_utf16_range_(utf8_tag, 
Iter f_, Iter l_) - { - auto f = utf_8_to_16_iterator(f_, f_, l_); - auto l = utf_8_to_16_iterator(f_, l_, l_); - return tagged_range{f, l}; - } - template - constexpr auto make_utf16_range_(utf16_tag, Iter f, Sentinel l) - { - return tagged_range{f, l}; - } - template - constexpr auto - make_utf16_range_(utf32_tag, Iter f_, Sentinel l) - { - auto f = utf_32_to_16_iterator(f_, f_, l); - return tagged_range{f, l}; - } - template - constexpr auto make_utf16_range_(utf32_tag, Iter f_, Iter l_) - { - auto f = utf_32_to_16_iterator(f_, f_, l_); - auto l = utf_32_to_16_iterator(f_, l_, l_); - return tagged_range{f, l}; +#if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS + if constexpr (std::random_access_iterator) { + return std::random_access_iterator_tag{}; + } else if constexpr (std::bidirectional_iterator) { + return std::bidirectional_iterator_tag{}; + } else if constexpr (std::forward_iterator) { +#else + if constexpr (detail::random_access_iterator_v) { + return std::random_access_iterator_tag{}; + } else if constexpr (detail::bidirectional_iterator_v) { + return std::bidirectional_iterator_tag{}; + } else if constexpr (detail::forward_iterator_v) { +#endif + return std::forward_iterator_tag{}; + } else { + return std::input_iterator_tag{}; + } } + template + using iterator_to_tag_t = decltype(iterator_to_tag()); - // UTF-32 - template - constexpr auto make_utf32_range_(utf8_tag, Iter f_, Sentinel l) - { - auto f = utf_8_to_32_iterator(f_, f_, l); - return tagged_range{f, l}; - } - template - constexpr auto make_utf32_range_(utf8_tag, Iter f_, Iter l_) - { - auto f = utf_8_to_32_iterator(f_, f_, l_); - auto l = utf_8_to_32_iterator(f_, l_, l_); - return tagged_range{f, l}; - } - template - constexpr auto - make_utf32_range_(utf16_tag, Iter f_, Sentinel l) - { - auto f = utf_16_to_32_iterator(f_, f_, l); - return tagged_range{f, l}; - } - template - constexpr auto make_utf32_range_(utf16_tag, Iter f_, Iter l_) - { - auto f = utf_16_to_32_iterator(f_, f_, l_); - auto l = 
utf_16_to_32_iterator(f_, l_, l_); - return tagged_range{f, l}; - } - template - constexpr auto make_utf32_range_(utf32_tag, Iter f, Sentinel l) - { - return tagged_range{f, l}; - } +#if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS + template + using with_reference = T &; + template + concept can_reference = requires { typename with_reference; }; +#endif - template - constexpr auto - make_iter(Iterator first, Iterator it, Sentinel last) - -> decltype(ResultType(first, it, last)) - { - return ResultType(first, it, last); - } - template - constexpr auto - make_iter(ResultType first, ResultType it, ResultType last) - -> decltype(ResultType(it)) - { - return it; - } - template - constexpr auto - make_iter(ResultType first, ResultType it, Sentinel last) - -> decltype(ResultType(it)) - { - return it; - } - template - constexpr auto - make_iter(Iterator first, ResultType it, ResultType last) - -> decltype(ResultType(it)) - { - return it; +#if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS + template + struct cast_to_charn { + constexpr Char operator()(Char c) const { return c; } + }; +#else + struct cast_to_char8; + struct cast_to_char16; + struct cast_to_char32; + template + auto function_for_tag(Arg arg) + { +#if defined(__cpp_char8_t) + if constexpr (std::is_same_v) { + return (char8_t)arg; + } else +#endif + if constexpr (std::is_same_v) { + return (char16_t)arg; + } else if constexpr (std::is_same_v) { + return (char32_t)arg; + } } +#endif } - /** A view over UTF-8 code units. 
*/ #if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS - template S = I> + template + requires std::ranges::view && + std::regular_invocable> && + detail::can_reference>> #else - template + template // F is a tag type in c++17 #endif - struct utf8_view : parser::detail::stl_interfaces::view_interface> + class project_view : public stl_interfaces::view_interface> { - using iterator = I; - using sentinel = S; + V base_ = V(); - constexpr utf8_view() {} - constexpr utf8_view(iterator first, sentinel last) : - first_(detail::unpack_iterator_and_sentinel(first, last).f_), - last_(detail::unpack_iterator_and_sentinel(first, last).l_) - {} + template + class iterator; + template + class sentinel; - constexpr iterator begin() const - { - return detail::make_iter(first_, first_, last_); - } - constexpr sentinel end() const - { - return detail::make_iter(first_, last_, last_); - } + public: + constexpr project_view() +#if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS + requires std::default_initializable +#endif + = default; + constexpr explicit project_view(V base) : base_(std::move(base)) {} - friend constexpr bool operator==(utf8_view lhs, utf8_view rhs) - { - return lhs.begin() == rhs.begin() && lhs.end() == rhs.end(); - } - friend constexpr bool operator!=(utf8_view lhs, utf8_view rhs) - { - return !(lhs == rhs); - } + constexpr V& base() & { return base_; } + constexpr const V& base() const& { return base_; } + constexpr V base() && { return std::move(base_); } - /** Stream inserter; performs unformatted output, in UTF-8 - encoding. */ - friend std::ostream & operator<<(std::ostream & os, utf8_view v) - { - auto out = std::ostreambuf_iterator(os); - for (auto it = v.begin(); it != v.end(); ++it, ++out) { - *out = *it; - } - return os; - } -#if defined(BOOST_TEXT_DOXYGEN) || defined(_MSC_VER) - /** Stream inserter; performs unformatted output, in UTF-16 encoding. - Defined on Windows only. 
*/ - friend std::wostream & operator<<(std::wostream & os, utf8_view v) - { - boost::parser::detail::text::transcode_to_utf16( - v.begin(), v.end(), std::ostreambuf_iterator(os)); - return os; - } + constexpr iterator begin() { return iterator{detail::begin(base_)}; } + constexpr iterator begin() const +#if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS + requires std::ranges::range #endif + { return iterator{detail::begin(base_)}; } - private: - using iterator_t = decltype(detail::unpack_iterator_and_sentinel( - std::declval(), std::declval()) - .f_); - using sentinel_t = decltype(detail::unpack_iterator_and_sentinel( - std::declval(), std::declval()) - .l_); + constexpr sentinel end() { return sentinel{detail::end(base_)}; } +#if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS + constexpr iterator end() requires std::ranges::common_range + { return iterator{detail::end(base_)}; } +#endif + constexpr sentinel end() const +#if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS + requires std::ranges::range + { return sentinel{detail::end(base_)}; } + constexpr iterator end() const + requires std::ranges::common_range +#endif + { return iterator{detail::end(base_)}; } - iterator_t first_; - [[no_unique_address]] sentinel_t last_; +#if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS + constexpr auto size() requires std::ranges::sized_range { return std::ranges::size(base_); } + constexpr auto size() const requires std::ranges::sized_range { return std::ranges::size(base_); } +#endif }; - /** A view over UTF-16 code units. 
*/ #if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS - template S = I> + template + requires std::ranges::view && + std::regular_invocable> && + detail::can_reference>> #else - template + template #endif - struct utf16_view : parser::detail::stl_interfaces::view_interface> + template + class project_view::iterator + : public boost::parser::detail::stl_interfaces::proxy_iterator_interface< + iterator, // TODO + detail::iterator_to_tag_t>>, +#if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS + std::invoke_result_t> +#else + decltype(detail::function_for_tag(0)) +#endif + > { - using iterator = I; - using sentinel = S; + using iterator_type = detail::iterator_t>; + using sentinel_type = detail::sentinel_t>; + using reference_type = +#if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS + std::invoke_result_t> +#else + decltype(detail::function_for_tag(0)) +#endif + ; + using sentinel = project_view::sentinel; - constexpr utf16_view() {} - constexpr utf16_view(iterator first, sentinel last) : - first_(detail::unpack_iterator_and_sentinel(first, last).f_), - last_(detail::unpack_iterator_and_sentinel(first, last).l_) - {} + friend boost::parser::detail::stl_interfaces::access; + iterator_type & base_reference() noexcept { return it_; } + iterator_type base_reference() const { return it_; } - constexpr iterator begin() const - { - return detail::make_iter(first_, first_, last_); - } - constexpr sentinel end() const - { - return detail::make_iter(first_, last_, last_); - } + iterator_type it_ = iterator_type(); - friend constexpr bool operator==(utf16_view lhs, utf16_view rhs) - { - return lhs.begin() == rhs.begin() && lhs.end() == rhs.end(); - } - friend constexpr bool operator!=(utf16_view lhs, utf16_view rhs) - { - return !(lhs == rhs); - } + friend project_view::sentinel; - /** Stream inserter; performs unformatted output, in UTF-8 - encoding. 
*/ - friend std::ostream & operator<<(std::ostream & os, utf16_view v) - { - boost::parser::detail::text::transcode_to_utf8( - v.begin(), v.end(), std::ostreambuf_iterator(os)); - return os; - } -#if defined(BOOST_TEXT_DOXYGEN) || defined(_MSC_VER) - /** Stream inserter; performs unformatted output, in UTF-16 encoding. - Defined on Windows only. */ - friend std::wostream & operator<<(std::wostream & os, utf16_view v) - { - auto out = std::ostreambuf_iterator(os); - for (auto it = v.begin(); it != v.end(); ++it, ++out) { - *out = *it; - } - return os; - } + template +#if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS + requires std::sentinel_for>> +#endif + friend constexpr bool operator==(const iterator & x, + const sentinel & y); + + template +#if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS + requires std::sized_sentinel_for>> #endif + friend constexpr detail::range_difference_t> + operator-(const iterator & x, const sentinel & y); - private: - using iterator_t = decltype(detail::unpack_iterator_and_sentinel( - std::declval(), std::declval()) - .f_); - using sentinel_t = decltype(detail::unpack_iterator_and_sentinel( - std::declval(), std::declval()) - .l_); + template +#if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS + requires std::sized_sentinel_for>> +#endif + friend constexpr detail::range_difference_t> + operator-(const sentinel & y, const iterator & x); + + public: + constexpr iterator() = default; + constexpr iterator(iterator_type it) : it_(std::move(it)) {} - iterator_t first_; - [[no_unique_address]] sentinel_t last_; +#if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS + constexpr reference_type operator*() const { return F(*it_); } +#else + constexpr reference_type operator*() const + { + return detail::function_for_tag(*it_); + } +#endif }; - /** A view over UTF-32 code units. 
*/ #if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS - template S = I> + template + requires std::ranges::view && + std::regular_invocable> && + detail::can_reference>> #else - template + template #endif - struct utf32_view : parser::detail::stl_interfaces::view_interface> + template + class project_view::sentinel { - using iterator = I; - using sentinel = S; + using Base = detail::maybe_const; + using sentinel_type = detail::sentinel_t; - constexpr utf32_view() {} - constexpr utf32_view(iterator first, sentinel last) : - first_(detail::unpack_iterator_and_sentinel(first, last).f_), - last_(detail::unpack_iterator_and_sentinel(first, last).l_) - {} + sentinel_type end_ = sentinel_type(); - constexpr iterator begin() const - { - return detail::make_iter(first_, first_, last_); - } - constexpr sentinel end() const - { - return detail::make_iter(first_, last_, last_); - } + public: + constexpr sentinel() = default; + constexpr explicit sentinel(sentinel_type end) : end_(std::move(end)) {} +#if !BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS + template> +#endif + constexpr sentinel(sentinel i) +#if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS + requires Const && + std::convertible_to, detail::sentinel_t> +#endif + : end_(std::move(i.end_)) + {} - friend constexpr bool operator==(utf32_view lhs, utf32_view rhs) - { - return lhs.begin() == rhs.begin() && lhs.end() == rhs.end(); - } - friend constexpr bool operator!=(utf32_view lhs, utf32_view rhs) - { - return !(lhs == rhs); - } + constexpr sentinel_type base() const { return end_; } - /** Stream inserter; performs unformatted output, in UTF-8 - encoding. */ - friend std::ostream & operator<<(std::ostream & os, utf32_view v) - { - boost::parser::detail::text::transcode_to_utf8( - v.begin(), v.end(), std::ostreambuf_iterator(os)); - return os; - } -#if defined(BOOST_TEXT_DOXYGEN) || defined(_MSC_VER) - /** Stream inserter; performs unformatted output, in UTF-16 encoding. - Defined on Windows only. 
*/ - friend std::wostream & operator<<(std::wostream & os, utf32_view v) - { - boost::parser::detail::text::transcode_to_utf16( - v.begin(), v.end(), std::ostreambuf_iterator(os)); - return os; - } + template +#if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS + requires std::sentinel_for>> #endif + friend constexpr bool operator==(const iterator & x, + const sentinel & y) + { return x.it_ == y.end_; } - private: - using iterator_t = decltype(detail::unpack_iterator_and_sentinel( - std::declval(), std::declval()) - .f_); - using sentinel_t = decltype(detail::unpack_iterator_and_sentinel( - std::declval(), std::declval()) - .l_); + template +#if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS + requires std::sized_sentinel_for>> +#endif + friend constexpr detail::range_difference_t> + operator-(const iterator & x, const sentinel & y) + { return x.it_ - y.end_; } - iterator_t first_; - [[no_unique_address]] sentinel_t last_; + template +#if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS + requires std::sized_sentinel_for>> +#endif + friend constexpr detail::range_difference_t> + operator-(const sentinel & y, const iterator & x) + { return y.end_ - x.it_; } }; -}} - -namespace boost::parser::detail { namespace text { BOOST_PARSER_DETAIL_TEXT_NAMESPACE_V1 { - - namespace dtl { - template< - typename Impl, - typename Range, - bool Pointer = detail::is_utf_ptr_v>> - struct as_utf8_dispatch - { - static constexpr auto call(Range && r) - -> decltype(Impl{}(detail::begin(r), detail::end(r))) - { - return Impl{}(detail::begin(r), detail::end(r)); - } - }; +#if BOOST_PARSER_DETAIL_TEXT_USE_ALIAS_CTAD + template + project_view(R &&) -> project_view, F>; +#endif - template - struct as_utf8_dispatch + namespace detail { +#if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS + template +#else + template +#endif + struct project_impl : stl_interfaces::range_adaptor_closure> { - static constexpr auto call(Ptr p) - -> decltype(Impl{}(p, null_sentinel)) - { - return Impl{}(p, null_sentinel); - } - }; + template + using 
project_view_type = project_view; - struct as_utf8_impl : stl_interfaces::range_adaptor_closure - { - template - constexpr auto operator()(Iter first, Sentinel last) const - { - auto unpacked = - detail::unpack_iterator_and_sentinel(first, last); - auto r = detail::make_utf8_range_( - unpacked.tag_, unpacked.f_, unpacked.l_); - return utf8_view(r.f_, r.l_); - } - - template - constexpr auto operator()(Range && r) const - -> decltype(dtl::as_utf8_dispatch::call( - (Range &&) r)) +#if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS + template + requires std::ranges::viewable_range && + std::ranges::input_range && + std::regular_invocable> && + detail::can_reference>> +#else + template +#endif + [[nodiscard]] constexpr auto operator()(R && r) const { - return dtl::as_utf8_dispatch::call( - (Range &&) r); +#if BOOST_PARSER_DETAIL_TEXT_USE_ALIAS_CTAD + return project_view_type(std::forward(r)); +#else + return project_view_type(std::forward(r)); +#endif } }; } -#if defined(__cpp_inline_variables) - inline constexpr dtl::as_utf8_impl as_utf8; +#if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS + template #else - namespace { - constexpr dtl::as_utf8_impl as_utf8; - } + template #endif + constexpr detail::project_impl project; - namespace dtl { - template< - typename Impl, - typename Range, - bool Pointer = detail::is_utf_ptr_v>> - struct as_utf16_dispatch - { - static constexpr auto call(Range && r) - -> decltype(Impl{}(detail::begin(r), detail::end(r))) - { - return Impl{}(detail::begin(r), detail::end(r)); - } - }; +#if BOOST_PARSER_DETAIL_TEXT_USE_ALIAS_CTAD - template - struct as_utf16_dispatch - { - static constexpr auto call(Ptr p) - -> decltype(Impl{}(p, null_sentinel)) - { - return Impl{}(p, null_sentinel); - } - }; - - struct as_utf16_impl : stl_interfaces::range_adaptor_closure - { - template - constexpr auto operator()(Iter first, Sentinel last) const - { - auto unpacked = - detail::unpack_iterator_and_sentinel(first, last); - auto r = detail::make_utf16_range_( - unpacked.tag_, 
unpacked.f_, unpacked.l_); - return utf16_view(r.f_, r.l_); - } + template + using char8_view = project_view{}>; + template + using char16_view = project_view{}>; + template + using char32_view = project_view{}>; - template - constexpr auto operator()(Range && r) const - -> decltype(dtl::as_utf16_dispatch:: - call((Range &&) r)) - { - return dtl::as_utf16_dispatch::call( - (Range &&) r); - } - }; - } +#else -#if defined(__cpp_inline_variables) - inline constexpr dtl::as_utf16_impl as_utf16; +#if defined(__cpp_char8_t) +#if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS + template + requires std::ranges::view && std::convertible_to, char8_t> + class char8_view : public project_view{}> #else - namespace { - constexpr dtl::as_utf16_impl as_utf16; - } + template + class char8_view : public project_view +#endif + { + public: + constexpr char8_view() requires std::default_initializable = default; + constexpr char8_view(V base) : +#if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS + project_view{}>{std::move(base)} +#else + project_view{std::move(base)} +#endif + {} + }; +#endif +#if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS + template + requires std::ranges::view && std::convertible_to, char16_t> + class char16_view : public project_view{}> +#else + template + class char16_view : public project_view +#endif + { + public: + constexpr char16_view() +#if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS + requires std::default_initializable +#endif + = default; + constexpr char16_view(V base) : +#if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS + project_view{}>{std::move(base)} +#else + project_view{std::move(base)} +#endif + {} + }; +#if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS + template + requires std::ranges::view && std::convertible_to, char32_t> + class char32_view : public project_view{}> +#else + template + class char32_view : public project_view +#endif + { + public: + constexpr char32_view() +#if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS + requires std::default_initializable +#endif + = default; + constexpr 
char32_view(V base) : +#if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS + project_view{}>{std::move(base)} +#else + project_view{std::move(base)} #endif + {} + }; - namespace dtl { - template< - typename Impl, - typename Range, - bool Pointer = detail::is_utf_ptr_v>> - struct as_utf32_dispatch - { - static constexpr auto call(Range && r) - -> decltype(Impl{}(detail::begin(r), detail::end(r))) - { - return Impl{}(detail::begin(r), detail::end(r)); - } - }; +#if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS + template + char8_view(R &&) -> char8_view>; + template + char16_view(R &&) -> char16_view>; + template + char32_view(R &&) -> char32_view>; +#endif - template - struct as_utf32_dispatch - { - static constexpr auto call(Ptr p) - -> decltype(Impl{}(p, null_sentinel)) - { - return Impl{}(p, null_sentinel); - } - }; +#endif - struct as_utf32_impl : stl_interfaces::range_adaptor_closure + namespace detail { + template class View, format Format> + struct as_charn_impl : stl_interfaces::range_adaptor_closure> { - template - constexpr auto operator()(Iter first, Sentinel last) const - { - auto unpacked = - detail::unpack_iterator_and_sentinel(first, last); - auto r = detail::make_utf32_range_( - unpacked.tag_, unpacked.f_, unpacked.l_); - return utf32_view(r.f_, r.l_); - } - - template - constexpr auto operator()(Range && r) const - -> decltype(dtl::as_utf32_dispatch:: - call((Range &&) r)) +#if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS + template + requires (std::ranges::viewable_range && + std::ranges::input_range && + std::convertible_to, format_to_type_t>) || + utf_pointer> +#else + template +#endif + [[nodiscard]] constexpr auto operator()(R && r) const { - return dtl::as_utf32_dispatch::call( - (Range &&) r); + using T = remove_cv_ref_t; + if constexpr (detail::is_empty_view) { +#if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS + return std::ranges::empty_view>{}; +#else + return 42; // Never gonna happen. 
+#endif + } else if constexpr (std::is_pointer_v) { +#if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS + return View(std::ranges::subrange(r, null_sentinel)); +#else + return View(subrange{r, null_sentinel}); +#endif + } else { + return View(std::forward(r)); + } } }; + + template + constexpr bool is_charn_view = false; +#if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS + template + constexpr bool is_charn_view> = true; +#endif + template + constexpr bool is_charn_view> = true; + template + constexpr bool is_charn_view> = true; } -#if defined(__cpp_inline_variables) - inline constexpr dtl::as_utf32_impl as_utf32; +#if defined(__cpp_char8_t) + inline constexpr detail::as_charn_impl as_char8_t; +#endif + inline constexpr detail::as_charn_impl as_char16_t; + inline constexpr detail::as_charn_impl as_char32_t; + + // clang-format off +#if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS + template + requires std::ranges::view && std::ranges::forward_range #else - namespace { - constexpr dtl::as_utf32_impl as_utf32; - } + template #endif + class unpacking_view : public stl_interfaces::view_interface> { + V base_ = V(); -}}} + public: + constexpr unpacking_view() +#if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS + requires std::default_initializable +#endif + = default; + constexpr unpacking_view(V base) : base_(std::move(base)) {} + constexpr V base() const & #if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS + requires std::copy_constructible +#endif + { return base_; } + constexpr V base() && { return std::move(base_); } -namespace boost::parser::detail { namespace text { BOOST_PARSER_DETAIL_TEXT_NAMESPACE_V2 { + constexpr auto code_units() const noexcept { + auto unpacked = boost::parser::detail::text::unpack_iterator_and_sentinel(detail::begin(base_), detail::end(base_)); +#if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS + return std::ranges::subrange(unpacked.first, unpacked.last); +#else + return subrange{unpacked.first, unpacked.last}; +#endif + } -#if defined(BOOST_TEXT_DOXYGEN) + constexpr auto begin() { 
return code_units().begin(); } + constexpr auto begin() const { return code_units().begin(); } - /** Returns a `utf8_view` over the data in `[first, last)`. The view will - transcode the data if necessary. */ - template S> - constexpr detail::unspecified as_utf8(I first, S last); + constexpr auto end() { return code_units().end(); } + constexpr auto end() const { return code_units().end(); } + }; - /** Returns a `utf8_view` over the data in `r`. The view will transcode - the data if necessary. If `std::remove_reference_t` is not a - pointer, the result is returned as a `borrowed_view_t` (C++20 and - later only). */ - template - constexpr detail::unspecified as_utf8(R && r); +#if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS + template + unpacking_view(R &&) -> unpacking_view>; +#endif + // clang-format on +#if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS + template + requires std::ranges::view +#else + template>*/> #endif + class utf_view : public stl_interfaces::view_interface> + { + V base_ = V(); - namespace dtl { - struct as_utf8_impl : stl_interfaces::range_adaptor_closure + template + static constexpr auto make_begin(I first, S last) { - template S> - constexpr auto operator()(I first, S last) const - { - auto unpacked = - detail::unpack_iterator_and_sentinel(first, last); - auto r = detail::make_utf8_range_( - unpacked.tag_, unpacked.f_, unpacked.l_); - return utf8_view(r.f_, r.l_); + if constexpr (detail::bidirectional_iterator_v) { + return utf_iterator{first, first, last}; + } else { + return utf_iterator{first, last}; } - - template - constexpr auto operator()(R && r) const - { - if constexpr (std::is_pointer_v>) - return (*this)(r, null_sentinel); - else if constexpr (std::ranges::borrowed_range) - return (*this)(std::ranges::begin(r), std::ranges::end(r)); - else - return std::ranges::dangling{}; + } + template + static constexpr auto make_end(I first, S last) + { + if constexpr (!std::is_same_v) { + return last; + } else if constexpr 
(detail::bidirectional_iterator_v) { + return utf_iterator{first, last, last}; + } else { + return utf_iterator{last, last}; } - }; - } - - inline constexpr dtl::as_utf8_impl as_utf8; - -#if defined(BOOST_TEXT_DOXYGEN) - - /** Returns a `utf16_view` over the data in `[first, last)`. The view - will transcode the data if necessary. */ - template S> - constexpr detail::unspecified as_utf16(I first, S last); + } - /** Returns a `utf16_view` over the data in `r` the data if necessary. If - `std::remove_reference_t` is not a pointer, the result is returned - as a `borrowed_view_t` (C++20 and later only). */ - template - constexpr detail::unspecified as_utf16(R && r); + public: + constexpr utf_view() +#if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS + requires std::default_initializable +#endif + = default; + constexpr utf_view(V base) : base_{std::move(base)} {} + constexpr V base() const & +#if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS + requires std::copy_constructible #endif + { return base_; } + constexpr V base() && { return std::move(base_); } - namespace dtl { - struct as_utf16_impl : stl_interfaces::range_adaptor_closure + constexpr auto begin() { - template S> - constexpr auto operator()(I first, S last) const - { - auto unpacked = - detail::unpack_iterator_and_sentinel(first, last); - auto r = detail::make_utf16_range_( - unpacked.tag_, unpacked.f_, unpacked.l_); - return utf16_view(r.f_, r.l_); + constexpr format from_format = detail::format_of>(); + if constexpr(detail::is_charn_view) { + return make_begin(detail::begin(base_.base()), detail::end(base_.base())); + } else { + return make_begin(detail::begin(base_), detail::end(base_)); + } + } + constexpr auto begin() const + { + constexpr format from_format = detail::format_of>(); + if constexpr(detail::is_charn_view) { + return make_begin(detail::begin(base_.base()), detail::end(base_.base())); + } else { + return make_begin(detail::begin(base_), detail::end(base_)); } + } - template - constexpr auto operator()(R && 
r) const - { - if constexpr (std::is_pointer_v>) - return (*this)(r, null_sentinel); - else if constexpr (std::ranges::borrowed_range) - return (*this)(std::ranges::begin(r), std::ranges::end(r)); - else - return std::ranges::dangling{}; + constexpr auto end() + { + constexpr format from_format = detail::format_of>(); + if constexpr(detail::is_charn_view) { + return make_end(detail::begin(base_.base()), detail::end(base_.base())); + } else { + return make_end(detail::begin(base_), detail::end(base_)); } - }; - } + } + constexpr auto end() const + { + constexpr format from_format = detail::format_of>(); + if constexpr(detail::is_charn_view) { + return make_end(detail::begin(base_.base()), detail::end(base_.base())); + } else { + return make_end(detail::begin(base_), detail::end(base_)); + } + } + + /** Stream inserter; performs unformatted output, in UTF-8 + encoding. */ + friend std::ostream & operator<<(std::ostream & os, utf_view v) + { + if constexpr (Format == format::utf8) { + auto out = std::ostreambuf_iterator(os); + for (auto it = v.begin(); it != v.end(); ++it, ++out) { + *out = *it; + } + } else { + boost::parser::detail::text::transcode_to_utf8( + v.begin(), v.end(), std::ostreambuf_iterator(os)); + } + return os; + } +#if defined(BOOST_TEXT_DOXYGEN) || defined(_MSC_VER) + /** Stream inserter; performs unformatted output, in UTF-16 encoding. + Defined on Windows only. 
*/ + friend std::wostream & operator<<(std::wostream & os, utf_view v) + { + if constexpr (Format == format::utf16) { + auto out = std::ostreambuf_iterator(os); + for (auto it = v.begin(); it != v.end(); ++it, ++out) { + *out = *it; + } + } else { + boost::parser::detail::text::transcode_to_utf16( + v.begin(), v.end(), std::ostreambuf_iterator(os)); + } + return os; + } +#endif + }; + + +#if BOOST_PARSER_DETAIL_TEXT_USE_ALIAS_CTAD + + template + utf_view(R &&) -> utf_view>; + + template + using utf8_view = utf_view; + template + using utf16_view = utf_view; + template + using utf32_view = utf_view; + +#else + +#if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS + template + requires std::ranges::view +#else + template +#endif + class utf8_view : public utf_view + { + public: + constexpr utf8_view() +#if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS + requires std::default_initializable +#endif + = default; + constexpr utf8_view(V base) : + utf_view{std::move(base)} + {} + }; +#if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS + template + requires std::ranges::view +#else + template +#endif + class utf16_view : public utf_view + { + public: + constexpr utf16_view() +#if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS + requires std::default_initializable +#endif + = default; + constexpr utf16_view(V base) : + utf_view{std::move(base)} + {} + }; +#if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS + template + requires std::ranges::view +#else + template +#endif + class utf32_view : public utf_view + { + public: + constexpr utf32_view() +#if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS + requires std::default_initializable +#endif + = default; + constexpr utf32_view(V base) : + utf_view{std::move(base)} + {} + }; + +#if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS + template + utf8_view(R &&) -> utf8_view>; + template + utf16_view(R &&) -> utf16_view>; + template + utf32_view(R &&) -> utf32_view>; +#endif - inline constexpr dtl::as_utf16_impl as_utf16; +#endif #if defined(BOOST_TEXT_DOXYGEN) - /** Returns a `utf32_view` over 
the data in `[first, last)`. The view - will transcode the data if necessary. */ - template S> - constexpr detail::unspecified as_utf32(I first, S last); + /** A view adaptor that produces a UTF-8 view of the given view. */ + constexpr detail::unspecified as_utf8; - /** Returns a `utf32_view` over the data in `r`. The view will transcode - the data if necessary. If `std::remove_reference_t` is not a - pointer, the result is returned as a `borrowed_view_t` (C++20 and - later only). */ - template - constexpr detail::unspecified as_utf32(R && r); + /** A view adaptor that produces a UTF-16 view of the given view. */ + constexpr detail::unspecified as_utf16; + + /** A view adaptor that produces a UTF-32 view of the given view. */ + constexpr detail::unspecified as_utf32; #endif - namespace dtl { - struct as_utf32_impl : stl_interfaces::range_adaptor_closure - { - template S> - constexpr auto operator()(I first, S last) const - { + namespace detail { +#if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS + template class View> + concept can_utf_view = requires { View(std::declval()); }; +#else + template + using can_utf_view_expr = decltype(View(std::declval())); + template class View> + constexpr bool can_utf_view = + is_detected_v>; +#endif + + template + constexpr bool is_utf_view = false; + template + constexpr bool is_utf_view> = true; + template + constexpr bool is_utf_view> = true; + template + constexpr bool is_utf_view> = true; + template + constexpr bool is_utf_view> = true; + + template + constexpr bool is_bounded_array_v = false; + template + constexpr bool is_bounded_array_v = true; + + template + constexpr decltype(auto) unpack_range(R && r) + { + using T = detail::remove_cv_ref_t; + if constexpr (forward_range_v) { auto unpacked = - detail::unpack_iterator_and_sentinel(first, last); - auto r = detail::make_utf32_range_( - unpacked.tag_, unpacked.f_, unpacked.l_); - return utf32_view(r.f_, r.l_); + 
boost::parser::detail::text::unpack_iterator_and_sentinel(detail::begin(r), detail::end(r)); + if constexpr (is_bounded_array_v) { + constexpr auto n = std::extent_v; + if (n && !r[n - 1]) + --unpacked.last; +#if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS + return std::ranges::subrange(unpacked.first, unpacked.last); +#else + return subrange{unpacked.first, unpacked.last}; +#endif + } else if constexpr ( + !std::is_same_v> || + !std::is_same_v>) { + return unpacking_view(std::forward(r)); + } else { + return std::forward(r); + } + } else { + return std::forward(r); } + } - template - constexpr auto operator()(R && r) const + template + using unpacked_range = decltype(detail::unpack_range(std::declval())); + + template class View, format Format> + struct as_utf_impl : stl_interfaces::range_adaptor_closure> + { +#if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS + template + requires is_utf_view> || + (std::ranges::viewable_range && + can_utf_view, View>) || + utf_pointer> +#else + template +#endif + [[nodiscard]] constexpr auto operator()(R && r) const { - if constexpr (std::is_pointer_v>) - return (*this)(r, null_sentinel); - else if constexpr (std::ranges::borrowed_range) - return (*this)(std::ranges::begin(r), std::ranges::end(r)); - else - return std::ranges::dangling{}; + using T = detail::remove_cv_ref_t; + if constexpr (detail::is_empty_view) { +#if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS + return std::ranges::empty_view>{}; +#else + return 42; // Never gonna happen. 
+#endif + } else if constexpr (is_utf_view) { + return View(std::forward(r).base()); + } else if constexpr (detail::is_charn_view) { + return View(std::forward(r)); + } else if constexpr (std::is_pointer_v) { +#if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS + return View(std::ranges::subrange(r, null_sentinel)); +#else + return View(subrange{r, null_sentinel}); +#endif + } else { + return View(detail::unpack_range(std::forward(r))); + } } }; + + template + constexpr bool is_utf32_view = false; + template + constexpr bool is_utf32_view> = true; } - inline constexpr dtl::as_utf32_impl as_utf32; -}}} + inline constexpr detail::as_utf_impl as_utf8; + inline constexpr detail::as_utf_impl as_utf16; + inline constexpr detail::as_utf_impl as_utf32; -namespace std::ranges { - template S> - inline constexpr bool enable_borrowed_range> = - true; +}} - template S> - inline constexpr bool enable_borrowed_range> = - true; +#if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS - template S> - inline constexpr bool enable_borrowed_range> = - true; +namespace std::ranges { + template + inline constexpr bool enable_borrowed_range> = + enable_borrowed_range; + + template + inline constexpr bool enable_borrowed_range> = + enable_borrowed_range; + + template + inline constexpr bool enable_borrowed_range> = + enable_borrowed_range; + +#if !BOOST_PARSER_DETAIL_TEXT_USE_ALIAS_CTAD + template + inline constexpr bool enable_borrowed_range> = + enable_borrowed_range; + template + inline constexpr bool enable_borrowed_range> = + enable_borrowed_range; + template + inline constexpr bool enable_borrowed_range> = + enable_borrowed_range; +#endif } #endif diff --git a/include/boost/parser/detail/text/trie.hpp b/include/boost/parser/detail/text/trie.hpp index 1bed46e5..4b4b0921 100644 --- a/include/boost/parser/detail/text/trie.hpp +++ b/include/boost/parser/detail/text/trie.hpp @@ -6,12 +6,11 @@ #ifndef BOOST_PARSER_DETAIL_TEXT_TRIE_HPP #define BOOST_PARSER_DETAIL_TEXT_TRIE_HPP +#include #include -#include 
#include #include -#include #include #include diff --git a/include/boost/parser/detail/text/unpack.hpp b/include/boost/parser/detail/text/unpack.hpp new file mode 100644 index 00000000..7f1bd349 --- /dev/null +++ b/include/boost/parser/detail/text/unpack.hpp @@ -0,0 +1,274 @@ +// Copyright (C) 2020 T. Zachary Laine +// +// Distributed under the Boost Software License, Version 1.0. (See +// accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt) +#ifndef BOOST_PARSER_DETAIL_TEXT_UNPACK_HPP +#define BOOST_PARSER_DETAIL_TEXT_UNPACK_HPP + +#include + +#include +#include + + +namespace boost::parser::detail { namespace text { + + struct no_op_repacker + { + template + T operator()(T x) const + { + return x; + } + }; + + namespace detail { + // Using this custom template is quite a bit faster than using lambdas. + // Unexpected. + template< + typename RepackedIterator, + typename I, + typename S, + typename Then, + bool Bidi> + struct repacker + { + repacker() = default; +#if !BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS + template> +#endif + repacker(I first, S last, Then then) +#if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS + requires Bidi +#endif + : first{first}, + last{last}, + then{then} + {} +#if !BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS + template> +#endif + repacker(S last, Then then) +#if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS + requires(!Bidi) +#endif + : + last{last}, then{then} + {} + + auto operator()(I it) const + { + if constexpr (Bidi) { + return then(RepackedIterator(*first, it, last)); + } else { + return then(RepackedIterator(it, last)); + } + } + + std::optional first; + [[no_unique_address]] S last; + [[no_unique_address]] Then then; + }; + + template + constexpr auto + unpack_iterator_and_sentinel_impl(I first, S last, Repack repack); + + template< + format FromFormat, + format ToFormat, + typename I, + typename S, + typename ErrorHandler, + typename Repack> + constexpr auto unpack_iterator_and_sentinel_impl( + utf_iterator first, + 
utf_iterator last, + Repack repack); + + template< + format FromFormat, + format ToFormat, + typename I, + typename S, + typename ErrorHandler, + typename Repack> + constexpr auto unpack_iterator_and_sentinel_impl( + utf_iterator first, + S last, + Repack repack); + + template + constexpr auto + unpack_iterator_and_sentinel(I first, S last, Repack repack) + { + return detail::unpack_iterator_and_sentinel_impl( + first, last, repack); + } + + struct unpack_iterator_and_sentinel_cpo + { +#if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS + template< + utf_iter I, + std::sentinel_for S, + typename Repack = no_op_repacker> + requires std::forward_iterator +#else + template +#endif + constexpr auto + operator()(I first, S last, Repack repack = Repack()) const + { + return unpack_iterator_and_sentinel(first, last, repack); + } + }; + } + + inline namespace cpo { + inline constexpr detail::unpack_iterator_and_sentinel_cpo + unpack_iterator_and_sentinel{}; + } + +#if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS + template S, class Repack> +#else + template +#endif + struct unpack_result + { + static constexpr format format_tag = FormatTag; + + I first; + [[no_unique_address]] S last; + [[no_unique_address]] Repack repack; + }; + + namespace detail { + struct no_such_type + {}; + template + constexpr auto + unpack_iterator_and_sentinel_impl(I first, S last, Repack repack) + { + using value_type = detail::iter_value_t; + if constexpr ( + std::is_same_v +#if defined(__cpp_char8_t) + || std::is_same_v +#endif + ) { + return unpack_result{ + first, last, repack}; + } else if constexpr ( +#if defined(_MSC_VER) + std::is_same_v || +#endif + std::is_same_v) { + return unpack_result{ + first, last, repack}; + } else if constexpr ( +#if !defined(_MSC_VER) + std::is_same_v || +#endif + std::is_same_v) { + return unpack_result{ + first, last, repack}; + } else { + static_assert( + std::is_same_v, + "Unpacked iterator is not a utf_iter!"); + return 0; + } + } + + } +}} + +#include + +namespace 
boost::parser::detail { namespace text { namespace detail { + + template< + format FromFormat, + format ToFormat, + typename I, + typename S, + typename ErrorHandler, + typename Repack> + constexpr auto unpack_iterator_and_sentinel_impl( + utf_iterator first, + utf_iterator last, + Repack repack) + { + using iterator = utf_iterator; + if constexpr ( +#if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS + std::bidirectional_iterator +#else + std::is_base_of_v< + std::bidirectional_iterator_tag, + typename std::iterator_traits::iterator_category> +#endif + ) { + return boost::parser::detail::text::unpack_iterator_and_sentinel( + first.base(), + last.base(), + repacker< + iterator, + decltype(first.begin()), + decltype(first.end()), + Repack, + true>(first.begin(), first.end(), repack)); + } else { + return boost::parser::detail::text::unpack_iterator_and_sentinel( + first.base(), + last.base(), + repacker( + first.end(), repack)); + } + } + + template< + format FromFormat, + format ToFormat, + typename I, + typename S, + typename ErrorHandler, + typename Repack> + constexpr auto unpack_iterator_and_sentinel_impl( + utf_iterator first, + S last, + Repack repack) + { + using iterator = utf_iterator; + if constexpr ( +#if BOOST_PARSER_DETAIL_TEXT_USE_CONCEPTS + std::bidirectional_iterator +#else + std::is_base_of_v< + std::bidirectional_iterator_tag, + typename std::iterator_traits::iterator_category> +#endif + ) { + return boost::parser::detail::text::unpack_iterator_and_sentinel( + first.base(), + last, + repacker< + iterator, + decltype(first.begin()), + decltype(first.end()), + Repack, + true>(first.begin(), first.end(), repack)); + } else { + return boost::parser::detail::text::unpack_iterator_and_sentinel( + first.base(), + last, + repacker(last, repack)); + } + } + +}}} + +#endif diff --git a/include/boost/parser/detail/text/utf.hpp b/include/boost/parser/detail/text/utf.hpp index 18b48beb..8cefb3df 100644 --- a/include/boost/parser/detail/text/utf.hpp +++ 
b/include/boost/parser/detail/text/utf.hpp @@ -10,6 +10,7 @@ #include #include +#include namespace boost::parser::detail { namespace text { @@ -21,16 +22,23 @@ namespace boost::parser::detail { namespace text { template constexpr format format_of() { - constexpr uint32_t size = sizeof(T); - static_assert(std::is_integral::value, ""); - static_assert(size == 1 || size == 2 || size == 4, ""); - constexpr format formats[] = { - format::utf8, - format::utf8, - format::utf16, - format::utf32, - format::utf32}; - return formats[size]; + if constexpr ( + std::is_same_v +#if defined(__cpp_char8_t) + || std::is_same_v +#endif + ) { + return format::utf8; + } else if ( + std::is_same_v +#ifdef _MSC_VER + || std::is_same_v +#endif + ) { + return format::utf16; + } else { + return format::utf32; + } } } diff --git a/include/boost/parser/error_handling.hpp b/include/boost/parser/error_handling.hpp index 346f4f2b..70444e6d 100644 --- a/include/boost/parser/error_handling.hpp +++ b/include/boost/parser/error_handling.hpp @@ -119,7 +119,7 @@ namespace boost { namespace parser { int64_t preferred_max_line_length, int64_t max_after_caret) { - auto const r = parser::detail::text::as_utf8(filename); + auto const r = filename | parser::detail::text::as_utf8; std::string s(r.begin(), r.end()); return parser::write_formatted_message( os, @@ -167,7 +167,7 @@ namespace boost { namespace parser { int64_t preferred_max_line_length, int64_t max_after_caret) { - auto const r = parser::detail::text::as_utf8(filename); + auto const r = filename | parser::detail::text::as_utf8; std::string s(r.begin(), r.end()); return parser::write_formatted_expectation_failure_error_message( os, s, first, last, e, preferred_max_line_length, max_after_caret); @@ -197,7 +197,7 @@ namespace boost { namespace parser { std::wstring_view filename) : error_(error), warning_(warning) { - auto const r = parser::detail::text::as_utf8(filename); + auto const r = filename | parser::detail::text::as_utf8; 
filename_.assign(r.begin(), r.end()); } #endif diff --git a/include/boost/parser/error_handling_fwd.hpp b/include/boost/parser/error_handling_fwd.hpp index 2ce6ae93..e8263bb7 100644 --- a/include/boost/parser/error_handling_fwd.hpp +++ b/include/boost/parser/error_handling_fwd.hpp @@ -164,7 +164,7 @@ namespace boost { namespace parser { stream_error_handler(std::wstring_view filename) : err_os_(&std::cout), warn_os_(err_os_) { - auto const r = detail::text::as_utf8(filename); + auto const r = filename | detail::text::as_utf8; filename_.assign(r.begin(), r.end()); } /** This overload is Windows-only. */ @@ -172,7 +172,7 @@ namespace boost { namespace parser { std::wstring_view filename, std::ostream & errors) : err_os_(&errors), warn_os_(&errors) { - auto const r = detail::text::as_utf8(filename); + auto const r = filename | detail::text::as_utf8; filename_.assign(r.begin(), r.end()); } /** This overload is Windows-only. */ @@ -182,7 +182,7 @@ namespace boost { namespace parser { std::ostream & warnings) : err_os_(&errors), warn_os_(&warnings) { - auto const r = detail::text::as_utf8(filename); + auto const r = filename | detail::text::as_utf8; filename_.assign(r.begin(), r.end()); } #endif diff --git a/include/boost/parser/parser.hpp b/include/boost/parser/parser.hpp index ba833f5c..d8ffd3bf 100644 --- a/include/boost/parser/parser.hpp +++ b/include/boost/parser/parser.hpp @@ -22,7 +22,7 @@ #include #include -#include +#include #include #include @@ -879,7 +879,7 @@ namespace boost { namespace parser { a = trie_t{}; trie_t & trie = a.cast(); for (auto const & e : symbol_parser.initial_elements()) { - trie.insert(text::as_utf32(e.first), e.second); + trie.insert(e.first | text::as_utf32, e.second); } return trie; } else { @@ -973,8 +973,8 @@ namespace boost { namespace parser { template struct is_utf8_view : std::false_type {}; - template - struct is_utf8_view> : std::true_type + template + struct is_utf8_view> : std::true_type {}; template @@ -1247,7 +1247,7 @@ 
namespace boost { namespace parser { return; if constexpr (needs_transcoding_to_utf8) { char32_t cps[1] = {(char32_t)x}; - auto const r = text::as_utf8(cps); + auto const r = cps | text::as_utf8; c.insert(c.end(), r.begin(), r.end()); } else { c.insert(c.end(), std::move(x)); @@ -1272,7 +1272,8 @@ namespace boost { namespace parser { if constexpr (needs_transcoding_to_utf8< Container, iter_value_t>) { - auto const r = text::as_utf8(first, last); + auto const r = BOOST_PARSER_DETAIL_TEXT_SUBRANGE(first, last) | + text::as_utf8; c.insert(c.end(), r.begin(), r.end()); } else { c.insert(c.end(), first, last); @@ -1432,7 +1433,7 @@ namespace boost { namespace parser { operator!=(T c_, char_range const & chars) { if (sizeof(c_) == 4) { - auto const cps = text::as_utf32(chars.chars_); + auto const cps = chars.chars_ | text::as_utf32; using element_type = decltype(*cps.begin()); element_type const c = c_; return text::find(cps.begin(), cps.end(), c) == cps.end(); @@ -2105,34 +2106,6 @@ namespace boost { namespace parser { } } - template - text::utf32_view - remove_utf32_terminator(text::utf32_view view) - { - return view; - } - template - text::utf32_view remove_utf32_terminator(text::utf32_view view) - { - if (!view.empty() && view.back() == 0) { - return text::utf32_view( - view.begin(), -#if BOOST_PARSER_USE_CONCEPTS - std::ranges::prev(view.end()) -#else - std::prev(view.end()) -#endif - ); - } - return view; - } - template - auto as_utf32_no_terminator(R & r) - -> decltype(detail::remove_utf32_terminator(text::as_utf32(r))) - { - return detail::remove_utf32_terminator(text::as_utf32(r)); - } - template constexpr auto make_input_subrange(R && r) noexcept { @@ -2142,23 +2115,19 @@ namespace boost { namespace parser { if constexpr (std::is_same_v) { return parser::make_subrange(r, text::null_sentinel); } else { - auto r_ = text::as_utf32(r); - return parser::make_subrange(r_.begin(), r_.end()); + return r | text::as_utf32; } } else { using value_type = range_value_t; if 
constexpr (std::is_array_v) { - auto first = std::begin(r); - auto last = std::end(r); - static_assert(std::is_pointer_v); - static_assert(std::is_pointer_v); - if (first != last && !*std::prev(last)) - --last; if constexpr (std::is_same_v) { + auto first = std::begin(r); + auto last = std::end(r); + if (first != last && !*std::prev(last)) + --last; return parser::make_subrange(first, last); } else { - auto r_ = text::as_utf32(first, last); - return parser::make_subrange(r_.begin(), r_.end()); + return r | text::as_utf32; } } else { if constexpr ( @@ -2166,9 +2135,7 @@ namespace boost { namespace parser { !is_utf8_view::value) { return parser::make_subrange(std::begin(r), std::end(r)); } else { - auto r_ = detail::remove_utf32_terminator( - text::as_utf32(std::begin(r), std::end(r))); - return parser::make_subrange(r_.begin(), r_.end()); + return r | text::as_utf32; } } } @@ -3885,7 +3852,7 @@ namespace boost { namespace parser { { parser::detail::text::trie, T> & trie_ = detail::get_trie(context, ref()); - return trie_[parser::detail::text::as_utf32(str)]; + return trie_[str | detail::text::as_utf32]; } /** Inserts an entry consisting of a UTF-8 string `str` to match, and @@ -3896,7 +3863,7 @@ namespace boost { namespace parser { { parser::detail::text::trie, T> & trie_ = detail::get_trie(context, ref()); - trie_.insert(parser::detail::text::as_utf32(str), std::move(x)); + trie_.insert(str | detail::text::as_utf32, std::move(x)); } /** Erases the entry whose UTF-8 match string is `str` from the copy @@ -3906,7 +3873,7 @@ namespace boost { namespace parser { { parser::detail::text::trie, T> & trie_ = detail::get_trie(context, ref()); - trie_.erase(parser::detail::text::as_utf32(str)); + trie_.erase(str | detail::text::as_utf32); } template< @@ -5305,8 +5272,9 @@ namespace boost { namespace parser { } if constexpr (sizeof(*first) == 4) { - auto const cps = parser::detail::text::as_utf32( - expected_first_, expected_last_); + auto const cps = 
BOOST_PARSER_DETAIL_TEXT_SUBRANGE( + expected_first_, expected_last_) | + detail::text::as_utf32; auto const mismatch = detail::mismatch(first, last, cps.begin(), cps.end()); if (mismatch.second != cps.end()) { @@ -6442,7 +6410,8 @@ namespace boost { namespace parser { first, last, parser, parser.error_handler_, attr); } } else { - auto r = parser::detail::text::as_utf32(first, last); + auto r = BOOST_PARSER_DETAIL_TEXT_SUBRANGE(first, last) | + detail::text::as_utf32; auto f = r.begin(); auto const l = r.end(); auto _ = detail::scoped_base_assign(first, f); @@ -6542,7 +6511,8 @@ namespace boost { namespace parser { first, last, parser, parser.error_handler_); } } else { - auto r = parser::detail::text::as_utf32(first, last); + auto r = BOOST_PARSER_DETAIL_TEXT_SUBRANGE(first, last) | + detail::text::as_utf32; auto f = r.begin(); auto const l = r.end(); auto _ = detail::scoped_base_assign(first, f); @@ -6646,7 +6616,8 @@ namespace boost { namespace parser { first, last, parser, skip, parser.error_handler_, attr); } } else { - auto r = parser::detail::text::as_utf32(first, last); + auto r = BOOST_PARSER_DETAIL_TEXT_SUBRANGE(first, last) | + detail::text::as_utf32; auto f = r.begin(); auto const l = r.end(); auto _ = detail::scoped_base_assign(first, f); @@ -6756,7 +6727,8 @@ namespace boost { namespace parser { first, last, parser, skip, parser.error_handler_); } } else { - auto r = parser::detail::text::as_utf32(first, last); + auto r = BOOST_PARSER_DETAIL_TEXT_SUBRANGE(first, last) | + detail::text::as_utf32; auto f = r.begin(); auto const l = r.end(); auto _ = detail::scoped_base_assign(first, f); @@ -6814,7 +6786,8 @@ namespace boost { namespace parser { first, last, parser, skip, parser.error_handler_); } } else { - auto r = parser::detail::text::as_utf32(first, last); + auto r = BOOST_PARSER_DETAIL_TEXT_SUBRANGE(first, last) | + detail::text::as_utf32; auto f = r.begin(); auto const l = r.end(); auto _ = detail::scoped_base_assign(first, f); @@ -6870,7 +6843,8 
@@ namespace boost { namespace parser { first, last, parser, skip, parser.error_handler_); } } else { - auto r = parser::detail::text::as_utf32(first, last); + auto r = BOOST_PARSER_DETAIL_TEXT_SUBRANGE(first, last) | + detail::text::as_utf32; auto f = r.begin(); auto const l = r.end(); auto _ = detail::scoped_base_assign(first, f); @@ -7069,7 +7043,8 @@ namespace boost { namespace parser { first, last, parser, parser.error_handler_, callbacks); } } else { - auto r = parser::detail::text::as_utf32(first, last); + auto r = BOOST_PARSER_DETAIL_TEXT_SUBRANGE(first, last) | + detail::text::as_utf32; auto f = r.begin(); auto const l = r.end(); auto _ = detail::scoped_base_assign(first, f); @@ -7194,7 +7169,8 @@ namespace boost { namespace parser { callbacks); } } else { - auto r = parser::detail::text::as_utf32(first, last); + auto r = BOOST_PARSER_DETAIL_TEXT_SUBRANGE(first, last) | + detail::text::as_utf32; auto f = r.begin(); auto const l = r.end(); auto _ = detail::scoped_base_assign(first, f);