From e5ca7e66ed6df19de8d9025a766297a854eb70f4 Mon Sep 17 00:00:00 2001 From: Gregor Parzefall Date: Sun, 9 Jul 2023 18:34:13 +0200 Subject: [PATCH 01/11] Unify/fix conversion between UTF-8 and wide strings across Minetest and IrrlichtMt --- include/irrString.h | 110 +--------------- source/Irrlicht/CMakeLists.txt | 1 + source/Irrlicht/irr_hex.h | 60 +++++++++ source/Irrlicht/irr_string_conv.cpp | 195 ++++++++++++++++++++++++++++ source/Irrlicht/irr_string_conv.h | 74 +++++++++++ 5 files changed, 332 insertions(+), 108 deletions(-) create mode 100644 source/Irrlicht/irr_hex.h create mode 100644 source/Irrlicht/irr_string_conv.cpp create mode 100644 source/Irrlicht/irr_string_conv.h diff --git a/include/irrString.h b/include/irrString.h index d62eeee0a..531ae380a 100644 --- a/include/irrString.h +++ b/include/irrString.h @@ -2,8 +2,7 @@ // This file is part of the "Irrlicht Engine" and the "irrXML" project. // For conditions of distribution and use, see copyright notice in irrlicht.h and irrXML.h -#ifndef __IRR_STRING_H_INCLUDED__ -#define __IRR_STRING_H_INCLUDED__ +#pragma once #include "irrTypes.h" #include @@ -35,8 +34,6 @@ outside the string class for explicit use. // forward declarations template class string; -static size_t multibyteToWString(string& destination, const char* source, u32 sourceSize); -static size_t wStringToMultibyte(string& destination, const wchar_t* source, u32 sourceSize); //! Returns a character converted to lower case static inline u32 locale_lower ( u32 x ) @@ -919,111 +916,8 @@ typedef string stringc; //! Typedef for wide character strings typedef string stringw; -//! Convert multibyte string to wide-character string -/** Wrapper around mbstowcs from standard library, but directly using Irrlicht string class. -What the function does exactly depends on the LC_CTYPE of the current c locale. -\param destination Wide-character string receiving the converted source -\param source multibyte string -\return The number of wide characters written to destination, not including the eventual terminating null character or -1 when conversion failed */ -static inline size_t multibyteToWString(string& destination, const core::string& source) -{ - return multibyteToWString(destination, source.c_str(), (u32)source.size()); -} - -//! Convert multibyte string to wide-character string -/** Wrapper around mbstowcs from standard library, but directly writing to Irrlicht string class. -What the function does exactly depends on the LC_CTYPE of the current c locale. -\param destination Wide-character string receiving the converted source -\param source multibyte string -\return The number of wide characters written to destination, not including the eventual terminating null character or -1 when conversion failed. */ -static inline size_t multibyteToWString(string& destination, const char* source) -{ - const u32 s = source ? (u32)strlen(source) : 0; - return multibyteToWString(destination, source, s); -} - -//! Internally used by the other multibyteToWString functions -static size_t multibyteToWString(string& destination, const char* source, u32 sourceSize) -{ - if ( sourceSize ) - { - destination.str.resize(sourceSize+1); -#if defined(_MSC_VER) -#pragma warning(push) -#pragma warning(disable: 4996) // 'mbstowcs': This function or variable may be unsafe. Consider using mbstowcs_s instead. -#endif - const size_t written = mbstowcs(&destination[0], source, (size_t)sourceSize); -#if defined(_MSC_VER) -#pragma warning(pop) -#endif - if ( written != (size_t)-1 ) - { - destination.str.resize(written); - } - else - { - // Likely character which got converted until the invalid character was encountered are in destination now. - // And it seems even 0-terminated, but I found no documentation anywhere that this (the 0-termination) is guaranteed :-( - destination.clear(); - } - return written; - } - else - { - destination.clear(); - return 0; - } -} - -//! Same as multibyteToWString, but the other way around -static inline size_t wStringToMultibyte(string& destination, const core::string& source) -{ - return wStringToMultibyte(destination, source.c_str(), (u32)source.size()); -} - -//! Same as multibyteToWString, but the other way around -static inline size_t wStringToMultibyte(string& destination, const wchar_t* source) -{ - const u32 s = source ? (u32)wcslen(source) : 0; - return wStringToMultibyte(destination, source, s); -} - -//! Same as multibyteToWString, but the other way around -static size_t wStringToMultibyte(string& destination, const wchar_t* source, u32 sourceSize) -{ - if ( sourceSize ) - { - destination.str.resize(sizeof(wchar_t)*sourceSize+1); -#if defined(_MSC_VER) -#pragma warning(push) -#pragma warning(disable: 4996) // 'wcstombs': This function or variable may be unsafe. Consider using wcstombs_s instead. -#endif - const size_t written = wcstombs(&destination[0], source, destination.size()); -#if defined(_MSC_VER) -#pragma warning(pop) -#endif - if ( written != (size_t)-1 ) - { - destination.str.resize(written); - } - else - { - // Likely character which got converted until the invalid character was encountered are in destination now. - // And it seems even 0-terminated, but I found no documentation anywhere that this (the 0-termination) is guaranteed :-( - destination.clear(); - } - return written; - } - else - { - destination.clear(); - return 0; - } -} - } // end namespace core } // end namespace irr -#endif - +#include "irr_string_conv.h" diff --git a/source/Irrlicht/CMakeLists.txt b/source/Irrlicht/CMakeLists.txt index d404735f3..e5856992d 100644 --- a/source/Irrlicht/CMakeLists.txt +++ b/source/Irrlicht/CMakeLists.txt @@ -473,6 +473,7 @@ add_library(IRROTHEROBJ OBJECT COSOperator.cpp Irrlicht.cpp os.cpp + irr_string_conv.cpp ) if(ENABLE_OPENGL3) diff --git a/source/Irrlicht/irr_hex.h b/source/Irrlicht/irr_hex.h new file mode 100644 index 000000000..708f33024 --- /dev/null +++ b/source/Irrlicht/irr_hex.h @@ -0,0 +1,60 @@ +/* +Minetest +Copyright (C) 2013 Jonathan Neuschäfer + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +*/ + +#pragma once + +#include + +static const char hex_chars[] = "0123456789abcdef"; + +static inline std::string hex_encode(const char *data, unsigned int data_size) +{ + std::string ret; + ret.reserve(data_size * 2); + + char buf2[3]; + buf2[2] = '\0'; + + for (unsigned int i = 0; i < data_size; i++) { + unsigned char c = (unsigned char)data[i]; + buf2[0] = hex_chars[(c & 0xf0) >> 4]; + buf2[1] = hex_chars[c & 0x0f]; + ret.append(buf2); + } + + return ret; +} + +static inline std::string hex_encode(const std::string &data) +{ + return hex_encode(data.c_str(), data.size()); +} + +static inline bool hex_digit_decode(char hexdigit, unsigned char &value) +{ + if (hexdigit >= '0' && hexdigit <= '9') + value = hexdigit - '0'; + else if (hexdigit >= 'A' && hexdigit <= 'F') + value = hexdigit - 'A' + 10; + else if (hexdigit >= 'a' && hexdigit <= 'f') + value = hexdigit - 'a' + 10; + else + return false; + return true; +} diff --git a/source/Irrlicht/irr_string_conv.cpp b/source/Irrlicht/irr_string_conv.cpp new file mode 100644 index 000000000..7f32082f1 --- /dev/null +++ b/source/Irrlicht/irr_string_conv.cpp @@ -0,0 +1,195 @@ +/* +Minetest +Copyright (C) 2010-2013 celeron55, Perttu Ahola + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +*/ + +// Copyright (C) 2002-2012 Nikolaus Gebhardt +// This file is part of the "Irrlicht Engine". +// For conditions of distribution and use, see copyright notice in irrlicht.h + +#include "irr_string_conv.h" +#include "os.h" +#include "irr_hex.h" + +#ifndef _WIN32 + #include +#else + #include +#endif + +/* + std::string / std::wstring conversion functions. +*/ + +#ifndef _WIN32 + +static bool convert(const char *to, const char *from, char *outbuf, + size_t *outbuf_size, char *inbuf, size_t inbuf_size) +{ + iconv_t cd = iconv_open(to, from); + + char *inbuf_ptr = inbuf; + char *outbuf_ptr = outbuf; + + size_t *inbuf_left_ptr = &inbuf_size; + + const size_t old_outbuf_size = *outbuf_size; + size_t old_size = inbuf_size; + while (inbuf_size > 0) { + iconv(cd, &inbuf_ptr, inbuf_left_ptr, &outbuf_ptr, outbuf_size); + if (inbuf_size == old_size) { + iconv_close(cd); + return false; + } + old_size = inbuf_size; + } + + iconv_close(cd); + *outbuf_size = old_outbuf_size - *outbuf_size; + return true; +} + +#ifdef __ANDROID__ +// On Android iconv disagrees how big a wchar_t is for whatever reason +const char *DEFAULT_ENCODING = "UTF-32LE"; +#elif defined(__NetBSD__) || defined(__OpenBSD__) + // NetBSD does not allow "WCHAR_T" as a charset input to iconv. + #include + #if BYTE_ORDER == BIG_ENDIAN + const char *DEFAULT_ENCODING = "UTF-32BE"; + #else + const char *DEFAULT_ENCODING = "UTF-32LE"; + #endif +#else +const char *DEFAULT_ENCODING = "WCHAR_T"; +#endif + +std::wstring utf8_to_wide(const std::string &input) +{ + const size_t inbuf_size = input.length(); + // maximum possible size, every character is sizeof(wchar_t) bytes + size_t outbuf_size = input.length() * sizeof(wchar_t); + + char *inbuf = new char[inbuf_size]; // intentionally NOT null-terminated + memcpy(inbuf, input.c_str(), inbuf_size); + std::wstring out; + out.resize(outbuf_size / sizeof(wchar_t)); + +#if defined(__ANDROID__) || defined(__NetBSD__) || defined(__OpenBSD__) + static_assert(sizeof(wchar_t) == 4, "Unexpected wide char size"); +#endif + + char *outbuf = reinterpret_cast(&out[0]); + if (!convert(DEFAULT_ENCODING, "UTF-8", outbuf, &outbuf_size, inbuf, inbuf_size)) { + irr::os::Printer::log(("Couldn't convert UTF-8 string 0x" + hex_encode(input) + + " into wstring").c_str(), irr::ELL_WARNING); + delete[] inbuf; + return L""; + } + delete[] inbuf; + + out.resize(outbuf_size / sizeof(wchar_t)); + return out; +} + +std::string wide_to_utf8(const std::wstring &input) +{ + const size_t inbuf_size = input.length() * sizeof(wchar_t); + // maximum possible size: utf-8 encodes codepoints using 1 up to 4 bytes + size_t outbuf_size = input.length() * 4; + + char *inbuf = new char[inbuf_size]; // intentionally NOT null-terminated + memcpy(inbuf, input.c_str(), inbuf_size); + std::string out; + out.resize(outbuf_size); + + if (!convert("UTF-8", DEFAULT_ENCODING, &out[0], &outbuf_size, inbuf, inbuf_size)) { + irr::os::Printer::log(("Couldn't convert UTF-8 string 0x" + hex_encode(inbuf, inbuf_size) + + " into wstring").c_str(), irr::ELL_WARNING); + delete[] inbuf; + return ""; + } + delete[] inbuf; + + out.resize(outbuf_size); + return out; +} + +#else // _WIN32 + +std::wstring utf8_to_wide(const std::string &input) +{ + size_t outbuf_size = input.size() + 1; + wchar_t *outbuf = new wchar_t[outbuf_size]; + memset(outbuf, 0, outbuf_size * sizeof(wchar_t)); + MultiByteToWideChar(CP_UTF8, 0, input.c_str(), input.size(), + outbuf, outbuf_size); + std::wstring out(outbuf); + delete[] outbuf; + return out; +} + +std::string wide_to_utf8(const std::wstring &input) +{ + size_t outbuf_size = (input.size() + 1) * 6; + char *outbuf = new char[outbuf_size]; + memset(outbuf, 0, outbuf_size); + WideCharToMultiByte(CP_UTF8, 0, input.c_str(), input.size(), + outbuf, outbuf_size, NULL, NULL); + std::string out(outbuf); + delete[] outbuf; + return out; +} + +#endif // _WIN32 + +/* + irr::core::stringc / irr::core::stringw conversion functions. + Implemented using the std versions. +*/ + +namespace irr +{ +namespace core +{ + +size_t multibyteToWString(irr::core::stringw &destination, const irr::core::stringc &source) +{ + destination = utf8_to_wide(source.c_str()).c_str(); + return destination.size(); +} + +size_t multibyteToWString(irr::core::stringw &destination, const char *source) +{ + destination = utf8_to_wide(source).c_str(); + return destination.size(); +} + +size_t wStringToMultibyte(irr::core::stringc &destination, const irr::core::stringw &source) +{ + destination = wide_to_utf8(source.c_str()).c_str(); + return destination.size(); +} + +size_t wStringToMultibyte(irr::core::stringc &destination, const wchar_t *source) +{ + destination = wide_to_utf8(source).c_str(); + return destination.size(); +} + +} // namespace core +} // namespace irr diff --git a/source/Irrlicht/irr_string_conv.h b/source/Irrlicht/irr_string_conv.h new file mode 100644 index 000000000..3e1990ac7 --- /dev/null +++ b/source/Irrlicht/irr_string_conv.h @@ -0,0 +1,74 @@ +/* +Minetest +Copyright (C) 2010-2013 celeron55, Perttu Ahola + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +*/ + +// Copyright (C) 2002-2012 Nikolaus Gebhardt +// This file is part of the "Irrlicht Engine". +// For conditions of distribution and use, see copyright notice in irrlicht.h + +#pragma once + +#include +#include "irrString.h" + +// Try to avoid converting between wide and UTF-8 unless you need to +// input/output stuff via Irrlicht. + +/* + std::string / std::wstring conversion functions. +*/ + +std::wstring utf8_to_wide(const std::string &input); +std::string wide_to_utf8(const std::wstring &input); + +/* + irr::core::stringc / irr::core::stringw conversion functions. + Implemented using the std versions. +*/ + +namespace irr +{ +namespace core +{ + +size_t multibyteToWString(irr::core::stringw &destination, const irr::core::stringc &source); +size_t multibyteToWString(irr::core::stringw &destination, const char *source); +size_t wStringToMultibyte(irr::core::stringc &destination, const irr::core::stringw &source); +size_t wStringToMultibyte(irr::core::stringc &destination, const wchar_t *source); + +} +} + +/* + Another two interesting string conversion functions. + std::string / irr::core::stringw this time. +*/ + +//! Create a UTF8 std::string from an irr::core::stringw. +inline std::string stringw_to_utf8(const irr::core::stringw &input) +{ + std::wstring str(input.c_str()); + return wide_to_utf8(str); +} + +//! Create an irr::core:stringw from a UTF8 std::string. +inline irr::core::stringw utf8_to_stringw(const std::string &input) +{ + std::wstring str = utf8_to_wide(input); + return irr::core::stringw(str.c_str()); +} From 5e6ba49d207d9365ffbee43fc26b940d90a49a36 Mon Sep 17 00:00:00 2001 From: Gregor Parzefall Date: Sun, 9 Jul 2023 20:37:42 +0200 Subject: [PATCH 02/11] Cake --- source/Irrlicht/irr_string_conv.cpp | 19 +++++++++++++++++++ source/Irrlicht/irr_string_conv.h | 15 ++------------- 2 files changed, 21 insertions(+), 13 deletions(-) diff --git a/source/Irrlicht/irr_string_conv.cpp b/source/Irrlicht/irr_string_conv.cpp index 7f32082f1..fde721748 100644 --- a/source/Irrlicht/irr_string_conv.cpp +++ b/source/Irrlicht/irr_string_conv.cpp @@ -193,3 +193,22 @@ size_t wStringToMultibyte(irr::core::stringc &destination, const wchar_t *source } // namespace core } // namespace irr + +/* + Another two interesting string conversion functions. + std::string / irr::core::stringw this time. +*/ + +//! Create a UTF8 std::string from an irr::core::stringw. +std::string stringw_to_utf8(const irr::core::stringw &input) +{ + std::wstring str(input.c_str()); + return wide_to_utf8(str); +} + +//! Create an irr::core:stringw from a UTF8 std::string. +irr::core::stringw utf8_to_stringw(const std::string &input) +{ + std::wstring str = utf8_to_wide(input); + return irr::core::stringw(str.c_str()); +} diff --git a/source/Irrlicht/irr_string_conv.h b/source/Irrlicht/irr_string_conv.h index 3e1990ac7..633825133 100644 --- a/source/Irrlicht/irr_string_conv.h +++ b/source/Irrlicht/irr_string_conv.h @@ -59,16 +59,5 @@ size_t wStringToMultibyte(irr::core::stringc &destination, const wchar_t *source std::string / irr::core::stringw this time. */ -//! Create a UTF8 std::string from an irr::core::stringw. -inline std::string stringw_to_utf8(const irr::core::stringw &input) -{ - std::wstring str(input.c_str()); - return wide_to_utf8(str); -} - -//! Create an irr::core:stringw from a UTF8 std::string. -inline irr::core::stringw utf8_to_stringw(const std::string &input) -{ - std::wstring str = utf8_to_wide(input); - return irr::core::stringw(str.c_str()); -} +std::string stringw_to_utf8(const irr::core::stringw &input); +irr::core::stringw utf8_to_stringw(const std::string &input); From b0ad8ceee9b2c55a836242e3485ee3be57e462f5 Mon Sep 17 00:00:00 2001 From: Gregor Parzefall Date: Sun, 9 Jul 2023 20:45:20 +0200 Subject: [PATCH 03/11] More cake --- source/Irrlicht/irr_string_conv.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/source/Irrlicht/irr_string_conv.cpp b/source/Irrlicht/irr_string_conv.cpp index fde721748..5f9986714 100644 --- a/source/Irrlicht/irr_string_conv.cpp +++ b/source/Irrlicht/irr_string_conv.cpp @@ -118,8 +118,8 @@ std::string wide_to_utf8(const std::wstring &input) out.resize(outbuf_size); if (!convert("UTF-8", DEFAULT_ENCODING, &out[0], &outbuf_size, inbuf, inbuf_size)) { - irr::os::Printer::log(("Couldn't convert UTF-8 string 0x" + hex_encode(inbuf, inbuf_size) + - " into wstring").c_str(), irr::ELL_WARNING); + irr::os::Printer::log(("Couldn't convert wstring 0x" + hex_encode(inbuf, inbuf_size) + + " into UTF-8 string").c_str(), irr::ELL_WARNING); delete[] inbuf; return ""; } From 6f05de9f2cea68770773dd9864b0eb19c311bfae Mon Sep 17 00:00:00 2001 From: Gregor Parzefall Date: Sun, 9 Jul 2023 21:01:55 +0200 Subject: [PATCH 04/11] Even more cake --- source/Irrlicht/irr_string_conv.cpp | 4 ---- source/Irrlicht/irr_string_conv.h | 4 ---- 2 files changed, 8 deletions(-) diff --git a/source/Irrlicht/irr_string_conv.cpp b/source/Irrlicht/irr_string_conv.cpp index 5f9986714..196d04a69 100644 --- a/source/Irrlicht/irr_string_conv.cpp +++ b/source/Irrlicht/irr_string_conv.cpp @@ -17,10 +17,6 @@ with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */ -// Copyright (C) 2002-2012 Nikolaus Gebhardt -// This file is part of the "Irrlicht Engine". -// For conditions of distribution and use, see copyright notice in irrlicht.h - #include "irr_string_conv.h" #include "os.h" #include "irr_hex.h" diff --git a/source/Irrlicht/irr_string_conv.h b/source/Irrlicht/irr_string_conv.h index 633825133..dd5b67957 100644 --- a/source/Irrlicht/irr_string_conv.h +++ b/source/Irrlicht/irr_string_conv.h @@ -17,10 +17,6 @@ with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */ -// Copyright (C) 2002-2012 Nikolaus Gebhardt -// This file is part of the "Irrlicht Engine". -// For conditions of distribution and use, see copyright notice in irrlicht.h - #pragma once #include From 17e0d61413fa552a8c04c0259a1c3373d1f6e280 Mon Sep 17 00:00:00 2001 From: Gregor Parzefall Date: Sun, 9 Jul 2023 21:35:05 +0200 Subject: [PATCH 05/11] A different approach Co-authored-by: sfan5 --- include/irrString.h | 47 +++++-- source/Irrlicht/CMakeLists.txt | 1 - source/Irrlicht/irr_hex.h | 60 -------- source/Irrlicht/irr_string_conv.cpp | 210 ---------------------------- source/Irrlicht/irr_string_conv.h | 59 -------- 5 files changed, 39 insertions(+), 338 deletions(-) delete mode 100644 source/Irrlicht/irr_hex.h delete mode 100644 source/Irrlicht/irr_string_conv.cpp delete mode 100644 source/Irrlicht/irr_string_conv.h diff --git a/include/irrString.h b/include/irrString.h index 531ae380a..576ea2559 100644 --- a/include/irrString.h +++ b/include/irrString.h @@ -2,7 +2,8 @@ // This file is part of the "Irrlicht Engine" and the "irrXML" project. // For conditions of distribution and use, see copyright notice in irrlicht.h and irrXML.h -#pragma once +#ifndef __IRR_STRING_H_INCLUDED__ +#define __IRR_STRING_H_INCLUDED__ #include "irrTypes.h" #include @@ -11,6 +12,9 @@ #include #include +extern std::wstring utf8_to_wide(const std::string &input); +extern std::string wide_to_utf8(const std::wstring &input); + namespace irr { namespace core @@ -35,6 +39,12 @@ outside the string class for explicit use. template class string; +//! Typedef for character strings +typedef string stringc; + +//! Typedef for wide character strings +typedef string stringw; + //! Returns a character converted to lower case static inline u32 locale_lower ( u32 x ) { @@ -856,8 +866,10 @@ class string return ret.size()-oldSize; } - friend size_t multibyteToWString(string& destination, const char* source, u32 sourceSize); - friend size_t wStringToMultibyte(string& destination, const wchar_t* source, u32 sourceSize); + friend size_t multibyteToWString(irr::core::stringw &destination, const irr::core::stringc &source); + friend size_t multibyteToWString(irr::core::stringw &destination, const char *source); + friend size_t wStringToMultibyte(irr::core::stringc &destination, const irr::core::stringw &source); + friend size_t wStringToMultibyte(irr::core::stringc &destination, const wchar_t *source); private: @@ -910,14 +922,33 @@ class string }; -//! Typedef for character strings -typedef string stringc; +inline size_t multibyteToWString(irr::core::stringw &destination, const irr::core::stringc &source) +{ + destination = utf8_to_wide(source.str); + return destination.size(); +} -//! Typedef for wide character strings -typedef string stringw; +inline size_t multibyteToWString(irr::core::stringw &destination, const char *source) +{ + destination = utf8_to_wide(source); + return destination.size(); +} + +inline size_t wStringToMultibyte(irr::core::stringc &destination, const irr::core::stringw &source) +{ + destination = wide_to_utf8(source.str); + return destination.size(); +} + +inline size_t wStringToMultibyte(irr::core::stringc &destination, const wchar_t *source) +{ + destination = wide_to_utf8(source); + return destination.size(); +} } // end namespace core } // end namespace irr -#include "irr_string_conv.h" +#endif + diff --git a/source/Irrlicht/CMakeLists.txt b/source/Irrlicht/CMakeLists.txt index e5856992d..d404735f3 100644 --- a/source/Irrlicht/CMakeLists.txt +++ b/source/Irrlicht/CMakeLists.txt @@ -473,7 +473,6 @@ add_library(IRROTHEROBJ OBJECT COSOperator.cpp Irrlicht.cpp os.cpp - irr_string_conv.cpp ) if(ENABLE_OPENGL3) diff --git a/source/Irrlicht/irr_hex.h b/source/Irrlicht/irr_hex.h deleted file mode 100644 index 708f33024..000000000 --- a/source/Irrlicht/irr_hex.h +++ /dev/null @@ -1,60 +0,0 @@ -/* -Minetest -Copyright (C) 2013 Jonathan Neuschäfer - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU Lesser General Public License as published by -the Free Software Foundation; either version 2.1 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU Lesser General Public License for more details. - -You should have received a copy of the GNU Lesser General Public License along -with this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. -*/ - -#pragma once - -#include - -static const char hex_chars[] = "0123456789abcdef"; - -static inline std::string hex_encode(const char *data, unsigned int data_size) -{ - std::string ret; - ret.reserve(data_size * 2); - - char buf2[3]; - buf2[2] = '\0'; - - for (unsigned int i = 0; i < data_size; i++) { - unsigned char c = (unsigned char)data[i]; - buf2[0] = hex_chars[(c & 0xf0) >> 4]; - buf2[1] = hex_chars[c & 0x0f]; - ret.append(buf2); - } - - return ret; -} - -static inline std::string hex_encode(const std::string &data) -{ - return hex_encode(data.c_str(), data.size()); -} - -static inline bool hex_digit_decode(char hexdigit, unsigned char &value) -{ - if (hexdigit >= '0' && hexdigit <= '9') - value = hexdigit - '0'; - else if (hexdigit >= 'A' && hexdigit <= 'F') - value = hexdigit - 'A' + 10; - else if (hexdigit >= 'a' && hexdigit <= 'f') - value = hexdigit - 'a' + 10; - else - return false; - return true; -} diff --git a/source/Irrlicht/irr_string_conv.cpp b/source/Irrlicht/irr_string_conv.cpp deleted file mode 100644 index 196d04a69..000000000 --- a/source/Irrlicht/irr_string_conv.cpp +++ /dev/null @@ -1,210 +0,0 @@ -/* -Minetest -Copyright (C) 2010-2013 celeron55, Perttu Ahola - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU Lesser General Public License as published by -the Free Software Foundation; either version 2.1 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU Lesser General Public License for more details. - -You should have received a copy of the GNU Lesser General Public License along -with this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. -*/ - -#include "irr_string_conv.h" -#include "os.h" -#include "irr_hex.h" - -#ifndef _WIN32 - #include -#else - #include -#endif - -/* - std::string / std::wstring conversion functions. -*/ - -#ifndef _WIN32 - -static bool convert(const char *to, const char *from, char *outbuf, - size_t *outbuf_size, char *inbuf, size_t inbuf_size) -{ - iconv_t cd = iconv_open(to, from); - - char *inbuf_ptr = inbuf; - char *outbuf_ptr = outbuf; - - size_t *inbuf_left_ptr = &inbuf_size; - - const size_t old_outbuf_size = *outbuf_size; - size_t old_size = inbuf_size; - while (inbuf_size > 0) { - iconv(cd, &inbuf_ptr, inbuf_left_ptr, &outbuf_ptr, outbuf_size); - if (inbuf_size == old_size) { - iconv_close(cd); - return false; - } - old_size = inbuf_size; - } - - iconv_close(cd); - *outbuf_size = old_outbuf_size - *outbuf_size; - return true; -} - -#ifdef __ANDROID__ -// On Android iconv disagrees how big a wchar_t is for whatever reason -const char *DEFAULT_ENCODING = "UTF-32LE"; -#elif defined(__NetBSD__) || defined(__OpenBSD__) - // NetBSD does not allow "WCHAR_T" as a charset input to iconv. - #include - #if BYTE_ORDER == BIG_ENDIAN - const char *DEFAULT_ENCODING = "UTF-32BE"; - #else - const char *DEFAULT_ENCODING = "UTF-32LE"; - #endif -#else -const char *DEFAULT_ENCODING = "WCHAR_T"; -#endif - -std::wstring utf8_to_wide(const std::string &input) -{ - const size_t inbuf_size = input.length(); - // maximum possible size, every character is sizeof(wchar_t) bytes - size_t outbuf_size = input.length() * sizeof(wchar_t); - - char *inbuf = new char[inbuf_size]; // intentionally NOT null-terminated - memcpy(inbuf, input.c_str(), inbuf_size); - std::wstring out; - out.resize(outbuf_size / sizeof(wchar_t)); - -#if defined(__ANDROID__) || defined(__NetBSD__) || defined(__OpenBSD__) - static_assert(sizeof(wchar_t) == 4, "Unexpected wide char size"); -#endif - - char *outbuf = reinterpret_cast(&out[0]); - if (!convert(DEFAULT_ENCODING, "UTF-8", outbuf, &outbuf_size, inbuf, inbuf_size)) { - irr::os::Printer::log(("Couldn't convert UTF-8 string 0x" + hex_encode(input) + - " into wstring").c_str(), irr::ELL_WARNING); - delete[] inbuf; - return L""; - } - delete[] inbuf; - - out.resize(outbuf_size / sizeof(wchar_t)); - return out; -} - -std::string wide_to_utf8(const std::wstring &input) -{ - const size_t inbuf_size = input.length() * sizeof(wchar_t); - // maximum possible size: utf-8 encodes codepoints using 1 up to 4 bytes - size_t outbuf_size = input.length() * 4; - - char *inbuf = new char[inbuf_size]; // intentionally NOT null-terminated - memcpy(inbuf, input.c_str(), inbuf_size); - std::string out; - out.resize(outbuf_size); - - if (!convert("UTF-8", DEFAULT_ENCODING, &out[0], &outbuf_size, inbuf, inbuf_size)) { - irr::os::Printer::log(("Couldn't convert wstring 0x" + hex_encode(inbuf, inbuf_size) + - " into UTF-8 string").c_str(), irr::ELL_WARNING); - delete[] inbuf; - return ""; - } - delete[] inbuf; - - out.resize(outbuf_size); - return out; -} - -#else // _WIN32 - -std::wstring utf8_to_wide(const std::string &input) -{ - size_t outbuf_size = input.size() + 1; - wchar_t *outbuf = new wchar_t[outbuf_size]; - memset(outbuf, 0, outbuf_size * sizeof(wchar_t)); - MultiByteToWideChar(CP_UTF8, 0, input.c_str(), input.size(), - outbuf, outbuf_size); - std::wstring out(outbuf); - delete[] outbuf; - return out; -} - -std::string wide_to_utf8(const std::wstring &input) -{ - size_t outbuf_size = (input.size() + 1) * 6; - char *outbuf = new char[outbuf_size]; - memset(outbuf, 0, outbuf_size); - WideCharToMultiByte(CP_UTF8, 0, input.c_str(), input.size(), - outbuf, outbuf_size, NULL, NULL); - std::string out(outbuf); - delete[] outbuf; - return out; -} - -#endif // _WIN32 - -/* - irr::core::stringc / irr::core::stringw conversion functions. - Implemented using the std versions. -*/ - -namespace irr -{ -namespace core -{ - -size_t multibyteToWString(irr::core::stringw &destination, const irr::core::stringc &source) -{ - destination = utf8_to_wide(source.c_str()).c_str(); - return destination.size(); -} - -size_t multibyteToWString(irr::core::stringw &destination, const char *source) -{ - destination = utf8_to_wide(source).c_str(); - return destination.size(); -} - -size_t wStringToMultibyte(irr::core::stringc &destination, const irr::core::stringw &source) -{ - destination = wide_to_utf8(source.c_str()).c_str(); - return destination.size(); -} - -size_t wStringToMultibyte(irr::core::stringc &destination, const wchar_t *source) -{ - destination = wide_to_utf8(source).c_str(); - return destination.size(); -} - -} // namespace core -} // namespace irr - -/* - Another two interesting string conversion functions. - std::string / irr::core::stringw this time. -*/ - -//! Create a UTF8 std::string from an irr::core::stringw. -std::string stringw_to_utf8(const irr::core::stringw &input) -{ - std::wstring str(input.c_str()); - return wide_to_utf8(str); -} - -//! Create an irr::core:stringw from a UTF8 std::string. -irr::core::stringw utf8_to_stringw(const std::string &input) -{ - std::wstring str = utf8_to_wide(input); - return irr::core::stringw(str.c_str()); -} diff --git a/source/Irrlicht/irr_string_conv.h b/source/Irrlicht/irr_string_conv.h deleted file mode 100644 index dd5b67957..000000000 --- a/source/Irrlicht/irr_string_conv.h +++ /dev/null @@ -1,59 +0,0 @@ -/* -Minetest -Copyright (C) 2010-2013 celeron55, Perttu Ahola - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU Lesser General Public License as published by -the Free Software Foundation; either version 2.1 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU Lesser General Public License for more details. - -You should have received a copy of the GNU Lesser General Public License along -with this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. -*/ - -#pragma once - -#include -#include "irrString.h" - -// Try to avoid converting between wide and UTF-8 unless you need to -// input/output stuff via Irrlicht. - -/* - std::string / std::wstring conversion functions. -*/ - -std::wstring utf8_to_wide(const std::string &input); -std::string wide_to_utf8(const std::wstring &input); - -/* - irr::core::stringc / irr::core::stringw conversion functions. - Implemented using the std versions. -*/ - -namespace irr -{ -namespace core -{ - -size_t multibyteToWString(irr::core::stringw &destination, const irr::core::stringc &source); -size_t multibyteToWString(irr::core::stringw &destination, const char *source); -size_t wStringToMultibyte(irr::core::stringc &destination, const irr::core::stringw &source); -size_t wStringToMultibyte(irr::core::stringc &destination, const wchar_t *source); - -} -} - -/* - Another two interesting string conversion functions. - std::string / irr::core::stringw this time. -*/ - -std::string stringw_to_utf8(const irr::core::stringw &input); -irr::core::stringw utf8_to_stringw(const std::string &input); From 2fd0d9805756f9c08a51e57801e695639052f5e7 Mon Sep 17 00:00:00 2001 From: Gregor Parzefall Date: Sun, 9 Jul 2023 21:50:24 +0200 Subject: [PATCH 06/11] "Fix" test compilation --- examples/AutomatedTest/main.cpp | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/examples/AutomatedTest/main.cpp b/examples/AutomatedTest/main.cpp index 99fa5a96a..888dae7ca 100644 --- a/examples/AutomatedTest/main.cpp +++ b/examples/AutomatedTest/main.cpp @@ -1,7 +1,21 @@ +#include #include +#include #include #include "exampleHelper.h" +// usually provided by Minetest +std::wstring utf8_to_wide(const std::string &input) { + assert(false); + return L"false"; +} + +// usually provided by Minetest +std::string wide_to_utf8(const std::wstring &input) { + assert(false); + return "false"; +} + using namespace irr; static IrrlichtDevice *device = nullptr; From 32e0e2600ecdb30773b34fa1348bfa5dac3bdd46 Mon Sep 17 00:00:00 2001 From: Gregor Parzefall Date: Wed, 30 Aug 2023 20:42:54 +0200 Subject: [PATCH 07/11] std::wstring_convert --- examples/AutomatedTest/main.cpp | 14 -------------- include/irrString.h | 33 ++++++++++++++++++--------------- 2 files changed, 18 insertions(+), 29 deletions(-) diff --git a/examples/AutomatedTest/main.cpp b/examples/AutomatedTest/main.cpp index 888dae7ca..99fa5a96a 100644 --- a/examples/AutomatedTest/main.cpp +++ b/examples/AutomatedTest/main.cpp @@ -1,21 +1,7 @@ -#include #include -#include #include #include "exampleHelper.h" -// usually provided by Minetest -std::wstring utf8_to_wide(const std::string &input) { - assert(false); - return L"false"; -} - -// usually provided by Minetest -std::string wide_to_utf8(const std::wstring &input) { - assert(false); - return "false"; -} - using namespace irr; static IrrlichtDevice *device = nullptr; diff --git a/include/irrString.h b/include/irrString.h index 576ea2559..9164697c1 100644 --- a/include/irrString.h +++ b/include/irrString.h @@ -11,9 +11,8 @@ #include #include #include - -extern std::wstring utf8_to_wide(const std::string &input); -extern std::string wide_to_utf8(const std::wstring &input); +#include +#include namespace irr { @@ -866,10 +865,10 @@ class string return ret.size()-oldSize; } - friend size_t multibyteToWString(irr::core::stringw &destination, const irr::core::stringc &source); - friend size_t multibyteToWString(irr::core::stringw &destination, const char *source); - friend size_t wStringToMultibyte(irr::core::stringc &destination, const irr::core::stringw &source); - friend size_t wStringToMultibyte(irr::core::stringc &destination, const wchar_t *source); + friend size_t multibyteToWString(stringw &destination, const stringc &source); + friend size_t multibyteToWString(stringw &destination, const char *source); + friend size_t wStringToMultibyte(stringc &destination, const stringw &source); + friend size_t wStringToMultibyte(stringc &destination, const wchar_t *source); private: @@ -922,27 +921,31 @@ class string }; -inline size_t multibyteToWString(irr::core::stringw &destination, const irr::core::stringc &source) +inline size_t multibyteToWString(stringw &destination, const stringc &source) { - destination = utf8_to_wide(source.str); + std::wstring_convert> conv; + destination = conv.from_bytes(source.str); return destination.size(); } -inline size_t multibyteToWString(irr::core::stringw &destination, const char *source) +inline size_t multibyteToWString(stringw &destination, const char *source) { - destination = utf8_to_wide(source); + std::wstring_convert> conv; + destination = conv.from_bytes(source); return destination.size(); } -inline size_t wStringToMultibyte(irr::core::stringc &destination, const irr::core::stringw &source) +inline size_t wStringToMultibyte(stringc &destination, const stringw &source) { - destination = wide_to_utf8(source.str); + std::wstring_convert> conv; + destination = conv.to_bytes(source.str); return destination.size(); } -inline size_t wStringToMultibyte(irr::core::stringc &destination, const wchar_t *source) +inline size_t wStringToMultibyte(stringc &destination, const wchar_t *source) { - destination = wide_to_utf8(source); + std::wstring_convert> conv; + destination = conv.to_bytes(source); return destination.size(); } From a3e4bf1289f3fc8d4c0399aad01eb9a3d502656c Mon Sep 17 00:00:00 2001 From: Gregor Parzefall Date: Wed, 30 Aug 2023 20:48:36 +0200 Subject: [PATCH 08/11] Maybe this works? --- include/irrString.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/include/irrString.h b/include/irrString.h index 9164697c1..24f30d758 100644 --- a/include/irrString.h +++ b/include/irrString.h @@ -14,6 +14,9 @@ #include #include +// silence MSVC warnings +#define _SILENCE_CXX17_CODECVT_HEADER_DEPRECATION_WARNING + namespace irr { namespace core From 98d53fbea8387dc211d0c13c8f35f302624ea370 Mon Sep 17 00:00:00 2001 From: Gregor Parzefall Date: Wed, 30 Aug 2023 20:54:45 +0200 Subject: [PATCH 09/11] Next test --- include/irrString.h | 3 --- source/Irrlicht/CMakeLists.txt | 2 ++ 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/include/irrString.h b/include/irrString.h index 24f30d758..9164697c1 100644 --- a/include/irrString.h +++ b/include/irrString.h @@ -14,9 +14,6 @@ #include #include -// silence MSVC warnings -#define _SILENCE_CXX17_CODECVT_HEADER_DEPRECATION_WARNING - namespace irr { namespace core diff --git a/source/Irrlicht/CMakeLists.txt b/source/Irrlicht/CMakeLists.txt index d404735f3..451f6bb2b 100644 --- a/source/Irrlicht/CMakeLists.txt +++ b/source/Irrlicht/CMakeLists.txt @@ -50,6 +50,8 @@ elseif(MSVC) if(CMAKE_SIZEOF_VOID_P EQUAL 4) add_compile_options(/arch:SSE) endif() + + add_definitions(-D_SILENCE_CXX17_CODECVT_HEADER_DEPRECATION_WARNING) endif() # Sanity-check version From 254b507c35bb1e96336c9385f35050672bbaa119 Mon Sep 17 00:00:00 2001 From: Gregor Parzefall Date: Wed, 30 Aug 2023 22:31:32 +0200 Subject: [PATCH 10/11] Maybe like this? --- source/Irrlicht/os.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/source/Irrlicht/os.cpp b/source/Irrlicht/os.cpp index cdd1a26c4..e761f9498 100644 --- a/source/Irrlicht/os.cpp +++ b/source/Irrlicht/os.cpp @@ -16,7 +16,6 @@ #define bswap_16(X) _byteswap_ushort(X) #define bswap_32(X) _byteswap_ulong(X) #define bswap_64(X) _byteswap_uint64(X) - #define localtime _localtime_s #elif defined(_IRR_OSX_PLATFORM_) #include #define bswap_16(X) OSReadSwapInt16(&X,0) From ca7694c3c237bf507d10a02f2f4ca1945016a36b Mon Sep 17 00:00:00 2001 From: Gregor Parzefall Date: Wed, 30 Aug 2023 22:39:28 +0200 Subject: [PATCH 11/11] Add a warning comment --- include/irrString.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/irrString.h b/include/irrString.h index 9164697c1..7cea1bfad 100644 --- a/include/irrString.h +++ b/include/irrString.h @@ -920,6 +920,8 @@ class string stl_type str; }; +// Don't replace std::wstring_convert with mbstowcs / wcstombs. +// See https://github.com/minetest/irrlicht/issues/216. inline size_t multibyteToWString(stringw &destination, const stringc &source) {