Skip to content
This repository has been archived by the owner on May 13, 2024. It is now read-only.

Fix UTF-8 / wide string conversion functions #221

Closed
wants to merge 11 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
130 changes: 30 additions & 100 deletions include/irrString.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
#include <cstdio>
#include <cstring>
#include <cwchar>
#include <locale>
#include <codecvt>

namespace irr
{
Expand All @@ -35,8 +37,12 @@ outside the string class for explicit use.
// Forward declarations
// The string template is declared ahead of its definition so that the conversion
// helper prototypes and the stringc/stringw typedefs below can refer to it.
template <typename T>
class string;
// Pointer-plus-length conversion helpers; each writes the converted text into destination.
static size_t multibyteToWString(string<wchar_t>& destination, const char* source, u32 sourceSize);
static size_t wStringToMultibyte(string<c8>& destination, const wchar_t* source, u32 sourceSize);

//! Typedef for character strings (string of c8)
typedef string<c8> stringc;

//! Typedef for wide character strings (string of wchar_t)
typedef string<wchar_t> stringw;

//! Returns a character converted to lower case
static inline u32 locale_lower ( u32 x )
Expand Down Expand Up @@ -859,8 +865,10 @@ class string
return ret.size()-oldSize;
}

// Friend declarations for the free conversion helpers. Their definitions work on the
// `str` member directly (e.g. destination.str.resize(...), conv.from_bytes(source.str)).
// Pointer-plus-length overloads:
friend size_t multibyteToWString(string<wchar_t>& destination, const char* source, u32 sourceSize);
friend size_t wStringToMultibyte(string<c8>& destination, const wchar_t* source, u32 sourceSize);
// stringc/stringw overloads and pointer-only overloads:
friend size_t multibyteToWString(stringw &destination, const stringc &source);
friend size_t multibyteToWString(stringw &destination, const char *source);
friend size_t wStringToMultibyte(stringc &destination, const stringw &source);
friend size_t wStringToMultibyte(stringc &destination, const wchar_t *source);

private:

Expand Down Expand Up @@ -912,113 +920,35 @@ class string
stl_type str;
};

// Don't replace std::wstring_convert with mbstowcs / wcstombs.
// See https://github.com/minetest/irrlicht/issues/216.

//! Typedef for character strings
typedef string<c8> stringc;

//! Typedef for wide character strings
typedef string<wchar_t> stringw;

//! Convert a multibyte Irrlicht string to a wide-character string
/** Convenience overload: passes the character buffer and length of \a source on to the
(destination, pointer, size) overload of multibyteToWString, which wraps mbstowcs.
The result therefore depends on the LC_CTYPE of the current C locale.
\param destination Wide-character string receiving the converted text
\param source Multibyte string to convert
\return The number of wide characters written to destination (terminating null not counted),
or (size_t)-1 when the conversion failed */
static inline size_t multibyteToWString(string<wchar_t>& destination, const core::string<c8>& source)
{
	const char* bytes = source.c_str();
	const u32 byteCount = (u32)source.size();
	return multibyteToWString(destination, bytes, byteCount);
}

//! Convert a multibyte string to a wide-character string
/** Wrapper around mbstowcs from the standard library that writes directly into an Irrlicht string.
What the function does exactly depends on the LC_CTYPE of the current C locale.
\param destination Wide-character string receiving the converted source
\param source Multibyte string
\return The number of wide characters written to destination, not counting any terminating
null character, or (size_t)-1 when the conversion failed. */
static inline size_t multibyteToWString(string<wchar_t>& destination, const char* source)
// NOTE(review): this span carries two signatures and two bodies back to back (pointer overload
// and stringc overload); read each signature together with the statements that match it.
inline size_t multibyteToWString(stringw &destination, const stringc &source)
{
// A null source is treated as length 0; otherwise the length is taken with strlen.
const u32 s = source ? (u32)strlen(source) : 0;
// Forward to the (destination, pointer, size) overload, which does the conversion.
return multibyteToWString(destination, source, s);
}

//! Internally used by the other multibyteToWString functions
/** \param destination Wide-character string receiving the converted source
\param source Multibyte input
\param sourceSize Length of source; 0 clears destination and returns 0
\return Value returned by mbstowcs: the count of wide characters written, or (size_t)-1 on failure */
static size_t multibyteToWString(string<wchar_t>& destination, const char* source, u32 sourceSize)
{
if ( sourceSize )
{
// Size the buffer to one wide character per input unit plus one extra slot.
destination.str.resize(sourceSize+1);
#if defined(_MSC_VER)
#pragma warning(push)
#pragma warning(disable: 4996) // 'mbstowcs': This function or variable may be unsafe. Consider using mbstowcs_s instead.
#endif
// mbstowcs is limited to sourceSize output characters, so it stays within the resized buffer.
const size_t written = mbstowcs(&destination[0], source, (size_t)sourceSize);
#if defined(_MSC_VER)
#pragma warning(pop)
#endif
if ( written != (size_t)-1 )
{
// Success: shrink the buffer to the number of wide characters actually produced.
destination.str.resize(written);
}
else
{
// The characters converted before the invalid one are probably in destination at this point.
// They even appear to be 0-terminated, but no documentation was found that guarantees
// the 0-termination, so the partial result is discarded.
destination.clear();
}
// On failure this is (size_t)-1.
return written;
}
else
{
// Empty input: the result is an empty string.
destination.clear();
return 0;
}
// UTF-8 to wide-character conversion through std::wstring_convert with std::codecvt_utf8<wchar_t>;
// the input is read from the source string's `str` member.
// NOTE(review): per the standard library documentation, from_bytes reports malformed input by
// throwing std::range_error (no error string is passed to the converter here) rather than
// returning (size_t)-1 -- confirm callers do not rely on the -1 result.
// NOTE(review): std::codecvt_utf8<wchar_t> covers only UCS-2 where wchar_t is 16 bits wide --
// confirm this is acceptable on such platforms.
std::wstring_convert<std::codecvt_utf8<wchar_t>> conv;
destination = conv.from_bytes(source.str);
// Returns the length of the converted string.
return destination.size();
}

//! Same as multibyteToWString, but the other way around
/** NOTE(review): two signatures follow back to back -- a wStringToMultibyte overload taking an
Irrlicht wide string and a multibyteToWString overload taking a const char* -- and the body
holds statements for both; read each signature together with the statements that match it. */
static inline size_t wStringToMultibyte(string<c8>& destination, const core::string<wchar_t>& source)
inline size_t multibyteToWString(stringw &destination, const char *source)
{
// Forward the wide string's buffer and length to the (destination, pointer, size) overload.
return wStringToMultibyte(destination, source.c_str(), (u32)source.size());
// UTF-8 to wide-character conversion through std::wstring_convert with std::codecvt_utf8<wchar_t>.
// NOTE(review): source is handed to from_bytes without a null check -- confirm callers never
// pass a null pointer.
std::wstring_convert<std::codecvt_utf8<wchar_t>> conv;
destination = conv.from_bytes(source);
// Returns the length of the converted string.
return destination.size();
}

//! Same as multibyteToWString, but the other way around
/** NOTE(review): two signatures follow back to back -- one taking a const wchar_t* and one
taking a stringw -- and the body holds statements for both; read each signature together
with the statements that match it. */
static inline size_t wStringToMultibyte(string<c8>& destination, const wchar_t* source)
inline size_t wStringToMultibyte(stringc &destination, const stringw &source)
{
// A null source is treated as length 0; otherwise the length is taken with wcslen.
const u32 s = source ? (u32)wcslen(source) : 0;
// Forward to the (destination, pointer, size) overload, which does the conversion.
return wStringToMultibyte(destination, source, s);
// Wide-character to UTF-8 conversion through std::wstring_convert with std::codecvt_utf8<wchar_t>;
// the input is read from the source string's `str` member.
std::wstring_convert<std::codecvt_utf8<wchar_t>> conv;
destination = conv.to_bytes(source.str);
// Returns the length of the converted string.
return destination.size();
}

//! Same as multibyteToWString, but the other way around
/** NOTE(review): two signatures follow back to back -- the (destination, pointer, size) helper
built on wcstombs and a pointer-only overload built on std::wstring_convert -- and the body
holds statements for both; read each signature together with the statements that match it.
\param destination Multibyte string receiving the converted source
\param source Wide-character input
\return For the wcstombs path: the value returned by wcstombs, i.e. the number of bytes written
or (size_t)-1 on failure, and 0 for empty input. For the wstring_convert path: the length of
the converted string. */
static size_t wStringToMultibyte(string<c8>& destination, const wchar_t* source, u32 sourceSize)
inline size_t wStringToMultibyte(stringc &destination, const wchar_t *source)
{
if ( sourceSize )
{
// Size the output buffer to sizeof(wchar_t) bytes per input wide character, plus one.
destination.str.resize(sizeof(wchar_t)*sourceSize+1);
#if defined(_MSC_VER)
#pragma warning(push)
#pragma warning(disable: 4996) // 'wcstombs': This function or variable may be unsafe. Consider using wcstombs_s instead.
#endif
// wcstombs is given destination.size() as its byte limit.
const size_t written = wcstombs(&destination[0], source, destination.size());
#if defined(_MSC_VER)
#pragma warning(pop)
#endif
if ( written != (size_t)-1 )
{
// Success: shrink the buffer to the number of bytes actually produced.
destination.str.resize(written);
}
else
{
// The characters converted before the invalid one are probably in destination at this point.
// They even appear to be 0-terminated, but no documentation was found that guarantees
// the 0-termination, so the partial result is discarded.
destination.clear();
}
// On failure this is (size_t)-1.
return written;
}
else
{
// Empty input: the result is an empty string.
destination.clear();
return 0;
}
// Wide-character to UTF-8 conversion through std::wstring_convert with std::codecvt_utf8<wchar_t>.
// NOTE(review): source is handed to to_bytes without a null check -- confirm callers never
// pass a null pointer.
// NOTE(review): per the standard library documentation, to_bytes reports a failed conversion by
// throwing std::range_error (no error string is passed to the converter here) rather than
// returning (size_t)-1 -- confirm callers do not rely on the -1 result.
std::wstring_convert<std::codecvt_utf8<wchar_t>> conv;
destination = conv.to_bytes(source);
// Returns the length of the converted string.
return destination.size();
}


Expand Down
2 changes: 2 additions & 0 deletions source/Irrlicht/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@ elseif(MSVC)
if(CMAKE_SIZEOF_VOID_P EQUAL 4)
add_compile_options(/arch:SSE)
endif()

add_definitions(-D_SILENCE_CXX17_CODECVT_HEADER_DEPRECATION_WARNING)
endif()

# Sanity-check version
Expand Down
1 change: 0 additions & 1 deletion source/Irrlicht/os.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
#define bswap_16(X) _byteswap_ushort(X)
#define bswap_32(X) _byteswap_ulong(X)
#define bswap_64(X) _byteswap_uint64(X)
#define localtime _localtime_s
#elif defined(_IRR_OSX_PLATFORM_)
#include <libkern/OSByteOrder.h>
#define bswap_16(X) OSReadSwapInt16(&X,0)
Expand Down