Skip to content

Commit

Permalink
Merge pull request #197 from tyler92/fix-buffer-overflow
Browse files Browse the repository at this point in the history
Fix buffer overflow in u8_strlen
  • Loading branch information
tristanpenman authored Oct 21, 2024
2 parents ad1e184 + b7c051f commit cc6ca36
Show file tree
Hide file tree
Showing 3 changed files with 72 additions and 31 deletions.
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,7 @@ if(valijson_BUILD_TESTS)
tests/test_validator.cpp
tests/test_validator_with_custom_regular_expression_engine.cpp
tests/test_yaml_cpp_adapter.cpp
tests/test_utf8_utils.cpp
)

set(TEST_LIBS gtest gtest_main jsoncpp json11 yamlcpp)
Expand Down
51 changes: 20 additions & 31 deletions include/valijson/utils/utf8_utils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,50 +14,39 @@
namespace valijson {
namespace utils {

static const uint32_t offsetsFromUTF8[6] = {
0x00000000UL, 0x00003080UL, 0x000E2080UL,
0x03C82080UL, 0xFA082080UL, 0x82082080UL
};

/**
 * Report whether a byte can begin a UTF-8 sequence.
 *
 * Bytes of the form 10xxxxxx are continuation bytes and yield false; every
 * other byte value, including NUL, yields true.
 */
inline bool isutf(char c)
{
    const unsigned char byte = static_cast<unsigned char>(c);
    const bool isContinuation = (byte >> 6) == 0x02;
    return !isContinuation;
}

/* reads the next utf-8 sequence out of a string, updating an index */
inline uint64_t u8_nextchar(const char *s, uint64_t *i)
inline bool isutf(char c)
{
uint64_t ch = 0;
int sz = 0;

do {
ch <<= 6;
ch += static_cast<unsigned char>(s[(*i)++]);
sz++;
} while (s[*i] && !isutf(s[*i]));
ch -= offsetsFromUTF8[sz-1];

return ch;
return ((c & 0xC0) != 0x80);
}

/**
 * Count the characters in a NUL-terminated UTF-8 string.
 *
 * The first byte of each sequence determines how many bytes are expected
 * (1 to 4). A sequence is cut short as soon as one of its following bytes is
 * not a continuation byte (10xxxxxx). The NUL terminator is not a
 * continuation byte either, so the scan never advances past the end of the
 * string, even when the final sequence is truncated.
 *
 * No validation is performed: truncated, overlong and out-of-range sequences
 * are each counted as one character.
 *
 * @param s  NUL-terminated byte string; it is dereferenced unconditionally,
 *           so it must not be null
 * @return   number of sequences found before the terminator
 */
inline uint64_t u8_strlen(const char *s)
{
    uint64_t count = 0;

    while (*s) {
        const unsigned char lead = static_cast<unsigned char>(*s);

        // Expected length taken from the first byte. Note that 0x80..0xBF
        // (a stray continuation byte) lands in the 2-byte branch as well.
        size_t seqLen = lead < 0x80 ? 1   // 0xxxxxxx: 1-byte (ASCII)
                      : lead < 0xE0 ? 2   // 110xxxxx: 2-byte sequence
                      : lead < 0xF0 ? 3   // 1110xxxx: 3-byte sequence
                      : lead < 0xF8 ? 4   // 11110xxx: 4-byte sequence
                      : 1;                // 0xF8..0xFF: single character

        // Shrink the sequence at the first byte that is not 10xxxxxx. NUL
        // fails this test too, which is what keeps the scan in bounds.
        for (size_t i = 1; i < seqLen; ++i) {
            const unsigned char next = static_cast<unsigned char>(s[i]);
            if ((next & 0xC0) != 0x80) {
                seqLen = i;
                break;
            }
        }

        s += seqLen;
        ++count;
    }

    return count;
}

} // namespace utils
} // namespace valijson
} // namespace utils
} // namespace valijson
51 changes: 51 additions & 0 deletions tests/test_utf8_utils.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
#include <gtest/gtest.h>
#include <valijson/utils/utf8_utils.hpp>

class TestUtf8Utils : public testing::Test
{
};

// Checks u8_strlen on well-formed, truncated and otherwise malformed input.
//
// Expected values are unsigned literals so that EXPECT_EQ does not compare
// the unsigned return value against a signed int. Octal escapes are used
// wherever a byte is followed by a letter in a-f, because a hex escape would
// otherwise absorb those letters as extra hex digits.
TEST_F(TestUtf8Utils, Utf8StringLength)
{
    using valijson::utils::u8_strlen;

    // Plain ASCII
    EXPECT_EQ(u8_strlen(""), 0u);
    EXPECT_EQ(u8_strlen("a"), 1u);
    EXPECT_EQ(u8_strlen("abc"), 3u);

    // U+0416 (2-byte sequence)
    EXPECT_EQ(u8_strlen("\xD0\x96"), 1u);

    // U+0915 (3-byte sequence)
    EXPECT_EQ(u8_strlen("\xE0\xA4\x95"), 1u);

    // U+10348 (4-byte sequence)
    EXPECT_EQ(u8_strlen("\xF0\x90\x8D\x88"), 1u);

    // U+0915 + U+0416
    EXPECT_EQ(u8_strlen("\xE0\xA4\x95\xD0\x96"), 2u);

    // incomplete U+0416 at the end
    EXPECT_EQ(u8_strlen("\xD0"), 1u);

    // incomplete U+0416 in the middle
    EXPECT_EQ(u8_strlen("\320abc"), 4u);

    // incomplete U+0915 at the end
    EXPECT_EQ(u8_strlen("\xE0\xA4"), 1u);

    // incomplete U+0915 in the middle
    EXPECT_EQ(u8_strlen("\xE0\244abc"), 4u);

    // U+DFFF (a surrogate code point)
    EXPECT_EQ(u8_strlen("\xED\xBF\xBF"), 1u);

    // Overlong encoding for U+0000
    EXPECT_EQ(u8_strlen("\xC0\x80"), 1u);

    // 0xF5 lead byte: would decode to U+140000 (out of Unicode range)
    EXPECT_EQ(u8_strlen("\xF5\x80\x80\x80"), 1u);

    // 0xE0 followed by 0xA4 nine times: the first three bytes form one
    // character, and the seven remaining stray continuation bytes are
    // consumed two at a time (the last one alone), giving four more.
    EXPECT_EQ(u8_strlen("\340\244\244\244\244\244\244\244\244\244"), 5u);
}

0 comments on commit cc6ca36

Please sign in to comment.