From 895bd71fa058c52a1236b22693ecef9199de9c76 Mon Sep 17 00:00:00 2001 From: Turiiya <34311583+ttytm@users.noreply.github.com> Date: Sat, 29 Jun 2024 10:10:10 +0200 Subject: [PATCH] builtin: improve snake to camel case conversion (#21755) --- vlib/builtin/string.v | 69 +++++++++++++++++--------------------- vlib/builtin/string_test.v | 11 ++++-- 2 files changed, 40 insertions(+), 40 deletions(-) diff --git a/vlib/builtin/string.v b/vlib/builtin/string.v index 1ae24f61cf2a23..3ef2cd6e63b464 100644 --- a/vlib/builtin/string.v +++ b/vlib/builtin/string.v @@ -2651,56 +2651,49 @@ pub fn (s string) camel_to_snake() string { return s.to_lower() } mut b := unsafe { malloc_noscan(2 * s.len + 1) } + // Rather than checking whether the iterator variable is > 1 inside the loop, + // handle the first two chars separately to reduce load. + mut pos := 2 mut prev_is_upper := false - first_char, second_char := if s[0].is_capital() { - lower_first_c := s[0] + 32 - lower_second_c := if s[1].is_capital() { - prev_is_upper = true - s[1] + 32 - } else { - s[1] - } - lower_first_c, lower_second_c - } else { - first_c := s[0] - second_c := if s[1].is_capital() { - if first_c == `_` { s[1] + 32 } else { u8(`_`) } + unsafe { + if s[0].is_capital() { + b[0] = s[0] + 32 + b[1] = if s[1].is_capital() { + prev_is_upper = true + s[1] + 32 + } else { + s[1] + } } else { - s[1] + b[0] = s[0] + if s[1].is_capital() { + prev_is_upper = true + if s[0] != `_` && s.len > 2 && !s[2].is_capital() { + b[1] = `_` + b[2] = s[1] + 32 + pos = 3 + } else { + b[1] = s[1] + 32 + } + } else { + b[1] = s[1] + } } - first_c, second_c } - unsafe { - b[0] = first_char - b[1] = second_char - } - mut pos := 2 - mut prev_char := second_char - mut lower_c := `_` - mut c_is_upper := false - for i in pos .. s.len { + for i := 2; i < s.len; i++ { c := s[i] - c_is_upper = c.is_capital() - lower_c = if c_is_upper { c + 32 } else { c } - if !prev_is_upper && c_is_upper { - // aB => a_b, if prev has `_`, then do not add `_` + c_is_upper := c.is_capital() + // Cases: `aBcd == a_bcd` || `ABcd == ab_cd` + if ((c_is_upper && !prev_is_upper) + || (!c_is_upper && prev_is_upper && s[i - 2].is_capital())) && c != `_` { unsafe { if b[pos - 1] != `_` { b[pos] = `_` pos++ } } - } else if prev_is_upper && !c_is_upper && c != `_` { - // Ba => _ba, if prev has `_`, then do not add `_` - unsafe { - if b[pos - 2] != `_` { - prev_char = b[pos - 1] - b[pos - 1] = `_` - b[pos] = prev_char - pos++ - } - } } + lower_c := if c_is_upper { c + 32 } else { c } unsafe { b[pos] = lower_c } diff --git a/vlib/builtin/string_test.v b/vlib/builtin/string_test.v index b9cf2dcf912c77..95b7003392e555 100644 --- a/vlib/builtin/string_test.v +++ b/vlib/builtin/string_test.v @@ -1524,12 +1524,19 @@ fn test_contains_byte() { fn test_camel_to_snake() { assert 'Abcd'.camel_to_snake() == 'abcd' + assert 'aBcd'.camel_to_snake() == 'a_bcd' + assert 'AAbb'.camel_to_snake() == 'aa_bb' assert 'aaBB'.camel_to_snake() == 'aa_bb' assert 'aaBbCcDD'.camel_to_snake() == 'aa_bb_cc_dd' - assert 'BBaa'.camel_to_snake() == 'b_baa' + assert 'AAbbCC'.camel_to_snake() == 'aa_bb_cc' + assert 'aaBBcc'.camel_to_snake() == 'aa_bb_cc' assert 'aa_BB'.camel_to_snake() == 'aa_bb' + assert 'aa__BB'.camel_to_snake() == 'aa__bb' assert 'JVM_PUBLIC_ACC'.camel_to_snake() == 'jvm_public_acc' - assert '_ISspace'.camel_to_snake() == '_i_sspace' + assert '_ISspace'.camel_to_snake() == '_is_space' + assert '_aBcd'.camel_to_snake() == '_a_bcd' + assert '_a_Bcd'.camel_to_snake() == '_a_bcd' + assert '_AbCDe_'.camel_to_snake() == '_ab_cd_e_' } fn test_snake_to_camel() {