From 945b1f67e3c3fb0fb211560f767e127802493a0e Mon Sep 17 00:00:00 2001
From: Rene Saarsoo
Date: Wed, 25 Oct 2023 11:19:54 +0300
Subject: [PATCH] Don't allow number token to be immediately followed by a unicode letter

Fixes #651
---
 src/lexer/Tokenizer.ts | 2 +-
 test/behavesLikeMariaDbFormatter.ts | 12 ++++++++++++
 2 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/src/lexer/Tokenizer.ts b/src/lexer/Tokenizer.ts
index f1a8e6f3a5..ca1374faf3 100644
--- a/src/lexer/Tokenizer.ts
+++ b/src/lexer/Tokenizer.ts
@@ -46,7 +46,7 @@ export default class Tokenizer {
 {
 type: TokenType.NUMBER,
 regex:
- /(?:0x[0-9a-fA-F]+|0b[01]+|(?:-\s*)?[0-9]+(?:\.[0-9]*)?(?:[eE][-+]?[0-9]+(?:\.[0-9]+)?)?)(?!\w)/uy,
+ /(?:0x[0-9a-fA-F]+|0b[01]+|(?:-\s*)?[0-9]+(?:\.[0-9]*)?(?:[eE][-+]?[0-9]+(?:\.[0-9]+)?)?)(?![\w\p{Alphabetic}])/uy,
 },
 // RESERVED_PHRASE is matched before all other keyword tokens
 // to e.g. prioritize matching "TIMESTAMP WITH TIME ZONE" phrase over "WITH" clause.
diff --git a/test/behavesLikeMariaDbFormatter.ts b/test/behavesLikeMariaDbFormatter.ts
index 79c43383a3..9fe47ddd0e 100644
--- a/test/behavesLikeMariaDbFormatter.ts
+++ b/test/behavesLikeMariaDbFormatter.ts
@@ -51,6 +51,18 @@ export default function behavesLikeMariaDbFormatter(format: FormatFn) {
 );
 });
 
+ // regression test for sql-formatter#651
+ it('supports unicode identifiers that start with numbers', () => {
+ expect(format('SELECT 1ä FROM tbl')).toBe(
+ dedent`
+ SELECT
+ 1ä
+ FROM
+ tbl
+ `
+ );
+ });
+
 it('supports @variables', () => {
 expect(format('SELECT @foo, @some_long.var$with$special.chars')).toBe(dedent`
 SELECT