From 4f139187379913888e8e87d68b69b1362a9c1ab5 Mon Sep 17 00:00:00 2001 From: "yngwe@fry" Date: Sun, 22 Jan 2023 13:14:00 +0100 Subject: [PATCH] Implemented counter-example support in Sly. When conflicts are encountered, Sly will output examples of sequences of symbols and how the parser could interpret them. A couple of examples are provided and a small explanation is added in the documentation. --- CHANGES | 3 + docs/sly.rst | 108 ++ example/conflict/c.py | 1862 +++++++++++++++++++++++++++++++++ example/conflict/calc.py | 89 ++ example/conflict/decl_expr.py | 68 ++ example/conflict/ifelse.py | 74 ++ sly/yacc.py | 487 ++++++++- 7 files changed, 2649 insertions(+), 42 deletions(-) create mode 100644 example/conflict/c.py create mode 100644 example/conflict/calc.py create mode 100644 example/conflict/decl_expr.py create mode 100644 example/conflict/ifelse.py diff --git a/CHANGES b/CHANGES index f1846ea..2bcb9f3 100644 --- a/CHANGES +++ b/CHANGES @@ -1,6 +1,9 @@ In Progress ----------- +06/26/2021 Experimental support for counterexamples. SLY will now give + counterexamples for shift/reduce and reduce/reduce conflicts. + 05/09/2020 Experimental support for EBNF choices. For example: @('term { PLUS|MINUS term }') diff --git a/docs/sly.rst b/docs/sly.rst index 5382c7d..7b9c21c 100644 --- a/docs/sly.rst +++ b/docs/sly.rst @@ -1180,6 +1180,114 @@ also be stressed that not all shift-reduce conflicts are bad. However, the only way to be sure that they are resolved correctly is to look at the debugging file. +Conflict counterexamples +^^^^^^^^^^^^^^^^^^^^^^^^ + +To help tracking conflicts, SLY generates counterexamples in the debug file. +For each conflict, SLY will generate one or more examples for each +possibility in a shift/reduce or reduce/reduce conflict. The examples are +a sequence of terminals and nonterminals that the grammar could interpret +in two ways. SLY will show the different derivations, showing clearly what +the ambiguities were. 
The counterexamples are listed at the end of the debug +file and look like this:: + + shift/reduce conflict for ELSE in state 11 resolved as shift + shift using rule if_statement -> IF LPAREN expr RPAREN statement . ELSE statement + ╭╴ + │ IF LPAREN expr RPAREN IF LPAREN expr RPAREN statement ♦ ELSE statement + │ ╰if_statement──────────────────────────────────╯ + │ ╰statement─────────────────────────────────────╯ + │ ╰if_statement────────────────────────────────────────────────────────╯ + │ ╰statement───────────────────────────────────────────────────────────╯ + ╰╴ + + reduce using rule if_statement -> IF LPAREN expr RPAREN statement . + ╭╴ + │ IF LPAREN expr RPAREN IF LPAREN expr RPAREN statement ♦ ELSE statement + │ ╰if_statement───────────────────╯ + │ ╰statement──────────────────────╯ + │ ╰if_statement────────────────────────────────────────────────────────╯ + │ ╰statement───────────────────────────────────────────────────────────╯ + ╰╴ + +For each counterexample, the display starts with the list of symbols that cause +an ambiguity. The diamond shows the current location of the parser, and the +symbol following the diamond is the lookahead. +The lines below the symbol sequence show the possible reductions according to the +grammar rules. The problem displayed here is the `dangling else +<https://en.wikipedia.org/wiki/Dangling_else>`_ issue; +the first counterexample shows the reduction sequence if the shift path is taken; +the ``ELSE`` is attached to the rightmost ``if_statement``. In the second example, +SLY shows that another interpretation could be to reduce the second +``if_statement`` early and attach the ``ELSE`` sequence to the leftmost +``if_statement`` instead. 
+ +Here is an example of a reduce/reduce conflict that occurs in the C language:: + + reduce/reduce conflict for ) in state 21 resolved using rule expr -> IDENTIFIER + rejected rule (declarator -> IDENTIFIER) in state 21 + reduce using expr -> IDENTIFIER with lookahead ) + ╭╴ + │ TYPENAME ( IDENTIFIER ♦ ) + │ ╰expr──────╯ + │ ╰expr───────────────────╯ + ╰╴ + + reduce using declarator -> IDENTIFIER with lookahead ) + ╭╴ + │ TYPENAME ( IDENTIFIER ♦ ) ; + │ ╰declarator╯ + │ ╰declarator────╯ + │ ╰decl─────────────────────╯ + ╰╴ + +In the same way as the shift/reduce conflict, SLY shows here the two ways of +understanding the sequence. It will always backtrack far enough to find the lookahead +symbol after a reduction. + +Sometimes, it can be hard to understand why SLY encounters a conflict in the +first place. Consider this example taken from the C11 grammar:: + + shift/reduce conflict for [ in state 561 resolved as shift + shift using rule attribute_specifier -> . [ [ attribute_list ] ] + ╭╴ + │ identifier ♦ [ [ attribute_list ] ] + │ ╰attribute_specifier───╯ + │ ╰attribute_specifier_sequence╯ + │ ╰direct_declarator──────────────────────╯ + ╰╴ + + reduce using rule direct_declarator -> identifier . + ╭╴ + │ identifier ♦ [ ] + │ ╰direct_declarator╯ + │ ╰array_declarator─────╯ + ╰╴ + +Here, the two sequences are not ambiguous when considered in their entirety; +it is clear that the symbol following the first ``[`` will determine if the +input was supposed to be interpreted as an ``array_declarator`` or as a +``direct_declarator`` including an ``attribute_specifier``. While the grammar +is not ambiguous in this example, it is not LR(1) (in this case, LR(2) would +have removed the conflict) and the state machine does not have enough information +at the time it encounters the lookahead symbol to unambiguously determine what +to do next. 
+ +Viewing the state machine +^^^^^^^^^^^^^^^^^^^^^^^^^ + +SLY can save the state machine in a file in the DOT format that can be used with +graphing tools such as `graphviz `_ or even viewed online +in `Edotor `_. The graph can help visualize how the state +machine is built. Such graphs can become very big for big grammars and are not +always practical, but building such a graph for a small grammar. + +In order to generate such a file, add a ``dotfile`` attribute to your +class like this:: + + class CalcParser(Parser): + dotfile = 'parser.gv' + Syntax Error Handling ^^^^^^^^^^^^^^^^^^^^^ diff --git a/example/conflict/c.py b/example/conflict/c.py new file mode 100644 index 0000000..38af8a4 --- /dev/null +++ b/example/conflict/c.py @@ -0,0 +1,1862 @@ +# ----------------------------------------------------------------------------- +# c.py +# Sly version of the grammar extracted from the C 2x standard +# ----------------------------------------------------------------------------- + +import sys +sys.path.insert(0, '../..') + +import sly + +class CLexer(sly.Lexer): + tokens = ( + "character-constant", + "floating-constant", + "identifier", + "integer-constant", + "|=", + "^=", + "&=", + ">>=", + "<<=", + "-=", + "+=", + "%=", + "/=", + "*=", + "...", + "::", + "||", + "&&", + "!=", + "==", + ">=", + "<=", + ">>", + "<<", + "--", + "++", + "->", + "string-literal", + "enumeration-constant", + "auto", + "break", + "case", + "char", + "const", + "continue", + "default", + "do", + "double", + "else", + "enum", + "extern", + "float", + "for", + "goto", + "if", + "inline", + "int", + "long", + "register", + "restrict", + "return", + "short", + "signed", + "sizeof", + "static", + "struct", + "switch", + "typedef", + "union", + "unsigned", + "void", + "volatile", + "while", + "_Alignas", + "_Alignof", + "_Atomic", + "_Bool", + "_Complex", + "_Decimal128", + "_Decimal32", + "_Decimal64", + "_Generic", + "_Imaginary", + "_Noreturn", + "_Static_assert", + "_Thread_local", + ) + 
literals = { + ",", + "=", + ";", + ":", + "^", + "|", + ">", + "<", + "%", + "/", + "!", + "~", + "-", + "&", + ".", + ")", + "(", + "+", + "*", + "}", + "{", + "]", + "[", + "?", + } + +class CParser(sly.Parser): + tokens = CLexer.tokens + debugfile = 'c.out' + dotfile = 'c.dot' + start = 'translation_unit' + + + @_('AND_expression "&" equality_expression') + def AND_expression(self, p): + pass + + @_('equality_expression') + def AND_expression(self, p): + pass + + @_('pointer direct_abstract_declarator') + def abstract_declarator(self, p): + pass + + @_('direct_abstract_declarator') + def abstract_declarator(self, p): + pass + + @_('pointer') + def abstract_declarator(self, p): + pass + + @_('additive_expression "-" multiplicative_expression') + def additive_expression(self, p): + pass + + @_('additive_expression "+" multiplicative_expression') + def additive_expression(self, p): + pass + + @_('multiplicative_expression') + def additive_expression(self, p): + pass + + @_('_Alignas "(" constant_expression ")"') + def alignment_specifier(self, p): + pass + + @_('_Alignas "(" type_name ")"') + def alignment_specifier(self, p): + pass + + @_('argument_expression_list "," assignment_expression') + def argument_expression_list(self, p): + pass + + @_('assignment_expression') + def argument_expression_list(self, p): + pass + + @_('direct_abstract_declarator "[" "*" "]"') + def array_abstract_declarator(self, p): + pass + + @_('"[" "*" "]"') + def array_abstract_declarator(self, p): + pass + + @_('direct_abstract_declarator "[" type_qualifier_list static assignment_expression "]"') + def array_abstract_declarator(self, p): + pass + + @_('"[" type_qualifier_list static assignment_expression "]"') + def array_abstract_declarator(self, p): + pass + + @_('direct_abstract_declarator "[" static type_qualifier_list assignment_expression "]"') + def array_abstract_declarator(self, p): + pass + + @_('"[" static type_qualifier_list assignment_expression "]"') + def 
array_abstract_declarator(self, p): + pass + + @_('direct_abstract_declarator "[" static assignment_expression "]"') + def array_abstract_declarator(self, p): + pass + + @_('"[" static assignment_expression "]"') + def array_abstract_declarator(self, p): + pass + + @_('direct_abstract_declarator "[" type_qualifier_list assignment_expression "]"') + def array_abstract_declarator(self, p): + pass + + @_('"[" type_qualifier_list assignment_expression "]"') + def array_abstract_declarator(self, p): + pass + + @_('direct_abstract_declarator "[" assignment_expression "]"') + def array_abstract_declarator(self, p): + pass + + @_('"[" assignment_expression "]"') + def array_abstract_declarator(self, p): + pass + + @_('direct_abstract_declarator "[" type_qualifier_list "]"') + def array_abstract_declarator(self, p): + pass + + @_('"[" type_qualifier_list "]"') + def array_abstract_declarator(self, p): + pass + + @_('direct_abstract_declarator "[" "]"') + def array_abstract_declarator(self, p): + pass + + @_('"[" "]"') + def array_abstract_declarator(self, p): + pass + + @_('direct_declarator "[" type_qualifier_list "*" "]"') + def array_declarator(self, p): + pass + + @_('direct_declarator "[" "*" "]"') + def array_declarator(self, p): + pass + + @_('direct_declarator "[" type_qualifier_list static assignment_expression "]"') + def array_declarator(self, p): + pass + + @_('direct_declarator "[" static type_qualifier_list assignment_expression "]"') + def array_declarator(self, p): + pass + + @_('direct_declarator "[" static assignment_expression "]"') + def array_declarator(self, p): + pass + + @_('direct_declarator "[" type_qualifier_list assignment_expression "]"') + def array_declarator(self, p): + pass + + @_('direct_declarator "[" assignment_expression "]"') + def array_declarator(self, p): + pass + + @_('direct_declarator "[" type_qualifier_list "]"') + def array_declarator(self, p): + pass + + @_('direct_declarator "[" "]"') + def array_declarator(self, p): + pass + 
+ @_('unary_expression assignment_operator assignment_expression') + def assignment_expression(self, p): + pass + + @_('conditional_expression') + def assignment_expression(self, p): + pass + + @_('"="') + def assignment_operator(self, p): + pass + + @_('*=') + def assignment_operator(self, p): + pass + + @_('/=') + def assignment_operator(self, p): + pass + + @_('%=') + def assignment_operator(self, p): + pass + + @_('+=') + def assignment_operator(self, p): + pass + + @_('-=') + def assignment_operator(self, p): + pass + + @_('<<=') + def assignment_operator(self, p): + pass + + @_('>>=') + def assignment_operator(self, p): + pass + + @_('&=') + def assignment_operator(self, p): + pass + + @_('^=') + def assignment_operator(self, p): + pass + + @_('|=') + def assignment_operator(self, p): + pass + + @_('_Atomic "(" type_name ")"') + def atomic_type_specifier(self, p): + pass + + @_('attribute_token attribute_argument_clause') + def attribute(self, p): + pass + + @_('attribute_token') + def attribute(self, p): + pass + + @_('"(" balanced_token_sequence ")"') + def attribute_argument_clause(self, p): + pass + + @_('"(" ")"') + def attribute_argument_clause(self, p): + pass + + @_('attribute_specifier_sequence ";"') + def attribute_declaration(self, p): + pass + + @_('attribute_list "," attribute') + def attribute_list(self, p): + pass + + @_('attribute_list ","') + def attribute_list(self, p): + pass + + @_('attribute') + def attribute_list(self, p): + pass + + @_('') + def attribute_list(self, p): + pass + + @_('identifier') + def attribute_prefix(self, p): + pass + + @_('attribute_prefix :: identifier') + def attribute_prefixed_token(self, p): + pass + + @_('"[" "[" attribute_list "]" "]"') + def attribute_specifier(self, p): + pass + + @_('attribute_specifier_sequence attribute_specifier') + def attribute_specifier_sequence(self, p): + pass + + @_('attribute_specifier') + def attribute_specifier_sequence(self, p): + pass + + @_('attribute_prefixed_token') + def 
attribute_token(self, p): + pass + + @_('standard_attribute') + def attribute_token(self, p): + pass + + @_('string-literal') + def balanced_token(self, p): + pass + + @_('character-constant') + def balanced_token(self, p): + pass + + @_('floating-constant') + def balanced_token(self, p): + pass + + @_('integer-constant') + def balanced_token(self, p): + pass + + @_('identifier') + def balanced_token(self, p): + pass + + @_('","') + def balanced_token(self, p): + pass + + @_('|=') + def balanced_token(self, p): + pass + + @_('^=') + def balanced_token(self, p): + pass + + @_('&=') + def balanced_token(self, p): + pass + + @_('>>=') + def balanced_token(self, p): + pass + + @_('<<=') + def balanced_token(self, p): + pass + + @_('-=') + def balanced_token(self, p): + pass + + @_('+=') + def balanced_token(self, p): + pass + + @_('%=') + def balanced_token(self, p): + pass + + @_('/=') + def balanced_token(self, p): + pass + + @_('*=') + def balanced_token(self, p): + pass + + @_('"="') + def balanced_token(self, p): + pass + + @_('...') + def balanced_token(self, p): + pass + + @_('";"') + def balanced_token(self, p): + pass + + @_('::') + def balanced_token(self, p): + pass + + @_('":"') + def balanced_token(self, p): + pass + + @_('"?"') + def balanced_token(self, p): + pass + + @_('||') + def balanced_token(self, p): + pass + + @_('&&') + def balanced_token(self, p): + pass + + @_('"|"') + def balanced_token(self, p): + pass + + @_('"^"') + def balanced_token(self, p): + pass + + @_('!=') + def balanced_token(self, p): + pass + + @_('==') + def balanced_token(self, p): + pass + + @_('>=') + def balanced_token(self, p): + pass + + @_('<=') + def balanced_token(self, p): + pass + + @_('">"') + def balanced_token(self, p): + pass + + @_('"<"') + def balanced_token(self, p): + pass + + @_('>>') + def balanced_token(self, p): + pass + + @_('<<') + def balanced_token(self, p): + pass + + @_('"%"') + def balanced_token(self, p): + pass + + @_('"/"') + def 
balanced_token(self, p): + pass + + @_('"!"') + def balanced_token(self, p): + pass + + @_('"~"') + def balanced_token(self, p): + pass + + @_('"-"') + def balanced_token(self, p): + pass + + @_('"+"') + def balanced_token(self, p): + pass + + @_('"*"') + def balanced_token(self, p): + pass + + @_('"&"') + def balanced_token(self, p): + pass + + @_('--') + def balanced_token(self, p): + pass + + @_('++') + def balanced_token(self, p): + pass + + @_('->') + def balanced_token(self, p): + pass + + @_('"."') + def balanced_token(self, p): + pass + + @_('_Thread_local') + def balanced_token(self, p): + pass + + @_('_Static_assert') + def balanced_token(self, p): + pass + + @_('_Noreturn') + def balanced_token(self, p): + pass + + @_('_Imaginary') + def balanced_token(self, p): + pass + + @_('_Generic') + def balanced_token(self, p): + pass + + @_('_Decimal64') + def balanced_token(self, p): + pass + + @_('_Decimal32') + def balanced_token(self, p): + pass + + @_('_Decimal128') + def balanced_token(self, p): + pass + + @_('_Complex') + def balanced_token(self, p): + pass + + @_('_Bool') + def balanced_token(self, p): + pass + + @_('_Atomic') + def balanced_token(self, p): + pass + + @_('_Alignof') + def balanced_token(self, p): + pass + + @_('_Alignas') + def balanced_token(self, p): + pass + + @_('while') + def balanced_token(self, p): + pass + + @_('volatile') + def balanced_token(self, p): + pass + + @_('void') + def balanced_token(self, p): + pass + + @_('unsigned') + def balanced_token(self, p): + pass + + @_('union') + def balanced_token(self, p): + pass + + @_('typedef') + def balanced_token(self, p): + pass + + @_('switch') + def balanced_token(self, p): + pass + + @_('struct') + def balanced_token(self, p): + pass + + @_('static') + def balanced_token(self, p): + pass + + @_('sizeof') + def balanced_token(self, p): + pass + + @_('signed') + def balanced_token(self, p): + pass + + @_('short') + def balanced_token(self, p): + pass + + @_('return') + def 
balanced_token(self, p): + pass + + @_('restrict') + def balanced_token(self, p): + pass + + @_('register') + def balanced_token(self, p): + pass + + @_('long') + def balanced_token(self, p): + pass + + @_('int') + def balanced_token(self, p): + pass + + @_('inline') + def balanced_token(self, p): + pass + + @_('if') + def balanced_token(self, p): + pass + + @_('goto') + def balanced_token(self, p): + pass + + @_('for') + def balanced_token(self, p): + pass + + @_('float') + def balanced_token(self, p): + pass + + @_('extern') + def balanced_token(self, p): + pass + + @_('enum') + def balanced_token(self, p): + pass + + @_('else') + def balanced_token(self, p): + pass + + @_('double') + def balanced_token(self, p): + pass + + @_('do') + def balanced_token(self, p): + pass + + @_('default') + def balanced_token(self, p): + pass + + @_('continue') + def balanced_token(self, p): + pass + + @_('const') + def balanced_token(self, p): + pass + + @_('char') + def balanced_token(self, p): + pass + + @_('case') + def balanced_token(self, p): + pass + + @_('break') + def balanced_token(self, p): + pass + + @_('auto') + def balanced_token(self, p): + pass + + @_('"{" balanced_token_sequence "}"') + def balanced_token(self, p): + pass + + @_('"{" "}"') + def balanced_token(self, p): + pass + + @_('"[" balanced_token_sequence "]"') + def balanced_token(self, p): + pass + + @_('"[" "]"') + def balanced_token(self, p): + pass + + @_('"(" balanced_token_sequence ")"') + def balanced_token(self, p): + pass + + @_('"(" ")"') + def balanced_token(self, p): + pass + + @_('balanced_token_sequence balanced_token') + def balanced_token_sequence(self, p): + pass + + @_('balanced_token') + def balanced_token_sequence(self, p): + pass + + @_('label') + def block_item(self, p): + pass + + @_('unlabeled_statement') + def block_item(self, p): + pass + + @_('declaration') + def block_item(self, p): + pass + + @_('block_item_list block_item') + def block_item_list(self, p): + pass + + 
@_('block_item') + def block_item_list(self, p): + pass + + @_('"(" type_name ")" cast_expression') + def cast_expression(self, p): + pass + + @_('unary_expression') + def cast_expression(self, p): + pass + + @_('"{" block_item_list "}"') + def compound_statement(self, p): + pass + + @_('"{" "}"') + def compound_statement(self, p): + pass + + @_('logical_OR_expression "?" expression ":" conditional_expression') + def conditional_expression(self, p): + pass + + @_('logical_OR_expression') + def conditional_expression(self, p): + pass + + @_('conditional_expression') + def constant_expression(self, p): + pass + + @_('static_assert_declaration attribute_declaration') + def declaration(self, p): + pass + + @_('attribute_specifier_sequence declaration_specifiers init_declarator_list ";"') + def declaration(self, p): + pass + + @_('declaration_specifiers init_declarator_list ";"') + def declaration(self, p): + pass + + @_('declaration_specifiers ";"') + def declaration(self, p): + pass + + @_('function_specifier') + def declaration_specifier(self, p): + pass + + @_('type_specifier_qualifier') + def declaration_specifier(self, p): + pass + + @_('storage_class_specifier') + def declaration_specifier(self, p): + pass + + @_('declaration_specifier declaration_specifiers') + def declaration_specifiers(self, p): + pass + + @_('declaration_specifier attribute_specifier_sequence') + def declaration_specifiers(self, p): + pass + + @_('declaration_specifier') + def declaration_specifiers(self, p): + pass + + @_('pointer direct_declarator') + def declarator(self, p): + pass + + @_('direct_declarator') + def declarator(self, p): + pass + + @_('designator_list "="') + def designation(self, p): + pass + + @_('"." 
identifier') + def designator(self, p): + pass + + @_('"[" constant_expression "]"') + def designator(self, p): + pass + + @_('designator_list designator') + def designator_list(self, p): + pass + + @_('designator') + def designator_list(self, p): + pass + + @_('function_abstract_declarator attribute_specifier_sequence') + def direct_abstract_declarator(self, p): + pass + + @_('function_abstract_declarator') + def direct_abstract_declarator(self, p): + pass + + @_('array_abstract_declarator attribute_specifier_sequence') + def direct_abstract_declarator(self, p): + pass + + @_('array_abstract_declarator') + def direct_abstract_declarator(self, p): + pass + + @_('"(" abstract_declarator ")"') + def direct_abstract_declarator(self, p): + pass + + @_('function_declarator attribute_specifier_sequence') + def direct_declarator(self, p): + pass + + @_('function_declarator') + def direct_declarator(self, p): + pass + + @_('array_declarator attribute_specifier_sequence') + def direct_declarator(self, p): + pass + + @_('array_declarator') + def direct_declarator(self, p): + pass + + @_('"(" declarator ")"') + def direct_declarator(self, p): + pass + + @_('identifier attribute_specifier_sequence') + def direct_declarator(self, p): + pass + + @_('identifier') + def direct_declarator(self, p): + pass + + @_('enum identifier') + def enum_specifier(self, p): + pass + + @_('enum attribute_specifier_sequence identifier "{" enumerator_list "," "}"') + def enum_specifier(self, p): + pass + + @_('enum identifier "{" enumerator_list "," "}"') + def enum_specifier(self, p): + pass + + @_('enum attribute_specifier_sequence "{" enumerator_list "," "}"') + def enum_specifier(self, p): + pass + + @_('enum "{" enumerator_list "," "}"') + def enum_specifier(self, p): + pass + + @_('enum attribute_specifier_sequence identifier "{" enumerator_list "}"') + def enum_specifier(self, p): + pass + + @_('enum identifier "{" enumerator_list "}"') + def enum_specifier(self, p): + pass + + @_('enum 
attribute_specifier_sequence "{" enumerator_list "}"') + def enum_specifier(self, p): + pass + + @_('enum "{" enumerator_list "}"') + def enum_specifier(self, p): + pass + + @_('enumeration-constant attribute_specifier_sequence "=" constant_expression') + def enumerator(self, p): + pass + + @_('enumeration-constant "=" constant_expression') + def enumerator(self, p): + pass + + @_('enumeration-constant attribute_specifier_sequence') + def enumerator(self, p): + pass + + @_('enumeration-constant') + def enumerator(self, p): + pass + + @_('enumerator_list "," enumerator') + def enumerator_list(self, p): + pass + + @_('enumerator') + def enumerator_list(self, p): + pass + + @_('equality_expression != relational_expression') + def equality_expression(self, p): + pass + + @_('equality_expression == relational_expression') + def equality_expression(self, p): + pass + + @_('relational_expression') + def equality_expression(self, p): + pass + + @_('expression "," assignment_expression') + def expression(self, p): + pass + + @_('assignment_expression') + def expression(self, p): + pass + + @_('attribute_specifier_sequence expression ";"') + def expression_statement(self, p): + pass + + @_('expression ";"') + def expression_statement(self, p): + pass + + @_('";"') + def expression_statement(self, p): + pass + + @_('declaration') + def external_declaration(self, p): + pass + + @_('function_definition') + def external_declaration(self, p): + pass + + @_('direct_abstract_declarator "(" parameter_type_list ")"') + def function_abstract_declarator(self, p): + pass + + @_('"(" parameter_type_list ")"') + def function_abstract_declarator(self, p): + pass + + @_('direct_abstract_declarator "(" ")"') + def function_abstract_declarator(self, p): + pass + + @_('"(" ")"') + def function_abstract_declarator(self, p): + pass + + @_('compound_statement') + def function_body(self, p): + pass + + @_('direct_declarator "(" parameter_type_list ")"') + def function_declarator(self, p): + pass + 
+ @_('direct_declarator "(" ")"') + def function_declarator(self, p): + pass + + @_('attribute_specifier_sequence declaration_specifiers declarator function_body') + def function_definition(self, p): + pass + + @_('declaration_specifiers declarator function_body') + def function_definition(self, p): + pass + + @_('_Noreturn') + def function_specifier(self, p): + pass + + @_('inline') + def function_specifier(self, p): + pass + + @_('generic_assoc_list "," generic_association') + def generic_assoc_list(self, p): + pass + + @_('generic_association') + def generic_assoc_list(self, p): + pass + + @_('default ":" assignment_expression') + def generic_association(self, p): + pass + + @_('type_name ":" assignment_expression') + def generic_association(self, p): + pass + + @_('_Generic "(" assignment_expression "," generic_assoc_list ")"') + def generic_selection(self, p): + pass + + @_('inclusive_OR_expression "|" exclusive_OR_expression') + def inclusive_OR_expression(self, p): + pass + + @_('exclusive_OR_expression') + def inclusive_OR_expression(self, p): + pass + + @_('exclusive_OR_expression "^" AND_expression') + def exclusive_OR_expression(self, p): + pass + + @_('AND_expression') + def exclusive_OR_expression(self, p): + pass + + @_('declarator "=" initializer') + def init_declarator(self, p): + pass + + @_('declarator') + def init_declarator(self, p): + pass + + @_('init_declarator_list "," init_declarator') + def init_declarator_list(self, p): + pass + + @_('init_declarator') + def init_declarator_list(self, p): + pass + + @_('"{" initializer_list "," "}"') + def initializer(self, p): + pass + + @_('"{" initializer_list "}"') + def initializer(self, p): + pass + + @_('assignment_expression') + def initializer(self, p): + pass + + @_('initializer_list "," designation initializer') + def initializer_list(self, p): + pass + + @_('initializer_list "," initializer') + def initializer_list(self, p): + pass + + @_('designation initializer') + def initializer_list(self, 
p): + pass + + @_('initializer') + def initializer_list(self, p): + pass + + @_('for "(" declaration expression ";" expression ")" statement') + def iteration_statement(self, p): + pass + + @_('for "(" declaration ";" expression ")" statement') + def iteration_statement(self, p): + pass + + @_('for "(" declaration expression ";" ")" statement') + def iteration_statement(self, p): + pass + + @_('for "(" declaration ";" ")" statement') + def iteration_statement(self, p): + pass + + @_('for "(" expression ";" expression ";" expression ")" statement') + def iteration_statement(self, p): + pass + + @_('for "(" ";" expression ";" expression ")" statement') + def iteration_statement(self, p): + pass + + @_('for "(" expression ";" ";" expression ")" statement') + def iteration_statement(self, p): + pass + + @_('for "(" ";" ";" expression ")" statement') + def iteration_statement(self, p): + pass + + @_('for "(" expression ";" expression ";" ")" statement') + def iteration_statement(self, p): + pass + + @_('for "(" ";" expression ";" ")" statement') + def iteration_statement(self, p): + pass + + @_('for "(" expression ";" ";" ")" statement') + def iteration_statement(self, p): + pass + + @_('for "(" ";" ";" ")" statement') + def iteration_statement(self, p): + pass + + @_('do statement while "(" expression ")" ";"') + def iteration_statement(self, p): + pass + + @_('while "(" expression ")" statement') + def iteration_statement(self, p): + pass + + @_('return expression ";"') + def jump_statement(self, p): + pass + + @_('return ";"') + def jump_statement(self, p): + pass + + @_('break ";"') + def jump_statement(self, p): + pass + + @_('continue ";"') + def jump_statement(self, p): + pass + + @_('goto identifier ";"') + def jump_statement(self, p): + pass + + @_('attribute_specifier_sequence default ":"') + def label(self, p): + pass + + @_('default ":"') + def label(self, p): + pass + + @_('attribute_specifier_sequence case constant_expression ":"') + def label(self, p): + 
pass + + @_('case constant_expression ":"') + def label(self, p): + pass + + @_('attribute_specifier_sequence identifier ":"') + def label(self, p): + pass + + @_('identifier ":"') + def label(self, p): + pass + + @_('label statement') + def labeled_statement(self, p): + pass + + @_('logical_AND_expression && inclusive_OR_expression') + def logical_AND_expression(self, p): + pass + + @_('inclusive_OR_expression') + def logical_AND_expression(self, p): + pass + + @_('logical_OR_expression || logical_AND_expression') + def logical_OR_expression(self, p): + pass + + @_('logical_AND_expression') + def logical_OR_expression(self, p): + pass + + @_('static_assert_declaration') + def member_declaration(self, p): + pass + + @_('attribute_specifier_sequence specifier_qualifier_list member_declarator_list ";"') + def member_declaration(self, p): + pass + + @_('specifier_qualifier_list member_declarator_list ";"') + def member_declaration(self, p): + pass + + @_('attribute_specifier_sequence specifier_qualifier_list ";"') + def member_declaration(self, p): + pass + + @_('specifier_qualifier_list ";"') + def member_declaration(self, p): + pass + + @_('member_declaration_list member_declaration') + def member_declaration_list(self, p): + pass + + @_('member_declaration') + def member_declaration_list(self, p): + pass + + @_('declarator ":" constant_expression') + def member_declarator(self, p): + pass + + @_('":" constant_expression') + def member_declarator(self, p): + pass + + @_('declarator') + def member_declarator(self, p): + pass + + @_('member_declarator_list "," member_declarator') + def member_declarator_list(self, p): + pass + + @_('member_declarator') + def member_declarator_list(self, p): + pass + + @_('multiplicative_expression "%" cast_expression') + def multiplicative_expression(self, p): + pass + + @_('multiplicative_expression "/" cast_expression') + def multiplicative_expression(self, p): + pass + + @_('multiplicative_expression "*" cast_expression') + def 
multiplicative_expression(self, p): + pass + + @_('cast_expression') + def multiplicative_expression(self, p): + pass + + @_('attribute_specifier_sequence declaration_specifiers abstract_declarator') + def parameter_declaration(self, p): + pass + + @_('declaration_specifiers abstract_declarator') + def parameter_declaration(self, p): + pass + + @_('attribute_specifier_sequence declaration_specifiers') + def parameter_declaration(self, p): + pass + + @_('declaration_specifiers') + def parameter_declaration(self, p): + pass + + @_('attribute_specifier_sequence declaration_specifiers declarator') + def parameter_declaration(self, p): + pass + + @_('declaration_specifiers declarator') + def parameter_declaration(self, p): + pass + + @_('parameter_list "," parameter_declaration') + def parameter_list(self, p): + pass + + @_('parameter_declaration') + def parameter_list(self, p): + pass + + @_('parameter_list "," ...') + def parameter_type_list(self, p): + pass + + @_('parameter_list') + def parameter_type_list(self, p): + pass + + @_('"*" attribute_specifier_sequence type_qualifier_list pointer') + def pointer(self, p): + pass + + @_('"*" type_qualifier_list pointer') + def pointer(self, p): + pass + + @_('"*" attribute_specifier_sequence pointer') + def pointer(self, p): + pass + + @_('"*" pointer') + def pointer(self, p): + pass + + @_('"*" attribute_specifier_sequence type_qualifier_list') + def pointer(self, p): + pass + + @_('"*" type_qualifier_list') + def pointer(self, p): + pass + + @_('"*" attribute_specifier_sequence') + def pointer(self, p): + pass + + @_('"*"') + def pointer(self, p): + pass + + @_('"(" type_name ")" "{" initializer_list "," "}"') + def postfix_expression(self, p): + pass + + @_('"(" type_name ")" "{" initializer_list "}"') + def postfix_expression(self, p): + pass + + @_('postfix_expression --') + def postfix_expression(self, p): + pass + + @_('postfix_expression ++') + def postfix_expression(self, p): + pass + + @_('postfix_expression -> 
identifier') + def postfix_expression(self, p): + pass + + @_('postfix_expression "." identifier') + def postfix_expression(self, p): + pass + + @_('postfix_expression "(" argument_expression_list ")"') + def postfix_expression(self, p): + pass + + @_('postfix_expression "(" ")"') + def postfix_expression(self, p): + pass + + @_('postfix_expression "[" expression "]"') + def postfix_expression(self, p): + pass + + @_('primary_expression') + def postfix_expression(self, p): + pass + + @_('generic_selection') + def primary_expression(self, p): + pass + + @_('"(" expression ")"') + def primary_expression(self, p): + pass + + @_('string-literal') + def primary_expression(self, p): + pass + + @_('enumeration-constant') + def primary_expression(self, p): + pass + + @_('character-constant') + def primary_expression(self, p): + pass + + @_('floating-constant') + def primary_expression(self, p): + pass + + @_('integer-constant') + def primary_expression(self, p): + pass + + @_('identifier') + def primary_expression(self, p): + pass + + @_('relational_expression >= shift_expression') + def relational_expression(self, p): + pass + + @_('relational_expression <= shift_expression') + def relational_expression(self, p): + pass + + @_('relational_expression ">" shift_expression') + def relational_expression(self, p): + pass + + @_('relational_expression "<" shift_expression') + def relational_expression(self, p): + pass + + @_('shift_expression') + def relational_expression(self, p): + pass + + @_('switch "(" expression ")" statement') + def selection_statement(self, p): + pass + + @_('if "(" expression ")" statement else statement') + def selection_statement(self, p): + pass + + @_('if "(" expression ")" statement') + def selection_statement(self, p): + pass + + @_('shift_expression >> additive_expression') + def shift_expression(self, p): + pass + + @_('shift_expression << additive_expression') + def shift_expression(self, p): + pass + + @_('additive_expression') + def 
shift_expression(self, p): + pass + + @_('type_specifier_qualifier specifier_qualifier_list') + def specifier_qualifier_list(self, p): + pass + + @_('type_specifier_qualifier attribute_specifier_sequence') + def specifier_qualifier_list(self, p): + pass + + @_('type_specifier_qualifier') + def specifier_qualifier_list(self, p): + pass + + @_('identifier') + def standard_attribute(self, p): + pass + + @_('unlabeled_statement') + def statement(self, p): + pass + + @_('labeled_statement') + def statement(self, p): + pass + + @_('_Static_assert "(" constant_expression ")" ";"') + def static_assert_declaration(self, p): + pass + + @_('_Static_assert "(" constant_expression "," string-literal ")" ";"') + def static_assert_declaration(self, p): + pass + + @_('register') + def storage_class_specifier(self, p): + pass + + @_('auto') + def storage_class_specifier(self, p): + pass + + @_('_Thread_local') + def storage_class_specifier(self, p): + pass + + @_('static') + def storage_class_specifier(self, p): + pass + + @_('extern') + def storage_class_specifier(self, p): + pass + + @_('typedef') + def storage_class_specifier(self, p): + pass + + @_('union') + def struct_or_union(self, p): + pass + + @_('struct') + def struct_or_union(self, p): + pass + + @_('struct_or_union attribute_specifier_sequence identifier') + def struct_or_union_specifier(self, p): + pass + + @_('struct_or_union identifier') + def struct_or_union_specifier(self, p): + pass + + @_('struct_or_union attribute_specifier_sequence identifier "{" member_declaration_list "}"') + def struct_or_union_specifier(self, p): + pass + + @_('struct_or_union identifier "{" member_declaration_list "}"') + def struct_or_union_specifier(self, p): + pass + + @_('struct_or_union attribute_specifier_sequence "{" member_declaration_list "}"') + def struct_or_union_specifier(self, p): + pass + + @_('struct_or_union "{" member_declaration_list "}"') + def struct_or_union_specifier(self, p): + pass + + @_('translation_unit 
external_declaration') + def translation_unit(self, p): + pass + + @_('external_declaration') + def translation_unit(self, p): + pass + + @_('specifier_qualifier_list abstract_declarator') + def type_name(self, p): + pass + + @_('specifier_qualifier_list') + def type_name(self, p): + pass + + @_('_Atomic') + def type_qualifier(self, p): + pass + + @_('volatile') + def type_qualifier(self, p): + pass + + @_('restrict') + def type_qualifier(self, p): + pass + + @_('const') + def type_qualifier(self, p): + pass + + @_('type_qualifier_list type_qualifier') + def type_qualifier_list(self, p): + pass + + @_('type_qualifier') + def type_qualifier_list(self, p): + pass + + @_('typedef_name') + def type_specifier(self, p): + pass + + @_('enum_specifier') + def type_specifier(self, p): + pass + + @_('struct_or_union_specifier') + def type_specifier(self, p): + pass + + @_('atomic_type_specifier') + def type_specifier(self, p): + pass + + @_('_Decimal128') + def type_specifier(self, p): + pass + + @_('_Decimal64') + def type_specifier(self, p): + pass + + @_('_Decimal32') + def type_specifier(self, p): + pass + + @_('_Complex') + def type_specifier(self, p): + pass + + @_('_Bool') + def type_specifier(self, p): + pass + + @_('unsigned') + def type_specifier(self, p): + pass + + @_('signed') + def type_specifier(self, p): + pass + + @_('double') + def type_specifier(self, p): + pass + + @_('float') + def type_specifier(self, p): + pass + + @_('long') + def type_specifier(self, p): + pass + + @_('int') + def type_specifier(self, p): + pass + + @_('short') + def type_specifier(self, p): + pass + + @_('char') + def type_specifier(self, p): + pass + + @_('void') + def type_specifier(self, p): + pass + + @_('alignment_specifier') + def type_specifier_qualifier(self, p): + pass + + @_('type_qualifier') + def type_specifier_qualifier(self, p): + pass + + @_('type_specifier') + def type_specifier_qualifier(self, p): + pass + + @_('identifier') + def typedef_name(self, p): + pass + + 
@_('_Alignof "(" type_name ")"') + def unary_expression(self, p): + pass + + @_('sizeof "(" type_name ")"') + def unary_expression(self, p): + pass + + @_('sizeof unary_expression') + def unary_expression(self, p): + pass + + @_('unary_operator cast_expression') + def unary_expression(self, p): + pass + + @_('-- unary_expression') + def unary_expression(self, p): + pass + + @_('++ unary_expression') + def unary_expression(self, p): + pass + + @_('postfix_expression') + def unary_expression(self, p): + pass + + @_('"&"') + def unary_operator(self, p): + pass + + @_('"*"') + def unary_operator(self, p): + pass + + @_('"+"') + def unary_operator(self, p): + pass + + @_('"-"') + def unary_operator(self, p): + pass + + @_('"~"') + def unary_operator(self, p): + pass + + @_('"!"') + def unary_operator(self, p): + pass + + @_('attribute_specifier_sequence jump_statement') + def unlabeled_statement(self, p): + pass + + @_('jump_statement') + def unlabeled_statement(self, p): + pass + + @_('attribute_specifier_sequence iteration_statement') + def unlabeled_statement(self, p): + pass + + @_('iteration_statement') + def unlabeled_statement(self, p): + pass + + @_('attribute_specifier_sequence selection_statement') + def unlabeled_statement(self, p): + pass + + @_('selection_statement') + def unlabeled_statement(self, p): + pass + + @_('attribute_specifier_sequence compound_statement') + def unlabeled_statement(self, p): + pass + + @_('compound_statement') + def unlabeled_statement(self, p): + pass + + @_('expression_statement') + def unlabeled_statement(self, p): + pass + +if __name__ == '__main__': + lexer = CLexer() + parser = CParser() diff --git a/example/conflict/calc.py b/example/conflict/calc.py new file mode 100644 index 0000000..321a5d7 --- /dev/null +++ b/example/conflict/calc.py @@ -0,0 +1,89 @@ +# ----------------------------------------------------------------------------- +# calc.py +# ----------------------------------------------------------------------------- 
+ +import sys +sys.path.insert(0, '../..') + +from sly import Lexer, Parser + +class CalcLexer(Lexer): + tokens = { NAME, NUMBER, PLUS, TIMES, MINUS, DIVIDE, ASSIGN, LPAREN, RPAREN } + ignore = ' \t' + + # Tokens + NAME = r'[a-zA-Z_][a-zA-Z0-9_]*' + NUMBER = r'\d+' + + # Special symbols + PLUS = r'\+' + MINUS = r'-' + TIMES = r'\*' + DIVIDE = r'/' + ASSIGN = r'=' + LPAREN = r'\(' + RPAREN = r'\)' + + # Ignored pattern + ignore_newline = r'\n+' + + # Extra action for newlines + def ignore_newline(self, t): + self.lineno += t.value.count('\n') + + def error(self, t): + print("Illegal character '%s'" % t.value[0]) + self.index += 1 + +class CalcParser(Parser): + tokens = CalcLexer.tokens + debugfile = 'calc.out' + dotfile = "calc.dot" + + #precedence = ( + # ('left', PLUS, MINUS), + # ('left', TIMES, DIVIDE), + # ('right', UMINUS) + # ) + + def __init__(self): + self.names = { } + + @_('NAME ASSIGN expr') + def statement(self, p): + self.names[p.NAME] = p.expr + + @_('expr') + def statement(self, p): + print(p.expr) + + @_('expr TIMES expr') + def expr(self, p): + return p.expr0 + p.expr1 + + @_('LPAREN expr RPAREN') + def expr(self, p): + return p.expr + + @_('NUMBER') + def expr(self, p): + return int(p.NUMBER) + + @_('NAME') + def expr(self, p): + try: + return self.names[p.NAME] + except LookupError: + print(f'Undefined name {p.NAME!r}') + return 0 + +if __name__ == '__main__': + lexer = CalcLexer() + parser = CalcParser() + while True: + try: + text = input('calc > ') + except EOFError: + break + if text: + parser.parse(lexer.tokenize(text)) diff --git a/example/conflict/decl_expr.py b/example/conflict/decl_expr.py new file mode 100644 index 0000000..7fd1721 --- /dev/null +++ b/example/conflict/decl_expr.py @@ -0,0 +1,68 @@ +# ----------------------------------------------------------------------------- +# calc.py +# ----------------------------------------------------------------------------- + +import sys +sys.path.insert(0, '../..') + +from sly import Lexer, 
Parser + +class DeclExprLexer(Lexer): + tokens = { IDENTIFIER, TYPENAME } + ignore = ' \t' + literals = {'=','+',';', '(', ')'} + + # Tokens + IDENTIFIER = r'[a-zA-Z_][a-zA-Z0-9_]*' + IDENTIFIER['typename'] = TYPENAME + + # Ignored pattern + ignore_newline = r'\n+' + + # Extra action for newlines + def ignore_newline(self, t): + self.lineno += t.value.count('\n') + + def error(self, t): + print("Illegal character '%s'" % t.value[0]) + self.index += 1 + +class DeclExprParser(Parser): + tokens = DeclExprLexer.tokens + debugfile = 'decl_expr.out' + + def __init__(self): + pass + + @_('prog stmt') + @_('') + def prog(self, p): + pass + + @_('expr ";"') + @_('decl') + def stmt(self, p): + pass + + @_('IDENTIFIER') + @_('TYPENAME "(" expr ")"') + @_('expr "+" expr') + @_('expr "=" expr') + def expr(self, p): + pass + + @_('TYPENAME declarator ";"') + @_('TYPENAME declarator "=" expr ";"') + def decl(self, p): + pass + + @_('IDENTIFIER') + @_('"(" declarator ")"') + def declarator(self, p): + pass + + +if __name__ == '__main__': + lexer = DeclExprLexer() + parser = DeclExprParser() + diff --git a/example/conflict/ifelse.py b/example/conflict/ifelse.py new file mode 100644 index 0000000..c1a96ec --- /dev/null +++ b/example/conflict/ifelse.py @@ -0,0 +1,74 @@ +# ----------------------------------------------------------------------------- +# calc.py +# ----------------------------------------------------------------------------- + +import sys +sys.path.insert(0, '../..') + +from sly import Lexer, Parser + +class IfElseLexer(Lexer): + tokens = { IDENTIFIER, IF, ELSE, SEMI, LPAREN, RPAREN } + ignore = ' \t' + + # Tokens + IDENTIFIER = r'[a-zA-Z_][a-zA-Z0-9_]*' + IDENTIFIER['if'] = IF + IDENTIFIER['else'] = ELSE + + # Special symbols + SEMI = r'\;' + LPAREN = r'\(' + RPAREN = r'\)' + + # Ignored pattern + ignore_newline = r'\n+' + + # Extra action for newlines + def ignore_newline(self, t): + self.lineno += t.value.count('\n') + + def error(self, t): + print("Illegal character 
'%s'" % t.value[0]) + self.index += 1 + +class IfElseParser(Parser): + tokens = IfElseLexer.tokens + debugfile = 'ifelse.out' + dotfile = 'ifelse.dot' + + #precedence = ( + # ('left', ELSE), + # ) + + def __init__(self): + pass + + @_('statement') + def prog(self, p): + pass + + @_('if_statement') + def statement(self, p): + pass + + @_('IF LPAREN expr RPAREN statement') + def if_statement(self, p): + pass + + @_('IF LPAREN expr RPAREN statement ELSE statement') + def if_statement(self, p): + pass + + @_('expr SEMI') + def statement(self, p): + pass + + @_('IDENTIFIER') + def expr(self, p): + pass + +if __name__ == '__main__': + lexer = IfElseLexer() + parser = IfElseParser() + parser.parse(lexer.tokenize("if (x) if (y) z; else w;")) diff --git a/sly/yacc.py b/sly/yacc.py index 085ed6b..bd232e4 100644 --- a/sly/yacc.py +++ b/sly/yacc.py @@ -33,6 +33,8 @@ import sys import inspect +import io +import functools from collections import OrderedDict, defaultdict, Counter __all__ = [ 'Parser' ] @@ -330,6 +332,296 @@ def __str__(self): def __repr__(self): return f'LRItem({self})' + +# ----------------------------------------------------------------------------- +# class LRPath +# +# This class represents a path between nodes. 
+# ----------------------------------------------------------------------------- + +class LRPath(object): + class LRPathItem(object): + def __init__(self, lookahead): + self._hash = (lookahead,) + self._lookahead = lookahead + + def to_string(self): + return [self._lookahead], len(self._lookahead) + + def __init__(self, node, sequence, use_marker = True): + self._node = node + if sequence: + self._sequence = sequence + else: + if use_marker: + self._sequence = [LRPath.LRPathItem('\u2666')] + [LRPath.LRPathItem(i) for i in self._node.item.prod[self._node.item.lr_index+1:]] + else: + self._sequence = [LRPath.LRPathItem(i) for i in self._node.item.prod[self._node.item.lr_index+1:]] + if node.item.number == 0: + self._sequence.append(LRPath.LRPathItem('$end')) + self._hash = sum([s._hash for s in self._sequence], start=(node.item, )) + + def __hash__(self): + return hash(self._hash) + + def __eq__(self, other): + return self._hash == other._hash + + def derive_from(self, node, lookahead): + if lookahead is None: + result = LRPath(node, [self] + [LRPath.LRPathItem(i) for i in node.item.prod[node.item.lr_index+2:]]) + if node.item.number == 0: + result._sequence.append(LRPath.LRPathItem('$end')) + else: + result = LRPath(node, [LRPath.LRPathItem(lookahead)] + self._sequence) + return result + + def expand_left(self): + return LRPath(self._node, [LRPath.LRPathItem(i) for i in self._node.item.prod[:self._node.item.lr_index]] + self._sequence) + + def expand(self, index, path): + return LRPath(self._node, self._sequence[:index] + path._sequence) + + def to_string(self): + expanded_symbol = self._node.item.name + if len(self._sequence) == 0: + return ['', f'\u2570{expanded_symbol}\u256f'], len(expanded_symbol) + 2 + buffer, length = self._sequence[0].to_string() + for item in self._sequence[1:]: + temp = buffer + extension, ext_length = item.to_string() + buffer = [f'{i.ljust(length)} {j}' for i, j in zip(temp, extension)] + buffer += temp[len(extension):] + buffer += 
[(1+length)*' ' + j for j in extension[len(temp):]] + length += 1 + ext_length + + extra_padding = '\u2500'*(length - 2 - len(expanded_symbol)) + buffer.append(f'\u2570{expanded_symbol}{extra_padding}\u256f') + return buffer, max(length, len(buffer[-1])) + + + +# ----------------------------------------------------------------------------- +# class LRDominanceNode +# +# This class represents a node used in the complete grammar graph. There is one +# dominance node for every item of every item set. +# ----------------------------------------------------------------------------- + +class LRDominanceNode(object): + def __init__(self, item_set, item, predecessor=None, parent=None): + self.item_set = item_set + self.item = item + if predecessor is not None: + self.predecessor_lookahead = predecessor[0] + self.predecessors = [predecessor[1]] + else: + self.predecessors = [] + self.successor = None + + self.direct_parents = [] + self.parents = set([]) + self.direct_children = [] + self.children = set([]) + if parent is not None: + self.direct_parents.append(parent) + self.parents.add(parent) + parent.direct_children.append(self) + parent.children.add(self) + + def expand_empty(self, first_set): + # expand the first item of the path to build empty productions + if self.item.lr_index == len(self.item.prod) - 1: + return LRPath(self, [], use_marker=False) + for child in sorted(self.direct_children, key=lambda n: len(n.item.prod)): + try: + following_symbol = child.item.prod[1] + except IndexError: + result = LRPath(child, [], use_marker=False) + result = result.derive_from(self, None) + return result + else: + if '' in first_set[following_symbol]: + p = child.successor.expand_empty(first_set) + if p: + result = child.expand_empty(first_set) + result = result.expand(1, p) + result = result.derive_from(self, None) + return result + return None + + def expand_lookahead(self, lookahead, first_set): + # expand the first item of the path until it starts with the lookahead + if 
self.item.prod[self.item.lr_index+1] == lookahead: + return LRPath(self, [], use_marker=False) + queue = [(self, [[]])] + seen = set() + + while queue: + node, paths = queue.pop(0) + if node in seen: + continue + seen.add(node) + + try: + following_symbol = node.item.prod[node.item.lr_index+1] + except IndexError: + continue + + if following_symbol == lookahead: + result = None + paths[-1].append(LRPath(node, [], use_marker=False)) + while paths: + child_paths = paths.pop(-1) + if result is not None: + child_paths[-1] = child_paths[-1].expand(1, result) + merge_children = lambda x, y: x.derive_from(y._node, None) + result = functools.reduce(merge_children, child_paths[::-1]) + return result + elif lookahead in first_set[following_symbol]: + for child in sorted(node.direct_children, key=lambda n: len(n.item.prod)): + queue.append((child, paths[:-1] + [paths[-1] + [LRPath(node, [], use_marker=False)]])) + elif '' in first_set[following_symbol]: + queue.append((node.successor, paths[:-1] + [paths[-1] + [node.expand_empty(first_set)]] + [[]])) + + return None + + + def filter_node_by_lookahead(self, path, lookahead, first_set): + result = [] + if lookahead is not None: + try: + following_symbol = self.item.prod[self.item.lr_index+2] + except IndexError: + if lookahead == '$end' and self.item.number == 0: + result.append((path, None)) + else: + result.append((path, lookahead)) + else: + if '' in first_set[following_symbol]: + successor_path = self.successor.expand_empty(first_set) + for p, la in self.successor.filter_node_by_lookahead(successor_path, lookahead, first_set): + result.append((path.expand(1, p), la)) + if lookahead in first_set[following_symbol]: + successor_path = self.successor.expand_lookahead(lookahead, first_set) + result.append((path.expand(1, successor_path), None)) + else: + result.append((path, lookahead)) + return result + + def backtrack_up(self, path, state, lookahead, first_set, seen): + # this method will find the fastest path from self to the 
specified parent state + # it will only find paths that can be followed by lookahead + queue = [(path, lookahead)] + result = [] + shortest_path_seen = set() + while queue: + path, lookahead = queue.pop(0) + node = path._node + for parent in sorted(node.direct_parents, key=lambda n: len(n.item.prod) - n.item.lr_index): + if (parent, lookahead) in seen: + continue + seen.add((parent, lookahead)) + if parent.item.lr_index > 0: + if (lookahead, parent.item_set, parent.item.prod[:parent.item.lr_index]) in shortest_path_seen: + continue + for p, la in parent.filter_node_by_lookahead(path.derive_from(parent, None), + lookahead, + first_set): + if parent.item.lr_index > 0 and la is None: + shortest_path_seen.add((lookahead, parent.item_set, parent.item.prod[:parent.item.lr_index])) + if la is None and state is None: + result.append((p, la)) + else: + queue.append((p, la)) + for predecessor in node.predecessors: + if (predecessor, lookahead) in seen: + continue + seen.add((predecessor, lookahead)) + if state is None or predecessor.item_set == state: + if predecessor.item.lr_index > 0: + if (lookahead, predecessor.item_set, predecessor.item.prod[:predecessor.item.lr_index]) in shortest_path_seen: + continue + shortest_path_seen.add((lookahead, predecessor.item_set, predecessor.item.prod[:predecessor.item.lr_index])) + result.append((path.derive_from(predecessor, node.predecessor_lookahead), lookahead)) + return result + +# ----------------------------------------------------------------------------- +# class LRItemSet +# +# This class represents a collection of LRItem objects and their relationship. +# Storing relationships between LRItems allows backtracking to find sequences +# of tokens that lead to a conflict. 
+# ----------------------------------------------------------------------------- + +class LRItemSet(object): + def __init__(self, core): + self._core = set([]) + self._items = {} + self.add_core(core) + self._lr0_close() + + def __iter__(self): + return iter(self._items) + + def __getitem__(self, item): + return self._items[item] + + def __repr__(self): + return f'LRItemSet({id(self)})' + + def add_core(self, core): + for item, node, lookahead in core: + try: + target_node = self._items[item] + except KeyError: + if node is not None: + target_node = LRDominanceNode(self, item, predecessor = (lookahead, node)) + else: + target_node = LRDominanceNode(self, item) + self._items[item] = target_node + else: + assert node not in target_node.predecessors + target_node.predecessors.append(node) + if node is not None: + node.successor = target_node + self._core.add(target_node) + + def _lr0_close(self): + # Compute the LR(0) closure operation on self._items + new_items = self._items + while new_items: + self._items.update(new_items) + new_items = {} + for item, dn in self._items.items(): + for x in item.lr_after: + try: + successor = self._items[x.lr_next] + except KeyError: + try: + successor = new_items[x.lr_next] + except KeyError: + successor = LRDominanceNode( + self, x.lr_next, parent=dn + ) + new_items[x.lr_next] = successor + if successor not in dn.direct_children: + dn.direct_children.append(successor) + if dn not in successor.direct_parents: + successor.direct_parents.append(dn) + + dn.children.add(successor) + dn.children.update(successor.children) + for node in dn.parents: + node.children.add(successor) + node.children.update(successor.children) + + successor.parents.add(dn) + successor.parents.update(dn.parents) + for node in successor.children: + node.parents.add(dn) + node.parents.update(dn.parents) + # ----------------------------------------------------------------------------- # rightmost_terminal() # @@ -942,10 +1234,11 @@ def __init__(self, grammar): 
self.lr_productions = grammar.Productions # Copy of grammar Production array self.lr_goto_cache = {} # Cache of computed gotos self.lr0_cidhash = {} # Cache of closures - self._add_count = 0 # Internal counter used to detect cycles # Diagonistic information filled in by the table generator self.state_descriptions = OrderedDict() + self.graph_description = [] + self.edge_description = [] self.sr_conflict = 0 self.rr_conflict = 0 self.conflicts = [] # List of conflicts @@ -973,26 +1266,6 @@ def __init__(self, grammar): if len(rules) == 1 and rules[0] < 0: self.defaulted_states[state] = rules[0] - # Compute the LR(0) closure operation on I, where I is a set of LR(0) items. - def lr0_closure(self, I): - self._add_count += 1 - - # Add everything in I to J - J = I[:] - didadd = True - while didadd: - didadd = False - for j in J: - for x in j.lr_after: - if getattr(x, 'lr0_added', 0) == self._add_count: - continue - # Add B --> .G to J - J.append(x.lr_next) - x.lr0_added = self._add_count - didadd = True - - return J - # Compute the LR(0) goto function goto(I,X) where I is a set # of LR(0) items and X is a grammar symbol. 
This function is written # in a way that guarantees uniqueness of the generated goto sets @@ -1022,21 +1295,23 @@ def lr0_goto(self, I, x): if not s1: s1 = {} s[id(n)] = s1 - gs.append(n) + gs.append((n, I[p], x)) s = s1 g = s.get('$end') if not g: if gs: - g = self.lr0_closure(gs) + g = LRItemSet(gs) s['$end'] = g else: s['$end'] = gs + else: + g.add_core(gs) self.lr_goto_cache[(id(I), x)] = g return g # Compute the LR(0) sets of item function def lr0_items(self): - C = [self.lr0_closure([self.grammar.Productions[0].lr_next])] + C = [LRItemSet([(self.grammar.Productions[0].lr_next, None, '$start')])] i = 0 for I in C: self.lr0_cidhash[id(I)] = i @@ -1393,6 +1668,8 @@ def lr_parse_table(self): st_actionp = {} st_goto = {} + sr_conflict_count = 0 + rr_conflict_count = 0 descrip.append(f'\nstate {st}\n') for p in I: descrip.append(f' ({p.number}) {p}') @@ -1425,10 +1702,13 @@ def lr_parse_table(self): if (slevel < rlevel) or ((slevel == rlevel) and (rprec == 'left')): # We really need to reduce here. st_action[a] = -p.number + rejected = st_actionp[a] + shift_rule = st_actionp[a] st_actionp[a] = p if not slevel and not rlevel: descrip.append(f' ! shift/reduce conflict for {a} resolved as reduce') - self.sr_conflicts.append((st, a, 'reduce')) + self.sr_conflicts.append((st, a, 'reduce', I[rejected], I[p], shift_rule, p)) + sr_conflict_count += 1 Productions[p.number].reduced += 1 elif (slevel == rlevel) and (rprec == 'nonassoc'): st_action[a] = None @@ -1436,23 +1716,27 @@ def lr_parse_table(self): # Hmmm. Guess we'll keep the shift if not rlevel: descrip.append(f' ! shift/reduce conflict for {a} resolved as shift') - self.sr_conflicts.append((st, a, 'shift')) + self.sr_conflicts.append((st, a, 'shift', I[st_actionp[a]], I[p], st_actionp[a], p)) + sr_conflict_count += 1 elif r <= 0: # Reduce/reduce conflict. 
In this case, we favor the rule # that was defined first in the grammar file + olditem = st_actionp[a] oldp = Productions[-r] pp = Productions[p.number] + pitem = p if oldp.line > pp.line: st_action[a] = -p.number st_actionp[a] = p - chosenp, rejectp = pp, oldp + chosenp, rejectp, chosenitem, rejecteditem = pp, oldp, pitem, olditem Productions[p.number].reduced += 1 Productions[oldp.number].reduced -= 1 else: - chosenp, rejectp = oldp, pp - self.rr_conflicts.append((st, chosenp, rejectp)) + chosenp, rejectp, chosenitem, rejecteditem = oldp, pp, olditem, pitem + self.rr_conflicts.append((st, a, chosenp, rejectp, I[chosenitem], I[rejecteditem])) descrip.append(' ! reduce/reduce conflict for %s resolved using rule %d (%s)' % (a, st_actionp[a].number, st_actionp[a])) + rr_conflict_count += 1 else: raise LALRError(f'Unknown conflict in state {st}') else: @@ -1484,18 +1768,22 @@ def lr_parse_table(self): if (slevel > rlevel) or ((slevel == rlevel) and (rprec == 'right')): # We decide to shift here... highest precedence to shift Productions[st_actionp[a].number].reduced -= 1 + reduce = st_actionp[a] + reduce_rule = st_actionp[a] st_action[a] = j st_actionp[a] = p if not rlevel: descrip.append(f' ! shift/reduce conflict for {a} resolved as shift') - self.sr_conflicts.append((st, a, 'shift')) + self.sr_conflicts.append((st, a, 'shift', I[p], I[reduce], p, reduce_rule)) + sr_conflict_count += 1 elif (slevel == rlevel) and (rprec == 'nonassoc'): st_action[a] = None else: # Hmmm. Guess we'll keep the reduce if not slevel and not rlevel: descrip.append(f' ! 
shift/reduce conflict for {a} resolved as reduce') - self.sr_conflicts.append((st, a, 'reduce')) + self.sr_conflicts.append((st, a, 'reduce', I[p], I[st_action[a]], p, st_actionp[a])) + sr_conflict_count += 1 else: raise LALRError(f'Unknown conflict in state {st}') @@ -1529,6 +1817,98 @@ def lr_parse_table(self): actionp[st] = st_actionp goto[st] = st_goto self.state_descriptions[st] = '\n'.join(descrip) + color = 'lightgray' + if sr_conflict_count > 0: + color = 'yellow' + if rr_conflict_count > 0: + color = 'orange' + self.graph_description.append('\n'.join([ + f' subgraph cluster_{st} {{', + f' label="State {st}";', + ' style=filled;', + f' color={color};', + ' node[style=filled;color=white];' + ] + [ + f' {id(node)}[label="{node.item}"];' for node in I._items.values() + ] + [ + ' }' + ])) + for node in I._items.values(): + for parent in node.direct_parents: + self.edge_description += [f' {id(parent)}->{id(node)}[style=dotted];'] + for predecessor in node.predecessors: + self.edge_description += [f' {id(predecessor)}->{id(node)}[label="{node.predecessor_lookahead}"];'] + + def _log(self, title, conflict_paths, out): + seen = set([]) + if conflict_paths: + count = len(set(conflict_paths)) + out.append(f' {title}') + out.append(' \u256d\u2574') + for path in conflict_paths: + if path in seen: + continue + count -= 1 + seen.add(path) + strings = path.expand_left().to_string()[0] + for s in strings: + out.append(f' \u2502 {s}') + if count == 0: + out.append(' \u2570\u2574') + else: + out.append(' \u251c\u2574') + + def _log_counterexamples(self, node_1, example_1, lookahead_1, node_2, example_2, lookahead_2, out): + conflict_r1_paths = [] + conflict_r2_paths = [] + seen_1 = set() + seen_2 = set() + + queue = [((LRPath(node_1, []), lookahead_1), (LRPath(node_2, []), lookahead_2))] + while queue: + (path_1, lookahead_1), (path_2, lookahead_2) = queue.pop(0) + if path_1._node.item.lr_index == 0 and path_2._node.item.lr_index == 0: + if lookahead_1 is None and 
lookahead_2 is None: + conflict_r1_paths.append(path_1) + conflict_r2_paths.append(path_2) + elif lookahead_1 is not None: + for path1, la1 in path_1._node.backtrack_up(path_1, None, lookahead_1, self.grammar.First, seen_1): + if path1._node.item_set == path_2._node.item_set: + queue.append(((path1, la1), (path_2, lookahead_2))) + else: + for path2, la2 in path_2._node.backtrack_up(path_2, path1._node.item_set, lookahead_2, self.grammar.First, seen_2): + queue.append(((path1, la1), (path2, la2))) + else: + for path2, la2 in path_2._node.backtrack_up(path_2, None, lookahead_2, self.grammar.First, seen_2): + if path_1._node.item_set == path2._node.item_set: + queue.append(((path_1, lookahead_1), (path2, la2))) + else: + for path1, la1 in path_1._node.backtrack_up(path_1, path2._node.item_set, lookahead_1, self.grammar.First, seen_1): + queue.append(((path1, la1), (path2, la2))) + else: + if path_1._node.item.lr_index == 0: + for pred_2 in path_2._node.predecessors: + parent_paths = path_1._node.backtrack_up(path_1, pred_2.item_set, lookahead_1, self.grammar.First, set()) + for p, la in parent_paths: + queue.append(((p, la), (path_2.derive_from(pred_2, path_2._node.predecessor_lookahead), lookahead_2))) + elif path_2._node.item.lr_index == 0: + for pred_1 in path_1._node.predecessors: + parent_paths = path_2._node.backtrack_up(path_2, pred_1.item_set, lookahead_2, self.grammar.First, set()) + for p, la in parent_paths: + queue.append(((path_1.derive_from(pred_1, path_1._node.predecessor_lookahead), lookahead_1), (p, la))) + else: + # reduce path_1 and path_2 + for pred_1 in path_1._node.predecessors: + for pred_2 in path_2._node.predecessors: + if pred_1.item_set == pred_2.item_set and pred_1.item_set != path_1._node.item_set: + queue.append(((path_1.derive_from(pred_1, path_1._node.predecessor_lookahead), lookahead_1), + (path_2.derive_from(pred_2, path_2._node.predecessor_lookahead), lookahead_2))) + + + self._log(example_1, conflict_r1_paths, out) + out.append('') 
+ self._log(example_2, conflict_r2_paths, out) + out.append('') # ---------------------------------------------------------------------- # Debugging output. Printing the LRTable object will produce a listing @@ -1542,25 +1922,41 @@ def __str__(self): if self.sr_conflicts or self.rr_conflicts: out.append('\nConflicts:\n') - for state, tok, resolution in self.sr_conflicts: + for state, tok, resolution, shift_node, reduce_node, shift_rule, reduce_rule in self.sr_conflicts: out.append(f'shift/reduce conflict for {tok} in state {state} resolved as {resolution}') + self._log_counterexamples(shift_node, f'shift using rule {shift_rule}', None, + reduce_node, f'reduce using rule {reduce_rule}', tok, out) - already_reported = set() - for state, rule, rejected in self.rr_conflicts: - if (state, id(rule), id(rejected)) in already_reported: - continue - out.append(f'reduce/reduce conflict in state {state} resolved using rule {rule}') - out.append(f'rejected rule ({rejected}) in state {state}') - already_reported.add((state, id(rule), id(rejected))) + + rr_conflict_map = {} + # group reduce/reduce conflicts per state, collect lookaheads + for i, (state, lookahead, rule, rejected, node, rejected_node) in enumerate(self.rr_conflicts): + try: + rr_conflict_map[state, id(rule), id(rejected)][5].append(lookahead) + except KeyError: + rr_conflict_map[state, id(rule), id(rejected)] = (i, rule, rejected, node, rejected_node, [lookahead]) + + for (state, _, _), (_, rule, rejected, node, rejected_node, lookaheads) in sorted(list(rr_conflict_map.items()), key=lambda x: (x[0][0], x[1][0])): + for la in lookaheads: + out.append(f'reduce/reduce conflict for {la} in state {state} resolved using rule {rule}') + out.append(f'rejected rule ({rejected}) in state {state}') + self._log_counterexamples(node, f'reduce using {rule} with lookahead {la}', la, + rejected_node, f'reduce using {rejected} with lookahead {la}', la, out) warned_never = set() - for state, rule, rejected in self.rr_conflicts: + 
for state, lookahead, rule, rejected, node, rejected_node in self.rr_conflicts: if not rejected.reduced and (rejected not in warned_never): out.append(f'Rule ({rejected}) is never reduced') warned_never.add(rejected) return '\n'.join(out) + # ---------------------------------------------------------------------- + # Dotfile output. Dump the state machine into a graph + # ---------------------------------------------------------------------- + def dot_graph(self): + return '\n'.join(['digraph Grammar {'] + self.graph_description + self.edge_description + ['}']) + # Collect grammar rules from a function def _collect_grammar_rules(func): grammar = [] @@ -1827,6 +2223,8 @@ class Parser(metaclass=ParserMeta): # Debugging filename where parsetab.out data can be written debugfile = None + # Dot filename where state achine can be described + dotfile = None @classmethod def __validate_tokens(cls): @@ -1927,7 +2325,7 @@ def __build_grammar(cls, rules): unused_terminals = grammar.unused_terminals() if unused_terminals: unused_str = '{' + ','.join(unused_terminals) + '}' - cls.log.warning(f'Token{"(s)" if len(unused_terminals) >1 else ""} {unused_str} defined, but not used') + cls.log.warning(f'Token{"(s)" if len(unused_terminals) >1 else ""} %s defined, but not used', unused_str) unused_rules = grammar.unused_rules() for prod in unused_rules: @@ -2018,8 +2416,13 @@ def _build(cls, definitions): if not cls.__build_lrtables(): raise YaccError('Can\'t build parsing tables') + if cls.dotfile: + with io.open(cls.dotfile, 'w', encoding='utf-8') as f: + f.write(cls._lrtable.dot_graph()) + cls.log.info('Parser dot graph for %s written to %s', cls.__qualname__, cls.dotfile) + if cls.debugfile: - with open(cls.debugfile, 'w') as f: + with io.open(cls.debugfile, 'w', encoding='utf-8') as f: f.write(str(cls._grammar)) f.write('\n') f.write(str(cls._lrtable))