forked from ossc-db/pg_hint_plan
-
Notifications
You must be signed in to change notification settings - Fork 0
/
query_scan_int.h
120 lines (107 loc) · 5.05 KB
/
query_scan_int.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
/*-------------------------------------------------------------------------
*
* query_scan_int.h
* lexical scanner internal declarations
*
* This file declares the QueryScanStateData structure used by query_scan.l.
*
* One difficult aspect of this code is that we need to work in multibyte
* encodings that are not ASCII-safe. A "safe" encoding is one in which each
* byte of a multibyte character has the high bit set (it's >= 0x80). Since
* all our lexing rules treat all high-bit-set characters alike, we don't
* really need to care whether such a byte is part of a sequence or not.
* In an "unsafe" encoding, we still expect the first byte of a multibyte
* sequence to be >= 0x80, but later bytes might not be. If we scan such
* a sequence as-is, the lexing rules could easily be fooled into matching
* such bytes to ordinary ASCII characters. Our solution for this is to
* substitute 0xFF for each non-first byte within the data presented to flex.
* The flex rules will then pass the FF's through unmolested. The
* query_scan_emit() subroutine is responsible for looking back to the
* original string and replacing FF's with the corresponding original bytes.
*
* Another interesting thing we do here is scan different parts of the same
* input with physically separate flex lexers (ie, lexers written in separate
* .l files). We can get away with this because the only part of the
* persistent state of a flex lexer that depends on its parsing rule tables
* is the start state number, which is easy enough to manage --- usually,
* in fact, we just need to set it to INITIAL when changing lexers. But to
* make that work at all, we must use re-entrant lexers, so that all the
* relevant state is in the yyscan_t attached to the QueryScanState;
* if we were using lexers with separate static state we would soon end up
* with dangling buffer pointers in one or the other. Also note that this
* is unlikely to work very nicely if the lexers aren't all built with the
* same flex version, or if they don't use the same flex options.
*
*
* Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* query_scan_int.h
*
*-------------------------------------------------------------------------
*/
#ifndef QUERY_SCAN_INT_H
#define QUERY_SCAN_INT_H
#include "query_scan.h"
/*
 * Fallback typedefs for flex's handle types.  They exist solely so that this
 * header can be compiled on its own for header validity checking.  In real
 * use the header is pulled into the body of a flex file, where the symbols
 * are already defined and the fallbacks are skipped.
 */
#if !defined(YY_TYPEDEF_YY_BUFFER_STATE)
#define YY_TYPEDEF_YY_BUFFER_STATE
struct yy_buffer_state;
typedef struct yy_buffer_state *YY_BUFFER_STATE;
#endif
#if !defined(YY_TYPEDEF_YY_SCANNER_T)
#define YY_TYPEDEF_YY_SCANNER_T
/* opaque handle to a re-entrant flex scanner; used only if flex has not defined it */
typedef void *yyscan_t;
#endif
/*
 * All working state of the lexer must be stored in QueryScanStateData
 * between calls.  This allows us to have multiple open lexer operations,
 * which is needed for nested include files.  The lexer itself is not
 * recursive, but it must be re-entrant.
 *
 * The fields fall into four groups: the flex handle and output/reporting
 * settings, the description of the outer-level input buffer, the data
 * needed to undo the 0xFF substitution used for encodings that are not
 * ASCII-safe, and the lexical state carried from one input line to the
 * next.
 */
typedef struct QueryScanStateData
{
yyscan_t scanner; /* Flex's state for this QueryScanState */
StringInfo output_buf; /* current output buffer */
int elevel; /* level of reports generated at parsing */
/*
 * These variables always refer to the outer buffer, never to any stacked
 * variable-expansion buffer.
 */
/*
 * flex buffer handle for the outer-level input.
 * NOTE(review): presumably the value returned by
 * query_scan_prepare_buffer() -- confirm in the scanner source.
 */
YY_BUFFER_STATE scanbufhandle;
char *scanbuf; /* start of outer-level input buffer */
const char *scanline; /* current input line at outer level */
/*
 * safe_encoding, curline and refline are used by query_scan_emit() to put
 * back the original bytes wherever the text handed to flex carries a 0xFF
 * placeholder (see the discussion of unsafe encodings at the top of this
 * file).
 */
int encoding; /* encoding being used now */
bool safe_encoding; /* is current encoding "safe"? */
bool std_strings; /* are string literals standard? */
const char *curline; /* actual flex input string for cur buf */
const char *refline; /* original data for cur buffer */
/*
 * All this state lives across successive input lines.  start_state is
 * adopted by yylex() on entry, and updated with its finishing state on
 * exit.
 */
int start_state; /* yylex's starting/finishing state */
int state_before_str_stop; /* start cond. before end quote */
int paren_depth; /* depth of nesting in parentheses */
int xcdepth; /* depth of nesting in slash-star comments */
char *dolqstart; /* current $foo$ quote start string */
int xhintnum; /* number of query hints found */
/*
 * State to track boundaries of BEGIN ... END blocks in function
 * definitions, so that semicolons do not send query too early.
 *
 * identifiers[] has room for four entries of one char each.
 * NOTE(review): presumably each entry is a one-character code for an
 * identifier and identifier_count keeps counting past four -- confirm
 * against the scanner rules that fill it in.
 */
int identifier_count; /* identifiers since start of statement */
char identifiers[4]; /* records the first few identifiers */
int begin_depth; /* depth of begin/end pairs */
} QueryScanStateData;
/*
 * query_scan_prepare_buffer
 *
 * Declared here for the flex scanner sources; the definition is not in this
 * header.  Takes the scan state, a text pointer with an explicit byte length,
 * and a char ** out parameter, and returns a flex buffer handle.
 *
 * NOTE(review): judging by the name and signature, this sets up a flex
 * buffer over a private copy of txt and hands that copy back through
 * *txtcopy; it is presumably also where the 0xFF substitution for unsafe
 * encodings is applied.  Confirm against the definition, including who
 * owns and frees *txtcopy.
 */
extern YY_BUFFER_STATE query_scan_prepare_buffer(QueryScanState state,
const char *txt, int len,
char **txtcopy);
/*
 * query_yyerror
 *
 * Error-reporting hook for the scanner; the definition is not in this header.
 *
 * NOTE(review): presumably reports message at report level elevel, with txt
 * being the input text the error refers to -- confirm against the
 * definition.
 */
extern void query_yyerror(int elevel, const char *txt, const char *message);
/*
 * query_scan_emit
 *
 * Per the discussion of unsafe encodings at the top of this file, this
 * routine is responsible for looking back to the original string and
 * replacing 0xFF placeholder bytes with the corresponding original bytes.
 *
 * NOTE(review): presumably the len bytes at txt are scanner text that ends
 * up in state->output_buf -- confirm against the definition.
 */
extern void query_scan_emit(QueryScanState state, const char *txt, int len);
#endif /* QUERY_SCAN_INT_H */