forked from fish-shell/fish-shell
-
Notifications
You must be signed in to change notification settings - Fork 0
/
parse_tree.h
268 lines (189 loc) · 11.2 KB
/
parse_tree.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
/**\file parse_tree.h
Programmatic representation of fish code.
*/
#ifndef FISH_PARSE_PRODUCTIONS_H
#define FISH_PARSE_PRODUCTIONS_H
#include <wchar.h>
#include "config.h"
#include "util.h"
#include "common.h"
#include "tokenizer.h"
#include "parse_constants.h"
#include <vector>
#include <inttypes.h>
class parse_node_t;
class parse_node_tree_t;
typedef uint32_t node_offset_t;
#define NODE_OFFSET_INVALID (static_cast<node_offset_t>(-1))
typedef uint32_t source_offset_t;
#define SOURCE_OFFSET_INVALID (static_cast<source_offset_t>(-1))
/* Returns a description of a list of parse errors */
wcstring parse_errors_description(const parse_error_list_t &errors, const wcstring &src, const wchar_t *prefix = NULL);
/** A struct representing the token type that we use internally */
struct parse_token_t
{
enum parse_token_type_t type; // The type of the token as represented by the parser
enum parse_keyword_t keyword; // Any keyword represented by this token
bool has_dash_prefix; // Hackish: whether the source contains a dash prefix
bool is_help_argument; // Hackish: whether the source looks like '-h' or '--help'
source_offset_t source_start;
source_offset_t source_length;
wcstring describe() const;
wcstring user_presentable_description() const;
};
enum
{
parse_flag_none = 0,
/* Attempt to build a "parse tree" no matter what. This may result in a 'forest' of disconnected trees. This is intended to be used by syntax highlighting. */
parse_flag_continue_after_error = 1 << 0,
/* Include comment tokens */
parse_flag_include_comments = 1 << 1,
/* Indicate that the tokenizer should accept incomplete tokens */
parse_flag_accept_incomplete_tokens = 1 << 2,
/* Indicate that the parser should not generate the terminate token, allowing an 'unfinished' tree where some nodes may have no productions. */
parse_flag_leave_unterminated = 1 << 3
};
typedef unsigned int parse_tree_flags_t;
wcstring parse_dump_tree(const parse_node_tree_t &tree, const wcstring &src);
wcstring token_type_description(parse_token_type_t type);
wcstring keyword_description(parse_keyword_t type);
/** Class for nodes of a parse tree. Since there's a lot of these, the size and order of the fields is important. */
class parse_node_t
{
public:
/* Start in the source code */
source_offset_t source_start;
/* Length of our range in the source code */
source_offset_t source_length;
/* Parent */
node_offset_t parent;
/* Children */
node_offset_t child_start;
/* Number of children */
uint8_t child_count;
/* Which production was used */
uint8_t production_idx;
/* Type of the node */
enum parse_token_type_t type;
/* Description */
wcstring describe(void) const;
/* Constructor */
explicit parse_node_t(parse_token_type_t ty) : source_start(SOURCE_OFFSET_INVALID), source_length(0), parent(NODE_OFFSET_INVALID), child_start(0), child_count(0), production_idx(-1), type(ty)
{
}
node_offset_t child_offset(node_offset_t which) const
{
PARSE_ASSERT(which < child_count);
return child_start + which;
}
/* Indicate if this node has a range of source code associated with it */
bool has_source() const
{
return source_start != SOURCE_OFFSET_INVALID;
}
/* Gets source for the node, or the empty string if it has no source */
wcstring get_source(const wcstring &str) const
{
if (! has_source())
return wcstring();
else
return wcstring(str, this->source_start, this->source_length);
}
/* Returns whether the given location is within the source range or at its end */
bool location_in_or_at_end_of_source_range(size_t loc) const
{
return has_source() && source_start <= loc && loc - source_start <= source_length;
}
};
/* The parse tree itself */
class parse_node_tree_t : public std::vector<parse_node_t>
{
public:
/* Get the node corresponding to a child of the given node, or NULL if there is no such child. If expected_type is provided, assert that the node has that type.
*/
const parse_node_t *get_child(const parse_node_t &parent, node_offset_t which, parse_token_type_t expected_type = token_type_invalid) const;
/* Find the first direct child of the given node of the given type. asserts on failure
*/
const parse_node_t &find_child(const parse_node_t &parent, parse_token_type_t type) const;
/* Get the node corresponding to the parent of the given node, or NULL if there is no such child. If expected_type is provided, only returns the parent if it is of that type. Note the asymmetry: get_child asserts since the children are known, but get_parent does not, since the parent may not be known. */
const parse_node_t *get_parent(const parse_node_t &node, parse_token_type_t expected_type = token_type_invalid) const;
/* Returns the first ancestor of the given type, or NULL. */
const parse_node_t *get_first_ancestor_of_type(const parse_node_t &node, parse_token_type_t desired_type) const;
/* Find all the nodes of a given type underneath a given node, up to max_count of them */
typedef std::vector<const parse_node_t *> parse_node_list_t;
parse_node_list_t find_nodes(const parse_node_t &parent, parse_token_type_t type, size_t max_count = (size_t)(-1)) const;
/* Finds the last node of a given type underneath a given node, or NULL if it could not be found. If parent is NULL, this finds the last node in the tree of that type. */
const parse_node_t *find_last_node_of_type(parse_token_type_t type, const parse_node_t *parent = NULL) const;
/* Finds a node containing the given source location. If 'parent' is not NULL, it must be an ancestor. */
const parse_node_t *find_node_matching_source_location(parse_token_type_t type, size_t source_loc, const parse_node_t *parent) const;
/* Indicate if the given argument_list or arguments_or_redirections_list is a root list, or has a parent */
bool argument_list_is_root(const parse_node_t &node) const;
/* Utilities */
/* Given a plain statement, get the decoration (from the parent node), or none if there is no decoration */
enum parse_statement_decoration_t decoration_for_plain_statement(const parse_node_t &node) const;
/* Given a plain statement, get the command by reference (from the child node). Returns true if successful. Clears the command on failure. */
bool command_for_plain_statement(const parse_node_t &node, const wcstring &src, wcstring *out_cmd) const;
/* Given a plain statement, return true if the statement is part of a pipeline. If include_first is set, the first command in a pipeline is considered part of it; otherwise only the second or additional commands are */
bool statement_is_in_pipeline(const parse_node_t &node, bool include_first) const;
/* Given a redirection, get the redirection type (or TOK_NONE) and target (file path, or fd) */
enum token_type type_for_redirection(const parse_node_t &node, const wcstring &src, int *out_fd, wcstring *out_target) const;
/* If the given node is a block statement, returns the header node (for_header, while_header, begin_header, or function_header). Otherwise returns NULL */
const parse_node_t *header_node_for_block_statement(const parse_node_t &node) const;
/* Given a node list (e.g. of type symbol_job_list) and a node type (e.g. symbol_job), return the next element of the given type in that list, and the tail (by reference). Returns NULL if we've exhausted the list. */
const parse_node_t *next_node_in_node_list(const parse_node_t &node_list, parse_token_type_t item_type, const parse_node_t **list_tail) const;
/* Given a job, return all of its statements. These are 'specific statements' (e.g. symbol_decorated_statement, not symbol_statement) */
parse_node_list_t specific_statements_for_job(const parse_node_t &job) const;
/* Given a job, return whether it should be backgrounded, because it has a & specifier */
bool job_should_be_backgrounded(const parse_node_t &job) const;
};
/* The big entry point. Parse a string, attempting to produce a tree for the given goal type */
bool parse_tree_from_string(const wcstring &str, parse_tree_flags_t flags, parse_node_tree_t *output, parse_error_list_t *errors, parse_token_type_t goal = symbol_job_list);
/* Fish grammar:
# A job_list is a list of jobs, separated by semicolons or newlines
job_list = <empty> |
job job_list
<TOK_END> job_list
# A job is a non-empty list of statements, separated by pipes. (Non-empty is useful for cases like if statements, where we require a command). To represent "non-empty", we require a statement, followed by a possibly empty job_continuation, and then optionally a background specifier '&'
job = statement job_continuation optional_background
job_continuation = <empty> |
<TOK_PIPE> statement job_continuation
# A statement is a normal command, or an if / while / and etc
statement = boolean_statement | block_statement | if_statement | switch_statement | decorated_statement
# A block is a conditional, loop, or begin/end
if_statement = if_clause else_clause end_command arguments_or_redirections_list
if_clause = <IF> job <TOK_END> job_list
else_clause = <empty> |
<ELSE> else_continuation
else_continuation = if_clause else_clause |
<TOK_END> job_list
switch_statement = SWITCH argument <TOK_END> case_item_list end_command arguments_or_redirections_list
case_item_list = <empty> |
case_item case_item_list |
<TOK_END> case_item_list
case_item = CASE argument_list <TOK_END> job_list
block_statement = block_header <TOK_END> job_list end_command arguments_or_redirections_list
block_header = for_header | while_header | function_header | begin_header
for_header = FOR var_name IN argument_list
while_header = WHILE job
begin_header = BEGIN
# Functions take arguments, and require at least one (the name). No redirections allowed.
function_header = FUNCTION argument argument_list
# A boolean statement is AND or OR or NOT
boolean_statement = AND statement | OR statement | NOT statement
# A decorated_statement is a command with a list of arguments_or_redirections, possibly with "builtin" or "command" or "exec"
decorated_statement = plain_statement | COMMAND plain_statement | BUILTIN plain_statement | EXEC plain_statement
plain_statement = <TOK_STRING> arguments_or_redirections_list
argument_list = <empty> | argument argument_list
arguments_or_redirections_list = <empty> |
argument_or_redirection arguments_or_redirections_list
argument_or_redirection = argument | redirection
argument = <TOK_STRING>
redirection = <TOK_REDIRECTION> <TOK_STRING>
optional_background = <empty> | <TOK_BACKGROUND>
end_command = END
# A freestanding_argument_list is equivalent to a normal argument list, except it may contain TOK_END (newlines, and even semicolons, for historical reasons
freestanding_argument_list = <empty> |
argument freestanding_argument_list |
<TOK_END> freestanding_argument_list
*/
#endif