Ruby 3.4.3p32 (2025-04-14 revision d0b7e5b6a04bde21ca483d20a1546b28b401c2d4)
prism.c
1#include "prism.h"
2
6const char *
7pm_version(void) {
8 return PRISM_VERSION;
9}
10
/**
 * Width constant associated with tab whitespace. (Presumably the column
 * multiple that a tab character advances to -- confirm at its use sites.)
 */
#define PM_TAB_WHITESPACE_SIZE 8
16
// Function-like minimum/maximum helpers. Each argument is expanded more than
// once, so callers must not pass expressions that have side effects.
#define MIN(a, b) (((a) < (b)) ? (a) : (b))
#define MAX(a, b) (((a) > (b)) ? (a) : (b))
20
21/******************************************************************************/
22/* Lex mode manipulations */
23/******************************************************************************/
24
/**
 * Map an opening delimiter to the byte that increases the nesting depth. Only
 * the four bracket-style openers nest; every other delimiter yields '\0'.
 */
static inline uint8_t
lex_mode_incrementor(const uint8_t start) {
    if (start == '(' || start == '[' || start == '{' || start == '<') {
        return start;
    }

    return '\0';
}
41
/**
 * Map an opening delimiter to the byte that closes it. Bracket-style openers
 * map to their matching closer; any other delimiter terminates itself.
 */
static inline uint8_t
lex_mode_terminator(const uint8_t start) {
    if (start == '(') return ')';
    if (start == '[') return ']';
    if (start == '{') return '}';
    if (start == '<') return '>';

    return start;
}
61
67static bool
68lex_mode_push(pm_parser_t *parser, pm_lex_mode_t lex_mode) {
69 lex_mode.prev = parser->lex_modes.current;
70 parser->lex_modes.index++;
71
72 if (parser->lex_modes.index > PM_LEX_STACK_SIZE - 1) {
74 if (parser->lex_modes.current == NULL) return false;
75
76 *parser->lex_modes.current = lex_mode;
77 } else {
78 parser->lex_modes.stack[parser->lex_modes.index] = lex_mode;
79 parser->lex_modes.current = &parser->lex_modes.stack[parser->lex_modes.index];
80 }
81
82 return true;
83}
84
88static inline bool
89lex_mode_push_list(pm_parser_t *parser, bool interpolation, uint8_t delimiter) {
90 uint8_t incrementor = lex_mode_incrementor(delimiter);
91 uint8_t terminator = lex_mode_terminator(delimiter);
92
93 pm_lex_mode_t lex_mode = {
94 .mode = PM_LEX_LIST,
95 .as.list = {
96 .nesting = 0,
97 .interpolation = interpolation,
98 .incrementor = incrementor,
99 .terminator = terminator
100 }
101 };
102
103 // These are the places where we need to split up the content of the list.
104 // We'll use strpbrk to find the first of these characters.
105 uint8_t *breakpoints = lex_mode.as.list.breakpoints;
106 memcpy(breakpoints, "\\ \t\f\r\v\n\0\0\0", sizeof(lex_mode.as.list.breakpoints));
107 size_t index = 7;
108
109 // Now we'll add the terminator to the list of breakpoints. If the
110 // terminator is not already a NULL byte, add it to the list.
111 if (terminator != '\0') {
112 breakpoints[index++] = terminator;
113 }
114
115 // If interpolation is allowed, then we're going to check for the #
116 // character. Otherwise we'll only look for escapes and the terminator.
117 if (interpolation) {
118 breakpoints[index++] = '#';
119 }
120
121 // If there is an incrementor, then we'll check for that as well.
122 if (incrementor != '\0') {
123 breakpoints[index++] = incrementor;
124 }
125
126 parser->explicit_encoding = NULL;
127 return lex_mode_push(parser, lex_mode);
128}
129
135static inline bool
136lex_mode_push_list_eof(pm_parser_t *parser) {
137 return lex_mode_push_list(parser, false, '\0');
138}
139
143static inline bool
144lex_mode_push_regexp(pm_parser_t *parser, uint8_t incrementor, uint8_t terminator) {
145 pm_lex_mode_t lex_mode = {
146 .mode = PM_LEX_REGEXP,
147 .as.regexp = {
148 .nesting = 0,
149 .incrementor = incrementor,
150 .terminator = terminator
151 }
152 };
153
154 // These are the places where we need to split up the content of the
155 // regular expression. We'll use strpbrk to find the first of these
156 // characters.
157 uint8_t *breakpoints = lex_mode.as.regexp.breakpoints;
158 memcpy(breakpoints, "\r\n\\#\0\0", sizeof(lex_mode.as.regexp.breakpoints));
159 size_t index = 4;
160
161 // First we'll add the terminator.
162 if (terminator != '\0') {
163 breakpoints[index++] = terminator;
164 }
165
166 // Next, if there is an incrementor, then we'll check for that as well.
167 if (incrementor != '\0') {
168 breakpoints[index++] = incrementor;
169 }
170
171 parser->explicit_encoding = NULL;
172 return lex_mode_push(parser, lex_mode);
173}
174
178static inline bool
179lex_mode_push_string(pm_parser_t *parser, bool interpolation, bool label_allowed, uint8_t incrementor, uint8_t terminator) {
180 pm_lex_mode_t lex_mode = {
181 .mode = PM_LEX_STRING,
182 .as.string = {
183 .nesting = 0,
184 .interpolation = interpolation,
185 .label_allowed = label_allowed,
186 .incrementor = incrementor,
187 .terminator = terminator
188 }
189 };
190
191 // These are the places where we need to split up the content of the
192 // string. We'll use strpbrk to find the first of these characters.
193 uint8_t *breakpoints = lex_mode.as.string.breakpoints;
194 memcpy(breakpoints, "\r\n\\\0\0\0", sizeof(lex_mode.as.string.breakpoints));
195 size_t index = 3;
196
197 // Now add in the terminator. If the terminator is not already a NULL byte,
198 // then we'll add it.
199 if (terminator != '\0') {
200 breakpoints[index++] = terminator;
201 }
202
203 // If interpolation is allowed, then we're going to check for the #
204 // character. Otherwise we'll only look for escapes and the terminator.
205 if (interpolation) {
206 breakpoints[index++] = '#';
207 }
208
209 // If we have an incrementor, then we'll add that in as a breakpoint as
210 // well.
211 if (incrementor != '\0') {
212 breakpoints[index++] = incrementor;
213 }
214
215 parser->explicit_encoding = NULL;
216 return lex_mode_push(parser, lex_mode);
217}
218
224static inline bool
225lex_mode_push_string_eof(pm_parser_t *parser) {
226 return lex_mode_push_string(parser, false, false, '\0', '\0');
227}
228
234static void
235lex_mode_pop(pm_parser_t *parser) {
236 if (parser->lex_modes.index == 0) {
237 parser->lex_modes.current->mode = PM_LEX_DEFAULT;
238 } else if (parser->lex_modes.index < PM_LEX_STACK_SIZE) {
239 parser->lex_modes.index--;
240 parser->lex_modes.current = &parser->lex_modes.stack[parser->lex_modes.index];
241 } else {
242 parser->lex_modes.index--;
243 pm_lex_mode_t *prev = parser->lex_modes.current->prev;
244 xfree(parser->lex_modes.current);
245 parser->lex_modes.current = prev;
246 }
247}
248
252static inline bool
253lex_state_p(const pm_parser_t *parser, pm_lex_state_t state) {
254 return parser->lex_state & state;
255}
256
/**
 * The possible answers of lex_state_ignored_p for how a newline is treated in
 * the current lex state.
 */
typedef enum {
    PM_IGNORED_NEWLINE_NONE = 0,
    PM_IGNORED_NEWLINE_ALL = 1,
    PM_IGNORED_NEWLINE_PATTERN = 2
} pm_ignored_newline_type_t;
262
263static inline pm_ignored_newline_type_t
264lex_state_ignored_p(pm_parser_t *parser) {
265 bool ignored = lex_state_p(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_CLASS | PM_LEX_STATE_FNAME | PM_LEX_STATE_DOT) && !lex_state_p(parser, PM_LEX_STATE_LABELED);
266
267 if (ignored) {
268 return PM_IGNORED_NEWLINE_ALL;
269 } else if ((parser->lex_state & ~((unsigned int) PM_LEX_STATE_LABEL)) == (PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED)) {
270 return PM_IGNORED_NEWLINE_PATTERN;
271 } else {
272 return PM_IGNORED_NEWLINE_NONE;
273 }
274}
275
276static inline bool
277lex_state_beg_p(pm_parser_t *parser) {
278 return lex_state_p(parser, PM_LEX_STATE_BEG_ANY) || ((parser->lex_state & (PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED)) == (PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED));
279}
280
281static inline bool
282lex_state_arg_p(pm_parser_t *parser) {
283 return lex_state_p(parser, PM_LEX_STATE_ARG_ANY);
284}
285
286static inline bool
287lex_state_spcarg_p(pm_parser_t *parser, bool space_seen) {
288 if (parser->current.end >= parser->end) {
289 return false;
290 }
291 return lex_state_arg_p(parser) && space_seen && !pm_char_is_whitespace(*parser->current.end);
292}
293
294static inline bool
295lex_state_end_p(pm_parser_t *parser) {
296 return lex_state_p(parser, PM_LEX_STATE_END_ANY);
297}
298
302static inline bool
303lex_state_operator_p(pm_parser_t *parser) {
304 return lex_state_p(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_DOT);
305}
306
311static inline void
312lex_state_set(pm_parser_t *parser, pm_lex_state_t state) {
313 parser->lex_state = state;
314}
315
/**
 * Set to a non-zero value (for example from the compiler command line) to
 * trace every lex state change on stderr. Defaults to disabled.
 */
#ifndef PM_DEBUG_LOGGING
#define PM_DEBUG_LOGGING 0
#endif
323
324#if PM_DEBUG_LOGGING
325PRISM_ATTRIBUTE_UNUSED static void
326debug_state(pm_parser_t *parser) {
327 fprintf(stderr, "STATE: ");
328 bool first = true;
329
330 if (parser->lex_state == PM_LEX_STATE_NONE) {
331 fprintf(stderr, "NONE\n");
332 return;
333 }
334
335#define CHECK_STATE(state) \
336 if (parser->lex_state & state) { \
337 if (!first) fprintf(stderr, "|"); \
338 fprintf(stderr, "%s", #state); \
339 first = false; \
340 }
341
342 CHECK_STATE(PM_LEX_STATE_BEG)
343 CHECK_STATE(PM_LEX_STATE_END)
344 CHECK_STATE(PM_LEX_STATE_ENDARG)
345 CHECK_STATE(PM_LEX_STATE_ENDFN)
346 CHECK_STATE(PM_LEX_STATE_ARG)
347 CHECK_STATE(PM_LEX_STATE_CMDARG)
348 CHECK_STATE(PM_LEX_STATE_MID)
349 CHECK_STATE(PM_LEX_STATE_FNAME)
350 CHECK_STATE(PM_LEX_STATE_DOT)
351 CHECK_STATE(PM_LEX_STATE_CLASS)
352 CHECK_STATE(PM_LEX_STATE_LABEL)
353 CHECK_STATE(PM_LEX_STATE_LABELED)
354 CHECK_STATE(PM_LEX_STATE_FITEM)
355
356#undef CHECK_STATE
357
358 fprintf(stderr, "\n");
359}
360
361static void
362debug_lex_state_set(pm_parser_t *parser, pm_lex_state_t state, char const * caller_name, int line_number) {
363 fprintf(stderr, "Caller: %s:%d\nPrevious: ", caller_name, line_number);
364 debug_state(parser);
365 lex_state_set(parser, state);
366 fprintf(stderr, "Now: ");
367 debug_state(parser);
368 fprintf(stderr, "\n");
369}
370
// From this point on, route every lex_state_set call through the tracing
// wrapper so that the calling function and line number are reported.
#define lex_state_set(parser, state) debug_lex_state_set(parser, state, __func__, __LINE__)
372#endif
373
374/******************************************************************************/
375/* Command-line macro helpers */
376/******************************************************************************/
377
/** Non-zero if the given command-line option bit is set on the parser. */
#define PM_PARSER_COMMAND_LINE_OPTION(parser, option) ((parser)->command_line & (option))

/** Non-zero if the PM_OPTIONS_COMMAND_LINE_A option is set on the parser. */
#define PM_PARSER_COMMAND_LINE_OPTION_A(parser) PM_PARSER_COMMAND_LINE_OPTION(parser, PM_OPTIONS_COMMAND_LINE_A)

/** Non-zero if the PM_OPTIONS_COMMAND_LINE_E option is set on the parser. */
#define PM_PARSER_COMMAND_LINE_OPTION_E(parser) PM_PARSER_COMMAND_LINE_OPTION(parser, PM_OPTIONS_COMMAND_LINE_E)

/** Non-zero if the PM_OPTIONS_COMMAND_LINE_L option is set on the parser. */
#define PM_PARSER_COMMAND_LINE_OPTION_L(parser) PM_PARSER_COMMAND_LINE_OPTION(parser, PM_OPTIONS_COMMAND_LINE_L)

/** Non-zero if the PM_OPTIONS_COMMAND_LINE_N option is set on the parser. */
#define PM_PARSER_COMMAND_LINE_OPTION_N(parser) PM_PARSER_COMMAND_LINE_OPTION(parser, PM_OPTIONS_COMMAND_LINE_N)

/** Non-zero if the PM_OPTIONS_COMMAND_LINE_P option is set on the parser. */
#define PM_PARSER_COMMAND_LINE_OPTION_P(parser) PM_PARSER_COMMAND_LINE_OPTION(parser, PM_OPTIONS_COMMAND_LINE_P)

/** Non-zero if the PM_OPTIONS_COMMAND_LINE_X option is set on the parser. */
#define PM_PARSER_COMMAND_LINE_OPTION_X(parser) PM_PARSER_COMMAND_LINE_OPTION(parser, PM_OPTIONS_COMMAND_LINE_X)
398
399/******************************************************************************/
400/* Diagnostic-related functions */
401/******************************************************************************/
402
406static inline void
407pm_parser_err(pm_parser_t *parser, const uint8_t *start, const uint8_t *end, pm_diagnostic_id_t diag_id) {
408 pm_diagnostic_list_append(&parser->error_list, start, end, diag_id);
409}
410
/**
 * Append an error to the parser's error list using a format string. The
 * variadic arguments are forwarded to pm_diagnostic_list_append_format as the
 * values for the diagnostic's format. The parser argument is parenthesized so
 * that any pointer-valued expression can be passed safely.
 */
#define PM_PARSER_ERR_FORMAT(parser, start, end, diag_id, ...) \
    pm_diagnostic_list_append_format(&(parser)->error_list, start, end, diag_id, __VA_ARGS__)
416
421static inline void
422pm_parser_err_current(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
423 pm_parser_err(parser, parser->current.start, parser->current.end, diag_id);
424}
425
/**
 * Append a formatted error that covers the range described by the given
 * location pointer (its `start` and `end` members).
 */
#define PM_PARSER_ERR_LOCATION_FORMAT(parser, location, diag_id, ...) \
    PM_PARSER_ERR_FORMAT(parser, (location)->start, (location)->end, diag_id, __VA_ARGS__)
432
437static inline void
438pm_parser_err_node(pm_parser_t *parser, const pm_node_t *node, pm_diagnostic_id_t diag_id) {
439 pm_parser_err(parser, node->location.start, node->location.end, diag_id);
440}
441
/**
 * Append a formatted error that covers the location of the given node.
 */
#define PM_PARSER_ERR_NODE_FORMAT(parser, node, diag_id, ...) \
    PM_PARSER_ERR_FORMAT(parser, (node)->location.start, (node)->location.end, diag_id, __VA_ARGS__)

/**
 * Append a formatted error that covers the location of the given node and
 * passes the node's source text to the format as a length (int) followed by a
 * pointer to its first byte.
 */
#define PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, diag_id) \
    PM_PARSER_ERR_NODE_FORMAT(parser, node, diag_id, (int) ((node)->location.end - (node)->location.start), (const char *) (node)->location.start)
455
460static inline void
461pm_parser_err_previous(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
462 pm_parser_err(parser, parser->previous.start, parser->previous.end, diag_id);
463}
464
469static inline void
470pm_parser_err_token(pm_parser_t *parser, const pm_token_t *token, pm_diagnostic_id_t diag_id) {
471 pm_parser_err(parser, token->start, token->end, diag_id);
472}
473
/**
 * Append a formatted error that covers the given token. The token is passed
 * by value (its members are accessed with `.`), not by pointer.
 */
#define PM_PARSER_ERR_TOKEN_FORMAT(parser, token, diag_id, ...) \
    PM_PARSER_ERR_FORMAT(parser, (token).start, (token).end, diag_id, __VA_ARGS__)

/**
 * Append a formatted error that covers the given token and passes the token's
 * source text to the format as a length (int) followed by a pointer to its
 * first byte.
 */
#define PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, token, diag_id) \
    PM_PARSER_ERR_TOKEN_FORMAT(parser, token, diag_id, (int) ((token).end - (token).start), (const char *) (token).start)
487
491static inline void
492pm_parser_warn(pm_parser_t *parser, const uint8_t *start, const uint8_t *end, pm_diagnostic_id_t diag_id) {
493 pm_diagnostic_list_append(&parser->warning_list, start, end, diag_id);
494}
495
500static inline void
501pm_parser_warn_token(pm_parser_t *parser, const pm_token_t *token, pm_diagnostic_id_t diag_id) {
502 pm_parser_warn(parser, token->start, token->end, diag_id);
503}
504
509static inline void
510pm_parser_warn_node(pm_parser_t *parser, const pm_node_t *node, pm_diagnostic_id_t diag_id) {
511 pm_parser_warn(parser, node->location.start, node->location.end, diag_id);
512}
513
/**
 * Append a warning to the parser's warning list using a format string. The
 * variadic arguments are forwarded to pm_diagnostic_list_append_format as the
 * values for the diagnostic's format. The parser argument is parenthesized so
 * that any pointer-valued expression can be passed safely.
 */
#define PM_PARSER_WARN_FORMAT(parser, start, end, diag_id, ...) \
    pm_diagnostic_list_append_format(&(parser)->warning_list, start, end, diag_id, __VA_ARGS__)

/**
 * Append a formatted warning that covers the given token. The token is passed
 * by value (its members are accessed with `.`), not by pointer.
 */
#define PM_PARSER_WARN_TOKEN_FORMAT(parser, token, diag_id, ...) \
    PM_PARSER_WARN_FORMAT(parser, (token).start, (token).end, diag_id, __VA_ARGS__)

/**
 * Append a formatted warning that covers the given token and passes the
 * token's source text to the format as a length (int) followed by a pointer
 * to its first byte.
 */
#define PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser, token, diag_id) \
    PM_PARSER_WARN_TOKEN_FORMAT(parser, token, diag_id, (int) ((token).end - (token).start), (const char *) (token).start)

/**
 * Append a formatted warning that covers the location of the given node.
 */
#define PM_PARSER_WARN_NODE_FORMAT(parser, node, diag_id, ...) \
    PM_PARSER_WARN_FORMAT(parser, (node)->location.start, (node)->location.end, diag_id, __VA_ARGS__)
540
546static void
547pm_parser_err_heredoc_term(pm_parser_t *parser, const uint8_t *ident_start, size_t ident_length) {
548 PM_PARSER_ERR_FORMAT(
549 parser,
550 ident_start,
551 ident_start + ident_length,
552 PM_ERR_HEREDOC_TERM,
553 (int) ident_length,
554 (const char *) ident_start
555 );
556}
557
558/******************************************************************************/
559/* Scope-related functions */
560/******************************************************************************/
561
565static bool
566pm_parser_scope_push(pm_parser_t *parser, bool closed) {
567 pm_scope_t *scope = (pm_scope_t *) xmalloc(sizeof(pm_scope_t));
568 if (scope == NULL) return false;
569
570 *scope = (pm_scope_t) {
571 .previous = parser->current_scope,
572 .locals = { 0 },
573 .parameters = PM_SCOPE_PARAMETERS_NONE,
574 .implicit_parameters = { 0 },
575 .shareable_constant = parser->current_scope == NULL ? PM_SCOPE_SHAREABLE_CONSTANT_NONE : parser->current_scope->shareable_constant,
576 .closed = closed
577 };
578
579 parser->current_scope = scope;
580 return true;
581}
582
587static bool
588pm_parser_scope_toplevel_p(pm_parser_t *parser) {
589 pm_scope_t *scope = parser->current_scope;
590
591 do {
592 if (scope->previous == NULL) return true;
593 if (scope->closed) return false;
594 } while ((scope = scope->previous) != NULL);
595
596 assert(false && "unreachable");
597 return true;
598}
599
603static pm_scope_t *
604pm_parser_scope_find(pm_parser_t *parser, uint32_t depth) {
605 pm_scope_t *scope = parser->current_scope;
606
607 while (depth-- > 0) {
608 assert(scope != NULL);
609 scope = scope->previous;
610 }
611
612 return scope;
613}
614
/**
 * The possible results of pm_parser_scope_forwarding_param_check.
 */
typedef enum {
    PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS = 0,
    PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT = 1,
    PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL = 2
} pm_scope_forwarding_param_check_result_t;
620
621static pm_scope_forwarding_param_check_result_t
622pm_parser_scope_forwarding_param_check(pm_parser_t *parser, const uint8_t mask) {
623 pm_scope_t *scope = parser->current_scope;
624 bool conflict = false;
625
626 while (scope != NULL) {
627 if (scope->parameters & mask) {
628 if (scope->closed) {
629 if (conflict) {
630 return PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT;
631 } else {
632 return PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS;
633 }
634 }
635
636 conflict = true;
637 }
638
639 if (scope->closed) break;
640 scope = scope->previous;
641 }
642
643 return PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL;
644}
645
646static void
647pm_parser_scope_forwarding_block_check(pm_parser_t *parser, const pm_token_t * token) {
648 switch (pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_BLOCK)) {
649 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS:
650 // Pass.
651 break;
652 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT:
653 pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_CONFLICT_AMPERSAND);
654 break;
655 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL:
656 pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_AMPERSAND);
657 break;
658 }
659}
660
661static void
662pm_parser_scope_forwarding_positionals_check(pm_parser_t *parser, const pm_token_t * token) {
663 switch (pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_POSITIONALS)) {
664 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS:
665 // Pass.
666 break;
667 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT:
668 pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_CONFLICT_STAR);
669 break;
670 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL:
671 pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_STAR);
672 break;
673 }
674}
675
676static void
677pm_parser_scope_forwarding_all_check(pm_parser_t *parser, const pm_token_t *token) {
678 switch (pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_ALL)) {
679 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS:
680 // Pass.
681 break;
682 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT:
683 // This shouldn't happen, because ... is not allowed in the
684 // declaration of blocks. If we get here, we assume we already have
685 // an error for this.
686 break;
687 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL:
688 pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES);
689 break;
690 }
691}
692
693static void
694pm_parser_scope_forwarding_keywords_check(pm_parser_t *parser, const pm_token_t * token) {
695 switch (pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_KEYWORDS)) {
696 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS:
697 // Pass.
698 break;
699 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT:
700 pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_CONFLICT_STAR_STAR);
701 break;
702 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL:
703 pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_STAR_STAR);
704 break;
705 }
706}
707
712pm_parser_scope_shareable_constant_get(pm_parser_t *parser) {
713 return parser->current_scope->shareable_constant;
714}
715
720static void
721pm_parser_scope_shareable_constant_set(pm_parser_t *parser, pm_shareable_constant_value_t shareable_constant) {
722 pm_scope_t *scope = parser->current_scope;
723
724 do {
725 scope->shareable_constant = shareable_constant;
726 } while (!scope->closed && (scope = scope->previous) != NULL);
727}
728
729/******************************************************************************/
730/* Local variable-related functions */
731/******************************************************************************/
732
/**
 * Capacity threshold for a set of locals: below it the locals are stored as a
 * plain list that is scanned linearly, at or above it they are stored as an
 * open-addressed hash table.
 */
#define PM_LOCALS_HASH_THRESHOLD 9
737
738static void
739pm_locals_free(pm_locals_t *locals) {
740 if (locals->capacity > 0) {
741 xfree(locals->locals);
742 }
743}
744
749static uint32_t
750pm_locals_hash(pm_constant_id_t name) {
751 name = ((name >> 16) ^ name) * 0x45d9f3b;
752 name = ((name >> 16) ^ name) * 0x45d9f3b;
753 name = (name >> 16) ^ name;
754 return name;
755}
756
761static void
762pm_locals_resize(pm_locals_t *locals) {
763 uint32_t next_capacity = locals->capacity == 0 ? 4 : (locals->capacity * 2);
764 assert(next_capacity > locals->capacity);
765
766 pm_local_t *next_locals = xcalloc(next_capacity, sizeof(pm_local_t));
767 if (next_locals == NULL) abort();
768
769 if (next_capacity < PM_LOCALS_HASH_THRESHOLD) {
770 if (locals->size > 0) {
771 memcpy(next_locals, locals->locals, locals->size * sizeof(pm_local_t));
772 }
773 } else {
774 // If we just switched from a list to a hash, then we need to fill in
775 // the hash values of all of the locals.
776 bool hash_needed = (locals->capacity <= PM_LOCALS_HASH_THRESHOLD);
777 uint32_t mask = next_capacity - 1;
778
779 for (uint32_t index = 0; index < locals->capacity; index++) {
780 pm_local_t *local = &locals->locals[index];
781
782 if (local->name != PM_CONSTANT_ID_UNSET) {
783 if (hash_needed) local->hash = pm_locals_hash(local->name);
784
785 uint32_t hash = local->hash;
786 while (next_locals[hash & mask].name != PM_CONSTANT_ID_UNSET) hash++;
787 next_locals[hash & mask] = *local;
788 }
789 }
790 }
791
792 pm_locals_free(locals);
793 locals->locals = next_locals;
794 locals->capacity = next_capacity;
795}
796
812static bool
813pm_locals_write(pm_locals_t *locals, pm_constant_id_t name, const uint8_t *start, const uint8_t *end, uint32_t reads) {
814 if (locals->size >= (locals->capacity / 4 * 3)) {
815 pm_locals_resize(locals);
816 }
817
818 if (locals->capacity < PM_LOCALS_HASH_THRESHOLD) {
819 for (uint32_t index = 0; index < locals->capacity; index++) {
820 pm_local_t *local = &locals->locals[index];
821
822 if (local->name == PM_CONSTANT_ID_UNSET) {
823 *local = (pm_local_t) {
824 .name = name,
825 .location = { .start = start, .end = end },
826 .index = locals->size++,
827 .reads = reads,
828 .hash = 0
829 };
830 return true;
831 } else if (local->name == name) {
832 return false;
833 }
834 }
835 } else {
836 uint32_t mask = locals->capacity - 1;
837 uint32_t hash = pm_locals_hash(name);
838 uint32_t initial_hash = hash;
839
840 do {
841 pm_local_t *local = &locals->locals[hash & mask];
842
843 if (local->name == PM_CONSTANT_ID_UNSET) {
844 *local = (pm_local_t) {
845 .name = name,
846 .location = { .start = start, .end = end },
847 .index = locals->size++,
848 .reads = reads,
849 .hash = initial_hash
850 };
851 return true;
852 } else if (local->name == name) {
853 return false;
854 } else {
855 hash++;
856 }
857 } while ((hash & mask) != initial_hash);
858 }
859
860 assert(false && "unreachable");
861 return true;
862}
863
868static uint32_t
869pm_locals_find(pm_locals_t *locals, pm_constant_id_t name) {
870 if (locals->capacity < PM_LOCALS_HASH_THRESHOLD) {
871 for (uint32_t index = 0; index < locals->size; index++) {
872 pm_local_t *local = &locals->locals[index];
873 if (local->name == name) return index;
874 }
875 } else {
876 uint32_t mask = locals->capacity - 1;
877 uint32_t hash = pm_locals_hash(name);
878 uint32_t initial_hash = hash & mask;
879
880 do {
881 pm_local_t *local = &locals->locals[hash & mask];
882
883 if (local->name == PM_CONSTANT_ID_UNSET) {
884 return UINT32_MAX;
885 } else if (local->name == name) {
886 return hash & mask;
887 } else {
888 hash++;
889 }
890 } while ((hash & mask) != initial_hash);
891 }
892
893 return UINT32_MAX;
894}
895
900static void
901pm_locals_read(pm_locals_t *locals, pm_constant_id_t name) {
902 uint32_t index = pm_locals_find(locals, name);
903 assert(index != UINT32_MAX);
904
905 pm_local_t *local = &locals->locals[index];
906 assert(local->reads < UINT32_MAX);
907
908 local->reads++;
909}
910
915static void
916pm_locals_unread(pm_locals_t *locals, pm_constant_id_t name) {
917 uint32_t index = pm_locals_find(locals, name);
918 assert(index != UINT32_MAX);
919
920 pm_local_t *local = &locals->locals[index];
921 assert(local->reads > 0);
922
923 local->reads--;
924}
925
929static uint32_t
930pm_locals_reads(pm_locals_t *locals, pm_constant_id_t name) {
931 uint32_t index = pm_locals_find(locals, name);
932 assert(index != UINT32_MAX);
933
934 return locals->locals[index].reads;
935}
936
945static void
946pm_locals_order(PRISM_ATTRIBUTE_UNUSED pm_parser_t *parser, pm_locals_t *locals, pm_constant_id_list_t *list, bool toplevel) {
947 pm_constant_id_list_init_capacity(list, locals->size);
948
949 // If we're still below the threshold for switching to a hash, then we only
950 // need to loop over the locals until we hit the size because the locals are
951 // stored in a list.
952 uint32_t capacity = locals->capacity < PM_LOCALS_HASH_THRESHOLD ? locals->size : locals->capacity;
953
954 // We will only warn for unused variables if we're not at the top level, or
955 // if we're parsing a file outside of eval or -e.
956 bool warn_unused = !toplevel || (!parser->parsing_eval && !PM_PARSER_COMMAND_LINE_OPTION_E(parser));
957
958 for (uint32_t index = 0; index < capacity; index++) {
959 pm_local_t *local = &locals->locals[index];
960
961 if (local->name != PM_CONSTANT_ID_UNSET) {
962 pm_constant_id_list_insert(list, (size_t) local->index, local->name);
963
964 if (warn_unused && local->reads == 0 && ((parser->start_line >= 0) || (pm_newline_list_line(&parser->newline_list, local->location.start, parser->start_line) >= 0))) {
965 pm_constant_t *constant = pm_constant_pool_id_to_constant(&parser->constant_pool, local->name);
966
967 if (constant->length >= 1 && *constant->start != '_') {
968 PM_PARSER_WARN_FORMAT(
969 parser,
970 local->location.start,
971 local->location.end,
972 PM_WARN_UNUSED_LOCAL_VARIABLE,
973 (int) constant->length,
974 (const char *) constant->start
975 );
976 }
977 }
978 }
979 }
980}
981
982/******************************************************************************/
983/* Node-related functions */
984/******************************************************************************/
985
989static inline pm_constant_id_t
990pm_parser_constant_id_location(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
991 return pm_constant_pool_insert_shared(&parser->constant_pool, start, (size_t) (end - start));
992}
993
997static inline pm_constant_id_t
998pm_parser_constant_id_owned(pm_parser_t *parser, uint8_t *start, size_t length) {
999 return pm_constant_pool_insert_owned(&parser->constant_pool, start, length);
1000}
1001
1005static inline pm_constant_id_t
1006pm_parser_constant_id_constant(pm_parser_t *parser, const char *start, size_t length) {
1007 return pm_constant_pool_insert_constant(&parser->constant_pool, (const uint8_t *) start, length);
1008}
1009
1013static inline pm_constant_id_t
1014pm_parser_constant_id_token(pm_parser_t *parser, const pm_token_t *token) {
1015 return pm_parser_constant_id_location(parser, token->start, token->end);
1016}
1017
1022static inline pm_constant_id_t
1023pm_parser_optional_constant_id_token(pm_parser_t *parser, const pm_token_t *token) {
1024 return token->type == PM_TOKEN_NOT_PROVIDED ? 0 : pm_parser_constant_id_token(parser, token);
1025}
1026
// Search the value-producing tail of the given node for an expression that
// cannot yield a value. Returns the offending node, or NULL when none is
// found. The loop repeatedly replaces `node` with the child whose value the
// construct would produce; `void_node` remembers the first offending node
// found in an earlier branch of a conditional or rescue construct.
//
// NOTE(review): this listing appears to have dropped several source lines
// inside this function (the embedded line numbers skip 1043, 1045, 1097,
// 1102 and 1151-1152), so at least two case labels, two `cast` declarations
// and the opening of the last case block are not visible here -- restore
// them from the upstream file before compiling.
1032static pm_node_t *
1033pm_check_value_expression(pm_parser_t *parser, pm_node_t *node) {
1034 pm_node_t *void_node = NULL;
1035
1036 while (node != NULL) {
1037 switch (PM_NODE_TYPE(node)) {
1038 case PM_RETURN_NODE:
1039 case PM_BREAK_NODE:
1040 case PM_NEXT_NODE:
1041 case PM_REDO_NODE:
1042 case PM_RETRY_NODE:
// Jump-like nodes never produce a value: report the earliest offending node.
1044 return void_node != NULL ? void_node : node;
1046 return NULL;
1047 case PM_BEGIN_NODE: {
1048 pm_begin_node_t *cast = (pm_begin_node_t *) node;
1049
1050 if (cast->ensure_clause != NULL) {
// With an ensure clause, an offending node in the rescue clause or in the
// body is reported right away; otherwise the ensure clause is checked next.
1051 if (cast->rescue_clause != NULL) {
1052 pm_node_t *vn = pm_check_value_expression(parser, (pm_node_t *) cast->rescue_clause);
1053 if (vn != NULL) return vn;
1054 }
1055
1056 if (cast->statements != NULL) {
1057 pm_node_t *vn = pm_check_value_expression(parser, (pm_node_t *) cast->statements);
1058 if (vn != NULL) return vn;
1059 }
1060
1061 node = (pm_node_t *) cast->ensure_clause;
1062 } else if (cast->rescue_clause != NULL) {
1063 if (cast->statements == NULL) return NULL;
1064
1065 pm_node_t *vn = pm_check_value_expression(parser, (pm_node_t *) cast->statements);
1066 if (vn == NULL) return NULL;
1067 if (void_node == NULL) void_node = vn;
1068
// Walk the chain of rescue clauses; as soon as one of them can produce a
// value, the remembered offending node is discarded.
1069 for (pm_rescue_node_t *rescue_clause = cast->rescue_clause; rescue_clause != NULL; rescue_clause = rescue_clause->subsequent) {
1070 pm_node_t *vn = pm_check_value_expression(parser, (pm_node_t *) rescue_clause->statements);
1071 if (vn == NULL) {
1072 void_node = NULL;
1073 break;
1074 }
1075 if (void_node == NULL) {
1076 void_node = vn;
1077 }
1078 }
1079
1080 if (cast->else_clause != NULL) {
1081 node = (pm_node_t *) cast->else_clause;
1082 } else {
1083 return void_node;
1084 }
1085 } else {
1086 node = (pm_node_t *) cast->statements;
1087 }
1088
1089 break;
1090 }
1091 case PM_ENSURE_NODE: {
1092 pm_ensure_node_t *cast = (pm_ensure_node_t *) node;
1093 node = (pm_node_t *) cast->statements;
1094 break;
1095 }
1096 case PM_PARENTHESES_NODE: {
1098 node = (pm_node_t *) cast->body;
1099 break;
1100 }
1101 case PM_STATEMENTS_NODE: {
// Only the last statement of a statement list is examined.
1103 node = cast->body.nodes[cast->body.size - 1];
1104 break;
1105 }
1106 case PM_IF_NODE: {
1107 pm_if_node_t *cast = (pm_if_node_t *) node;
// A conditional is only a candidate when both of its branches are present.
1108 if (cast->statements == NULL || cast->subsequent == NULL) {
1109 return NULL;
1110 }
1111 pm_node_t *vn = pm_check_value_expression(parser, (pm_node_t *) cast->statements);
1112 if (vn == NULL) {
1113 return NULL;
1114 }
1115 if (void_node == NULL) {
1116 void_node = vn;
1117 }
1118 node = cast->subsequent;
1119 break;
1120 }
1121 case PM_UNLESS_NODE: {
1122 pm_unless_node_t *cast = (pm_unless_node_t *) node;
1123 if (cast->statements == NULL || cast->else_clause == NULL) {
1124 return NULL;
1125 }
1126 pm_node_t *vn = pm_check_value_expression(parser, (pm_node_t *) cast->statements);
1127 if (vn == NULL) {
1128 return NULL;
1129 }
1130 if (void_node == NULL) {
1131 void_node = vn;
1132 }
1133 node = (pm_node_t *) cast->else_clause;
1134 break;
1135 }
1136 case PM_ELSE_NODE: {
1137 pm_else_node_t *cast = (pm_else_node_t *) node;
1138 node = (pm_node_t *) cast->statements;
1139 break;
1140 }
1141 case PM_AND_NODE: {
1142 pm_and_node_t *cast = (pm_and_node_t *) node;
// Only the left operand of `and` / `or` is examined.
1143 node = cast->left;
1144 break;
1145 }
1146 case PM_OR_NODE: {
1147 pm_or_node_t *cast = (pm_or_node_t *) node;
1148 node = cast->left;
1149 break;
1150 }
1153
// Find the scope `cast->depth` levels up and count a read of the local
// named by `cast->name` there.
1154 pm_scope_t *scope = parser->current_scope;
1155 for (uint32_t depth = 0; depth < cast->depth; depth++) scope = scope->previous;
1156
1157 pm_locals_read(&scope->locals, cast->name);
1158 return NULL;
1159 }
1160 default:
1161 return NULL;
1162 }
1163 }
1164
1165 return NULL;
1166}
1167
1168static inline void
1169pm_assert_value_expression(pm_parser_t *parser, pm_node_t *node) {
1170 pm_node_t *void_node = pm_check_value_expression(parser, node);
1171 if (void_node != NULL) {
1172 pm_parser_err_node(parser, void_node, PM_ERR_VOID_EXPRESSION);
1173 }
1174}
1175
// Emit a PM_WARN_VOID_STATEMENT warning for the given node when it is of a
// kind listed in the switch below. `type` and `length` describe the text
// that is passed to the warning's format; no warning is emitted when `type`
// stays NULL.
//
// NOTE(review): this listing appears to have dropped many source lines
// inside this function (the embedded line numbers skip 1185-1190, 1249,
// 1253, 1268-1269, 1271-1274 and 1287), so several case labels and the `if`
// that opens the range check are not visible here -- restore them from the
// upstream file before compiling.
1179static void
1180pm_void_statement_check(pm_parser_t *parser, const pm_node_t *node) {
1181 const char *type = NULL;
1182 int length = 0;
1183
1184 switch (PM_NODE_TYPE(node)) {
1191 type = "a variable";
1192 length = 10;
1193 break;
1194 case PM_CALL_NODE: {
1195 const pm_call_node_t *cast = (const pm_call_node_t *) node;
// Calls that have a call operator, or that have no message location, are
// never reported.
1196 if (cast->call_operator_loc.start != NULL || cast->message_loc.start == NULL) break;
1197
// Only the operator method names matched below are reported, using the
// method name itself as the text.
1198 const pm_constant_t *message = pm_constant_pool_id_to_constant(&parser->constant_pool, cast->name);
1199 switch (message->length) {
1200 case 1:
1201 switch (message->start[0]) {
1202 case '+':
1203 case '-':
1204 case '*':
1205 case '/':
1206 case '%':
1207 case '|':
1208 case '^':
1209 case '&':
1210 case '>':
1211 case '<':
1212 type = (const char *) message->start;
1213 length = 1;
1214 break;
1215 }
1216 break;
1217 case 2:
1218 switch (message->start[1]) {
1219 case '=':
1220 if (message->start[0] == '<' || message->start[0] == '>' || message->start[0] == '!' || message->start[0] == '=') {
1221 type = (const char *) message->start;
1222 length = 2;
1223 }
1224 break;
1225 case '@':
1226 if (message->start[0] == '+' || message->start[0] == '-') {
1227 type = (const char *) message->start;
1228 length = 2;
1229 }
1230 break;
1231 case '*':
1232 if (message->start[0] == '*') {
1233 type = (const char *) message->start;
1234 length = 2;
1235 }
1236 break;
1237 }
1238 break;
1239 case 3:
1240 if (memcmp(message->start, "<=>", 3) == 0) {
1241 type = "<=>";
1242 length = 3;
1243 }
1244 break;
1245 }
1246
1247 break;
1248 }
1250 type = "::";
1251 length = 2;
1252 break;
1254 type = "a constant";
1255 length = 10;
1256 break;
1257 case PM_DEFINED_NODE:
1258 type = "defined?";
1259 length = 8;
1260 break;
1261 case PM_FALSE_NODE:
1262 type = "false";
1263 length = 5;
1264 break;
1265 case PM_FLOAT_NODE:
1266 case PM_IMAGINARY_NODE:
1267 case PM_INTEGER_NODE:
1270 case PM_RATIONAL_NODE:
1275 case PM_STRING_NODE:
1276 case PM_SYMBOL_NODE:
1277 type = "a literal";
1278 length = 9;
1279 break;
1280 case PM_NIL_NODE:
1281 type = "nil";
1282 length = 3;
1283 break;
1284 case PM_RANGE_NODE: {
1285 const pm_range_node_t *cast = (const pm_range_node_t *) node;
1286
1288 type = "...";
1289 length = 3;
1290 } else {
1291 type = "..";
1292 length = 2;
1293 }
1294
1295 break;
1296 }
1297 case PM_SELF_NODE:
1298 type = "self";
1299 length = 4;
1300 break;
1301 case PM_TRUE_NODE:
1302 type = "true";
1303 length = 4;
1304 break;
1305 default:
1306 break;
1307 }
1308
1309 if (type != NULL) {
1310 PM_PARSER_WARN_NODE_FORMAT(parser, node, PM_WARN_VOID_STATEMENT, length, type);
1311 }
1312}
1313
1318static void
1319pm_void_statements_check(pm_parser_t *parser, const pm_statements_node_t *node, bool last_value) {
1320 assert(node->body.size > 0);
1321 const size_t size = node->body.size - (last_value ? 1 : 0);
1322 for (size_t index = 0; index < size; index++) {
1323 pm_void_statement_check(parser, node->body.nodes[index]);
1324 }
1325}
1326
/**
 * The context in which a predicate is being checked.
 * pm_parser_warn_conditional_predicate_literal words its warning with
 * "condition" for CONDITIONAL, with "flip-flop" for FLIP_FLOP, and emits
 * nothing for NOT.
 */
1332typedef enum {
1333 PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL,
1334 PM_CONDITIONAL_PREDICATE_TYPE_FLIP_FLOP,
1335 PM_CONDITIONAL_PREDICATE_TYPE_NOT
1336} pm_conditional_predicate_type_t;
1337
1341static void
1342pm_parser_warn_conditional_predicate_literal(pm_parser_t *parser, pm_node_t *node, pm_conditional_predicate_type_t type, pm_diagnostic_id_t diag_id, const char *prefix) {
1343 switch (type) {
1344 case PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL:
1345 PM_PARSER_WARN_NODE_FORMAT(parser, node, diag_id, prefix, "condition");
1346 break;
1347 case PM_CONDITIONAL_PREDICATE_TYPE_FLIP_FLOP:
1348 PM_PARSER_WARN_NODE_FORMAT(parser, node, diag_id, prefix, "flip-flop");
1349 break;
1350 case PM_CONDITIONAL_PREDICATE_TYPE_NOT:
1351 break;
1352 }
1353}
1354
/**
 * Returns true when the given node is one of the literal node types listed in
 * the switch below. Array nodes qualify when they are flagged as static
 * literals or when every element qualifies; hash nodes qualify when they are
 * flagged as static literals or when every element is an assoc node whose key
 * and value both qualify. Every other node type yields false.
 *
 * NOTE(review): some case labels of this switch are not visible in this
 * excerpt, so the list of qualifying literal types may be longer than shown.
 */
1359static bool
1360pm_conditional_predicate_warn_write_literal_p(const pm_node_t *node) {
1361 switch (PM_NODE_TYPE(node)) {
1362 case PM_ARRAY_NODE: {
1363 if (PM_NODE_FLAG_P(node, PM_NODE_FLAG_STATIC_LITERAL)) return true;
1364
1365 const pm_array_node_t *cast = (const pm_array_node_t *) node;
1366 for (size_t index = 0; index < cast->elements.size; index++) {
1367 if (!pm_conditional_predicate_warn_write_literal_p(cast->elements.nodes[index])) return false;
1368 }
1369
1370 return true;
1371 }
1372 case PM_HASH_NODE: {
1373 if (PM_NODE_FLAG_P(node, PM_NODE_FLAG_STATIC_LITERAL)) return true;
1374
1375 const pm_hash_node_t *cast = (const pm_hash_node_t *) node;
1376 for (size_t index = 0; index < cast->elements.size; index++) {
1377 const pm_node_t *element = cast->elements.nodes[index];
1378 if (!PM_NODE_TYPE_P(element, PM_ASSOC_NODE)) return false;
1379
1380 const pm_assoc_node_t *assoc = (const pm_assoc_node_t *) element;
1381 if (!pm_conditional_predicate_warn_write_literal_p(assoc->key) || !pm_conditional_predicate_warn_write_literal_p(assoc->value)) return false;
1382 }
1383
1384 return true;
1385 }
1386 case PM_FALSE_NODE:
1387 case PM_FLOAT_NODE:
1388 case PM_IMAGINARY_NODE:
1389 case PM_INTEGER_NODE:
1390 case PM_NIL_NODE:
1391 case PM_RATIONAL_NODE:
1396 case PM_STRING_NODE:
1397 case PM_SYMBOL_NODE:
1398 case PM_TRUE_NODE:
1399 return true;
1400 default:
1401 return false;
1402 }
1403}
1404
1409static inline void
1410pm_conditional_predicate_warn_write_literal(pm_parser_t *parser, const pm_node_t *node) {
1411 if (pm_conditional_predicate_warn_write_literal_p(node)) {
1412 pm_parser_warn_node(parser, node, parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_WARN_EQUAL_IN_CONDITIONAL_3_3 : PM_WARN_EQUAL_IN_CONDITIONAL);
1413 }
1414}
1415
/**
 * Walk a node that is being used as a predicate, emitting warnings and
 * retagging nodes where needed. `and`/`or` nodes recurse into both operands as
 * plain conditionals; parentheses and begin nodes recurse into their body when
 * it holds exactly one statement; range nodes recurse into their bounds as
 * flip-flop operands and are then retagged as PM_FLIP_FLOP_NODE. Literal
 * operands produce literal-in-condition warnings, and the branches at the
 * bottom pass the value of a write node to
 * pm_conditional_predicate_warn_write_literal.
 *
 * NOTE(review): several case labels and statements of this switch are not
 * visible in this excerpt (for example the labels guarding the two regular
 * expression branches and the write-node branches), so this description is
 * limited to what is shown.
 */
1428static void
1429pm_conditional_predicate(pm_parser_t *parser, pm_node_t *node, pm_conditional_predicate_type_t type) {
1430 switch (PM_NODE_TYPE(node)) {
1431 case PM_AND_NODE: {
1432 pm_and_node_t *cast = (pm_and_node_t *) node;
1433 pm_conditional_predicate(parser, cast->left, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
1434 pm_conditional_predicate(parser, cast->right, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
1435 break;
1436 }
1437 case PM_OR_NODE: {
1438 pm_or_node_t *cast = (pm_or_node_t *) node;
1439 pm_conditional_predicate(parser, cast->left, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
1440 pm_conditional_predicate(parser, cast->right, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
1441 break;
1442 }
1443 case PM_PARENTHESES_NODE: {
1445
1446 if ((cast->body != NULL) && PM_NODE_TYPE_P(cast->body, PM_STATEMENTS_NODE)) {
1447 pm_statements_node_t *statements = (pm_statements_node_t *) cast->body;
1448 if (statements->body.size == 1) pm_conditional_predicate(parser, statements->body.nodes[0], type);
1449 }
1450
1451 break;
1452 }
1453 case PM_BEGIN_NODE: {
1454 pm_begin_node_t *cast = (pm_begin_node_t *) node;
1455 if (cast->statements != NULL) {
1456 pm_statements_node_t *statements = cast->statements;
1457 if (statements->body.size == 1) pm_conditional_predicate(parser, statements->body.nodes[0], type);
1458 }
1459 break;
1460 }
1461 case PM_RANGE_NODE: {
1462 pm_range_node_t *cast = (pm_range_node_t *) node;
1463
1464 if (cast->left != NULL) pm_conditional_predicate(parser, cast->left, PM_CONDITIONAL_PREDICATE_TYPE_FLIP_FLOP);
1465 if (cast->right != NULL) pm_conditional_predicate(parser, cast->right, PM_CONDITIONAL_PREDICATE_TYPE_FLIP_FLOP);
1466
1467 // Here we change the range node into a flip flop node. We can do
1468 // this since the nodes are exactly the same except for the type.
1469 // We're only asserting against the size when we should probably
1470 // assert against the entire layout, but we'll assume tests will
1471 // catch this.
1472 assert(sizeof(pm_range_node_t) == sizeof(pm_flip_flop_node_t));
1473 node->type = PM_FLIP_FLOP_NODE;
1474
1475 break;
1476 }
1478 // Here we change the regular expression node into a match last line
1479 // node. We can do this since the nodes are exactly the same except
1480 // for the type.
1483
1484 if (!PM_PARSER_COMMAND_LINE_OPTION_E(parser)) {
1485 pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_DEFAULT, "regex ");
1486 }
1487
1488 break;
1490 // Here we change the interpolated regular expression node into an
1491 // interpolated match last line node. We can do this since the nodes
1492 // are exactly the same except for the type.
1495
1496 if (!PM_PARSER_COMMAND_LINE_OPTION_E(parser)) {
1497 pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_VERBOSE, "regex ");
1498 }
1499
1500 break;
1501 case PM_INTEGER_NODE:
1502 if (type == PM_CONDITIONAL_PREDICATE_TYPE_FLIP_FLOP) {
1503 if (!PM_PARSER_COMMAND_LINE_OPTION_E(parser)) {
1504 pm_parser_warn_node(parser, node, PM_WARN_INTEGER_IN_FLIP_FLOP);
1505 }
1506 } else {
1507 pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_VERBOSE, "");
1508 }
1509 break;
1510 case PM_STRING_NODE:
1513 pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_DEFAULT, "string ");
1514 break;
1515 case PM_SYMBOL_NODE:
1517 pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_VERBOSE, "symbol ");
1518 break;
1521 case PM_FLOAT_NODE:
1522 case PM_RATIONAL_NODE:
1523 case PM_IMAGINARY_NODE:
1524 pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_VERBOSE, "");
1525 break;
1527 pm_conditional_predicate_warn_write_literal(parser, ((pm_class_variable_write_node_t *) node)->value);
1528 break;
1530 pm_conditional_predicate_warn_write_literal(parser, ((pm_constant_write_node_t *) node)->value);
1531 break;
1533 pm_conditional_predicate_warn_write_literal(parser, ((pm_global_variable_write_node_t *) node)->value);
1534 break;
1536 pm_conditional_predicate_warn_write_literal(parser, ((pm_instance_variable_write_node_t *) node)->value);
1537 break;
1539 pm_conditional_predicate_warn_write_literal(parser, ((pm_local_variable_write_node_t *) node)->value);
1540 break;
1542 pm_conditional_predicate_warn_write_literal(parser, ((pm_multi_write_node_t *) node)->value);
1543 break;
1544 default:
1545 break;
1546 }
1547}
1548
1557static inline pm_token_t
1558not_provided(pm_parser_t *parser) {
1559 return (pm_token_t) { .type = PM_TOKEN_NOT_PROVIDED, .start = parser->start, .end = parser->start };
1560}
1561
// Helpers that build pm_location_t values: a zero-width location at
// parser->start, the span of a token, the span of a node (either directly or
// through its `base` member), an absent optional location (both pointers
// NULL), and the optional form of a token's span, which is absent when the
// token's type is PM_TOKEN_NOT_PROVIDED.
1562#define PM_LOCATION_NULL_VALUE(parser) ((pm_location_t) { .start = (parser)->start, .end = (parser)->start })
1563#define PM_LOCATION_TOKEN_VALUE(token) ((pm_location_t) { .start = (token)->start, .end = (token)->end })
1564#define PM_LOCATION_NODE_VALUE(node) ((pm_location_t) { .start = (node)->location.start, .end = (node)->location.end })
1565#define PM_LOCATION_NODE_BASE_VALUE(node) ((pm_location_t) { .start = (node)->base.location.start, .end = (node)->base.location.end })
1566#define PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE ((pm_location_t) { .start = NULL, .end = NULL })
1567#define PM_OPTIONAL_LOCATION_TOKEN_VALUE(token) ((token)->type == PM_TOKEN_NOT_PROVIDED ? PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE : PM_LOCATION_TOKEN_VALUE(token))
1568
1591
1595static inline const uint8_t *
1596pm_arguments_end(pm_arguments_t *arguments) {
1597 if (arguments->block != NULL) {
1598 const uint8_t *end = arguments->block->location.end;
1599 if (arguments->closing_loc.start != NULL && arguments->closing_loc.end > end) {
1600 end = arguments->closing_loc.end;
1601 }
1602 return end;
1603 }
1604 if (arguments->closing_loc.start != NULL) {
1605 return arguments->closing_loc.end;
1606 }
1607 if (arguments->arguments != NULL) {
1608 return arguments->arguments->base.location.end;
1609 }
1610 return arguments->closing_loc.end;
1611}
1612
1617static void
1618pm_arguments_validate_block(pm_parser_t *parser, pm_arguments_t *arguments, pm_block_node_t *block) {
1619 // First, check that we have arguments and that we don't have a closing
1620 // location for them.
1621 if (arguments->arguments == NULL || arguments->closing_loc.start != NULL) {
1622 return;
1623 }
1624
1625 // Next, check that we don't have a single parentheses argument. This would
1626 // look like:
1627 //
1628 // foo (1) {}
1629 //
1630 // In this case, it's actually okay for the block to be attached to the
1631 // call, even though it looks like it's attached to the argument.
1632 if (arguments->arguments->arguments.size == 1 && PM_NODE_TYPE_P(arguments->arguments->arguments.nodes[0], PM_PARENTHESES_NODE)) {
1633 return;
1634 }
1635
1636 // If we didn't hit a case before this check, then at this point we need to
1637 // add a syntax error.
1638 pm_parser_err_node(parser, (pm_node_t *) block, PM_ERR_ARGUMENT_UNEXPECTED_BLOCK);
1639}
1640
1641/******************************************************************************/
1642/* Basic character checks */
1643/******************************************************************************/
1644
1651static inline size_t
1652char_is_identifier_start(const pm_parser_t *parser, const uint8_t *b) {
1653 if (parser->encoding_changed) {
1654 size_t width;
1655 if ((width = parser->encoding->alpha_char(b, parser->end - b)) != 0) {
1656 return width;
1657 } else if (*b == '_') {
1658 return 1;
1659 } else if (*b >= 0x80) {
1660 return parser->encoding->char_width(b, parser->end - b);
1661 } else {
1662 return 0;
1663 }
1664 } else if (*b < 0x80) {
1665 return (pm_encoding_unicode_table[*b] & PRISM_ENCODING_ALPHABETIC_BIT ? 1 : 0) || (*b == '_');
1666 } else {
1667 return pm_encoding_utf_8_char_width(b, parser->end - b);
1668 }
1669}
1670
1675static inline size_t
1676char_is_identifier_utf8(const uint8_t *b, const uint8_t *end) {
1677 if (*b < 0x80) {
1678 return (*b == '_') || (pm_encoding_unicode_table[*b] & PRISM_ENCODING_ALPHANUMERIC_BIT ? 1 : 0);
1679 } else {
1680 return pm_encoding_utf_8_char_width(b, end - b);
1681 }
1682}
1683
1689static inline size_t
1690char_is_identifier(const pm_parser_t *parser, const uint8_t *b) {
1691 if (parser->encoding_changed) {
1692 size_t width;
1693 if ((width = parser->encoding->alnum_char(b, parser->end - b)) != 0) {
1694 return width;
1695 } else if (*b == '_') {
1696 return 1;
1697 } else if (*b >= 0x80) {
1698 return parser->encoding->char_width(b, parser->end - b);
1699 } else {
1700 return 0;
1701 }
1702 }
1703 return char_is_identifier_utf8(b, parser->end);
1704}
1705
// A bitset of the punctuation characters that may follow a $ to form a global
// variable name. The printable ASCII range starting at 0x20 is split into
// 32-character words; BIT contributes the bit for character c only to the word
// it belongs to, and PUNCT assembles one whole word.
#define BIT(c, idx) (((c) / 32 - 1 == idx) ? (1U << ((c) % 32)) : 0)
#define PUNCT(idx) ( \
    BIT('~', idx) | BIT('*', idx) | BIT('$', idx) | BIT('?', idx) | \
    BIT('!', idx) | BIT('@', idx) | BIT('/', idx) | BIT('\\', idx) | \
    BIT(';', idx) | BIT(',', idx) | BIT('.', idx) | BIT('=', idx) | \
    BIT(':', idx) | BIT('<', idx) | BIT('>', idx) | BIT('\"', idx) | \
    BIT('&', idx) | BIT('`', idx) | BIT('\'', idx) | BIT('+', idx) | \
    BIT('0', idx))

const unsigned int pm_global_name_punctuation_hash[(0x7e - 0x20 + 31) / 32] = { PUNCT(0), PUNCT(1), PUNCT(2) };

#undef BIT
#undef PUNCT

/**
 * Returns true if the given byte is one of the punctuation characters recorded
 * in pm_global_name_punctuation_hash. Bytes at or below 0x20 and above 0x7e
 * are never accepted.
 */
static inline bool
char_is_global_name_punctuation(const uint8_t b) {
    if (b <= 0x20 || b > 0x7e) return false;

    // Select the 32-character word, then test the bit for this character.
    const unsigned int word = pm_global_name_punctuation_hash[(b - 0x20u) >> 5];
    return ((word >> (b & 31u)) & 1u) != 0;
}
1730
1731static inline bool
1732token_is_setter_name(pm_token_t *token) {
1733 return (
1734 (token->type == PM_TOKEN_BRACKET_LEFT_RIGHT_EQUAL) ||
1735 ((token->type == PM_TOKEN_IDENTIFIER) &&
1736 (token->end - token->start >= 2) &&
1737 (token->end[-1] == '='))
1738 );
1739}
1740
/**
 * Returns true if the first `length` bytes of `source` spell one of the
 * keywords in the table below.
 */
static bool
pm_local_is_keyword(const char *source, size_t length) {
    static const struct {
        size_t length;
        const char *name;
    } keywords[] = {
        { 2, "do" }, { 2, "if" }, { 2, "in" }, { 2, "or" },
        { 3, "and" }, { 3, "def" }, { 3, "end" }, { 3, "for" }, { 3, "nil" },
        { 3, "not" },
        { 4, "case" }, { 4, "else" }, { 4, "next" }, { 4, "redo" },
        { 4, "self" }, { 4, "then" }, { 4, "true" }, { 4, "when" },
        { 5, "alias" }, { 5, "begin" }, { 5, "break" }, { 5, "class" },
        { 5, "elsif" }, { 5, "false" }, { 5, "retry" }, { 5, "super" },
        { 5, "undef" }, { 5, "until" }, { 5, "while" }, { 5, "yield" },
        { 6, "ensure" }, { 6, "module" }, { 6, "rescue" }, { 6, "return" },
        { 6, "unless" },
        { 8, "__LINE__" }, { 8, "__FILE__" },
        { 12, "__ENCODING__" }
    };

    const size_t count = sizeof(keywords) / sizeof(keywords[0]);

    for (size_t index = 0; index < count; index++) {
        // Only keywords of exactly the requested length are compared, so the
        // memcmp never reads past the end of a table entry.
        if (keywords[index].length != length) continue;
        if (memcmp(source, keywords[index].name, length) == 0) return true;
    }

    return false;
}
1811
1812/******************************************************************************/
1813/* Node flag handling functions */
1814/******************************************************************************/
1815
1819static inline void
1820pm_node_flag_set(pm_node_t *node, pm_node_flags_t flag) {
1821 node->flags |= flag;
1822}
1823
1827static inline void
1828pm_node_flag_unset(pm_node_t *node, pm_node_flags_t flag) {
1829 node->flags &= (pm_node_flags_t) ~flag;
1830}
1831
1835static inline void
1836pm_node_flag_set_repeated_parameter(pm_node_t *node) {
1837 assert(PM_NODE_TYPE(node) == PM_BLOCK_LOCAL_VARIABLE_NODE ||
1838 PM_NODE_TYPE(node) == PM_BLOCK_PARAMETER_NODE ||
1839 PM_NODE_TYPE(node) == PM_KEYWORD_REST_PARAMETER_NODE ||
1840 PM_NODE_TYPE(node) == PM_OPTIONAL_KEYWORD_PARAMETER_NODE ||
1841 PM_NODE_TYPE(node) == PM_OPTIONAL_PARAMETER_NODE ||
1842 PM_NODE_TYPE(node) == PM_REQUIRED_KEYWORD_PARAMETER_NODE ||
1843 PM_NODE_TYPE(node) == PM_REQUIRED_PARAMETER_NODE ||
1844 PM_NODE_TYPE(node) == PM_REST_PARAMETER_NODE);
1845
1846 pm_node_flag_set(node, PM_PARAMETER_FLAGS_REPEATED_PARAMETER);
1847}
1848
1849/******************************************************************************/
1850/* Node creation functions */
1851/******************************************************************************/
1852
1858#define PM_REGULAR_EXPRESSION_ENCODING_MASK ~(PM_REGULAR_EXPRESSION_FLAGS_EUC_JP | PM_REGULAR_EXPRESSION_FLAGS_ASCII_8BIT | PM_REGULAR_EXPRESSION_FLAGS_WINDOWS_31J | PM_REGULAR_EXPRESSION_FLAGS_UTF_8)
1859
1863static inline pm_node_flags_t
1864pm_regular_expression_flags_create(pm_parser_t *parser, const pm_token_t *closing) {
1865 pm_node_flags_t flags = 0;
1866
1867 if (closing->type == PM_TOKEN_REGEXP_END) {
1868 pm_buffer_t unknown_flags = { 0 };
1869
1870 for (const uint8_t *flag = closing->start + 1; flag < closing->end; flag++) {
1871 switch (*flag) {
1872 case 'i': flags |= PM_REGULAR_EXPRESSION_FLAGS_IGNORE_CASE; break;
1873 case 'm': flags |= PM_REGULAR_EXPRESSION_FLAGS_MULTI_LINE; break;
1874 case 'x': flags |= PM_REGULAR_EXPRESSION_FLAGS_EXTENDED; break;
1875 case 'o': flags |= PM_REGULAR_EXPRESSION_FLAGS_ONCE; break;
1876
1877 case 'e': flags = (pm_node_flags_t) (((pm_node_flags_t) (flags & PM_REGULAR_EXPRESSION_ENCODING_MASK)) | PM_REGULAR_EXPRESSION_FLAGS_EUC_JP); break;
1878 case 'n': flags = (pm_node_flags_t) (((pm_node_flags_t) (flags & PM_REGULAR_EXPRESSION_ENCODING_MASK)) | PM_REGULAR_EXPRESSION_FLAGS_ASCII_8BIT); break;
1879 case 's': flags = (pm_node_flags_t) (((pm_node_flags_t) (flags & PM_REGULAR_EXPRESSION_ENCODING_MASK)) | PM_REGULAR_EXPRESSION_FLAGS_WINDOWS_31J); break;
1880 case 'u': flags = (pm_node_flags_t) (((pm_node_flags_t) (flags & PM_REGULAR_EXPRESSION_ENCODING_MASK)) | PM_REGULAR_EXPRESSION_FLAGS_UTF_8); break;
1881
1882 default: pm_buffer_append_byte(&unknown_flags, *flag);
1883 }
1884 }
1885
1886 size_t unknown_flags_length = pm_buffer_length(&unknown_flags);
1887 if (unknown_flags_length != 0) {
1888 const char *word = unknown_flags_length >= 2 ? "options" : "option";
1889 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_REGEXP_UNKNOWN_OPTIONS, word, unknown_flags_length, pm_buffer_value(&unknown_flags));
1890 }
1891 pm_buffer_free(&unknown_flags);
1892 }
1893
1894 return flags;
1895}
1896
1897#undef PM_REGULAR_EXPRESSION_ENCODING_MASK
1898
1899static pm_statements_node_t *
1900pm_statements_node_create(pm_parser_t *parser);
1901
1902static void
1903pm_statements_node_body_append(pm_parser_t *parser, pm_statements_node_t *node, pm_node_t *statement, bool newline);
1904
1905static size_t
1906pm_statements_node_body_length(pm_statements_node_t *node);
1907
1912static inline void *
1913pm_node_alloc(PRISM_ATTRIBUTE_UNUSED pm_parser_t *parser, size_t size) {
1914 void *memory = xcalloc(1, size);
1915 if (memory == NULL) {
1916 fprintf(stderr, "Failed to allocate %d bytes\n", (int) size);
1917 abort();
1918 }
1919 return memory;
1920}
1921
// PM_NODE_ALLOC allocates storage for one node of the given type through
// pm_node_alloc and casts the result to a pointer to that type.
// PM_NODE_IDENTIFY pre-increments the parser's node_id counter and yields the
// new value.
1922#define PM_NODE_ALLOC(parser, type) (type *) pm_node_alloc(parser, sizeof(type))
1923#define PM_NODE_IDENTIFY(parser) (++parser->node_id)
1924
1928static pm_missing_node_t *
1929pm_missing_node_create(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
1930 pm_missing_node_t *node = PM_NODE_ALLOC(parser, pm_missing_node_t);
1931
1932 *node = (pm_missing_node_t) {{
1933 .type = PM_MISSING_NODE,
1934 .node_id = PM_NODE_IDENTIFY(parser),
1935 .location = { .start = start, .end = end }
1936 }};
1937
1938 return node;
1939}
1940
1944static pm_alias_global_variable_node_t *
1945pm_alias_global_variable_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *new_name, pm_node_t *old_name) {
1946 assert(keyword->type == PM_TOKEN_KEYWORD_ALIAS);
1947 pm_alias_global_variable_node_t *node = PM_NODE_ALLOC(parser, pm_alias_global_variable_node_t);
1948
1949 *node = (pm_alias_global_variable_node_t) {
1950 {
1951 .type = PM_ALIAS_GLOBAL_VARIABLE_NODE,
1952 .node_id = PM_NODE_IDENTIFY(parser),
1953 .location = {
1954 .start = keyword->start,
1955 .end = old_name->location.end
1956 },
1957 },
1958 .new_name = new_name,
1959 .old_name = old_name,
1960 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword)
1961 };
1962
1963 return node;
1964}
1965
1969static pm_alias_method_node_t *
1970pm_alias_method_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *new_name, pm_node_t *old_name) {
1971 assert(keyword->type == PM_TOKEN_KEYWORD_ALIAS);
1972 pm_alias_method_node_t *node = PM_NODE_ALLOC(parser, pm_alias_method_node_t);
1973
1974 *node = (pm_alias_method_node_t) {
1975 {
1976 .type = PM_ALIAS_METHOD_NODE,
1977 .node_id = PM_NODE_IDENTIFY(parser),
1978 .location = {
1979 .start = keyword->start,
1980 .end = old_name->location.end
1981 },
1982 },
1983 .new_name = new_name,
1984 .old_name = old_name,
1985 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword)
1986 };
1987
1988 return node;
1989}
1990
1994static pm_alternation_pattern_node_t *
1995pm_alternation_pattern_node_create(pm_parser_t *parser, pm_node_t *left, pm_node_t *right, const pm_token_t *operator) {
1996 pm_alternation_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_alternation_pattern_node_t);
1997
1998 *node = (pm_alternation_pattern_node_t) {
1999 {
2000 .type = PM_ALTERNATION_PATTERN_NODE,
2001 .node_id = PM_NODE_IDENTIFY(parser),
2002 .location = {
2003 .start = left->location.start,
2004 .end = right->location.end
2005 },
2006 },
2007 .left = left,
2008 .right = right,
2009 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
2010 };
2011
2012 return node;
2013}
2014
2018static pm_and_node_t *
2019pm_and_node_create(pm_parser_t *parser, pm_node_t *left, const pm_token_t *operator, pm_node_t *right) {
2020 pm_assert_value_expression(parser, left);
2021
2022 pm_and_node_t *node = PM_NODE_ALLOC(parser, pm_and_node_t);
2023
2024 *node = (pm_and_node_t) {
2025 {
2026 .type = PM_AND_NODE,
2027 .node_id = PM_NODE_IDENTIFY(parser),
2028 .location = {
2029 .start = left->location.start,
2030 .end = right->location.end
2031 },
2032 },
2033 .left = left,
2034 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
2035 .right = right
2036 };
2037
2038 return node;
2039}
2040
2044static pm_arguments_node_t *
2045pm_arguments_node_create(pm_parser_t *parser) {
2046 pm_arguments_node_t *node = PM_NODE_ALLOC(parser, pm_arguments_node_t);
2047
2048 *node = (pm_arguments_node_t) {
2049 {
2050 .type = PM_ARGUMENTS_NODE,
2051 .node_id = PM_NODE_IDENTIFY(parser),
2052 .location = PM_LOCATION_NULL_VALUE(parser)
2053 },
2054 .arguments = { 0 }
2055 };
2056
2057 return node;
2058}
2059
2063static size_t
2064pm_arguments_node_size(pm_arguments_node_t *node) {
2065 return node->arguments.size;
2066}
2067
2071static void
2072pm_arguments_node_arguments_append(pm_arguments_node_t *node, pm_node_t *argument) {
2073 if (pm_arguments_node_size(node) == 0) {
2074 node->base.location.start = argument->location.start;
2075 }
2076
2077 node->base.location.end = argument->location.end;
2078 pm_node_list_append(&node->arguments, argument);
2079
2080 if (PM_NODE_TYPE_P(argument, PM_SPLAT_NODE)) {
2081 if (PM_NODE_FLAG_P(node, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_SPLAT)) {
2082 pm_node_flag_set((pm_node_t *) node, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_MULTIPLE_SPLATS);
2083 } else {
2084 pm_node_flag_set((pm_node_t *) node, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_SPLAT);
2085 }
2086 }
2087}
2088
2092static pm_array_node_t *
2093pm_array_node_create(pm_parser_t *parser, const pm_token_t *opening) {
2094 pm_array_node_t *node = PM_NODE_ALLOC(parser, pm_array_node_t);
2095
2096 *node = (pm_array_node_t) {
2097 {
2098 .type = PM_ARRAY_NODE,
2099 .flags = PM_NODE_FLAG_STATIC_LITERAL,
2100 .node_id = PM_NODE_IDENTIFY(parser),
2101 .location = PM_LOCATION_TOKEN_VALUE(opening)
2102 },
2103 .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
2104 .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
2105 .elements = { 0 }
2106 };
2107
2108 return node;
2109}
2110
2114static inline void
2115pm_array_node_elements_append(pm_array_node_t *node, pm_node_t *element) {
2116 if (!node->elements.size && !node->opening_loc.start) {
2117 node->base.location.start = element->location.start;
2118 }
2119
2120 pm_node_list_append(&node->elements, element);
2121 node->base.location.end = element->location.end;
2122
2123 // If the element is not a static literal, then the array is not a static
2124 // literal. Turn that flag off.
2125 if (PM_NODE_TYPE_P(element, PM_ARRAY_NODE) || PM_NODE_TYPE_P(element, PM_HASH_NODE) || PM_NODE_TYPE_P(element, PM_RANGE_NODE) || !PM_NODE_FLAG_P(element, PM_NODE_FLAG_STATIC_LITERAL)) {
2126 pm_node_flag_unset((pm_node_t *)node, PM_NODE_FLAG_STATIC_LITERAL);
2127 }
2128
2129 if (PM_NODE_TYPE_P(element, PM_SPLAT_NODE)) {
2130 pm_node_flag_set((pm_node_t *)node, PM_ARRAY_NODE_FLAGS_CONTAINS_SPLAT);
2131 }
2132}
2133
2137static void
2138pm_array_node_close_set(pm_array_node_t *node, const pm_token_t *closing) {
2139 assert(closing->type == PM_TOKEN_BRACKET_RIGHT || closing->type == PM_TOKEN_STRING_END || closing->type == PM_TOKEN_MISSING || closing->type == PM_TOKEN_NOT_PROVIDED);
2140 node->base.location.end = closing->end;
2141 node->closing_loc = PM_LOCATION_TOKEN_VALUE(closing);
2142}
2143
2148static pm_array_pattern_node_t *
2149pm_array_pattern_node_node_list_create(pm_parser_t *parser, pm_node_list_t *nodes) {
2150 pm_array_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_array_pattern_node_t);
2151
2152 *node = (pm_array_pattern_node_t) {
2153 {
2154 .type = PM_ARRAY_PATTERN_NODE,
2155 .node_id = PM_NODE_IDENTIFY(parser),
2156 .location = {
2157 .start = nodes->nodes[0]->location.start,
2158 .end = nodes->nodes[nodes->size - 1]->location.end
2159 },
2160 },
2161 .constant = NULL,
2162 .rest = NULL,
2163 .requireds = { 0 },
2164 .posts = { 0 },
2165 .opening_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
2166 .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
2167 };
2168
2169 // For now we're going to just copy over each pointer manually. This could be
2170 // much more efficient, as we could instead resize the node list.
2171 bool found_rest = false;
2172 pm_node_t *child;
2173
2174 PM_NODE_LIST_FOREACH(nodes, index, child) {
2175 if (!found_rest && (PM_NODE_TYPE_P(child, PM_SPLAT_NODE) || PM_NODE_TYPE_P(child, PM_IMPLICIT_REST_NODE))) {
2176 node->rest = child;
2177 found_rest = true;
2178 } else if (found_rest) {
2179 pm_node_list_append(&node->posts, child);
2180 } else {
2181 pm_node_list_append(&node->requireds, child);
2182 }
2183 }
2184
2185 return node;
2186}
2187
2191static pm_array_pattern_node_t *
2192pm_array_pattern_node_rest_create(pm_parser_t *parser, pm_node_t *rest) {
2193 pm_array_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_array_pattern_node_t);
2194
2195 *node = (pm_array_pattern_node_t) {
2196 {
2197 .type = PM_ARRAY_PATTERN_NODE,
2198 .node_id = PM_NODE_IDENTIFY(parser),
2199 .location = rest->location,
2200 },
2201 .constant = NULL,
2202 .rest = rest,
2203 .requireds = { 0 },
2204 .posts = { 0 },
2205 .opening_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
2206 .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
2207 };
2208
2209 return node;
2210}
2211
2216static pm_array_pattern_node_t *
2217pm_array_pattern_node_constant_create(pm_parser_t *parser, pm_node_t *constant, const pm_token_t *opening, const pm_token_t *closing) {
2218 pm_array_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_array_pattern_node_t);
2219
2220 *node = (pm_array_pattern_node_t) {
2221 {
2222 .type = PM_ARRAY_PATTERN_NODE,
2223 .node_id = PM_NODE_IDENTIFY(parser),
2224 .location = {
2225 .start = constant->location.start,
2226 .end = closing->end
2227 },
2228 },
2229 .constant = constant,
2230 .rest = NULL,
2231 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
2232 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
2233 .requireds = { 0 },
2234 .posts = { 0 }
2235 };
2236
2237 return node;
2238}
2239
2244static pm_array_pattern_node_t *
2245pm_array_pattern_node_empty_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *closing) {
2246 pm_array_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_array_pattern_node_t);
2247
2248 *node = (pm_array_pattern_node_t) {
2249 {
2250 .type = PM_ARRAY_PATTERN_NODE,
2251 .node_id = PM_NODE_IDENTIFY(parser),
2252 .location = {
2253 .start = opening->start,
2254 .end = closing->end
2255 },
2256 },
2257 .constant = NULL,
2258 .rest = NULL,
2259 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
2260 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
2261 .requireds = { 0 },
2262 .posts = { 0 }
2263 };
2264
2265 return node;
2266}
2267
2268static inline void
2269pm_array_pattern_node_requireds_append(pm_array_pattern_node_t *node, pm_node_t *inner) {
2270 pm_node_list_append(&node->requireds, inner);
2271}
2272
2276static pm_assoc_node_t *
2277pm_assoc_node_create(pm_parser_t *parser, pm_node_t *key, const pm_token_t *operator, pm_node_t *value) {
2278 pm_assoc_node_t *node = PM_NODE_ALLOC(parser, pm_assoc_node_t);
2279 const uint8_t *end;
2280
2281 if (value != NULL && value->location.end > key->location.end) {
2282 end = value->location.end;
2283 } else if (operator->type != PM_TOKEN_NOT_PROVIDED) {
2284 end = operator->end;
2285 } else {
2286 end = key->location.end;
2287 }
2288
2289 // Hash string keys will be frozen, so we can mark them as frozen here so
2290 // that the compiler picks them up and also when we check for static literal
2291 // on the keys it gets factored in.
2292 if (PM_NODE_TYPE_P(key, PM_STRING_NODE)) {
2293 key->flags |= PM_STRING_FLAGS_FROZEN | PM_NODE_FLAG_STATIC_LITERAL;
2294 }
2295
2296 // If the key and value of this assoc node are both static literals, then
2297 // we can mark this node as a static literal.
2298 pm_node_flags_t flags = 0;
2299 if (
2300 !PM_NODE_TYPE_P(key, PM_ARRAY_NODE) && !PM_NODE_TYPE_P(key, PM_HASH_NODE) && !PM_NODE_TYPE_P(key, PM_RANGE_NODE) &&
2301 value && !PM_NODE_TYPE_P(value, PM_ARRAY_NODE) && !PM_NODE_TYPE_P(value, PM_HASH_NODE) && !PM_NODE_TYPE_P(value, PM_RANGE_NODE)
2302 ) {
2303 flags = key->flags & value->flags & PM_NODE_FLAG_STATIC_LITERAL;
2304 }
2305
2306 *node = (pm_assoc_node_t) {
2307 {
2308 .type = PM_ASSOC_NODE,
2309 .flags = flags,
2310 .node_id = PM_NODE_IDENTIFY(parser),
2311 .location = {
2312 .start = key->location.start,
2313 .end = end
2314 },
2315 },
2316 .key = key,
2317 .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
2318 .value = value
2319 };
2320
2321 return node;
2322}
2323
2327static pm_assoc_splat_node_t *
2328pm_assoc_splat_node_create(pm_parser_t *parser, pm_node_t *value, const pm_token_t *operator) {
2329 assert(operator->type == PM_TOKEN_USTAR_STAR);
2330 pm_assoc_splat_node_t *node = PM_NODE_ALLOC(parser, pm_assoc_splat_node_t);
2331
2332 *node = (pm_assoc_splat_node_t) {
2333 {
2334 .type = PM_ASSOC_SPLAT_NODE,
2335 .node_id = PM_NODE_IDENTIFY(parser),
2336 .location = {
2337 .start = operator->start,
2338 .end = value == NULL ? operator->end : value->location.end
2339 },
2340 },
2341 .value = value,
2342 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
2343 };
2344
2345 return node;
2346}
2347
2351static pm_back_reference_read_node_t *
2352pm_back_reference_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
2353 assert(name->type == PM_TOKEN_BACK_REFERENCE);
2354 pm_back_reference_read_node_t *node = PM_NODE_ALLOC(parser, pm_back_reference_read_node_t);
2355
2356 *node = (pm_back_reference_read_node_t) {
2357 {
2358 .type = PM_BACK_REFERENCE_READ_NODE,
2359 .node_id = PM_NODE_IDENTIFY(parser),
2360 .location = PM_LOCATION_TOKEN_VALUE(name),
2361 },
2362 .name = pm_parser_constant_id_token(parser, name)
2363 };
2364
2365 return node;
2366}
2367
2371static pm_begin_node_t *
2372pm_begin_node_create(pm_parser_t *parser, const pm_token_t *begin_keyword, pm_statements_node_t *statements) {
2373 pm_begin_node_t *node = PM_NODE_ALLOC(parser, pm_begin_node_t);
2374
2375 *node = (pm_begin_node_t) {
2376 {
2377 .type = PM_BEGIN_NODE,
2378 .node_id = PM_NODE_IDENTIFY(parser),
2379 .location = {
2380 .start = begin_keyword->start,
2381 .end = statements == NULL ? begin_keyword->end : statements->base.location.end
2382 },
2383 },
2384 .begin_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(begin_keyword),
2385 .statements = statements,
2386 .end_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
2387 };
2388
2389 return node;
2390}
2391
2395static void
2396pm_begin_node_rescue_clause_set(pm_begin_node_t *node, pm_rescue_node_t *rescue_clause) {
2397 // If the begin keyword doesn't exist, we set the start on the begin_node
2398 if (!node->begin_keyword_loc.start) {
2399 node->base.location.start = rescue_clause->base.location.start;
2400 }
2401 node->base.location.end = rescue_clause->base.location.end;
2402 node->rescue_clause = rescue_clause;
2403}
2404
2408static void
2409pm_begin_node_else_clause_set(pm_begin_node_t *node, pm_else_node_t *else_clause) {
2410 node->base.location.end = else_clause->base.location.end;
2411 node->else_clause = else_clause;
2412}
2413
2417static void
2418pm_begin_node_ensure_clause_set(pm_begin_node_t *node, pm_ensure_node_t *ensure_clause) {
2419 node->base.location.end = ensure_clause->base.location.end;
2420 node->ensure_clause = ensure_clause;
2421}
2422
2426static void
2427pm_begin_node_end_keyword_set(pm_begin_node_t *node, const pm_token_t *end_keyword) {
2428 assert(end_keyword->type == PM_TOKEN_KEYWORD_END || end_keyword->type == PM_TOKEN_MISSING);
2429
2430 node->base.location.end = end_keyword->end;
2431 node->end_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(end_keyword);
2432}
2433
2437static pm_block_argument_node_t *
2438pm_block_argument_node_create(pm_parser_t *parser, const pm_token_t *operator, pm_node_t *expression) {
2439 pm_block_argument_node_t *node = PM_NODE_ALLOC(parser, pm_block_argument_node_t);
2440
2441 *node = (pm_block_argument_node_t) {
2442 {
2443 .type = PM_BLOCK_ARGUMENT_NODE,
2444 .node_id = PM_NODE_IDENTIFY(parser),
2445 .location = {
2446 .start = operator->start,
2447 .end = expression == NULL ? operator->end : expression->location.end
2448 },
2449 },
2450 .expression = expression,
2451 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
2452 };
2453
2454 return node;
2455}
2456
2460static pm_block_node_t *
2461pm_block_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const pm_token_t *opening, pm_node_t *parameters, pm_node_t *body, const pm_token_t *closing) {
2462 pm_block_node_t *node = PM_NODE_ALLOC(parser, pm_block_node_t);
2463
2464 *node = (pm_block_node_t) {
2465 {
2466 .type = PM_BLOCK_NODE,
2467 .node_id = PM_NODE_IDENTIFY(parser),
2468 .location = { .start = opening->start, .end = closing->end },
2469 },
2470 .locals = *locals,
2471 .parameters = parameters,
2472 .body = body,
2473 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
2474 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing)
2475 };
2476
2477 return node;
2478}
2479
2483static pm_block_parameter_node_t *
2484pm_block_parameter_node_create(pm_parser_t *parser, const pm_token_t *name, const pm_token_t *operator) {
2485 assert(operator->type == PM_TOKEN_NOT_PROVIDED || operator->type == PM_TOKEN_UAMPERSAND || operator->type == PM_TOKEN_AMPERSAND);
2486 pm_block_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_block_parameter_node_t);
2487
2488 *node = (pm_block_parameter_node_t) {
2489 {
2490 .type = PM_BLOCK_PARAMETER_NODE,
2491 .node_id = PM_NODE_IDENTIFY(parser),
2492 .location = {
2493 .start = operator->start,
2494 .end = (name->type == PM_TOKEN_NOT_PROVIDED ? operator->end : name->end)
2495 },
2496 },
2497 .name = pm_parser_optional_constant_id_token(parser, name),
2498 .name_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(name),
2499 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
2500 };
2501
2502 return node;
2503}
2504
2508static pm_block_parameters_node_t *
2509pm_block_parameters_node_create(pm_parser_t *parser, pm_parameters_node_t *parameters, const pm_token_t *opening) {
2510 pm_block_parameters_node_t *node = PM_NODE_ALLOC(parser, pm_block_parameters_node_t);
2511
2512 const uint8_t *start;
2513 if (opening->type != PM_TOKEN_NOT_PROVIDED) {
2514 start = opening->start;
2515 } else if (parameters != NULL) {
2516 start = parameters->base.location.start;
2517 } else {
2518 start = NULL;
2519 }
2520
2521 const uint8_t *end;
2522 if (parameters != NULL) {
2523 end = parameters->base.location.end;
2524 } else if (opening->type != PM_TOKEN_NOT_PROVIDED) {
2525 end = opening->end;
2526 } else {
2527 end = NULL;
2528 }
2529
2530 *node = (pm_block_parameters_node_t) {
2531 {
2532 .type = PM_BLOCK_PARAMETERS_NODE,
2533 .node_id = PM_NODE_IDENTIFY(parser),
2534 .location = {
2535 .start = start,
2536 .end = end
2537 }
2538 },
2539 .parameters = parameters,
2540 .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
2541 .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
2542 .locals = { 0 }
2543 };
2544
2545 return node;
2546}
2547
2551static void
2552pm_block_parameters_node_closing_set(pm_block_parameters_node_t *node, const pm_token_t *closing) {
2553 assert(closing->type == PM_TOKEN_PIPE || closing->type == PM_TOKEN_PARENTHESIS_RIGHT || closing->type == PM_TOKEN_MISSING);
2554
2555 node->base.location.end = closing->end;
2556 node->closing_loc = PM_LOCATION_TOKEN_VALUE(closing);
2557}
2558
2562static pm_block_local_variable_node_t *
2563pm_block_local_variable_node_create(pm_parser_t *parser, const pm_token_t *name) {
2564 pm_block_local_variable_node_t *node = PM_NODE_ALLOC(parser, pm_block_local_variable_node_t);
2565
2566 *node = (pm_block_local_variable_node_t) {
2567 {
2568 .type = PM_BLOCK_LOCAL_VARIABLE_NODE,
2569 .node_id = PM_NODE_IDENTIFY(parser),
2570 .location = PM_LOCATION_TOKEN_VALUE(name),
2571 },
2572 .name = pm_parser_constant_id_token(parser, name)
2573 };
2574
2575 return node;
2576}
2577
2581static void
2582pm_block_parameters_node_append_local(pm_block_parameters_node_t *node, const pm_block_local_variable_node_t *local) {
2583 pm_node_list_append(&node->locals, (pm_node_t *) local);
2584
2585 if (node->base.location.start == NULL) node->base.location.start = local->base.location.start;
2586 node->base.location.end = local->base.location.end;
2587}
2588
2592static pm_break_node_t *
2593pm_break_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_arguments_node_t *arguments) {
2594 assert(keyword->type == PM_TOKEN_KEYWORD_BREAK);
2595 pm_break_node_t *node = PM_NODE_ALLOC(parser, pm_break_node_t);
2596
2597 *node = (pm_break_node_t) {
2598 {
2599 .type = PM_BREAK_NODE,
2600 .node_id = PM_NODE_IDENTIFY(parser),
2601 .location = {
2602 .start = keyword->start,
2603 .end = (arguments == NULL ? keyword->end : arguments->base.location.end)
2604 },
2605 },
2606 .arguments = arguments,
2607 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword)
2608 };
2609
2610 return node;
2611}
2612
2613// There are certain flags that we want to use internally but don't want to
2614// expose because they are not relevant beyond parsing. Therefore we'll define
2615// them here and not define them in config.yml/a header file.
2616static const pm_node_flags_t PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY = 0x4;
2617static const pm_node_flags_t PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY = 0x40;
2618static const pm_node_flags_t PM_CALL_NODE_FLAGS_COMPARISON = 0x80;
2619static const pm_node_flags_t PM_CALL_NODE_FLAGS_INDEX = 0x100;
2620
2626static pm_call_node_t *
2627pm_call_node_create(pm_parser_t *parser, pm_node_flags_t flags) {
2628 pm_call_node_t *node = PM_NODE_ALLOC(parser, pm_call_node_t);
2629
2630 *node = (pm_call_node_t) {
2631 {
2632 .type = PM_CALL_NODE,
2633 .flags = flags,
2634 .node_id = PM_NODE_IDENTIFY(parser),
2635 .location = PM_LOCATION_NULL_VALUE(parser),
2636 },
2637 .receiver = NULL,
2638 .call_operator_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
2639 .message_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
2640 .opening_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
2641 .arguments = NULL,
2642 .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
2643 .block = NULL,
2644 .name = 0
2645 };
2646
2647 return node;
2648}
2649
2654static inline pm_node_flags_t
2655pm_call_node_ignore_visibility_flag(const pm_node_t *receiver) {
2656 return PM_NODE_TYPE_P(receiver, PM_SELF_NODE) ? PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY : 0;
2657}
2658
2663static pm_call_node_t *
2664pm_call_node_aref_create(pm_parser_t *parser, pm_node_t *receiver, pm_arguments_t *arguments) {
2665 pm_assert_value_expression(parser, receiver);
2666
2667 pm_node_flags_t flags = pm_call_node_ignore_visibility_flag(receiver);
2668 if (arguments->block == NULL || PM_NODE_TYPE_P(arguments->block, PM_BLOCK_ARGUMENT_NODE)) {
2669 flags |= PM_CALL_NODE_FLAGS_INDEX;
2670 }
2671
2672 pm_call_node_t *node = pm_call_node_create(parser, flags);
2673
2674 node->base.location.start = receiver->location.start;
2675 node->base.location.end = pm_arguments_end(arguments);
2676
2677 node->receiver = receiver;
2678 node->message_loc.start = arguments->opening_loc.start;
2679 node->message_loc.end = arguments->closing_loc.end;
2680
2681 node->opening_loc = arguments->opening_loc;
2682 node->arguments = arguments->arguments;
2683 node->closing_loc = arguments->closing_loc;
2684 node->block = arguments->block;
2685
2686 node->name = pm_parser_constant_id_constant(parser, "[]", 2);
2687 return node;
2688}
2689
2693static pm_call_node_t *
2694pm_call_node_binary_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *operator, pm_node_t *argument, pm_node_flags_t flags) {
2695 pm_assert_value_expression(parser, receiver);
2696 pm_assert_value_expression(parser, argument);
2697
2698 pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver) | flags);
2699
2700 node->base.location.start = MIN(receiver->location.start, argument->location.start);
2701 node->base.location.end = MAX(receiver->location.end, argument->location.end);
2702
2703 node->receiver = receiver;
2704 node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator);
2705
2706 pm_arguments_node_t *arguments = pm_arguments_node_create(parser);
2707 pm_arguments_node_arguments_append(arguments, argument);
2708 node->arguments = arguments;
2709
2710 node->name = pm_parser_constant_id_token(parser, operator);
2711 return node;
2712}
2713
2717static pm_call_node_t *
2718pm_call_node_call_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *operator, pm_token_t *message, pm_arguments_t *arguments) {
2719 pm_assert_value_expression(parser, receiver);
2720
2721 pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver));
2722
2723 node->base.location.start = receiver->location.start;
2724 const uint8_t *end = pm_arguments_end(arguments);
2725 if (end == NULL) {
2726 end = message->end;
2727 }
2728 node->base.location.end = end;
2729
2730 node->receiver = receiver;
2731 node->call_operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator);
2732 node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(message);
2733 node->opening_loc = arguments->opening_loc;
2734 node->arguments = arguments->arguments;
2735 node->closing_loc = arguments->closing_loc;
2736 node->block = arguments->block;
2737
2738 if (operator->type == PM_TOKEN_AMPERSAND_DOT) {
2739 pm_node_flag_set((pm_node_t *)node, PM_CALL_NODE_FLAGS_SAFE_NAVIGATION);
2740 }
2741
2742 node->name = pm_parser_constant_id_token(parser, message);
2743 return node;
2744}
2745
2749static pm_call_node_t *
2750pm_call_node_call_synthesized_create(pm_parser_t *parser, pm_node_t *receiver, const char *message, pm_arguments_node_t *arguments) {
2751 pm_call_node_t *node = pm_call_node_create(parser, 0);
2752 node->base.location.start = parser->start;
2753 node->base.location.end = parser->end;
2754
2755 node->receiver = receiver;
2756 node->call_operator_loc = (pm_location_t) { .start = NULL, .end = NULL };
2757 node->message_loc = (pm_location_t) { .start = NULL, .end = NULL };
2758 node->arguments = arguments;
2759
2760 node->name = pm_parser_constant_id_constant(parser, message, strlen(message));
2761 return node;
2762}
2763
2768static pm_call_node_t *
2769pm_call_node_fcall_create(pm_parser_t *parser, pm_token_t *message, pm_arguments_t *arguments) {
2770 pm_call_node_t *node = pm_call_node_create(parser, PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY);
2771
2772 node->base.location.start = message->start;
2773 node->base.location.end = pm_arguments_end(arguments);
2774
2775 node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(message);
2776 node->opening_loc = arguments->opening_loc;
2777 node->arguments = arguments->arguments;
2778 node->closing_loc = arguments->closing_loc;
2779 node->block = arguments->block;
2780
2781 node->name = pm_parser_constant_id_token(parser, message);
2782 return node;
2783}
2784
2789static pm_call_node_t *
2790pm_call_node_fcall_synthesized_create(pm_parser_t *parser, pm_arguments_node_t *arguments, pm_constant_id_t name) {
2791 pm_call_node_t *node = pm_call_node_create(parser, PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY);
2792
2793 node->base.location = PM_LOCATION_NULL_VALUE(parser);
2794 node->arguments = arguments;
2795
2796 node->name = name;
2797 return node;
2798}
2799
2803static pm_call_node_t *
2804pm_call_node_not_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *message, pm_arguments_t *arguments) {
2805 pm_assert_value_expression(parser, receiver);
2806 if (receiver != NULL) pm_conditional_predicate(parser, receiver, PM_CONDITIONAL_PREDICATE_TYPE_NOT);
2807
2808 pm_call_node_t *node = pm_call_node_create(parser, receiver == NULL ? 0 : pm_call_node_ignore_visibility_flag(receiver));
2809
2810 node->base.location.start = message->start;
2811 if (arguments->closing_loc.start != NULL) {
2812 node->base.location.end = arguments->closing_loc.end;
2813 } else {
2814 assert(receiver != NULL);
2815 node->base.location.end = receiver->location.end;
2816 }
2817
2818 node->receiver = receiver;
2819 node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(message);
2820 node->opening_loc = arguments->opening_loc;
2821 node->arguments = arguments->arguments;
2822 node->closing_loc = arguments->closing_loc;
2823
2824 node->name = pm_parser_constant_id_constant(parser, "!", 1);
2825 return node;
2826}
2827
2831static pm_call_node_t *
2832pm_call_node_shorthand_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *operator, pm_arguments_t *arguments) {
2833 pm_assert_value_expression(parser, receiver);
2834
2835 pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver));
2836
2837 node->base.location.start = receiver->location.start;
2838 node->base.location.end = pm_arguments_end(arguments);
2839
2840 node->receiver = receiver;
2841 node->call_operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator);
2842 node->opening_loc = arguments->opening_loc;
2843 node->arguments = arguments->arguments;
2844 node->closing_loc = arguments->closing_loc;
2845 node->block = arguments->block;
2846
2847 if (operator->type == PM_TOKEN_AMPERSAND_DOT) {
2848 pm_node_flag_set((pm_node_t *)node, PM_CALL_NODE_FLAGS_SAFE_NAVIGATION);
2849 }
2850
2851 node->name = pm_parser_constant_id_constant(parser, "call", 4);
2852 return node;
2853}
2854
2858static pm_call_node_t *
2859pm_call_node_unary_create(pm_parser_t *parser, pm_token_t *operator, pm_node_t *receiver, const char *name) {
2860 pm_assert_value_expression(parser, receiver);
2861
2862 pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver));
2863
2864 node->base.location.start = operator->start;
2865 node->base.location.end = receiver->location.end;
2866
2867 node->receiver = receiver;
2868 node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator);
2869
2870 node->name = pm_parser_constant_id_constant(parser, name, strlen(name));
2871 return node;
2872}
2873
2878static pm_call_node_t *
2879pm_call_node_variable_call_create(pm_parser_t *parser, pm_token_t *message) {
2880 pm_call_node_t *node = pm_call_node_create(parser, PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY);
2881
2882 node->base.location = PM_LOCATION_TOKEN_VALUE(message);
2883 node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(message);
2884
2885 node->name = pm_parser_constant_id_token(parser, message);
2886 return node;
2887}
2888
2893static inline bool
2894pm_call_node_writable_p(const pm_parser_t *parser, const pm_call_node_t *node) {
2895 return (
2896 (node->message_loc.start != NULL) &&
2897 (node->message_loc.end[-1] != '!') &&
2898 (node->message_loc.end[-1] != '?') &&
2899 char_is_identifier_start(parser, node->message_loc.start) &&
2900 (node->opening_loc.start == NULL) &&
2901 (node->arguments == NULL) &&
2902 (node->block == NULL)
2903 );
2904}
2905
2909static void
2910pm_call_write_read_name_init(pm_parser_t *parser, pm_constant_id_t *read_name, pm_constant_id_t *write_name) {
2911 pm_constant_t *write_constant = pm_constant_pool_id_to_constant(&parser->constant_pool, *write_name);
2912
2913 if (write_constant->length > 0) {
2914 size_t length = write_constant->length - 1;
2915
2916 void *memory = xmalloc(length);
2917 memcpy(memory, write_constant->start, length);
2918
2919 *read_name = pm_constant_pool_insert_owned(&parser->constant_pool, (uint8_t *) memory, length);
2920 } else {
2921 // We can get here if the message was missing because of a syntax error.
2922 *read_name = pm_parser_constant_id_constant(parser, "", 0);
2923 }
2924}
2925
2929static pm_call_and_write_node_t *
2930pm_call_and_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
2931 assert(target->block == NULL);
2932 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
2933 pm_call_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_call_and_write_node_t);
2934
2935 *node = (pm_call_and_write_node_t) {
2936 {
2937 .type = PM_CALL_AND_WRITE_NODE,
2938 .flags = target->base.flags,
2939 .node_id = PM_NODE_IDENTIFY(parser),
2940 .location = {
2941 .start = target->base.location.start,
2942 .end = value->location.end
2943 }
2944 },
2945 .receiver = target->receiver,
2946 .call_operator_loc = target->call_operator_loc,
2947 .message_loc = target->message_loc,
2948 .read_name = 0,
2949 .write_name = target->name,
2950 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
2951 .value = value
2952 };
2953
2954 pm_call_write_read_name_init(parser, &node->read_name, &node->write_name);
2955
2956 // Here we're going to free the target, since it is no longer necessary.
2957 // However, we don't want to call `pm_node_destroy` because we want to keep
2958 // around all of its children since we just reused them.
2959 xfree(target);
2960
2961 return node;
2962}
2963
2968static void
2969pm_index_arguments_check(pm_parser_t *parser, const pm_arguments_node_t *arguments, const pm_node_t *block) {
2970 if (parser->version != PM_OPTIONS_VERSION_CRUBY_3_3) {
2971 if (arguments != NULL && PM_NODE_FLAG_P(arguments, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS)) {
2972 pm_node_t *node;
2973 PM_NODE_LIST_FOREACH(&arguments->arguments, index, node) {
2974 if (PM_NODE_TYPE_P(node, PM_KEYWORD_HASH_NODE)) {
2975 pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_INDEX_KEYWORDS);
2976 break;
2977 }
2978 }
2979 }
2980
2981 if (block != NULL) {
2982 pm_parser_err_node(parser, block, PM_ERR_UNEXPECTED_INDEX_BLOCK);
2983 }
2984 }
2985}
2986
2990static pm_index_and_write_node_t *
2991pm_index_and_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
2992 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
2993 pm_index_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_index_and_write_node_t);
2994
2995 pm_index_arguments_check(parser, target->arguments, target->block);
2996
2997 assert(!target->block || PM_NODE_TYPE_P(target->block, PM_BLOCK_ARGUMENT_NODE));
2998 *node = (pm_index_and_write_node_t) {
2999 {
3000 .type = PM_INDEX_AND_WRITE_NODE,
3001 .flags = target->base.flags,
3002 .node_id = PM_NODE_IDENTIFY(parser),
3003 .location = {
3004 .start = target->base.location.start,
3005 .end = value->location.end
3006 }
3007 },
3008 .receiver = target->receiver,
3009 .call_operator_loc = target->call_operator_loc,
3010 .opening_loc = target->opening_loc,
3011 .arguments = target->arguments,
3012 .closing_loc = target->closing_loc,
3013 .block = (pm_block_argument_node_t *) target->block,
3014 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3015 .value = value
3016 };
3017
3018 // Here we're going to free the target, since it is no longer necessary.
3019 // However, we don't want to call `pm_node_destroy` because we want to keep
3020 // around all of its children since we just reused them.
3021 xfree(target);
3022
3023 return node;
3024}
3025
3029static pm_call_operator_write_node_t *
3030pm_call_operator_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3031 assert(target->block == NULL);
3032 pm_call_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_call_operator_write_node_t);
3033
3034 *node = (pm_call_operator_write_node_t) {
3035 {
3036 .type = PM_CALL_OPERATOR_WRITE_NODE,
3037 .flags = target->base.flags,
3038 .node_id = PM_NODE_IDENTIFY(parser),
3039 .location = {
3040 .start = target->base.location.start,
3041 .end = value->location.end
3042 }
3043 },
3044 .receiver = target->receiver,
3045 .call_operator_loc = target->call_operator_loc,
3046 .message_loc = target->message_loc,
3047 .read_name = 0,
3048 .write_name = target->name,
3049 .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1),
3050 .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3051 .value = value
3052 };
3053
3054 pm_call_write_read_name_init(parser, &node->read_name, &node->write_name);
3055
3056 // Here we're going to free the target, since it is no longer necessary.
3057 // However, we don't want to call `pm_node_destroy` because we want to keep
3058 // around all of its children since we just reused them.
3059 xfree(target);
3060
3061 return node;
3062}
3063
3067static pm_index_operator_write_node_t *
3068pm_index_operator_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3069 pm_index_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_index_operator_write_node_t);
3070
3071 pm_index_arguments_check(parser, target->arguments, target->block);
3072
3073 assert(!target->block || PM_NODE_TYPE_P(target->block, PM_BLOCK_ARGUMENT_NODE));
3074 *node = (pm_index_operator_write_node_t) {
3075 {
3076 .type = PM_INDEX_OPERATOR_WRITE_NODE,
3077 .flags = target->base.flags,
3078 .node_id = PM_NODE_IDENTIFY(parser),
3079 .location = {
3080 .start = target->base.location.start,
3081 .end = value->location.end
3082 }
3083 },
3084 .receiver = target->receiver,
3085 .call_operator_loc = target->call_operator_loc,
3086 .opening_loc = target->opening_loc,
3087 .arguments = target->arguments,
3088 .closing_loc = target->closing_loc,
3089 .block = (pm_block_argument_node_t *) target->block,
3090 .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1),
3091 .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3092 .value = value
3093 };
3094
3095 // Here we're going to free the target, since it is no longer necessary.
3096 // However, we don't want to call `pm_node_destroy` because we want to keep
3097 // around all of its children since we just reused them.
3098 xfree(target);
3099
3100 return node;
3101}
3102
3106static pm_call_or_write_node_t *
3107pm_call_or_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3108 assert(target->block == NULL);
3109 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
3110 pm_call_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_call_or_write_node_t);
3111
3112 *node = (pm_call_or_write_node_t) {
3113 {
3114 .type = PM_CALL_OR_WRITE_NODE,
3115 .flags = target->base.flags,
3116 .node_id = PM_NODE_IDENTIFY(parser),
3117 .location = {
3118 .start = target->base.location.start,
3119 .end = value->location.end
3120 }
3121 },
3122 .receiver = target->receiver,
3123 .call_operator_loc = target->call_operator_loc,
3124 .message_loc = target->message_loc,
3125 .read_name = 0,
3126 .write_name = target->name,
3127 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3128 .value = value
3129 };
3130
3131 pm_call_write_read_name_init(parser, &node->read_name, &node->write_name);
3132
3133 // Here we're going to free the target, since it is no longer necessary.
3134 // However, we don't want to call `pm_node_destroy` because we want to keep
3135 // around all of its children since we just reused them.
3136 xfree(target);
3137
3138 return node;
3139}
3140
3144static pm_index_or_write_node_t *
3145pm_index_or_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3146 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
3147 pm_index_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_index_or_write_node_t);
3148
3149 pm_index_arguments_check(parser, target->arguments, target->block);
3150
3151 assert(!target->block || PM_NODE_TYPE_P(target->block, PM_BLOCK_ARGUMENT_NODE));
3152 *node = (pm_index_or_write_node_t) {
3153 {
3154 .type = PM_INDEX_OR_WRITE_NODE,
3155 .flags = target->base.flags,
3156 .node_id = PM_NODE_IDENTIFY(parser),
3157 .location = {
3158 .start = target->base.location.start,
3159 .end = value->location.end
3160 }
3161 },
3162 .receiver = target->receiver,
3163 .call_operator_loc = target->call_operator_loc,
3164 .opening_loc = target->opening_loc,
3165 .arguments = target->arguments,
3166 .closing_loc = target->closing_loc,
3167 .block = (pm_block_argument_node_t *) target->block,
3168 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3169 .value = value
3170 };
3171
3172 // Here we're going to free the target, since it is no longer necessary.
3173 // However, we don't want to call `pm_node_destroy` because we want to keep
3174 // around all of its children since we just reused them.
3175 xfree(target);
3176
3177 return node;
3178}
3179
3184static pm_call_target_node_t *
3185pm_call_target_node_create(pm_parser_t *parser, pm_call_node_t *target) {
3186 pm_call_target_node_t *node = PM_NODE_ALLOC(parser, pm_call_target_node_t);
3187
3188 *node = (pm_call_target_node_t) {
3189 {
3190 .type = PM_CALL_TARGET_NODE,
3191 .flags = target->base.flags,
3192 .node_id = PM_NODE_IDENTIFY(parser),
3193 .location = target->base.location
3194 },
3195 .receiver = target->receiver,
3196 .call_operator_loc = target->call_operator_loc,
3197 .name = target->name,
3198 .message_loc = target->message_loc
3199 };
3200
3201 // Here we're going to free the target, since it is no longer necessary.
3202 // However, we don't want to call `pm_node_destroy` because we want to keep
3203 // around all of its children since we just reused them.
3204 xfree(target);
3205
3206 return node;
3207}
3208
3213static pm_index_target_node_t *
3214pm_index_target_node_create(pm_parser_t *parser, pm_call_node_t *target) {
3215 pm_index_target_node_t *node = PM_NODE_ALLOC(parser, pm_index_target_node_t);
3216 pm_node_flags_t flags = target->base.flags;
3217
3218 pm_index_arguments_check(parser, target->arguments, target->block);
3219
3220 assert(!target->block || PM_NODE_TYPE_P(target->block, PM_BLOCK_ARGUMENT_NODE));
3221 *node = (pm_index_target_node_t) {
3222 {
3223 .type = PM_INDEX_TARGET_NODE,
3224 .flags = flags | PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE,
3225 .node_id = PM_NODE_IDENTIFY(parser),
3226 .location = target->base.location
3227 },
3228 .receiver = target->receiver,
3229 .opening_loc = target->opening_loc,
3230 .arguments = target->arguments,
3231 .closing_loc = target->closing_loc,
3232 .block = (pm_block_argument_node_t *) target->block,
3233 };
3234
3235 // Here we're going to free the target, since it is no longer necessary.
3236 // However, we don't want to call `pm_node_destroy` because we want to keep
3237 // around all of its children since we just reused them.
3238 xfree(target);
3239
3240 return node;
3241}
3242
3246static pm_capture_pattern_node_t *
3247pm_capture_pattern_node_create(pm_parser_t *parser, pm_node_t *value, pm_local_variable_target_node_t *target, const pm_token_t *operator) {
3248 pm_capture_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_capture_pattern_node_t);
3249
3250 *node = (pm_capture_pattern_node_t) {
3251 {
3252 .type = PM_CAPTURE_PATTERN_NODE,
3253 .node_id = PM_NODE_IDENTIFY(parser),
3254 .location = {
3255 .start = value->location.start,
3256 .end = target->base.location.end
3257 },
3258 },
3259 .value = value,
3260 .target = target,
3261 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
3262 };
3263
3264 return node;
3265}
3266
3270static pm_case_node_t *
3271pm_case_node_create(pm_parser_t *parser, const pm_token_t *case_keyword, pm_node_t *predicate, const pm_token_t *end_keyword) {
3272 pm_case_node_t *node = PM_NODE_ALLOC(parser, pm_case_node_t);
3273
3274 *node = (pm_case_node_t) {
3275 {
3276 .type = PM_CASE_NODE,
3277 .node_id = PM_NODE_IDENTIFY(parser),
3278 .location = {
3279 .start = case_keyword->start,
3280 .end = end_keyword->end
3281 },
3282 },
3283 .predicate = predicate,
3284 .else_clause = NULL,
3285 .case_keyword_loc = PM_LOCATION_TOKEN_VALUE(case_keyword),
3286 .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword),
3287 .conditions = { 0 }
3288 };
3289
3290 return node;
3291}
3292
3296static void
3297pm_case_node_condition_append(pm_case_node_t *node, pm_node_t *condition) {
3298 assert(PM_NODE_TYPE_P(condition, PM_WHEN_NODE));
3299
3300 pm_node_list_append(&node->conditions, condition);
3301 node->base.location.end = condition->location.end;
3302}
3303
3307static void
3308pm_case_node_else_clause_set(pm_case_node_t *node, pm_else_node_t *else_clause) {
3309 node->else_clause = else_clause;
3310 node->base.location.end = else_clause->base.location.end;
3311}
3312
3316static void
3317pm_case_node_end_keyword_loc_set(pm_case_node_t *node, const pm_token_t *end_keyword) {
3318 node->base.location.end = end_keyword->end;
3319 node->end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword);
3320}
3321
3325static pm_case_match_node_t *
3326pm_case_match_node_create(pm_parser_t *parser, const pm_token_t *case_keyword, pm_node_t *predicate, const pm_token_t *end_keyword) {
3327 pm_case_match_node_t *node = PM_NODE_ALLOC(parser, pm_case_match_node_t);
3328
3329 *node = (pm_case_match_node_t) {
3330 {
3331 .type = PM_CASE_MATCH_NODE,
3332 .node_id = PM_NODE_IDENTIFY(parser),
3333 .location = {
3334 .start = case_keyword->start,
3335 .end = end_keyword->end
3336 },
3337 },
3338 .predicate = predicate,
3339 .else_clause = NULL,
3340 .case_keyword_loc = PM_LOCATION_TOKEN_VALUE(case_keyword),
3341 .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword),
3342 .conditions = { 0 }
3343 };
3344
3345 return node;
3346}
3347
3351static void
3352pm_case_match_node_condition_append(pm_case_match_node_t *node, pm_node_t *condition) {
3353 assert(PM_NODE_TYPE_P(condition, PM_IN_NODE));
3354
3355 pm_node_list_append(&node->conditions, condition);
3356 node->base.location.end = condition->location.end;
3357}
3358
3362static void
3363pm_case_match_node_else_clause_set(pm_case_match_node_t *node, pm_else_node_t *else_clause) {
3364 node->else_clause = else_clause;
3365 node->base.location.end = else_clause->base.location.end;
3366}
3367
3371static void
3372pm_case_match_node_end_keyword_loc_set(pm_case_match_node_t *node, const pm_token_t *end_keyword) {
3373 node->base.location.end = end_keyword->end;
3374 node->end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword);
3375}
3376
3380static pm_class_node_t *
3381pm_class_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const pm_token_t *class_keyword, pm_node_t *constant_path, const pm_token_t *name, const pm_token_t *inheritance_operator, pm_node_t *superclass, pm_node_t *body, const pm_token_t *end_keyword) {
3382 pm_class_node_t *node = PM_NODE_ALLOC(parser, pm_class_node_t);
3383
3384 *node = (pm_class_node_t) {
3385 {
3386 .type = PM_CLASS_NODE,
3387 .node_id = PM_NODE_IDENTIFY(parser),
3388 .location = { .start = class_keyword->start, .end = end_keyword->end },
3389 },
3390 .locals = *locals,
3391 .class_keyword_loc = PM_LOCATION_TOKEN_VALUE(class_keyword),
3392 .constant_path = constant_path,
3393 .inheritance_operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(inheritance_operator),
3394 .superclass = superclass,
3395 .body = body,
3396 .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword),
3397 .name = pm_parser_constant_id_token(parser, name)
3398 };
3399
3400 return node;
3401}
3402
3406static pm_class_variable_and_write_node_t *
3407pm_class_variable_and_write_node_create(pm_parser_t *parser, pm_class_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3408 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
3409 pm_class_variable_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_class_variable_and_write_node_t);
3410
3411 *node = (pm_class_variable_and_write_node_t) {
3412 {
3413 .type = PM_CLASS_VARIABLE_AND_WRITE_NODE,
3414 .node_id = PM_NODE_IDENTIFY(parser),
3415 .location = {
3416 .start = target->base.location.start,
3417 .end = value->location.end
3418 }
3419 },
3420 .name = target->name,
3421 .name_loc = target->base.location,
3422 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3423 .value = value
3424 };
3425
3426 return node;
3427}
3428
3432static pm_class_variable_operator_write_node_t *
3433pm_class_variable_operator_write_node_create(pm_parser_t *parser, pm_class_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3434 pm_class_variable_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_class_variable_operator_write_node_t);
3435
3436 *node = (pm_class_variable_operator_write_node_t) {
3437 {
3438 .type = PM_CLASS_VARIABLE_OPERATOR_WRITE_NODE,
3439 .node_id = PM_NODE_IDENTIFY(parser),
3440 .location = {
3441 .start = target->base.location.start,
3442 .end = value->location.end
3443 }
3444 },
3445 .name = target->name,
3446 .name_loc = target->base.location,
3447 .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3448 .value = value,
3449 .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
3450 };
3451
3452 return node;
3453}
3454
3458static pm_class_variable_or_write_node_t *
3459pm_class_variable_or_write_node_create(pm_parser_t *parser, pm_class_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3460 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
3461 pm_class_variable_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_class_variable_or_write_node_t);
3462
3463 *node = (pm_class_variable_or_write_node_t) {
3464 {
3465 .type = PM_CLASS_VARIABLE_OR_WRITE_NODE,
3466 .node_id = PM_NODE_IDENTIFY(parser),
3467 .location = {
3468 .start = target->base.location.start,
3469 .end = value->location.end
3470 }
3471 },
3472 .name = target->name,
3473 .name_loc = target->base.location,
3474 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3475 .value = value
3476 };
3477
3478 return node;
3479}
3480
3484static pm_class_variable_read_node_t *
3485pm_class_variable_read_node_create(pm_parser_t *parser, const pm_token_t *token) {
3486 assert(token->type == PM_TOKEN_CLASS_VARIABLE);
3487 pm_class_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_class_variable_read_node_t);
3488
3489 *node = (pm_class_variable_read_node_t) {
3490 {
3491 .type = PM_CLASS_VARIABLE_READ_NODE,
3492 .node_id = PM_NODE_IDENTIFY(parser),
3493 .location = PM_LOCATION_TOKEN_VALUE(token)
3494 },
3495 .name = pm_parser_constant_id_token(parser, token)
3496 };
3497
3498 return node;
3499}
3500
3507static inline pm_node_flags_t
3508pm_implicit_array_write_flags(const pm_node_t *node, pm_node_flags_t flags) {
3509 if (PM_NODE_TYPE_P(node, PM_ARRAY_NODE) && ((const pm_array_node_t *) node)->opening_loc.start == NULL) {
3510 return flags;
3511 }
3512 return 0;
3513}
3514
3518static pm_class_variable_write_node_t *
3519pm_class_variable_write_node_create(pm_parser_t *parser, pm_class_variable_read_node_t *read_node, pm_token_t *operator, pm_node_t *value) {
3520 pm_class_variable_write_node_t *node = PM_NODE_ALLOC(parser, pm_class_variable_write_node_t);
3521
3522 *node = (pm_class_variable_write_node_t) {
3523 {
3524 .type = PM_CLASS_VARIABLE_WRITE_NODE,
3525 .flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
3526 .node_id = PM_NODE_IDENTIFY(parser),
3527 .location = {
3528 .start = read_node->base.location.start,
3529 .end = value->location.end
3530 },
3531 },
3532 .name = read_node->name,
3533 .name_loc = PM_LOCATION_NODE_VALUE((pm_node_t *) read_node),
3534 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3535 .value = value
3536 };
3537
3538 return node;
3539}
3540
3544static pm_constant_path_and_write_node_t *
3545pm_constant_path_and_write_node_create(pm_parser_t *parser, pm_constant_path_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3546 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
3547 pm_constant_path_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_path_and_write_node_t);
3548
3549 *node = (pm_constant_path_and_write_node_t) {
3550 {
3551 .type = PM_CONSTANT_PATH_AND_WRITE_NODE,
3552 .node_id = PM_NODE_IDENTIFY(parser),
3553 .location = {
3554 .start = target->base.location.start,
3555 .end = value->location.end
3556 }
3557 },
3558 .target = target,
3559 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3560 .value = value
3561 };
3562
3563 return node;
3564}
3565
3569static pm_constant_path_operator_write_node_t *
3570pm_constant_path_operator_write_node_create(pm_parser_t *parser, pm_constant_path_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3571 pm_constant_path_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_path_operator_write_node_t);
3572
3573 *node = (pm_constant_path_operator_write_node_t) {
3574 {
3575 .type = PM_CONSTANT_PATH_OPERATOR_WRITE_NODE,
3576 .node_id = PM_NODE_IDENTIFY(parser),
3577 .location = {
3578 .start = target->base.location.start,
3579 .end = value->location.end
3580 }
3581 },
3582 .target = target,
3583 .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3584 .value = value,
3585 .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
3586 };
3587
3588 return node;
3589}
3590
3594static pm_constant_path_or_write_node_t *
3595pm_constant_path_or_write_node_create(pm_parser_t *parser, pm_constant_path_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3596 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
3597 pm_constant_path_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_path_or_write_node_t);
3598
3599 *node = (pm_constant_path_or_write_node_t) {
3600 {
3601 .type = PM_CONSTANT_PATH_OR_WRITE_NODE,
3602 .node_id = PM_NODE_IDENTIFY(parser),
3603 .location = {
3604 .start = target->base.location.start,
3605 .end = value->location.end
3606 }
3607 },
3608 .target = target,
3609 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3610 .value = value
3611 };
3612
3613 return node;
3614}
3615
3619static pm_constant_path_node_t *
3620pm_constant_path_node_create(pm_parser_t *parser, pm_node_t *parent, const pm_token_t *delimiter, const pm_token_t *name_token) {
3621 pm_assert_value_expression(parser, parent);
3622 pm_constant_path_node_t *node = PM_NODE_ALLOC(parser, pm_constant_path_node_t);
3623
3624 pm_constant_id_t name = PM_CONSTANT_ID_UNSET;
3625 if (name_token->type == PM_TOKEN_CONSTANT) {
3626 name = pm_parser_constant_id_token(parser, name_token);
3627 }
3628
3629 *node = (pm_constant_path_node_t) {
3630 {
3631 .type = PM_CONSTANT_PATH_NODE,
3632 .node_id = PM_NODE_IDENTIFY(parser),
3633 .location = {
3634 .start = parent == NULL ? delimiter->start : parent->location.start,
3635 .end = name_token->end
3636 },
3637 },
3638 .parent = parent,
3639 .name = name,
3640 .delimiter_loc = PM_LOCATION_TOKEN_VALUE(delimiter),
3641 .name_loc = PM_LOCATION_TOKEN_VALUE(name_token)
3642 };
3643
3644 return node;
3645}
3646
3650static pm_constant_path_write_node_t *
3651pm_constant_path_write_node_create(pm_parser_t *parser, pm_constant_path_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3652 pm_constant_path_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_path_write_node_t);
3653
3654 *node = (pm_constant_path_write_node_t) {
3655 {
3656 .type = PM_CONSTANT_PATH_WRITE_NODE,
3657 .flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
3658 .node_id = PM_NODE_IDENTIFY(parser),
3659 .location = {
3660 .start = target->base.location.start,
3661 .end = value->location.end
3662 },
3663 },
3664 .target = target,
3665 .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
3666 .value = value
3667 };
3668
3669 return node;
3670}
3671
3675static pm_constant_and_write_node_t *
3676pm_constant_and_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3677 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
3678 pm_constant_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_and_write_node_t);
3679
3680 *node = (pm_constant_and_write_node_t) {
3681 {
3682 .type = PM_CONSTANT_AND_WRITE_NODE,
3683 .node_id = PM_NODE_IDENTIFY(parser),
3684 .location = {
3685 .start = target->base.location.start,
3686 .end = value->location.end
3687 }
3688 },
3689 .name = target->name,
3690 .name_loc = target->base.location,
3691 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3692 .value = value
3693 };
3694
3695 return node;
3696}
3697
3701static pm_constant_operator_write_node_t *
3702pm_constant_operator_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3703 pm_constant_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_operator_write_node_t);
3704
3705 *node = (pm_constant_operator_write_node_t) {
3706 {
3707 .type = PM_CONSTANT_OPERATOR_WRITE_NODE,
3708 .node_id = PM_NODE_IDENTIFY(parser),
3709 .location = {
3710 .start = target->base.location.start,
3711 .end = value->location.end
3712 }
3713 },
3714 .name = target->name,
3715 .name_loc = target->base.location,
3716 .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3717 .value = value,
3718 .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
3719 };
3720
3721 return node;
3722}
3723
3727static pm_constant_or_write_node_t *
3728pm_constant_or_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3729 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
3730 pm_constant_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_or_write_node_t);
3731
3732 *node = (pm_constant_or_write_node_t) {
3733 {
3734 .type = PM_CONSTANT_OR_WRITE_NODE,
3735 .node_id = PM_NODE_IDENTIFY(parser),
3736 .location = {
3737 .start = target->base.location.start,
3738 .end = value->location.end
3739 }
3740 },
3741 .name = target->name,
3742 .name_loc = target->base.location,
3743 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3744 .value = value
3745 };
3746
3747 return node;
3748}
3749
3753static pm_constant_read_node_t *
3754pm_constant_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
3755 assert(name->type == PM_TOKEN_CONSTANT || name->type == PM_TOKEN_MISSING);
3756 pm_constant_read_node_t *node = PM_NODE_ALLOC(parser, pm_constant_read_node_t);
3757
3758 *node = (pm_constant_read_node_t) {
3759 {
3760 .type = PM_CONSTANT_READ_NODE,
3761 .node_id = PM_NODE_IDENTIFY(parser),
3762 .location = PM_LOCATION_TOKEN_VALUE(name)
3763 },
3764 .name = pm_parser_constant_id_token(parser, name)
3765 };
3766
3767 return node;
3768}
3769
3773static pm_constant_write_node_t *
3774pm_constant_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3775 pm_constant_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_write_node_t);
3776
3777 *node = (pm_constant_write_node_t) {
3778 {
3779 .type = PM_CONSTANT_WRITE_NODE,
3780 .flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
3781 .node_id = PM_NODE_IDENTIFY(parser),
3782 .location = {
3783 .start = target->base.location.start,
3784 .end = value->location.end
3785 }
3786 },
3787 .name = target->name,
3788 .name_loc = target->base.location,
3789 .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
3790 .value = value
3791 };
3792
3793 return node;
3794}
3795
3799static void
3800pm_def_node_receiver_check(pm_parser_t *parser, const pm_node_t *node) {
3801 switch (PM_NODE_TYPE(node)) {
3802 case PM_BEGIN_NODE: {
3803 const pm_begin_node_t *cast = (pm_begin_node_t *) node;
3804 if (cast->statements != NULL) pm_def_node_receiver_check(parser, (pm_node_t *) cast->statements);
3805 break;
3806 }
3807 case PM_PARENTHESES_NODE: {
3808 const pm_parentheses_node_t *cast = (const pm_parentheses_node_t *) node;
3809 if (cast->body != NULL) pm_def_node_receiver_check(parser, cast->body);
3810 break;
3811 }
3812 case PM_STATEMENTS_NODE: {
3813 const pm_statements_node_t *cast = (const pm_statements_node_t *) node;
3814 pm_def_node_receiver_check(parser, cast->body.nodes[cast->body.size - 1]);
3815 break;
3816 }
3817 case PM_ARRAY_NODE:
3818 case PM_FLOAT_NODE:
3819 case PM_IMAGINARY_NODE:
3820 case PM_INTEGER_NODE:
3821 case PM_INTERPOLATED_REGULAR_EXPRESSION_NODE:
3822 case PM_INTERPOLATED_STRING_NODE:
3823 case PM_INTERPOLATED_SYMBOL_NODE:
3824 case PM_INTERPOLATED_X_STRING_NODE:
3825 case PM_RATIONAL_NODE:
3826 case PM_REGULAR_EXPRESSION_NODE:
3827 case PM_SOURCE_ENCODING_NODE:
3828 case PM_SOURCE_FILE_NODE:
3829 case PM_SOURCE_LINE_NODE:
3830 case PM_STRING_NODE:
3831 case PM_SYMBOL_NODE:
3832 case PM_X_STRING_NODE:
3833 pm_parser_err_node(parser, node, PM_ERR_SINGLETON_FOR_LITERALS);
3834 break;
3835 default:
3836 break;
3837 }
3838}
3839
3843static pm_def_node_t *
3844pm_def_node_create(
3845 pm_parser_t *parser,
3846 pm_constant_id_t name,
3847 const pm_token_t *name_loc,
3848 pm_node_t *receiver,
3849 pm_parameters_node_t *parameters,
3850 pm_node_t *body,
3851 pm_constant_id_list_t *locals,
3852 const pm_token_t *def_keyword,
3853 const pm_token_t *operator,
3854 const pm_token_t *lparen,
3855 const pm_token_t *rparen,
3856 const pm_token_t *equal,
3857 const pm_token_t *end_keyword
3858) {
3859 pm_def_node_t *node = PM_NODE_ALLOC(parser, pm_def_node_t);
3860 const uint8_t *end;
3861
3862 if (end_keyword->type == PM_TOKEN_NOT_PROVIDED) {
3863 end = body->location.end;
3864 } else {
3865 end = end_keyword->end;
3866 }
3867
3868 if ((receiver != NULL) && PM_NODE_TYPE_P(receiver, PM_PARENTHESES_NODE)) {
3869 pm_def_node_receiver_check(parser, receiver);
3870 }
3871
3872 *node = (pm_def_node_t) {
3873 {
3874 .type = PM_DEF_NODE,
3875 .node_id = PM_NODE_IDENTIFY(parser),
3876 .location = { .start = def_keyword->start, .end = end },
3877 },
3878 .name = name,
3879 .name_loc = PM_LOCATION_TOKEN_VALUE(name_loc),
3880 .receiver = receiver,
3881 .parameters = parameters,
3882 .body = body,
3883 .locals = *locals,
3884 .def_keyword_loc = PM_LOCATION_TOKEN_VALUE(def_keyword),
3885 .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
3886 .lparen_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(lparen),
3887 .rparen_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(rparen),
3888 .equal_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(equal),
3889 .end_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(end_keyword)
3890 };
3891
3892 return node;
3893}
3894
3898static pm_defined_node_t *
3899pm_defined_node_create(pm_parser_t *parser, const pm_token_t *lparen, pm_node_t *value, const pm_token_t *rparen, const pm_location_t *keyword_loc) {
3900 pm_defined_node_t *node = PM_NODE_ALLOC(parser, pm_defined_node_t);
3901
3902 *node = (pm_defined_node_t) {
3903 {
3904 .type = PM_DEFINED_NODE,
3905 .node_id = PM_NODE_IDENTIFY(parser),
3906 .location = {
3907 .start = keyword_loc->start,
3908 .end = (rparen->type == PM_TOKEN_NOT_PROVIDED ? value->location.end : rparen->end)
3909 },
3910 },
3911 .lparen_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(lparen),
3912 .value = value,
3913 .rparen_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(rparen),
3914 .keyword_loc = *keyword_loc
3915 };
3916
3917 return node;
3918}
3919
3923static pm_else_node_t *
3924pm_else_node_create(pm_parser_t *parser, const pm_token_t *else_keyword, pm_statements_node_t *statements, const pm_token_t *end_keyword) {
3925 pm_else_node_t *node = PM_NODE_ALLOC(parser, pm_else_node_t);
3926 const uint8_t *end = NULL;
3927 if ((end_keyword->type == PM_TOKEN_NOT_PROVIDED) && (statements != NULL)) {
3928 end = statements->base.location.end;
3929 } else {
3930 end = end_keyword->end;
3931 }
3932
3933 *node = (pm_else_node_t) {
3934 {
3935 .type = PM_ELSE_NODE,
3936 .node_id = PM_NODE_IDENTIFY(parser),
3937 .location = {
3938 .start = else_keyword->start,
3939 .end = end,
3940 },
3941 },
3942 .else_keyword_loc = PM_LOCATION_TOKEN_VALUE(else_keyword),
3943 .statements = statements,
3944 .end_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(end_keyword)
3945 };
3946
3947 return node;
3948}
3949
3953static pm_embedded_statements_node_t *
3954pm_embedded_statements_node_create(pm_parser_t *parser, const pm_token_t *opening, pm_statements_node_t *statements, const pm_token_t *closing) {
3955 pm_embedded_statements_node_t *node = PM_NODE_ALLOC(parser, pm_embedded_statements_node_t);
3956
3957 *node = (pm_embedded_statements_node_t) {
3958 {
3959 .type = PM_EMBEDDED_STATEMENTS_NODE,
3960 .node_id = PM_NODE_IDENTIFY(parser),
3961 .location = {
3962 .start = opening->start,
3963 .end = closing->end
3964 }
3965 },
3966 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
3967 .statements = statements,
3968 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing)
3969 };
3970
3971 return node;
3972}
3973
3977static pm_embedded_variable_node_t *
3978pm_embedded_variable_node_create(pm_parser_t *parser, const pm_token_t *operator, pm_node_t *variable) {
3979 pm_embedded_variable_node_t *node = PM_NODE_ALLOC(parser, pm_embedded_variable_node_t);
3980
3981 *node = (pm_embedded_variable_node_t) {
3982 {
3983 .type = PM_EMBEDDED_VARIABLE_NODE,
3984 .node_id = PM_NODE_IDENTIFY(parser),
3985 .location = {
3986 .start = operator->start,
3987 .end = variable->location.end
3988 }
3989 },
3990 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3991 .variable = variable
3992 };
3993
3994 return node;
3995}
3996
4000static pm_ensure_node_t *
4001pm_ensure_node_create(pm_parser_t *parser, const pm_token_t *ensure_keyword, pm_statements_node_t *statements, const pm_token_t *end_keyword) {
4002 pm_ensure_node_t *node = PM_NODE_ALLOC(parser, pm_ensure_node_t);
4003
4004 *node = (pm_ensure_node_t) {
4005 {
4006 .type = PM_ENSURE_NODE,
4007 .node_id = PM_NODE_IDENTIFY(parser),
4008 .location = {
4009 .start = ensure_keyword->start,
4010 .end = end_keyword->end
4011 },
4012 },
4013 .ensure_keyword_loc = PM_LOCATION_TOKEN_VALUE(ensure_keyword),
4014 .statements = statements,
4015 .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword)
4016 };
4017
4018 return node;
4019}
4020
4024static pm_false_node_t *
4025pm_false_node_create(pm_parser_t *parser, const pm_token_t *token) {
4026 assert(token->type == PM_TOKEN_KEYWORD_FALSE);
4027 pm_false_node_t *node = PM_NODE_ALLOC(parser, pm_false_node_t);
4028
4029 *node = (pm_false_node_t) {{
4030 .type = PM_FALSE_NODE,
4031 .flags = PM_NODE_FLAG_STATIC_LITERAL,
4032 .node_id = PM_NODE_IDENTIFY(parser),
4033 .location = PM_LOCATION_TOKEN_VALUE(token)
4034 }};
4035
4036 return node;
4037}
4038
4043static pm_find_pattern_node_t *
4044pm_find_pattern_node_create(pm_parser_t *parser, pm_node_list_t *nodes) {
4045 pm_find_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_find_pattern_node_t);
4046
4047 pm_node_t *left = nodes->nodes[0];
4048 assert(PM_NODE_TYPE_P(left, PM_SPLAT_NODE));
4049 pm_splat_node_t *left_splat_node = (pm_splat_node_t *) left;
4050
4051 pm_node_t *right;
4052
4053 if (nodes->size == 1) {
4054 right = (pm_node_t *) pm_missing_node_create(parser, left->location.end, left->location.end);
4055 } else {
4056 right = nodes->nodes[nodes->size - 1];
4057 assert(PM_NODE_TYPE_P(right, PM_SPLAT_NODE));
4058 }
4059
4060#if PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS
4061 // FindPatternNode#right is typed as SplatNode in this case, so replace the potential MissingNode with a SplatNode.
4062 // The resulting AST will anyway be ignored, but this file still needs to compile.
4063 pm_splat_node_t *right_splat_node = PM_NODE_TYPE_P(right, PM_SPLAT_NODE) ? (pm_splat_node_t *) right : left_splat_node;
4064#else
4065 pm_node_t *right_splat_node = right;
4066#endif
4067 *node = (pm_find_pattern_node_t) {
4068 {
4069 .type = PM_FIND_PATTERN_NODE,
4070 .node_id = PM_NODE_IDENTIFY(parser),
4071 .location = {
4072 .start = left->location.start,
4073 .end = right->location.end,
4074 },
4075 },
4076 .constant = NULL,
4077 .left = left_splat_node,
4078 .right = right_splat_node,
4079 .requireds = { 0 },
4080 .opening_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
4081 .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
4082 };
4083
4084 // For now we're going to just copy over each pointer manually. This could be
4085 // much more efficient, as we could instead resize the node list to only point
4086 // to 1...-1.
4087 for (size_t index = 1; index < nodes->size - 1; index++) {
4088 pm_node_list_append(&node->requireds, nodes->nodes[index]);
4089 }
4090
4091 return node;
4092}
4093
4098static double
4099pm_double_parse(pm_parser_t *parser, const pm_token_t *token) {
4100 ptrdiff_t diff = token->end - token->start;
4101 if (diff <= 0) return 0.0;
4102
4103 // First, get a buffer of the content.
4104 size_t length = (size_t) diff;
4105 char *buffer = xmalloc(sizeof(char) * (length + 1));
4106 memcpy((void *) buffer, token->start, length);
4107
4108 // Next, determine if we need to replace the decimal point because of
4109 // locale-specific options, and then normalize them if we have to.
4110 char decimal_point = *localeconv()->decimal_point;
4111 if (decimal_point != '.') {
4112 for (size_t index = 0; index < length; index++) {
4113 if (buffer[index] == '.') buffer[index] = decimal_point;
4114 }
4115 }
4116
4117 // Next, handle underscores by removing them from the buffer.
4118 for (size_t index = 0; index < length; index++) {
4119 if (buffer[index] == '_') {
4120 memmove((void *) (buffer + index), (void *) (buffer + index + 1), length - index);
4121 length--;
4122 }
4123 }
4124
4125 // Null-terminate the buffer so that strtod cannot read off the end.
4126 buffer[length] = '\0';
4127
4128 // Now, call strtod to parse the value. Note that CRuby has their own
4129 // version of strtod which avoids locales. We're okay using the locale-aware
4130 // version because we've already validated through the parser that the token
4131 // is in a valid format.
4132 errno = 0;
4133 char *eptr;
4134 double value = strtod(buffer, &eptr);
4135
4136 // This should never happen, because we've already checked that the token
4137 // is in a valid format. However it's good to be safe.
4138 if ((eptr != buffer + length) || (errno != 0 && errno != ERANGE)) {
4139 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, (*token), PM_ERR_FLOAT_PARSE);
4140 xfree((void *) buffer);
4141 return 0.0;
4142 }
4143
4144 // If errno is set, then it should only be ERANGE. At this point we need to
4145 // check if it's infinity (it should be).
4146 if (errno == ERANGE && PRISM_ISINF(value)) {
4147 int warn_width;
4148 const char *ellipsis;
4149
4150 if (length > 20) {
4151 warn_width = 20;
4152 ellipsis = "...";
4153 } else {
4154 warn_width = (int) length;
4155 ellipsis = "";
4156 }
4157
4158 pm_diagnostic_list_append_format(&parser->warning_list, token->start, token->end, PM_WARN_FLOAT_OUT_OF_RANGE, warn_width, (const char *) token->start, ellipsis);
4159 value = (value < 0.0) ? -HUGE_VAL : HUGE_VAL;
4160 }
4161
4162 // Finally we can free the buffer and return the value.
4163 xfree((void *) buffer);
4164 return value;
4165}
4166
4170static pm_float_node_t *
4171pm_float_node_create(pm_parser_t *parser, const pm_token_t *token) {
4172 assert(token->type == PM_TOKEN_FLOAT);
4173 pm_float_node_t *node = PM_NODE_ALLOC(parser, pm_float_node_t);
4174
4175 *node = (pm_float_node_t) {
4176 {
4177 .type = PM_FLOAT_NODE,
4178 .flags = PM_NODE_FLAG_STATIC_LITERAL,
4179 .node_id = PM_NODE_IDENTIFY(parser),
4180 .location = PM_LOCATION_TOKEN_VALUE(token)
4181 },
4182 .value = pm_double_parse(parser, token)
4183 };
4184
4185 return node;
4186}
4187
4191static pm_imaginary_node_t *
4192pm_float_node_imaginary_create(pm_parser_t *parser, const pm_token_t *token) {
4193 assert(token->type == PM_TOKEN_FLOAT_IMAGINARY);
4194
4195 pm_imaginary_node_t *node = PM_NODE_ALLOC(parser, pm_imaginary_node_t);
4196 *node = (pm_imaginary_node_t) {
4197 {
4198 .type = PM_IMAGINARY_NODE,
4199 .flags = PM_NODE_FLAG_STATIC_LITERAL,
4200 .node_id = PM_NODE_IDENTIFY(parser),
4201 .location = PM_LOCATION_TOKEN_VALUE(token)
4202 },
4203 .numeric = (pm_node_t *) pm_float_node_create(parser, &((pm_token_t) {
4204 .type = PM_TOKEN_FLOAT,
4205 .start = token->start,
4206 .end = token->end - 1
4207 }))
4208 };
4209
4210 return node;
4211}
4212
4216static pm_rational_node_t *
4217pm_float_node_rational_create(pm_parser_t *parser, const pm_token_t *token) {
4218 assert(token->type == PM_TOKEN_FLOAT_RATIONAL);
4219
4220 pm_rational_node_t *node = PM_NODE_ALLOC(parser, pm_rational_node_t);
4221 *node = (pm_rational_node_t) {
4222 {
4223 .type = PM_RATIONAL_NODE,
4224 .flags = PM_INTEGER_BASE_FLAGS_DECIMAL | PM_NODE_FLAG_STATIC_LITERAL,
4225 .node_id = PM_NODE_IDENTIFY(parser),
4226 .location = PM_LOCATION_TOKEN_VALUE(token)
4227 },
4228 .numerator = { 0 },
4229 .denominator = { 0 }
4230 };
4231
4232 const uint8_t *start = token->start;
4233 const uint8_t *end = token->end - 1; // r
4234
4235 while (start < end && *start == '0') start++; // 0.1 -> .1
4236 while (end > start && end[-1] == '0') end--; // 1.0 -> 1.
4237
4238 size_t length = (size_t) (end - start);
4239 if (length == 1) {
4240 node->denominator.value = 1;
4241 return node;
4242 }
4243
4244 const uint8_t *point = memchr(start, '.', length);
4245 assert(point && "should have a decimal point");
4246
4247 uint8_t *digits = malloc(length);
4248 if (digits == NULL) {
4249 fputs("[pm_float_node_rational_create] Failed to allocate memory", stderr);
4250 abort();
4251 }
4252
4253 memcpy(digits, start, (unsigned long) (point - start));
4254 memcpy(digits + (point - start), point + 1, (unsigned long) (end - point - 1));
4255 pm_integer_parse(&node->numerator, PM_INTEGER_BASE_DEFAULT, digits, digits + length - 1);
4256
4257 digits[0] = '1';
4258 if (end - point > 1) memset(digits + 1, '0', (size_t) (end - point - 1));
4259 pm_integer_parse(&node->denominator, PM_INTEGER_BASE_DEFAULT, digits, digits + (end - point));
4260 free(digits);
4261
4262 pm_integers_reduce(&node->numerator, &node->denominator);
4263 return node;
4264}
4265
4270static pm_imaginary_node_t *
4271pm_float_node_rational_imaginary_create(pm_parser_t *parser, const pm_token_t *token) {
4272 assert(token->type == PM_TOKEN_FLOAT_RATIONAL_IMAGINARY);
4273
4274 pm_imaginary_node_t *node = PM_NODE_ALLOC(parser, pm_imaginary_node_t);
4275 *node = (pm_imaginary_node_t) {
4276 {
4277 .type = PM_IMAGINARY_NODE,
4278 .flags = PM_NODE_FLAG_STATIC_LITERAL,
4279 .node_id = PM_NODE_IDENTIFY(parser),
4280 .location = PM_LOCATION_TOKEN_VALUE(token)
4281 },
4282 .numeric = (pm_node_t *) pm_float_node_rational_create(parser, &((pm_token_t) {
4283 .type = PM_TOKEN_FLOAT_RATIONAL,
4284 .start = token->start,
4285 .end = token->end - 1
4286 }))
4287 };
4288
4289 return node;
4290}
4291
4295static pm_for_node_t *
4296pm_for_node_create(
4297 pm_parser_t *parser,
4298 pm_node_t *index,
4299 pm_node_t *collection,
4300 pm_statements_node_t *statements,
4301 const pm_token_t *for_keyword,
4302 const pm_token_t *in_keyword,
4303 const pm_token_t *do_keyword,
4304 const pm_token_t *end_keyword
4305) {
4306 pm_for_node_t *node = PM_NODE_ALLOC(parser, pm_for_node_t);
4307
4308 *node = (pm_for_node_t) {
4309 {
4310 .type = PM_FOR_NODE,
4311 .node_id = PM_NODE_IDENTIFY(parser),
4312 .location = {
4313 .start = for_keyword->start,
4314 .end = end_keyword->end
4315 },
4316 },
4317 .index = index,
4318 .collection = collection,
4319 .statements = statements,
4320 .for_keyword_loc = PM_LOCATION_TOKEN_VALUE(for_keyword),
4321 .in_keyword_loc = PM_LOCATION_TOKEN_VALUE(in_keyword),
4322 .do_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(do_keyword),
4323 .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword)
4324 };
4325
4326 return node;
4327}
4328
4332static pm_forwarding_arguments_node_t *
4333pm_forwarding_arguments_node_create(pm_parser_t *parser, const pm_token_t *token) {
4334 assert(token->type == PM_TOKEN_UDOT_DOT_DOT);
4335 pm_forwarding_arguments_node_t *node = PM_NODE_ALLOC(parser, pm_forwarding_arguments_node_t);
4336
4337 *node = (pm_forwarding_arguments_node_t) {{
4338 .type = PM_FORWARDING_ARGUMENTS_NODE,
4339 .node_id = PM_NODE_IDENTIFY(parser),
4340 .location = PM_LOCATION_TOKEN_VALUE(token)
4341 }};
4342
4343 return node;
4344}
4345
4349static pm_forwarding_parameter_node_t *
4350pm_forwarding_parameter_node_create(pm_parser_t *parser, const pm_token_t *token) {
4351 assert(token->type == PM_TOKEN_UDOT_DOT_DOT);
4352 pm_forwarding_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_forwarding_parameter_node_t);
4353
4354 *node = (pm_forwarding_parameter_node_t) {{
4355 .type = PM_FORWARDING_PARAMETER_NODE,
4356 .node_id = PM_NODE_IDENTIFY(parser),
4357 .location = PM_LOCATION_TOKEN_VALUE(token)
4358 }};
4359
4360 return node;
4361}
4362
4366static pm_forwarding_super_node_t *
4367pm_forwarding_super_node_create(pm_parser_t *parser, const pm_token_t *token, pm_arguments_t *arguments) {
4368 assert(arguments->block == NULL || PM_NODE_TYPE_P(arguments->block, PM_BLOCK_NODE));
4369 assert(token->type == PM_TOKEN_KEYWORD_SUPER);
4370 pm_forwarding_super_node_t *node = PM_NODE_ALLOC(parser, pm_forwarding_super_node_t);
4371
4372 pm_block_node_t *block = NULL;
4373 if (arguments->block != NULL) {
4374 block = (pm_block_node_t *) arguments->block;
4375 }
4376
4377 *node = (pm_forwarding_super_node_t) {
4378 {
4379 .type = PM_FORWARDING_SUPER_NODE,
4380 .node_id = PM_NODE_IDENTIFY(parser),
4381 .location = {
4382 .start = token->start,
4383 .end = block != NULL ? block->base.location.end : token->end
4384 },
4385 },
4386 .block = block
4387 };
4388
4389 return node;
4390}
4391
4396static pm_hash_pattern_node_t *
4397pm_hash_pattern_node_empty_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *closing) {
4398 pm_hash_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_hash_pattern_node_t);
4399
4400 *node = (pm_hash_pattern_node_t) {
4401 {
4402 .type = PM_HASH_PATTERN_NODE,
4403 .node_id = PM_NODE_IDENTIFY(parser),
4404 .location = {
4405 .start = opening->start,
4406 .end = closing->end
4407 },
4408 },
4409 .constant = NULL,
4410 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
4411 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
4412 .elements = { 0 },
4413 .rest = NULL
4414 };
4415
4416 return node;
4417}
4418
4422static pm_hash_pattern_node_t *
4423pm_hash_pattern_node_node_list_create(pm_parser_t *parser, pm_node_list_t *elements, pm_node_t *rest) {
4424 pm_hash_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_hash_pattern_node_t);
4425
4426 const uint8_t *start;
4427 const uint8_t *end;
4428
4429 if (elements->size > 0) {
4430 if (rest) {
4431 start = elements->nodes[0]->location.start;
4432 end = rest->location.end;
4433 } else {
4434 start = elements->nodes[0]->location.start;
4435 end = elements->nodes[elements->size - 1]->location.end;
4436 }
4437 } else {
4438 assert(rest != NULL);
4439 start = rest->location.start;
4440 end = rest->location.end;
4441 }
4442
4443 *node = (pm_hash_pattern_node_t) {
4444 {
4445 .type = PM_HASH_PATTERN_NODE,
4446 .node_id = PM_NODE_IDENTIFY(parser),
4447 .location = {
4448 .start = start,
4449 .end = end
4450 },
4451 },
4452 .constant = NULL,
4453 .elements = { 0 },
4454 .rest = rest,
4455 .opening_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
4456 .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
4457 };
4458
4459 pm_node_t *element;
4460 PM_NODE_LIST_FOREACH(elements, index, element) {
4461 pm_node_list_append(&node->elements, element);
4462 }
4463
4464 return node;
4465}
4466
4470static pm_constant_id_t
4471pm_global_variable_write_name(pm_parser_t *parser, const pm_node_t *target) {
4472 switch (PM_NODE_TYPE(target)) {
4473 case PM_GLOBAL_VARIABLE_READ_NODE:
4474 return ((pm_global_variable_read_node_t *) target)->name;
4475 case PM_BACK_REFERENCE_READ_NODE:
4476 return ((pm_back_reference_read_node_t *) target)->name;
4477 case PM_NUMBERED_REFERENCE_READ_NODE:
4478 // This will only ever happen in the event of a syntax error, but we
4479 // still need to provide something for the node.
4480 return pm_parser_constant_id_location(parser, target->location.start, target->location.end);
4481 default:
4482 assert(false && "unreachable");
4483 return (pm_constant_id_t) -1;
4484 }
4485}
4486
4490static pm_global_variable_and_write_node_t *
4491pm_global_variable_and_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value) {
4492 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
4493 pm_global_variable_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_and_write_node_t);
4494
4495 *node = (pm_global_variable_and_write_node_t) {
4496 {
4497 .type = PM_GLOBAL_VARIABLE_AND_WRITE_NODE,
4498 .node_id = PM_NODE_IDENTIFY(parser),
4499 .location = {
4500 .start = target->location.start,
4501 .end = value->location.end
4502 }
4503 },
4504 .name = pm_global_variable_write_name(parser, target),
4505 .name_loc = target->location,
4506 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
4507 .value = value
4508 };
4509
4510 return node;
4511}
4512
4516static pm_global_variable_operator_write_node_t *
4517pm_global_variable_operator_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value) {
4518 pm_global_variable_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_operator_write_node_t);
4519
4520 *node = (pm_global_variable_operator_write_node_t) {
4521 {
4522 .type = PM_GLOBAL_VARIABLE_OPERATOR_WRITE_NODE,
4523 .node_id = PM_NODE_IDENTIFY(parser),
4524 .location = {
4525 .start = target->location.start,
4526 .end = value->location.end
4527 }
4528 },
4529 .name = pm_global_variable_write_name(parser, target),
4530 .name_loc = target->location,
4531 .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
4532 .value = value,
4533 .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
4534 };
4535
4536 return node;
4537}
4538
4542static pm_global_variable_or_write_node_t *
4543pm_global_variable_or_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value) {
4544 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
4545 pm_global_variable_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_or_write_node_t);
4546
4547 *node = (pm_global_variable_or_write_node_t) {
4548 {
4549 .type = PM_GLOBAL_VARIABLE_OR_WRITE_NODE,
4550 .node_id = PM_NODE_IDENTIFY(parser),
4551 .location = {
4552 .start = target->location.start,
4553 .end = value->location.end
4554 }
4555 },
4556 .name = pm_global_variable_write_name(parser, target),
4557 .name_loc = target->location,
4558 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
4559 .value = value
4560 };
4561
4562 return node;
4563}
4564
4568static pm_global_variable_read_node_t *
4569pm_global_variable_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
4570 pm_global_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_read_node_t);
4571
4572 *node = (pm_global_variable_read_node_t) {
4573 {
4574 .type = PM_GLOBAL_VARIABLE_READ_NODE,
4575 .node_id = PM_NODE_IDENTIFY(parser),
4576 .location = PM_LOCATION_TOKEN_VALUE(name),
4577 },
4578 .name = pm_parser_constant_id_token(parser, name)
4579 };
4580
4581 return node;
4582}
4583
4587static pm_global_variable_read_node_t *
4588pm_global_variable_read_node_synthesized_create(pm_parser_t *parser, pm_constant_id_t name) {
4589 pm_global_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_read_node_t);
4590
4591 *node = (pm_global_variable_read_node_t) {
4592 {
4593 .type = PM_GLOBAL_VARIABLE_READ_NODE,
4594 .node_id = PM_NODE_IDENTIFY(parser),
4595 .location = PM_LOCATION_NULL_VALUE(parser)
4596 },
4597 .name = name
4598 };
4599
4600 return node;
4601}
4602
4606static pm_global_variable_write_node_t *
4607pm_global_variable_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value) {
4608 pm_global_variable_write_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_write_node_t);
4609
4610 *node = (pm_global_variable_write_node_t) {
4611 {
4612 .type = PM_GLOBAL_VARIABLE_WRITE_NODE,
4613 .node_id = PM_NODE_IDENTIFY(parser),
4614 .flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
4615 .location = {
4616 .start = target->location.start,
4617 .end = value->location.end
4618 },
4619 },
4620 .name = pm_global_variable_write_name(parser, target),
4621 .name_loc = PM_LOCATION_NODE_VALUE(target),
4622 .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
4623 .value = value
4624 };
4625
4626 return node;
4627}
4628
4632static pm_global_variable_write_node_t *
4633pm_global_variable_write_node_synthesized_create(pm_parser_t *parser, pm_constant_id_t name, pm_node_t *value) {
4634 pm_global_variable_write_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_write_node_t);
4635
4636 *node = (pm_global_variable_write_node_t) {
4637 {
4638 .type = PM_GLOBAL_VARIABLE_WRITE_NODE,
4639 .node_id = PM_NODE_IDENTIFY(parser),
4640 .location = PM_LOCATION_NULL_VALUE(parser)
4641 },
4642 .name = name,
4643 .name_loc = PM_LOCATION_NULL_VALUE(parser),
4644 .operator_loc = PM_LOCATION_NULL_VALUE(parser),
4645 .value = value
4646 };
4647
4648 return node;
4649}
4650
4654static pm_hash_node_t *
4655pm_hash_node_create(pm_parser_t *parser, const pm_token_t *opening) {
4656 assert(opening != NULL);
4657 pm_hash_node_t *node = PM_NODE_ALLOC(parser, pm_hash_node_t);
4658
4659 *node = (pm_hash_node_t) {
4660 {
4661 .type = PM_HASH_NODE,
4662 .flags = PM_NODE_FLAG_STATIC_LITERAL,
4663 .node_id = PM_NODE_IDENTIFY(parser),
4664 .location = PM_LOCATION_TOKEN_VALUE(opening)
4665 },
4666 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
4667 .closing_loc = PM_LOCATION_NULL_VALUE(parser),
4668 .elements = { 0 }
4669 };
4670
4671 return node;
4672}
4673
4677static inline void
4678pm_hash_node_elements_append(pm_hash_node_t *hash, pm_node_t *element) {
4679 pm_node_list_append(&hash->elements, element);
4680
4681 bool static_literal = PM_NODE_TYPE_P(element, PM_ASSOC_NODE);
4682 if (static_literal) {
4683 pm_assoc_node_t *assoc = (pm_assoc_node_t *) element;
4684 static_literal = !PM_NODE_TYPE_P(assoc->key, PM_ARRAY_NODE) && !PM_NODE_TYPE_P(assoc->key, PM_HASH_NODE) && !PM_NODE_TYPE_P(assoc->key, PM_RANGE_NODE);
4685 static_literal = static_literal && PM_NODE_FLAG_P(assoc->key, PM_NODE_FLAG_STATIC_LITERAL);
4686 static_literal = static_literal && PM_NODE_FLAG_P(assoc, PM_NODE_FLAG_STATIC_LITERAL);
4687 }
4688
4689 if (!static_literal) {
4690 pm_node_flag_unset((pm_node_t *)hash, PM_NODE_FLAG_STATIC_LITERAL);
4691 }
4692}
4693
4694static inline void
4695pm_hash_node_closing_loc_set(pm_hash_node_t *hash, pm_token_t *token) {
4696 hash->base.location.end = token->end;
4697 hash->closing_loc = PM_LOCATION_TOKEN_VALUE(token);
4698}
4699
4703static pm_if_node_t *
4704pm_if_node_create(pm_parser_t *parser,
4705 const pm_token_t *if_keyword,
4706 pm_node_t *predicate,
4707 const pm_token_t *then_keyword,
4708 pm_statements_node_t *statements,
4709 pm_node_t *subsequent,
4710 const pm_token_t *end_keyword
4711) {
4712 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
4713 pm_if_node_t *node = PM_NODE_ALLOC(parser, pm_if_node_t);
4714
4715 const uint8_t *end;
4716 if (end_keyword->type != PM_TOKEN_NOT_PROVIDED) {
4717 end = end_keyword->end;
4718 } else if (subsequent != NULL) {
4719 end = subsequent->location.end;
4720 } else if (pm_statements_node_body_length(statements) != 0) {
4721 end = statements->base.location.end;
4722 } else {
4723 end = predicate->location.end;
4724 }
4725
4726 *node = (pm_if_node_t) {
4727 {
4728 .type = PM_IF_NODE,
4729 .flags = PM_NODE_FLAG_NEWLINE,
4730 .node_id = PM_NODE_IDENTIFY(parser),
4731 .location = {
4732 .start = if_keyword->start,
4733 .end = end
4734 },
4735 },
4736 .if_keyword_loc = PM_LOCATION_TOKEN_VALUE(if_keyword),
4737 .predicate = predicate,
4738 .then_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(then_keyword),
4739 .statements = statements,
4740 .subsequent = subsequent,
4741 .end_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(end_keyword)
4742 };
4743
4744 return node;
4745}
4746
4750static pm_if_node_t *
4751pm_if_node_modifier_create(pm_parser_t *parser, pm_node_t *statement, const pm_token_t *if_keyword, pm_node_t *predicate) {
4752 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
4753 pm_if_node_t *node = PM_NODE_ALLOC(parser, pm_if_node_t);
4754
4755 pm_statements_node_t *statements = pm_statements_node_create(parser);
4756 pm_statements_node_body_append(parser, statements, statement, true);
4757
4758 *node = (pm_if_node_t) {
4759 {
4760 .type = PM_IF_NODE,
4761 .flags = PM_NODE_FLAG_NEWLINE,
4762 .node_id = PM_NODE_IDENTIFY(parser),
4763 .location = {
4764 .start = statement->location.start,
4765 .end = predicate->location.end
4766 },
4767 },
4768 .if_keyword_loc = PM_LOCATION_TOKEN_VALUE(if_keyword),
4769 .predicate = predicate,
4770 .then_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
4771 .statements = statements,
4772 .subsequent = NULL,
4773 .end_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
4774 };
4775
4776 return node;
4777}
4778
4782static pm_if_node_t *
4783pm_if_node_ternary_create(pm_parser_t *parser, pm_node_t *predicate, const pm_token_t *qmark, pm_node_t *true_expression, const pm_token_t *colon, pm_node_t *false_expression) {
4784 pm_assert_value_expression(parser, predicate);
4785 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
4786
4787 pm_statements_node_t *if_statements = pm_statements_node_create(parser);
4788 pm_statements_node_body_append(parser, if_statements, true_expression, true);
4789
4790 pm_statements_node_t *else_statements = pm_statements_node_create(parser);
4791 pm_statements_node_body_append(parser, else_statements, false_expression, true);
4792
4793 pm_token_t end_keyword = not_provided(parser);
4794 pm_else_node_t *else_node = pm_else_node_create(parser, colon, else_statements, &end_keyword);
4795
4796 pm_if_node_t *node = PM_NODE_ALLOC(parser, pm_if_node_t);
4797
4798 *node = (pm_if_node_t) {
4799 {
4800 .type = PM_IF_NODE,
4801 .flags = PM_NODE_FLAG_NEWLINE,
4802 .node_id = PM_NODE_IDENTIFY(parser),
4803 .location = {
4804 .start = predicate->location.start,
4805 .end = false_expression->location.end,
4806 },
4807 },
4808 .if_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
4809 .predicate = predicate,
4810 .then_keyword_loc = PM_LOCATION_TOKEN_VALUE(qmark),
4811 .statements = if_statements,
4812 .subsequent = (pm_node_t *) else_node,
4813 .end_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
4814 };
4815
4816 return node;
4817
4818}
4819
4820static inline void
4821pm_if_node_end_keyword_loc_set(pm_if_node_t *node, const pm_token_t *keyword) {
4822 node->base.location.end = keyword->end;
4823 node->end_keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword);
4824}
4825
4826static inline void
4827pm_else_node_end_keyword_loc_set(pm_else_node_t *node, const pm_token_t *keyword) {
4828 node->base.location.end = keyword->end;
4829 node->end_keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword);
4830}
4831
4835static pm_implicit_node_t *
4836pm_implicit_node_create(pm_parser_t *parser, pm_node_t *value) {
4837 pm_implicit_node_t *node = PM_NODE_ALLOC(parser, pm_implicit_node_t);
4838
4839 *node = (pm_implicit_node_t) {
4840 {
4841 .type = PM_IMPLICIT_NODE,
4842 .node_id = PM_NODE_IDENTIFY(parser),
4843 .location = value->location
4844 },
4845 .value = value
4846 };
4847
4848 return node;
4849}
4850
4854static pm_implicit_rest_node_t *
4855pm_implicit_rest_node_create(pm_parser_t *parser, const pm_token_t *token) {
4856 assert(token->type == PM_TOKEN_COMMA);
4857
4858 pm_implicit_rest_node_t *node = PM_NODE_ALLOC(parser, pm_implicit_rest_node_t);
4859
4860 *node = (pm_implicit_rest_node_t) {
4861 {
4862 .type = PM_IMPLICIT_REST_NODE,
4863 .node_id = PM_NODE_IDENTIFY(parser),
4864 .location = PM_LOCATION_TOKEN_VALUE(token)
4865 }
4866 };
4867
4868 return node;
4869}
4870
4874static pm_integer_node_t *
4875pm_integer_node_create(pm_parser_t *parser, pm_node_flags_t base, const pm_token_t *token) {
4876 assert(token->type == PM_TOKEN_INTEGER);
4877 pm_integer_node_t *node = PM_NODE_ALLOC(parser, pm_integer_node_t);
4878
4879 *node = (pm_integer_node_t) {
4880 {
4881 .type = PM_INTEGER_NODE,
4882 .flags = base | PM_NODE_FLAG_STATIC_LITERAL,
4883 .node_id = PM_NODE_IDENTIFY(parser),
4884 .location = PM_LOCATION_TOKEN_VALUE(token)
4885 },
4886 .value = { 0 }
4887 };
4888
4889 pm_integer_base_t integer_base = PM_INTEGER_BASE_DECIMAL;
4890 switch (base) {
4891 case PM_INTEGER_BASE_FLAGS_BINARY: integer_base = PM_INTEGER_BASE_BINARY; break;
4892 case PM_INTEGER_BASE_FLAGS_OCTAL: integer_base = PM_INTEGER_BASE_OCTAL; break;
4893 case PM_INTEGER_BASE_FLAGS_DECIMAL: break;
4894 case PM_INTEGER_BASE_FLAGS_HEXADECIMAL: integer_base = PM_INTEGER_BASE_HEXADECIMAL; break;
4895 default: assert(false && "unreachable"); break;
4896 }
4897
4898 pm_integer_parse(&node->value, integer_base, token->start, token->end);
4899 return node;
4900}
4901
4906static pm_imaginary_node_t *
4907pm_integer_node_imaginary_create(pm_parser_t *parser, pm_node_flags_t base, const pm_token_t *token) {
4908 assert(token->type == PM_TOKEN_INTEGER_IMAGINARY);
4909
4910 pm_imaginary_node_t *node = PM_NODE_ALLOC(parser, pm_imaginary_node_t);
4911 *node = (pm_imaginary_node_t) {
4912 {
4913 .type = PM_IMAGINARY_NODE,
4914 .flags = PM_NODE_FLAG_STATIC_LITERAL,
4915 .node_id = PM_NODE_IDENTIFY(parser),
4916 .location = PM_LOCATION_TOKEN_VALUE(token)
4917 },
4918 .numeric = (pm_node_t *) pm_integer_node_create(parser, base, &((pm_token_t) {
4919 .type = PM_TOKEN_INTEGER,
4920 .start = token->start,
4921 .end = token->end - 1
4922 }))
4923 };
4924
4925 return node;
4926}
4927
4932static pm_rational_node_t *
4933pm_integer_node_rational_create(pm_parser_t *parser, pm_node_flags_t base, const pm_token_t *token) {
4934 assert(token->type == PM_TOKEN_INTEGER_RATIONAL);
4935
4936 pm_rational_node_t *node = PM_NODE_ALLOC(parser, pm_rational_node_t);
4937 *node = (pm_rational_node_t) {
4938 {
4939 .type = PM_RATIONAL_NODE,
4940 .flags = base | PM_NODE_FLAG_STATIC_LITERAL,
4941 .node_id = PM_NODE_IDENTIFY(parser),
4942 .location = PM_LOCATION_TOKEN_VALUE(token)
4943 },
4944 .numerator = { 0 },
4945 .denominator = { .value = 1, 0 }
4946 };
4947
4948 pm_integer_base_t integer_base = PM_INTEGER_BASE_DECIMAL;
4949 switch (base) {
4950 case PM_INTEGER_BASE_FLAGS_BINARY: integer_base = PM_INTEGER_BASE_BINARY; break;
4951 case PM_INTEGER_BASE_FLAGS_OCTAL: integer_base = PM_INTEGER_BASE_OCTAL; break;
4952 case PM_INTEGER_BASE_FLAGS_DECIMAL: break;
4953 case PM_INTEGER_BASE_FLAGS_HEXADECIMAL: integer_base = PM_INTEGER_BASE_HEXADECIMAL; break;
4954 default: assert(false && "unreachable"); break;
4955 }
4956
4957 pm_integer_parse(&node->numerator, integer_base, token->start, token->end - 1);
4958
4959 return node;
4960}
4961
4966static pm_imaginary_node_t *
4967pm_integer_node_rational_imaginary_create(pm_parser_t *parser, pm_node_flags_t base, const pm_token_t *token) {
4968 assert(token->type == PM_TOKEN_INTEGER_RATIONAL_IMAGINARY);
4969
4970 pm_imaginary_node_t *node = PM_NODE_ALLOC(parser, pm_imaginary_node_t);
4971 *node = (pm_imaginary_node_t) {
4972 {
4973 .type = PM_IMAGINARY_NODE,
4974 .flags = PM_NODE_FLAG_STATIC_LITERAL,
4975 .node_id = PM_NODE_IDENTIFY(parser),
4976 .location = PM_LOCATION_TOKEN_VALUE(token)
4977 },
4978 .numeric = (pm_node_t *) pm_integer_node_rational_create(parser, base, &((pm_token_t) {
4979 .type = PM_TOKEN_INTEGER_RATIONAL,
4980 .start = token->start,
4981 .end = token->end - 1
4982 }))
4983 };
4984
4985 return node;
4986}
4987
4991static pm_in_node_t *
4992pm_in_node_create(pm_parser_t *parser, pm_node_t *pattern, pm_statements_node_t *statements, const pm_token_t *in_keyword, const pm_token_t *then_keyword) {
4993 pm_in_node_t *node = PM_NODE_ALLOC(parser, pm_in_node_t);
4994
4995 const uint8_t *end;
4996 if (statements != NULL) {
4997 end = statements->base.location.end;
4998 } else if (then_keyword->type != PM_TOKEN_NOT_PROVIDED) {
4999 end = then_keyword->end;
5000 } else {
5001 end = pattern->location.end;
5002 }
5003
5004 *node = (pm_in_node_t) {
5005 {
5006 .type = PM_IN_NODE,
5007 .node_id = PM_NODE_IDENTIFY(parser),
5008 .location = {
5009 .start = in_keyword->start,
5010 .end = end
5011 },
5012 },
5013 .pattern = pattern,
5014 .statements = statements,
5015 .in_loc = PM_LOCATION_TOKEN_VALUE(in_keyword),
5016 .then_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(then_keyword)
5017 };
5018
5019 return node;
5020}
5021
5025static pm_instance_variable_and_write_node_t *
5026pm_instance_variable_and_write_node_create(pm_parser_t *parser, pm_instance_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
5027 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
5028 pm_instance_variable_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_instance_variable_and_write_node_t);
5029
5030 *node = (pm_instance_variable_and_write_node_t) {
5031 {
5032 .type = PM_INSTANCE_VARIABLE_AND_WRITE_NODE,
5033 .node_id = PM_NODE_IDENTIFY(parser),
5034 .location = {
5035 .start = target->base.location.start,
5036 .end = value->location.end
5037 }
5038 },
5039 .name = target->name,
5040 .name_loc = target->base.location,
5041 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
5042 .value = value
5043 };
5044
5045 return node;
5046}
5047
5051static pm_instance_variable_operator_write_node_t *
5052pm_instance_variable_operator_write_node_create(pm_parser_t *parser, pm_instance_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
5053 pm_instance_variable_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_instance_variable_operator_write_node_t);
5054
5055 *node = (pm_instance_variable_operator_write_node_t) {
5056 {
5057 .type = PM_INSTANCE_VARIABLE_OPERATOR_WRITE_NODE,
5058 .node_id = PM_NODE_IDENTIFY(parser),
5059 .location = {
5060 .start = target->base.location.start,
5061 .end = value->location.end
5062 }
5063 },
5064 .name = target->name,
5065 .name_loc = target->base.location,
5066 .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
5067 .value = value,
5068 .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
5069 };
5070
5071 return node;
5072}
5073
5077static pm_instance_variable_or_write_node_t *
5078pm_instance_variable_or_write_node_create(pm_parser_t *parser, pm_instance_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
5079 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
5080 pm_instance_variable_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_instance_variable_or_write_node_t);
5081
5082 *node = (pm_instance_variable_or_write_node_t) {
5083 {
5084 .type = PM_INSTANCE_VARIABLE_OR_WRITE_NODE,
5085 .node_id = PM_NODE_IDENTIFY(parser),
5086 .location = {
5087 .start = target->base.location.start,
5088 .end = value->location.end
5089 }
5090 },
5091 .name = target->name,
5092 .name_loc = target->base.location,
5093 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
5094 .value = value
5095 };
5096
5097 return node;
5098}
5099
5103static pm_instance_variable_read_node_t *
5104pm_instance_variable_read_node_create(pm_parser_t *parser, const pm_token_t *token) {
5105 assert(token->type == PM_TOKEN_INSTANCE_VARIABLE);
5106 pm_instance_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_instance_variable_read_node_t);
5107
5108 *node = (pm_instance_variable_read_node_t) {
5109 {
5110 .type = PM_INSTANCE_VARIABLE_READ_NODE,
5111 .node_id = PM_NODE_IDENTIFY(parser),
5112 .location = PM_LOCATION_TOKEN_VALUE(token)
5113 },
5114 .name = pm_parser_constant_id_token(parser, token)
5115 };
5116
5117 return node;
5118}
5119
5124static pm_instance_variable_write_node_t *
5125pm_instance_variable_write_node_create(pm_parser_t *parser, pm_instance_variable_read_node_t *read_node, pm_token_t *operator, pm_node_t *value) {
5126 pm_instance_variable_write_node_t *node = PM_NODE_ALLOC(parser, pm_instance_variable_write_node_t);
5127 *node = (pm_instance_variable_write_node_t) {
5128 {
5129 .type = PM_INSTANCE_VARIABLE_WRITE_NODE,
5130 .flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
5131 .node_id = PM_NODE_IDENTIFY(parser),
5132 .location = {
5133 .start = read_node->base.location.start,
5134 .end = value->location.end
5135 }
5136 },
5137 .name = read_node->name,
5138 .name_loc = PM_LOCATION_NODE_BASE_VALUE(read_node),
5139 .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
5140 .value = value
5141 };
5142
5143 return node;
5144}
5145
5151static void
5152pm_interpolated_node_append(pm_node_t *node, pm_node_list_t *parts, pm_node_t *part) {
5153 switch (PM_NODE_TYPE(part)) {
5154 case PM_STRING_NODE:
5155 pm_node_flag_set(part, PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN);
5156 break;
5157 case PM_EMBEDDED_STATEMENTS_NODE: {
5158 pm_embedded_statements_node_t *cast = (pm_embedded_statements_node_t *) part;
5159 pm_node_t *embedded = (cast->statements != NULL && cast->statements->body.size == 1) ? cast->statements->body.nodes[0] : NULL;
5160
5161 if (embedded == NULL) {
5162 // If there are no statements or more than one statement, then
5163 // we lose the static literal flag.
5164 pm_node_flag_unset(node, PM_NODE_FLAG_STATIC_LITERAL);
5165 } else if (PM_NODE_TYPE_P(embedded, PM_STRING_NODE)) {
5166 // If the embedded statement is a string, then we can keep the
5167 // static literal flag and mark the string as frozen.
5168 pm_node_flag_set(embedded, PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN);
5169 } else if (PM_NODE_TYPE_P(embedded, PM_INTERPOLATED_STRING_NODE) && PM_NODE_FLAG_P(embedded, PM_NODE_FLAG_STATIC_LITERAL)) {
5170 // If the embedded statement is an interpolated string and it's
5171 // a static literal, then we can keep the static literal flag.
5172 } else {
5173 // Otherwise we lose the static literal flag.
5174 pm_node_flag_unset(node, PM_NODE_FLAG_STATIC_LITERAL);
5175 }
5176
5177 break;
5178 }
5179 case PM_EMBEDDED_VARIABLE_NODE:
5180 pm_node_flag_unset((pm_node_t *) node, PM_NODE_FLAG_STATIC_LITERAL);
5181 break;
5182 default:
5183 assert(false && "unexpected node type");
5184 break;
5185 }
5186
5187 pm_node_list_append(parts, part);
5188}
5189
5193static pm_interpolated_regular_expression_node_t *
5194pm_interpolated_regular_expression_node_create(pm_parser_t *parser, const pm_token_t *opening) {
5195 pm_interpolated_regular_expression_node_t *node = PM_NODE_ALLOC(parser, pm_interpolated_regular_expression_node_t);
5196
5197 *node = (pm_interpolated_regular_expression_node_t) {
5198 {
5199 .type = PM_INTERPOLATED_REGULAR_EXPRESSION_NODE,
5200 .flags = PM_NODE_FLAG_STATIC_LITERAL,
5201 .node_id = PM_NODE_IDENTIFY(parser),
5202 .location = {
5203 .start = opening->start,
5204 .end = NULL,
5205 },
5206 },
5207 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
5208 .closing_loc = PM_LOCATION_TOKEN_VALUE(opening),
5209 .parts = { 0 }
5210 };
5211
5212 return node;
5213}
5214
5215static inline void
5216pm_interpolated_regular_expression_node_append(pm_interpolated_regular_expression_node_t *node, pm_node_t *part) {
5217 if (node->base.location.start > part->location.start) {
5218 node->base.location.start = part->location.start;
5219 }
5220 if (node->base.location.end < part->location.end) {
5221 node->base.location.end = part->location.end;
5222 }
5223
5224 pm_interpolated_node_append((pm_node_t *) node, &node->parts, part);
5225}
5226
5227static inline void
5228pm_interpolated_regular_expression_node_closing_set(pm_parser_t *parser, pm_interpolated_regular_expression_node_t *node, const pm_token_t *closing) {
5229 node->closing_loc = PM_LOCATION_TOKEN_VALUE(closing);
5230 node->base.location.end = closing->end;
5231 pm_node_flag_set((pm_node_t *) node, pm_regular_expression_flags_create(parser, closing));
5232}
5233
5257static inline void
5258pm_interpolated_string_node_append(pm_interpolated_string_node_t *node, pm_node_t *part) {
5259#define CLEAR_FLAGS(node) \
5260 node->base.flags = (pm_node_flags_t) (node->base.flags & ~(PM_NODE_FLAG_STATIC_LITERAL | PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN | PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE))
5261
5262#define MUTABLE_FLAGS(node) \
5263 node->base.flags = (pm_node_flags_t) ((node->base.flags | PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE) & ~PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN);
5264
5265 if (node->parts.size == 0 && node->opening_loc.start == NULL) {
5266 node->base.location.start = part->location.start;
5267 }
5268
5269 node->base.location.end = MAX(node->base.location.end, part->location.end);
5270
5271 switch (PM_NODE_TYPE(part)) {
5272 case PM_STRING_NODE:
5273 part->flags = (pm_node_flags_t) ((part->flags | PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN) & ~PM_STRING_FLAGS_MUTABLE);
5274 break;
5275 case PM_INTERPOLATED_STRING_NODE:
5276 if (PM_NODE_FLAG_P(part, PM_NODE_FLAG_STATIC_LITERAL)) {
5277 // If the string that we're concatenating is a static literal,
5278 // then we can keep the static literal flag for this string.
5279 } else {
5280 // Otherwise, we lose the static literal flag here and we should
5281 // also clear the mutability flags.
5282 CLEAR_FLAGS(node);
5283 }
5284 break;
5285 case PM_EMBEDDED_STATEMENTS_NODE: {
5286 pm_embedded_statements_node_t *cast = (pm_embedded_statements_node_t *) part;
5287 pm_node_t *embedded = (cast->statements != NULL && cast->statements->body.size == 1) ? cast->statements->body.nodes[0] : NULL;
5288
5289 if (embedded == NULL) {
5290 // If we're embedding multiple statements or no statements, then
5291 // the string is not longer a static literal.
5292 CLEAR_FLAGS(node);
5293 } else if (PM_NODE_TYPE_P(embedded, PM_STRING_NODE)) {
5294 // If the embedded statement is a string, then we can make that
5295 // string as frozen and static literal, and not touch the static
5296 // literal status of this string.
5297 embedded->flags = (pm_node_flags_t) ((embedded->flags | PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN) & ~PM_STRING_FLAGS_MUTABLE);
5298
5299 if (PM_NODE_FLAG_P(node, PM_NODE_FLAG_STATIC_LITERAL)) {
5300 MUTABLE_FLAGS(node);
5301 }
5302 } else if (PM_NODE_TYPE_P(embedded, PM_INTERPOLATED_STRING_NODE) && PM_NODE_FLAG_P(embedded, PM_NODE_FLAG_STATIC_LITERAL)) {
5303 // If the embedded statement is an interpolated string, but that
5304 // string is marked as static literal, then we can keep our
5305 // static literal status for this string.
5306 if (PM_NODE_FLAG_P(node, PM_NODE_FLAG_STATIC_LITERAL)) {
5307 MUTABLE_FLAGS(node);
5308 }
5309 } else {
5310 // In all other cases, we lose the static literal flag here and
5311 // become mutable.
5312 CLEAR_FLAGS(node);
5313 }
5314
5315 break;
5316 }
5317 case PM_EMBEDDED_VARIABLE_NODE:
5318 // Embedded variables clear static literal, which means we also
5319 // should clear the mutability flags.
5320 CLEAR_FLAGS(node);
5321 break;
5322 default:
5323 assert(false && "unexpected node type");
5324 break;
5325 }
5326
5327 pm_node_list_append(&node->parts, part);
5328
5329#undef CLEAR_FLAGS
5330#undef MUTABLE_FLAGS
5331}
5332
5336static pm_interpolated_string_node_t *
5337pm_interpolated_string_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_node_list_t *parts, const pm_token_t *closing) {
5338 pm_interpolated_string_node_t *node = PM_NODE_ALLOC(parser, pm_interpolated_string_node_t);
5339 pm_node_flags_t flags = PM_NODE_FLAG_STATIC_LITERAL;
5340
5341 switch (parser->frozen_string_literal) {
5342 case PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED:
5343 flags |= PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE;
5344 break;
5345 case PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED:
5346 flags |= PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN;
5347 break;
5348 }
5349
5350 *node = (pm_interpolated_string_node_t) {
5351 {
5352 .type = PM_INTERPOLATED_STRING_NODE,
5353 .flags = flags,
5354 .node_id = PM_NODE_IDENTIFY(parser),
5355 .location = {
5356 .start = opening->start,
5357 .end = closing->end,
5358 },
5359 },
5360 .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
5361 .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
5362 .parts = { 0 }
5363 };
5364
5365 if (parts != NULL) {
5366 pm_node_t *part;
5367 PM_NODE_LIST_FOREACH(parts, index, part) {
5368 pm_interpolated_string_node_append(node, part);
5369 }
5370 }
5371
5372 return node;
5373}
5374
5378static void
5379pm_interpolated_string_node_closing_set(pm_interpolated_string_node_t *node, const pm_token_t *closing) {
5380 node->closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing);
5381 node->base.location.end = closing->end;
5382}
5383
5384static void
5385pm_interpolated_symbol_node_append(pm_interpolated_symbol_node_t *node, pm_node_t *part) {
5386 if (node->parts.size == 0 && node->opening_loc.start == NULL) {
5387 node->base.location.start = part->location.start;
5388 }
5389
5390 pm_interpolated_node_append((pm_node_t *) node, &node->parts, part);
5391 node->base.location.end = MAX(node->base.location.end, part->location.end);
5392}
5393
5394static void
5395pm_interpolated_symbol_node_closing_loc_set(pm_interpolated_symbol_node_t *node, const pm_token_t *closing) {
5396 node->closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing);
5397 node->base.location.end = closing->end;
5398}
5399
5403static pm_interpolated_symbol_node_t *
5404pm_interpolated_symbol_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_node_list_t *parts, const pm_token_t *closing) {
5405 pm_interpolated_symbol_node_t *node = PM_NODE_ALLOC(parser, pm_interpolated_symbol_node_t);
5406
5407 *node = (pm_interpolated_symbol_node_t) {
5408 {
5409 .type = PM_INTERPOLATED_SYMBOL_NODE,
5410 .flags = PM_NODE_FLAG_STATIC_LITERAL,
5411 .node_id = PM_NODE_IDENTIFY(parser),
5412 .location = {
5413 .start = opening->start,
5414 .end = closing->end,
5415 },
5416 },
5417 .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
5418 .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
5419 .parts = { 0 }
5420 };
5421
5422 if (parts != NULL) {
5423 pm_node_t *part;
5424 PM_NODE_LIST_FOREACH(parts, index, part) {
5425 pm_interpolated_symbol_node_append(node, part);
5426 }
5427 }
5428
5429 return node;
5430}
5431
5435static pm_interpolated_x_string_node_t *
5436pm_interpolated_xstring_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *closing) {
5437 pm_interpolated_x_string_node_t *node = PM_NODE_ALLOC(parser, pm_interpolated_x_string_node_t);
5438
5439 *node = (pm_interpolated_x_string_node_t) {
5440 {
5441 .type = PM_INTERPOLATED_X_STRING_NODE,
5442 .node_id = PM_NODE_IDENTIFY(parser),
5443 .location = {
5444 .start = opening->start,
5445 .end = closing->end
5446 },
5447 },
5448 .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
5449 .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
5450 .parts = { 0 }
5451 };
5452
5453 return node;
5454}
5455
5456static inline void
5457pm_interpolated_xstring_node_append(pm_interpolated_x_string_node_t *node, pm_node_t *part) {
5458 pm_interpolated_node_append((pm_node_t *) node, &node->parts, part);
5459 node->base.location.end = part->location.end;
5460}
5461
5462static inline void
5463pm_interpolated_xstring_node_closing_set(pm_interpolated_x_string_node_t *node, const pm_token_t *closing) {
5464 node->closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing);
5465 node->base.location.end = closing->end;
5466}
5467
5471static pm_it_local_variable_read_node_t *
5472pm_it_local_variable_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
5473 pm_it_local_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_it_local_variable_read_node_t);
5474
5475 *node = (pm_it_local_variable_read_node_t) {
5476 {
5477 .type = PM_IT_LOCAL_VARIABLE_READ_NODE,
5478 .node_id = PM_NODE_IDENTIFY(parser),
5479 .location = PM_LOCATION_TOKEN_VALUE(name)
5480 }
5481 };
5482
5483 return node;
5484}
5485
5489static pm_it_parameters_node_t *
5490pm_it_parameters_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *closing) {
5491 pm_it_parameters_node_t *node = PM_NODE_ALLOC(parser, pm_it_parameters_node_t);
5492
5493 *node = (pm_it_parameters_node_t) {
5494 {
5495 .type = PM_IT_PARAMETERS_NODE,
5496 .node_id = PM_NODE_IDENTIFY(parser),
5497 .location = {
5498 .start = opening->start,
5499 .end = closing->end
5500 }
5501 }
5502 };
5503
5504 return node;
5505}
5506
5510static pm_keyword_hash_node_t *
5511pm_keyword_hash_node_create(pm_parser_t *parser) {
5512 pm_keyword_hash_node_t *node = PM_NODE_ALLOC(parser, pm_keyword_hash_node_t);
5513
5514 *node = (pm_keyword_hash_node_t) {
5515 .base = {
5516 .type = PM_KEYWORD_HASH_NODE,
5517 .flags = PM_KEYWORD_HASH_NODE_FLAGS_SYMBOL_KEYS,
5518 .node_id = PM_NODE_IDENTIFY(parser),
5519 .location = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
5520 },
5521 .elements = { 0 }
5522 };
5523
5524 return node;
5525}
5526
5530static void
5531pm_keyword_hash_node_elements_append(pm_keyword_hash_node_t *hash, pm_node_t *element) {
5532 // If the element being added is not an AssocNode or does not have a symbol
5533 // key, then we want to turn the SYMBOL_KEYS flag off.
5534 if (!PM_NODE_TYPE_P(element, PM_ASSOC_NODE) || !PM_NODE_TYPE_P(((pm_assoc_node_t *) element)->key, PM_SYMBOL_NODE)) {
5535 pm_node_flag_unset((pm_node_t *)hash, PM_KEYWORD_HASH_NODE_FLAGS_SYMBOL_KEYS);
5536 }
5537
5538 pm_node_list_append(&hash->elements, element);
5539 if (hash->base.location.start == NULL) {
5540 hash->base.location.start = element->location.start;
5541 }
5542 hash->base.location.end = element->location.end;
5543}
5544
5548static pm_required_keyword_parameter_node_t *
5549pm_required_keyword_parameter_node_create(pm_parser_t *parser, const pm_token_t *name) {
5550 pm_required_keyword_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_required_keyword_parameter_node_t);
5551
5552 *node = (pm_required_keyword_parameter_node_t) {
5553 {
5554 .type = PM_REQUIRED_KEYWORD_PARAMETER_NODE,
5555 .node_id = PM_NODE_IDENTIFY(parser),
5556 .location = {
5557 .start = name->start,
5558 .end = name->end
5559 },
5560 },
5561 .name = pm_parser_constant_id_location(parser, name->start, name->end - 1),
5562 .name_loc = PM_LOCATION_TOKEN_VALUE(name),
5563 };
5564
5565 return node;
5566}
5567
5571static pm_optional_keyword_parameter_node_t *
5572pm_optional_keyword_parameter_node_create(pm_parser_t *parser, const pm_token_t *name, pm_node_t *value) {
5573 pm_optional_keyword_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_optional_keyword_parameter_node_t);
5574
5575 *node = (pm_optional_keyword_parameter_node_t) {
5576 {
5577 .type = PM_OPTIONAL_KEYWORD_PARAMETER_NODE,
5578 .node_id = PM_NODE_IDENTIFY(parser),
5579 .location = {
5580 .start = name->start,
5581 .end = value->location.end
5582 },
5583 },
5584 .name = pm_parser_constant_id_location(parser, name->start, name->end - 1),
5585 .name_loc = PM_LOCATION_TOKEN_VALUE(name),
5586 .value = value
5587 };
5588
5589 return node;
5590}
5591
5595static pm_keyword_rest_parameter_node_t *
5596pm_keyword_rest_parameter_node_create(pm_parser_t *parser, const pm_token_t *operator, const pm_token_t *name) {
5597 pm_keyword_rest_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_keyword_rest_parameter_node_t);
5598
5599 *node = (pm_keyword_rest_parameter_node_t) {
5600 {
5601 .type = PM_KEYWORD_REST_PARAMETER_NODE,
5602 .node_id = PM_NODE_IDENTIFY(parser),
5603 .location = {
5604 .start = operator->start,
5605 .end = (name->type == PM_TOKEN_NOT_PROVIDED ? operator->end : name->end)
5606 },
5607 },
5608 .name = pm_parser_optional_constant_id_token(parser, name),
5609 .name_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(name),
5610 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
5611 };
5612
5613 return node;
5614}
5615
5619static pm_lambda_node_t *
5620pm_lambda_node_create(
5621 pm_parser_t *parser,
5622 pm_constant_id_list_t *locals,
5623 const pm_token_t *operator,
5624 const pm_token_t *opening,
5625 const pm_token_t *closing,
5626 pm_node_t *parameters,
5627 pm_node_t *body
5628) {
5629 pm_lambda_node_t *node = PM_NODE_ALLOC(parser, pm_lambda_node_t);
5630
5631 *node = (pm_lambda_node_t) {
5632 {
5633 .type = PM_LAMBDA_NODE,
5634 .node_id = PM_NODE_IDENTIFY(parser),
5635 .location = {
5636 .start = operator->start,
5637 .end = closing->end
5638 },
5639 },
5640 .locals = *locals,
5641 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
5642 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
5643 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
5644 .parameters = parameters,
5645 .body = body
5646 };
5647
5648 return node;
5649}
5650
5654static pm_local_variable_and_write_node_t *
5655pm_local_variable_and_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value, pm_constant_id_t name, uint32_t depth) {
5656 assert(PM_NODE_TYPE_P(target, PM_LOCAL_VARIABLE_READ_NODE) || PM_NODE_TYPE_P(target, PM_IT_LOCAL_VARIABLE_READ_NODE) || PM_NODE_TYPE_P(target, PM_CALL_NODE)); assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
5657 pm_local_variable_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_and_write_node_t);
5658
5659 *node = (pm_local_variable_and_write_node_t) {
5660 {
5661 .type = PM_LOCAL_VARIABLE_AND_WRITE_NODE,
5662 .node_id = PM_NODE_IDENTIFY(parser),
5663 .location = {
5664 .start = target->location.start,
5665 .end = value->location.end
5666 }
5667 },
5668 .name_loc = target->location,
5669 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
5670 .value = value,
5671 .name = name,
5672 .depth = depth
5673 };
5674
5675 return node;
5676}
5677
5681static pm_local_variable_operator_write_node_t *
5682pm_local_variable_operator_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value, pm_constant_id_t name, uint32_t depth) {
5683 pm_local_variable_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_operator_write_node_t);
5684
5685 *node = (pm_local_variable_operator_write_node_t) {
5686 {
5687 .type = PM_LOCAL_VARIABLE_OPERATOR_WRITE_NODE,
5688 .node_id = PM_NODE_IDENTIFY(parser),
5689 .location = {
5690 .start = target->location.start,
5691 .end = value->location.end
5692 }
5693 },
5694 .name_loc = target->location,
5695 .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
5696 .value = value,
5697 .name = name,
5698 .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1),
5699 .depth = depth
5700 };
5701
5702 return node;
5703}
5704
5708static pm_local_variable_or_write_node_t *
5709pm_local_variable_or_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value, pm_constant_id_t name, uint32_t depth) {
5710 assert(PM_NODE_TYPE_P(target, PM_LOCAL_VARIABLE_READ_NODE) || PM_NODE_TYPE_P(target, PM_IT_LOCAL_VARIABLE_READ_NODE) || PM_NODE_TYPE_P(target, PM_CALL_NODE));
5711 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
5712 pm_local_variable_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_or_write_node_t);
5713
5714 *node = (pm_local_variable_or_write_node_t) {
5715 {
5716 .type = PM_LOCAL_VARIABLE_OR_WRITE_NODE,
5717 .node_id = PM_NODE_IDENTIFY(parser),
5718 .location = {
5719 .start = target->location.start,
5720 .end = value->location.end
5721 }
5722 },
5723 .name_loc = target->location,
5724 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
5725 .value = value,
5726 .name = name,
5727 .depth = depth
5728 };
5729
5730 return node;
5731}
5732
5736static pm_local_variable_read_node_t *
5737pm_local_variable_read_node_create_constant_id(pm_parser_t *parser, const pm_token_t *name, pm_constant_id_t name_id, uint32_t depth, bool missing) {
5738 if (!missing) pm_locals_read(&pm_parser_scope_find(parser, depth)->locals, name_id);
5739
5740 pm_local_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_read_node_t);
5741
5742 *node = (pm_local_variable_read_node_t) {
5743 {
5744 .type = PM_LOCAL_VARIABLE_READ_NODE,
5745 .node_id = PM_NODE_IDENTIFY(parser),
5746 .location = PM_LOCATION_TOKEN_VALUE(name)
5747 },
5748 .name = name_id,
5749 .depth = depth
5750 };
5751
5752 return node;
5753}
5754
5758static pm_local_variable_read_node_t *
5759pm_local_variable_read_node_create(pm_parser_t *parser, const pm_token_t *name, uint32_t depth) {
5760 pm_constant_id_t name_id = pm_parser_constant_id_token(parser, name);
5761 return pm_local_variable_read_node_create_constant_id(parser, name, name_id, depth, false);
5762}
5763
5768static pm_local_variable_read_node_t *
5769pm_local_variable_read_node_missing_create(pm_parser_t *parser, const pm_token_t *name, uint32_t depth) {
5770 pm_constant_id_t name_id = pm_parser_constant_id_token(parser, name);
5771 return pm_local_variable_read_node_create_constant_id(parser, name, name_id, depth, true);
5772}
5773
5777static pm_local_variable_write_node_t *
5778pm_local_variable_write_node_create(pm_parser_t *parser, pm_constant_id_t name, uint32_t depth, pm_node_t *value, const pm_location_t *name_loc, const pm_token_t *operator) {
5779 pm_local_variable_write_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_write_node_t);
5780
5781 *node = (pm_local_variable_write_node_t) {
5782 {
5783 .type = PM_LOCAL_VARIABLE_WRITE_NODE,
5784 .flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
5785 .node_id = PM_NODE_IDENTIFY(parser),
5786 .location = {
5787 .start = name_loc->start,
5788 .end = value->location.end
5789 }
5790 },
5791 .name = name,
5792 .depth = depth,
5793 .value = value,
5794 .name_loc = *name_loc,
5795 .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator)
5796 };
5797
5798 return node;
5799}
5800
/**
 * Returns true if the bytes in [start, end) are exactly the two characters
 * `it`.
 */
static inline bool
pm_token_is_it(const uint8_t *start, const uint8_t *end) {
    if (end - start != 2) return false;

    return start[0] == 'i' && start[1] == 't';
}
5808
/**
 * Returns true if the bytes in [start, end) are exactly two characters long,
 * begin with `_`, and are followed by a byte other than `0` that
 * pm_char_is_decimal_digit accepts.
 */
static inline bool
pm_token_is_numbered_parameter(const uint8_t *start, const uint8_t *end) {
    if (end - start != 2) return false;
    if (start[0] != '_') return false;
    if (start[1] == '0') return false;

    return pm_char_is_decimal_digit(start[1]);
}
5817
5822static inline void
5823pm_refute_numbered_parameter(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
5824 if (pm_token_is_numbered_parameter(start, end)) {
5825 PM_PARSER_ERR_FORMAT(parser, start, end, PM_ERR_PARAMETER_NUMBERED_RESERVED, start);
5826 }
5827}
5828
5833static pm_local_variable_target_node_t *
5834pm_local_variable_target_node_create(pm_parser_t *parser, const pm_location_t *location, pm_constant_id_t name, uint32_t depth) {
5835 pm_refute_numbered_parameter(parser, location->start, location->end);
5836 pm_local_variable_target_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_target_node_t);
5837
5838 *node = (pm_local_variable_target_node_t) {
5839 {
5840 .type = PM_LOCAL_VARIABLE_TARGET_NODE,
5841 .node_id = PM_NODE_IDENTIFY(parser),
5842 .location = *location
5843 },
5844 .name = name,
5845 .depth = depth
5846 };
5847
5848 return node;
5849}
5850
5854static pm_match_predicate_node_t *
5855pm_match_predicate_node_create(pm_parser_t *parser, pm_node_t *value, pm_node_t *pattern, const pm_token_t *operator) {
5856 pm_assert_value_expression(parser, value);
5857
5858 pm_match_predicate_node_t *node = PM_NODE_ALLOC(parser, pm_match_predicate_node_t);
5859
5860 *node = (pm_match_predicate_node_t) {
5861 {
5862 .type = PM_MATCH_PREDICATE_NODE,
5863 .node_id = PM_NODE_IDENTIFY(parser),
5864 .location = {
5865 .start = value->location.start,
5866 .end = pattern->location.end
5867 }
5868 },
5869 .value = value,
5870 .pattern = pattern,
5871 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
5872 };
5873
5874 return node;
5875}
5876
5880static pm_match_required_node_t *
5881pm_match_required_node_create(pm_parser_t *parser, pm_node_t *value, pm_node_t *pattern, const pm_token_t *operator) {
5882 pm_assert_value_expression(parser, value);
5883
5884 pm_match_required_node_t *node = PM_NODE_ALLOC(parser, pm_match_required_node_t);
5885
5886 *node = (pm_match_required_node_t) {
5887 {
5888 .type = PM_MATCH_REQUIRED_NODE,
5889 .node_id = PM_NODE_IDENTIFY(parser),
5890 .location = {
5891 .start = value->location.start,
5892 .end = pattern->location.end
5893 }
5894 },
5895 .value = value,
5896 .pattern = pattern,
5897 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
5898 };
5899
5900 return node;
5901}
5902
5906static pm_match_write_node_t *
5907pm_match_write_node_create(pm_parser_t *parser, pm_call_node_t *call) {
5908 pm_match_write_node_t *node = PM_NODE_ALLOC(parser, pm_match_write_node_t);
5909
5910 *node = (pm_match_write_node_t) {
5911 {
5912 .type = PM_MATCH_WRITE_NODE,
5913 .node_id = PM_NODE_IDENTIFY(parser),
5914 .location = call->base.location
5915 },
5916 .call = call,
5917 .targets = { 0 }
5918 };
5919
5920 return node;
5921}
5922
5926static pm_module_node_t *
5927pm_module_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const pm_token_t *module_keyword, pm_node_t *constant_path, const pm_token_t *name, pm_node_t *body, const pm_token_t *end_keyword) {
5928 pm_module_node_t *node = PM_NODE_ALLOC(parser, pm_module_node_t);
5929
5930 *node = (pm_module_node_t) {
5931 {
5932 .type = PM_MODULE_NODE,
5933 .node_id = PM_NODE_IDENTIFY(parser),
5934 .location = {
5935 .start = module_keyword->start,
5936 .end = end_keyword->end
5937 }
5938 },
5939 .locals = (locals == NULL ? ((pm_constant_id_list_t) { .ids = NULL, .size = 0, .capacity = 0 }) : *locals),
5940 .module_keyword_loc = PM_LOCATION_TOKEN_VALUE(module_keyword),
5941 .constant_path = constant_path,
5942 .body = body,
5943 .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword),
5944 .name = pm_parser_constant_id_token(parser, name)
5945 };
5946
5947 return node;
5948}
5949
5953static pm_multi_target_node_t *
5954pm_multi_target_node_create(pm_parser_t *parser) {
5955 pm_multi_target_node_t *node = PM_NODE_ALLOC(parser, pm_multi_target_node_t);
5956
5957 *node = (pm_multi_target_node_t) {
5958 {
5959 .type = PM_MULTI_TARGET_NODE,
5960 .node_id = PM_NODE_IDENTIFY(parser),
5961 .location = { .start = NULL, .end = NULL }
5962 },
5963 .lefts = { 0 },
5964 .rest = NULL,
5965 .rights = { 0 },
5966 .lparen_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
5967 .rparen_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
5968 };
5969
5970 return node;
5971}
5972
5976static void
5977pm_multi_target_node_targets_append(pm_parser_t *parser, pm_multi_target_node_t *node, pm_node_t *target) {
5978 if (PM_NODE_TYPE_P(target, PM_SPLAT_NODE)) {
5979 if (node->rest == NULL) {
5980 node->rest = target;
5981 } else {
5982 pm_parser_err_node(parser, target, PM_ERR_MULTI_ASSIGN_MULTI_SPLATS);
5983 pm_node_list_append(&node->rights, target);
5984 }
5985 } else if (PM_NODE_TYPE_P(target, PM_IMPLICIT_REST_NODE)) {
5986 if (node->rest == NULL) {
5987 node->rest = target;
5988 } else {
5989 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->current, PM_ERR_MULTI_ASSIGN_UNEXPECTED_REST);
5990 pm_node_list_append(&node->rights, target);
5991 }
5992 } else if (node->rest == NULL) {
5993 pm_node_list_append(&node->lefts, target);
5994 } else {
5995 pm_node_list_append(&node->rights, target);
5996 }
5997
5998 if (node->base.location.start == NULL || (node->base.location.start > target->location.start)) {
5999 node->base.location.start = target->location.start;
6000 }
6001
6002 if (node->base.location.end == NULL || (node->base.location.end < target->location.end)) {
6003 node->base.location.end = target->location.end;
6004 }
6005}
6006
6010static void
6011pm_multi_target_node_opening_set(pm_multi_target_node_t *node, const pm_token_t *lparen) {
6012 node->base.location.start = lparen->start;
6013 node->lparen_loc = PM_LOCATION_TOKEN_VALUE(lparen);
6014}
6015
6019static void
6020pm_multi_target_node_closing_set(pm_multi_target_node_t *node, const pm_token_t *rparen) {
6021 node->base.location.end = rparen->end;
6022 node->rparen_loc = PM_LOCATION_TOKEN_VALUE(rparen);
6023}
6024
6028static pm_multi_write_node_t *
6029pm_multi_write_node_create(pm_parser_t *parser, pm_multi_target_node_t *target, const pm_token_t *operator, pm_node_t *value) {
6030 pm_multi_write_node_t *node = PM_NODE_ALLOC(parser, pm_multi_write_node_t);
6031
6032 *node = (pm_multi_write_node_t) {
6033 {
6034 .type = PM_MULTI_WRITE_NODE,
6035 .flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
6036 .node_id = PM_NODE_IDENTIFY(parser),
6037 .location = {
6038 .start = target->base.location.start,
6039 .end = value->location.end
6040 }
6041 },
6042 .lefts = target->lefts,
6043 .rest = target->rest,
6044 .rights = target->rights,
6045 .lparen_loc = target->lparen_loc,
6046 .rparen_loc = target->rparen_loc,
6047 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
6048 .value = value
6049 };
6050
6051 // Explicitly do not call pm_node_destroy here because we want to keep
6052 // around all of the information within the MultiWriteNode node.
6053 xfree(target);
6054
6055 return node;
6056}
6057
6061static pm_next_node_t *
6062pm_next_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_arguments_node_t *arguments) {
6063 assert(keyword->type == PM_TOKEN_KEYWORD_NEXT);
6064 pm_next_node_t *node = PM_NODE_ALLOC(parser, pm_next_node_t);
6065
6066 *node = (pm_next_node_t) {
6067 {
6068 .type = PM_NEXT_NODE,
6069 .node_id = PM_NODE_IDENTIFY(parser),
6070 .location = {
6071 .start = keyword->start,
6072 .end = (arguments == NULL ? keyword->end : arguments->base.location.end)
6073 }
6074 },
6075 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
6076 .arguments = arguments
6077 };
6078
6079 return node;
6080}
6081
6085static pm_nil_node_t *
6086pm_nil_node_create(pm_parser_t *parser, const pm_token_t *token) {
6087 assert(token->type == PM_TOKEN_KEYWORD_NIL);
6088 pm_nil_node_t *node = PM_NODE_ALLOC(parser, pm_nil_node_t);
6089
6090 *node = (pm_nil_node_t) {{
6091 .type = PM_NIL_NODE,
6092 .flags = PM_NODE_FLAG_STATIC_LITERAL,
6093 .node_id = PM_NODE_IDENTIFY(parser),
6094 .location = PM_LOCATION_TOKEN_VALUE(token)
6095 }};
6096
6097 return node;
6098}
6099
6103static pm_no_keywords_parameter_node_t *
6104pm_no_keywords_parameter_node_create(pm_parser_t *parser, const pm_token_t *operator, const pm_token_t *keyword) {
6105 assert(operator->type == PM_TOKEN_USTAR_STAR || operator->type == PM_TOKEN_STAR_STAR);
6106 assert(keyword->type == PM_TOKEN_KEYWORD_NIL);
6107 pm_no_keywords_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_no_keywords_parameter_node_t);
6108
6109 *node = (pm_no_keywords_parameter_node_t) {
6110 {
6111 .type = PM_NO_KEYWORDS_PARAMETER_NODE,
6112 .node_id = PM_NODE_IDENTIFY(parser),
6113 .location = {
6114 .start = operator->start,
6115 .end = keyword->end
6116 }
6117 },
6118 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
6119 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword)
6120 };
6121
6122 return node;
6123}
6124
6128static pm_numbered_parameters_node_t *
6129pm_numbered_parameters_node_create(pm_parser_t *parser, const pm_location_t *location, uint8_t maximum) {
6130 pm_numbered_parameters_node_t *node = PM_NODE_ALLOC(parser, pm_numbered_parameters_node_t);
6131
6132 *node = (pm_numbered_parameters_node_t) {
6133 {
6134 .type = PM_NUMBERED_PARAMETERS_NODE,
6135 .node_id = PM_NODE_IDENTIFY(parser),
6136 .location = *location
6137 },
6138 .maximum = maximum
6139 };
6140
6141 return node;
6142}
6143
6148#define NTH_REF_MAX ((uint32_t) (INT_MAX >> 1))
6149
6156static uint32_t
6157pm_numbered_reference_read_node_number(pm_parser_t *parser, const pm_token_t *token) {
6158 const uint8_t *start = token->start + 1;
6159 const uint8_t *end = token->end;
6160
6161 ptrdiff_t diff = end - start;
6162 assert(diff > 0 && ((unsigned long) diff < SIZE_MAX));
6163 size_t length = (size_t) diff;
6164
6165 char *digits = xcalloc(length + 1, sizeof(char));
6166 memcpy(digits, start, length);
6167 digits[length] = '\0';
6168
6169 char *endptr;
6170 errno = 0;
6171 unsigned long value = strtoul(digits, &endptr, 10);
6172
6173 if ((digits == endptr) || (*endptr != '\0')) {
6174 pm_parser_err(parser, start, end, PM_ERR_INVALID_NUMBER_DECIMAL);
6175 value = 0;
6176 }
6177
6178 xfree(digits);
6179
6180 if ((errno == ERANGE) || (value > NTH_REF_MAX)) {
6181 PM_PARSER_WARN_FORMAT(parser, start, end, PM_WARN_INVALID_NUMBERED_REFERENCE, (int) (length + 1), (const char *) token->start);
6182 value = 0;
6183 }
6184
6185 return (uint32_t) value;
6186}
6187
6188#undef NTH_REF_MAX
6189
6193static pm_numbered_reference_read_node_t *
6194pm_numbered_reference_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
6195 assert(name->type == PM_TOKEN_NUMBERED_REFERENCE);
6196 pm_numbered_reference_read_node_t *node = PM_NODE_ALLOC(parser, pm_numbered_reference_read_node_t);
6197
6198 *node = (pm_numbered_reference_read_node_t) {
6199 {
6200 .type = PM_NUMBERED_REFERENCE_READ_NODE,
6201 .node_id = PM_NODE_IDENTIFY(parser),
6202 .location = PM_LOCATION_TOKEN_VALUE(name),
6203 },
6204 .number = pm_numbered_reference_read_node_number(parser, name)
6205 };
6206
6207 return node;
6208}
6209
6213static pm_optional_parameter_node_t *
6214pm_optional_parameter_node_create(pm_parser_t *parser, const pm_token_t *name, const pm_token_t *operator, pm_node_t *value) {
6215 pm_optional_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_optional_parameter_node_t);
6216
6217 *node = (pm_optional_parameter_node_t) {
6218 {
6219 .type = PM_OPTIONAL_PARAMETER_NODE,
6220 .node_id = PM_NODE_IDENTIFY(parser),
6221 .location = {
6222 .start = name->start,
6223 .end = value->location.end
6224 }
6225 },
6226 .name = pm_parser_constant_id_token(parser, name),
6227 .name_loc = PM_LOCATION_TOKEN_VALUE(name),
6228 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
6229 .value = value
6230 };
6231
6232 return node;
6233}
6234
6238static pm_or_node_t *
6239pm_or_node_create(pm_parser_t *parser, pm_node_t *left, const pm_token_t *operator, pm_node_t *right) {
6240 pm_assert_value_expression(parser, left);
6241
6242 pm_or_node_t *node = PM_NODE_ALLOC(parser, pm_or_node_t);
6243
6244 *node = (pm_or_node_t) {
6245 {
6246 .type = PM_OR_NODE,
6247 .node_id = PM_NODE_IDENTIFY(parser),
6248 .location = {
6249 .start = left->location.start,
6250 .end = right->location.end
6251 }
6252 },
6253 .left = left,
6254 .right = right,
6255 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
6256 };
6257
6258 return node;
6259}
6260
6264static pm_parameters_node_t *
6265pm_parameters_node_create(pm_parser_t *parser) {
6266 pm_parameters_node_t *node = PM_NODE_ALLOC(parser, pm_parameters_node_t);
6267
6268 *node = (pm_parameters_node_t) {
6269 {
6270 .type = PM_PARAMETERS_NODE,
6271 .node_id = PM_NODE_IDENTIFY(parser),
6272 .location = PM_LOCATION_TOKEN_VALUE(&parser->current)
6273 },
6274 .rest = NULL,
6275 .keyword_rest = NULL,
6276 .block = NULL,
6277 .requireds = { 0 },
6278 .optionals = { 0 },
6279 .posts = { 0 },
6280 .keywords = { 0 }
6281 };
6282
6283 return node;
6284}
6285
6289static void
6290pm_parameters_node_location_set(pm_parameters_node_t *params, pm_node_t *param) {
6291 if (params->base.location.start == NULL) {
6292 params->base.location.start = param->location.start;
6293 } else {
6294 params->base.location.start = params->base.location.start < param->location.start ? params->base.location.start : param->location.start;
6295 }
6296
6297 if (params->base.location.end == NULL) {
6298 params->base.location.end = param->location.end;
6299 } else {
6300 params->base.location.end = params->base.location.end > param->location.end ? params->base.location.end : param->location.end;
6301 }
6302}
6303
6307static void
6308pm_parameters_node_requireds_append(pm_parameters_node_t *params, pm_node_t *param) {
6309 pm_parameters_node_location_set(params, param);
6310 pm_node_list_append(&params->requireds, param);
6311}
6312
6316static void
6317pm_parameters_node_optionals_append(pm_parameters_node_t *params, pm_optional_parameter_node_t *param) {
6318 pm_parameters_node_location_set(params, (pm_node_t *) param);
6319 pm_node_list_append(&params->optionals, (pm_node_t *) param);
6320}
6321
6325static void
6326pm_parameters_node_posts_append(pm_parameters_node_t *params, pm_node_t *param) {
6327 pm_parameters_node_location_set(params, param);
6328 pm_node_list_append(&params->posts, param);
6329}
6330
6334static void
6335pm_parameters_node_rest_set(pm_parameters_node_t *params, pm_node_t *param) {
6336 pm_parameters_node_location_set(params, param);
6337 params->rest = param;
6338}
6339
6343static void
6344pm_parameters_node_keywords_append(pm_parameters_node_t *params, pm_node_t *param) {
6345 pm_parameters_node_location_set(params, param);
6346 pm_node_list_append(&params->keywords, param);
6347}
6348
6352static void
6353pm_parameters_node_keyword_rest_set(pm_parameters_node_t *params, pm_node_t *param) {
6354 assert(params->keyword_rest == NULL);
6355 pm_parameters_node_location_set(params, param);
6356 params->keyword_rest = param;
6357}
6358
6362static void
6363pm_parameters_node_block_set(pm_parameters_node_t *params, pm_block_parameter_node_t *param) {
6364 assert(params->block == NULL);
6365 pm_parameters_node_location_set(params, (pm_node_t *) param);
6366 params->block = param;
6367}
6368
6372static pm_program_node_t *
6373pm_program_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, pm_statements_node_t *statements) {
6374 pm_program_node_t *node = PM_NODE_ALLOC(parser, pm_program_node_t);
6375
6376 *node = (pm_program_node_t) {
6377 {
6378 .type = PM_PROGRAM_NODE,
6379 .node_id = PM_NODE_IDENTIFY(parser),
6380 .location = {
6381 .start = statements == NULL ? parser->start : statements->base.location.start,
6382 .end = statements == NULL ? parser->end : statements->base.location.end
6383 }
6384 },
6385 .locals = *locals,
6386 .statements = statements
6387 };
6388
6389 return node;
6390}
6391
6395static pm_parentheses_node_t *
6396pm_parentheses_node_create(pm_parser_t *parser, const pm_token_t *opening, pm_node_t *body, const pm_token_t *closing) {
6397 pm_parentheses_node_t *node = PM_NODE_ALLOC(parser, pm_parentheses_node_t);
6398
6399 *node = (pm_parentheses_node_t) {
6400 {
6401 .type = PM_PARENTHESES_NODE,
6402 .node_id = PM_NODE_IDENTIFY(parser),
6403 .location = {
6404 .start = opening->start,
6405 .end = closing->end
6406 }
6407 },
6408 .body = body,
6409 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
6410 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing)
6411 };
6412
6413 return node;
6414}
6415
6419static pm_pinned_expression_node_t *
6420pm_pinned_expression_node_create(pm_parser_t *parser, pm_node_t *expression, const pm_token_t *operator, const pm_token_t *lparen, const pm_token_t *rparen) {
6421 pm_pinned_expression_node_t *node = PM_NODE_ALLOC(parser, pm_pinned_expression_node_t);
6422
6423 *node = (pm_pinned_expression_node_t) {
6424 {
6425 .type = PM_PINNED_EXPRESSION_NODE,
6426 .node_id = PM_NODE_IDENTIFY(parser),
6427 .location = {
6428 .start = operator->start,
6429 .end = rparen->end
6430 }
6431 },
6432 .expression = expression,
6433 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
6434 .lparen_loc = PM_LOCATION_TOKEN_VALUE(lparen),
6435 .rparen_loc = PM_LOCATION_TOKEN_VALUE(rparen)
6436 };
6437
6438 return node;
6439}
6440
6444static pm_pinned_variable_node_t *
6445pm_pinned_variable_node_create(pm_parser_t *parser, const pm_token_t *operator, pm_node_t *variable) {
6446 pm_pinned_variable_node_t *node = PM_NODE_ALLOC(parser, pm_pinned_variable_node_t);
6447
6448 *node = (pm_pinned_variable_node_t) {
6449 {
6450 .type = PM_PINNED_VARIABLE_NODE,
6451 .node_id = PM_NODE_IDENTIFY(parser),
6452 .location = {
6453 .start = operator->start,
6454 .end = variable->location.end
6455 }
6456 },
6457 .variable = variable,
6458 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
6459 };
6460
6461 return node;
6462}
6463
6467static pm_post_execution_node_t *
6468pm_post_execution_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *opening, pm_statements_node_t *statements, const pm_token_t *closing) {
6469 pm_post_execution_node_t *node = PM_NODE_ALLOC(parser, pm_post_execution_node_t);
6470
6471 *node = (pm_post_execution_node_t) {
6472 {
6473 .type = PM_POST_EXECUTION_NODE,
6474 .node_id = PM_NODE_IDENTIFY(parser),
6475 .location = {
6476 .start = keyword->start,
6477 .end = closing->end
6478 }
6479 },
6480 .statements = statements,
6481 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
6482 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
6483 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing)
6484 };
6485
6486 return node;
6487}
6488
6492static pm_pre_execution_node_t *
6493pm_pre_execution_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *opening, pm_statements_node_t *statements, const pm_token_t *closing) {
6494 pm_pre_execution_node_t *node = PM_NODE_ALLOC(parser, pm_pre_execution_node_t);
6495
6496 *node = (pm_pre_execution_node_t) {
6497 {
6498 .type = PM_PRE_EXECUTION_NODE,
6499 .node_id = PM_NODE_IDENTIFY(parser),
6500 .location = {
6501 .start = keyword->start,
6502 .end = closing->end
6503 }
6504 },
6505 .statements = statements,
6506 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
6507 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
6508 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing)
6509 };
6510
6511 return node;
6512}
6513
6517static pm_range_node_t *
6518pm_range_node_create(pm_parser_t *parser, pm_node_t *left, const pm_token_t *operator, pm_node_t *right) {
6519 pm_assert_value_expression(parser, left);
6520 pm_assert_value_expression(parser, right);
6521
6522 pm_range_node_t *node = PM_NODE_ALLOC(parser, pm_range_node_t);
6523 pm_node_flags_t flags = 0;
6524
6525 // Indicate that this node is an exclusive range if the operator is `...`.
6526 if (operator->type == PM_TOKEN_DOT_DOT_DOT || operator->type == PM_TOKEN_UDOT_DOT_DOT) {
6527 flags |= PM_RANGE_FLAGS_EXCLUDE_END;
6528 }
6529
6530 // Indicate that this node is a static literal (i.e., can be compiled with
6531 // a putobject in CRuby) if the left and right are implicit nil, explicit
6532 // nil, or integers.
6533 if (
6534 (left == NULL || PM_NODE_TYPE_P(left, PM_NIL_NODE) || PM_NODE_TYPE_P(left, PM_INTEGER_NODE)) &&
6535 (right == NULL || PM_NODE_TYPE_P(right, PM_NIL_NODE) || PM_NODE_TYPE_P(right, PM_INTEGER_NODE))
6536 ) {
6537 flags |= PM_NODE_FLAG_STATIC_LITERAL;
6538 }
6539
6540 *node = (pm_range_node_t) {
6541 {
6542 .type = PM_RANGE_NODE,
6543 .flags = flags,
6544 .node_id = PM_NODE_IDENTIFY(parser),
6545 .location = {
6546 .start = (left == NULL ? operator->start : left->location.start),
6547 .end = (right == NULL ? operator->end : right->location.end)
6548 }
6549 },
6550 .left = left,
6551 .right = right,
6552 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
6553 };
6554
6555 return node;
6556}
6557
6561static pm_redo_node_t *
6562pm_redo_node_create(pm_parser_t *parser, const pm_token_t *token) {
6563 assert(token->type == PM_TOKEN_KEYWORD_REDO);
6564 pm_redo_node_t *node = PM_NODE_ALLOC(parser, pm_redo_node_t);
6565
6566 *node = (pm_redo_node_t) {{
6567 .type = PM_REDO_NODE,
6568 .node_id = PM_NODE_IDENTIFY(parser),
6569 .location = PM_LOCATION_TOKEN_VALUE(token)
6570 }};
6571
6572 return node;
6573}
6574
6579static pm_regular_expression_node_t *
6580pm_regular_expression_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing, const pm_string_t *unescaped) {
6581 pm_regular_expression_node_t *node = PM_NODE_ALLOC(parser, pm_regular_expression_node_t);
6582
6583 *node = (pm_regular_expression_node_t) {
6584 {
6585 .type = PM_REGULAR_EXPRESSION_NODE,
6586 .flags = pm_regular_expression_flags_create(parser, closing) | PM_NODE_FLAG_STATIC_LITERAL,
6587 .node_id = PM_NODE_IDENTIFY(parser),
6588 .location = {
6589 .start = MIN(opening->start, closing->start),
6590 .end = MAX(opening->end, closing->end)
6591 }
6592 },
6593 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
6594 .content_loc = PM_LOCATION_TOKEN_VALUE(content),
6595 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
6596 .unescaped = *unescaped
6597 };
6598
6599 return node;
6600}
6601
6605static inline pm_regular_expression_node_t *
6606pm_regular_expression_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing) {
6607 return pm_regular_expression_node_create_unescaped(parser, opening, content, closing, &PM_STRING_EMPTY);
6608}
6609
6613static pm_required_parameter_node_t *
6614pm_required_parameter_node_create(pm_parser_t *parser, const pm_token_t *token) {
6615 pm_required_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_required_parameter_node_t);
6616
6617 *node = (pm_required_parameter_node_t) {
6618 {
6619 .type = PM_REQUIRED_PARAMETER_NODE,
6620 .node_id = PM_NODE_IDENTIFY(parser),
6621 .location = PM_LOCATION_TOKEN_VALUE(token)
6622 },
6623 .name = pm_parser_constant_id_token(parser, token)
6624 };
6625
6626 return node;
6627}
6628
6632static pm_rescue_modifier_node_t *
6633pm_rescue_modifier_node_create(pm_parser_t *parser, pm_node_t *expression, const pm_token_t *keyword, pm_node_t *rescue_expression) {
6634 pm_rescue_modifier_node_t *node = PM_NODE_ALLOC(parser, pm_rescue_modifier_node_t);
6635
6636 *node = (pm_rescue_modifier_node_t) {
6637 {
6638 .type = PM_RESCUE_MODIFIER_NODE,
6639 .node_id = PM_NODE_IDENTIFY(parser),
6640 .location = {
6641 .start = expression->location.start,
6642 .end = rescue_expression->location.end
6643 }
6644 },
6645 .expression = expression,
6646 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
6647 .rescue_expression = rescue_expression
6648 };
6649
6650 return node;
6651}
6652
6656static pm_rescue_node_t *
6657pm_rescue_node_create(pm_parser_t *parser, const pm_token_t *keyword) {
6658 pm_rescue_node_t *node = PM_NODE_ALLOC(parser, pm_rescue_node_t);
6659
6660 *node = (pm_rescue_node_t) {
6661 {
6662 .type = PM_RESCUE_NODE,
6663 .node_id = PM_NODE_IDENTIFY(parser),
6664 .location = PM_LOCATION_TOKEN_VALUE(keyword)
6665 },
6666 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
6667 .operator_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
6668 .reference = NULL,
6669 .statements = NULL,
6670 .subsequent = NULL,
6671 .exceptions = { 0 }
6672 };
6673
6674 return node;
6675}
6676
6677static inline void
6678pm_rescue_node_operator_set(pm_rescue_node_t *node, const pm_token_t *operator) {
6679 node->operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator);
6680}
6681
6685static void
6686pm_rescue_node_reference_set(pm_rescue_node_t *node, pm_node_t *reference) {
6687 node->reference = reference;
6688 node->base.location.end = reference->location.end;
6689}
6690
6694static void
6695pm_rescue_node_statements_set(pm_rescue_node_t *node, pm_statements_node_t *statements) {
6696 node->statements = statements;
6697 if (pm_statements_node_body_length(statements) > 0) {
6698 node->base.location.end = statements->base.location.end;
6699 }
6700}
6701
6705static void
6706pm_rescue_node_subsequent_set(pm_rescue_node_t *node, pm_rescue_node_t *subsequent) {
6707 node->subsequent = subsequent;
6708 node->base.location.end = subsequent->base.location.end;
6709}
6710
6714static void
6715pm_rescue_node_exceptions_append(pm_rescue_node_t *node, pm_node_t *exception) {
6716 pm_node_list_append(&node->exceptions, exception);
6717 node->base.location.end = exception->location.end;
6718}
6719
6723static pm_rest_parameter_node_t *
6724pm_rest_parameter_node_create(pm_parser_t *parser, const pm_token_t *operator, const pm_token_t *name) {
6725 pm_rest_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_rest_parameter_node_t);
6726
6727 *node = (pm_rest_parameter_node_t) {
6728 {
6729 .type = PM_REST_PARAMETER_NODE,
6730 .node_id = PM_NODE_IDENTIFY(parser),
6731 .location = {
6732 .start = operator->start,
6733 .end = (name->type == PM_TOKEN_NOT_PROVIDED ? operator->end : name->end)
6734 }
6735 },
6736 .name = pm_parser_optional_constant_id_token(parser, name),
6737 .name_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(name),
6738 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
6739 };
6740
6741 return node;
6742}
6743
6747static pm_retry_node_t *
6748pm_retry_node_create(pm_parser_t *parser, const pm_token_t *token) {
6749 assert(token->type == PM_TOKEN_KEYWORD_RETRY);
6750 pm_retry_node_t *node = PM_NODE_ALLOC(parser, pm_retry_node_t);
6751
6752 *node = (pm_retry_node_t) {{
6753 .type = PM_RETRY_NODE,
6754 .node_id = PM_NODE_IDENTIFY(parser),
6755 .location = PM_LOCATION_TOKEN_VALUE(token)
6756 }};
6757
6758 return node;
6759}
6760
6764static pm_return_node_t *
6765pm_return_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_arguments_node_t *arguments) {
6766 pm_return_node_t *node = PM_NODE_ALLOC(parser, pm_return_node_t);
6767
6768 *node = (pm_return_node_t) {
6769 {
6770 .type = PM_RETURN_NODE,
6771 .node_id = PM_NODE_IDENTIFY(parser),
6772 .location = {
6773 .start = keyword->start,
6774 .end = (arguments == NULL ? keyword->end : arguments->base.location.end)
6775 }
6776 },
6777 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
6778 .arguments = arguments
6779 };
6780
6781 return node;
6782}
6783
6787static pm_self_node_t *
6788pm_self_node_create(pm_parser_t *parser, const pm_token_t *token) {
6789 assert(token->type == PM_TOKEN_KEYWORD_SELF);
6790 pm_self_node_t *node = PM_NODE_ALLOC(parser, pm_self_node_t);
6791
6792 *node = (pm_self_node_t) {{
6793 .type = PM_SELF_NODE,
6794 .node_id = PM_NODE_IDENTIFY(parser),
6795 .location = PM_LOCATION_TOKEN_VALUE(token)
6796 }};
6797
6798 return node;
6799}
6800
6804static pm_shareable_constant_node_t *
6805pm_shareable_constant_node_create(pm_parser_t *parser, pm_node_t *write, pm_shareable_constant_value_t value) {
6806 pm_shareable_constant_node_t *node = PM_NODE_ALLOC(parser, pm_shareable_constant_node_t);
6807
6808 *node = (pm_shareable_constant_node_t) {
6809 {
6810 .type = PM_SHAREABLE_CONSTANT_NODE,
6811 .flags = (pm_node_flags_t) value,
6812 .node_id = PM_NODE_IDENTIFY(parser),
6813 .location = PM_LOCATION_NODE_VALUE(write)
6814 },
6815 .write = write
6816 };
6817
6818 return node;
6819}
6820
6824static pm_singleton_class_node_t *
6825pm_singleton_class_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const pm_token_t *class_keyword, const pm_token_t *operator, pm_node_t *expression, pm_node_t *body, const pm_token_t *end_keyword) {
6826 pm_singleton_class_node_t *node = PM_NODE_ALLOC(parser, pm_singleton_class_node_t);
6827
6828 *node = (pm_singleton_class_node_t) {
6829 {
6830 .type = PM_SINGLETON_CLASS_NODE,
6831 .node_id = PM_NODE_IDENTIFY(parser),
6832 .location = {
6833 .start = class_keyword->start,
6834 .end = end_keyword->end
6835 }
6836 },
6837 .locals = *locals,
6838 .class_keyword_loc = PM_LOCATION_TOKEN_VALUE(class_keyword),
6839 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
6840 .expression = expression,
6841 .body = body,
6842 .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword)
6843 };
6844
6845 return node;
6846}
6847
6851static pm_source_encoding_node_t *
6852pm_source_encoding_node_create(pm_parser_t *parser, const pm_token_t *token) {
6853 assert(token->type == PM_TOKEN_KEYWORD___ENCODING__);
6854 pm_source_encoding_node_t *node = PM_NODE_ALLOC(parser, pm_source_encoding_node_t);
6855
6856 *node = (pm_source_encoding_node_t) {{
6857 .type = PM_SOURCE_ENCODING_NODE,
6858 .flags = PM_NODE_FLAG_STATIC_LITERAL,
6859 .node_id = PM_NODE_IDENTIFY(parser),
6860 .location = PM_LOCATION_TOKEN_VALUE(token)
6861 }};
6862
6863 return node;
6864}
6865
6869static pm_source_file_node_t*
6870pm_source_file_node_create(pm_parser_t *parser, const pm_token_t *file_keyword) {
6871 pm_source_file_node_t *node = PM_NODE_ALLOC(parser, pm_source_file_node_t);
6872 assert(file_keyword->type == PM_TOKEN_KEYWORD___FILE__);
6873
6874 pm_node_flags_t flags = 0;
6875
6876 switch (parser->frozen_string_literal) {
6877 case PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED:
6878 flags |= PM_STRING_FLAGS_MUTABLE;
6879 break;
6880 case PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED:
6881 flags |= PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN;
6882 break;
6883 }
6884
6885 *node = (pm_source_file_node_t) {
6886 {
6887 .type = PM_SOURCE_FILE_NODE,
6888 .flags = flags,
6889 .node_id = PM_NODE_IDENTIFY(parser),
6890 .location = PM_LOCATION_TOKEN_VALUE(file_keyword),
6891 },
6892 .filepath = parser->filepath
6893 };
6894
6895 return node;
6896}
6897
6901static pm_source_line_node_t *
6902pm_source_line_node_create(pm_parser_t *parser, const pm_token_t *token) {
6903 assert(token->type == PM_TOKEN_KEYWORD___LINE__);
6904 pm_source_line_node_t *node = PM_NODE_ALLOC(parser, pm_source_line_node_t);
6905
6906 *node = (pm_source_line_node_t) {{
6907 .type = PM_SOURCE_LINE_NODE,
6908 .flags = PM_NODE_FLAG_STATIC_LITERAL,
6909 .node_id = PM_NODE_IDENTIFY(parser),
6910 .location = PM_LOCATION_TOKEN_VALUE(token)
6911 }};
6912
6913 return node;
6914}
6915
6919static pm_splat_node_t *
6920pm_splat_node_create(pm_parser_t *parser, const pm_token_t *operator, pm_node_t *expression) {
6921 pm_splat_node_t *node = PM_NODE_ALLOC(parser, pm_splat_node_t);
6922
6923 *node = (pm_splat_node_t) {
6924 {
6925 .type = PM_SPLAT_NODE,
6926 .node_id = PM_NODE_IDENTIFY(parser),
6927 .location = {
6928 .start = operator->start,
6929 .end = (expression == NULL ? operator->end : expression->location.end)
6930 }
6931 },
6932 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
6933 .expression = expression
6934 };
6935
6936 return node;
6937}
6938
6942static pm_statements_node_t *
6943pm_statements_node_create(pm_parser_t *parser) {
6944 pm_statements_node_t *node = PM_NODE_ALLOC(parser, pm_statements_node_t);
6945
6946 *node = (pm_statements_node_t) {
6947 {
6948 .type = PM_STATEMENTS_NODE,
6949 .node_id = PM_NODE_IDENTIFY(parser),
6950 .location = PM_LOCATION_NULL_VALUE(parser)
6951 },
6952 .body = { 0 }
6953 };
6954
6955 return node;
6956}
6957
6961static size_t
6962pm_statements_node_body_length(pm_statements_node_t *node) {
6963 return node && node->body.size;
6964}
6965
6969static void
6970pm_statements_node_location_set(pm_statements_node_t *node, const uint8_t *start, const uint8_t *end) {
6971 node->base.location = (pm_location_t) { .start = start, .end = end };
6972}
6973
6978static inline void
6979pm_statements_node_body_update(pm_statements_node_t *node, pm_node_t *statement) {
6980 if (pm_statements_node_body_length(node) == 0 || statement->location.start < node->base.location.start) {
6981 node->base.location.start = statement->location.start;
6982 }
6983
6984 if (statement->location.end > node->base.location.end) {
6985 node->base.location.end = statement->location.end;
6986 }
6987}
6988
6992static void
6993pm_statements_node_body_append(pm_parser_t *parser, pm_statements_node_t *node, pm_node_t *statement, bool newline) {
6994 pm_statements_node_body_update(node, statement);
6995
6996 if (node->body.size > 0) {
6997 const pm_node_t *previous = node->body.nodes[node->body.size - 1];
6998
6999 switch (PM_NODE_TYPE(previous)) {
7000 case PM_BREAK_NODE:
7001 case PM_NEXT_NODE:
7002 case PM_REDO_NODE:
7003 case PM_RETRY_NODE:
7004 case PM_RETURN_NODE:
7005 pm_parser_warn_node(parser, statement, PM_WARN_UNREACHABLE_STATEMENT);
7006 break;
7007 default:
7008 break;
7009 }
7010 }
7011
7012 pm_node_list_append(&node->body, statement);
7013 if (newline) pm_node_flag_set(statement, PM_NODE_FLAG_NEWLINE);
7014}
7015
7019static void
7020pm_statements_node_body_prepend(pm_statements_node_t *node, pm_node_t *statement) {
7021 pm_statements_node_body_update(node, statement);
7022 pm_node_list_prepend(&node->body, statement);
7023 pm_node_flag_set(statement, PM_NODE_FLAG_NEWLINE);
7024}
7025
7029static inline pm_string_node_t *
7030pm_string_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing, const pm_string_t *string) {
7031 pm_string_node_t *node = PM_NODE_ALLOC(parser, pm_string_node_t);
7032 pm_node_flags_t flags = 0;
7033
7034 switch (parser->frozen_string_literal) {
7035 case PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED:
7036 flags = PM_STRING_FLAGS_MUTABLE;
7037 break;
7038 case PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED:
7039 flags = PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN;
7040 break;
7041 }
7042
7043 *node = (pm_string_node_t) {
7044 {
7045 .type = PM_STRING_NODE,
7046 .flags = flags,
7047 .node_id = PM_NODE_IDENTIFY(parser),
7048 .location = {
7049 .start = (opening->type == PM_TOKEN_NOT_PROVIDED ? content->start : opening->start),
7050 .end = (closing->type == PM_TOKEN_NOT_PROVIDED ? content->end : closing->end)
7051 }
7052 },
7053 .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
7054 .content_loc = PM_LOCATION_TOKEN_VALUE(content),
7055 .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
7056 .unescaped = *string
7057 };
7058
7059 return node;
7060}
7061
7065static pm_string_node_t *
7066pm_string_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing) {
7067 return pm_string_node_create_unescaped(parser, opening, content, closing, &PM_STRING_EMPTY);
7068}
7069
7074static pm_string_node_t *
7075pm_string_node_create_current_string(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing) {
7076 pm_string_node_t *node = pm_string_node_create_unescaped(parser, opening, content, closing, &parser->current_string);
7077 parser->current_string = PM_STRING_EMPTY;
7078 return node;
7079}
7080
7084static pm_super_node_t *
7085pm_super_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_arguments_t *arguments) {
7086 assert(keyword->type == PM_TOKEN_KEYWORD_SUPER);
7087 pm_super_node_t *node = PM_NODE_ALLOC(parser, pm_super_node_t);
7088
7089 const uint8_t *end = pm_arguments_end(arguments);
7090 if (end == NULL) {
7091 assert(false && "unreachable");
7092 }
7093
7094 *node = (pm_super_node_t) {
7095 {
7096 .type = PM_SUPER_NODE,
7097 .node_id = PM_NODE_IDENTIFY(parser),
7098 .location = {
7099 .start = keyword->start,
7100 .end = end,
7101 }
7102 },
7103 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
7104 .lparen_loc = arguments->opening_loc,
7105 .arguments = arguments->arguments,
7106 .rparen_loc = arguments->closing_loc,
7107 .block = arguments->block
7108 };
7109
7110 return node;
7111}
7112
7117static bool
7118pm_ascii_only_p(const pm_string_t *contents) {
7119 const size_t length = pm_string_length(contents);
7120 const uint8_t *source = pm_string_source(contents);
7121
7122 for (size_t index = 0; index < length; index++) {
7123 if (source[index] & 0x80) return false;
7124 }
7125
7126 return true;
7127}
7128
7132static void
7133parse_symbol_encoding_validate_utf8(pm_parser_t *parser, const pm_token_t *location, const pm_string_t *contents) {
7134 for (const uint8_t *cursor = pm_string_source(contents), *end = cursor + pm_string_length(contents); cursor < end;) {
7135 size_t width = pm_encoding_utf_8_char_width(cursor, end - cursor);
7136
7137 if (width == 0) {
7138 pm_parser_err(parser, location->start, location->end, PM_ERR_INVALID_SYMBOL);
7139 break;
7140 }
7141
7142 cursor += width;
7143 }
7144}
7145
7150static void
7151parse_symbol_encoding_validate_other(pm_parser_t *parser, const pm_token_t *location, const pm_string_t *contents) {
7152 const pm_encoding_t *encoding = parser->encoding;
7153
7154 for (const uint8_t *cursor = pm_string_source(contents), *end = cursor + pm_string_length(contents); cursor < end;) {
7155 size_t width = encoding->char_width(cursor, end - cursor);
7156
7157 if (width == 0) {
7158 pm_parser_err(parser, location->start, location->end, PM_ERR_INVALID_SYMBOL);
7159 break;
7160 }
7161
7162 cursor += width;
7163 }
7164}
7165
7175static inline pm_node_flags_t
7176parse_symbol_encoding(pm_parser_t *parser, const pm_token_t *location, const pm_string_t *contents, bool validate) {
7177 if (parser->explicit_encoding != NULL) {
7178 // A Symbol may optionally have its encoding explicitly set. This will
7179 // happen if an escape sequence results in a non-ASCII code point.
7180 if (parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) {
7181 if (validate) parse_symbol_encoding_validate_utf8(parser, location, contents);
7182 return PM_SYMBOL_FLAGS_FORCED_UTF8_ENCODING;
7183 } else if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
7184 return PM_SYMBOL_FLAGS_FORCED_BINARY_ENCODING;
7185 } else if (validate) {
7186 parse_symbol_encoding_validate_other(parser, location, contents);
7187 }
7188 } else if (pm_ascii_only_p(contents)) {
7189 // Ruby stipulates that all source files must use an ASCII-compatible
7190 // encoding. Thus, all symbols appearing in source are eligible for
7191 // "downgrading" to US-ASCII.
7192 return PM_SYMBOL_FLAGS_FORCED_US_ASCII_ENCODING;
7193 } else if (validate) {
7194 parse_symbol_encoding_validate_other(parser, location, contents);
7195 }
7196
7197 return 0;
7198}
7199
7200static pm_node_flags_t
7201parse_and_validate_regular_expression_encoding_modifier(pm_parser_t *parser, const pm_string_t *source, bool ascii_only, pm_node_flags_t flags, char modifier, const pm_encoding_t *modifier_encoding) {
7202 assert ((modifier == 'n' && modifier_encoding == PM_ENCODING_ASCII_8BIT_ENTRY) ||
7203 (modifier == 'u' && modifier_encoding == PM_ENCODING_UTF_8_ENTRY) ||
7204 (modifier == 'e' && modifier_encoding == PM_ENCODING_EUC_JP_ENTRY) ||
7205 (modifier == 's' && modifier_encoding == PM_ENCODING_WINDOWS_31J_ENTRY));
7206
7207 // There's special validation logic used if a string does not contain any character escape sequences.
7208 if (parser->explicit_encoding == NULL) {
7209 // If an ASCII-only string without character escapes is used with an encoding modifier, then resulting Regexp
7210 // has the modifier encoding, unless the ASCII-8BIT modifier is used, in which case the Regexp "downgrades" to
7211 // the US-ASCII encoding.
7212 if (ascii_only) {
7213 return modifier == 'n' ? PM_REGULAR_EXPRESSION_FLAGS_FORCED_US_ASCII_ENCODING : flags;
7214 }
7215
7216 if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
7217 if (!ascii_only) {
7218 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_MULTIBYTE_CHAR, parser->encoding->name);
7219 }
7220 } else if (parser->encoding != modifier_encoding) {
7221 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_REGEXP_ENCODING_OPTION_MISMATCH, modifier, parser->encoding->name);
7222
7223 if (modifier == 'n' && !ascii_only) {
7224 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_REGEXP_NON_ESCAPED_MBC, (int) pm_string_length(source), (const char *) pm_string_source(source));
7225 }
7226 }
7227
7228 return flags;
7229 }
7230
7231 // TODO (nirvdrum 21-Feb-2024): To validate regexp sources with character escape sequences we need to know whether hex or Unicode escape sequences were used and Prism doesn't currently provide that data. We handle a subset of unambiguous cases in the meanwhile.
7232 bool mixed_encoding = false;
7233
7234 if (mixed_encoding) {
7235 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_MULTIBYTE_ESCAPE, (int) pm_string_length(source), (const char *) pm_string_source(source));
7236 } else if (modifier != 'n' && parser->explicit_encoding == PM_ENCODING_ASCII_8BIT_ENTRY) {
7237 // TODO (nirvdrum 21-Feb-2024): Validate the content is valid in the modifier encoding. Do this on-demand so we don't pay the cost of computation unnecessarily.
7238 bool valid_string_in_modifier_encoding = true;
7239
7240 if (!valid_string_in_modifier_encoding) {
7241 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_MULTIBYTE_ESCAPE, (int) pm_string_length(source), (const char *) pm_string_source(source));
7242 }
7243 } else if (modifier != 'u' && parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) {
7244 // TODO (nirvdrum 21-Feb-2024): There's currently no way to tell if the source used hex or Unicode character escapes from `explicit_encoding` alone. If the source encoding was already UTF-8, both character escape types would set `explicit_encoding` to UTF-8, but need to be processed differently. Skip for now.
7245 if (parser->encoding != PM_ENCODING_UTF_8_ENTRY) {
7246 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_REGEXP_INCOMPAT_CHAR_ENCODING, (int) pm_string_length(source), (const char *) pm_string_source(source));
7247 }
7248 }
7249
7250 // We've determined the encoding would naturally be EUC-JP and there is no need to force the encoding to anything else.
7251 return flags;
7252}
7253
7260static pm_node_flags_t
7261parse_and_validate_regular_expression_encoding(pm_parser_t *parser, const pm_string_t *source, bool ascii_only, pm_node_flags_t flags) {
7262 // TODO (nirvdrum 22-Feb-2024): CRuby reports a special Regexp-specific error for invalid Unicode ranges. We either need to scan again or modify the "invalid Unicode escape sequence" message we already report.
7263 bool valid_unicode_range = true;
7264 if (parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY && !valid_unicode_range) {
7265 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_REGEXP_INVALID_UNICODE_RANGE, (int) pm_string_length(source), (const char *) pm_string_source(source));
7266 return flags;
7267 }
7268
7269 // US-ASCII strings do not admit multi-byte character literals. However, character escape sequences corresponding
7270 // to multi-byte characters are allowed.
7271 if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY && parser->explicit_encoding == NULL && !ascii_only) {
7272 // CRuby will continue processing even though a SyntaxError has already been detected. It may result in the
7273 // following error message appearing twice. We do the same for compatibility.
7274 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_MULTIBYTE_CHAR, parser->encoding->name);
7275 }
7276
7284
7285 if (flags & PM_REGULAR_EXPRESSION_FLAGS_ASCII_8BIT) {
7286 return parse_and_validate_regular_expression_encoding_modifier(parser, source, ascii_only, flags, 'n', PM_ENCODING_ASCII_8BIT_ENTRY);
7287 }
7288
7289 if (flags & PM_REGULAR_EXPRESSION_FLAGS_UTF_8) {
7290 return parse_and_validate_regular_expression_encoding_modifier(parser, source, ascii_only, flags, 'u', PM_ENCODING_UTF_8_ENTRY);
7291 }
7292
7293 if (flags & PM_REGULAR_EXPRESSION_FLAGS_EUC_JP) {
7294 return parse_and_validate_regular_expression_encoding_modifier(parser, source, ascii_only, flags, 'e', PM_ENCODING_EUC_JP_ENTRY);
7295 }
7296
7297 if (flags & PM_REGULAR_EXPRESSION_FLAGS_WINDOWS_31J) {
7298 return parse_and_validate_regular_expression_encoding_modifier(parser, source, ascii_only, flags, 's', PM_ENCODING_WINDOWS_31J_ENTRY);
7299 }
7300
7301 // At this point no encoding modifiers will be present on the regular expression as they would have already
7302 // been processed. Ruby stipulates that all source files must use an ASCII-compatible encoding. Thus, all
7303 // regular expressions without an encoding modifier appearing in source are eligible for "downgrading" to US-ASCII.
7304 if (ascii_only) {
7305 return PM_REGULAR_EXPRESSION_FLAGS_FORCED_US_ASCII_ENCODING;
7306 }
7307
7308 // A Regexp may optionally have its encoding explicitly set via a character escape sequence in the source string
7309 // or by specifying a modifier.
7310 //
7311 // NB: an explicitly set encoding is ignored by Ruby if the Regexp consists of only US ASCII code points.
7312 if (parser->explicit_encoding != NULL) {
7313 if (parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) {
7314 return PM_REGULAR_EXPRESSION_FLAGS_FORCED_UTF8_ENCODING;
7315 } else if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
7316 return PM_REGULAR_EXPRESSION_FLAGS_FORCED_BINARY_ENCODING;
7317 }
7318 }
7319
7320 return 0;
7321}
7322
7327static pm_symbol_node_t *
7328pm_symbol_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *value, const pm_token_t *closing, const pm_string_t *unescaped, pm_node_flags_t flags) {
7329 pm_symbol_node_t *node = PM_NODE_ALLOC(parser, pm_symbol_node_t);
7330
7331 *node = (pm_symbol_node_t) {
7332 {
7333 .type = PM_SYMBOL_NODE,
7334 .flags = PM_NODE_FLAG_STATIC_LITERAL | flags,
7335 .node_id = PM_NODE_IDENTIFY(parser),
7336 .location = {
7337 .start = (opening->type == PM_TOKEN_NOT_PROVIDED ? value->start : opening->start),
7338 .end = (closing->type == PM_TOKEN_NOT_PROVIDED ? value->end : closing->end)
7339 }
7340 },
7341 .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
7342 .value_loc = PM_LOCATION_TOKEN_VALUE(value),
7343 .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
7344 .unescaped = *unescaped
7345 };
7346
7347 return node;
7348}
7349
7353static inline pm_symbol_node_t *
7354pm_symbol_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *value, const pm_token_t *closing) {
7355 return pm_symbol_node_create_unescaped(parser, opening, value, closing, &PM_STRING_EMPTY, 0);
7356}
7357
7361static pm_symbol_node_t *
7362pm_symbol_node_create_current_string(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *value, const pm_token_t *closing) {
7363 pm_symbol_node_t *node = pm_symbol_node_create_unescaped(parser, opening, value, closing, &parser->current_string, parse_symbol_encoding(parser, value, &parser->current_string, false));
7364 parser->current_string = PM_STRING_EMPTY;
7365 return node;
7366}
7367
7371static pm_symbol_node_t *
7372pm_symbol_node_label_create(pm_parser_t *parser, const pm_token_t *token) {
7373 pm_symbol_node_t *node;
7374
7375 switch (token->type) {
7376 case PM_TOKEN_LABEL: {
7377 pm_token_t opening = not_provided(parser);
7378 pm_token_t closing = { .type = PM_TOKEN_LABEL_END, .start = token->end - 1, .end = token->end };
7379
7380 pm_token_t label = { .type = PM_TOKEN_LABEL, .start = token->start, .end = token->end - 1 };
7381 node = pm_symbol_node_create(parser, &opening, &label, &closing);
7382
7383 assert((label.end - label.start) >= 0);
7384 pm_string_shared_init(&node->unescaped, label.start, label.end);
7385 pm_node_flag_set((pm_node_t *) node, parse_symbol_encoding(parser, &label, &node->unescaped, false));
7386
7387 break;
7388 }
7389 case PM_TOKEN_MISSING: {
7390 pm_token_t opening = not_provided(parser);
7391 pm_token_t closing = not_provided(parser);
7392
7393 pm_token_t label = { .type = PM_TOKEN_LABEL, .start = token->start, .end = token->end };
7394 node = pm_symbol_node_create(parser, &opening, &label, &closing);
7395 break;
7396 }
7397 default:
7398 assert(false && "unreachable");
7399 node = NULL;
7400 break;
7401 }
7402
7403 return node;
7404}
7405
7409static pm_symbol_node_t *
7410pm_symbol_node_synthesized_create(pm_parser_t *parser, const char *content) {
7411 pm_symbol_node_t *node = PM_NODE_ALLOC(parser, pm_symbol_node_t);
7412
7413 *node = (pm_symbol_node_t) {
7414 {
7415 .type = PM_SYMBOL_NODE,
7416 .flags = PM_NODE_FLAG_STATIC_LITERAL | PM_SYMBOL_FLAGS_FORCED_US_ASCII_ENCODING,
7417 .node_id = PM_NODE_IDENTIFY(parser),
7418 .location = PM_LOCATION_NULL_VALUE(parser)
7419 },
7420 .value_loc = PM_LOCATION_NULL_VALUE(parser),
7421 .unescaped = { 0 }
7422 };
7423
7424 pm_string_constant_init(&node->unescaped, content, strlen(content));
7425 return node;
7426}
7427
7431static bool
7432pm_symbol_node_label_p(pm_node_t *node) {
7433 const uint8_t *end = NULL;
7434
7435 switch (PM_NODE_TYPE(node)) {
7436 case PM_SYMBOL_NODE:
7437 end = ((pm_symbol_node_t *) node)->closing_loc.end;
7438 break;
7439 case PM_INTERPOLATED_SYMBOL_NODE:
7440 end = ((pm_interpolated_symbol_node_t *) node)->closing_loc.end;
7441 break;
7442 default:
7443 return false;
7444 }
7445
7446 return (end != NULL) && (end[-1] == ':');
7447}
7448
7452static pm_symbol_node_t *
7453pm_string_node_to_symbol_node(pm_parser_t *parser, pm_string_node_t *node, const pm_token_t *opening, const pm_token_t *closing) {
7454 pm_symbol_node_t *new_node = PM_NODE_ALLOC(parser, pm_symbol_node_t);
7455
7456 *new_node = (pm_symbol_node_t) {
7457 {
7458 .type = PM_SYMBOL_NODE,
7459 .flags = PM_NODE_FLAG_STATIC_LITERAL,
7460 .node_id = PM_NODE_IDENTIFY(parser),
7461 .location = {
7462 .start = opening->start,
7463 .end = closing->end
7464 }
7465 },
7466 .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
7467 .value_loc = node->content_loc,
7468 .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
7469 .unescaped = node->unescaped
7470 };
7471
7472 pm_token_t content = { .type = PM_TOKEN_IDENTIFIER, .start = node->content_loc.start, .end = node->content_loc.end };
7473 pm_node_flag_set((pm_node_t *) new_node, parse_symbol_encoding(parser, &content, &node->unescaped, true));
7474
7475 // We are explicitly _not_ using pm_node_destroy here because we don't want
7476 // to trash the unescaped string. We could instead copy the string if we
7477 // know that it is owned, but we're taking the fast path for now.
7478 xfree(node);
7479
7480 return new_node;
7481}
7482
7486static pm_string_node_t *
7487pm_symbol_node_to_string_node(pm_parser_t *parser, pm_symbol_node_t *node) {
7488 pm_string_node_t *new_node = PM_NODE_ALLOC(parser, pm_string_node_t);
7489 pm_node_flags_t flags = 0;
7490
7491 switch (parser->frozen_string_literal) {
7492 case PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED:
7493 flags = PM_STRING_FLAGS_MUTABLE;
7494 break;
7495 case PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED:
7496 flags = PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN;
7497 break;
7498 }
7499
7500 *new_node = (pm_string_node_t) {
7501 {
7502 .type = PM_STRING_NODE,
7503 .flags = flags,
7504 .node_id = PM_NODE_IDENTIFY(parser),
7505 .location = node->base.location
7506 },
7507 .opening_loc = node->opening_loc,
7508 .content_loc = node->value_loc,
7509 .closing_loc = node->closing_loc,
7510 .unescaped = node->unescaped
7511 };
7512
7513 // We are explicitly _not_ using pm_node_destroy here because we don't want
7514 // to trash the unescaped string. We could instead copy the string if we
7515 // know that it is owned, but we're taking the fast path for now.
7516 xfree(node);
7517
7518 return new_node;
7519}
7520
7524static pm_true_node_t *
7525pm_true_node_create(pm_parser_t *parser, const pm_token_t *token) {
7526 assert(token->type == PM_TOKEN_KEYWORD_TRUE);
7527 pm_true_node_t *node = PM_NODE_ALLOC(parser, pm_true_node_t);
7528
7529 *node = (pm_true_node_t) {{
7530 .type = PM_TRUE_NODE,
7531 .flags = PM_NODE_FLAG_STATIC_LITERAL,
7532 .node_id = PM_NODE_IDENTIFY(parser),
7533 .location = PM_LOCATION_TOKEN_VALUE(token)
7534 }};
7535
7536 return node;
7537}
7538
7542static pm_true_node_t *
7543pm_true_node_synthesized_create(pm_parser_t *parser) {
7544 pm_true_node_t *node = PM_NODE_ALLOC(parser, pm_true_node_t);
7545
7546 *node = (pm_true_node_t) {{
7547 .type = PM_TRUE_NODE,
7548 .flags = PM_NODE_FLAG_STATIC_LITERAL,
7549 .node_id = PM_NODE_IDENTIFY(parser),
7550 .location = { .start = parser->start, .end = parser->end }
7551 }};
7552
7553 return node;
7554}
7555
7559static pm_undef_node_t *
7560pm_undef_node_create(pm_parser_t *parser, const pm_token_t *token) {
7561 assert(token->type == PM_TOKEN_KEYWORD_UNDEF);
7562 pm_undef_node_t *node = PM_NODE_ALLOC(parser, pm_undef_node_t);
7563
7564 *node = (pm_undef_node_t) {
7565 {
7566 .type = PM_UNDEF_NODE,
7567 .node_id = PM_NODE_IDENTIFY(parser),
7568 .location = PM_LOCATION_TOKEN_VALUE(token),
7569 },
7570 .keyword_loc = PM_LOCATION_TOKEN_VALUE(token),
7571 .names = { 0 }
7572 };
7573
7574 return node;
7575}
7576
7580static void
7581pm_undef_node_append(pm_undef_node_t *node, pm_node_t *name) {
7582 node->base.location.end = name->location.end;
7583 pm_node_list_append(&node->names, name);
7584}
7585
7589static pm_unless_node_t *
7590pm_unless_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *predicate, const pm_token_t *then_keyword, pm_statements_node_t *statements) {
7591 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
7592 pm_unless_node_t *node = PM_NODE_ALLOC(parser, pm_unless_node_t);
7593
7594 const uint8_t *end;
7595 if (statements != NULL) {
7596 end = statements->base.location.end;
7597 } else {
7598 end = predicate->location.end;
7599 }
7600
7601 *node = (pm_unless_node_t) {
7602 {
7603 .type = PM_UNLESS_NODE,
7604 .flags = PM_NODE_FLAG_NEWLINE,
7605 .node_id = PM_NODE_IDENTIFY(parser),
7606 .location = {
7607 .start = keyword->start,
7608 .end = end
7609 },
7610 },
7611 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
7612 .predicate = predicate,
7613 .then_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(then_keyword),
7614 .statements = statements,
7615 .else_clause = NULL,
7616 .end_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
7617 };
7618
7619 return node;
7620}
7621
7625static pm_unless_node_t *
7626pm_unless_node_modifier_create(pm_parser_t *parser, pm_node_t *statement, const pm_token_t *unless_keyword, pm_node_t *predicate) {
7627 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
7628 pm_unless_node_t *node = PM_NODE_ALLOC(parser, pm_unless_node_t);
7629
7630 pm_statements_node_t *statements = pm_statements_node_create(parser);
7631 pm_statements_node_body_append(parser, statements, statement, true);
7632
7633 *node = (pm_unless_node_t) {
7634 {
7635 .type = PM_UNLESS_NODE,
7636 .flags = PM_NODE_FLAG_NEWLINE,
7637 .node_id = PM_NODE_IDENTIFY(parser),
7638 .location = {
7639 .start = statement->location.start,
7640 .end = predicate->location.end
7641 },
7642 },
7643 .keyword_loc = PM_LOCATION_TOKEN_VALUE(unless_keyword),
7644 .predicate = predicate,
7645 .then_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
7646 .statements = statements,
7647 .else_clause = NULL,
7648 .end_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
7649 };
7650
7651 return node;
7652}
7653
7654static inline void
7655pm_unless_node_end_keyword_loc_set(pm_unless_node_t *node, const pm_token_t *end_keyword) {
7656 node->end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword);
7657 node->base.location.end = end_keyword->end;
7658}
7659
7665static void
7666pm_loop_modifier_block_exits(pm_parser_t *parser, pm_statements_node_t *statements) {
7667 assert(parser->current_block_exits != NULL);
7668
7669 // All of the block exits that we want to remove should be within the
7670 // statements, and since we are modifying the statements, we shouldn't have
7671 // to check the end location.
7672 const uint8_t *start = statements->base.location.start;
7673
7674 for (size_t index = parser->current_block_exits->size; index > 0; index--) {
7675 pm_node_t *block_exit = parser->current_block_exits->nodes[index - 1];
7676 if (block_exit->location.start < start) break;
7677
7678 // Implicitly remove from the list by lowering the size.
7679 parser->current_block_exits->size--;
7680 }
7681}
7682
7686static pm_until_node_t *
7687pm_until_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *do_keyword, const pm_token_t *closing, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
7688 pm_until_node_t *node = PM_NODE_ALLOC(parser, pm_until_node_t);
7689 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
7690
7691 *node = (pm_until_node_t) {
7692 {
7693 .type = PM_UNTIL_NODE,
7694 .flags = flags,
7695 .node_id = PM_NODE_IDENTIFY(parser),
7696 .location = {
7697 .start = keyword->start,
7698 .end = closing->end,
7699 },
7700 },
7701 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
7702 .do_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(do_keyword),
7703 .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
7704 .predicate = predicate,
7705 .statements = statements
7706 };
7707
7708 return node;
7709}
7710
7714static pm_until_node_t *
7715pm_until_node_modifier_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
7716 pm_until_node_t *node = PM_NODE_ALLOC(parser, pm_until_node_t);
7717 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
7718 pm_loop_modifier_block_exits(parser, statements);
7719
7720 *node = (pm_until_node_t) {
7721 {
7722 .type = PM_UNTIL_NODE,
7723 .flags = flags,
7724 .node_id = PM_NODE_IDENTIFY(parser),
7725 .location = {
7726 .start = statements->base.location.start,
7727 .end = predicate->location.end,
7728 },
7729 },
7730 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
7731 .do_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
7732 .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
7733 .predicate = predicate,
7734 .statements = statements
7735 };
7736
7737 return node;
7738}
7739
7743static pm_when_node_t *
7744pm_when_node_create(pm_parser_t *parser, const pm_token_t *keyword) {
7745 pm_when_node_t *node = PM_NODE_ALLOC(parser, pm_when_node_t);
7746
7747 *node = (pm_when_node_t) {
7748 {
7749 .type = PM_WHEN_NODE,
7750 .node_id = PM_NODE_IDENTIFY(parser),
7751 .location = {
7752 .start = keyword->start,
7753 .end = NULL
7754 }
7755 },
7756 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
7757 .statements = NULL,
7758 .then_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
7759 .conditions = { 0 }
7760 };
7761
7762 return node;
7763}
7764
7768static void
7769pm_when_node_conditions_append(pm_when_node_t *node, pm_node_t *condition) {
7770 node->base.location.end = condition->location.end;
7771 pm_node_list_append(&node->conditions, condition);
7772}
7773
7777static inline void
7778pm_when_node_then_keyword_loc_set(pm_when_node_t *node, const pm_token_t *then_keyword) {
7779 node->base.location.end = then_keyword->end;
7780 node->then_keyword_loc = PM_LOCATION_TOKEN_VALUE(then_keyword);
7781}
7782
7786static void
7787pm_when_node_statements_set(pm_when_node_t *node, pm_statements_node_t *statements) {
7788 if (statements->base.location.end > node->base.location.end) {
7789 node->base.location.end = statements->base.location.end;
7790 }
7791
7792 node->statements = statements;
7793}
7794
7798static pm_while_node_t *
7799pm_while_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *do_keyword, const pm_token_t *closing, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
7800 pm_while_node_t *node = PM_NODE_ALLOC(parser, pm_while_node_t);
7801 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
7802
7803 *node = (pm_while_node_t) {
7804 {
7805 .type = PM_WHILE_NODE,
7806 .flags = flags,
7807 .node_id = PM_NODE_IDENTIFY(parser),
7808 .location = {
7809 .start = keyword->start,
7810 .end = closing->end
7811 },
7812 },
7813 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
7814 .do_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(do_keyword),
7815 .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
7816 .predicate = predicate,
7817 .statements = statements
7818 };
7819
7820 return node;
7821}
7822
7826static pm_while_node_t *
7827pm_while_node_modifier_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
7828 pm_while_node_t *node = PM_NODE_ALLOC(parser, pm_while_node_t);
7829 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
7830 pm_loop_modifier_block_exits(parser, statements);
7831
7832 *node = (pm_while_node_t) {
7833 {
7834 .type = PM_WHILE_NODE,
7835 .flags = flags,
7836 .node_id = PM_NODE_IDENTIFY(parser),
7837 .location = {
7838 .start = statements->base.location.start,
7839 .end = predicate->location.end
7840 },
7841 },
7842 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
7843 .do_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
7844 .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
7845 .predicate = predicate,
7846 .statements = statements
7847 };
7848
7849 return node;
7850}
7851
7855static pm_while_node_t *
7856pm_while_node_synthesized_create(pm_parser_t *parser, pm_node_t *predicate, pm_statements_node_t *statements) {
7857 pm_while_node_t *node = PM_NODE_ALLOC(parser, pm_while_node_t);
7858
7859 *node = (pm_while_node_t) {
7860 {
7861 .type = PM_WHILE_NODE,
7862 .node_id = PM_NODE_IDENTIFY(parser),
7863 .location = PM_LOCATION_NULL_VALUE(parser)
7864 },
7865 .keyword_loc = PM_LOCATION_NULL_VALUE(parser),
7866 .do_keyword_loc = PM_LOCATION_NULL_VALUE(parser),
7867 .closing_loc = PM_LOCATION_NULL_VALUE(parser),
7868 .predicate = predicate,
7869 .statements = statements
7870 };
7871
7872 return node;
7873}
7874
7879static pm_x_string_node_t *
7880pm_xstring_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing, const pm_string_t *unescaped) {
7881 pm_x_string_node_t *node = PM_NODE_ALLOC(parser, pm_x_string_node_t);
7882
7883 *node = (pm_x_string_node_t) {
7884 {
7885 .type = PM_X_STRING_NODE,
7886 .flags = PM_STRING_FLAGS_FROZEN,
7887 .node_id = PM_NODE_IDENTIFY(parser),
7888 .location = {
7889 .start = opening->start,
7890 .end = closing->end
7891 },
7892 },
7893 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
7894 .content_loc = PM_LOCATION_TOKEN_VALUE(content),
7895 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
7896 .unescaped = *unescaped
7897 };
7898
7899 return node;
7900}
7901
7905static inline pm_x_string_node_t *
7906pm_xstring_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing) {
7907 return pm_xstring_node_create_unescaped(parser, opening, content, closing, &PM_STRING_EMPTY);
7908}
7909
7913static pm_yield_node_t *
7914pm_yield_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_location_t *lparen_loc, pm_arguments_node_t *arguments, const pm_location_t *rparen_loc) {
7915 pm_yield_node_t *node = PM_NODE_ALLOC(parser, pm_yield_node_t);
7916
7917 const uint8_t *end;
7918 if (rparen_loc->start != NULL) {
7919 end = rparen_loc->end;
7920 } else if (arguments != NULL) {
7921 end = arguments->base.location.end;
7922 } else if (lparen_loc->start != NULL) {
7923 end = lparen_loc->end;
7924 } else {
7925 end = keyword->end;
7926 }
7927
7928 *node = (pm_yield_node_t) {
7929 {
7930 .type = PM_YIELD_NODE,
7931 .node_id = PM_NODE_IDENTIFY(parser),
7932 .location = {
7933 .start = keyword->start,
7934 .end = end
7935 },
7936 },
7937 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
7938 .lparen_loc = *lparen_loc,
7939 .arguments = arguments,
7940 .rparen_loc = *rparen_loc
7941 };
7942
7943 return node;
7944}
7945
7946#undef PM_NODE_ALLOC
7947#undef PM_NODE_IDENTIFY
7948
7953static int
7954pm_parser_local_depth_constant_id(pm_parser_t *parser, pm_constant_id_t constant_id) {
7955 pm_scope_t *scope = parser->current_scope;
7956 int depth = 0;
7957
7958 while (scope != NULL) {
7959 if (pm_locals_find(&scope->locals, constant_id) != UINT32_MAX) return depth;
7960 if (scope->closed) break;
7961
7962 scope = scope->previous;
7963 depth++;
7964 }
7965
7966 return -1;
7967}
7968
7974static inline int
7975pm_parser_local_depth(pm_parser_t *parser, pm_token_t *token) {
7976 return pm_parser_local_depth_constant_id(parser, pm_parser_constant_id_token(parser, token));
7977}
7978
7982static inline void
7983pm_parser_local_add(pm_parser_t *parser, pm_constant_id_t constant_id, const uint8_t *start, const uint8_t *end, uint32_t reads) {
7984 pm_locals_write(&parser->current_scope->locals, constant_id, start, end, reads);
7985}
7986
7990static pm_constant_id_t
7991pm_parser_local_add_location(pm_parser_t *parser, const uint8_t *start, const uint8_t *end, uint32_t reads) {
7992 pm_constant_id_t constant_id = pm_parser_constant_id_location(parser, start, end);
7993 if (constant_id != 0) pm_parser_local_add(parser, constant_id, start, end, reads);
7994 return constant_id;
7995}
7996
8000static inline pm_constant_id_t
8001pm_parser_local_add_token(pm_parser_t *parser, pm_token_t *token, uint32_t reads) {
8002 return pm_parser_local_add_location(parser, token->start, token->end, reads);
8003}
8004
8008static pm_constant_id_t
8009pm_parser_local_add_owned(pm_parser_t *parser, uint8_t *start, size_t length) {
8010 pm_constant_id_t constant_id = pm_parser_constant_id_owned(parser, start, length);
8011 if (constant_id != 0) pm_parser_local_add(parser, constant_id, parser->start, parser->start, 1);
8012 return constant_id;
8013}
8014
8018static pm_constant_id_t
8019pm_parser_local_add_constant(pm_parser_t *parser, const char *start, size_t length) {
8020 pm_constant_id_t constant_id = pm_parser_constant_id_constant(parser, start, length);
8021 if (constant_id != 0) pm_parser_local_add(parser, constant_id, parser->start, parser->start, 1);
8022 return constant_id;
8023}
8024
8032static bool
8033pm_parser_parameter_name_check(pm_parser_t *parser, const pm_token_t *name) {
8034 // We want to check whether the parameter name is a numbered parameter or
8035 // not.
8036 pm_refute_numbered_parameter(parser, name->start, name->end);
8037
8038 // Otherwise we'll fetch the constant id for the parameter name and check
8039 // whether it's already in the current scope.
8040 pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, name);
8041
8042 if (pm_locals_find(&parser->current_scope->locals, constant_id) != UINT32_MAX) {
8043 // Add an error if the parameter doesn't start with _ and has been seen before
8044 if ((name->start < name->end) && (*name->start != '_')) {
8045 pm_parser_err_token(parser, name, PM_ERR_PARAMETER_NAME_DUPLICATED);
8046 }
8047 return true;
8048 }
8049 return false;
8050}
8051
8055static void
8056pm_parser_scope_pop(pm_parser_t *parser) {
8057 pm_scope_t *scope = parser->current_scope;
8058 parser->current_scope = scope->previous;
8059 pm_locals_free(&scope->locals);
8060 pm_node_list_free(&scope->implicit_parameters);
8061 xfree(scope);
8062}
8063
8064/******************************************************************************/
8065/* Stack helpers */
8066/******************************************************************************/
8067
8071static inline void
8072pm_state_stack_push(pm_state_stack_t *stack, bool value) {
8073 *stack = (*stack << 1) | (value & 1);
8074}
8075
8079static inline void
8080pm_state_stack_pop(pm_state_stack_t *stack) {
8081 *stack >>= 1;
8082}
8083
8087static inline bool
8088pm_state_stack_p(const pm_state_stack_t *stack) {
8089 return *stack & 1;
8090}
8091
8092static inline void
8093pm_accepts_block_stack_push(pm_parser_t *parser, bool value) {
8094 // Use the negation of the value to prevent stack overflow.
8095 pm_state_stack_push(&parser->accepts_block_stack, !value);
8096}
8097
8098static inline void
8099pm_accepts_block_stack_pop(pm_parser_t *parser) {
8100 pm_state_stack_pop(&parser->accepts_block_stack);
8101}
8102
8103static inline bool
8104pm_accepts_block_stack_p(pm_parser_t *parser) {
8105 return !pm_state_stack_p(&parser->accepts_block_stack);
8106}
8107
8108static inline void
8109pm_do_loop_stack_push(pm_parser_t *parser, bool value) {
8110 pm_state_stack_push(&parser->do_loop_stack, value);
8111}
8112
8113static inline void
8114pm_do_loop_stack_pop(pm_parser_t *parser) {
8115 pm_state_stack_pop(&parser->do_loop_stack);
8116}
8117
8118static inline bool
8119pm_do_loop_stack_p(pm_parser_t *parser) {
8120 return pm_state_stack_p(&parser->do_loop_stack);
8121}
8122
8123/******************************************************************************/
8124/* Lexer check helpers */
8125/******************************************************************************/
8126
8131static inline uint8_t
8132peek_at(const pm_parser_t *parser, const uint8_t *cursor) {
8133 if (cursor < parser->end) {
8134 return *cursor;
8135 } else {
8136 return '\0';
8137 }
8138}
8139
8145static inline uint8_t
8146peek_offset(pm_parser_t *parser, ptrdiff_t offset) {
8147 return peek_at(parser, parser->current.end + offset);
8148}
8149
8154static inline uint8_t
8155peek(const pm_parser_t *parser) {
8156 return peek_at(parser, parser->current.end);
8157}
8158
8163static inline bool
8164match(pm_parser_t *parser, uint8_t value) {
8165 if (peek(parser) == value) {
8166 parser->current.end++;
8167 return true;
8168 }
8169 return false;
8170}
8171
8176static inline size_t
8177match_eol_at(pm_parser_t *parser, const uint8_t *cursor) {
8178 if (peek_at(parser, cursor) == '\n') {
8179 return 1;
8180 }
8181 if (peek_at(parser, cursor) == '\r' && peek_at(parser, cursor + 1) == '\n') {
8182 return 2;
8183 }
8184 return 0;
8185}
8186
8192static inline size_t
8193match_eol_offset(pm_parser_t *parser, ptrdiff_t offset) {
8194 return match_eol_at(parser, parser->current.end + offset);
8195}
8196
8202static inline size_t
8203match_eol(pm_parser_t *parser) {
8204 return match_eol_at(parser, parser->current.end);
8205}
8206
/**
 * Find the first '\n' within `length` bytes starting at `cursor`. Returns a
 * pointer to it, or NULL when there is none. `length` must not be negative.
 */
static inline const uint8_t *
next_newline(const uint8_t *cursor, ptrdiff_t length) {
    assert(length >= 0);
    const size_t span = (size_t) length;

    // A plain byte search is fine here: none of the supported encodings use
    // \n as part of a multi-byte character.
    const uint8_t *found = memchr(cursor, '\n', span);
    return found;
}
8219
8223static inline bool
8224ambiguous_operator_p(const pm_parser_t *parser, bool space_seen) {
8225 return !lex_state_p(parser, PM_LEX_STATE_CLASS | PM_LEX_STATE_DOT | PM_LEX_STATE_FNAME | PM_LEX_STATE_ENDFN) && space_seen && !pm_char_is_whitespace(peek(parser));
8226}
8227
8232static bool
8233parser_lex_magic_comment_encoding_value(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
8234 const pm_encoding_t *encoding = pm_encoding_find(start, end);
8235
8236 if (encoding != NULL) {
8237 if (parser->encoding != encoding) {
8238 parser->encoding = encoding;
8239 if (parser->encoding_changed_callback != NULL) parser->encoding_changed_callback(parser);
8240 }
8241
8242 parser->encoding_changed = (encoding != PM_ENCODING_UTF_8_ENTRY);
8243 return true;
8244 }
8245
8246 return false;
8247}
8248
8253static void
8254parser_lex_magic_comment_encoding(pm_parser_t *parser) {
8255 const uint8_t *cursor = parser->current.start + 1;
8256 const uint8_t *end = parser->current.end;
8257
8258 bool separator = false;
8259 while (true) {
8260 if (end - cursor <= 6) return;
8261 switch (cursor[6]) {
8262 case 'C': case 'c': cursor += 6; continue;
8263 case 'O': case 'o': cursor += 5; continue;
8264 case 'D': case 'd': cursor += 4; continue;
8265 case 'I': case 'i': cursor += 3; continue;
8266 case 'N': case 'n': cursor += 2; continue;
8267 case 'G': case 'g': cursor += 1; continue;
8268 case '=': case ':':
8269 separator = true;
8270 cursor += 6;
8271 break;
8272 default:
8273 cursor += 6;
8274 if (pm_char_is_whitespace(*cursor)) break;
8275 continue;
8276 }
8277 if (pm_strncasecmp(cursor - 6, (const uint8_t *) "coding", 6) == 0) break;
8278 separator = false;
8279 }
8280
8281 while (true) {
8282 do {
8283 if (++cursor >= end) return;
8284 } while (pm_char_is_whitespace(*cursor));
8285
8286 if (separator) break;
8287 if (*cursor != '=' && *cursor != ':') return;
8288
8289 separator = true;
8290 cursor++;
8291 }
8292
8293 const uint8_t *value_start = cursor;
8294 while ((*cursor == '-' || *cursor == '_' || parser->encoding->alnum_char(cursor, 1)) && ++cursor < end);
8295
8296 if (!parser_lex_magic_comment_encoding_value(parser, value_start, cursor)) {
8297 // If we were unable to parse the encoding value, then we've got an
8298 // issue because we didn't understand the encoding that the user was
8299 // trying to use. In this case we'll keep using the default encoding but
8300 // add an error to the parser to indicate an unsuccessful parse.
8301 pm_parser_err(parser, value_start, cursor, PM_ERR_INVALID_ENCODING_MAGIC_COMMENT);
8302 }
8303}
8304
/**
 * Outcome of interpreting a magic comment value as a boolean.
 */
typedef enum {
    /** The value was "true" (any letter case). */
    PM_MAGIC_COMMENT_BOOLEAN_VALUE_TRUE = 0,

    /** The value was "false" (any letter case). */
    PM_MAGIC_COMMENT_BOOLEAN_VALUE_FALSE = 1,

    /** The value was neither "true" nor "false". */
    PM_MAGIC_COMMENT_BOOLEAN_VALUE_INVALID = 2
} pm_magic_comment_boolean_value_t;
8310
8315static pm_magic_comment_boolean_value_t
8316parser_lex_magic_comment_boolean_value(const uint8_t *value_start, uint32_t value_length) {
8317 if (value_length == 4 && pm_strncasecmp(value_start, (const uint8_t *) "true", 4) == 0) {
8318 return PM_MAGIC_COMMENT_BOOLEAN_VALUE_TRUE;
8319 } else if (value_length == 5 && pm_strncasecmp(value_start, (const uint8_t *) "false", 5) == 0) {
8320 return PM_MAGIC_COMMENT_BOOLEAN_VALUE_FALSE;
8321 } else {
8322 return PM_MAGIC_COMMENT_BOOLEAN_VALUE_INVALID;
8323 }
8324}
8325
/**
 * Report whether a byte delimits a magic comment key: a single quote, a
 * double quote, a colon, or a semicolon.
 */
static inline bool
pm_char_is_magic_comment_key_delimiter(const uint8_t b) {
    switch (b) {
        case '\'':
        case '"':
        case ':':
        case ';':
            return true;
        default:
            return false;
    }
}
8330
8336static inline const uint8_t *
8337parser_lex_magic_comment_emacs_marker(pm_parser_t *parser, const uint8_t *cursor, const uint8_t *end) {
8338 while ((cursor + 3 <= end) && (cursor = pm_memchr(cursor, '-', (size_t) (end - cursor), parser->encoding_changed, parser->encoding)) != NULL) {
8339 if (cursor + 3 <= end && cursor[1] == '*' && cursor[2] == '-') {
8340 return cursor;
8341 }
8342 cursor++;
8343 }
8344 return NULL;
8345}
8346
8357static inline bool
8358parser_lex_magic_comment(pm_parser_t *parser, bool semantic_token_seen) {
8359 bool result = true;
8360
8361 const uint8_t *start = parser->current.start + 1;
8362 const uint8_t *end = parser->current.end;
8363 if (end - start <= 7) return false;
8364
8365 const uint8_t *cursor;
8366 bool indicator = false;
8367
8368 if ((cursor = parser_lex_magic_comment_emacs_marker(parser, start, end)) != NULL) {
8369 start = cursor + 3;
8370
8371 if ((cursor = parser_lex_magic_comment_emacs_marker(parser, start, end)) != NULL) {
8372 end = cursor;
8373 indicator = true;
8374 } else {
8375 // If we have a start marker but not an end marker, then we cannot
8376 // have a magic comment.
8377 return false;
8378 }
8379 }
8380
8381 cursor = start;
8382 while (cursor < end) {
8383 while (cursor < end && (pm_char_is_magic_comment_key_delimiter(*cursor) || pm_char_is_whitespace(*cursor))) cursor++;
8384
8385 const uint8_t *key_start = cursor;
8386 while (cursor < end && (!pm_char_is_magic_comment_key_delimiter(*cursor) && !pm_char_is_whitespace(*cursor))) cursor++;
8387
8388 const uint8_t *key_end = cursor;
8389 while (cursor < end && pm_char_is_whitespace(*cursor)) cursor++;
8390 if (cursor == end) break;
8391
8392 if (*cursor == ':') {
8393 cursor++;
8394 } else {
8395 if (!indicator) return false;
8396 continue;
8397 }
8398
8399 while (cursor < end && pm_char_is_whitespace(*cursor)) cursor++;
8400 if (cursor == end) break;
8401
8402 const uint8_t *value_start;
8403 const uint8_t *value_end;
8404
8405 if (*cursor == '"') {
8406 value_start = ++cursor;
8407 for (; cursor < end && *cursor != '"'; cursor++) {
8408 if (*cursor == '\\' && (cursor + 1 < end)) cursor++;
8409 }
8410 value_end = cursor;
8411 if (*cursor == '"') cursor++;
8412 } else {
8413 value_start = cursor;
8414 while (cursor < end && *cursor != '"' && *cursor != ';' && !pm_char_is_whitespace(*cursor)) cursor++;
8415 value_end = cursor;
8416 }
8417
8418 if (indicator) {
8419 while (cursor < end && (*cursor == ';' || pm_char_is_whitespace(*cursor))) cursor++;
8420 } else {
8421 while (cursor < end && pm_char_is_whitespace(*cursor)) cursor++;
8422 if (cursor != end) return false;
8423 }
8424
8425 // Here, we need to do some processing on the key to swap out dashes for
8426 // underscores. We only need to do this if there _is_ a dash in the key.
8427 pm_string_t key;
8428 const size_t key_length = (size_t) (key_end - key_start);
8429 const uint8_t *dash = pm_memchr(key_start, '-', key_length, parser->encoding_changed, parser->encoding);
8430
8431 if (dash == NULL) {
8432 pm_string_shared_init(&key, key_start, key_end);
8433 } else {
8434 uint8_t *buffer = xmalloc(key_length);
8435 if (buffer == NULL) break;
8436
8437 memcpy(buffer, key_start, key_length);
8438 buffer[dash - key_start] = '_';
8439
8440 while ((dash = pm_memchr(dash + 1, '-', (size_t) (key_end - dash - 1), parser->encoding_changed, parser->encoding)) != NULL) {
8441 buffer[dash - key_start] = '_';
8442 }
8443
8444 pm_string_owned_init(&key, buffer, key_length);
8445 }
8446
8447 // Finally, we can start checking the key against the list of known
8448 // magic comment keys, and potentially change state based on that.
8449 const uint8_t *key_source = pm_string_source(&key);
8450 uint32_t value_length = (uint32_t) (value_end - value_start);
8451
8452 // We only want to attempt to compare against encoding comments if it's
8453 // the first line in the file (or the second in the case of a shebang).
8454 if (parser->current.start == parser->encoding_comment_start && !parser->encoding_locked) {
8455 if (
8456 (key_length == 8 && pm_strncasecmp(key_source, (const uint8_t *) "encoding", 8) == 0) ||
8457 (key_length == 6 && pm_strncasecmp(key_source, (const uint8_t *) "coding", 6) == 0)
8458 ) {
8459 result = parser_lex_magic_comment_encoding_value(parser, value_start, value_end);
8460 }
8461 }
8462
8463 if (key_length == 11) {
8464 if (pm_strncasecmp(key_source, (const uint8_t *) "warn_indent", 11) == 0) {
8465 switch (parser_lex_magic_comment_boolean_value(value_start, value_length)) {
8466 case PM_MAGIC_COMMENT_BOOLEAN_VALUE_INVALID:
8467 PM_PARSER_WARN_TOKEN_FORMAT(
8468 parser,
8469 parser->current,
8470 PM_WARN_INVALID_MAGIC_COMMENT_VALUE,
8471 (int) key_length,
8472 (const char *) key_source,
8473 (int) value_length,
8474 (const char *) value_start
8475 );
8476 break;
8477 case PM_MAGIC_COMMENT_BOOLEAN_VALUE_FALSE:
8478 parser->warn_mismatched_indentation = false;
8479 break;
8480 case PM_MAGIC_COMMENT_BOOLEAN_VALUE_TRUE:
8481 parser->warn_mismatched_indentation = true;
8482 break;
8483 }
8484 }
8485 } else if (key_length == 21) {
8486 if (pm_strncasecmp(key_source, (const uint8_t *) "frozen_string_literal", 21) == 0) {
8487 // We only want to handle frozen string literal comments if it's
8488 // before any semantic tokens have been seen.
8489 if (semantic_token_seen) {
8490 pm_parser_warn_token(parser, &parser->current, PM_WARN_IGNORED_FROZEN_STRING_LITERAL);
8491 } else {
8492 switch (parser_lex_magic_comment_boolean_value(value_start, value_length)) {
8493 case PM_MAGIC_COMMENT_BOOLEAN_VALUE_INVALID:
8494 PM_PARSER_WARN_TOKEN_FORMAT(
8495 parser,
8496 parser->current,
8497 PM_WARN_INVALID_MAGIC_COMMENT_VALUE,
8498 (int) key_length,
8499 (const char *) key_source,
8500 (int) value_length,
8501 (const char *) value_start
8502 );
8503 break;
8504 case PM_MAGIC_COMMENT_BOOLEAN_VALUE_FALSE:
8506 break;
8507 case PM_MAGIC_COMMENT_BOOLEAN_VALUE_TRUE:
8509 break;
8510 }
8511 }
8512 }
8513 } else if (key_length == 24) {
8514 if (pm_strncasecmp(key_source, (const uint8_t *) "shareable_constant_value", 24) == 0) {
8515 const uint8_t *cursor = parser->current.start;
8516 while ((cursor > parser->start) && ((cursor[-1] == ' ') || (cursor[-1] == '\t'))) cursor--;
8517
8518 if (!((cursor == parser->start) || (cursor[-1] == '\n'))) {
8519 pm_parser_warn_token(parser, &parser->current, PM_WARN_SHAREABLE_CONSTANT_VALUE_LINE);
8520 } else if (value_length == 4 && pm_strncasecmp(value_start, (const uint8_t *) "none", 4) == 0) {
8521 pm_parser_scope_shareable_constant_set(parser, PM_SCOPE_SHAREABLE_CONSTANT_NONE);
8522 } else if (value_length == 7 && pm_strncasecmp(value_start, (const uint8_t *) "literal", 7) == 0) {
8523 pm_parser_scope_shareable_constant_set(parser, PM_SCOPE_SHAREABLE_CONSTANT_LITERAL);
8524 } else if (value_length == 23 && pm_strncasecmp(value_start, (const uint8_t *) "experimental_everything", 23) == 0) {
8525 pm_parser_scope_shareable_constant_set(parser, PM_SCOPE_SHAREABLE_CONSTANT_EXPERIMENTAL_EVERYTHING);
8526 } else if (value_length == 17 && pm_strncasecmp(value_start, (const uint8_t *) "experimental_copy", 17) == 0) {
8527 pm_parser_scope_shareable_constant_set(parser, PM_SCOPE_SHAREABLE_CONSTANT_EXPERIMENTAL_COPY);
8528 } else {
8529 PM_PARSER_WARN_TOKEN_FORMAT(
8530 parser,
8531 parser->current,
8532 PM_WARN_INVALID_MAGIC_COMMENT_VALUE,
8533 (int) key_length,
8534 (const char *) key_source,
8535 (int) value_length,
8536 (const char *) value_start
8537 );
8538 }
8539 }
8540 }
8541
8542 // When we're done, we want to free the string in case we had to
8543 // allocate memory for it.
8544 pm_string_free(&key);
8545
8546 // Allocate a new magic comment node to append to the parser's list.
8548 if ((magic_comment = (pm_magic_comment_t *) xcalloc(1, sizeof(pm_magic_comment_t))) != NULL) {
8549 magic_comment->key_start = key_start;
8550 magic_comment->value_start = value_start;
8551 magic_comment->key_length = (uint32_t) key_length;
8552 magic_comment->value_length = value_length;
8553 pm_list_append(&parser->magic_comment_list, (pm_list_node_t *) magic_comment);
8554 }
8555 }
8556
8557 return result;
8558}
8559
8560/******************************************************************************/
8561/* Context manipulations */
8562/******************************************************************************/
8563
// Report whether the given token terminates the given parsing context. Each
// context (or group of contexts) is mapped to the token types that close it,
// e.g. PM_CONTEXT_PARENS is closed by PM_TOKEN_PARENTHESIS_RIGHT and
// PM_CONTEXT_UNLESS by PM_TOKEN_KEYWORD_ELSE or PM_TOKEN_KEYWORD_END.
// PM_CONTEXT_NONE, and any context value not covered by the switch, is never
// terminated.
//
// NOTE(review): this listing appears to have lost a number of `case` labels
// (the embedded line numbers skip 8568, 8570, 8572, 8574, 8582, 8584,
// 8590-8596, 8598, 8600, 8602, 8613, 8618-8624, 8626-8632, 8634, 8636). As
// shown, several `return` statements directly follow another `return` and
// would be unreachable. The missing labels must be restored from the upstream
// file before this block is used as code; they are not reconstructed here.
8564static bool
8565context_terminator(pm_context_t context, pm_token_t *token) {
8566 switch (context) {
8567 case PM_CONTEXT_MAIN:
8569 case PM_CONTEXT_DEFINED:
8571 case PM_CONTEXT_TERNARY:
8573 return token->type == PM_TOKEN_EOF;
8575 return token->type == PM_TOKEN_COMMA || token->type == PM_TOKEN_PARENTHESIS_RIGHT;
8576 case PM_CONTEXT_PREEXE:
8577 case PM_CONTEXT_POSTEXE:
8578 return token->type == PM_TOKEN_BRACE_RIGHT;
8579 case PM_CONTEXT_MODULE:
8580 case PM_CONTEXT_CLASS:
8581 case PM_CONTEXT_SCLASS:
8583 case PM_CONTEXT_DEF:
// Definition-like bodies end at `end`, or hand over to a rescue/ensure clause.
8585 return token->type == PM_TOKEN_KEYWORD_END || token->type == PM_TOKEN_KEYWORD_RESCUE || token->type == PM_TOKEN_KEYWORD_ENSURE;
8586 case PM_CONTEXT_WHILE:
8587 case PM_CONTEXT_UNTIL:
8588 case PM_CONTEXT_ELSE:
8589 case PM_CONTEXT_FOR:
8597 return token->type == PM_TOKEN_KEYWORD_END;
8599 return token->type == PM_TOKEN_KEYWORD_DO || token->type == PM_TOKEN_KEYWORD_THEN;
8601 return token->type == PM_TOKEN_KEYWORD_IN;
8603 return token->type == PM_TOKEN_KEYWORD_WHEN || token->type == PM_TOKEN_KEYWORD_END || token->type == PM_TOKEN_KEYWORD_ELSE;
8604 case PM_CONTEXT_CASE_IN:
8605 return token->type == PM_TOKEN_KEYWORD_IN || token->type == PM_TOKEN_KEYWORD_END || token->type == PM_TOKEN_KEYWORD_ELSE;
8606 case PM_CONTEXT_IF:
8607 case PM_CONTEXT_ELSIF:
8608 return token->type == PM_TOKEN_KEYWORD_ELSE || token->type == PM_TOKEN_KEYWORD_ELSIF || token->type == PM_TOKEN_KEYWORD_END;
8609 case PM_CONTEXT_UNLESS:
8610 return token->type == PM_TOKEN_KEYWORD_ELSE || token->type == PM_TOKEN_KEYWORD_END;
8611 case PM_CONTEXT_EMBEXPR:
8612 return token->type == PM_TOKEN_EMBEXPR_END;
8614 return token->type == PM_TOKEN_BRACE_RIGHT;
8615 case PM_CONTEXT_PARENS:
8616 return token->type == PM_TOKEN_PARENTHESIS_RIGHT;
8617 case PM_CONTEXT_BEGIN:
8625 return token->type == PM_TOKEN_KEYWORD_ENSURE || token->type == PM_TOKEN_KEYWORD_RESCUE || token->type == PM_TOKEN_KEYWORD_ELSE || token->type == PM_TOKEN_KEYWORD_END;
8633 return token->type == PM_TOKEN_KEYWORD_ENSURE || token->type == PM_TOKEN_KEYWORD_END;
8635 return token->type == PM_TOKEN_BRACE_RIGHT;
8637 return token->type == PM_TOKEN_KEYWORD_THEN || token->type == PM_TOKEN_NEWLINE || token->type == PM_TOKEN_SEMICOLON;
8638 case PM_CONTEXT_NONE:
8639 return false;
8640 }
8641
// Fallback for context values the switch does not handle.
8642 return false;
8643}
8644
8649static pm_context_t
8650context_recoverable(const pm_parser_t *parser, pm_token_t *token) {
8651 pm_context_node_t *context_node = parser->current_context;
8652
8653 while (context_node != NULL) {
8654 if (context_terminator(context_node->context, token)) return context_node->context;
8655 context_node = context_node->prev;
8656 }
8657
8658 return PM_CONTEXT_NONE;
8659}
8660
8661static bool
8662context_push(pm_parser_t *parser, pm_context_t context) {
8663 pm_context_node_t *context_node = (pm_context_node_t *) xmalloc(sizeof(pm_context_node_t));
8664 if (context_node == NULL) return false;
8665
8666 *context_node = (pm_context_node_t) { .context = context, .prev = NULL };
8667
8668 if (parser->current_context == NULL) {
8669 parser->current_context = context_node;
8670 } else {
8671 context_node->prev = parser->current_context;
8672 parser->current_context = context_node;
8673 }
8674
8675 return true;
8676}
8677
8678static void
8679context_pop(pm_parser_t *parser) {
8680 pm_context_node_t *prev = parser->current_context->prev;
8681 xfree(parser->current_context);
8682 parser->current_context = prev;
8683}
8684
8685static bool
8686context_p(const pm_parser_t *parser, pm_context_t context) {
8687 pm_context_node_t *context_node = parser->current_context;
8688
8689 while (context_node != NULL) {
8690 if (context_node->context == context) return true;
8691 context_node = context_node->prev;
8692 }
8693
8694 return false;
8695}
8696
/**
 * Walks the context stack from the innermost context outward. As written
 * here, it returns true as soon as a PM_CONTEXT_DEF context is reached, and
 * false as soon as a class, module, or singleton class context is reached.
 * Any other context is skipped. An exhausted stack yields false.
 *
 * NOTE(review): the line numbers embedded in this listing skip after several
 * of the case labels below, so additional labels may be missing from this
 * copy -- confirm against the upstream file.
 */
8697 static bool
8698 context_def_p(const pm_parser_t *parser) {
8699 pm_context_node_t *context_node = parser->current_context;
8700
8701 while (context_node != NULL) {
8702 switch (context_node->context) {
8703 case PM_CONTEXT_DEF:
8708 return true;
8709 case PM_CONTEXT_CLASS:
8713 case PM_CONTEXT_MODULE:
8717 case PM_CONTEXT_SCLASS:
8721 return false;
8722 default:
// Any other context: keep looking further out on the stack.
8723 context_node = context_node->prev;
8724 }
8725 }
8726
8727 return false;
8728 }
8729
/**
 * Returns a human-readable, statically allocated description of the given
 * context (presumably for use in diagnostics -- confirm against callers).
 * PM_CONTEXT_NONE is not expected here: it trips an assertion and, when
 * assertions are compiled out, yields the empty string.
 *
 * NOTE(review): the line numbers embedded in this listing skip before the
 * "'else' clause", "'ensure' clause" and "'rescue' clause" entries, so the
 * sibling case labels that share those strings appear to be missing from this
 * copy.
 */
8734 static const char *
8735 context_human(pm_context_t context) {
8736 switch (context) {
8737 case PM_CONTEXT_NONE:
8738 assert(false && "unreachable");
8739 return "";
8740 case PM_CONTEXT_BEGIN: return "begin statement";
8741 case PM_CONTEXT_BLOCK_BRACES: return "'{'..'}' block";
8742 case PM_CONTEXT_BLOCK_KEYWORDS: return "'do'..'end' block";
8743 case PM_CONTEXT_CASE_WHEN: return "'when' clause";
8744 case PM_CONTEXT_CASE_IN: return "'in' clause";
8745 case PM_CONTEXT_CLASS: return "class definition";
8746 case PM_CONTEXT_DEF: return "method definition";
8747 case PM_CONTEXT_DEF_PARAMS: return "method parameters";
8748 case PM_CONTEXT_DEFAULT_PARAMS: return "parameter default value";
8749 case PM_CONTEXT_DEFINED: return "'defined?' expression";
8750 case PM_CONTEXT_ELSE:
8757 case PM_CONTEXT_SCLASS_ELSE: return "'else' clause";
8758 case PM_CONTEXT_ELSIF: return "'elsif' clause";
8759 case PM_CONTEXT_EMBEXPR: return "embedded expression";
8766 case PM_CONTEXT_SCLASS_ENSURE: return "'ensure' clause";
8767 case PM_CONTEXT_FOR: return "for loop";
8768 case PM_CONTEXT_FOR_INDEX: return "for loop index";
8769 case PM_CONTEXT_IF: return "if statement";
8770 case PM_CONTEXT_LAMBDA_BRACES: return "'{'..'}' lambda block";
8771 case PM_CONTEXT_LAMBDA_DO_END: return "'do'..'end' lambda block";
8772 case PM_CONTEXT_LOOP_PREDICATE: return "loop predicate";
8773 case PM_CONTEXT_MAIN: return "top level context";
8774 case PM_CONTEXT_MODULE: return "module definition";
8775 case PM_CONTEXT_MULTI_TARGET: return "multiple targets";
8776 case PM_CONTEXT_PARENS: return "parentheses";
8777 case PM_CONTEXT_POSTEXE: return "'END' block";
8778 case PM_CONTEXT_PREDICATE: return "predicate";
8779 case PM_CONTEXT_PREEXE: return "'BEGIN' block";
8787 case PM_CONTEXT_SCLASS_RESCUE: return "'rescue' clause";
8788 case PM_CONTEXT_SCLASS: return "singleton class definition";
8789 case PM_CONTEXT_TERNARY: return "ternary expression";
8790 case PM_CONTEXT_UNLESS: return "unless statement";
8791 case PM_CONTEXT_UNTIL: return "until statement";
8792 case PM_CONTEXT_WHILE: return "while statement";
8793 }
8794
// Reached only if `context` holds a value outside the enumerators above.
8795 assert(false && "unreachable");
8796 return "";
8797 }
8798
8799/******************************************************************************/
8800/* Specific token lexers */
8801/******************************************************************************/
8802
8803static inline void
8804pm_strspn_number_validate(pm_parser_t *parser, const uint8_t *string, size_t length, const uint8_t *invalid) {
8805 if (invalid != NULL) {
8806 pm_diagnostic_id_t diag_id = (invalid == (string + length - 1)) ? PM_ERR_INVALID_NUMBER_UNDERSCORE_TRAILING : PM_ERR_INVALID_NUMBER_UNDERSCORE_INNER;
8807 pm_parser_err(parser, invalid, invalid + 1, diag_id);
8808 }
8809}
8810
8811static size_t
8812pm_strspn_binary_number_validate(pm_parser_t *parser, const uint8_t *string) {
8813 const uint8_t *invalid = NULL;
8814 size_t length = pm_strspn_binary_number(string, parser->end - string, &invalid);
8815 pm_strspn_number_validate(parser, string, length, invalid);
8816 return length;
8817}
8818
8819static size_t
8820pm_strspn_octal_number_validate(pm_parser_t *parser, const uint8_t *string) {
8821 const uint8_t *invalid = NULL;
8822 size_t length = pm_strspn_octal_number(string, parser->end - string, &invalid);
8823 pm_strspn_number_validate(parser, string, length, invalid);
8824 return length;
8825}
8826
8827static size_t
8828pm_strspn_decimal_number_validate(pm_parser_t *parser, const uint8_t *string) {
8829 const uint8_t *invalid = NULL;
8830 size_t length = pm_strspn_decimal_number(string, parser->end - string, &invalid);
8831 pm_strspn_number_validate(parser, string, length, invalid);
8832 return length;
8833}
8834
8835static size_t
8836pm_strspn_hexadecimal_number_validate(pm_parser_t *parser, const uint8_t *string) {
8837 const uint8_t *invalid = NULL;
8838 size_t length = pm_strspn_hexadecimal_number(string, parser->end - string, &invalid);
8839 pm_strspn_number_validate(parser, string, length, invalid);
8840 return length;
8841}
8842
/**
 * Tries to consume the optional float parts that may follow the digits of a
 * number: a fraction ('.' followed by a decimal digit) and an exponent ('e' or
 * 'E', an optional sign, then decimal digits). parser->current.end is advanced
 * past whatever is consumed, and *seen_e is set to true when an exponent was
 * consumed. A signed exponent with no digits reports
 * PM_ERR_INVALID_FLOAT_EXPONENT.
 *
 * NOTE(review): the declaration of `type` and any assignments to it are not
 * visible in this listing (embedded line numbers 8845, 8853 and 8881 are
 * absent), so the returned token type cannot be determined from this copy --
 * confirm against the upstream file.
 */
8843 static pm_token_type_t
8844 lex_optional_float_suffix(pm_parser_t *parser, bool* seen_e) {
8846
8847 // Here we're going to attempt to parse the optional decimal portion of a
8848 // float. If it's not there, then it's okay and we'll just continue on.
8849 if (peek(parser) == '.') {
8850 if (pm_char_is_decimal_digit(peek_offset(parser, 1))) {
// Skip the '.' and the first digit, then the rest of the digits.
8851 parser->current.end += 2;
8852 parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end);
8854 } else {
8855 // If we had a . and then something else, then it's not a float
8856 // suffix on a number it's a method call or something else.
8857 return type;
8858 }
8859 }
8860
8861 // Here we're going to attempt to parse the optional exponent portion of a
8862 // float. If it's not there, it's okay and we'll just continue on.
8863 if ((peek(parser) == 'e') || (peek(parser) == 'E')) {
8864 if ((peek_offset(parser, 1) == '+') || (peek_offset(parser, 1) == '-')) {
// Skip the exponent marker and its sign.
8865 parser->current.end += 2;
8866
8867 if (pm_char_is_decimal_digit(peek(parser))) {
8868 parser->current.end++;
8869 parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end);
8870 } else {
8871 pm_parser_err_current(parser, PM_ERR_INVALID_FLOAT_EXPONENT);
8872 }
8873 } else if (pm_char_is_decimal_digit(peek_offset(parser, 1))) {
// Unsigned exponent: skip the marker, then scan the digits.
8874 parser->current.end++;
8875 parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end);
8876 } else {
// An 'e'/'E' that is not followed by a sign or digit is not an exponent.
8877 return type;
8878 }
8879
8880 *seen_e = true;
8882 }
8883
8884 return type;
8885 }
8886
/**
 * Lexes the remainder of a numeric literal whose first digit has already been
 * consumed (that digit is inspected through peek_offset(parser, -1)). *seen_e
 * is reset to false on entry.
 *
 * When the first digit is '0', the next byte selects the form: 0d (decimal),
 * 0b (binary), 0o (octal), a bare octal digit or underscore (octal), 0x
 * (hexadecimal), or '.'/'e'/'E' (float suffix). A prefix that is not followed
 * by a valid digit reports the matching PM_ERR_INVALID_NUMBER_* error.
 * Otherwise the number is lexed as decimal, followed by an optional float
 * suffix. Finally, a stray ".digits" fraction after the completed number is
 * reported as PM_ERR_INVALID_NUMBER_FRACTION.
 *
 * NOTE(review): the declaration of `type` and one statement before each
 * `break` in the binary, octal and hexadecimal cases are not visible in this
 * listing (the embedded line numbers skip there) -- confirm against the
 * upstream file.
 */
8887 static pm_token_type_t
8888 lex_numeric_prefix(pm_parser_t *parser, bool* seen_e) {
8890 *seen_e = false;
8891
8892 if (peek_offset(parser, -1) == '0') {
8893 switch (*parser->current.end) {
8894 // 0d1111 is a decimal number
8895 case 'd':
8896 case 'D':
8897 parser->current.end++;
8898 if (pm_char_is_decimal_digit(peek(parser))) {
8899 parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end);
8900 } else {
8901 match(parser, '_');
8902 pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_DECIMAL);
8903 }
8904
8905 break;
8906
8907 // 0b1111 is a binary number
8908 case 'b':
8909 case 'B':
8910 parser->current.end++;
8911 if (pm_char_is_binary_digit(peek(parser))) {
8912 parser->current.end += pm_strspn_binary_number_validate(parser, parser->current.end);
8913 } else {
8914 match(parser, '_');
8915 pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_BINARY);
8916 }
8917
8919 break;
8920
8921 // 0o1111 is an octal number
8922 case 'o':
8923 case 'O':
8924 parser->current.end++;
8925 if (pm_char_is_octal_digit(peek(parser))) {
8926 parser->current.end += pm_strspn_octal_number_validate(parser, parser->current.end);
8927 } else {
8928 match(parser, '_');
8929 pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_OCTAL);
8930 }
8931
8933 break;
8934
8935 // 01111 is an octal number
8936 case '_':
8937 case '0':
8938 case '1':
8939 case '2':
8940 case '3':
8941 case '4':
8942 case '5':
8943 case '6':
8944 case '7':
8945 parser->current.end += pm_strspn_octal_number_validate(parser, parser->current.end);
8947 break;
8948
8949 // 0x1111 is a hexadecimal number
8950 case 'x':
8951 case 'X':
8952 parser->current.end++;
8953 if (pm_char_is_hexadecimal_digit(peek(parser))) {
8954 parser->current.end += pm_strspn_hexadecimal_number_validate(parser, parser->current.end);
8955 } else {
8956 match(parser, '_');
8957 pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_HEXADECIMAL);
8958 }
8959
8961 break;
8962
8963 // 0.xxx is a float
8964 case '.': {
8965 type = lex_optional_float_suffix(parser, seen_e);
8966 break;
8967 }
8968
8969 // 0exxx is a float
8970 case 'e':
8971 case 'E': {
8972 type = lex_optional_float_suffix(parser, seen_e);
8973 break;
8974 }
8975 }
8976 } else {
8977 // If it didn't start with a 0, then we'll lex as far as we can into a
8978 // decimal number.
8979 parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end);
8980
8981 // Afterward, we'll lex as far as we can into an optional float suffix.
8982 type = lex_optional_float_suffix(parser, seen_e);
8983 }
8984
8985 // At this point we have a completed number, but we want to provide the user
8986 // with a good experience if they put an additional .xxx fractional
8987 // component on the end, so we'll check for that here.
8988 if (peek_offset(parser, 0) == '.' && pm_char_is_decimal_digit(peek_offset(parser, 1))) {
8989 const uint8_t *fraction_start = parser->current.end;
8990 const uint8_t *fraction_end = parser->current.end + 2;
8991 fraction_end += pm_strspn_decimal_digit(fraction_end, parser->end - fraction_end);
8992 pm_parser_err(parser, fraction_start, fraction_end, PM_ERR_INVALID_NUMBER_FRACTION);
8993 }
8994
8995 return type;
8996 }
8997
/**
 * Lexes a numeric literal, including an optional rational ('r') and/or
 * imaginary ('i') suffix, and returns the resulting token type.
 *
 * After the number itself is lexed by lex_numeric_prefix, a suffix is
 * tentatively consumed. The 'r' suffix is not accepted on a non-integer
 * number that had an exponent (seen_e). If the byte following the suffix
 * could continue an identifier (an ASCII letter, '_', or a byte >= 0x80), the
 * suffix is rejected: parser->current.end is rewound to the end of the plain
 * number and the unsuffixed type is returned.
 *
 * NOTE(review): the declaration of `type` and the bodies of the two inner
 * `if (match(parser, 'i'))` branches are not visible in this listing (the
 * embedded line numbers skip there) -- confirm against the upstream file.
 */
8998 static pm_token_type_t
8999 lex_numeric(pm_parser_t *parser) {
9002
9003 if (parser->current.end < parser->end) {
9004 bool seen_e = false;
9005 type = lex_numeric_prefix(parser, &seen_e);
9006
// Remember where the plain number ends so the suffix can be undone.
9007 const uint8_t *end = parser->current.end;
9008 pm_token_type_t suffix_type = type;
9009
9010 if (type == PM_TOKEN_INTEGER) {
9011 if (match(parser, 'r')) {
9012 suffix_type = PM_TOKEN_INTEGER_RATIONAL;
9013
9014 if (match(parser, 'i')) {
9016 }
9017 } else if (match(parser, 'i')) {
9018 suffix_type = PM_TOKEN_INTEGER_IMAGINARY;
9019 }
9020 } else {
9021 if (!seen_e && match(parser, 'r')) {
9022 suffix_type = PM_TOKEN_FLOAT_RATIONAL;
9023
9024 if (match(parser, 'i')) {
9026 }
9027 } else if (match(parser, 'i')) {
9028 suffix_type = PM_TOKEN_FLOAT_IMAGINARY;
9029 }
9030 }
9031
// Reject the suffix when it runs straight into identifier-like bytes.
9032 const uint8_t b = peek(parser);
9033 if (b != '\0' && (b >= 0x80 || ((b >= 'a' && b <= 'z') || (b >= 'A' && b <= 'Z')) || b == '_')) {
9034 parser->current.end = end;
9035 } else {
9036 type = suffix_type;
9037 }
9038 }
9039
9040 return type;
9041 }
9042
/**
 * Lexes what follows the '$' of a global variable and returns the token type.
 * The byte at parser->current.end selects the form:
 *
 * - punctuation globals such as $~ or $* consume one byte;
 * - $&, $`, $' and $+ are back references, unless the lexer is in the FNAME
 *   state, in which case they are plain global variables;
 * - $0 may not be followed by identifier characters (an error is reported);
 * - $1..$9 followed by more digits are numbered references, or plain global
 *   variables in the FNAME state;
 * - "$-" accepts a single identifier character after the dash;
 * - anything else must start an identifier, otherwise an error is reported
 *   (a bare '$' error when whitespace follows).
 *
 * NOTE(review): several statements are not visible in this listing (the
 * embedded line numbers skip after the end-of-input check, after the first
 * punctuation group, and at the end of the '0' and default cases). They are
 * presumably `return` statements -- confirm against the upstream file.
 */
9043 static pm_token_type_t
9044 lex_global_variable(pm_parser_t *parser) {
9045 if (parser->current.end >= parser->end) {
9046 pm_parser_err_token(parser, &parser->current, PM_ERR_GLOBAL_VARIABLE_BARE);
9048 }
9049
9050 // True if multiple characters are allowed after the declaration of the
9051 // global variable. Not true when it starts with "$-".
9052 bool allow_multiple = true;
9053
9054 switch (*parser->current.end) {
9055 case '~': // $~: match-data
9056 case '*': // $*: argv
9057 case '$': // $$: pid
9058 case '?': // $?: last status
9059 case '!': // $!: error string
9060 case '@': // $@: error position
9061 case '/': // $/: input record separator
9062 case '\\': // $\: output record separator
9063 case ';': // $;: field separator
9064 case ',': // $,: output field separator
9065 case '.': // $.: last read line number
9066 case '=': // $=: ignorecase
9067 case ':': // $:: load path
9068 case '<': // $<: reading filename
9069 case '>': // $>: default output handle
9070 case '\"': // $": already loaded files
9071 parser->current.end++;
9073
9074 case '&': // $&: last match
9075 case '`': // $`: string before last match
9076 case '\'': // $': string after last match
9077 case '+': // $+: string matches last paren.
9078 parser->current.end++;
9079 return lex_state_p(parser, PM_LEX_STATE_FNAME) ? PM_TOKEN_GLOBAL_VARIABLE : PM_TOKEN_BACK_REFERENCE;
9080
9081 case '0': {
9082 parser->current.end++;
9083 size_t width;
9084
9085 if (parser->current.end < parser->end && (width = char_is_identifier(parser, parser->current.end)) > 0) {
// Consume the whole trailing identifier so the error covers all of it.
9086 do {
9087 parser->current.end += width;
9088 } while (parser->current.end < parser->end && (width = char_is_identifier(parser, parser->current.end)) > 0);
9089
9090 // $0 isn't allowed to be followed by anything.
9091 pm_diagnostic_id_t diag_id = parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_ERR_INVALID_VARIABLE_GLOBAL_3_3 : PM_ERR_INVALID_VARIABLE_GLOBAL;
9092 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->current, diag_id);
9093 }
9094
9096 }
9097
9098 case '1':
9099 case '2':
9100 case '3':
9101 case '4':
9102 case '5':
9103 case '6':
9104 case '7':
9105 case '8':
9106 case '9':
9107 parser->current.end += pm_strspn_decimal_digit(parser->current.end, parser->end - parser->current.end);
9108 return lex_state_p(parser, PM_LEX_STATE_FNAME) ? PM_TOKEN_GLOBAL_VARIABLE : PM_TOKEN_NUMBERED_REFERENCE;
9109
9110 case '-':
9111 parser->current.end++;
9112 allow_multiple = false;
9113 /* fallthrough */
9114 default: {
9115 size_t width;
9116
9117 if ((width = char_is_identifier(parser, parser->current.end)) > 0) {
// With allow_multiple false (the "$-" form) only one character is taken.
9118 do {
9119 parser->current.end += width;
9120 } while (allow_multiple && parser->current.end < parser->end && (width = char_is_identifier(parser, parser->current.end)) > 0);
9121 } else if (pm_char_is_whitespace(peek(parser))) {
9122 // If we get here, then we have a $ followed by whitespace,
9123 // which is not allowed.
9124 pm_parser_err_token(parser, &parser->current, PM_ERR_GLOBAL_VARIABLE_BARE);
9125 } else {
9126 // If we get here, then we have a $ followed by something that
9127 // isn't recognized as a global variable.
9128 pm_diagnostic_id_t diag_id = parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_ERR_INVALID_VARIABLE_GLOBAL_3_3 : PM_ERR_INVALID_VARIABLE_GLOBAL;
9129 const uint8_t *end = parser->current.end + parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9130 PM_PARSER_ERR_FORMAT(parser, parser->current.start, end, diag_id, (int) (end - parser->current.start), (const char *) parser->current.start);
9131 }
9132
9134 }
9135 }
9136 }
9137
9150static inline pm_token_type_t
9151lex_keyword(pm_parser_t *parser, const uint8_t *current_start, const char *value, size_t vlen, pm_lex_state_t state, pm_token_type_t type, pm_token_type_t modifier_type) {
9152 if (memcmp(current_start, value, vlen) == 0) {
9153 pm_lex_state_t last_state = parser->lex_state;
9154
9155 if (parser->lex_state & PM_LEX_STATE_FNAME) {
9156 lex_state_set(parser, PM_LEX_STATE_ENDFN);
9157 } else {
9158 lex_state_set(parser, state);
9159 if (state == PM_LEX_STATE_BEG) {
9160 parser->command_start = true;
9161 }
9162
9163 if ((modifier_type != PM_TOKEN_EOF) && !(last_state & (PM_LEX_STATE_BEG | PM_LEX_STATE_LABELED | PM_LEX_STATE_CLASS))) {
9164 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
9165 return modifier_type;
9166 }
9167 }
9168
9169 return type;
9170 }
9171
9172 return PM_TOKEN_EOF;
9173}
9174
/**
 * Lexes an identifier starting at parser->current.start and classifies it.
 *
 * The identifier characters are consumed first (using the UTF-8 fast path
 * unless the encoding was changed). Then, in order:
 * - a trailing '!' or '?' (not followed by '=') may be absorbed, yielding a
 *   label, the `defined?` keyword check, or PM_TOKEN_METHOD_NAME;
 * - in the FNAME state a trailing '=' may be absorbed (setter name);
 * - a trailing ':' (not '::') turns the identifier into PM_TOKEN_LABEL when
 *   the lexer state allows a label;
 * - unless the lexer state is exactly DOT, the identifier is compared against
 *   the keywords of the same length;
 * - otherwise it is a constant if it starts with an uppercase character and a
 *   plain identifier if not.
 *
 * NOTE(review): the declaration of `type`, the statement inside the
 * `defined?` branch and the statement inside the `pm_do_loop_stack_p` branch
 * are not visible in this listing (the embedded line numbers skip there) --
 * confirm against the upstream file.
 */
9175 static pm_token_type_t
9176 lex_identifier(pm_parser_t *parser, bool previous_command_start) {
9177 // Lex as far as we can into the current identifier.
9178 size_t width;
9179 const uint8_t *end = parser->end;
9180 const uint8_t *current_start = parser->current.start;
9181 const uint8_t *current_end = parser->current.end;
9182 bool encoding_changed = parser->encoding_changed;
9183
9184 if (encoding_changed) {
9185 while (current_end < end && (width = char_is_identifier(parser, current_end)) > 0) {
9186 current_end += width;
9187 }
9188 } else {
9189 while (current_end < end && (width = char_is_identifier_utf8(current_end, end)) > 0) {
9190 current_end += width;
9191 }
9192 }
9193 parser->current.end = current_end;
9194
9195 // Now cache the length of the identifier so that we can quickly compare it
9196 // against known keywords.
9197 width = (size_t) (current_end - current_start);
9198
9199 if (current_end < end) {
9200 if (((current_end + 1 >= end) || (current_end[1] != '=')) && (match(parser, '!') || match(parser, '?'))) {
9201 // First we'll attempt to extend the identifier by a ! or ?. Then we'll
9202 // check if we're returning the defined? keyword or just an identifier.
9203 width++;
9204
9205 if (
9206 ((lex_state_p(parser, PM_LEX_STATE_LABEL | PM_LEX_STATE_ENDFN) && !previous_command_start) || lex_state_arg_p(parser)) &&
9207 (peek(parser) == ':') && (peek_offset(parser, 1) != ':')
9208 ) {
9209 // If we're in a position where we can accept a : at the end of an
9210 // identifier, then we'll optionally accept it.
9211 lex_state_set(parser, PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED);
9212 (void) match(parser, ':');
9213 return PM_TOKEN_LABEL;
9214 }
9215
9216 if (parser->lex_state != PM_LEX_STATE_DOT) {
9217 if (width == 8 && (lex_keyword(parser, current_start, "defined?", width, PM_LEX_STATE_ARG, PM_TOKEN_KEYWORD_DEFINED, PM_TOKEN_EOF) != PM_TOKEN_EOF)) {
9219 }
9220 }
9221
9222 return PM_TOKEN_METHOD_NAME;
9223 }
9224
9225 if (lex_state_p(parser, PM_LEX_STATE_FNAME) && peek_offset(parser, 1) != '~' && peek_offset(parser, 1) != '>' && (peek_offset(parser, 1) != '=' || peek_offset(parser, 2) == '>') && match(parser, '=')) {
9226 // If we're in a position where we can accept a = at the end of an
9227 // identifier, then we'll optionally accept it.
9228 return PM_TOKEN_IDENTIFIER;
9229 }
9230
9231 if (
9232 ((lex_state_p(parser, PM_LEX_STATE_LABEL | PM_LEX_STATE_ENDFN) && !previous_command_start) || lex_state_arg_p(parser)) &&
9233 peek(parser) == ':' && peek_offset(parser, 1) != ':'
9234 ) {
9235 // If we're in a position where we can accept a : at the end of an
9236 // identifier, then we'll optionally accept it.
9237 lex_state_set(parser, PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED);
9238 (void) match(parser, ':');
9239 return PM_TOKEN_LABEL;
9240 }
9241 }
9242
// Keyword lookup, bucketed by identifier length. lex_keyword returns
// PM_TOKEN_EOF when the candidate does not match.
9243 if (parser->lex_state != PM_LEX_STATE_DOT) {
9245 switch (width) {
9246 case 2:
9247 if (lex_keyword(parser, current_start, "do", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_DO, PM_TOKEN_EOF) != PM_TOKEN_EOF) {
9248 if (pm_do_loop_stack_p(parser)) {
9250 }
9251 return PM_TOKEN_KEYWORD_DO;
9252 }
9253
9254 if ((type = lex_keyword(parser, current_start, "if", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_IF, PM_TOKEN_KEYWORD_IF_MODIFIER)) != PM_TOKEN_EOF) return type;
9255 if ((type = lex_keyword(parser, current_start, "in", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_IN, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9256 if ((type = lex_keyword(parser, current_start, "or", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_OR, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9257 break;
9258 case 3:
9259 if ((type = lex_keyword(parser, current_start, "and", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_AND, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9260 if ((type = lex_keyword(parser, current_start, "def", width, PM_LEX_STATE_FNAME, PM_TOKEN_KEYWORD_DEF, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9261 if ((type = lex_keyword(parser, current_start, "end", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_END, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9262 if ((type = lex_keyword(parser, current_start, "END", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_END_UPCASE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9263 if ((type = lex_keyword(parser, current_start, "for", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_FOR, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9264 if ((type = lex_keyword(parser, current_start, "nil", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_NIL, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9265 if ((type = lex_keyword(parser, current_start, "not", width, PM_LEX_STATE_ARG, PM_TOKEN_KEYWORD_NOT, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9266 break;
9267 case 4:
9268 if ((type = lex_keyword(parser, current_start, "case", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_CASE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9269 if ((type = lex_keyword(parser, current_start, "else", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9270 if ((type = lex_keyword(parser, current_start, "next", width, PM_LEX_STATE_MID, PM_TOKEN_KEYWORD_NEXT, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9271 if ((type = lex_keyword(parser, current_start, "redo", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_REDO, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9272 if ((type = lex_keyword(parser, current_start, "self", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_SELF, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9273 if ((type = lex_keyword(parser, current_start, "then", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9274 if ((type = lex_keyword(parser, current_start, "true", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_TRUE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9275 if ((type = lex_keyword(parser, current_start, "when", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_WHEN, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9276 break;
9277 case 5:
9278 if ((type = lex_keyword(parser, current_start, "alias", width, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM, PM_TOKEN_KEYWORD_ALIAS, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9279 if ((type = lex_keyword(parser, current_start, "begin", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_BEGIN, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9280 if ((type = lex_keyword(parser, current_start, "BEGIN", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_BEGIN_UPCASE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9281 if ((type = lex_keyword(parser, current_start, "break", width, PM_LEX_STATE_MID, PM_TOKEN_KEYWORD_BREAK, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9282 if ((type = lex_keyword(parser, current_start, "class", width, PM_LEX_STATE_CLASS, PM_TOKEN_KEYWORD_CLASS, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9283 if ((type = lex_keyword(parser, current_start, "elsif", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_ELSIF, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9284 if ((type = lex_keyword(parser, current_start, "false", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_FALSE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9285 if ((type = lex_keyword(parser, current_start, "retry", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_RETRY, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9286 if ((type = lex_keyword(parser, current_start, "super", width, PM_LEX_STATE_ARG, PM_TOKEN_KEYWORD_SUPER, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9287 if ((type = lex_keyword(parser, current_start, "undef", width, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM, PM_TOKEN_KEYWORD_UNDEF, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9288 if ((type = lex_keyword(parser, current_start, "until", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_UNTIL, PM_TOKEN_KEYWORD_UNTIL_MODIFIER)) != PM_TOKEN_EOF) return type;
9289 if ((type = lex_keyword(parser, current_start, "while", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_WHILE, PM_TOKEN_KEYWORD_WHILE_MODIFIER)) != PM_TOKEN_EOF) return type;
9290 if ((type = lex_keyword(parser, current_start, "yield", width, PM_LEX_STATE_ARG, PM_TOKEN_KEYWORD_YIELD, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9291 break;
9292 case 6:
9293 if ((type = lex_keyword(parser, current_start, "ensure", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9294 if ((type = lex_keyword(parser, current_start, "module", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_MODULE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9295 if ((type = lex_keyword(parser, current_start, "rescue", width, PM_LEX_STATE_MID, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) != PM_TOKEN_EOF) return type;
9296 if ((type = lex_keyword(parser, current_start, "return", width, PM_LEX_STATE_MID, PM_TOKEN_KEYWORD_RETURN, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9297 if ((type = lex_keyword(parser, current_start, "unless", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_UNLESS, PM_TOKEN_KEYWORD_UNLESS_MODIFIER)) != PM_TOKEN_EOF) return type;
9298 break;
9299 case 8:
9300 if ((type = lex_keyword(parser, current_start, "__LINE__", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD___LINE__, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9301 if ((type = lex_keyword(parser, current_start, "__FILE__", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD___FILE__, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9302 break;
9303 case 12:
9304 if ((type = lex_keyword(parser, current_start, "__ENCODING__", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD___ENCODING__, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9305 break;
9306 }
9307 }
9308
// Not a keyword: an uppercase first character makes it a constant.
9309 if (encoding_changed) {
9310 return parser->encoding->isupper_char(current_start, end - current_start) ? PM_TOKEN_CONSTANT : PM_TOKEN_IDENTIFIER;
9311 }
9312 return pm_encoding_utf_8_isupper_char(current_start, end - current_start) ? PM_TOKEN_CONSTANT : PM_TOKEN_IDENTIFIER;
9313 }
9314
9319static bool
9320current_token_starts_line(pm_parser_t *parser) {
9321 return (parser->current.start == parser->start) || (parser->current.start[-1] == '\n');
9322}
9323
/**
 * Handles a '#' found while lexing string-like content, where `pound` points
 * at the '#'. The byte after it decides what happens:
 *
 * - '@' (instance or class variable) and '$' (global variable): if a valid
 *   variable name follows, a PM_LEX_EMBVAR lex mode is pushed and
 *   PM_TOKEN_EMBVAR is returned with the token ending just past the '#';
 * - '{': a PM_LEX_EMBEXPR lex mode is pushed, enclosure nesting is
 *   incremented and the token ends just past the "#{";
 * - anything else: the '#' is ordinary content and PM_TOKEN_NOT_PROVIDED is
 *   returned so that the caller keeps lexing.
 *
 * In the interpolation cases, content already consumed before the '#'
 * (pound > parser->current.start) is cut off at `pound` first.
 *
 * NOTE(review): the statement closing each of the early-exit branches below
 * (and the end of the '{' case) is not visible in this listing -- the
 * embedded line numbers skip there. These are presumably `return`
 * statements; confirm against the upstream file.
 */
9338 static pm_token_type_t
9339 lex_interpolation(pm_parser_t *parser, const uint8_t *pound) {
9340 // If there is no content following this #, then we're at the end of
9341 // the string and we can safely return string content.
9342 if (pound + 1 >= parser->end) {
9343 parser->current.end = pound + 1;
9345 }
9346
9347 // Now we'll check against the character that follows the #. If it constitutes
9348 // valid interpolation, we'll handle that, otherwise we'll return
9349 // PM_TOKEN_NOT_PROVIDED.
9350 switch (pound[1]) {
9351 case '@': {
9352 // In this case we may have hit an embedded instance or class variable.
9353 if (pound + 2 >= parser->end) {
9354 parser->current.end = pound + 1;
9356 }
9357
9358 // If we're looking at a @ and there's another @, then we'll skip past the
9359 // second @.
9360 const uint8_t *variable = pound + 2;
9361 if (*variable == '@' && pound + 3 < parser->end) variable++;
9362
9363 if (char_is_identifier_start(parser, variable)) {
9364 // At this point we're sure that we've either hit an embedded instance
9365 // or class variable. In this case we'll first need to check if we've
9366 // already consumed content.
9367 if (pound > parser->current.start) {
9368 parser->current.end = pound;
9370 }
9371
9372 // Otherwise we need to return the embedded variable token
9373 // and then switch to the embedded variable lex mode.
9374 lex_mode_push(parser, (pm_lex_mode_t) { .mode = PM_LEX_EMBVAR });
9375 parser->current.end = pound + 1;
9376 return PM_TOKEN_EMBVAR;
9377 }
9378
9379 // If we didn't get a valid interpolation, then this is just regular
9380 // string content. This is like if we get "#@-". In this case the caller
9381 // should keep lexing.
9382 parser->current.end = pound + 1;
9383 return PM_TOKEN_NOT_PROVIDED;
9384 }
9385 case '$':
9386 // In this case we may have hit an embedded global variable. If there's
9387 // not enough room, then we'll just return string content.
9388 if (pound + 2 >= parser->end) {
9389 parser->current.end = pound + 1;
9391 }
9392
9393 // This is the character that we're going to check to see if it is the
9394 // start of an identifier that would indicate that this is a global
9395 // variable.
9396 const uint8_t *check = pound + 2;
9397
9398 if (pound[2] == '-') {
9399 if (pound + 3 >= parser->end) {
9400 parser->current.end = pound + 2;
9402 }
9403
9404 check++;
9405 }
9406
9407 // If the character that we're going to check is the start of an
9408 // identifier, or we don't have a - and the character is a decimal number
9409 // or a global name punctuation character, then we've hit an embedded
9410 // global variable.
9411 if (
9412 char_is_identifier_start(parser, check) ||
9413 (pound[2] != '-' && (pm_char_is_decimal_digit(pound[2]) || char_is_global_name_punctuation(pound[2])))
9414 ) {
9415 // In this case we've hit an embedded global variable. First check to
9416 // see if we've already consumed content. If we have, then we need to
9417 // return that content as string content first.
9418 if (pound > parser->current.start) {
9419 parser->current.end = pound;
9421 }
9422
9423 // Otherwise, we need to return the embedded variable token and switch
9424 // to the embedded variable lex mode.
9425 lex_mode_push(parser, (pm_lex_mode_t) { .mode = PM_LEX_EMBVAR });
9426 parser->current.end = pound + 1;
9427 return PM_TOKEN_EMBVAR;
9428 }
9429
9430 // In this case we've hit a #$ that does not indicate a global variable.
9431 // In this case we'll continue lexing past it.
9432 parser->current.end = pound + 1;
9433 return PM_TOKEN_NOT_PROVIDED;
9434 case '{':
9435 // In this case it's the start of an embedded expression. If we have
9436 // already consumed content, then we need to return that content as string
9437 // content first.
9438 if (pound > parser->current.start) {
9439 parser->current.end = pound;
9441 }
9442
9443 parser->enclosure_nesting++;
9444
9445 // Otherwise we'll skip past the #{ and begin lexing the embedded
9446 // expression.
9447 lex_mode_push(parser, (pm_lex_mode_t) { .mode = PM_LEX_EMBEXPR });
9448 parser->current.end = pound + 2;
9449 parser->command_start = true;
9450 pm_do_loop_stack_push(parser, false);
9452 default:
9453 // In this case we've hit a # that doesn't constitute interpolation. We'll
9454 // mark that by returning the not provided token type. This tells the
9455 // consumer to keep lexing forward.
9456 parser->current.end = pound + 1;
9457 return PM_TOKEN_NOT_PROVIDED;
9458 }
9459 }
9460
/**
 * Bit flags that are threaded through the escape sequence readers. They can
 * be combined with bitwise OR.
 */

/** No modifier applies to the escape sequence being read. */
static const uint8_t PM_ESCAPE_FLAG_NONE = 0x00;

/** The escape sequence is nested inside a control (\c or \C-) escape. */
static const uint8_t PM_ESCAPE_FLAG_CONTROL = 0x01;

/** The escape sequence is nested inside a meta (\M-) escape. */
static const uint8_t PM_ESCAPE_FLAG_META = 0x02;

/** The escape sequence is being read for a ?x character literal. */
static const uint8_t PM_ESCAPE_FLAG_SINGLE = 0x04;

/** The escape sequence is also mirrored into a regular expression buffer. */
static const uint8_t PM_ESCAPE_FLAG_REGEXP = 0x08;
9466
/**
 * A lookup table indexed by a 7-bit ASCII byte. An entry is 1 when the byte
 * is accepted as the target of a control or meta escape by the escape reader,
 * and 0 otherwise.
 */
static const bool ascii_printable_chars[0x80] = {
    /* 0x00-0x0F: only \t \n \v \f \r (0x09-0x0D) are set */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0,
    /* 0x10-0x1F: none set */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x20-0x2F */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x30-0x3F */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x40-0x4F */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x50-0x5F: the backslash (0x5C) is cleared */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1,
    /* 0x60-0x6F */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x70-0x7F: DEL (0x7F) is cleared */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0
};
9480
9481static inline bool
9482char_is_ascii_printable(const uint8_t b) {
9483 return (b < 0x80) && ascii_printable_chars[b];
9484}
9485
/**
 * Converts a single hexadecimal digit character into its numeric value. No
 * validation is performed here; the visible callers check that the byte is a
 * hexadecimal digit before calling.
 */
static inline uint8_t
escape_hexadecimal_digit(const uint8_t value) {
    if (value <= '9') {
        return (uint8_t) (value - '0');
    }

    // For both 'a'-'f' and 'A'-'F' the low three bits are 1 through 6, so
    // adding 9 yields 10 through 15.
    return (uint8_t) ((value & 0x7) + 9);
}
9494
9500static inline uint32_t
9501escape_unicode(pm_parser_t *parser, const uint8_t *string, size_t length) {
9502 uint32_t value = 0;
9503 for (size_t index = 0; index < length; index++) {
9504 if (index != 0) value <<= 4;
9505 value |= escape_hexadecimal_digit(string[index]);
9506 }
9507
9508 // Here we're going to verify that the value is actually a valid Unicode
9509 // codepoint and not a surrogate pair.
9510 if (value >= 0xD800 && value <= 0xDFFF) {
9511 pm_parser_err(parser, string, string + length, PM_ERR_ESCAPE_INVALID_UNICODE);
9512 return 0xFFFD;
9513 }
9514
9515 return value;
9516}
9517
9521static inline uint8_t
9522escape_byte(uint8_t value, const uint8_t flags) {
9523 if (flags & PM_ESCAPE_FLAG_CONTROL) value &= 0x9f;
9524 if (flags & PM_ESCAPE_FLAG_META) value |= 0x80;
9525 return value;
9526}
9527
9531static inline void
9532escape_write_unicode(pm_parser_t *parser, pm_buffer_t *buffer, const uint8_t flags, const uint8_t *start, const uint8_t *end, uint32_t value) {
9533 // \u escape sequences in string-like structures implicitly change the
9534 // encoding to UTF-8 if they are >= 0x80 or if they are used in a character
9535 // literal.
9536 if (value >= 0x80 || flags & PM_ESCAPE_FLAG_SINGLE) {
9537 if (parser->explicit_encoding != NULL && parser->explicit_encoding != PM_ENCODING_UTF_8_ENTRY) {
9538 PM_PARSER_ERR_FORMAT(parser, start, end, PM_ERR_MIXED_ENCODING, parser->explicit_encoding->name);
9539 }
9540
9542 }
9543
9544 if (value <= 0x7F) { // 0xxxxxxx
9545 pm_buffer_append_byte(buffer, (uint8_t) value);
9546 } else if (value <= 0x7FF) { // 110xxxxx 10xxxxxx
9547 pm_buffer_append_byte(buffer, (uint8_t) (0xC0 | (value >> 6)));
9548 pm_buffer_append_byte(buffer, (uint8_t) (0x80 | (value & 0x3F)));
9549 } else if (value <= 0xFFFF) { // 1110xxxx 10xxxxxx 10xxxxxx
9550 pm_buffer_append_byte(buffer, (uint8_t) (0xE0 | (value >> 12)));
9551 pm_buffer_append_byte(buffer, (uint8_t) (0x80 | ((value >> 6) & 0x3F)));
9552 pm_buffer_append_byte(buffer, (uint8_t) (0x80 | (value & 0x3F)));
9553 } else if (value <= 0x10FFFF) { // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
9554 pm_buffer_append_byte(buffer, (uint8_t) (0xF0 | (value >> 18)));
9555 pm_buffer_append_byte(buffer, (uint8_t) (0x80 | ((value >> 12) & 0x3F)));
9556 pm_buffer_append_byte(buffer, (uint8_t) (0x80 | ((value >> 6) & 0x3F)));
9557 pm_buffer_append_byte(buffer, (uint8_t) (0x80 | (value & 0x3F)));
9558 } else {
9559 pm_parser_err(parser, start, end, PM_ERR_ESCAPE_INVALID_UNICODE);
9560 pm_buffer_append_byte(buffer, 0xEF);
9561 pm_buffer_append_byte(buffer, 0xBF);
9562 pm_buffer_append_byte(buffer, 0xBD);
9563 }
9564}
9565
9570static inline void
9571escape_write_byte_encoded(pm_parser_t *parser, pm_buffer_t *buffer, uint8_t byte) {
9572 if (byte >= 0x80) {
9573 if (parser->explicit_encoding != NULL && parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY && parser->encoding != PM_ENCODING_UTF_8_ENTRY) {
9574 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_MIXED_ENCODING, parser->encoding->name);
9575 }
9576
9577 parser->explicit_encoding = parser->encoding;
9578 }
9579
9580 pm_buffer_append_byte(buffer, byte);
9581}
9582
9586static inline void
9587escape_write_escape_encoded(pm_parser_t *parser, pm_buffer_t *buffer) {
9588 size_t width;
9589 if (parser->encoding_changed) {
9590 width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9591 } else {
9592 width = pm_encoding_utf_8_char_width(parser->current.end, parser->end - parser->current.end);
9593 }
9594
9595 // TODO: If the character is invalid in the given encoding, then we'll just
9596 // push one byte into the buffer. This should actually be an error.
9597 width = (width == 0) ? 1 : width;
9598
9599 for (size_t index = 0; index < width; index++) {
9600 escape_write_byte_encoded(parser, buffer, *parser->current.end);
9601 parser->current.end++;
9602 }
9603}
9604
9620static inline void
9621escape_write_byte(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expression_buffer, uint8_t flags, uint8_t byte) {
9622 if (flags & PM_ESCAPE_FLAG_REGEXP) {
9623 pm_buffer_append_format(regular_expression_buffer, "\\x%02X", byte);
9624 }
9625
9626 escape_write_byte_encoded(parser, buffer, byte);
9627}
9628
9634static void
9635escape_read_warn(pm_parser_t *parser, uint8_t flags, uint8_t flag, const char *type) {
9636#define FLAG(value) ((value & PM_ESCAPE_FLAG_CONTROL) ? "\\C-" : (value & PM_ESCAPE_FLAG_META) ? "\\M-" : "")
9637
9638 PM_PARSER_WARN_TOKEN_FORMAT(
9639 parser,
9640 parser->current,
9641 PM_WARN_INVALID_CHARACTER,
9642 FLAG(flags),
9643 FLAG(flag),
9644 type
9645 );
9646
9647#undef FLAG
9648}
9649
9653static void
9654escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expression_buffer, uint8_t flags) {
9655 switch (peek(parser)) {
9656 case '\\': {
9657 parser->current.end++;
9658 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\\', flags));
9659 return;
9660 }
9661 case '\'': {
9662 parser->current.end++;
9663 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\'', flags));
9664 return;
9665 }
9666 case 'a': {
9667 parser->current.end++;
9668 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\a', flags));
9669 return;
9670 }
9671 case 'b': {
9672 parser->current.end++;
9673 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\b', flags));
9674 return;
9675 }
9676 case 'e': {
9677 parser->current.end++;
9678 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\033', flags));
9679 return;
9680 }
9681 case 'f': {
9682 parser->current.end++;
9683 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\f', flags));
9684 return;
9685 }
9686 case 'n': {
9687 parser->current.end++;
9688 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\n', flags));
9689 return;
9690 }
9691 case 'r': {
9692 parser->current.end++;
9693 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\r', flags));
9694 return;
9695 }
9696 case 's': {
9697 parser->current.end++;
9698 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(' ', flags));
9699 return;
9700 }
9701 case 't': {
9702 parser->current.end++;
9703 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\t', flags));
9704 return;
9705 }
9706 case 'v': {
9707 parser->current.end++;
9708 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\v', flags));
9709 return;
9710 }
9711 case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': {
9712 uint8_t value = (uint8_t) (*parser->current.end - '0');
9713 parser->current.end++;
9714
9715 if (pm_char_is_octal_digit(peek(parser))) {
9716 value = ((uint8_t) (value << 3)) | ((uint8_t) (*parser->current.end - '0'));
9717 parser->current.end++;
9718
9719 if (pm_char_is_octal_digit(peek(parser))) {
9720 value = ((uint8_t) (value << 3)) | ((uint8_t) (*parser->current.end - '0'));
9721 parser->current.end++;
9722 }
9723 }
9724
9725 escape_write_byte(parser, buffer, regular_expression_buffer, flags, value);
9726 return;
9727 }
9728 case 'x': {
9729 const uint8_t *start = parser->current.end - 1;
9730
9731 parser->current.end++;
9732 uint8_t byte = peek(parser);
9733
9734 if (pm_char_is_hexadecimal_digit(byte)) {
9735 uint8_t value = escape_hexadecimal_digit(byte);
9736 parser->current.end++;
9737
9738 byte = peek(parser);
9739 if (pm_char_is_hexadecimal_digit(byte)) {
9740 value = (uint8_t) ((value << 4) | escape_hexadecimal_digit(byte));
9741 parser->current.end++;
9742 }
9743
9744 value = escape_byte(value, flags);
9745 if (flags & PM_ESCAPE_FLAG_REGEXP) {
9746 if (flags & (PM_ESCAPE_FLAG_CONTROL | PM_ESCAPE_FLAG_META)) {
9747 pm_buffer_append_format(regular_expression_buffer, "\\x%02X", value);
9748 } else {
9749 pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
9750 }
9751 }
9752
9753 escape_write_byte_encoded(parser, buffer, value);
9754 } else {
9755 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_HEXADECIMAL);
9756 }
9757
9758 return;
9759 }
9760 case 'u': {
9761 const uint8_t *start = parser->current.end - 1;
9762 parser->current.end++;
9763
9764 if (parser->current.end == parser->end) {
9765 const uint8_t *start = parser->current.end - 2;
9766 PM_PARSER_ERR_FORMAT(parser, start, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE_SHORT, 2, start);
9767 } else if (peek(parser) == '{') {
9768 const uint8_t *unicode_codepoints_start = parser->current.end - 2;
9769 parser->current.end++;
9770
9771 size_t whitespace;
9772 while (true) {
9773 if ((whitespace = pm_strspn_inline_whitespace(parser->current.end, parser->end - parser->current.end)) > 0) {
9774 parser->current.end += whitespace;
9775 } else if (peek(parser) == '\\' && peek_offset(parser, 1) == 'n') {
9776 // This is super hacky, but it gets us nicer error
9777 // messages because we can still pass it off to the
9778 // regular expression engine even if we hit an
9779 // unterminated regular expression.
9780 parser->current.end += 2;
9781 } else {
9782 break;
9783 }
9784 }
9785
9786 const uint8_t *extra_codepoints_start = NULL;
9787 int codepoints_count = 0;
9788
9789 while ((parser->current.end < parser->end) && (*parser->current.end != '}')) {
9790 const uint8_t *unicode_start = parser->current.end;
9791 size_t hexadecimal_length = pm_strspn_hexadecimal_digit(parser->current.end, parser->end - parser->current.end);
9792
9793 if (hexadecimal_length > 6) {
9794 // \u{nnnn} character literal allows only 1-6 hexadecimal digits
9795 pm_parser_err(parser, unicode_start, unicode_start + hexadecimal_length, PM_ERR_ESCAPE_INVALID_UNICODE_LONG);
9796 } else if (hexadecimal_length == 0) {
9797 // there are not hexadecimal characters
9798
9799 if (flags & PM_ESCAPE_FLAG_REGEXP) {
9800 // If this is a regular expression, we are going to
9801 // let the regular expression engine handle this
9802 // error instead of us.
9803 pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
9804 } else {
9805 pm_parser_err(parser, parser->current.end, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE);
9806 pm_parser_err(parser, parser->current.end, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE_TERM);
9807 }
9808
9809 return;
9810 }
9811
9812 parser->current.end += hexadecimal_length;
9813 codepoints_count++;
9814 if (flags & PM_ESCAPE_FLAG_SINGLE && codepoints_count == 2) {
9815 extra_codepoints_start = unicode_start;
9816 }
9817
9818 uint32_t value = escape_unicode(parser, unicode_start, hexadecimal_length);
9819 escape_write_unicode(parser, buffer, flags, unicode_start, parser->current.end, value);
9820
9821 parser->current.end += pm_strspn_inline_whitespace(parser->current.end, parser->end - parser->current.end);
9822 }
9823
9824 // ?\u{nnnn} character literal should contain only one codepoint
9825 // and cannot be like ?\u{nnnn mmmm}.
9826 if (flags & PM_ESCAPE_FLAG_SINGLE && codepoints_count > 1) {
9827 pm_parser_err(parser, extra_codepoints_start, parser->current.end - 1, PM_ERR_ESCAPE_INVALID_UNICODE_LITERAL);
9828 }
9829
9830 if (parser->current.end == parser->end) {
9831 PM_PARSER_ERR_FORMAT(parser, start, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE_LIST, (int) (parser->current.end - start), start);
9832 } else if (peek(parser) == '}') {
9833 parser->current.end++;
9834 } else {
9835 if (flags & PM_ESCAPE_FLAG_REGEXP) {
9836 // If this is a regular expression, we are going to let
9837 // the regular expression engine handle this error
9838 // instead of us.
9839 pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
9840 } else {
9841 pm_parser_err(parser, unicode_codepoints_start, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE_TERM);
9842 }
9843 }
9844
9845 if (flags & PM_ESCAPE_FLAG_REGEXP) {
9846 pm_buffer_append_bytes(regular_expression_buffer, unicode_codepoints_start, (size_t) (parser->current.end - unicode_codepoints_start));
9847 }
9848 } else {
9849 size_t length = pm_strspn_hexadecimal_digit(parser->current.end, MIN(parser->end - parser->current.end, 4));
9850
9851 if (length == 0) {
9852 if (flags & PM_ESCAPE_FLAG_REGEXP) {
9853 pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
9854 } else {
9855 const uint8_t *start = parser->current.end - 2;
9856 PM_PARSER_ERR_FORMAT(parser, start, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE_SHORT, 2, start);
9857 }
9858 } else if (length == 4) {
9859 uint32_t value = escape_unicode(parser, parser->current.end, 4);
9860
9861 if (flags & PM_ESCAPE_FLAG_REGEXP) {
9862 pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end + 4 - start));
9863 }
9864
9865 escape_write_unicode(parser, buffer, flags, start, parser->current.end + 4, value);
9866 parser->current.end += 4;
9867 } else {
9868 parser->current.end += length;
9869
9870 if (flags & PM_ESCAPE_FLAG_REGEXP) {
9871 // If this is a regular expression, we are going to let
9872 // the regular expression engine handle this error
9873 // instead of us.
9874 pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
9875 } else {
9876 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_UNICODE);
9877 }
9878 }
9879 }
9880
9881 return;
9882 }
9883 case 'c': {
9884 parser->current.end++;
9885 if (flags & PM_ESCAPE_FLAG_CONTROL) {
9886 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL_REPEAT);
9887 }
9888
9889 if (parser->current.end == parser->end) {
9890 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL);
9891 return;
9892 }
9893
9894 uint8_t peeked = peek(parser);
9895 switch (peeked) {
9896 case '?': {
9897 parser->current.end++;
9898 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(0x7f, flags));
9899 return;
9900 }
9901 case '\\':
9902 parser->current.end++;
9903
9904 if (match(parser, 'u') || match(parser, 'U')) {
9905 pm_parser_err(parser, parser->current.start, parser->current.end, PM_ERR_INVALID_ESCAPE_CHARACTER);
9906 return;
9907 }
9908
9909 escape_read(parser, buffer, regular_expression_buffer, flags | PM_ESCAPE_FLAG_CONTROL);
9910 return;
9911 case ' ':
9912 parser->current.end++;
9913 escape_read_warn(parser, flags, PM_ESCAPE_FLAG_CONTROL, "\\s");
9914 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
9915 return;
9916 case '\t':
9917 parser->current.end++;
9918 escape_read_warn(parser, flags, 0, "\\t");
9919 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
9920 return;
9921 default: {
9922 if (!char_is_ascii_printable(peeked)) {
9923 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL);
9924 return;
9925 }
9926
9927 parser->current.end++;
9928 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
9929 return;
9930 }
9931 }
9932 }
9933 case 'C': {
9934 parser->current.end++;
9935 if (flags & PM_ESCAPE_FLAG_CONTROL) {
9936 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL_REPEAT);
9937 }
9938
9939 if (peek(parser) != '-') {
9940 size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9941 pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_CONTROL);
9942 return;
9943 }
9944
9945 parser->current.end++;
9946 if (parser->current.end == parser->end) {
9947 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL);
9948 return;
9949 }
9950
9951 uint8_t peeked = peek(parser);
9952 switch (peeked) {
9953 case '?': {
9954 parser->current.end++;
9955 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(0x7f, flags));
9956 return;
9957 }
9958 case '\\':
9959 parser->current.end++;
9960
9961 if (match(parser, 'u') || match(parser, 'U')) {
9962 pm_parser_err(parser, parser->current.start, parser->current.end, PM_ERR_INVALID_ESCAPE_CHARACTER);
9963 return;
9964 }
9965
9966 escape_read(parser, buffer, regular_expression_buffer, flags | PM_ESCAPE_FLAG_CONTROL);
9967 return;
9968 case ' ':
9969 parser->current.end++;
9970 escape_read_warn(parser, flags, PM_ESCAPE_FLAG_CONTROL, "\\s");
9971 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
9972 return;
9973 case '\t':
9974 parser->current.end++;
9975 escape_read_warn(parser, flags, 0, "\\t");
9976 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
9977 return;
9978 default: {
9979 if (!char_is_ascii_printable(peeked)) {
9980 size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9981 pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_CONTROL);
9982 return;
9983 }
9984
9985 parser->current.end++;
9986 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
9987 return;
9988 }
9989 }
9990 }
9991 case 'M': {
9992 parser->current.end++;
9993 if (flags & PM_ESCAPE_FLAG_META) {
9994 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_META_REPEAT);
9995 }
9996
9997 if (peek(parser) != '-') {
9998 size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9999 pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_META);
10000 return;
10001 }
10002
10003 parser->current.end++;
10004 if (parser->current.end == parser->end) {
10005 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_META);
10006 return;
10007 }
10008
10009 uint8_t peeked = peek(parser);
10010 switch (peeked) {
10011 case '\\':
10012 parser->current.end++;
10013
10014 if (match(parser, 'u') || match(parser, 'U')) {
10015 pm_parser_err(parser, parser->current.start, parser->current.end, PM_ERR_INVALID_ESCAPE_CHARACTER);
10016 return;
10017 }
10018
10019 escape_read(parser, buffer, regular_expression_buffer, flags | PM_ESCAPE_FLAG_META);
10020 return;
10021 case ' ':
10022 parser->current.end++;
10023 escape_read_warn(parser, flags, PM_ESCAPE_FLAG_META, "\\s");
10024 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_META));
10025 return;
10026 case '\t':
10027 parser->current.end++;
10028 escape_read_warn(parser, flags & ((uint8_t) ~PM_ESCAPE_FLAG_CONTROL), PM_ESCAPE_FLAG_META, "\\t");
10029 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_META));
10030 return;
10031 default:
10032 if (!char_is_ascii_printable(peeked)) {
10033 size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
10034 pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_META);
10035 return;
10036 }
10037
10038 parser->current.end++;
10039 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_META));
10040 return;
10041 }
10042 }
10043 case '\r': {
10044 if (peek_offset(parser, 1) == '\n') {
10045 parser->current.end += 2;
10046 escape_write_byte_encoded(parser, buffer, escape_byte('\n', flags));
10047 return;
10048 }
10049 }
10050 /* fallthrough */
10051 default: {
10052 if (parser->current.end < parser->end) {
10053 escape_write_escape_encoded(parser, buffer);
10054 } else {
10055 pm_parser_err_current(parser, PM_ERR_INVALID_ESCAPE_CHARACTER);
10056 }
10057 return;
10058 }
10059 }
10060}
10061
10087static pm_token_type_t
10088lex_question_mark(pm_parser_t *parser) {
10089 if (lex_state_end_p(parser)) {
10090 lex_state_set(parser, PM_LEX_STATE_BEG);
10092 }
10093
10094 if (parser->current.end >= parser->end) {
10095 pm_parser_err_current(parser, PM_ERR_INCOMPLETE_QUESTION_MARK);
10096 pm_string_shared_init(&parser->current_string, parser->current.start + 1, parser->current.end);
10098 }
10099
10100 if (pm_char_is_whitespace(*parser->current.end)) {
10101 lex_state_set(parser, PM_LEX_STATE_BEG);
10103 }
10104
10105 lex_state_set(parser, PM_LEX_STATE_BEG);
10106
10107 if (match(parser, '\\')) {
10108 lex_state_set(parser, PM_LEX_STATE_END);
10109
10110 pm_buffer_t buffer;
10111 pm_buffer_init_capacity(&buffer, 3);
10112
10113 escape_read(parser, &buffer, NULL, PM_ESCAPE_FLAG_SINGLE);
10114 pm_string_owned_init(&parser->current_string, (uint8_t *) buffer.value, buffer.length);
10115
10117 } else {
10118 size_t encoding_width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
10119
10120 // Ternary operators can have a ? immediately followed by an identifier
10121 // which starts with an underscore. We check for this case here.
10122 if (
10123 !(parser->encoding->alnum_char(parser->current.end, parser->end - parser->current.end) || peek(parser) == '_') ||
10124 (
10125 (parser->current.end + encoding_width >= parser->end) ||
10126 !char_is_identifier(parser, parser->current.end + encoding_width)
10127 )
10128 ) {
10129 lex_state_set(parser, PM_LEX_STATE_END);
10130 parser->current.end += encoding_width;
10131 pm_string_shared_init(&parser->current_string, parser->current.start + 1, parser->current.end);
10133 }
10134 }
10135
10137}
10138
10143static pm_token_type_t
10144lex_at_variable(pm_parser_t *parser) {
10146 size_t width;
10147
10148 if (parser->current.end < parser->end && (width = char_is_identifier_start(parser, parser->current.end)) > 0) {
10149 parser->current.end += width;
10150
10151 while (parser->current.end < parser->end && (width = char_is_identifier(parser, parser->current.end)) > 0) {
10152 parser->current.end += width;
10153 }
10154 } else if (parser->current.end < parser->end && pm_char_is_decimal_digit(*parser->current.end)) {
10155 pm_diagnostic_id_t diag_id = (type == PM_TOKEN_CLASS_VARIABLE) ? PM_ERR_INCOMPLETE_VARIABLE_CLASS : PM_ERR_INCOMPLETE_VARIABLE_INSTANCE;
10156 if (parser->version == PM_OPTIONS_VERSION_CRUBY_3_3) {
10157 diag_id = (type == PM_TOKEN_CLASS_VARIABLE) ? PM_ERR_INCOMPLETE_VARIABLE_CLASS_3_3 : PM_ERR_INCOMPLETE_VARIABLE_INSTANCE_3_3;
10158 }
10159
10160 size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
10161 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, diag_id, (int) ((parser->current.end + width) - parser->current.start), (const char *) parser->current.start);
10162 } else {
10163 pm_diagnostic_id_t diag_id = (type == PM_TOKEN_CLASS_VARIABLE) ? PM_ERR_CLASS_VARIABLE_BARE : PM_ERR_INSTANCE_VARIABLE_BARE;
10164 pm_parser_err_token(parser, &parser->current, diag_id);
10165 }
10166
10167 // If we're lexing an embedded variable, then we need to pop back into the
10168 // parent lex context.
10169 if (parser->lex_modes.current->mode == PM_LEX_EMBVAR) {
10170 lex_mode_pop(parser);
10171 }
10172
10173 return type;
10174}
10175
10179static inline void
10180parser_lex_callback(pm_parser_t *parser) {
10181 if (parser->lex_callback) {
10182 parser->lex_callback->callback(parser->lex_callback->data, parser, &parser->current);
10183 }
10184}
10185
10189static inline pm_comment_t *
10190parser_comment(pm_parser_t *parser, pm_comment_type_t type) {
10191 pm_comment_t *comment = (pm_comment_t *) xcalloc(1, sizeof(pm_comment_t));
10192 if (comment == NULL) return NULL;
10193
10194 *comment = (pm_comment_t) {
10195 .type = type,
10196 .location = { parser->current.start, parser->current.end }
10197 };
10198
10199 return comment;
10200}
10201
10207static pm_token_type_t
10208lex_embdoc(pm_parser_t *parser) {
10209 // First, lex out the EMBDOC_BEGIN token.
10210 const uint8_t *newline = next_newline(parser->current.end, parser->end - parser->current.end);
10211
10212 if (newline == NULL) {
10213 parser->current.end = parser->end;
10214 } else {
10215 pm_newline_list_append(&parser->newline_list, newline);
10216 parser->current.end = newline + 1;
10217 }
10218
10219 parser->current.type = PM_TOKEN_EMBDOC_BEGIN;
10220 parser_lex_callback(parser);
10221
10222 // Now, create a comment that is going to be attached to the parser.
10223 pm_comment_t *comment = parser_comment(parser, PM_COMMENT_EMBDOC);
10224 if (comment == NULL) return PM_TOKEN_EOF;
10225
10226 // Now, loop until we find the end of the embedded documentation or the end
10227 // of the file.
10228 while (parser->current.end + 4 <= parser->end) {
10229 parser->current.start = parser->current.end;
10230
10231 // If we've hit the end of the embedded documentation then we'll return
10232 // that token here.
10233 if (
10234 (memcmp(parser->current.end, "=end", 4) == 0) &&
10235 (
10236 (parser->current.end + 4 == parser->end) || // end of file
10237 pm_char_is_whitespace(parser->current.end[4]) || // whitespace
10238 (parser->current.end[4] == '\0') || // NUL or end of script
10239 (parser->current.end[4] == '\004') || // ^D
10240 (parser->current.end[4] == '\032') // ^Z
10241 )
10242 ) {
10243 const uint8_t *newline = next_newline(parser->current.end, parser->end - parser->current.end);
10244
10245 if (newline == NULL) {
10246 parser->current.end = parser->end;
10247 } else {
10248 pm_newline_list_append(&parser->newline_list, newline);
10249 parser->current.end = newline + 1;
10250 }
10251
10252 parser->current.type = PM_TOKEN_EMBDOC_END;
10253 parser_lex_callback(parser);
10254
10255 comment->location.end = parser->current.end;
10256 pm_list_append(&parser->comment_list, (pm_list_node_t *) comment);
10257
10258 return PM_TOKEN_EMBDOC_END;
10259 }
10260
10261 // Otherwise, we'll parse until the end of the line and return a line of
10262 // embedded documentation.
10263 const uint8_t *newline = next_newline(parser->current.end, parser->end - parser->current.end);
10264
10265 if (newline == NULL) {
10266 parser->current.end = parser->end;
10267 } else {
10268 pm_newline_list_append(&parser->newline_list, newline);
10269 parser->current.end = newline + 1;
10270 }
10271
10272 parser->current.type = PM_TOKEN_EMBDOC_LINE;
10273 parser_lex_callback(parser);
10274 }
10275
10276 pm_parser_err_current(parser, PM_ERR_EMBDOC_TERM);
10277
10278 comment->location.end = parser->current.end;
10279 pm_list_append(&parser->comment_list, (pm_list_node_t *) comment);
10280
10281 return PM_TOKEN_EOF;
10282}
10283
10289static inline void
10290parser_lex_ignored_newline(pm_parser_t *parser) {
10291 parser->current.type = PM_TOKEN_IGNORED_NEWLINE;
10292 parser_lex_callback(parser);
10293}
10294
10304static inline void
10305parser_flush_heredoc_end(pm_parser_t *parser) {
10306 assert(parser->heredoc_end <= parser->end);
10307 parser->next_start = parser->heredoc_end;
10308 parser->heredoc_end = NULL;
10309}
10310
10314static bool
10315parser_end_of_line_p(const pm_parser_t *parser) {
10316 const uint8_t *cursor = parser->current.end;
10317
10318 while (cursor < parser->end && *cursor != '\n' && *cursor != '#') {
10319 if (!pm_char_is_inline_whitespace(*cursor++)) return false;
10320 }
10321
10322 return true;
10323}
10324
10343typedef struct {
10349
10354 const uint8_t *cursor;
10356
10376
10380static inline void
10381pm_token_buffer_push_byte(pm_token_buffer_t *token_buffer, uint8_t byte) {
10382 pm_buffer_append_byte(&token_buffer->buffer, byte);
10383}
10384
10385static inline void
10386pm_regexp_token_buffer_push_byte(pm_regexp_token_buffer_t *token_buffer, uint8_t byte) {
10387 pm_buffer_append_byte(&token_buffer->regexp_buffer, byte);
10388}
10389
10393static inline size_t
10394parser_char_width(const pm_parser_t *parser) {
10395 size_t width;
10396 if (parser->encoding_changed) {
10397 width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
10398 } else {
10399 width = pm_encoding_utf_8_char_width(parser->current.end, parser->end - parser->current.end);
10400 }
10401
10402 // TODO: If the character is invalid in the given encoding, then we'll just
10403 // push one byte into the buffer. This should actually be an error.
10404 return (width == 0 ? 1 : width);
10405}
10406
10410static void
10411pm_token_buffer_push_escaped(pm_token_buffer_t *token_buffer, pm_parser_t *parser) {
10412 size_t width = parser_char_width(parser);
10413 pm_buffer_append_bytes(&token_buffer->buffer, parser->current.end, width);
10414 parser->current.end += width;
10415}
10416
10417static void
10418pm_regexp_token_buffer_push_escaped(pm_regexp_token_buffer_t *token_buffer, pm_parser_t *parser) {
10419 size_t width = parser_char_width(parser);
10420 pm_buffer_append_bytes(&token_buffer->base.buffer, parser->current.end, width);
10421 pm_buffer_append_bytes(&token_buffer->regexp_buffer, parser->current.end, width);
10422 parser->current.end += width;
10423}
10424
/**
 * Returns true if none of the first `length` bytes at `value` has its high
 * bit set. An empty slice is considered ASCII-only.
 */
static bool
pm_slice_ascii_only_p(const uint8_t *value, size_t length) {
    for (size_t remaining = length; remaining > 0; remaining--, value++) {
        if (*value > 0x7F) {
            return false;
        }
    }

    return true;
}
10433
10440static inline void
10441pm_token_buffer_copy(pm_parser_t *parser, pm_token_buffer_t *token_buffer) {
10442 pm_string_owned_init(&parser->current_string, (uint8_t *) pm_buffer_value(&token_buffer->buffer), pm_buffer_length(&token_buffer->buffer));
10443}
10444
10445static inline void
10446pm_regexp_token_buffer_copy(pm_parser_t *parser, pm_regexp_token_buffer_t *token_buffer) {
10447 pm_string_owned_init(&parser->current_string, (uint8_t *) pm_buffer_value(&token_buffer->base.buffer), pm_buffer_length(&token_buffer->base.buffer));
10448 parser->current_regular_expression_ascii_only = pm_slice_ascii_only_p((const uint8_t *) pm_buffer_value(&token_buffer->regexp_buffer), pm_buffer_length(&token_buffer->regexp_buffer));
10449 pm_buffer_free(&token_buffer->regexp_buffer);
10450}
10451
10461static void
10462pm_token_buffer_flush(pm_parser_t *parser, pm_token_buffer_t *token_buffer) {
10463 if (token_buffer->cursor == NULL) {
10464 pm_string_shared_init(&parser->current_string, parser->current.start, parser->current.end);
10465 } else {
10466 pm_buffer_append_bytes(&token_buffer->buffer, token_buffer->cursor, (size_t) (parser->current.end - token_buffer->cursor));
10467 pm_token_buffer_copy(parser, token_buffer);
10468 }
10469}
10470
10471static void
10472pm_regexp_token_buffer_flush(pm_parser_t *parser, pm_regexp_token_buffer_t *token_buffer) {
10473 if (token_buffer->base.cursor == NULL) {
10474 pm_string_shared_init(&parser->current_string, parser->current.start, parser->current.end);
10475 parser->current_regular_expression_ascii_only = pm_slice_ascii_only_p(parser->current.start, (size_t) (parser->current.end - parser->current.start));
10476 } else {
10477 pm_buffer_append_bytes(&token_buffer->base.buffer, token_buffer->base.cursor, (size_t) (parser->current.end - token_buffer->base.cursor));
10478 pm_buffer_append_bytes(&token_buffer->regexp_buffer, token_buffer->base.cursor, (size_t) (parser->current.end - token_buffer->base.cursor));
10479 pm_regexp_token_buffer_copy(parser, token_buffer);
10480 }
10481}
10482
// Capacity passed to pm_buffer_init_capacity the first time one of the token
// buffer escape helpers sets up a buffer. The macro is scoped to those helpers
// and is #undef'd right after them.
10483#define PM_TOKEN_BUFFER_DEFAULT_SIZE 16
10484
10493static void
10494pm_token_buffer_escape(pm_parser_t *parser, pm_token_buffer_t *token_buffer) {
10495 const uint8_t *start;
10496 if (token_buffer->cursor == NULL) {
10497 pm_buffer_init_capacity(&token_buffer->buffer, PM_TOKEN_BUFFER_DEFAULT_SIZE);
10498 start = parser->current.start;
10499 } else {
10500 start = token_buffer->cursor;
10501 }
10502
10503 const uint8_t *end = parser->current.end - 1;
10504 assert(end >= start);
10505 pm_buffer_append_bytes(&token_buffer->buffer, start, (size_t) (end - start));
10506
10507 token_buffer->cursor = end;
10508}
10509
10510static void
10511pm_regexp_token_buffer_escape(pm_parser_t *parser, pm_regexp_token_buffer_t *token_buffer) {
10512 const uint8_t *start;
10513 if (token_buffer->base.cursor == NULL) {
10514 pm_buffer_init_capacity(&token_buffer->base.buffer, PM_TOKEN_BUFFER_DEFAULT_SIZE);
10515 pm_buffer_init_capacity(&token_buffer->regexp_buffer, PM_TOKEN_BUFFER_DEFAULT_SIZE);
10516 start = parser->current.start;
10517 } else {
10518 start = token_buffer->base.cursor;
10519 }
10520
10521 const uint8_t *end = parser->current.end - 1;
10522 pm_buffer_append_bytes(&token_buffer->base.buffer, start, (size_t) (end - start));
10523 pm_buffer_append_bytes(&token_buffer->regexp_buffer, start, (size_t) (end - start));
10524
10525 token_buffer->base.cursor = end;
10526}
10527
10528#undef PM_TOKEN_BUFFER_DEFAULT_SIZE
10529
10534static inline size_t
10535pm_heredoc_strspn_inline_whitespace(pm_parser_t *parser, const uint8_t **cursor, pm_heredoc_indent_t indent) {
10536 size_t whitespace = 0;
10537
10538 switch (indent) {
10539 case PM_HEREDOC_INDENT_NONE:
10540 // Do nothing, we can't match a terminator with
10541 // indentation and there's no need to calculate common
10542 // whitespace.
10543 break;
10544 case PM_HEREDOC_INDENT_DASH:
10545 // Skip past inline whitespace.
10546 *cursor += pm_strspn_inline_whitespace(*cursor, parser->end - *cursor);
10547 break;
10548 case PM_HEREDOC_INDENT_TILDE:
10549 // Skip past inline whitespace and calculate common
10550 // whitespace.
10551 while (*cursor < parser->end && pm_char_is_inline_whitespace(**cursor)) {
10552 if (**cursor == '\t') {
10553 whitespace = (whitespace / PM_TAB_WHITESPACE_SIZE + 1) * PM_TAB_WHITESPACE_SIZE;
10554 } else {
10555 whitespace++;
10556 }
10557 (*cursor)++;
10558 }
10559
10560 break;
10561 }
10562
10563 return whitespace;
10564}
10565
10570static uint8_t
10571pm_lex_percent_delimiter(pm_parser_t *parser) {
10572 size_t eol_length = match_eol(parser);
10573
10574 if (eol_length) {
10575 if (parser->heredoc_end) {
10576 // If we have already lexed a heredoc, then the newline has already
10577 // been added to the list. In this case we want to just flush the
10578 // heredoc end.
10579 parser_flush_heredoc_end(parser);
10580 } else {
10581 // Otherwise, we'll add the newline to the list of newlines.
10582 pm_newline_list_append(&parser->newline_list, parser->current.end + eol_length - 1);
10583 }
10584
10585 uint8_t delimiter = *parser->current.end;
10586
10587 // If our delimiter is \r\n, we want to treat it as if it's \n.
10588 // For example, %\r\nfoo\r\n should be "foo"
10589 if (eol_length == 2) {
10590 delimiter = *(parser->current.end + 1);
10591 }
10592
10593 parser->current.end += eol_length;
10594 return delimiter;
10595 }
10596
10597 return *parser->current.end++;
10598}
10599
// Sets the current token's type, invokes parser_lex_callback, and returns from
// the enclosing function. The expansion ends in a bare `return`, so the `;`
// written at the call site completes it.
//
// The expansion is several statements and is not wrapped in do { } while (0),
// so it must be used as a statement inside a braced body or a case arm. As the
// unbraced body of an `if`, only the assignment would be conditional.
10604#define LEX(token_type) parser->current.type = token_type; parser_lex_callback(parser); return
10605
10612static void
10613parser_lex(pm_parser_t *parser) {
10614 assert(parser->current.end <= parser->end);
10615 parser->previous = parser->current;
10616
10617 // This value mirrors cmd_state from CRuby.
10618 bool previous_command_start = parser->command_start;
10619 parser->command_start = false;
10620
10621 // This is used to communicate to the newline lexing function that we've
10622 // already seen a comment.
10623 bool lexed_comment = false;
10624
10625 // Here we cache the current value of the semantic token seen flag. This is
10626 // used to reset it in case we find a token that shouldn't flip this flag.
10627 unsigned int semantic_token_seen = parser->semantic_token_seen;
10628 parser->semantic_token_seen = true;
10629
10630 switch (parser->lex_modes.current->mode) {
10631 case PM_LEX_DEFAULT:
10632 case PM_LEX_EMBEXPR:
10633 case PM_LEX_EMBVAR:
10634
10635 // We have a specific named label here because we are going to jump back to
10636 // this location in the event that we have lexed a token that should not be
10637 // returned to the parser. This includes comments, ignored newlines, and
10638 // invalid tokens of some form.
10639 lex_next_token: {
10640 // If we have the special next_start pointer set, then we're going to jump
10641 // to that location and start lexing from there.
10642 if (parser->next_start != NULL) {
10643 parser->current.end = parser->next_start;
10644 parser->next_start = NULL;
10645 }
10646
10647 // This value mirrors space_seen from CRuby. It tracks whether or not
10648 // space has been eaten before the start of the next token.
10649 bool space_seen = false;
10650
10651 // First, we're going to skip past any whitespace at the front of the next
10652 // token.
10653 bool chomping = true;
10654 while (parser->current.end < parser->end && chomping) {
10655 switch (*parser->current.end) {
10656 case ' ':
10657 case '\t':
10658 case '\f':
10659 case '\v':
10660 parser->current.end++;
10661 space_seen = true;
10662 break;
10663 case '\r':
10664 if (match_eol_offset(parser, 1)) {
10665 chomping = false;
10666 } else {
10667 pm_parser_warn(parser, parser->current.end, parser->current.end + 1, PM_WARN_UNEXPECTED_CARRIAGE_RETURN);
10668 parser->current.end++;
10669 space_seen = true;
10670 }
10671 break;
10672 case '\\': {
10673 size_t eol_length = match_eol_offset(parser, 1);
10674 if (eol_length) {
10675 if (parser->heredoc_end) {
10676 parser->current.end = parser->heredoc_end;
10677 parser->heredoc_end = NULL;
10678 } else {
10679 parser->current.end += eol_length + 1;
10680 pm_newline_list_append(&parser->newline_list, parser->current.end - 1);
10681 space_seen = true;
10682 }
10683 } else if (pm_char_is_inline_whitespace(*parser->current.end)) {
10684 parser->current.end += 2;
10685 } else {
10686 chomping = false;
10687 }
10688
10689 break;
10690 }
10691 default:
10692 chomping = false;
10693 break;
10694 }
10695 }
10696
10697 // Next, we'll set to start of this token to be the current end.
10698 parser->current.start = parser->current.end;
10699
10700 // We'll check if we're at the end of the file. If we are, then we
10701 // need to return the EOF token.
10702 if (parser->current.end >= parser->end) {
10703 // If we hit EOF, but the EOF came immediately after a newline,
10704 // set the start of the token to the newline. This way any EOF
10705 // errors will be reported as happening on that line rather than
10706 // a line after. For example "foo(\n" should report an error
10707 // on line 1 even though EOF technically occurs on line 2.
10708 if (parser->current.start > parser->start && (*(parser->current.start - 1) == '\n')) {
10709 parser->current.start -= 1;
10710 }
10711 LEX(PM_TOKEN_EOF);
10712 }
10713
10714 // Finally, we'll check the current character to determine the next
10715 // token.
10716 switch (*parser->current.end++) {
10717 case '\0': // NUL or end of script
10718 case '\004': // ^D
10719 case '\032': // ^Z
10720 parser->current.end--;
10721 LEX(PM_TOKEN_EOF);
10722
10723 case '#': { // comments
10724 const uint8_t *ending = next_newline(parser->current.end, parser->end - parser->current.end);
10725 parser->current.end = ending == NULL ? parser->end : ending;
10726
10727 // If we found a comment while lexing, then we're going to
10728 // add it to the list of comments in the file and keep
10729 // lexing.
10730 pm_comment_t *comment = parser_comment(parser, PM_COMMENT_INLINE);
10731 pm_list_append(&parser->comment_list, (pm_list_node_t *) comment);
10732
10733 if (ending) parser->current.end++;
10734 parser->current.type = PM_TOKEN_COMMENT;
10735 parser_lex_callback(parser);
10736
10737 // Here, parse the comment to see if it's a magic comment
10738 // and potentially change state on the parser.
10739 if (!parser_lex_magic_comment(parser, semantic_token_seen) && (parser->current.start == parser->encoding_comment_start)) {
10740 ptrdiff_t length = parser->current.end - parser->current.start;
10741
10742 // If we didn't find a magic comment within the first
10743 // pass and we're at the start of the file, then we need
10744 // to do another pass to potentially find other patterns
10745 // for encoding comments.
10746 if (length >= 10 && !parser->encoding_locked) {
10747 parser_lex_magic_comment_encoding(parser);
10748 }
10749 }
10750
10751 lexed_comment = true;
10752 }
10753 /* fallthrough */
10754 case '\r':
10755 case '\n': {
10756 parser->semantic_token_seen = semantic_token_seen & 0x1;
10757 size_t eol_length = match_eol_at(parser, parser->current.end - 1);
10758
10759 if (eol_length) {
10760 // The only way you can have carriage returns in this
10761 // particular loop is if you have a carriage return
10762 // followed by a newline. In that case we'll just skip
10763 // over the carriage return and continue lexing, in
10764 // order to make it so that the newline token
10765 // encapsulates both the carriage return and the
10766 // newline. Note that we need to check that we haven't
10767 // already lexed a comment here because that falls
10768 // through into here as well.
10769 if (!lexed_comment) {
10770 parser->current.end += eol_length - 1; // skip CR
10771 }
10772
10773 if (parser->heredoc_end == NULL) {
10774 pm_newline_list_append(&parser->newline_list, parser->current.end - 1);
10775 }
10776 }
10777
10778 if (parser->heredoc_end) {
10779 parser_flush_heredoc_end(parser);
10780 }
10781
10782 // If this is an ignored newline, then we can continue lexing after
10783 // calling the callback with the ignored newline token.
10784 switch (lex_state_ignored_p(parser)) {
10785 case PM_IGNORED_NEWLINE_NONE:
10786 break;
10787 case PM_IGNORED_NEWLINE_PATTERN:
10788 if (parser->pattern_matching_newlines || parser->in_keyword_arg) {
10789 if (!lexed_comment) parser_lex_ignored_newline(parser);
10790 lex_state_set(parser, PM_LEX_STATE_BEG);
10791 parser->command_start = true;
10792 parser->current.type = PM_TOKEN_NEWLINE;
10793 return;
10794 }
10795 /* fallthrough */
10796 case PM_IGNORED_NEWLINE_ALL:
10797 if (!lexed_comment) parser_lex_ignored_newline(parser);
10798 lexed_comment = false;
10799 goto lex_next_token;
10800 }
10801
10802 // Here we need to look ahead and see if there is a call operator
10803 // (either . or &.) that starts the next line. If there is, then this
10804 // is going to become an ignored newline and we're going to instead
10805 // return the call operator.
10806 const uint8_t *next_content = parser->next_start == NULL ? parser->current.end : parser->next_start;
10807 next_content += pm_strspn_inline_whitespace(next_content, parser->end - next_content);
10808
10809 if (next_content < parser->end) {
10810 // If we hit a comment after a newline, then we're going to check
10811 // if it's ignored or if it's followed by a method call ('.').
10812 // If it is, then we're going to call the
10813 // callback with an ignored newline and then continue lexing.
10814 // Otherwise we'll return a regular newline.
10815 if (next_content[0] == '#') {
10816 // Here we look for a "." or "&." following a "\n".
10817 const uint8_t *following = next_newline(next_content, parser->end - next_content);
10818
10819 while (following && (following + 1 < parser->end)) {
10820 following++;
10821 following += pm_strspn_inline_whitespace(following, parser->end - following);
10822
10823 // If this is not followed by a comment, then we can break out
10824 // of this loop.
10825 if (peek_at(parser, following) != '#') break;
10826
10827 // If there is a comment, then we need to find the end of the
10828 // comment and continue searching from there.
10829 following = next_newline(following, parser->end - following);
10830 }
10831
10832 // If the lex state was ignored, or we hit a '.' or a '&.',
10833 // we will lex the ignored newline
10834 if (
10835 lex_state_ignored_p(parser) ||
10836 (following && (
10837 (peek_at(parser, following) == '.') ||
10838 (peek_at(parser, following) == '&' && peek_at(parser, following + 1) == '.')
10839 ))
10840 ) {
10841 if (!lexed_comment) parser_lex_ignored_newline(parser);
10842 lexed_comment = false;
10843 goto lex_next_token;
10844 }
10845 }
10846
10847 // If we hit a . after a newline, then we're in a call chain and
10848 // we need to return the call operator.
10849 if (next_content[0] == '.') {
10850 // To match ripper, we need to emit an ignored newline even though
10851 // it's a real newline in the case that we have a beginless range
10852 // on a subsequent line.
10853 if (peek_at(parser, next_content + 1) == '.') {
10854 if (!lexed_comment) parser_lex_ignored_newline(parser);
10855 lex_state_set(parser, PM_LEX_STATE_BEG);
10856 parser->command_start = true;
10857 parser->current.type = PM_TOKEN_NEWLINE;
10858 return;
10859 }
10860
10861 if (!lexed_comment) parser_lex_ignored_newline(parser);
10862 lex_state_set(parser, PM_LEX_STATE_DOT);
10863 parser->current.start = next_content;
10864 parser->current.end = next_content + 1;
10865 parser->next_start = NULL;
10866 LEX(PM_TOKEN_DOT);
10867 }
10868
10869 // If we hit a &. after a newline, then we're in a call chain and
10870 // we need to return the call operator.
10871 if (peek_at(parser, next_content) == '&' && peek_at(parser, next_content + 1) == '.') {
10872 if (!lexed_comment) parser_lex_ignored_newline(parser);
10873 lex_state_set(parser, PM_LEX_STATE_DOT);
10874 parser->current.start = next_content;
10875 parser->current.end = next_content + 2;
10876 parser->next_start = NULL;
10878 }
10879 }
10880
10881 // At this point we know this is a regular newline, and we can set the
10882 // necessary state and return the token.
10883 lex_state_set(parser, PM_LEX_STATE_BEG);
10884 parser->command_start = true;
10885 parser->current.type = PM_TOKEN_NEWLINE;
10886 if (!lexed_comment) parser_lex_callback(parser);
10887 return;
10888 }
10889
10890 // ,
10891 case ',':
10892 if ((parser->previous.type == PM_TOKEN_COMMA) && (parser->enclosure_nesting > 0)) {
10893 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_ARRAY_TERM, pm_token_type_human(parser->current.type));
10894 }
10895
10896 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
10897 LEX(PM_TOKEN_COMMA);
10898
10899 // (
10900 case '(': {
10902
10903 if (space_seen && (lex_state_arg_p(parser) || parser->lex_state == (PM_LEX_STATE_END | PM_LEX_STATE_LABEL))) {
10905 }
10906
10907 parser->enclosure_nesting++;
10908 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
10909 pm_do_loop_stack_push(parser, false);
10910 LEX(type);
10911 }
10912
10913 // )
10914 case ')':
10915 parser->enclosure_nesting--;
10916 lex_state_set(parser, PM_LEX_STATE_ENDFN);
10917 pm_do_loop_stack_pop(parser);
10919
10920 // ;
10921 case ';':
10922 lex_state_set(parser, PM_LEX_STATE_BEG);
10923 parser->command_start = true;
10924 LEX(PM_TOKEN_SEMICOLON);
10925
10926 // [ [] []=
10927 case '[':
10928 parser->enclosure_nesting++;
10930
10931 if (lex_state_operator_p(parser)) {
10932 if (match(parser, ']')) {
10933 parser->enclosure_nesting--;
10934 lex_state_set(parser, PM_LEX_STATE_ARG);
10936 }
10937
10938 lex_state_set(parser, PM_LEX_STATE_ARG | PM_LEX_STATE_LABEL);
10939 LEX(type);
10940 }
10941
10942 if (lex_state_beg_p(parser) || (lex_state_arg_p(parser) && (space_seen || lex_state_p(parser, PM_LEX_STATE_LABELED)))) {
10944 }
10945
10946 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
10947 pm_do_loop_stack_push(parser, false);
10948 LEX(type);
10949
10950 // ]
10951 case ']':
10952 parser->enclosure_nesting--;
10953 lex_state_set(parser, PM_LEX_STATE_END);
10954 pm_do_loop_stack_pop(parser);
10956
10957 // {
10958 case '{': {
10960
10961 if (parser->enclosure_nesting == parser->lambda_enclosure_nesting) {
10962 // This { begins a lambda
10963 parser->command_start = true;
10964 lex_state_set(parser, PM_LEX_STATE_BEG);
10966 } else if (lex_state_p(parser, PM_LEX_STATE_LABELED)) {
10967 // This { begins a hash literal
10968 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
10969 } else if (lex_state_p(parser, PM_LEX_STATE_ARG_ANY | PM_LEX_STATE_END | PM_LEX_STATE_ENDFN)) {
10970 // This { begins a block
10971 parser->command_start = true;
10972 lex_state_set(parser, PM_LEX_STATE_BEG);
10973 } else if (lex_state_p(parser, PM_LEX_STATE_ENDARG)) {
10974 // This { begins a block on a command
10975 parser->command_start = true;
10976 lex_state_set(parser, PM_LEX_STATE_BEG);
10977 } else {
10978 // This { begins a hash literal
10979 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
10980 }
10981
10982 parser->enclosure_nesting++;
10983 parser->brace_nesting++;
10984 pm_do_loop_stack_push(parser, false);
10985
10986 LEX(type);
10987 }
10988
10989 // }
10990 case '}':
10991 parser->enclosure_nesting--;
10992 pm_do_loop_stack_pop(parser);
10993
10994 if ((parser->lex_modes.current->mode == PM_LEX_EMBEXPR) && (parser->brace_nesting == 0)) {
10995 lex_mode_pop(parser);
10997 }
10998
10999 parser->brace_nesting--;
11000 lex_state_set(parser, PM_LEX_STATE_END);
11002
11003 // * ** **= *=
11004 case '*': {
11005 if (match(parser, '*')) {
11006 if (match(parser, '=')) {
11007 lex_state_set(parser, PM_LEX_STATE_BEG);
11009 }
11010
11012
11013 if (lex_state_spcarg_p(parser, space_seen)) {
11014 pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_PREFIX_STAR_STAR);
11016 } else if (lex_state_beg_p(parser)) {
11018 } else if (ambiguous_operator_p(parser, space_seen)) {
11019 PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "**", "argument prefix");
11020 }
11021
11022 if (lex_state_operator_p(parser)) {
11023 lex_state_set(parser, PM_LEX_STATE_ARG);
11024 } else {
11025 lex_state_set(parser, PM_LEX_STATE_BEG);
11026 }
11027
11028 LEX(type);
11029 }
11030
11031 if (match(parser, '=')) {
11032 lex_state_set(parser, PM_LEX_STATE_BEG);
11034 }
11035
11037
11038 if (lex_state_spcarg_p(parser, space_seen)) {
11039 pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_PREFIX_STAR);
11041 } else if (lex_state_beg_p(parser)) {
11043 } else if (ambiguous_operator_p(parser, space_seen)) {
11044 PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "*", "argument prefix");
11045 }
11046
11047 if (lex_state_operator_p(parser)) {
11048 lex_state_set(parser, PM_LEX_STATE_ARG);
11049 } else {
11050 lex_state_set(parser, PM_LEX_STATE_BEG);
11051 }
11052
11053 LEX(type);
11054 }
11055
11056 // ! != !~ !@
11057 case '!':
11058 if (lex_state_operator_p(parser)) {
11059 lex_state_set(parser, PM_LEX_STATE_ARG);
11060 if (match(parser, '@')) {
11061 LEX(PM_TOKEN_BANG);
11062 }
11063 } else {
11064 lex_state_set(parser, PM_LEX_STATE_BEG);
11065 }
11066
11067 if (match(parser, '=')) {
11069 }
11070
11071 if (match(parser, '~')) {
11073 }
11074
11075 LEX(PM_TOKEN_BANG);
11076
11077 // = => =~ == === =begin
11078 case '=':
11079 if (
11080 current_token_starts_line(parser) &&
11081 (parser->current.end + 5 <= parser->end) &&
11082 memcmp(parser->current.end, "begin", 5) == 0 &&
11083 (pm_char_is_whitespace(peek_offset(parser, 5)) || (peek_offset(parser, 5) == '\0'))
11084 ) {
11085 pm_token_type_t type = lex_embdoc(parser);
11086 if (type == PM_TOKEN_EOF) {
11087 LEX(type);
11088 }
11089
11090 goto lex_next_token;
11091 }
11092
11093 if (lex_state_operator_p(parser)) {
11094 lex_state_set(parser, PM_LEX_STATE_ARG);
11095 } else {
11096 lex_state_set(parser, PM_LEX_STATE_BEG);
11097 }
11098
11099 if (match(parser, '>')) {
11101 }
11102
11103 if (match(parser, '~')) {
11105 }
11106
11107 if (match(parser, '=')) {
11108 LEX(match(parser, '=') ? PM_TOKEN_EQUAL_EQUAL_EQUAL : PM_TOKEN_EQUAL_EQUAL);
11109 }
11110
11111 LEX(PM_TOKEN_EQUAL);
11112
11113 // < << <<= <= <=>
11114 case '<':
11115 if (match(parser, '<')) {
11116 if (
11117 !lex_state_p(parser, PM_LEX_STATE_DOT | PM_LEX_STATE_CLASS) &&
11118 !lex_state_end_p(parser) &&
11119 (!lex_state_p(parser, PM_LEX_STATE_ARG_ANY) || lex_state_p(parser, PM_LEX_STATE_LABELED) || space_seen)
11120 ) {
11121 const uint8_t *end = parser->current.end;
11122
11123 pm_heredoc_quote_t quote = PM_HEREDOC_QUOTE_NONE;
11124 pm_heredoc_indent_t indent = PM_HEREDOC_INDENT_NONE;
11125
11126 if (match(parser, '-')) {
11127 indent = PM_HEREDOC_INDENT_DASH;
11128 }
11129 else if (match(parser, '~')) {
11130 indent = PM_HEREDOC_INDENT_TILDE;
11131 }
11132
11133 if (match(parser, '`')) {
11134 quote = PM_HEREDOC_QUOTE_BACKTICK;
11135 }
11136 else if (match(parser, '"')) {
11137 quote = PM_HEREDOC_QUOTE_DOUBLE;
11138 }
11139 else if (match(parser, '\'')) {
11140 quote = PM_HEREDOC_QUOTE_SINGLE;
11141 }
11142
11143 const uint8_t *ident_start = parser->current.end;
11144 size_t width = 0;
11145
11146 if (parser->current.end >= parser->end) {
11147 parser->current.end = end;
11148 } else if (quote == PM_HEREDOC_QUOTE_NONE && (width = char_is_identifier(parser, parser->current.end)) == 0) {
11149 parser->current.end = end;
11150 } else {
11151 if (quote == PM_HEREDOC_QUOTE_NONE) {
11152 parser->current.end += width;
11153
11154 while ((parser->current.end < parser->end) && (width = char_is_identifier(parser, parser->current.end))) {
11155 parser->current.end += width;
11156 }
11157 } else {
11158 // If we have quotes, then we're going to go until we find the
11159 // end quote.
11160 while ((parser->current.end < parser->end) && quote != (pm_heredoc_quote_t) (*parser->current.end)) {
11161 if (*parser->current.end == '\r' || *parser->current.end == '\n') break;
11162 parser->current.end++;
11163 }
11164 }
11165
11166 size_t ident_length = (size_t) (parser->current.end - ident_start);
11167 bool ident_error = false;
11168
11169 if (quote != PM_HEREDOC_QUOTE_NONE && !match(parser, (uint8_t) quote)) {
11170 pm_parser_err(parser, ident_start, ident_start + ident_length, PM_ERR_HEREDOC_IDENTIFIER);
11171 ident_error = true;
11172 }
11173
11174 parser->explicit_encoding = NULL;
11175 lex_mode_push(parser, (pm_lex_mode_t) {
11176 .mode = PM_LEX_HEREDOC,
11177 .as.heredoc = {
11178 .base = {
11179 .ident_start = ident_start,
11180 .ident_length = ident_length,
11181 .quote = quote,
11182 .indent = indent
11183 },
11184 .next_start = parser->current.end,
11185 .common_whitespace = NULL,
11186 .line_continuation = false
11187 }
11188 });
11189
11190 if (parser->heredoc_end == NULL) {
11191 const uint8_t *body_start = next_newline(parser->current.end, parser->end - parser->current.end);
11192
11193 if (body_start == NULL) {
11194 // If there is no newline after the heredoc identifier, then
11195 // this is not a valid heredoc declaration. In this case we
11196 // will add an error, but we will still return a heredoc
11197 // start.
11198 if (!ident_error) pm_parser_err_heredoc_term(parser, ident_start, ident_length);
11199 body_start = parser->end;
11200 } else {
11201 // Otherwise, we want to indicate that the body of the
11202 // heredoc starts on the character after the next newline.
11203 pm_newline_list_append(&parser->newline_list, body_start);
11204 body_start++;
11205 }
11206
11207 parser->next_start = body_start;
11208 } else {
11209 parser->next_start = parser->heredoc_end;
11210 }
11211
11213 }
11214 }
11215
11216 if (match(parser, '=')) {
11217 lex_state_set(parser, PM_LEX_STATE_BEG);
11219 }
11220
11221 if (ambiguous_operator_p(parser, space_seen)) {
11222 PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "<<", "here document");
11223 }
11224
11225 if (lex_state_operator_p(parser)) {
11226 lex_state_set(parser, PM_LEX_STATE_ARG);
11227 } else {
11228 if (lex_state_p(parser, PM_LEX_STATE_CLASS)) parser->command_start = true;
11229 lex_state_set(parser, PM_LEX_STATE_BEG);
11230 }
11231
11232 LEX(PM_TOKEN_LESS_LESS);
11233 }
11234
11235 if (lex_state_operator_p(parser)) {
11236 lex_state_set(parser, PM_LEX_STATE_ARG);
11237 } else {
11238 if (lex_state_p(parser, PM_LEX_STATE_CLASS)) parser->command_start = true;
11239 lex_state_set(parser, PM_LEX_STATE_BEG);
11240 }
11241
11242 if (match(parser, '=')) {
11243 if (match(parser, '>')) {
11245 }
11246
11248 }
11249
11250 LEX(PM_TOKEN_LESS);
11251
11252 // > >> >>= >=
11253 case '>':
11254 if (match(parser, '>')) {
11255 if (lex_state_operator_p(parser)) {
11256 lex_state_set(parser, PM_LEX_STATE_ARG);
11257 } else {
11258 lex_state_set(parser, PM_LEX_STATE_BEG);
11259 }
11260 LEX(match(parser, '=') ? PM_TOKEN_GREATER_GREATER_EQUAL : PM_TOKEN_GREATER_GREATER);
11261 }
11262
11263 if (lex_state_operator_p(parser)) {
11264 lex_state_set(parser, PM_LEX_STATE_ARG);
11265 } else {
11266 lex_state_set(parser, PM_LEX_STATE_BEG);
11267 }
11268
11269 LEX(match(parser, '=') ? PM_TOKEN_GREATER_EQUAL : PM_TOKEN_GREATER);
11270
11271 // double-quoted string literal
11272 case '"': {
11273 bool label_allowed = (lex_state_p(parser, PM_LEX_STATE_LABEL | PM_LEX_STATE_ENDFN) && !previous_command_start) || lex_state_arg_p(parser);
11274 lex_mode_push_string(parser, true, label_allowed, '\0', '"');
11276 }
11277
11278 // xstring literal
11279 case '`': {
11280 if (lex_state_p(parser, PM_LEX_STATE_FNAME)) {
11281 lex_state_set(parser, PM_LEX_STATE_ENDFN);
11282 LEX(PM_TOKEN_BACKTICK);
11283 }
11284
11285 if (lex_state_p(parser, PM_LEX_STATE_DOT)) {
11286 if (previous_command_start) {
11287 lex_state_set(parser, PM_LEX_STATE_CMDARG);
11288 } else {
11289 lex_state_set(parser, PM_LEX_STATE_ARG);
11290 }
11291
11292 LEX(PM_TOKEN_BACKTICK);
11293 }
11294
11295 lex_mode_push_string(parser, true, false, '\0', '`');
11296 LEX(PM_TOKEN_BACKTICK);
11297 }
11298
11299 // single-quoted string literal
11300 case '\'': {
11301 bool label_allowed = (lex_state_p(parser, PM_LEX_STATE_LABEL | PM_LEX_STATE_ENDFN) && !previous_command_start) || lex_state_arg_p(parser);
11302 lex_mode_push_string(parser, false, label_allowed, '\0', '\'');
11304 }
11305
11306 // ? character literal
11307 case '?':
11308 LEX(lex_question_mark(parser));
11309
11310 // & && &&= &=
11311 case '&': {
11312 if (match(parser, '&')) {
11313 lex_state_set(parser, PM_LEX_STATE_BEG);
11314
11315 if (match(parser, '=')) {
11317 }
11318
11320 }
11321
11322 if (match(parser, '=')) {
11323 lex_state_set(parser, PM_LEX_STATE_BEG);
11325 }
11326
11327 if (match(parser, '.')) {
11328 lex_state_set(parser, PM_LEX_STATE_DOT);
11330 }
11331
11333 if (lex_state_spcarg_p(parser, space_seen)) {
11334 if ((peek(parser) != ':') || (peek_offset(parser, 1) == '\0')) {
11335 pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_PREFIX_AMPERSAND);
11336 } else {
11337 const uint8_t delim = peek_offset(parser, 1);
11338
11339 if ((delim != '\'') && (delim != '"') && !char_is_identifier(parser, parser->current.end + 1)) {
11340 pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_PREFIX_AMPERSAND);
11341 }
11342 }
11343
11345 } else if (lex_state_beg_p(parser)) {
11347 } else if (ambiguous_operator_p(parser, space_seen)) {
11348 PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "&", "argument prefix");
11349 }
11350
11351 if (lex_state_operator_p(parser)) {
11352 lex_state_set(parser, PM_LEX_STATE_ARG);
11353 } else {
11354 lex_state_set(parser, PM_LEX_STATE_BEG);
11355 }
11356
11357 LEX(type);
11358 }
11359
11360 // | || ||= |=
11361 case '|':
11362 if (match(parser, '|')) {
11363 if (match(parser, '=')) {
11364 lex_state_set(parser, PM_LEX_STATE_BEG);
11366 }
11367
11368 if (lex_state_p(parser, PM_LEX_STATE_BEG)) {
11369 parser->current.end--;
11370 LEX(PM_TOKEN_PIPE);
11371 }
11372
11373 lex_state_set(parser, PM_LEX_STATE_BEG);
11374 LEX(PM_TOKEN_PIPE_PIPE);
11375 }
11376
11377 if (match(parser, '=')) {
11378 lex_state_set(parser, PM_LEX_STATE_BEG);
11380 }
11381
11382 if (lex_state_operator_p(parser)) {
11383 lex_state_set(parser, PM_LEX_STATE_ARG);
11384 } else {
11385 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
11386 }
11387
11388 LEX(PM_TOKEN_PIPE);
11389
11390 // + += +@
11391 case '+': {
11392 if (lex_state_operator_p(parser)) {
11393 lex_state_set(parser, PM_LEX_STATE_ARG);
11394
11395 if (match(parser, '@')) {
11396 LEX(PM_TOKEN_UPLUS);
11397 }
11398
11399 LEX(PM_TOKEN_PLUS);
11400 }
11401
11402 if (match(parser, '=')) {
11403 lex_state_set(parser, PM_LEX_STATE_BEG);
11405 }
11406
11407 if (
11408 lex_state_beg_p(parser) ||
11409 (lex_state_spcarg_p(parser, space_seen) ? (pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_PLUS), true) : false)
11410 ) {
11411 lex_state_set(parser, PM_LEX_STATE_BEG);
11412
11413 if (pm_char_is_decimal_digit(peek(parser))) {
11414 parser->current.end++;
11415 pm_token_type_t type = lex_numeric(parser);
11416 lex_state_set(parser, PM_LEX_STATE_END);
11417 LEX(type);
11418 }
11419
11420 LEX(PM_TOKEN_UPLUS);
11421 }
11422
11423 if (ambiguous_operator_p(parser, space_seen)) {
11424 PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "+", "unary operator");
11425 }
11426
11427 lex_state_set(parser, PM_LEX_STATE_BEG);
11428 LEX(PM_TOKEN_PLUS);
11429 }
11430
11431 // - -= -@
11432 case '-': {
11433 if (lex_state_operator_p(parser)) {
11434 lex_state_set(parser, PM_LEX_STATE_ARG);
11435
11436 if (match(parser, '@')) {
11437 LEX(PM_TOKEN_UMINUS);
11438 }
11439
11440 LEX(PM_TOKEN_MINUS);
11441 }
11442
11443 if (match(parser, '=')) {
11444 lex_state_set(parser, PM_LEX_STATE_BEG);
11446 }
11447
11448 if (match(parser, '>')) {
11449 lex_state_set(parser, PM_LEX_STATE_ENDFN);
11451 }
11452
11453 bool spcarg = lex_state_spcarg_p(parser, space_seen);
11454 bool is_beg = lex_state_beg_p(parser);
11455 if (!is_beg && spcarg) {
11456 pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_MINUS);
11457 }
11458
11459 if (is_beg || spcarg) {
11460 lex_state_set(parser, PM_LEX_STATE_BEG);
11461 LEX(pm_char_is_decimal_digit(peek(parser)) ? PM_TOKEN_UMINUS_NUM : PM_TOKEN_UMINUS);
11462 }
11463
11464 if (ambiguous_operator_p(parser, space_seen)) {
11465 PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "-", "unary operator");
11466 }
11467
11468 lex_state_set(parser, PM_LEX_STATE_BEG);
11469 LEX(PM_TOKEN_MINUS);
11470 }
11471
11472 // . .. ...
11473 case '.': {
11474 bool beg_p = lex_state_beg_p(parser);
11475
11476 if (match(parser, '.')) {
11477 if (match(parser, '.')) {
11478 // If we're _not_ inside a range within default parameters
11479 if (!context_p(parser, PM_CONTEXT_DEFAULT_PARAMS) && context_p(parser, PM_CONTEXT_DEF_PARAMS)) {
11480 if (lex_state_p(parser, PM_LEX_STATE_END)) {
11481 lex_state_set(parser, PM_LEX_STATE_BEG);
11482 } else {
11483 lex_state_set(parser, PM_LEX_STATE_ENDARG);
11484 }
11486 }
11487
11488 if (parser->enclosure_nesting == 0 && parser_end_of_line_p(parser)) {
11489 pm_parser_warn_token(parser, &parser->current, PM_WARN_DOT_DOT_DOT_EOL);
11490 }
11491
11492 lex_state_set(parser, PM_LEX_STATE_BEG);
11494 }
11495
11496 lex_state_set(parser, PM_LEX_STATE_BEG);
11497 LEX(beg_p ? PM_TOKEN_UDOT_DOT : PM_TOKEN_DOT_DOT);
11498 }
11499
11500 lex_state_set(parser, PM_LEX_STATE_DOT);
11501 LEX(PM_TOKEN_DOT);
11502 }
11503
11504 // integer
11505 case '0':
11506 case '1':
11507 case '2':
11508 case '3':
11509 case '4':
11510 case '5':
11511 case '6':
11512 case '7':
11513 case '8':
11514 case '9': {
11515 pm_token_type_t type = lex_numeric(parser);
11516 lex_state_set(parser, PM_LEX_STATE_END);
11517 LEX(type);
11518 }
11519
11520 // :: symbol
11521 case ':':
11522 if (match(parser, ':')) {
11523 if (lex_state_beg_p(parser) || lex_state_p(parser, PM_LEX_STATE_CLASS) || (lex_state_p(parser, PM_LEX_STATE_ARG_ANY) && space_seen)) {
11524 lex_state_set(parser, PM_LEX_STATE_BEG);
11526 }
11527
11528 lex_state_set(parser, PM_LEX_STATE_DOT);
11530 }
11531
11532 if (lex_state_end_p(parser) || pm_char_is_whitespace(peek(parser)) || peek(parser) == '#') {
11533 lex_state_set(parser, PM_LEX_STATE_BEG);
11534 LEX(PM_TOKEN_COLON);
11535 }
11536
11537 if (peek(parser) == '"' || peek(parser) == '\'') {
11538 lex_mode_push_string(parser, peek(parser) == '"', false, '\0', *parser->current.end);
11539 parser->current.end++;
11540 }
11541
11542 lex_state_set(parser, PM_LEX_STATE_FNAME);
11544
11545 // / /=
11546 case '/':
11547 if (lex_state_beg_p(parser)) {
11548 lex_mode_push_regexp(parser, '\0', '/');
11550 }
11551
11552 if (match(parser, '=')) {
11553 lex_state_set(parser, PM_LEX_STATE_BEG);
11555 }
11556
11557 if (lex_state_spcarg_p(parser, space_seen)) {
11558 pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_SLASH);
11559 lex_mode_push_regexp(parser, '\0', '/');
11561 }
11562
11563 if (ambiguous_operator_p(parser, space_seen)) {
11564 PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "/", "regexp literal");
11565 }
11566
11567 if (lex_state_operator_p(parser)) {
11568 lex_state_set(parser, PM_LEX_STATE_ARG);
11569 } else {
11570 lex_state_set(parser, PM_LEX_STATE_BEG);
11571 }
11572
11573 LEX(PM_TOKEN_SLASH);
11574
11575 // ^ ^=
11576 case '^':
11577 if (lex_state_operator_p(parser)) {
11578 lex_state_set(parser, PM_LEX_STATE_ARG);
11579 } else {
11580 lex_state_set(parser, PM_LEX_STATE_BEG);
11581 }
11582 LEX(match(parser, '=') ? PM_TOKEN_CARET_EQUAL : PM_TOKEN_CARET);
11583
11584 // ~ ~@
11585 case '~':
11586 if (lex_state_operator_p(parser)) {
11587 (void) match(parser, '@');
11588 lex_state_set(parser, PM_LEX_STATE_ARG);
11589 } else {
11590 lex_state_set(parser, PM_LEX_STATE_BEG);
11591 }
11592
11593 LEX(PM_TOKEN_TILDE);
11594
11595 // % %= %i %I %q %Q %w %W
11596 case '%': {
11597 // If there is no subsequent character then we have an
11598 // invalid token. We're going to say it's the percent
11599 // operator because we don't want to move into the string
11600 // lex mode unnecessarily.
11601 if ((lex_state_beg_p(parser) || lex_state_arg_p(parser)) && (parser->current.end >= parser->end)) {
11602 pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT_EOF);
11603 LEX(PM_TOKEN_PERCENT);
11604 }
11605
11606 if (!lex_state_beg_p(parser) && match(parser, '=')) {
11607 lex_state_set(parser, PM_LEX_STATE_BEG);
11609 } else if (
11610 lex_state_beg_p(parser) ||
11611 (lex_state_p(parser, PM_LEX_STATE_FITEM) && (peek(parser) == 's')) ||
11612 lex_state_spcarg_p(parser, space_seen)
11613 ) {
11614 if (!parser->encoding->alnum_char(parser->current.end, parser->end - parser->current.end)) {
11615 if (*parser->current.end >= 0x80) {
11616 pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT);
11617 }
11618
11619 const uint8_t delimiter = pm_lex_percent_delimiter(parser);
11620 lex_mode_push_string(parser, true, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
11622 }
11623
11624 // Delimiters for %-literals cannot be alphanumeric. We
11625 // validate that here.
11626 uint8_t delimiter = peek_offset(parser, 1);
11627 if (delimiter >= 0x80 || parser->encoding->alnum_char(&delimiter, 1)) {
11628 pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT);
11629 goto lex_next_token;
11630 }
11631
11632 switch (peek(parser)) {
11633 case 'i': {
11634 parser->current.end++;
11635
11636 if (parser->current.end < parser->end) {
11637 lex_mode_push_list(parser, false, pm_lex_percent_delimiter(parser));
11638 } else {
11639 lex_mode_push_list_eof(parser);
11640 }
11641
11643 }
11644 case 'I': {
11645 parser->current.end++;
11646
11647 if (parser->current.end < parser->end) {
11648 lex_mode_push_list(parser, true, pm_lex_percent_delimiter(parser));
11649 } else {
11650 lex_mode_push_list_eof(parser);
11651 }
11652
11654 }
11655 case 'r': {
11656 parser->current.end++;
11657
11658 if (parser->current.end < parser->end) {
11659 const uint8_t delimiter = pm_lex_percent_delimiter(parser);
11660 lex_mode_push_regexp(parser, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
11661 } else {
11662 lex_mode_push_regexp(parser, '\0', '\0');
11663 }
11664
11666 }
11667 case 'q': {
11668 parser->current.end++;
11669
11670 if (parser->current.end < parser->end) {
11671 const uint8_t delimiter = pm_lex_percent_delimiter(parser);
11672 lex_mode_push_string(parser, false, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
11673 } else {
11674 lex_mode_push_string_eof(parser);
11675 }
11676
11678 }
11679 case 'Q': {
11680 parser->current.end++;
11681
11682 if (parser->current.end < parser->end) {
11683 const uint8_t delimiter = pm_lex_percent_delimiter(parser);
11684 lex_mode_push_string(parser, true, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
11685 } else {
11686 lex_mode_push_string_eof(parser);
11687 }
11688
11690 }
11691 case 's': {
11692 parser->current.end++;
11693
11694 if (parser->current.end < parser->end) {
11695 const uint8_t delimiter = pm_lex_percent_delimiter(parser);
11696 lex_mode_push_string(parser, false, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
11697 lex_state_set(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM);
11698 } else {
11699 lex_mode_push_string_eof(parser);
11700 }
11701
11703 }
11704 case 'w': {
11705 parser->current.end++;
11706
11707 if (parser->current.end < parser->end) {
11708 lex_mode_push_list(parser, false, pm_lex_percent_delimiter(parser));
11709 } else {
11710 lex_mode_push_list_eof(parser);
11711 }
11712
11714 }
11715 case 'W': {
11716 parser->current.end++;
11717
11718 if (parser->current.end < parser->end) {
11719 lex_mode_push_list(parser, true, pm_lex_percent_delimiter(parser));
11720 } else {
11721 lex_mode_push_list_eof(parser);
11722 }
11723
11725 }
11726 case 'x': {
11727 parser->current.end++;
11728
11729 if (parser->current.end < parser->end) {
11730 const uint8_t delimiter = pm_lex_percent_delimiter(parser);
11731 lex_mode_push_string(parser, true, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
11732 } else {
11733 lex_mode_push_string_eof(parser);
11734 }
11735
11737 }
11738 default:
11739 // If we get to this point, then we have a % that is completely
11740 // unparsable. In this case we'll just drop it from the parser
11741 // and skip past it and hope that the next token is something
11742 // that we can parse.
11743 pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT);
11744 goto lex_next_token;
11745 }
11746 }
11747
11748 if (ambiguous_operator_p(parser, space_seen)) {
11749 PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "%", "string literal");
11750 }
11751
11752 lex_state_set(parser, lex_state_operator_p(parser) ? PM_LEX_STATE_ARG : PM_LEX_STATE_BEG);
11753 LEX(PM_TOKEN_PERCENT);
11754 }
11755
11756 // global variable
11757 case '$': {
11758 pm_token_type_t type = lex_global_variable(parser);
11759
11760 // If we're lexing an embedded variable, then we need to pop back into
11761 // the parent lex context.
11762 if (parser->lex_modes.current->mode == PM_LEX_EMBVAR) {
11763 lex_mode_pop(parser);
11764 }
11765
11766 lex_state_set(parser, PM_LEX_STATE_END);
11767 LEX(type);
11768 }
11769
11770 // instance variable, class variable
11771 case '@':
11772 lex_state_set(parser, parser->lex_state & PM_LEX_STATE_FNAME ? PM_LEX_STATE_ENDFN : PM_LEX_STATE_END);
11773 LEX(lex_at_variable(parser));
11774
11775 default: {
11776 if (*parser->current.start != '_') {
11777 size_t width = char_is_identifier_start(parser, parser->current.start);
11778
11779 // If this isn't the beginning of an identifier, then
11780 // it's an invalid token as we've exhausted all of the
11781 // other options. We'll skip past it and return the next
11782 // token after adding an appropriate error message.
11783 if (!width) {
11784 if (*parser->current.start >= 0x80) {
11785 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_MULTIBYTE_CHARACTER, *parser->current.start);
11786 } else if (*parser->current.start == '\\') {
11787 switch (peek_at(parser, parser->current.start + 1)) {
11788 case ' ':
11789 parser->current.end++;
11790 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped space");
11791 break;
11792 case '\f':
11793 parser->current.end++;
11794 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped form feed");
11795 break;
11796 case '\t':
11797 parser->current.end++;
11798 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped horizontal tab");
11799 break;
11800 case '\v':
11801 parser->current.end++;
11802 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped vertical tab");
11803 break;
11804 case '\r':
11805 if (peek_at(parser, parser->current.start + 2) != '\n') {
11806 parser->current.end++;
11807 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped carriage return");
11808 break;
11809 }
11810 /* fallthrough */
11811 default:
11812 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "backslash");
11813 break;
11814 }
11815 } else if (char_is_ascii_printable(*parser->current.start)) {
11816 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_PRINTABLE_CHARACTER, *parser->current.start);
11817 } else {
11818 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_CHARACTER, *parser->current.start);
11819 }
11820
11821 goto lex_next_token;
11822 }
11823
11824 parser->current.end = parser->current.start + width;
11825 }
11826
11827 pm_token_type_t type = lex_identifier(parser, previous_command_start);
11828
11829 // If we've hit a __END__ and it was at the start of the
11830 // line or the start of the file and it is followed by
11831 // either a \n or a \r\n, then this is the last token of the
11832 // file.
11833 if (
11834 ((parser->current.end - parser->current.start) == 7) &&
11835 current_token_starts_line(parser) &&
11836 (memcmp(parser->current.start, "__END__", 7) == 0) &&
11837 (parser->current.end == parser->end || match_eol(parser))
11838 ) {
11839 // Since we know we're about to add an __END__ comment,
11840 // we know we need to add all of the newlines to get the
11841 // correct column information for it.
11842 const uint8_t *cursor = parser->current.end;
11843 while ((cursor = next_newline(cursor, parser->end - cursor)) != NULL) {
11844 pm_newline_list_append(&parser->newline_list, cursor++);
11845 }
11846
11847 parser->current.end = parser->end;
11848 parser->current.type = PM_TOKEN___END__;
11849 parser_lex_callback(parser);
11850
11851 parser->data_loc.start = parser->current.start;
11852 parser->data_loc.end = parser->current.end;
11853
11854 LEX(PM_TOKEN_EOF);
11855 }
11856
11857 pm_lex_state_t last_state = parser->lex_state;
11858
11860 if (lex_state_p(parser, PM_LEX_STATE_BEG_ANY | PM_LEX_STATE_ARG_ANY | PM_LEX_STATE_DOT)) {
11861 if (previous_command_start) {
11862 lex_state_set(parser, PM_LEX_STATE_CMDARG);
11863 } else {
11864 lex_state_set(parser, PM_LEX_STATE_ARG);
11865 }
11866 } else if (parser->lex_state == PM_LEX_STATE_FNAME) {
11867 lex_state_set(parser, PM_LEX_STATE_ENDFN);
11868 } else {
11869 lex_state_set(parser, PM_LEX_STATE_END);
11870 }
11871 }
11872
11873 if (
11874 !(last_state & (PM_LEX_STATE_DOT | PM_LEX_STATE_FNAME)) &&
11876 ((pm_parser_local_depth(parser, &parser->current) != -1) ||
11877 pm_token_is_numbered_parameter(parser->current.start, parser->current.end))
11878 ) {
11879 lex_state_set(parser, PM_LEX_STATE_END | PM_LEX_STATE_LABEL);
11880 }
11881
11882 LEX(type);
11883 }
11884 }
11885 }
11886 case PM_LEX_LIST: {
11887 if (parser->next_start != NULL) {
11888 parser->current.end = parser->next_start;
11889 parser->next_start = NULL;
11890 }
11891
11892 // First we'll set the beginning of the token.
11893 parser->current.start = parser->current.end;
11894
11895 // If there's any whitespace at the start of the list, then we're
11896 // going to trim it off the beginning and create a new token.
11897 size_t whitespace;
11898
11899 if (parser->heredoc_end) {
11900 whitespace = pm_strspn_inline_whitespace(parser->current.end, parser->end - parser->current.end);
11901 if (peek_offset(parser, (ptrdiff_t)whitespace) == '\n') {
11902 whitespace += 1;
11903 }
11904 } else {
11905 whitespace = pm_strspn_whitespace_newlines(parser->current.end, parser->end - parser->current.end, &parser->newline_list);
11906 }
11907
11908 if (whitespace > 0) {
11909 parser->current.end += whitespace;
11910 if (peek_offset(parser, -1) == '\n') {
11911 // mutates next_start
11912 parser_flush_heredoc_end(parser);
11913 }
11914 LEX(PM_TOKEN_WORDS_SEP);
11915 }
11916
11917 // We'll check if we're at the end of the file. If we are, then we
11918 // need to return the EOF token.
11919 if (parser->current.end >= parser->end) {
11920 LEX(PM_TOKEN_EOF);
11921 }
11922
11923 // Here we'll get a list of the places where strpbrk should break,
11924 // and then find the first one.
11925 pm_lex_mode_t *lex_mode = parser->lex_modes.current;
11926 const uint8_t *breakpoints = lex_mode->as.list.breakpoints;
11927 const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11928
11929 // If we haven't found an escape yet, then this buffer will be
11930 // unallocated since we can refer directly to the source string.
11931 pm_token_buffer_t token_buffer = { 0 };
11932
11933 while (breakpoint != NULL) {
11934 // If we hit whitespace, then we must have received content by
11935 // now, so we can return an element of the list.
11936 if (pm_char_is_whitespace(*breakpoint)) {
11937 parser->current.end = breakpoint;
11938 pm_token_buffer_flush(parser, &token_buffer);
11940 }
11941
11942 // If we hit the terminator, we need to check which token to
11943 // return.
11944 if (*breakpoint == lex_mode->as.list.terminator) {
11945 // If this terminator doesn't actually close the list, then
11946 // we need to continue on past it.
11947 if (lex_mode->as.list.nesting > 0) {
11948 parser->current.end = breakpoint + 1;
11949 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11950 lex_mode->as.list.nesting--;
11951 continue;
11952 }
11953
11954 // If we've hit the terminator and we've already skipped
11955 // past content, then we can return a list node.
11956 if (breakpoint > parser->current.start) {
11957 parser->current.end = breakpoint;
11958 pm_token_buffer_flush(parser, &token_buffer);
11960 }
11961
11962 // Otherwise, switch back to the default state and return
11963 // the end of the list.
11964 parser->current.end = breakpoint + 1;
11965 lex_mode_pop(parser);
11966 lex_state_set(parser, PM_LEX_STATE_END);
11968 }
11969
11970 // If we hit a null byte, skip directly past it.
11971 if (*breakpoint == '\0') {
11972 breakpoint = pm_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1), true);
11973 continue;
11974 }
11975
11976 // If we hit escapes, then we need to treat the next token
11977 // literally. In this case we'll skip past the next character
11978 // and find the next breakpoint.
11979 if (*breakpoint == '\\') {
11980 parser->current.end = breakpoint + 1;
11981
11982 // If we've hit the end of the file, then break out of the
11983 // loop by setting the breakpoint to NULL.
11984 if (parser->current.end == parser->end) {
11985 breakpoint = NULL;
11986 continue;
11987 }
11988
11989 pm_token_buffer_escape(parser, &token_buffer);
11990 uint8_t peeked = peek(parser);
11991
11992 switch (peeked) {
11993 case ' ':
11994 case '\f':
11995 case '\t':
11996 case '\v':
11997 case '\\':
11998 pm_token_buffer_push_byte(&token_buffer, peeked);
11999 parser->current.end++;
12000 break;
12001 case '\r':
12002 parser->current.end++;
12003 if (peek(parser) != '\n') {
12004 pm_token_buffer_push_byte(&token_buffer, '\r');
12005 break;
12006 }
12007 /* fallthrough */
12008 case '\n':
12009 pm_token_buffer_push_byte(&token_buffer, '\n');
12010
12011 if (parser->heredoc_end) {
12012 // ... if we are on the same line as a heredoc,
12013 // flush the heredoc and continue parsing after
12014 // heredoc_end.
12015 parser_flush_heredoc_end(parser);
12016 pm_token_buffer_copy(parser, &token_buffer);
12018 } else {
12019 // ... else track the newline.
12020 pm_newline_list_append(&parser->newline_list, parser->current.end);
12021 }
12022
12023 parser->current.end++;
12024 break;
12025 default:
12026 if (peeked == lex_mode->as.list.incrementor || peeked == lex_mode->as.list.terminator) {
12027 pm_token_buffer_push_byte(&token_buffer, peeked);
12028 parser->current.end++;
12029 } else if (lex_mode->as.list.interpolation) {
12030 escape_read(parser, &token_buffer.buffer, NULL, PM_ESCAPE_FLAG_NONE);
12031 } else {
12032 pm_token_buffer_push_byte(&token_buffer, '\\');
12033 pm_token_buffer_push_escaped(&token_buffer, parser);
12034 }
12035
12036 break;
12037 }
12038
12039 token_buffer.cursor = parser->current.end;
12040 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12041 continue;
12042 }
12043
12044 // If we hit a #, then we will attempt to lex interpolation.
12045 if (*breakpoint == '#') {
12046 pm_token_type_t type = lex_interpolation(parser, breakpoint);
12047
12048 if (type == PM_TOKEN_NOT_PROVIDED) {
12049 // If we haven't returned at this point then we had something
12050 // that looked like an interpolated class or instance variable
12051 // like "#@" but wasn't actually. In this case we'll just skip
12052 // to the next breakpoint.
12053 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12054 continue;
12055 }
12056
12058 pm_token_buffer_flush(parser, &token_buffer);
12059 }
12060
12061 LEX(type);
12062 }
12063
12064 // If we've hit the incrementor, then we need to skip past it
12065 // and find the next breakpoint.
12066 assert(*breakpoint == lex_mode->as.list.incrementor);
12067 parser->current.end = breakpoint + 1;
12068 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12069 lex_mode->as.list.nesting++;
12070 continue;
12071 }
12072
12073 if (parser->current.end > parser->current.start) {
12074 pm_token_buffer_flush(parser, &token_buffer);
12076 }
12077
12078 // If we were unable to find a breakpoint, then this token hits the
12079 // end of the file.
12080 parser->current.end = parser->end;
12081 pm_token_buffer_flush(parser, &token_buffer);
12083 }
12084 case PM_LEX_REGEXP: {
12085 // First, we'll set the start of this token to be the current end.
12086 if (parser->next_start == NULL) {
12087 parser->current.start = parser->current.end;
12088 } else {
12089 parser->current.start = parser->next_start;
12090 parser->current.end = parser->next_start;
12091 parser->next_start = NULL;
12092 }
12093
12094 // We'll check if we're at the end of the file. If we are, then we
12095 // need to return the EOF token.
12096 if (parser->current.end >= parser->end) {
12097 LEX(PM_TOKEN_EOF);
12098 }
12099
12100 // Get a reference to the current mode.
12101 pm_lex_mode_t *lex_mode = parser->lex_modes.current;
12102
12103 // These are the places where we need to split up the content of the
12104 // regular expression. We'll use strpbrk to find the first of these
12105 // characters.
12106 const uint8_t *breakpoints = lex_mode->as.regexp.breakpoints;
12107 const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
12108 pm_regexp_token_buffer_t token_buffer = { 0 };
12109
12110 while (breakpoint != NULL) {
12111 uint8_t term = lex_mode->as.regexp.terminator;
12112 bool is_terminator = (*breakpoint == term);
12113
12114 // If the terminator is newline, we need to consider \r\n _also_ a newline
12115 // For example: `%r\nfoo\r\n`
12116 // The regular expression should be /foo/, not /foo\r/
12117 if (*breakpoint == '\r' && peek_at(parser, breakpoint + 1) == '\n') {
12118 if (term == '\n') {
12119 is_terminator = true;
12120 }
12121
12122 // If the terminator is a CR, but we see a CRLF, we need to
12123 // treat the CRLF as a newline, meaning this is _not_ the
12124 // terminator
12125 if (term == '\r') {
12126 is_terminator = false;
12127 }
12128 }
12129
12130 // If we hit the terminator, we need to determine what kind of
12131 // token to return.
12132 if (is_terminator) {
12133 if (lex_mode->as.regexp.nesting > 0) {
12134 parser->current.end = breakpoint + 1;
12135 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
12136 lex_mode->as.regexp.nesting--;
12137 continue;
12138 }
12139
12140 // Here we've hit the terminator. If we have already consumed
12141 // content then we need to return that content as string content
12142 // first.
12143 if (breakpoint > parser->current.start) {
12144 parser->current.end = breakpoint;
12145 pm_regexp_token_buffer_flush(parser, &token_buffer);
12147 }
12148
12149 // Check here if we need to track the newline.
12150 size_t eol_length = match_eol_at(parser, breakpoint);
12151 if (eol_length) {
12152 parser->current.end = breakpoint + eol_length;
12153 pm_newline_list_append(&parser->newline_list, parser->current.end - 1);
12154 } else {
12155 parser->current.end = breakpoint + 1;
12156 }
12157
12158 // Since we've hit the terminator of the regular expression,
12159 // we now need to parse the options.
12160 parser->current.end += pm_strspn_regexp_option(parser->current.end, parser->end - parser->current.end);
12161
12162 lex_mode_pop(parser);
12163 lex_state_set(parser, PM_LEX_STATE_END);
12165 }
12166
12167 // If we've hit the incrementor, then we need to skip past it
12168 // and find the next breakpoint.
12169 if (*breakpoint && *breakpoint == lex_mode->as.regexp.incrementor) {
12170 parser->current.end = breakpoint + 1;
12171 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
12172 lex_mode->as.regexp.nesting++;
12173 continue;
12174 }
12175
12176 switch (*breakpoint) {
12177 case '\0':
12178 // If we hit a null byte, skip directly past it.
12179 parser->current.end = breakpoint + 1;
12180 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
12181 break;
12182 case '\r':
12183 if (peek_at(parser, breakpoint + 1) != '\n') {
12184 parser->current.end = breakpoint + 1;
12185 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
12186 break;
12187 }
12188
12189 breakpoint++;
12190 parser->current.end = breakpoint;
12191 pm_regexp_token_buffer_escape(parser, &token_buffer);
12192 token_buffer.base.cursor = breakpoint;
12193
12194 /* fallthrough */
12195 case '\n':
12196 // If we've hit a newline, then we need to track that in
12197 // the list of newlines.
12198 if (parser->heredoc_end == NULL) {
12199 pm_newline_list_append(&parser->newline_list, breakpoint);
12200 parser->current.end = breakpoint + 1;
12201 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
12202 break;
12203 }
12204
12205 parser->current.end = breakpoint + 1;
12206 parser_flush_heredoc_end(parser);
12207 pm_regexp_token_buffer_flush(parser, &token_buffer);
12209 case '\\': {
12210 // If we hit escapes, then we need to treat the next
12211 // token literally. In this case we'll skip past the
12212 // next character and find the next breakpoint.
12213 parser->current.end = breakpoint + 1;
12214
12215 // If we've hit the end of the file, then break out of
12216 // the loop by setting the breakpoint to NULL.
12217 if (parser->current.end == parser->end) {
12218 breakpoint = NULL;
12219 break;
12220 }
12221
12222 pm_regexp_token_buffer_escape(parser, &token_buffer);
12223 uint8_t peeked = peek(parser);
12224
12225 switch (peeked) {
12226 case '\r':
12227 parser->current.end++;
12228 if (peek(parser) != '\n') {
12229 if (lex_mode->as.regexp.terminator != '\r') {
12230 pm_token_buffer_push_byte(&token_buffer.base, '\\');
12231 }
12232 pm_regexp_token_buffer_push_byte(&token_buffer, '\r');
12233 pm_token_buffer_push_byte(&token_buffer.base, '\r');
12234 break;
12235 }
12236 /* fallthrough */
12237 case '\n':
12238 if (parser->heredoc_end) {
12239 // ... if we are on the same line as a heredoc,
12240 // flush the heredoc and continue parsing after
12241 // heredoc_end.
12242 parser_flush_heredoc_end(parser);
12243 pm_regexp_token_buffer_copy(parser, &token_buffer);
12245 } else {
12246 // ... else track the newline.
12247 pm_newline_list_append(&parser->newline_list, parser->current.end);
12248 }
12249
12250 parser->current.end++;
12251 break;
12252 case 'c':
12253 case 'C':
12254 case 'M':
12255 case 'u':
12256 case 'x':
12257 escape_read(parser, &token_buffer.regexp_buffer, &token_buffer.base.buffer, PM_ESCAPE_FLAG_REGEXP);
12258 break;
12259 default:
12260 if (lex_mode->as.regexp.terminator == peeked) {
12261 // Some characters when they are used as the
12262 // terminator also receive an escape. They are
12263 // enumerated here.
12264 switch (peeked) {
12265 case '$': case ')': case '*': case '+':
12266 case '.': case '>': case '?': case ']':
12267 case '^': case '|': case '}':
12268 pm_token_buffer_push_byte(&token_buffer.base, '\\');
12269 break;
12270 default:
12271 break;
12272 }
12273
12274 pm_regexp_token_buffer_push_byte(&token_buffer, peeked);
12275 pm_token_buffer_push_byte(&token_buffer.base, peeked);
12276 parser->current.end++;
12277 break;
12278 }
12279
12280 if (peeked < 0x80) pm_token_buffer_push_byte(&token_buffer.base, '\\');
12281 pm_regexp_token_buffer_push_escaped(&token_buffer, parser);
12282 break;
12283 }
12284
12285 token_buffer.base.cursor = parser->current.end;
12286 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
12287 break;
12288 }
12289 case '#': {
12290 // If we hit a #, then we will attempt to lex
12291 // interpolation.
12292 pm_token_type_t type = lex_interpolation(parser, breakpoint);
12293
12294 if (type == PM_TOKEN_NOT_PROVIDED) {
12295 // If we haven't returned at this point then we had
12296 // something that looked like an interpolated class or
12297 // instance variable like "#@" but wasn't actually. In
12298 // this case we'll just skip to the next breakpoint.
12299 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
12300 break;
12301 }
12302
12304 pm_regexp_token_buffer_flush(parser, &token_buffer);
12305 }
12306
12307 LEX(type);
12308 }
12309 default:
12310 assert(false && "unreachable");
12311 break;
12312 }
12313 }
12314
12315 if (parser->current.end > parser->current.start) {
12316 pm_regexp_token_buffer_flush(parser, &token_buffer);
12318 }
12319
12320 // If we were unable to find a breakpoint, then this token hits the
12321 // end of the file.
12322 parser->current.end = parser->end;
12323 pm_regexp_token_buffer_flush(parser, &token_buffer);
12325 }
12326 case PM_LEX_STRING: {
12327 // First, we'll set the start of this token to be the current end.
12328 if (parser->next_start == NULL) {
12329 parser->current.start = parser->current.end;
12330 } else {
12331 parser->current.start = parser->next_start;
12332 parser->current.end = parser->next_start;
12333 parser->next_start = NULL;
12334 }
12335
12336 // We'll check if we're at the end of the file. If we are, then we need to
12337 // return the EOF token.
12338 if (parser->current.end >= parser->end) {
12339 LEX(PM_TOKEN_EOF);
12340 }
12341
12342 // These are the places where we need to split up the content of the
12343 // string. We'll use strpbrk to find the first of these characters.
12344 pm_lex_mode_t *lex_mode = parser->lex_modes.current;
12345 const uint8_t *breakpoints = lex_mode->as.string.breakpoints;
12346 const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12347
12348 // If we haven't found an escape yet, then this buffer will be
12349 // unallocated since we can refer directly to the source string.
12350 pm_token_buffer_t token_buffer = { 0 };
12351
12352 while (breakpoint != NULL) {
12353 // If we hit the incrementor, then we'll increment the nesting and
12354 // continue lexing.
12355 if (lex_mode->as.string.incrementor != '\0' && *breakpoint == lex_mode->as.string.incrementor) {
12356 lex_mode->as.string.nesting++;
12357 parser->current.end = breakpoint + 1;
12358 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12359 continue;
12360 }
12361
12362 uint8_t term = lex_mode->as.string.terminator;
12363 bool is_terminator = (*breakpoint == term);
12364
12365 // If the terminator is newline, we need to consider \r\n _also_ a newline
12366 // For example: `%\nfoo\r\n`
12367 // The string should be "foo", not "foo\r"
12368 if (*breakpoint == '\r' && peek_at(parser, breakpoint + 1) == '\n') {
12369 if (term == '\n') {
12370 is_terminator = true;
12371 }
12372
12373 // If the terminator is a CR, but we see a CRLF, we need to
12374 // treat the CRLF as a newline, meaning this is _not_ the
12375 // terminator
12376 if (term == '\r') {
12377 is_terminator = false;
12378 }
12379 }
12380
12381 // Note that we have to check the terminator here first because we could
12382 // potentially be parsing a % string that has a # character as the
12383 // terminator.
12384 if (is_terminator) {
12385 // If this terminator doesn't actually close the string, then we need
12386 // to continue on past it.
12387 if (lex_mode->as.string.nesting > 0) {
12388 parser->current.end = breakpoint + 1;
12389 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12390 lex_mode->as.string.nesting--;
12391 continue;
12392 }
12393
12394 // Here we've hit the terminator. If we have already consumed content
12395 // then we need to return that content as string content first.
12396 if (breakpoint > parser->current.start) {
12397 parser->current.end = breakpoint;
12398 pm_token_buffer_flush(parser, &token_buffer);
12400 }
12401
12402 // Otherwise we need to switch back to the parent lex mode and
12403 // return the end of the string.
12404 size_t eol_length = match_eol_at(parser, breakpoint);
12405 if (eol_length) {
12406 parser->current.end = breakpoint + eol_length;
12407 pm_newline_list_append(&parser->newline_list, parser->current.end - 1);
12408 } else {
12409 parser->current.end = breakpoint + 1;
12410 }
12411
12412 if (lex_mode->as.string.label_allowed && (peek(parser) == ':') && (peek_offset(parser, 1) != ':')) {
12413 parser->current.end++;
12414 lex_state_set(parser, PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED);
12415 lex_mode_pop(parser);
12416 LEX(PM_TOKEN_LABEL_END);
12417 }
12418
12419 lex_state_set(parser, PM_LEX_STATE_END);
12420 lex_mode_pop(parser);
12422 }
12423
12424 switch (*breakpoint) {
12425 case '\0':
12426 // Skip directly past the null character.
12427 parser->current.end = breakpoint + 1;
12428 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12429 break;
12430 case '\r':
12431 if (peek_at(parser, breakpoint + 1) != '\n') {
12432 parser->current.end = breakpoint + 1;
12433 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12434 break;
12435 }
12436
12437 // If we hit a \r\n sequence, then we need to treat it
12438 // as a newline.
12439 breakpoint++;
12440 parser->current.end = breakpoint;
12441 pm_token_buffer_escape(parser, &token_buffer);
12442 token_buffer.cursor = breakpoint;
12443
12444 /* fallthrough */
12445 case '\n':
12446 // When we hit a newline, we need to flush any potential
12447 // heredocs. Note that this has to happen after we check
12448 // for the terminator in case the terminator is a
12449 // newline character.
12450 if (parser->heredoc_end == NULL) {
12451 pm_newline_list_append(&parser->newline_list, breakpoint);
12452 parser->current.end = breakpoint + 1;
12453 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12454 break;
12455 }
12456
12457 parser->current.end = breakpoint + 1;
12458 parser_flush_heredoc_end(parser);
12459 pm_token_buffer_flush(parser, &token_buffer);
12461 case '\\': {
12462 // Here we hit escapes.
12463 parser->current.end = breakpoint + 1;
12464
12465 // If we've hit the end of the file, then break out of
12466 // the loop by setting the breakpoint to NULL.
12467 if (parser->current.end == parser->end) {
12468 breakpoint = NULL;
12469 continue;
12470 }
12471
12472 pm_token_buffer_escape(parser, &token_buffer);
12473 uint8_t peeked = peek(parser);
12474
12475 switch (peeked) {
12476 case '\\':
12477 pm_token_buffer_push_byte(&token_buffer, '\\');
12478 parser->current.end++;
12479 break;
12480 case '\r':
12481 parser->current.end++;
12482 if (peek(parser) != '\n') {
12483 if (!lex_mode->as.string.interpolation) {
12484 pm_token_buffer_push_byte(&token_buffer, '\\');
12485 }
12486 pm_token_buffer_push_byte(&token_buffer, '\r');
12487 break;
12488 }
12489 /* fallthrough */
12490 case '\n':
12491 if (!lex_mode->as.string.interpolation) {
12492 pm_token_buffer_push_byte(&token_buffer, '\\');
12493 pm_token_buffer_push_byte(&token_buffer, '\n');
12494 }
12495
12496 if (parser->heredoc_end) {
12497 // ... if we are on the same line as a heredoc,
12498 // flush the heredoc and continue parsing after
12499 // heredoc_end.
12500 parser_flush_heredoc_end(parser);
12501 pm_token_buffer_copy(parser, &token_buffer);
12503 } else {
12504 // ... else track the newline.
12505 pm_newline_list_append(&parser->newline_list, parser->current.end);
12506 }
12507
12508 parser->current.end++;
12509 break;
12510 default:
12511 if (lex_mode->as.string.incrementor != '\0' && peeked == lex_mode->as.string.incrementor) {
12512 pm_token_buffer_push_byte(&token_buffer, peeked);
12513 parser->current.end++;
12514 } else if (lex_mode->as.string.terminator != '\0' && peeked == lex_mode->as.string.terminator) {
12515 pm_token_buffer_push_byte(&token_buffer, peeked);
12516 parser->current.end++;
12517 } else if (lex_mode->as.string.interpolation) {
12518 escape_read(parser, &token_buffer.buffer, NULL, PM_ESCAPE_FLAG_NONE);
12519 } else {
12520 pm_token_buffer_push_byte(&token_buffer, '\\');
12521 pm_token_buffer_push_escaped(&token_buffer, parser);
12522 }
12523
12524 break;
12525 }
12526
12527 token_buffer.cursor = parser->current.end;
12528 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12529 break;
12530 }
12531 case '#': {
12532 pm_token_type_t type = lex_interpolation(parser, breakpoint);
12533
12534 if (type == PM_TOKEN_NOT_PROVIDED) {
12535 // If we haven't returned at this point then we had something that
12536 // looked like an interpolated class or instance variable like "#@"
12537 // but wasn't actually. In this case we'll just skip to the next
12538 // breakpoint.
12539 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12540 break;
12541 }
12542
12544 pm_token_buffer_flush(parser, &token_buffer);
12545 }
12546
12547 LEX(type);
12548 }
12549 default:
12550 assert(false && "unreachable");
12551 }
12552 }
12553
12554 if (parser->current.end > parser->current.start) {
12555 pm_token_buffer_flush(parser, &token_buffer);
12557 }
12558
12559 // If we've hit the end of the string, then this is an unterminated
12560 // string. In that case we'll return a string content token.
12561 parser->current.end = parser->end;
12562 pm_token_buffer_flush(parser, &token_buffer);
12564 }
12565 case PM_LEX_HEREDOC: {
12566 // First, we'll set to start of this token.
12567 if (parser->next_start == NULL) {
12568 parser->current.start = parser->current.end;
12569 } else {
12570 parser->current.start = parser->next_start;
12571 parser->current.end = parser->next_start;
12572 parser->heredoc_end = NULL;
12573 parser->next_start = NULL;
12574 }
12575
12576 // Now let's grab the information about the identifier off of the
12577 // current lex mode.
12578 pm_lex_mode_t *lex_mode = parser->lex_modes.current;
12579 pm_heredoc_lex_mode_t *heredoc_lex_mode = &lex_mode->as.heredoc.base;
12580
12581 bool line_continuation = lex_mode->as.heredoc.line_continuation;
12582 lex_mode->as.heredoc.line_continuation = false;
12583
12584 // We'll check if we're at the end of the file. If we are, then we
12585 // will add an error (because we weren't able to find the
12586 // terminator) but still continue parsing so that content after the
12587 // declaration of the heredoc can be parsed.
12588 if (parser->current.end >= parser->end) {
12589 pm_parser_err_heredoc_term(parser, heredoc_lex_mode->ident_start, heredoc_lex_mode->ident_length);
12590 parser->next_start = lex_mode->as.heredoc.next_start;
12591 parser->heredoc_end = parser->current.end;
12592 lex_state_set(parser, PM_LEX_STATE_END);
12593 lex_mode_pop(parser);
12595 }
12596
12597 const uint8_t *ident_start = heredoc_lex_mode->ident_start;
12598 size_t ident_length = heredoc_lex_mode->ident_length;
12599
12600 // If we are immediately following a newline and we have hit the
12601 // terminator, then we need to return the ending of the heredoc.
12602 if (current_token_starts_line(parser)) {
12603 const uint8_t *start = parser->current.start;
12604
12605 if (!line_continuation && (start + ident_length <= parser->end)) {
12606 const uint8_t *newline = next_newline(start, parser->end - start);
12607 const uint8_t *ident_end = newline;
12608 const uint8_t *terminator_end = newline;
12609
12610 if (newline == NULL) {
12611 terminator_end = parser->end;
12612 ident_end = parser->end;
12613 } else {
12614 terminator_end++;
12615 if (newline[-1] == '\r') {
12616 ident_end--; // Remove \r
12617 }
12618 }
12619
12620 const uint8_t *terminator_start = ident_end - ident_length;
12621 const uint8_t *cursor = start;
12622
12623 if (heredoc_lex_mode->indent == PM_HEREDOC_INDENT_DASH || heredoc_lex_mode->indent == PM_HEREDOC_INDENT_TILDE) {
12624 while (cursor < terminator_start && pm_char_is_inline_whitespace(*cursor)) {
12625 cursor++;
12626 }
12627 }
12628
12629 if (
12630 (cursor == terminator_start) &&
12631 (memcmp(terminator_start, ident_start, ident_length) == 0)
12632 ) {
12633 if (newline != NULL) {
12634 pm_newline_list_append(&parser->newline_list, newline);
12635 }
12636
12637 parser->current.end = terminator_end;
12638 if (*lex_mode->as.heredoc.next_start == '\\') {
12639 parser->next_start = NULL;
12640 } else {
12641 parser->next_start = lex_mode->as.heredoc.next_start;
12642 parser->heredoc_end = parser->current.end;
12643 }
12644
12645 lex_state_set(parser, PM_LEX_STATE_END);
12646 lex_mode_pop(parser);
12648 }
12649 }
12650
12651 size_t whitespace = pm_heredoc_strspn_inline_whitespace(parser, &start, heredoc_lex_mode->indent);
12652 if (
12653 heredoc_lex_mode->indent == PM_HEREDOC_INDENT_TILDE &&
12654 lex_mode->as.heredoc.common_whitespace != NULL &&
12655 (*lex_mode->as.heredoc.common_whitespace > whitespace) &&
12656 peek_at(parser, start) != '\n'
12657 ) {
12658 *lex_mode->as.heredoc.common_whitespace = whitespace;
12659 }
12660 }
12661
12662 // Otherwise we'll be parsing string content. These are the places
12663 // where we need to split up the content of the heredoc. We'll use
12664 // strpbrk to find the first of these characters.
12665 uint8_t breakpoints[] = "\r\n\\#";
12666
12667 pm_heredoc_quote_t quote = heredoc_lex_mode->quote;
12668 if (quote == PM_HEREDOC_QUOTE_SINGLE) {
12669 breakpoints[3] = '\0';
12670 }
12671
12672 const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12673 pm_token_buffer_t token_buffer = { 0 };
12674 bool was_line_continuation = false;
12675
12676 while (breakpoint != NULL) {
12677 switch (*breakpoint) {
12678 case '\0':
12679 // Skip directly past the null character.
12680 parser->current.end = breakpoint + 1;
12681 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12682 break;
12683 case '\r':
12684 parser->current.end = breakpoint + 1;
12685
12686 if (peek_at(parser, breakpoint + 1) != '\n') {
12687 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12688 break;
12689 }
12690
12691 // If we hit a \r\n sequence, then we want to replace it
12692 // with a single \n character in the final string.
12693 breakpoint++;
12694 pm_token_buffer_escape(parser, &token_buffer);
12695 token_buffer.cursor = breakpoint;
12696
12697 /* fallthrough */
12698 case '\n': {
12699 if (parser->heredoc_end != NULL && (parser->heredoc_end > breakpoint)) {
12700 parser_flush_heredoc_end(parser);
12701 parser->current.end = breakpoint + 1;
12702 pm_token_buffer_flush(parser, &token_buffer);
12704 }
12705
12706 pm_newline_list_append(&parser->newline_list, breakpoint);
12707
12708 // If we have a - or ~ heredoc, then we can match after
12709 // some leading whitespace.
12710 const uint8_t *start = breakpoint + 1;
12711
12712 if (!was_line_continuation && (start + ident_length <= parser->end)) {
12713 // We want to match the terminator starting from the end of the line in case
12714 // there is whitespace in the ident such as <<-' DOC' or <<~' DOC'.
12715 const uint8_t *newline = next_newline(start, parser->end - start);
12716
12717 if (newline == NULL) {
12718 newline = parser->end;
12719 } else if (newline[-1] == '\r') {
12720 newline--; // Remove \r
12721 }
12722
12723 // Start of a possible terminator.
12724 const uint8_t *terminator_start = newline - ident_length;
12725
12726 // Cursor to check for the leading whitespace. We skip the
12727 // leading whitespace if we have a - or ~ heredoc.
12728 const uint8_t *cursor = start;
12729
12730 if (heredoc_lex_mode->indent == PM_HEREDOC_INDENT_DASH || heredoc_lex_mode->indent == PM_HEREDOC_INDENT_TILDE) {
12731 while (cursor < terminator_start && pm_char_is_inline_whitespace(*cursor)) {
12732 cursor++;
12733 }
12734 }
12735
12736 if (
12737 cursor == terminator_start &&
12738 (memcmp(terminator_start, ident_start, ident_length) == 0)
12739 ) {
12740 parser->current.end = breakpoint + 1;
12741 pm_token_buffer_flush(parser, &token_buffer);
12743 }
12744 }
12745
12746 size_t whitespace = pm_heredoc_strspn_inline_whitespace(parser, &start, lex_mode->as.heredoc.base.indent);
12747
12748 // If we have hit a newline that is followed by a valid
12749 // terminator, then we need to return the content of the
12750 // heredoc here as string content. Then, the next time a
12751 // token is lexed, it will match again and return the
12752 // end of the heredoc.
12753 if (lex_mode->as.heredoc.base.indent == PM_HEREDOC_INDENT_TILDE) {
12754 if ((lex_mode->as.heredoc.common_whitespace != NULL) && (*lex_mode->as.heredoc.common_whitespace > whitespace) && peek_at(parser, start) != '\n') {
12755 *lex_mode->as.heredoc.common_whitespace = whitespace;
12756 }
12757
12758 parser->current.end = breakpoint + 1;
12759 pm_token_buffer_flush(parser, &token_buffer);
12761 }
12762
12763 // Otherwise we hit a newline and it wasn't followed by
12764 // a terminator, so we can continue parsing.
12765 parser->current.end = breakpoint + 1;
12766 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12767 break;
12768 }
12769 case '\\': {
12770 // If we hit an escape, then we need to skip past
12771 // however many characters the escape takes up. However
12772 // it's important that if \n or \r\n are escaped, we
12773 // stop looping before the newline and not after the
12774 // newline so that we can still potentially find the
12775 // terminator of the heredoc.
12776 parser->current.end = breakpoint + 1;
12777
12778 // If we've hit the end of the file, then break out of
12779 // the loop by setting the breakpoint to NULL.
12780 if (parser->current.end == parser->end) {
12781 breakpoint = NULL;
12782 continue;
12783 }
12784
12785 pm_token_buffer_escape(parser, &token_buffer);
12786 uint8_t peeked = peek(parser);
12787
12788 if (quote == PM_HEREDOC_QUOTE_SINGLE) {
12789 switch (peeked) {
12790 case '\r':
12791 parser->current.end++;
12792 if (peek(parser) != '\n') {
12793 pm_token_buffer_push_byte(&token_buffer, '\\');
12794 pm_token_buffer_push_byte(&token_buffer, '\r');
12795 break;
12796 }
12797 /* fallthrough */
12798 case '\n':
12799 pm_token_buffer_push_byte(&token_buffer, '\\');
12800 pm_token_buffer_push_byte(&token_buffer, '\n');
12801 token_buffer.cursor = parser->current.end + 1;
12802 breakpoint = parser->current.end;
12803 continue;
12804 default:
12805 pm_token_buffer_push_byte(&token_buffer, '\\');
12806 pm_token_buffer_push_escaped(&token_buffer, parser);
12807 break;
12808 }
12809 } else {
12810 switch (peeked) {
12811 case '\r':
12812 parser->current.end++;
12813 if (peek(parser) != '\n') {
12814 pm_token_buffer_push_byte(&token_buffer, '\r');
12815 break;
12816 }
12817 /* fallthrough */
12818 case '\n':
12819 // If we are in a tilde here, we should
12820 // break out of the loop and return the
12821 // string content.
12822 if (heredoc_lex_mode->indent == PM_HEREDOC_INDENT_TILDE) {
12823 const uint8_t *end = parser->current.end;
12824 pm_newline_list_append(&parser->newline_list, end);
12825
12826 // Here we want the buffer to only
12827 // include up to the backslash.
12828 parser->current.end = breakpoint;
12829 pm_token_buffer_flush(parser, &token_buffer);
12830
12831 // Now we can advance the end of the
12832 // token past the newline.
12833 parser->current.end = end + 1;
12834 lex_mode->as.heredoc.line_continuation = true;
12836 }
12837
12838 was_line_continuation = true;
12839 token_buffer.cursor = parser->current.end + 1;
12840 breakpoint = parser->current.end;
12841 continue;
12842 default:
12843 escape_read(parser, &token_buffer.buffer, NULL, PM_ESCAPE_FLAG_NONE);
12844 break;
12845 }
12846 }
12847
12848 token_buffer.cursor = parser->current.end;
12849 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12850 break;
12851 }
12852 case '#': {
12853 pm_token_type_t type = lex_interpolation(parser, breakpoint);
12854
12855 if (type == PM_TOKEN_NOT_PROVIDED) {
12856 // If we haven't returned at this point then we had
12857 // something that looked like an interpolated class
12858 // or instance variable like "#@" but wasn't
12859 // actually. In this case we'll just skip to the
12860 // next breakpoint.
12861 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12862 break;
12863 }
12864
12866 pm_token_buffer_flush(parser, &token_buffer);
12867 }
12868
12869 LEX(type);
12870 }
12871 default:
12872 assert(false && "unreachable");
12873 }
12874
12875 was_line_continuation = false;
12876 }
12877
12878 if (parser->current.end > parser->current.start) {
12879 parser->current.end = parser->end;
12880 pm_token_buffer_flush(parser, &token_buffer);
12882 }
12883
12884 // If we've hit the end of the string, then this is an unterminated
12885 // heredoc. In that case we'll return a string content token.
12886 parser->current.end = parser->end;
12887 pm_token_buffer_flush(parser, &token_buffer);
12889 }
12890 }
12891
12892 assert(false && "unreachable");
12893}
12894
12895#undef LEX
12896
12897/******************************************************************************/
12898/* Parse functions */
12899/******************************************************************************/
12900
/**
 * Precedence levels ("binding powers") for tokens, listed from the loosest
 * (PM_BINDING_POWER_STATEMENT) to the tightest (PM_BINDING_POWER_CALL).
 *
 * Consecutive levels are two apart, which leaves the odd value between two
 * levels free. The LEFT_ASSOCIATIVE and NON_ASSOCIATIVE initializer macros
 * use `level + 1` as the right-hand binding power.
 */
typedef enum {
    PM_BINDING_POWER_UNSET =            0, // used to indicate this token cannot be used as an infix operator
    PM_BINDING_POWER_STATEMENT =        2,
    PM_BINDING_POWER_MODIFIER_RESCUE =  4, // rescue
    PM_BINDING_POWER_MODIFIER =         6, // if unless until while
    PM_BINDING_POWER_COMPOSITION =      8, // and or
    PM_BINDING_POWER_NOT =             10, // not
    PM_BINDING_POWER_MATCH =           12, // => in
    PM_BINDING_POWER_DEFINED =         14, // defined?
    PM_BINDING_POWER_MULTI_ASSIGNMENT = 16, // =
    PM_BINDING_POWER_ASSIGNMENT =      18, // = += -= *= /= %= &= |= ^= &&= ||= <<= >>= **=
    PM_BINDING_POWER_TERNARY =         20, // ?:
    PM_BINDING_POWER_RANGE =           22, // .. ...
    PM_BINDING_POWER_LOGICAL_OR =      24, // ||
    PM_BINDING_POWER_LOGICAL_AND =     26, // &&
    PM_BINDING_POWER_EQUALITY =        28, // <=> == === != =~ !~
    PM_BINDING_POWER_COMPARISON =      30, // > >= < <=
    PM_BINDING_POWER_BITWISE_OR =      32, // | ^
    PM_BINDING_POWER_BITWISE_AND =     34, // &
    PM_BINDING_POWER_SHIFT =           36, // << >>
    PM_BINDING_POWER_TERM =            38, // + -
    PM_BINDING_POWER_FACTOR =          40, // * / %
    PM_BINDING_POWER_UMINUS =          42, // -@
    PM_BINDING_POWER_EXPONENT =        44, // **
    PM_BINDING_POWER_UNARY =           46, // ! ~ +@
    PM_BINDING_POWER_INDEX =           48, // [] []=
    PM_BINDING_POWER_CALL =            50, // :: .
    PM_BINDING_POWER_MAX =             52
} pm_binding_power_t;
12938
12943typedef struct {
12945 pm_binding_power_t left;
12946
12948 pm_binding_power_t right;
12949
12952
12959
// Initializer shorthands for the entries of the pm_binding_powers table. Each
// one expands to a brace initializer whose first two values are the left and
// right binding powers, followed by two boolean flags.
//
// The macro argument is parenthesized at every use so that an expression
// argument cannot be regrouped by the `+ 1` (for example `a << b` would
// otherwise become `a << (b + 1)`).
#define BINDING_POWER_ASSIGNMENT { PM_BINDING_POWER_UNARY, PM_BINDING_POWER_ASSIGNMENT, true, false }
#define LEFT_ASSOCIATIVE(precedence) { (precedence), (precedence) + 1, true, false }
#define RIGHT_ASSOCIATIVE(precedence) { (precedence), (precedence), true, false }
#define NON_ASSOCIATIVE(precedence) { (precedence), (precedence) + 1, true, true }
#define RIGHT_ASSOCIATIVE_UNARY(precedence) { (precedence), (precedence), false, false }
12965
12966pm_binding_powers_t pm_binding_powers[PM_TOKEN_MAXIMUM] = {
12967 // rescue
12968 [PM_TOKEN_KEYWORD_RESCUE_MODIFIER] = { PM_BINDING_POWER_MODIFIER_RESCUE, PM_BINDING_POWER_COMPOSITION, true, false },
12969
12970 // if unless until while
12971 [PM_TOKEN_KEYWORD_IF_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
12972 [PM_TOKEN_KEYWORD_UNLESS_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
12973 [PM_TOKEN_KEYWORD_UNTIL_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
12974 [PM_TOKEN_KEYWORD_WHILE_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
12975
12976 // and or
12977 [PM_TOKEN_KEYWORD_AND] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPOSITION),
12978 [PM_TOKEN_KEYWORD_OR] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPOSITION),
12979
12980 // => in
12981 [PM_TOKEN_EQUAL_GREATER] = NON_ASSOCIATIVE(PM_BINDING_POWER_MATCH),
12982 [PM_TOKEN_KEYWORD_IN] = NON_ASSOCIATIVE(PM_BINDING_POWER_MATCH),
12983
12984 // &&= &= ^= = >>= <<= -= %= |= ||= += /= *= **=
12985 [PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL] = BINDING_POWER_ASSIGNMENT,
12986 [PM_TOKEN_AMPERSAND_EQUAL] = BINDING_POWER_ASSIGNMENT,
12987 [PM_TOKEN_CARET_EQUAL] = BINDING_POWER_ASSIGNMENT,
12988 [PM_TOKEN_EQUAL] = BINDING_POWER_ASSIGNMENT,
12989 [PM_TOKEN_GREATER_GREATER_EQUAL] = BINDING_POWER_ASSIGNMENT,
12990 [PM_TOKEN_LESS_LESS_EQUAL] = BINDING_POWER_ASSIGNMENT,
12991 [PM_TOKEN_MINUS_EQUAL] = BINDING_POWER_ASSIGNMENT,
12992 [PM_TOKEN_PERCENT_EQUAL] = BINDING_POWER_ASSIGNMENT,
12993 [PM_TOKEN_PIPE_EQUAL] = BINDING_POWER_ASSIGNMENT,
12994 [PM_TOKEN_PIPE_PIPE_EQUAL] = BINDING_POWER_ASSIGNMENT,
12995 [PM_TOKEN_PLUS_EQUAL] = BINDING_POWER_ASSIGNMENT,
12996 [PM_TOKEN_SLASH_EQUAL] = BINDING_POWER_ASSIGNMENT,
12997 [PM_TOKEN_STAR_EQUAL] = BINDING_POWER_ASSIGNMENT,
12998 [PM_TOKEN_STAR_STAR_EQUAL] = BINDING_POWER_ASSIGNMENT,
12999
13000 // ?:
13001 [PM_TOKEN_QUESTION_MARK] = RIGHT_ASSOCIATIVE(PM_BINDING_POWER_TERNARY),
13002
13003 // .. ...
13004 [PM_TOKEN_DOT_DOT] = NON_ASSOCIATIVE(PM_BINDING_POWER_RANGE),
13005 [PM_TOKEN_DOT_DOT_DOT] = NON_ASSOCIATIVE(PM_BINDING_POWER_RANGE),
13006 [PM_TOKEN_UDOT_DOT] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_LOGICAL_OR),
13007 [PM_TOKEN_UDOT_DOT_DOT] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_LOGICAL_OR),
13008
13009 // ||
13010 [PM_TOKEN_PIPE_PIPE] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_LOGICAL_OR),
13011
13012 // &&
13013 [PM_TOKEN_AMPERSAND_AMPERSAND] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_LOGICAL_AND),
13014
13015 // != !~ == === =~ <=>
13016 [PM_TOKEN_BANG_EQUAL] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
13017 [PM_TOKEN_BANG_TILDE] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
13018 [PM_TOKEN_EQUAL_EQUAL] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
13019 [PM_TOKEN_EQUAL_EQUAL_EQUAL] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
13020 [PM_TOKEN_EQUAL_TILDE] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
13021 [PM_TOKEN_LESS_EQUAL_GREATER] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
13022
13023 // > >= < <=
13024 [PM_TOKEN_GREATER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPARISON),
13025 [PM_TOKEN_GREATER_EQUAL] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPARISON),
13026 [PM_TOKEN_LESS] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPARISON),
13027 [PM_TOKEN_LESS_EQUAL] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPARISON),
13028
13029 // ^ |
13030 [PM_TOKEN_CARET] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_BITWISE_OR),
13031 [PM_TOKEN_PIPE] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_BITWISE_OR),
13032
13033 // &
13034 [PM_TOKEN_AMPERSAND] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_BITWISE_AND),
13035
13036 // >> <<
13037 [PM_TOKEN_GREATER_GREATER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_SHIFT),
13038 [PM_TOKEN_LESS_LESS] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_SHIFT),
13039
13040 // - +
13041 [PM_TOKEN_MINUS] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_TERM),
13042 [PM_TOKEN_PLUS] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_TERM),
13043
13044 // % / *
13045 [PM_TOKEN_PERCENT] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_FACTOR),
13046 [PM_TOKEN_SLASH] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_FACTOR),
13047 [PM_TOKEN_STAR] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_FACTOR),
13048 [PM_TOKEN_USTAR] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_FACTOR),
13049
13050 // -@
13051 [PM_TOKEN_UMINUS] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_UMINUS),
13052 [PM_TOKEN_UMINUS_NUM] = { PM_BINDING_POWER_UMINUS, PM_BINDING_POWER_MAX, false, false },
13053
13054 // **
13055 [PM_TOKEN_STAR_STAR] = RIGHT_ASSOCIATIVE(PM_BINDING_POWER_EXPONENT),
13056 [PM_TOKEN_USTAR_STAR] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_UNARY),
13057
13058 // ! ~ +@
13059 [PM_TOKEN_BANG] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_UNARY),
13060 [PM_TOKEN_TILDE] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_UNARY),
13061 [PM_TOKEN_UPLUS] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_UNARY),
13062
13063 // [
13064 [PM_TOKEN_BRACKET_LEFT] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_INDEX),
13065
13066 // :: . &.
13067 [PM_TOKEN_COLON_COLON] = RIGHT_ASSOCIATIVE(PM_BINDING_POWER_CALL),
13068 [PM_TOKEN_DOT] = RIGHT_ASSOCIATIVE(PM_BINDING_POWER_CALL),
13069 [PM_TOKEN_AMPERSAND_DOT] = RIGHT_ASSOCIATIVE(PM_BINDING_POWER_CALL)
13070};
13071
// The initializer shorthands are only meant for the pm_binding_powers table,
// so remove all five of them here (NON_ASSOCIATIVE was previously left
// defined for the rest of the translation unit).
#undef BINDING_POWER_ASSIGNMENT
#undef LEFT_ASSOCIATIVE
#undef RIGHT_ASSOCIATIVE
#undef NON_ASSOCIATIVE
#undef RIGHT_ASSOCIATIVE_UNARY
13076
13080static inline bool
13081match1(const pm_parser_t *parser, pm_token_type_t type) {
13082 return parser->current.type == type;
13083}
13084
13088static inline bool
13089match2(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2) {
13090 return match1(parser, type1) || match1(parser, type2);
13091}
13092
13096static inline bool
13097match3(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3) {
13098 return match1(parser, type1) || match1(parser, type2) || match1(parser, type3);
13099}
13100
13104static inline bool
13105match4(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4) {
13106 return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4);
13107}
13108
13112static inline bool
13113match7(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4, pm_token_type_t type5, pm_token_type_t type6, pm_token_type_t type7) {
13114 return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4) || match1(parser, type5) || match1(parser, type6) || match1(parser, type7);
13115}
13116
13120static inline bool
13121match8(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4, pm_token_type_t type5, pm_token_type_t type6, pm_token_type_t type7, pm_token_type_t type8) {
13122 return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4) || match1(parser, type5) || match1(parser, type6) || match1(parser, type7) || match1(parser, type8);
13123}
13124
13128static inline bool
13129match9(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4, pm_token_type_t type5, pm_token_type_t type6, pm_token_type_t type7, pm_token_type_t type8, pm_token_type_t type9) {
13130 return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4) || match1(parser, type5) || match1(parser, type6) || match1(parser, type7) || match1(parser, type8) || match1(parser, type9);
13131}
13132
13139static bool
13140accept1(pm_parser_t *parser, pm_token_type_t type) {
13141 if (match1(parser, type)) {
13142 parser_lex(parser);
13143 return true;
13144 }
13145 return false;
13146}
13147
13152static inline bool
13153accept2(pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2) {
13154 if (match2(parser, type1, type2)) {
13155 parser_lex(parser);
13156 return true;
13157 }
13158 return false;
13159}
13160
13172static void
13173expect1(pm_parser_t *parser, pm_token_type_t type, pm_diagnostic_id_t diag_id) {
13174 if (accept1(parser, type)) return;
13175
13176 const uint8_t *location = parser->previous.end;
13177 pm_parser_err(parser, location, location, diag_id);
13178
13179 parser->previous.start = location;
13180 parser->previous.type = PM_TOKEN_MISSING;
13181}
13182
13187static void
13188expect2(pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_diagnostic_id_t diag_id) {
13189 if (accept2(parser, type1, type2)) return;
13190
13191 const uint8_t *location = parser->previous.end;
13192 pm_parser_err(parser, location, location, diag_id);
13193
13194 parser->previous.start = location;
13195 parser->previous.type = PM_TOKEN_MISSING;
13196}
13197
13202static void
13203expect1_heredoc_term(pm_parser_t *parser, const uint8_t *ident_start, size_t ident_length) {
13204 if (match1(parser, PM_TOKEN_HEREDOC_END)) {
13205 parser_lex(parser);
13206 } else {
13207 pm_parser_err_heredoc_term(parser, ident_start, ident_length);
13208 parser->previous.start = parser->previous.end;
13209 parser->previous.type = PM_TOKEN_MISSING;
13210 }
13211}
13212
13213static pm_node_t *
13214parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, bool accepts_label, pm_diagnostic_id_t diag_id, uint16_t depth);
13215
13220static pm_node_t *
13221parse_value_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, bool accepts_label, pm_diagnostic_id_t diag_id, uint16_t depth) {
13222 pm_node_t *node = parse_expression(parser, binding_power, accepts_command_call, accepts_label, diag_id, depth);
13223 pm_assert_value_expression(parser, node);
13224 return node;
13225}
13226
13245static inline bool
13246token_begins_expression_p(pm_token_type_t type) {
13247 switch (type) {
13250 // We need to special case this because it is a binary operator that
13251 // should not be marked as beginning an expression.
13252 return false;
13255 case PM_TOKEN_COLON:
13256 case PM_TOKEN_COMMA:
13258 case PM_TOKEN_EOF:
13269 case PM_TOKEN_NEWLINE:
13271 case PM_TOKEN_SEMICOLON:
13272 // The reason we need this short-circuit is because we're using the
13273 // binding powers table to tell us if the subsequent token could
13274 // potentially be the start of an expression. If there _is_ a binding
13275 // power for one of these tokens, then we should remove it from this list
13276 // and let it be handled by the default case below.
13277 assert(pm_binding_powers[type].left == PM_BINDING_POWER_UNSET);
13278 return false;
13280 // This is a special case because this unary operator cannot appear
13281 // as a general operator, it only appears in certain circumstances.
13282 return false;
13284 case PM_TOKEN_UMINUS:
13286 case PM_TOKEN_UPLUS:
13287 case PM_TOKEN_BANG:
13288 case PM_TOKEN_TILDE:
13289 case PM_TOKEN_UDOT_DOT:
13291 // These unary tokens actually do have binding power associated with them
13292 // so that we can correctly place them into the precedence order. But we
13293 // want them to be marked as beginning an expression, so we need to
13294 // special case them here.
13295 return true;
13296 default:
13297 return pm_binding_powers[type].left == PM_BINDING_POWER_UNSET;
13298 }
13299}
13300
13305static pm_node_t *
13306parse_starred_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, pm_diagnostic_id_t diag_id, uint16_t depth) {
13307 if (accept1(parser, PM_TOKEN_USTAR)) {
13308 pm_token_t operator = parser->previous;
13309 pm_node_t *expression = parse_value_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
13310 return (pm_node_t *) pm_splat_node_create(parser, &operator, expression);
13311 }
13312
13313 return parse_value_expression(parser, binding_power, accepts_command_call, false, diag_id, depth);
13314}
13315
13320static void
13321parse_write_name(pm_parser_t *parser, pm_constant_id_t *name_field) {
13322 // The method name needs to change. If we previously had
13323 // foo, we now need foo=. In this case we'll allocate a new
13324 // owned string, copy the previous method name in, and
13325 // append an =.
13326 pm_constant_t *constant = pm_constant_pool_id_to_constant(&parser->constant_pool, *name_field);
13327 size_t length = constant->length;
13328 uint8_t *name = xcalloc(length + 1, sizeof(uint8_t));
13329 if (name == NULL) return;
13330
13331 memcpy(name, constant->start, length);
13332 name[length] = '=';
13333
13334 // Now switch the name to the new string.
13335 // This silences clang analyzer warning about leak of memory pointed by `name`.
13336 // NOLINTNEXTLINE(clang-analyzer-*)
13337 *name_field = pm_constant_pool_insert_owned(&parser->constant_pool, name, length + 1);
13338}
13339
13346static pm_node_t *
13347parse_unwriteable_target(pm_parser_t *parser, pm_node_t *target) {
13348 switch (PM_NODE_TYPE(target)) {
13349 case PM_SOURCE_ENCODING_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_ENCODING); break;
13350 case PM_FALSE_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_FALSE); break;
13351 case PM_SOURCE_FILE_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_FILE); break;
13352 case PM_SOURCE_LINE_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_LINE); break;
13353 case PM_NIL_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_NIL); break;
13354 case PM_SELF_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_SELF); break;
13355 case PM_TRUE_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_TRUE); break;
13356 default: break;
13357 }
13358
13359 pm_constant_id_t name = pm_parser_constant_id_location(parser, target->location.start, target->location.end);
13360 pm_local_variable_target_node_t *result = pm_local_variable_target_node_create(parser, &target->location, name, 0);
13361
13362 pm_node_destroy(parser, target);
13363 return (pm_node_t *) result;
13364}
13365
13371static void
13372parse_target_implicit_parameter(pm_parser_t *parser, pm_node_t *node) {
13373 pm_node_list_t *implicit_parameters = &parser->current_scope->implicit_parameters;
13374
13375 for (size_t index = 0; index < implicit_parameters->size; index++) {
13376 if (implicit_parameters->nodes[index] == node) {
13377 // If the node is not the last one in the list, we need to shift the
13378 // remaining nodes down to fill the gap. This is extremely unlikely
13379 // to happen.
13380 if (index != implicit_parameters->size - 1) {
13381 memcpy(&implicit_parameters->nodes[index], &implicit_parameters->nodes[index + 1], (implicit_parameters->size - index - 1) * sizeof(pm_node_t *));
13382 }
13383
13384 implicit_parameters->size--;
13385 break;
13386 }
13387 }
13388}
13389
/**
 * Convert a node that was parsed as an ordinary expression into a node that
 * can be used as the target of a write.
 *
 * Depending on the node type, the node is returned as is, has its type tag
 * rewritten in place, or is destroyed and replaced by a newly created node.
 * Nodes that cannot be converted get a PM_ERR_WRITE_TARGET_UNEXPECTED error
 * and are returned unchanged.
 *
 * multiple is forwarded to nested targets and, when true, makes a safe
 * navigation call target an error. splat_parent is passed as true when the
 * target is the expression of a splat; in that case one of the branches below
 * reports PM_ERR_WRITE_TARGET_UNEXPECTED.
 *
 * NOTE(review): the embedded line numbers in this listing skip in several
 * places inside the switch (for example 13403, 13414-13416, 13418, 13427,
 * 13436-13437, 13440-13442, 13444, 13460, 13469-13471, 13473), so a number of
 * `case` labels and statements are not visible here. Confirm against the
 * original file before relying on which node type each branch handles.
 */
13398static pm_node_t *
13399parse_target(pm_parser_t *parser, pm_node_t *target, bool multiple, bool splat_parent) {
13400 switch (PM_NODE_TYPE(target)) {
13401 case PM_MISSING_NODE:
13402 return target;
13404 case PM_FALSE_NODE:
13407 case PM_NIL_NODE:
13408 case PM_SELF_NODE:
13409 case PM_TRUE_NODE: {
13410 // In these special cases, we have specific error messages and we
13411 // will replace them with local variable writes.
13412 return parse_unwriteable_target(parser, target);
13413 }
// NOTE(review): the label and statements leading to this return are not
// visible in this listing.
13417 return target;
// This branch and the next one report an error when context_def_p holds
// (presumably: the write appears inside a method definition — confirm).
13419 if (context_def_p(parser)) {
13420 pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_IN_METHOD);
13421 }
13422
13425
13426 return target;
13428 if (context_def_p(parser)) {
13429 pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_IN_METHOD);
13430 }
13431
// The node is retagged in place rather than reallocated; the assert documents
// that the read and target node structs have the same size.
13432 assert(sizeof(pm_constant_target_node_t) == sizeof(pm_constant_read_node_t));
13433 target->type = PM_CONSTANT_TARGET_NODE;
13434
13435 return target;
// Read-only targets: an error is reported and the node is returned unchanged.
13438 PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, target, PM_ERR_WRITE_TARGET_READONLY);
13439 return target;
13443 return target;
// The statements below treat target as a local variable read (see the cast).
// A name that looks like a numbered parameter is reported as reserved and the
// node is removed from the scope's implicit parameter list.
13445 if (pm_token_is_numbered_parameter(target->location.start, target->location.end)) {
13446 PM_PARSER_ERR_FORMAT(parser, target->location.start, target->location.end, PM_ERR_PARAMETER_NUMBERED_RESERVED, target->location.start);
13447 parse_target_implicit_parameter(parser, target);
13448 }
13449
13450 const pm_local_variable_read_node_t *cast = (const pm_local_variable_read_node_t *) target;
13451 uint32_t name = cast->name;
13452 uint32_t depth = cast->depth;
// Undo the read that was recorded for this local in the scope found at the
// given depth (the node is becoming a write target, not a read).
13453 pm_locals_unread(&pm_parser_scope_find(parser, depth)->locals, name);
13454
13457
13458 return target;
13459 }
// The statements below replace target with a new local variable target node
// named "it" at depth 0, drop the old node from the implicit parameter list
// and destroy it.
13461 pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
13462 pm_node_t *node = (pm_node_t *) pm_local_variable_target_node_create(parser, &target->location, name, 0);
13463
13464 parse_target_implicit_parameter(parser, target);
13465 pm_node_destroy(parser, target);
13466
13467 return node;
13468 }
13472 return target;
13474 if (splat_parent) {
13475 // Multi target is not accepted in all positions. If this is one
13476 // of them, then we need to add an error.
13477 pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_UNEXPECTED);
13478 }
13479
13480 return target;
13481 case PM_SPLAT_NODE: {
13482 pm_splat_node_t *splat = (pm_splat_node_t *) target;
13483
// Convert the splatted expression recursively, marking it as a splat child.
13484 if (splat->expression != NULL) {
13485 splat->expression = parse_target(parser, splat->expression, multiple, true);
13486 }
13487
13488 return (pm_node_t *) splat;
13489 }
13490 case PM_CALL_NODE: {
13491 pm_call_node_t *call = (pm_call_node_t *) target;
13492
13493 // If we have no arguments to the call node and we need this to be a
13494 // target then this is either a method call or a local variable
13495 // write.
13496 if (
13497 (call->message_loc.start != NULL) &&
13498 (call->message_loc.end[-1] != '!') &&
13499 (call->message_loc.end[-1] != '?') &&
13500 (call->opening_loc.start == NULL) &&
13501 (call->arguments == NULL) &&
13502 (call->block == NULL)
13503 ) {
13504 if (call->receiver == NULL) {
13505 // When we get here, we have a local variable write, because it
13506 // was previously marked as a method call but now we have an =.
13507 // This looks like:
13508 //
13509 // foo = 1
13510 //
13511 // When it was parsed in the prefix position, foo was seen as a
13512 // method call with no receiver and no arguments. Now we have an
13513 // =, so we know it's a local variable write.
// The message location is copied first because the call node is destroyed
// before the replacement node is created from that location.
13514 const pm_location_t message_loc = call->message_loc;
13515
13516 pm_constant_id_t name = pm_parser_local_add_location(parser, message_loc.start, message_loc.end, 0);
13517 pm_node_destroy(parser, target);
13518
13519 return (pm_node_t *) pm_local_variable_target_node_create(parser, &message_loc, name, 0);
13520 }
13521
// A call with a receiver whose message starts with '_' or an alphanumeric
// character becomes a call target; its name gets an '=' appended.
13522 if (*call->message_loc.start == '_' || parser->encoding->alnum_char(call->message_loc.start, call->message_loc.end - call->message_loc.start)) {
13523 if (multiple && PM_NODE_FLAG_P(call, PM_CALL_NODE_FLAGS_SAFE_NAVIGATION)) {
13524 pm_parser_err_node(parser, (const pm_node_t *) call, PM_ERR_UNEXPECTED_SAFE_NAVIGATION);
13525 }
13526
13527 parse_write_name(parser, &call->name);
13528 return (pm_node_t *) pm_call_target_node_create(parser, call);
13529 }
13530 }
13531
13532 // If there is no call operator and the message is "[]" then this is
13533 // an aref expression, and we can transform it into an aset
13534 // expression.
13535 if (PM_NODE_FLAG_P(call, PM_CALL_NODE_FLAGS_INDEX)) {
13536 return (pm_node_t *) pm_index_target_node_create(parser, call);
13537 }
13538 }
// Any other call node shape falls through into the generic error handling.
13539 /* fallthrough */
13540 default:
13541 // In this case we have a node that we don't know how to convert
13542 // into a target. We need to treat it as an error. For now, we'll
13543 // mark it as an error and just skip right past it.
13544 pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_UNEXPECTED);
13545 return target;
13546 }
13547}
13548
13553static pm_node_t *
13554parse_target_validate(pm_parser_t *parser, pm_node_t *target, bool multiple) {
13555 pm_node_t *result = parse_target(parser, target, multiple, false);
13556
13557 // Ensure that we have one of an =, an 'in' in for indexes, and a ')' in
13558 // parens after the targets.
13559 if (
13560 !match1(parser, PM_TOKEN_EQUAL) &&
13561 !(context_p(parser, PM_CONTEXT_FOR_INDEX) && match1(parser, PM_TOKEN_KEYWORD_IN)) &&
13562 !(context_p(parser, PM_CONTEXT_PARENS) && match1(parser, PM_TOKEN_PARENTHESIS_RIGHT))
13563 ) {
13564 pm_parser_err_node(parser, result, PM_ERR_WRITE_TARGET_UNEXPECTED);
13565 }
13566
13567 return result;
13568}
13569
13574static pm_node_t *
13575parse_shareable_constant_write(pm_parser_t *parser, pm_node_t *write) {
13576 pm_shareable_constant_value_t shareable_constant = pm_parser_scope_shareable_constant_get(parser);
13577
13578 if (shareable_constant != PM_SCOPE_SHAREABLE_CONSTANT_NONE) {
13579 return (pm_node_t *) pm_shareable_constant_node_create(parser, write, shareable_constant);
13580 }
13581
13582 return write;
13583}
13584
/**
 * Convert a node that was parsed as an ordinary expression into a write node
 * that assigns value to it, using operator as the assignment operator token.
 *
 * For most node types a dedicated write node is created from the target and
 * the target is destroyed; call nodes are instead mutated in place into
 * attribute writes (foo.bar = 1) or index writes (foo[0] = 1). When the target
 * cannot be written to, a PM_ERR_WRITE_TARGET_UNEXPECTED error is reported on
 * the operator token and the target is returned unchanged.
 *
 * NOTE(review): the embedded line numbers in this listing skip in several
 * places inside the switch (13594, 13618-13619, 13622, 13627-13628, 13647,
 * 13656, 13661), so some `case` labels and declarations (for example the one
 * introducing `local_read`) are not visible here. Confirm against the
 * original file before relying on which node type each branch handles.
 */
13588static pm_node_t *
13589parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_node_t *value) {
13590 switch (PM_NODE_TYPE(target)) {
13591 case PM_MISSING_NODE:
// There is nothing to attach the value to, so it is released here.
13592 pm_node_destroy(parser, value);
13593 return target;
13595 pm_class_variable_write_node_t *node = pm_class_variable_write_node_create(parser, (pm_class_variable_read_node_t *) target, operator, value);
13596 pm_node_destroy(parser, target);
13597 return (pm_node_t *) node;
13598 }
13599 case PM_CONSTANT_PATH_NODE: {
13600 pm_node_t *node = (pm_node_t *) pm_constant_path_write_node_create(parser, (pm_constant_path_node_t *) target, operator, value);
13601
// An error is reported when context_def_p holds (presumably: the write
// appears inside a method definition — confirm).
13602 if (context_def_p(parser)) {
13603 pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_IN_METHOD);
13604 }
13605
// Unlike the other branches, the target is not destroyed here.
13606 return parse_shareable_constant_write(parser, node);
13607 }
13608 case PM_CONSTANT_READ_NODE: {
13609 pm_node_t *node = (pm_node_t *) pm_constant_write_node_create(parser, (pm_constant_read_node_t *) target, operator, value);
13610
13611 if (context_def_p(parser)) {
13612 pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_IN_METHOD);
13613 }
13614
13615 pm_node_destroy(parser, target);
13616 return parse_shareable_constant_write(parser, node);
13617 }
// Read-only targets get an error and then continue into the global variable
// write handling below.
13620 PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, target, PM_ERR_WRITE_TARGET_READONLY);
13621 /* fallthrough */
13623 pm_global_variable_write_node_t *node = pm_global_variable_write_node_create(parser, target, operator, value);
13624 pm_node_destroy(parser, target);
13625 return (pm_node_t *) node;
13626 }
13629
// Everything needed from the target is copied out before the target is
// destroyed further down.
13630 pm_constant_id_t name = local_read->name;
13631 pm_location_t name_loc = target->location;
13632
13633 uint32_t depth = local_read->depth;
13634 pm_scope_t *scope = pm_parser_scope_find(parser, depth);
13635
// Names that look like numbered parameters cannot be assigned. The diagnostic
// depends on whether the scope has already been flagged as having numbered
// parameters.
13636 if (pm_token_is_numbered_parameter(target->location.start, target->location.end)) {
13637 pm_diagnostic_id_t diag_id = (scope->parameters & PM_SCOPE_PARAMETERS_NUMBERED_FOUND) ? PM_ERR_EXPRESSION_NOT_WRITABLE_NUMBERED : PM_ERR_PARAMETER_NUMBERED_RESERVED;
13638 PM_PARSER_ERR_FORMAT(parser, target->location.start, target->location.end, diag_id, target->location.start);
13639 parse_target_implicit_parameter(parser, target);
13640 }
13641
13642 pm_locals_unread(&scope->locals, name);
13643 pm_node_destroy(parser, target);
13644
13645 return (pm_node_t *) pm_local_variable_write_node_create(parser, name, depth, value, &name_loc, operator);
13646 }
// The statements below create a local variable write named "it" at depth 0,
// drop the old node from the implicit parameter list and destroy it.
13648 pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
13649 pm_node_t *node = (pm_node_t *) pm_local_variable_write_node_create(parser, name, 0, value, &target->location, operator);
13650
13651 parse_target_implicit_parameter(parser, target);
13652 pm_node_destroy(parser, target);
13653
13654 return node;
13655 }
13657 pm_node_t *write_node = (pm_node_t *) pm_instance_variable_write_node_create(parser, (pm_instance_variable_read_node_t *) target, operator, value);
13658 pm_node_destroy(parser, target);
13659 return write_node;
13660 }
13662 return (pm_node_t *) pm_multi_write_node_create(parser, (pm_multi_target_node_t *) target, operator, value);
13663 case PM_SPLAT_NODE: {
13664 pm_splat_node_t *splat = (pm_splat_node_t *) target;
13665
// NOTE(review): value is handed both to the nested parse_write here and to
// the multi write node created below — confirm that this sharing is intended.
13666 if (splat->expression != NULL) {
13667 splat->expression = parse_write(parser, splat->expression, operator, value);
13668 }
13669
// A lone splat on the left-hand side is wrapped in a fresh multi target so
// that the result is a multi write.
13670 pm_multi_target_node_t *multi_target = pm_multi_target_node_create(parser);
13671 pm_multi_target_node_targets_append(parser, multi_target, (pm_node_t *) splat);
13672
13673 return (pm_node_t *) pm_multi_write_node_create(parser, multi_target, operator, value);
13674 }
13675 case PM_CALL_NODE: {
13676 pm_call_node_t *call = (pm_call_node_t *) target;
13677
13678 // If we have no arguments to the call node and we need this to be a
13679 // target then this is either a method call or a local variable
13680 // write.
13681 if (
13682 (call->message_loc.start != NULL) &&
13683 (call->message_loc.end[-1] != '!') &&
13684 (call->message_loc.end[-1] != '?') &&
13685 (call->opening_loc.start == NULL) &&
13686 (call->arguments == NULL) &&
13687 (call->block == NULL)
13688 ) {
13689 if (call->receiver == NULL) {
13690 // When we get here, we have a local variable write, because it
13691 // was previously marked as a method call but now we have an =.
13692 // This looks like:
13693 //
13694 // foo = 1
13695 //
13696 // When it was parsed in the prefix position, foo was seen as a
13697 // method call with no receiver and no arguments. Now we have an
13698 // =, so we know it's a local variable write.
// The message location is copied first because the call node is destroyed
// before the write node is created from that location.
13699 const pm_location_t message = call->message_loc;
13700
13701 pm_parser_local_add_location(parser, message.start, message.end, 0);
13702 pm_node_destroy(parser, target);
13703
13704 pm_constant_id_t constant_id = pm_parser_constant_id_location(parser, message.start, message.end);
13705 target = (pm_node_t *) pm_local_variable_write_node_create(parser, constant_id, 0, value, &message, operator);
13706
13707 pm_refute_numbered_parameter(parser, message.start, message.end);
13708 return target;
13709 }
13710
13711 if (char_is_identifier_start(parser, call->message_loc.start)) {
13712 // When we get here, we have a method call, because it was
13713 // previously marked as a method call but now we have an =. This
13714 // looks like:
13715 //
13716 // foo.bar = 1
13717 //
13718 // When it was parsed in the prefix position, foo.bar was seen as a
13719 // method call with no arguments. Now we have an =, so we know it's
13720 // a method call with an argument. In this case we will create the
13721 // arguments node, parse the argument, and add it to the list.
13722 pm_arguments_node_t *arguments = pm_arguments_node_create(parser);
13723 call->arguments = arguments;
13724
13725 pm_arguments_node_arguments_append(arguments, value);
// Extend the call's location so that it covers the appended value.
13726 call->base.location.end = arguments->base.location.end;
13727
13728 parse_write_name(parser, &call->name);
13729 pm_node_flag_set((pm_node_t *) call, PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE | pm_implicit_array_write_flags(value, PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY));
13730
13731 return (pm_node_t *) call;
13732 }
13733 }
13734
13735 // If there is no call operator and the message is "[]" then this is
13736 // an aref expression, and we can transform it into an aset
13737 // expression.
13738 if (PM_NODE_FLAG_P(call, PM_CALL_NODE_FLAGS_INDEX)) {
13739 if (call->arguments == NULL) {
13740 call->arguments = pm_arguments_node_create(parser);
13741 }
13742
13743 pm_arguments_node_arguments_append(call->arguments, value);
13744 target->location.end = value->location.end;
13745
13746 // Replace the name with "[]=".
13747 call->name = pm_parser_constant_id_constant(parser, "[]=", 3);
13748
13749 // Ensure that the arguments for []= don't contain keywords
13750 pm_index_arguments_check(parser, call->arguments, call->block);
13751 pm_node_flag_set((pm_node_t *) call, PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE | pm_implicit_array_write_flags(value, PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY));
13752
13753 return target;
13754 }
13755
13756 // If there are arguments on the call node, then it can't be a method
13757 // call ending with = or a local variable write, so it must be a
13758 // syntax error. In this case we'll fall through to our default
13759 // handling. We need to free the value that we parsed because there
13760 // is no way for us to attach it to the tree at this point.
13761 pm_node_destroy(parser, value);
13762 }
13763 /* fallthrough */
13764 default:
13765 // In this case we have a node that we don't know how to convert into a
13766 // target. We need to treat it as an error. For now, we'll mark it as an
13767 // error and just skip right past it.
13768 pm_parser_err_token(parser, operator, PM_ERR_WRITE_TARGET_UNEXPECTED);
13769 return target;
13770 }
13771}
13772
13779static pm_node_t *
13780parse_unwriteable_write(pm_parser_t *parser, pm_node_t *target, const pm_token_t *equals, pm_node_t *value) {
13781 switch (PM_NODE_TYPE(target)) {
13782 case PM_SOURCE_ENCODING_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_ENCODING); break;
13783 case PM_FALSE_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_FALSE); break;
13784 case PM_SOURCE_FILE_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_FILE); break;
13785 case PM_SOURCE_LINE_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_LINE); break;
13786 case PM_NIL_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_NIL); break;
13787 case PM_SELF_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_SELF); break;
13788 case PM_TRUE_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_TRUE); break;
13789 default: break;
13790 }
13791
13792 pm_constant_id_t name = pm_parser_local_add_location(parser, target->location.start, target->location.end, 1);
13793 pm_local_variable_write_node_t *result = pm_local_variable_write_node_create(parser, name, 0, value, &target->location, equals);
13794
13795 pm_node_destroy(parser, target);
13796 return (pm_node_t *) result;
13797}
13798
13809static pm_node_t *
13810parse_targets(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t binding_power, uint16_t depth) {
13811 bool has_rest = PM_NODE_TYPE_P(first_target, PM_SPLAT_NODE);
13812
13813 pm_multi_target_node_t *result = pm_multi_target_node_create(parser);
13814 pm_multi_target_node_targets_append(parser, result, parse_target(parser, first_target, true, false));
13815
13816 while (accept1(parser, PM_TOKEN_COMMA)) {
13817 if (accept1(parser, PM_TOKEN_USTAR)) {
13818 // Here we have a splat operator. It can have a name or be
13819 // anonymous. It can be the final target or be in the middle if
13820 // there haven't been any others yet.
13821 if (has_rest) {
13822 pm_parser_err_previous(parser, PM_ERR_MULTI_ASSIGN_MULTI_SPLATS);
13823 }
13824
13825 pm_token_t star_operator = parser->previous;
13826 pm_node_t *name = NULL;
13827
13828 if (token_begins_expression_p(parser->current.type)) {
13829 name = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
13830 name = parse_target(parser, name, true, true);
13831 }
13832
13833 pm_node_t *splat = (pm_node_t *) pm_splat_node_create(parser, &star_operator, name);
13834 pm_multi_target_node_targets_append(parser, result, splat);
13835 has_rest = true;
13836 } else if (match1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
13837 context_push(parser, PM_CONTEXT_MULTI_TARGET);
13838 pm_node_t *target = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA, (uint16_t) (depth + 1));
13839 target = parse_target(parser, target, true, false);
13840
13841 pm_multi_target_node_targets_append(parser, result, target);
13842 context_pop(parser);
13843 } else if (token_begins_expression_p(parser->current.type)) {
13844 pm_node_t *target = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA, (uint16_t) (depth + 1));
13845 target = parse_target(parser, target, true, false);
13846
13847 pm_multi_target_node_targets_append(parser, result, target);
13848 } else if (!match1(parser, PM_TOKEN_EOF)) {
13849 // If we get here, then we have a trailing , in a multi target node.
13850 // We'll add an implicit rest node to represent this.
13851 pm_node_t *rest = (pm_node_t *) pm_implicit_rest_node_create(parser, &parser->previous);
13852 pm_multi_target_node_targets_append(parser, result, rest);
13853 break;
13854 }
13855 }
13856
13857 return (pm_node_t *) result;
13858}
13859
13864static pm_node_t *
13865parse_targets_validate(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t binding_power, uint16_t depth) {
13866 pm_node_t *result = parse_targets(parser, first_target, binding_power, depth);
13867 accept1(parser, PM_TOKEN_NEWLINE);
13868
13869 // Ensure that we have either an = or a ) after the targets.
13870 if (!match2(parser, PM_TOKEN_EQUAL, PM_TOKEN_PARENTHESIS_RIGHT)) {
13871 pm_parser_err_node(parser, result, PM_ERR_WRITE_TARGET_UNEXPECTED);
13872 }
13873
13874 return result;
13875}
13876
/**
 * Parse a list of statements within the given context.
 *
 * Leading semicolons and newlines are skipped. If the current token then
 * terminates the context, NULL is returned. Otherwise a statements node is
 * created, the context is pushed for the duration of the loop, and expressions
 * are parsed and appended until a terminator for the context is reached, the
 * parser is recovering from an error, or end of file is hit. The collected
 * statements are finally checked for void-context usage before being returned.
 */
13880static pm_statements_node_t *
13881parse_statements(pm_parser_t *parser, pm_context_t context, uint16_t depth) {
13882 // First, skip past any optional terminators that might be at the beginning
13883 // of the statements.
13884 while (accept2(parser, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE));
13885
13886 // If we have a terminator, then we can just return NULL.
13887 if (context_terminator(context, &parser->current)) return NULL;
13888
13889 pm_statements_node_t *statements = pm_statements_node_create(parser);
13890
13891 // At this point we know we have at least one statement, and that it
13892 // immediately follows the current token.
13893 context_push(parser, context);
13894
13895 while (true) {
13896 pm_node_t *node = parse_expression(parser, PM_BINDING_POWER_STATEMENT, true, false, PM_ERR_CANNOT_PARSE_EXPRESSION, (uint16_t) (depth + 1));
13897 pm_statements_node_body_append(parser, statements, node, true);
13898
13899 // If we're recovering from a syntax error, then we need to stop parsing
13900 // the statements now.
13901 if (parser->recovering) {
13902 // If this is the level of context where the recovery has happened,
13903 // then we can mark the parser as done recovering.
13904 if (context_terminator(context, &parser->current)) parser->recovering = false;
13905 break;
13906 }
13907
13908 // If we have a terminator, then we will parse all consecutive
13909 // terminators and then continue parsing the statements list.
13910 if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
13911 // If we have a terminator, then we will continue parsing the
13912 // statements list.
13913 while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
13914 if (context_terminator(context, &parser->current)) break;
13915
13916 // Now we can continue parsing the list of statements.
13917 continue;
13918 }
13919
13920 // At this point we have a list of statements that are not terminated by
13921 // a newline or semicolon. At this point we need to check if we're at
13922 // the end of the statements list. If we are, then we should break out
13923 // of the loop.
13924 if (context_terminator(context, &parser->current)) break;
13925
13926 // At this point, we have a syntax error, because the statement was not
13927 // terminated by a newline or semicolon, and we're not at the end of the
13928 // statements list. Ideally we should scan forward to determine if we
13929 // should insert a missing terminator or break out of parsing the
13930 // statements list at this point.
13931 //
13932 // We don't have that yet, so instead we'll do a more naive approach. If
13933 // we were unable to parse an expression, then we will skip past this
13934 // token and continue parsing the statements list. Otherwise we'll add
13935 // an error and continue parsing the statements list.
13936 if (PM_NODE_TYPE_P(node, PM_MISSING_NODE)) {
// Skip the token that could not be parsed so that the loop makes progress.
13937 parser_lex(parser);
13938
13939 // If we are at the end of the file, then we need to stop parsing
13940 // the statements entirely at this point. Mark the parser as
13941 // recovering, as we know that EOF closes the top-level context, and
13942 // then break out of the loop.
13943 if (match1(parser, PM_TOKEN_EOF)) {
13944 parser->recovering = true;
13945 break;
13946 }
13947
13948 while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
13949 if (context_terminator(context, &parser->current)) break;
13950 } else if (!accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_EOF)) {
13951 // This is an inlined version of accept1 because the error that we
13952 // want to add has varargs. If this happens again, we should
13953 // probably extract a helper function.
13954 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(parser->current.type));
// The previous token is turned into an empty "missing" token positioned at
// its own end, standing in for the terminator that was expected.
13955 parser->previous.start = parser->previous.end;
13956 parser->previous.type = PM_TOKEN_MISSING;
13957 }
13958 }
13959
13960 context_pop(parser);
// last_value is forwarded to the void statements check; it defaults to true
// and is cleared for specific contexts.
// NOTE(review): the case labels selecting those contexts (inner lines
// 13963-13964) are not visible in this listing — confirm against the original
// file.
13961 bool last_value = true;
13962 switch (context) {
13965 last_value = false;
13966 break;
13967 default:
13968 break;
13969 }
13970 pm_void_statements_check(parser, statements, last_value);
13971
13972 return statements;
13973}
13974
13979static void
13980pm_hash_key_static_literals_add(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *node) {
13981 const pm_node_t *duplicated = pm_static_literals_add(&parser->newline_list, parser->start_line, literals, node, true);
13982
13983 if (duplicated != NULL) {
13984 pm_buffer_t buffer = { 0 };
13985 pm_static_literal_inspect(&buffer, &parser->newline_list, parser->start_line, parser->encoding->name, duplicated);
13986
13987 pm_diagnostic_list_append_format(
13988 &parser->warning_list,
13989 duplicated->location.start,
13990 duplicated->location.end,
13991 PM_WARN_DUPLICATED_HASH_KEY,
13992 (int) pm_buffer_length(&buffer),
13993 pm_buffer_value(&buffer),
13994 pm_newline_list_line_column(&parser->newline_list, node->location.start, parser->start_line).line
13995 );
13996
13997 pm_buffer_free(&buffer);
13998 }
13999}
14000
14005static void
14006pm_when_clause_static_literals_add(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *node) {
14007 pm_node_t *previous;
14008
14009 if ((previous = pm_static_literals_add(&parser->newline_list, parser->start_line, literals, node, false)) != NULL) {
14010 pm_diagnostic_list_append_format(
14011 &parser->warning_list,
14012 node->location.start,
14013 node->location.end,
14014 PM_WARN_DUPLICATED_WHEN_CLAUSE,
14015 pm_newline_list_line_column(&parser->newline_list, node->location.start, parser->start_line).line,
14016 pm_newline_list_line_column(&parser->newline_list, previous->location.start, parser->start_line).line
14017 );
14018 }
14019}
14020
/**
 * Parse the elements of a hash literal or of a bare keyword hash and append
 * them to node.
 *
 * node is treated as a hash node when its type is PM_HASH_NODE and as a
 * keyword hash node otherwise. Each element is either a `**` splat, a label
 * key (optionally with an omitted value), or a general `key => value` pair.
 * Keys are recorded in literals so that duplicated keys produce a warning.
 * Parsing continues while elements are separated by commas and the token
 * after the comma can start another element.
 *
 * Returns true if at least one `**` splat element was parsed.
 *
 * NOTE(review): inner line 14026 (directly after the opening brace) is not
 * visible in this listing — confirm its contents against the original file.
 */
14024static bool
14025parse_assocs(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *node, uint16_t depth) {
14027 bool contains_keyword_splat = false;
14028
14029 while (true) {
14030 pm_node_t *element;
14031
14032 switch (parser->current.type) {
14033 case PM_TOKEN_USTAR_STAR: {
14034 parser_lex(parser);
14035 pm_token_t operator = parser->previous;
14036 pm_node_t *value = NULL;
14037
14038 if (match1(parser, PM_TOKEN_BRACE_LEFT)) {
14039 // If we're about to parse a nested hash that is being
14040 // pushed into this hash directly with **, then we want the
14041 // inner hash to share the static literals with the outer
14042 // hash.
14043 parser->current_hash_keys = literals;
14044 value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH, (uint16_t) (depth + 1));
14045 } else if (token_begins_expression_p(parser->current.type)) {
14046 value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH, (uint16_t) (depth + 1));
14047 } else {
// A bare `**` with no expression: value stays NULL and the forwarding
// keywords check is run on the operator.
14048 pm_parser_scope_forwarding_keywords_check(parser, &operator);
14049 }
14050
14051 element = (pm_node_t *) pm_assoc_splat_node_create(parser, value, &operator);
14052 contains_keyword_splat = true;
14053 break;
14054 }
14055 case PM_TOKEN_LABEL: {
14056 pm_token_t label = parser->current;
14057 parser_lex(parser);
14058
14059 pm_node_t *key = (pm_node_t *) pm_symbol_node_label_create(parser, &label);
14060 pm_hash_key_static_literals_add(parser, literals, key);
14061
14062 pm_token_t operator = not_provided(parser);
14063 pm_node_t *value = NULL;
14064
14065 if (token_begins_expression_p(parser->current.type)) {
14066 value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_HASH_EXPRESSION_AFTER_LABEL, (uint16_t) (depth + 1));
14067 } else {
// No value follows the label, so the value is derived from the label text
// itself (the label minus its final character): a constant read when the
// encoding's isupper_char check on the label succeeds, otherwise a local
// variable read or a variable call.
14068 if (parser->encoding->isupper_char(label.start, (label.end - 1) - label.start)) {
14069 pm_token_t constant = { .type = PM_TOKEN_CONSTANT, .start = label.start, .end = label.end - 1 };
14070 value = (pm_node_t *) pm_constant_read_node_create(parser, &constant);
14071 } else {
// NOTE(review): this local `depth` shadows the uint16_t `depth` parameter for
// the rest of this block; here it holds the local's scope depth, with -1
// meaning "not resolved as a local".
14072 int depth = -1;
14073 pm_token_t identifier = { .type = PM_TOKEN_IDENTIFIER, .start = label.start, .end = label.end - 1 };
14074
14075 if (identifier.end[-1] == '!' || identifier.end[-1] == '?') {
14076 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, identifier, PM_ERR_INVALID_LOCAL_VARIABLE_READ);
14077 } else {
14078 depth = pm_parser_local_depth(parser, &identifier);
14079 }
14080
14081 if (depth == -1) {
14082 value = (pm_node_t *) pm_call_node_variable_call_create(parser, &identifier);
14083 } else {
14084 value = (pm_node_t *) pm_local_variable_read_node_create(parser, &identifier, (uint32_t) depth);
14085 }
14086 }
14087
// The derived value was built from the label without its last character;
// extend its location by one so that it spans the whole label, then wrap it
// in an implicit node.
14088 value->location.end++;
14089 value = (pm_node_t *) pm_implicit_node_create(parser, value);
14090 }
14091
14092 element = (pm_node_t *) pm_assoc_node_create(parser, key, &operator, value);
14093 break;
14094 }
14095 default: {
14096 pm_node_t *key = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, true, PM_ERR_HASH_KEY, (uint16_t) (depth + 1));
14097
14098 // Hash keys that are strings are automatically frozen. We will
14099 // mark that here.
14100 if (PM_NODE_TYPE_P(key, PM_STRING_NODE)) {
14101 pm_node_flag_set(key, PM_STRING_FLAGS_FROZEN | PM_NODE_FLAG_STATIC_LITERAL);
14102 }
14103
14104 pm_hash_key_static_literals_add(parser, literals, key);
14105
// A key that is itself a label symbol needs no `=>`; every other key must be
// followed by one.
14106 pm_token_t operator;
14107 if (pm_symbol_node_label_p(key)) {
14108 operator = not_provided(parser);
14109 } else {
14110 expect1(parser, PM_TOKEN_EQUAL_GREATER, PM_ERR_HASH_ROCKET);
14111 operator = parser->previous;
14112 }
14113
14114 pm_node_t *value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_HASH_VALUE, (uint16_t) (depth + 1));
14115 element = (pm_node_t *) pm_assoc_node_create(parser, key, &operator, value);
14116 break;
14117 }
14118 }
14119
14120 if (PM_NODE_TYPE_P(node, PM_HASH_NODE)) {
14121 pm_hash_node_elements_append((pm_hash_node_t *) node, element);
14122 } else {
14123 pm_keyword_hash_node_elements_append((pm_keyword_hash_node_t *) node, element);
14124 }
14125
14126 // If there's no comma after the element, then we're done.
14127 if (!accept1(parser, PM_TOKEN_COMMA)) break;
14128
14129 // If the next element starts with a label or a **, then we know we have
14130 // another element in the hash, so we'll continue parsing.
14131 if (match2(parser, PM_TOKEN_USTAR_STAR, PM_TOKEN_LABEL)) continue;
14132
14133 // Otherwise we need to check if the subsequent token begins an expression.
14134 // If it does, then we'll continue parsing.
14135 if (token_begins_expression_p(parser->current.type)) continue;
14136
14137 // Otherwise by default we will exit out of this loop.
14138 break;
14139 }
14140
14141 return contains_keyword_splat;
14142}
14143
14147static inline void
14148parse_arguments_append(pm_parser_t *parser, pm_arguments_t *arguments, pm_node_t *argument) {
14149 if (arguments->arguments == NULL) {
14150 arguments->arguments = pm_arguments_node_create(parser);
14151 }
14152
14153 pm_arguments_node_arguments_append(arguments->arguments, argument);
14154}
14155
14159static void
14160parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_forwarding, pm_token_type_t terminator, uint16_t depth) {
14161 pm_binding_power_t binding_power = pm_binding_powers[parser->current.type].left;
14162
14163 // First we need to check if the next token is one that could be the start
14164 // of an argument. If it's not, then we can just return.
14165 if (
14166 match2(parser, terminator, PM_TOKEN_EOF) ||
14167 (binding_power != PM_BINDING_POWER_UNSET && binding_power < PM_BINDING_POWER_RANGE) ||
14168 context_terminator(parser->current_context->context, &parser->current)
14169 ) {
14170 return;
14171 }
14172
14173 bool parsed_first_argument = false;
14174 bool parsed_bare_hash = false;
14175 bool parsed_block_argument = false;
14176 bool parsed_forwarding_arguments = false;
14177
14178 while (!match1(parser, PM_TOKEN_EOF)) {
14179 if (parsed_forwarding_arguments) {
14180 pm_parser_err_current(parser, PM_ERR_ARGUMENT_AFTER_FORWARDING_ELLIPSES);
14181 }
14182
14183 pm_node_t *argument = NULL;
14184
14185 switch (parser->current.type) {
14187 case PM_TOKEN_LABEL: {
14188 if (parsed_bare_hash) {
14189 pm_parser_err_current(parser, PM_ERR_ARGUMENT_BARE_HASH);
14190 }
14191
14192 pm_keyword_hash_node_t *hash = pm_keyword_hash_node_create(parser);
14193 argument = (pm_node_t *) hash;
14194
14195 pm_static_literals_t hash_keys = { 0 };
14196 bool contains_keyword_splat = parse_assocs(parser, &hash_keys, (pm_node_t *) hash, (uint16_t) (depth + 1));
14197
14198 parse_arguments_append(parser, arguments, argument);
14199
14201 if (contains_keyword_splat) flags |= PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORD_SPLAT;
14202 pm_node_flag_set((pm_node_t *) arguments->arguments, flags);
14203
14204 pm_static_literals_free(&hash_keys);
14205 parsed_bare_hash = true;
14206
14207 break;
14208 }
14209 case PM_TOKEN_UAMPERSAND: {
14210 parser_lex(parser);
14211 pm_token_t operator = parser->previous;
14212 pm_node_t *expression = NULL;
14213
14214 if (token_begins_expression_p(parser->current.type)) {
14215 expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_EXPECT_ARGUMENT, (uint16_t) (depth + 1));
14216 } else {
14217 pm_parser_scope_forwarding_block_check(parser, &operator);
14218 }
14219
14220 argument = (pm_node_t *) pm_block_argument_node_create(parser, &operator, expression);
14221 if (parsed_block_argument) {
14222 parse_arguments_append(parser, arguments, argument);
14223 } else {
14224 arguments->block = argument;
14225 }
14226
14227 if (match1(parser, PM_TOKEN_COMMA)) {
14228 pm_parser_err_current(parser, PM_ERR_ARGUMENT_AFTER_BLOCK);
14229 }
14230
14231 parsed_block_argument = true;
14232 break;
14233 }
14234 case PM_TOKEN_USTAR: {
14235 parser_lex(parser);
14236 pm_token_t operator = parser->previous;
14237
14239 pm_parser_scope_forwarding_positionals_check(parser, &operator);
14240 argument = (pm_node_t *) pm_splat_node_create(parser, &operator, NULL);
14241 if (parsed_bare_hash) {
14242 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_SPLAT_AFTER_ASSOC_SPLAT);
14243 }
14244 } else {
14245 pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT, (uint16_t) (depth + 1));
14246
14247 if (parsed_bare_hash) {
14248 pm_parser_err(parser, operator.start, expression->location.end, PM_ERR_ARGUMENT_SPLAT_AFTER_ASSOC_SPLAT);
14249 }
14250
14251 argument = (pm_node_t *) pm_splat_node_create(parser, &operator, expression);
14252 }
14253
14254 parse_arguments_append(parser, arguments, argument);
14255 break;
14256 }
14257 case PM_TOKEN_UDOT_DOT_DOT: {
14258 if (accepts_forwarding) {
14259 parser_lex(parser);
14260
14261 if (token_begins_expression_p(parser->current.type)) {
14262 // If the token begins an expression then this ... was
14263 // not actually argument forwarding but was instead a
14264 // range.
14265 pm_token_t operator = parser->previous;
14266 pm_node_t *right = parse_expression(parser, PM_BINDING_POWER_RANGE, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
14267
14268 // If we parse a range, we need to validate that we
14269 // didn't accidentally violate the nonassoc rules of the
14270 // ... operator.
14271 if (PM_NODE_TYPE_P(right, PM_RANGE_NODE)) {
14272 pm_range_node_t *range = (pm_range_node_t *) right;
14273 pm_parser_err(parser, range->operator_loc.start, range->operator_loc.end, PM_ERR_UNEXPECTED_RANGE_OPERATOR);
14274 }
14275
14276 argument = (pm_node_t *) pm_range_node_create(parser, NULL, &operator, right);
14277 } else {
14278 pm_parser_scope_forwarding_all_check(parser, &parser->previous);
14279 if (parsed_first_argument && terminator == PM_TOKEN_EOF) {
14280 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORWARDING_UNBOUND);
14281 }
14282
14283 argument = (pm_node_t *) pm_forwarding_arguments_node_create(parser, &parser->previous);
14284 parse_arguments_append(parser, arguments, argument);
14285 pm_node_flag_set((pm_node_t *) arguments->arguments, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_FORWARDING);
14286 arguments->has_forwarding = true;
14287 parsed_forwarding_arguments = true;
14288 break;
14289 }
14290 }
14291 }
14292 /* fallthrough */
14293 default: {
14294 if (argument == NULL) {
14295 argument = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, !parsed_first_argument, true, PM_ERR_EXPECT_ARGUMENT, (uint16_t) (depth + 1));
14296 }
14297
14298 bool contains_keywords = false;
14299 bool contains_keyword_splat = false;
14300
14301 if (pm_symbol_node_label_p(argument) || accept1(parser, PM_TOKEN_EQUAL_GREATER)) {
14302 if (parsed_bare_hash) {
14303 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_BARE_HASH);
14304 }
14305
14306 pm_token_t operator;
14307 if (parser->previous.type == PM_TOKEN_EQUAL_GREATER) {
14308 operator = parser->previous;
14309 } else {
14310 operator = not_provided(parser);
14311 }
14312
14313 pm_keyword_hash_node_t *bare_hash = pm_keyword_hash_node_create(parser);
14314 contains_keywords = true;
14315
14316 // Create the set of static literals for this hash.
14317 pm_static_literals_t hash_keys = { 0 };
14318 pm_hash_key_static_literals_add(parser, &hash_keys, argument);
14319
14320 // Finish parsing the one we are part way through.
14321 pm_node_t *value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_HASH_VALUE, (uint16_t) (depth + 1));
14322 argument = (pm_node_t *) pm_assoc_node_create(parser, argument, &operator, value);
14323
14324 pm_keyword_hash_node_elements_append(bare_hash, argument);
14325 argument = (pm_node_t *) bare_hash;
14326
14327 // Then parse more if we have a comma
14328 if (accept1(parser, PM_TOKEN_COMMA) && (
14329 token_begins_expression_p(parser->current.type) ||
14330 match2(parser, PM_TOKEN_USTAR_STAR, PM_TOKEN_LABEL)
14331 )) {
14332 contains_keyword_splat = parse_assocs(parser, &hash_keys, (pm_node_t *) bare_hash, (uint16_t) (depth + 1));
14333 }
14334
14335 pm_static_literals_free(&hash_keys);
14336 parsed_bare_hash = true;
14337 }
14338
14339 parse_arguments_append(parser, arguments, argument);
14340
14341 pm_node_flags_t flags = 0;
14342 if (contains_keywords) flags |= PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS;
14343 if (contains_keyword_splat) flags |= PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORD_SPLAT;
14344 pm_node_flag_set((pm_node_t *) arguments->arguments, flags);
14345
14346 break;
14347 }
14348 }
14349
14350 parsed_first_argument = true;
14351
14352 // If parsing the argument failed, we need to stop parsing arguments.
14353 if (PM_NODE_TYPE_P(argument, PM_MISSING_NODE) || parser->recovering) break;
14354
14355 // If the terminator of these arguments is not EOF, then we have a
14356 // specific token we're looking for. In that case we can accept a
14357 // newline here because it is not functioning as a statement terminator.
14358 bool accepted_newline = false;
14359 if (terminator != PM_TOKEN_EOF) {
14360 accepted_newline = accept1(parser, PM_TOKEN_NEWLINE);
14361 }
14362
14363 if (parser->previous.type == PM_TOKEN_COMMA && parsed_bare_hash) {
14364 // If we previously were on a comma and we just parsed a bare hash,
14365 // then we want to continue parsing arguments. This is because the
14366 // comma was grabbed up by the hash parser.
14367 } else if (accept1(parser, PM_TOKEN_COMMA)) {
14368 // If there was a comma, then we need to check if we also accepted a
14369 // newline. If we did, then this is a syntax error.
14370 if (accepted_newline) {
14371 pm_parser_err_previous(parser, PM_ERR_INVALID_COMMA);
14372 }
14373 } else {
14374 // If there is no comma at the end of the argument list then we're
14375 // done parsing arguments and can break out of this loop.
14376 break;
14377 }
14378
14379 // If we hit the terminator, then that means we have a trailing comma so
14380 // we can accept that output as well.
14381 if (match1(parser, terminator)) break;
14382 }
14383}
14384
14396parse_required_destructured_parameter(pm_parser_t *parser) {
14397 expect1(parser, PM_TOKEN_PARENTHESIS_LEFT, PM_ERR_EXPECT_LPAREN_REQ_PARAMETER);
14398
14399 pm_multi_target_node_t *node = pm_multi_target_node_create(parser);
14400 pm_multi_target_node_opening_set(node, &parser->previous);
14401
14402 do {
14403 pm_node_t *param;
14404
14405 // If we get here then we have a trailing comma, which isn't allowed in
14406 // the grammar. In other places, multi targets _do_ allow trailing
14407 // commas, so here we'll assume this is a mistake of the user not
14408 // knowing it's not allowed here.
14409 if (node->lefts.size > 0 && match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
14410 param = (pm_node_t *) pm_implicit_rest_node_create(parser, &parser->previous);
14411 pm_multi_target_node_targets_append(parser, node, param);
14412 pm_parser_err_current(parser, PM_ERR_PARAMETER_WILD_LOOSE_COMMA);
14413 break;
14414 }
14415
14416 if (match1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
14417 param = (pm_node_t *) parse_required_destructured_parameter(parser);
14418 } else if (accept1(parser, PM_TOKEN_USTAR)) {
14419 pm_token_t star = parser->previous;
14420 pm_node_t *value = NULL;
14421
14422 if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
14423 pm_token_t name = parser->previous;
14424 value = (pm_node_t *) pm_required_parameter_node_create(parser, &name);
14425 if (pm_parser_parameter_name_check(parser, &name)) {
14426 pm_node_flag_set_repeated_parameter(value);
14427 }
14428 pm_parser_local_add_token(parser, &name, 1);
14429 }
14430
14431 param = (pm_node_t *) pm_splat_node_create(parser, &star, value);
14432 } else {
14433 expect1(parser, PM_TOKEN_IDENTIFIER, PM_ERR_EXPECT_IDENT_REQ_PARAMETER);
14434 pm_token_t name = parser->previous;
14435
14436 param = (pm_node_t *) pm_required_parameter_node_create(parser, &name);
14437 if (pm_parser_parameter_name_check(parser, &name)) {
14438 pm_node_flag_set_repeated_parameter(param);
14439 }
14440 pm_parser_local_add_token(parser, &name, 1);
14441 }
14442
14443 pm_multi_target_node_targets_append(parser, node, param);
14444 } while (accept1(parser, PM_TOKEN_COMMA));
14445
14446 accept1(parser, PM_TOKEN_NEWLINE);
14447 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN_REQ_PARAMETER);
14448 pm_multi_target_node_closing_set(node, &parser->previous);
14449
14450 return node;
14451}
14452
/**
 * The ordering states used while validating the order of parameters in a
 * parameter list.
 *
 * The numeric values matter: parameter kinds that must appear later in the
 * list have smaller values. The ordering check compares these values
 * directly, rejecting a parameter whose state is greater than the current one
 * and otherwise lowering the current state to it.
 */
typedef enum {
    PM_PARAMETERS_NO_CHANGE = 0, // Extra state for tokens that should not change the state
    PM_PARAMETERS_ORDER_NOTHING_AFTER = 1,
    PM_PARAMETERS_ORDER_KEYWORDS_REST = 2,
    PM_PARAMETERS_ORDER_KEYWORDS = 3,
    PM_PARAMETERS_ORDER_REST = 4,
    PM_PARAMETERS_ORDER_AFTER_OPTIONAL = 5,
    PM_PARAMETERS_ORDER_OPTIONAL = 6,
    PM_PARAMETERS_ORDER_NAMED = 7,
    PM_PARAMETERS_ORDER_NONE = 8,
} pm_parameters_order_t;
14468
14472static pm_parameters_order_t parameters_ordering[PM_TOKEN_MAXIMUM] = {
14473 [0] = PM_PARAMETERS_NO_CHANGE,
14474 [PM_TOKEN_UAMPERSAND] = PM_PARAMETERS_ORDER_NOTHING_AFTER,
14475 [PM_TOKEN_AMPERSAND] = PM_PARAMETERS_ORDER_NOTHING_AFTER,
14476 [PM_TOKEN_UDOT_DOT_DOT] = PM_PARAMETERS_ORDER_NOTHING_AFTER,
14477 [PM_TOKEN_IDENTIFIER] = PM_PARAMETERS_ORDER_NAMED,
14478 [PM_TOKEN_PARENTHESIS_LEFT] = PM_PARAMETERS_ORDER_NAMED,
14479 [PM_TOKEN_EQUAL] = PM_PARAMETERS_ORDER_OPTIONAL,
14480 [PM_TOKEN_LABEL] = PM_PARAMETERS_ORDER_KEYWORDS,
14481 [PM_TOKEN_USTAR] = PM_PARAMETERS_ORDER_AFTER_OPTIONAL,
14482 [PM_TOKEN_STAR] = PM_PARAMETERS_ORDER_AFTER_OPTIONAL,
14483 [PM_TOKEN_USTAR_STAR] = PM_PARAMETERS_ORDER_KEYWORDS_REST,
14484 [PM_TOKEN_STAR_STAR] = PM_PARAMETERS_ORDER_KEYWORDS_REST
14485};
14486
14494static bool
14495update_parameter_state(pm_parser_t *parser, pm_token_t *token, pm_parameters_order_t *current) {
14496 pm_parameters_order_t state = parameters_ordering[token->type];
14497 if (state == PM_PARAMETERS_NO_CHANGE) return true;
14498
14499 // If we see another ordered argument after a optional argument
14500 // we only continue parsing ordered arguments until we stop seeing ordered arguments.
14501 if (*current == PM_PARAMETERS_ORDER_OPTIONAL && state == PM_PARAMETERS_ORDER_NAMED) {
14502 *current = PM_PARAMETERS_ORDER_AFTER_OPTIONAL;
14503 return true;
14504 } else if (*current == PM_PARAMETERS_ORDER_AFTER_OPTIONAL && state == PM_PARAMETERS_ORDER_NAMED) {
14505 return true;
14506 }
14507
14508 if (token->type == PM_TOKEN_USTAR && *current == PM_PARAMETERS_ORDER_AFTER_OPTIONAL) {
14509 pm_parser_err_token(parser, token, PM_ERR_PARAMETER_STAR);
14510 return false;
14511 } else if (token->type == PM_TOKEN_UDOT_DOT_DOT && (*current >= PM_PARAMETERS_ORDER_KEYWORDS_REST && *current <= PM_PARAMETERS_ORDER_AFTER_OPTIONAL)) {
14512 pm_parser_err_token(parser, token, *current == PM_PARAMETERS_ORDER_AFTER_OPTIONAL ? PM_ERR_PARAMETER_FORWARDING_AFTER_REST : PM_ERR_PARAMETER_ORDER);
14513 return false;
14514 } else if (*current == PM_PARAMETERS_ORDER_NOTHING_AFTER || state > *current) {
14515 // We know what transition we failed on, so we can provide a better error here.
14516 pm_parser_err_token(parser, token, PM_ERR_PARAMETER_ORDER);
14517 return false;
14518 }
14519
14520 if (state < *current) *current = state;
14521 return true;
14522}
14523
14527static pm_parameters_node_t *
14528parse_parameters(
14529 pm_parser_t *parser,
14530 pm_binding_power_t binding_power,
14531 bool uses_parentheses,
14532 bool allows_trailing_comma,
14533 bool allows_forwarding_parameters,
14534 bool accepts_blocks_in_defaults,
14535 bool in_block,
14536 uint16_t depth
14537) {
14538 pm_do_loop_stack_push(parser, false);
14539
14540 pm_parameters_node_t *params = pm_parameters_node_create(parser);
14541 pm_parameters_order_t order = PM_PARAMETERS_ORDER_NONE;
14542
14543 while (true) {
14544 bool parsing = true;
14545
14546 switch (parser->current.type) {
14548 update_parameter_state(parser, &parser->current, &order);
14549 pm_node_t *param = (pm_node_t *) parse_required_destructured_parameter(parser);
14550
14551 if (order > PM_PARAMETERS_ORDER_AFTER_OPTIONAL) {
14552 pm_parameters_node_requireds_append(params, param);
14553 } else {
14554 pm_parameters_node_posts_append(params, param);
14555 }
14556 break;
14557 }
14559 case PM_TOKEN_AMPERSAND: {
14560 update_parameter_state(parser, &parser->current, &order);
14561 parser_lex(parser);
14562
14563 pm_token_t operator = parser->previous;
14564 pm_token_t name;
14565
14566 bool repeated = false;
14567 if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
14568 name = parser->previous;
14569 repeated = pm_parser_parameter_name_check(parser, &name);
14570 pm_parser_local_add_token(parser, &name, 1);
14571 } else {
14572 name = not_provided(parser);
14573 parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_FORWARDING_BLOCK;
14574 }
14575
14576 pm_block_parameter_node_t *param = pm_block_parameter_node_create(parser, &name, &operator);
14577 if (repeated) {
14578 pm_node_flag_set_repeated_parameter((pm_node_t *)param);
14579 }
14580 if (params->block == NULL) {
14581 pm_parameters_node_block_set(params, param);
14582 } else {
14583 pm_parser_err_node(parser, (pm_node_t *) param, PM_ERR_PARAMETER_BLOCK_MULTI);
14584 pm_parameters_node_posts_append(params, (pm_node_t *) param);
14585 }
14586
14587 break;
14588 }
14589 case PM_TOKEN_UDOT_DOT_DOT: {
14590 if (!allows_forwarding_parameters) {
14591 pm_parser_err_current(parser, PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES);
14592 }
14593
14594 bool succeeded = update_parameter_state(parser, &parser->current, &order);
14595 parser_lex(parser);
14596
14597 parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_FORWARDING_ALL;
14598 pm_forwarding_parameter_node_t *param = pm_forwarding_parameter_node_create(parser, &parser->previous);
14599
14600 if (params->keyword_rest != NULL) {
14601 // If we already have a keyword rest parameter, then we replace it with the
14602 // forwarding parameter and move the keyword rest parameter to the posts list.
14603 pm_node_t *keyword_rest = params->keyword_rest;
14604 pm_parameters_node_posts_append(params, keyword_rest);
14605 if (succeeded) pm_parser_err_previous(parser, PM_ERR_PARAMETER_UNEXPECTED_FWD);
14606 params->keyword_rest = NULL;
14607 }
14608
14609 pm_parameters_node_keyword_rest_set(params, (pm_node_t *) param);
14610 break;
14611 }
14614 case PM_TOKEN_CONSTANT:
14617 case PM_TOKEN_METHOD_NAME: {
14618 parser_lex(parser);
14619 switch (parser->previous.type) {
14620 case PM_TOKEN_CONSTANT:
14621 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORMAL_CONSTANT);
14622 break;
14624 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORMAL_IVAR);
14625 break;
14627 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORMAL_GLOBAL);
14628 break;
14630 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORMAL_CLASS);
14631 break;
14633 pm_parser_err_previous(parser, PM_ERR_PARAMETER_METHOD_NAME);
14634 break;
14635 default: break;
14636 }
14637
14638 if (parser->current.type == PM_TOKEN_EQUAL) {
14639 update_parameter_state(parser, &parser->current, &order);
14640 } else {
14641 update_parameter_state(parser, &parser->previous, &order);
14642 }
14643
14644 pm_token_t name = parser->previous;
14645 bool repeated = pm_parser_parameter_name_check(parser, &name);
14646 pm_parser_local_add_token(parser, &name, 1);
14647
14648 if (match1(parser, PM_TOKEN_EQUAL)) {
14649 pm_token_t operator = parser->current;
14650 context_push(parser, PM_CONTEXT_DEFAULT_PARAMS);
14651 parser_lex(parser);
14652
14653 pm_constant_id_t name_id = pm_parser_constant_id_token(parser, &name);
14654 uint32_t reads = parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 ? pm_locals_reads(&parser->current_scope->locals, name_id) : 0;
14655
14656 if (accepts_blocks_in_defaults) pm_accepts_block_stack_push(parser, true);
14657 pm_node_t *value = parse_value_expression(parser, binding_power, false, false, PM_ERR_PARAMETER_NO_DEFAULT, (uint16_t) (depth + 1));
14658 if (accepts_blocks_in_defaults) pm_accepts_block_stack_pop(parser);
14659
14660 pm_optional_parameter_node_t *param = pm_optional_parameter_node_create(parser, &name, &operator, value);
14661
14662 if (repeated) {
14663 pm_node_flag_set_repeated_parameter((pm_node_t *) param);
14664 }
14665 pm_parameters_node_optionals_append(params, param);
14666
14667 // If the value of the parameter increased the number of
14668 // reads of that parameter, then we need to warn that we
14669 // have a circular definition.
14670 if ((parser->version == PM_OPTIONS_VERSION_CRUBY_3_3) && (pm_locals_reads(&parser->current_scope->locals, name_id) != reads)) {
14671 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, name, PM_ERR_PARAMETER_CIRCULAR);
14672 }
14673
14674 context_pop(parser);
14675
14676 // If parsing the value of the parameter resulted in error recovery,
14677 // then we can put a missing node in its place and stop parsing the
14678 // parameters entirely now.
14679 if (parser->recovering) {
14680 parsing = false;
14681 break;
14682 }
14683 } else if (order > PM_PARAMETERS_ORDER_AFTER_OPTIONAL) {
14684 pm_required_parameter_node_t *param = pm_required_parameter_node_create(parser, &name);
14685 if (repeated) {
14686 pm_node_flag_set_repeated_parameter((pm_node_t *)param);
14687 }
14688 pm_parameters_node_requireds_append(params, (pm_node_t *) param);
14689 } else {
14690 pm_required_parameter_node_t *param = pm_required_parameter_node_create(parser, &name);
14691 if (repeated) {
14692 pm_node_flag_set_repeated_parameter((pm_node_t *)param);
14693 }
14694 pm_parameters_node_posts_append(params, (pm_node_t *) param);
14695 }
14696
14697 break;
14698 }
14699 case PM_TOKEN_LABEL: {
14700 if (!uses_parentheses && !in_block) parser->in_keyword_arg = true;
14701 update_parameter_state(parser, &parser->current, &order);
14702
14703 context_push(parser, PM_CONTEXT_DEFAULT_PARAMS);
14704 parser_lex(parser);
14705
14706 pm_token_t name = parser->previous;
14707 pm_token_t local = name;
14708 local.end -= 1;
14709
14710 if (parser->encoding_changed ? parser->encoding->isupper_char(local.start, local.end - local.start) : pm_encoding_utf_8_isupper_char(local.start, local.end - local.start)) {
14711 pm_parser_err(parser, local.start, local.end, PM_ERR_ARGUMENT_FORMAL_CONSTANT);
14712 } else if (local.end[-1] == '!' || local.end[-1] == '?') {
14713 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, local, PM_ERR_INVALID_LOCAL_VARIABLE_WRITE);
14714 }
14715
14716 bool repeated = pm_parser_parameter_name_check(parser, &local);
14717 pm_parser_local_add_token(parser, &local, 1);
14718
14719 switch (parser->current.type) {
14720 case PM_TOKEN_COMMA:
14722 case PM_TOKEN_PIPE: {
14723 context_pop(parser);
14724
14725 pm_node_t *param = (pm_node_t *) pm_required_keyword_parameter_node_create(parser, &name);
14726 if (repeated) {
14727 pm_node_flag_set_repeated_parameter(param);
14728 }
14729
14730 pm_parameters_node_keywords_append(params, param);
14731 break;
14732 }
14733 case PM_TOKEN_SEMICOLON:
14734 case PM_TOKEN_NEWLINE: {
14735 context_pop(parser);
14736
14737 if (uses_parentheses) {
14738 parsing = false;
14739 break;
14740 }
14741
14742 pm_node_t *param = (pm_node_t *) pm_required_keyword_parameter_node_create(parser, &name);
14743 if (repeated) {
14744 pm_node_flag_set_repeated_parameter(param);
14745 }
14746
14747 pm_parameters_node_keywords_append(params, param);
14748 break;
14749 }
14750 default: {
14751 pm_node_t *param;
14752
14753 if (token_begins_expression_p(parser->current.type)) {
14754 pm_constant_id_t name_id = pm_parser_constant_id_token(parser, &local);
14755 uint32_t reads = parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 ? pm_locals_reads(&parser->current_scope->locals, name_id) : 0;
14756
14757 if (accepts_blocks_in_defaults) pm_accepts_block_stack_push(parser, true);
14758 pm_node_t *value = parse_value_expression(parser, binding_power, false, false, PM_ERR_PARAMETER_NO_DEFAULT_KW, (uint16_t) (depth + 1));
14759 if (accepts_blocks_in_defaults) pm_accepts_block_stack_pop(parser);
14760
14761 if (parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 && (pm_locals_reads(&parser->current_scope->locals, name_id) != reads)) {
14762 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, local, PM_ERR_PARAMETER_CIRCULAR);
14763 }
14764
14765 param = (pm_node_t *) pm_optional_keyword_parameter_node_create(parser, &name, value);
14766 }
14767 else {
14768 param = (pm_node_t *) pm_required_keyword_parameter_node_create(parser, &name);
14769 }
14770
14771 if (repeated) {
14772 pm_node_flag_set_repeated_parameter(param);
14773 }
14774
14775 context_pop(parser);
14776 pm_parameters_node_keywords_append(params, param);
14777
14778 // If parsing the value of the parameter resulted in error recovery,
14779 // then we can put a missing node in its place and stop parsing the
14780 // parameters entirely now.
14781 if (parser->recovering) {
14782 parsing = false;
14783 break;
14784 }
14785 }
14786 }
14787
14788 parser->in_keyword_arg = false;
14789 break;
14790 }
14791 case PM_TOKEN_USTAR:
14792 case PM_TOKEN_STAR: {
14793 update_parameter_state(parser, &parser->current, &order);
14794 parser_lex(parser);
14795
14796 pm_token_t operator = parser->previous;
14797 pm_token_t name;
14798 bool repeated = false;
14799
14800 if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
14801 name = parser->previous;
14802 repeated = pm_parser_parameter_name_check(parser, &name);
14803 pm_parser_local_add_token(parser, &name, 1);
14804 } else {
14805 name = not_provided(parser);
14806 parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_FORWARDING_POSITIONALS;
14807 }
14808
14809 pm_node_t *param = (pm_node_t *) pm_rest_parameter_node_create(parser, &operator, &name);
14810 if (repeated) {
14811 pm_node_flag_set_repeated_parameter(param);
14812 }
14813
14814 if (params->rest == NULL) {
14815 pm_parameters_node_rest_set(params, param);
14816 } else {
14817 pm_parser_err_node(parser, param, PM_ERR_PARAMETER_SPLAT_MULTI);
14818 pm_parameters_node_posts_append(params, param);
14819 }
14820
14821 break;
14822 }
14823 case PM_TOKEN_STAR_STAR:
14824 case PM_TOKEN_USTAR_STAR: {
14825 pm_parameters_order_t previous_order = order;
14826 update_parameter_state(parser, &parser->current, &order);
14827 parser_lex(parser);
14828
14829 pm_token_t operator = parser->previous;
14830 pm_node_t *param;
14831
14832 if (accept1(parser, PM_TOKEN_KEYWORD_NIL)) {
14833 if (previous_order <= PM_PARAMETERS_ORDER_KEYWORDS) {
14834 pm_parser_err_previous(parser, PM_ERR_PARAMETER_UNEXPECTED_NO_KW);
14835 }
14836
14837 param = (pm_node_t *) pm_no_keywords_parameter_node_create(parser, &operator, &parser->previous);
14838 } else {
14839 pm_token_t name;
14840
14841 bool repeated = false;
14842 if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
14843 name = parser->previous;
14844 repeated = pm_parser_parameter_name_check(parser, &name);
14845 pm_parser_local_add_token(parser, &name, 1);
14846 } else {
14847 name = not_provided(parser);
14848 parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_FORWARDING_KEYWORDS;
14849 }
14850
14851 param = (pm_node_t *) pm_keyword_rest_parameter_node_create(parser, &operator, &name);
14852 if (repeated) {
14853 pm_node_flag_set_repeated_parameter(param);
14854 }
14855 }
14856
14857 if (params->keyword_rest == NULL) {
14858 pm_parameters_node_keyword_rest_set(params, param);
14859 } else {
14860 pm_parser_err_node(parser, param, PM_ERR_PARAMETER_ASSOC_SPLAT_MULTI);
14861 pm_parameters_node_posts_append(params, param);
14862 }
14863
14864 break;
14865 }
14866 default:
14867 if (parser->previous.type == PM_TOKEN_COMMA) {
14868 if (allows_trailing_comma && order >= PM_PARAMETERS_ORDER_NAMED) {
14869 // If we get here, then we have a trailing comma in a
14870 // block parameter list.
14871 pm_node_t *param = (pm_node_t *) pm_implicit_rest_node_create(parser, &parser->previous);
14872
14873 if (params->rest == NULL) {
14874 pm_parameters_node_rest_set(params, param);
14875 } else {
14876 pm_parser_err_node(parser, (pm_node_t *) param, PM_ERR_PARAMETER_SPLAT_MULTI);
14877 pm_parameters_node_posts_append(params, (pm_node_t *) param);
14878 }
14879 } else {
14880 pm_parser_err_previous(parser, PM_ERR_PARAMETER_WILD_LOOSE_COMMA);
14881 }
14882 }
14883
14884 parsing = false;
14885 break;
14886 }
14887
14888 // If we hit some kind of issue while parsing the parameter, this would
14889 // have been set to false. In that case, we need to break out of the
14890 // loop.
14891 if (!parsing) break;
14892
14893 bool accepted_newline = false;
14894 if (uses_parentheses) {
14895 accepted_newline = accept1(parser, PM_TOKEN_NEWLINE);
14896 }
14897
14898 if (accept1(parser, PM_TOKEN_COMMA)) {
14899 // If there was a comma, but we also accepted a newline, then this
14900 // is a syntax error.
14901 if (accepted_newline) {
14902 pm_parser_err_previous(parser, PM_ERR_INVALID_COMMA);
14903 }
14904 } else {
14905 // If there was no comma, then we're done parsing parameters.
14906 break;
14907 }
14908 }
14909
14910 pm_do_loop_stack_pop(parser);
14911
14912 // If we don't have any parameters, return `NULL` instead of an empty `ParametersNode`.
14913 if (params->base.location.start == params->base.location.end) {
14914 pm_node_destroy(parser, (pm_node_t *) params);
14915 return NULL;
14916 }
14917
14918 return params;
14919}
14920
14925static size_t
14926token_newline_index(const pm_parser_t *parser) {
14927 if (parser->heredoc_end == NULL) {
14928 // This is the common case. In this case we can look at the previously
14929 // recorded newline in the newline list and subtract from the current
14930 // offset.
14931 return parser->newline_list.size - 1;
14932 } else {
14933 // This is unlikely. This is the case that we have already parsed the
14934 // start of a heredoc, so we cannot rely on looking at the previous
14935 // offset of the newline list, and instead must go through the whole
14936 // process of a binary search for the line number.
14937 return (size_t) pm_newline_list_line(&parser->newline_list, parser->current.start, 0);
14938 }
14939}
14940
14945static int64_t
14946token_column(const pm_parser_t *parser, size_t newline_index, const pm_token_t *token, bool break_on_non_space) {
14947 const uint8_t *cursor = parser->start + parser->newline_list.offsets[newline_index];
14948 const uint8_t *end = token->start;
14949
14950 // Skip over the BOM if it is present.
14951 if (
14952 newline_index == 0 &&
14953 parser->start[0] == 0xef &&
14954 parser->start[1] == 0xbb &&
14955 parser->start[2] == 0xbf
14956 ) cursor += 3;
14957
14958 int64_t column = 0;
14959 for (; cursor < end; cursor++) {
14960 switch (*cursor) {
14961 case '\t':
14962 column = ((column / PM_TAB_WHITESPACE_SIZE) + 1) * PM_TAB_WHITESPACE_SIZE;
14963 break;
14964 case ' ':
14965 column++;
14966 break;
14967 default:
14968 column++;
14969 if (break_on_non_space) return -1;
14970 break;
14971 }
14972 }
14973
14974 return column;
14975}
14976
14981static void
14982parser_warn_indentation_mismatch(pm_parser_t *parser, size_t opening_newline_index, const pm_token_t *opening_token, bool if_after_else, bool allow_indent) {
14983 // If these warnings are disabled (unlikely), then we can just return.
14984 if (!parser->warn_mismatched_indentation) return;
14985
14986 // If the tokens are on the same line, we do not warn.
14987 size_t closing_newline_index = token_newline_index(parser);
14988 if (opening_newline_index == closing_newline_index) return;
14989
14990 // If the opening token has anything other than spaces or tabs before it,
14991 // then we do not warn. This is unless we are matching up an `if`/`end` pair
14992 // and the `if` immediately follows an `else` keyword.
14993 int64_t opening_column = token_column(parser, opening_newline_index, opening_token, !if_after_else);
14994 if (!if_after_else && (opening_column == -1)) return;
14995
14996 // Get a reference to the closing token off the current parser. This assumes
14997 // that the caller has placed this in the correct position.
14998 pm_token_t *closing_token = &parser->current;
14999
15000 // If the tokens are at the same indentation, we do not warn.
15001 int64_t closing_column = token_column(parser, closing_newline_index, closing_token, true);
15002 if ((closing_column == -1) || (opening_column == closing_column)) return;
15003
15004 // If the closing column is greater than the opening column and we are
15005 // allowing indentation, then we do not warn.
15006 if (allow_indent && (closing_column > opening_column)) return;
15007
15008 // Otherwise, add a warning.
15009 PM_PARSER_WARN_FORMAT(
15010 parser,
15011 closing_token->start,
15012 closing_token->end,
15013 PM_WARN_INDENTATION_MISMATCH,
15014 (int) (closing_token->end - closing_token->start),
15015 (const char *) closing_token->start,
15016 (int) (opening_token->end - opening_token->start),
15017 (const char *) opening_token->start,
15018 ((int32_t) opening_newline_index) + parser->start_line
15019 );
15020}
15021
/**
 * The kind of construct whose rescue clauses are being parsed. It is used to
 * select the matching rescue context when parsing the statements of each
 * clause.
 */
typedef enum {
    PM_RESCUES_BEGIN = 1,
    PM_RESCUES_BLOCK = 2,
    PM_RESCUES_CLASS = 3,
    PM_RESCUES_DEF = 4,
    PM_RESCUES_LAMBDA = 5,
    PM_RESCUES_MODULE = 6,
    PM_RESCUES_SCLASS = 7
} pm_rescues_type_t;
15031
15036static inline void
15037parse_rescues(pm_parser_t *parser, size_t opening_newline_index, const pm_token_t *opening, pm_begin_node_t *parent_node, pm_rescues_type_t type, uint16_t depth) {
15038 pm_rescue_node_t *current = NULL;
15039
15040 while (match1(parser, PM_TOKEN_KEYWORD_RESCUE)) {
15041 if (opening != NULL) parser_warn_indentation_mismatch(parser, opening_newline_index, opening, false, false);
15042 parser_lex(parser);
15043
15044 pm_rescue_node_t *rescue = pm_rescue_node_create(parser, &parser->previous);
15045
15046 switch (parser->current.type) {
15048 // Here we have an immediate => after the rescue keyword, in which case
15049 // we're going to have an empty list of exceptions to rescue (which
15050 // implies StandardError).
15051 parser_lex(parser);
15052 pm_rescue_node_operator_set(rescue, &parser->previous);
15053
15054 pm_node_t *reference = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_RESCUE_VARIABLE, (uint16_t) (depth + 1));
15055 reference = parse_target(parser, reference, false, false);
15056
15057 pm_rescue_node_reference_set(rescue, reference);
15058 break;
15059 }
15060 case PM_TOKEN_NEWLINE:
15061 case PM_TOKEN_SEMICOLON:
15063 // Here we have a terminator for the rescue keyword, in which case we're
15064 // going to just continue on.
15065 break;
15066 default: {
15067 if (token_begins_expression_p(parser->current.type) || match1(parser, PM_TOKEN_USTAR)) {
15068 // Here we have something that could be an exception expression, so
15069 // we'll attempt to parse it here and any others delimited by commas.
15070
15071 do {
15072 pm_node_t *expression = parse_starred_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_RESCUE_EXPRESSION, (uint16_t) (depth + 1));
15073 pm_rescue_node_exceptions_append(rescue, expression);
15074
15075 // If we hit a newline, then this is the end of the rescue expression. We
15076 // can continue on to parse the statements.
15077 if (match3(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_TOKEN_KEYWORD_THEN)) break;
15078
15079 // If we hit a `=>` then we're going to parse the exception variable. Once
15080 // we've done that, we'll break out of the loop and parse the statements.
15081 if (accept1(parser, PM_TOKEN_EQUAL_GREATER)) {
15082 pm_rescue_node_operator_set(rescue, &parser->previous);
15083
15084 pm_node_t *reference = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_RESCUE_VARIABLE, (uint16_t) (depth + 1));
15085 reference = parse_target(parser, reference, false, false);
15086
15087 pm_rescue_node_reference_set(rescue, reference);
15088 break;
15089 }
15090 } while (accept1(parser, PM_TOKEN_COMMA));
15091 }
15092 }
15093 }
15094
15095 if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
15096 accept1(parser, PM_TOKEN_KEYWORD_THEN);
15097 } else {
15098 expect1(parser, PM_TOKEN_KEYWORD_THEN, PM_ERR_RESCUE_TERM);
15099 }
15100
15102 pm_accepts_block_stack_push(parser, true);
15103 pm_context_t context;
15104
15105 switch (type) {
15106 case PM_RESCUES_BEGIN: context = PM_CONTEXT_BEGIN_RESCUE; break;
15107 case PM_RESCUES_BLOCK: context = PM_CONTEXT_BLOCK_RESCUE; break;
15108 case PM_RESCUES_CLASS: context = PM_CONTEXT_CLASS_RESCUE; break;
15109 case PM_RESCUES_DEF: context = PM_CONTEXT_DEF_RESCUE; break;
15110 case PM_RESCUES_LAMBDA: context = PM_CONTEXT_LAMBDA_RESCUE; break;
15111 case PM_RESCUES_MODULE: context = PM_CONTEXT_MODULE_RESCUE; break;
15112 case PM_RESCUES_SCLASS: context = PM_CONTEXT_SCLASS_RESCUE; break;
15113 default: assert(false && "unreachable"); context = PM_CONTEXT_BEGIN_RESCUE; break;
15114 }
15115
15116 pm_statements_node_t *statements = parse_statements(parser, context, (uint16_t) (depth + 1));
15117 if (statements != NULL) pm_rescue_node_statements_set(rescue, statements);
15118
15119 pm_accepts_block_stack_pop(parser);
15120 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15121 }
15122
15123 if (current == NULL) {
15124 pm_begin_node_rescue_clause_set(parent_node, rescue);
15125 } else {
15126 pm_rescue_node_subsequent_set(current, rescue);
15127 }
15128
15129 current = rescue;
15130 }
15131
15132 // The end node locations on rescue nodes will not be set correctly
15133 // since we won't know the end until we've found all subsequent
15134 // clauses. This sets the end location on all rescues once we know it.
15135 if (current != NULL) {
15136 const uint8_t *end_to_set = current->base.location.end;
15137 pm_rescue_node_t *clause = parent_node->rescue_clause;
15138
15139 while (clause != NULL) {
15140 clause->base.location.end = end_to_set;
15141 clause = clause->subsequent;
15142 }
15143 }
15144
15145 pm_token_t else_keyword;
15146 if (match1(parser, PM_TOKEN_KEYWORD_ELSE)) {
15147 if (opening != NULL) parser_warn_indentation_mismatch(parser, opening_newline_index, opening, false, false);
15148 opening_newline_index = token_newline_index(parser);
15149
15150 else_keyword = parser->current;
15151 opening = &else_keyword;
15152
15153 parser_lex(parser);
15154 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15155
15156 pm_statements_node_t *else_statements = NULL;
15157 if (!match2(parser, PM_TOKEN_KEYWORD_END, PM_TOKEN_KEYWORD_ENSURE)) {
15158 pm_accepts_block_stack_push(parser, true);
15159 pm_context_t context;
15160
15161 switch (type) {
15162 case PM_RESCUES_BEGIN: context = PM_CONTEXT_BEGIN_ELSE; break;
15163 case PM_RESCUES_BLOCK: context = PM_CONTEXT_BLOCK_ELSE; break;
15164 case PM_RESCUES_CLASS: context = PM_CONTEXT_CLASS_ELSE; break;
15165 case PM_RESCUES_DEF: context = PM_CONTEXT_DEF_ELSE; break;
15166 case PM_RESCUES_LAMBDA: context = PM_CONTEXT_LAMBDA_ELSE; break;
15167 case PM_RESCUES_MODULE: context = PM_CONTEXT_MODULE_ELSE; break;
15168 case PM_RESCUES_SCLASS: context = PM_CONTEXT_SCLASS_ELSE; break;
15169 default: assert(false && "unreachable"); context = PM_CONTEXT_BEGIN_ELSE; break;
15170 }
15171
15172 else_statements = parse_statements(parser, context, (uint16_t) (depth + 1));
15173 pm_accepts_block_stack_pop(parser);
15174
15175 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15176 }
15177
15178 pm_else_node_t *else_clause = pm_else_node_create(parser, &else_keyword, else_statements, &parser->current);
15179 pm_begin_node_else_clause_set(parent_node, else_clause);
15180
15181 // If we don't have a `current` rescue node, then this is a dangling
15182 // else, and it's an error.
15183 if (current == NULL) pm_parser_err_node(parser, (pm_node_t *) else_clause, PM_ERR_BEGIN_LONELY_ELSE);
15184 }
15185
15186 if (match1(parser, PM_TOKEN_KEYWORD_ENSURE)) {
15187 if (opening != NULL) parser_warn_indentation_mismatch(parser, opening_newline_index, opening, false, false);
15188 pm_token_t ensure_keyword = parser->current;
15189
15190 parser_lex(parser);
15191 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15192
15193 pm_statements_node_t *ensure_statements = NULL;
15194 if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
15195 pm_accepts_block_stack_push(parser, true);
15196 pm_context_t context;
15197
15198 switch (type) {
15199 case PM_RESCUES_BEGIN: context = PM_CONTEXT_BEGIN_ENSURE; break;
15200 case PM_RESCUES_BLOCK: context = PM_CONTEXT_BLOCK_ENSURE; break;
15201 case PM_RESCUES_CLASS: context = PM_CONTEXT_CLASS_ENSURE; break;
15202 case PM_RESCUES_DEF: context = PM_CONTEXT_DEF_ENSURE; break;
15203 case PM_RESCUES_LAMBDA: context = PM_CONTEXT_LAMBDA_ENSURE; break;
15204 case PM_RESCUES_MODULE: context = PM_CONTEXT_MODULE_ENSURE; break;
15205 case PM_RESCUES_SCLASS: context = PM_CONTEXT_SCLASS_ENSURE; break;
15206 default: assert(false && "unreachable"); context = PM_CONTEXT_BEGIN_RESCUE; break;
15207 }
15208
15209 ensure_statements = parse_statements(parser, context, (uint16_t) (depth + 1));
15210 pm_accepts_block_stack_pop(parser);
15211
15212 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15213 }
15214
15215 pm_ensure_node_t *ensure_clause = pm_ensure_node_create(parser, &ensure_keyword, ensure_statements, &parser->current);
15216 pm_begin_node_ensure_clause_set(parent_node, ensure_clause);
15217 }
15218
15219 if (match1(parser, PM_TOKEN_KEYWORD_END)) {
15220 if (opening != NULL) parser_warn_indentation_mismatch(parser, opening_newline_index, opening, false, false);
15221 pm_begin_node_end_keyword_set(parent_node, &parser->current);
15222 } else {
15223 pm_token_t end_keyword = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
15224 pm_begin_node_end_keyword_set(parent_node, &end_keyword);
15225 }
15226}
15227
15232static pm_begin_node_t *
15233parse_rescues_implicit_begin(pm_parser_t *parser, size_t opening_newline_index, const pm_token_t *opening, const uint8_t *start, pm_statements_node_t *statements, pm_rescues_type_t type, uint16_t depth) {
15234 pm_token_t begin_keyword = not_provided(parser);
15235 pm_begin_node_t *node = pm_begin_node_create(parser, &begin_keyword, statements);
15236
15237 parse_rescues(parser, opening_newline_index, opening, node, type, (uint16_t) (depth + 1));
15238 node->base.location.start = start;
15239
15240 return node;
15241}
15242
15247parse_block_parameters(
15248 pm_parser_t *parser,
15249 bool allows_trailing_comma,
15250 const pm_token_t *opening,
15251 bool is_lambda_literal,
15252 bool accepts_blocks_in_defaults,
15253 uint16_t depth
15254) {
15255 pm_parameters_node_t *parameters = NULL;
15256 if (!match1(parser, PM_TOKEN_SEMICOLON)) {
15257 parameters = parse_parameters(
15258 parser,
15259 is_lambda_literal ? PM_BINDING_POWER_DEFINED : PM_BINDING_POWER_INDEX,
15260 false,
15261 allows_trailing_comma,
15262 false,
15263 accepts_blocks_in_defaults,
15264 true,
15265 (uint16_t) (depth + 1)
15266 );
15267 }
15268
15269 pm_block_parameters_node_t *block_parameters = pm_block_parameters_node_create(parser, parameters, opening);
15270 if ((opening->type != PM_TOKEN_NOT_PROVIDED)) {
15271 accept1(parser, PM_TOKEN_NEWLINE);
15272
15273 if (accept1(parser, PM_TOKEN_SEMICOLON)) {
15274 do {
15275 switch (parser->current.type) {
15276 case PM_TOKEN_CONSTANT:
15277 pm_parser_err_current(parser, PM_ERR_ARGUMENT_FORMAL_CONSTANT);
15278 parser_lex(parser);
15279 break;
15281 pm_parser_err_current(parser, PM_ERR_ARGUMENT_FORMAL_IVAR);
15282 parser_lex(parser);
15283 break;
15285 pm_parser_err_current(parser, PM_ERR_ARGUMENT_FORMAL_GLOBAL);
15286 parser_lex(parser);
15287 break;
15289 pm_parser_err_current(parser, PM_ERR_ARGUMENT_FORMAL_CLASS);
15290 parser_lex(parser);
15291 break;
15292 default:
15293 expect1(parser, PM_TOKEN_IDENTIFIER, PM_ERR_BLOCK_PARAM_LOCAL_VARIABLE);
15294 break;
15295 }
15296
15297 bool repeated = pm_parser_parameter_name_check(parser, &parser->previous);
15298 pm_parser_local_add_token(parser, &parser->previous, 1);
15299
15300 pm_block_local_variable_node_t *local = pm_block_local_variable_node_create(parser, &parser->previous);
15301 if (repeated) pm_node_flag_set_repeated_parameter((pm_node_t *) local);
15302
15303 pm_block_parameters_node_append_local(block_parameters, local);
15304 } while (accept1(parser, PM_TOKEN_COMMA));
15305 }
15306 }
15307
15308 return block_parameters;
15309}
15310
15315static bool
15316outer_scope_using_numbered_parameters_p(pm_parser_t *parser) {
15317 for (pm_scope_t *scope = parser->current_scope->previous; scope != NULL && !scope->closed; scope = scope->previous) {
15318 if (scope->parameters & PM_SCOPE_PARAMETERS_NUMBERED_FOUND) return true;
15319 }
15320
15321 return false;
15322}
15323
/**
 * The names of the numbered parameters. The entry at index (n - 1) is the
 * name of numbered parameter n, for n from 1 through 9.
 */
static const char * const pm_numbered_parameter_names[] = {
    [0] = "_1",
    [1] = "_2",
    [2] = "_3",
    [3] = "_4",
    [4] = "_5",
    [5] = "_6",
    [6] = "_7",
    [7] = "_8",
    [8] = "_9"
};
15332
15338static pm_node_t *
15339parse_blocklike_parameters(pm_parser_t *parser, pm_node_t *parameters, const pm_token_t *opening, const pm_token_t *closing) {
15340 pm_node_list_t *implicit_parameters = &parser->current_scope->implicit_parameters;
15341
15342 // If we have ordinary parameters, then we will return them as the set of
15343 // parameters.
15344 if (parameters != NULL) {
15345 // If we also have implicit parameters, then this is an error.
15346 if (implicit_parameters->size > 0) {
15347 pm_node_t *node = implicit_parameters->nodes[0];
15348
15350 pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_ORDINARY);
15352 pm_parser_err_node(parser, node, PM_ERR_IT_NOT_ALLOWED_ORDINARY);
15353 } else {
15354 assert(false && "unreachable");
15355 }
15356 }
15357
15358 return parameters;
15359 }
15360
15361 // If we don't have any implicit parameters, then the set of parameters is
15362 // NULL.
15363 if (implicit_parameters->size == 0) {
15364 return NULL;
15365 }
15366
15367 // If we don't have ordinary parameters, then we now must validate our set
15368 // of implicit parameters. We can only have numbered parameters or it, but
15369 // they cannot be mixed.
15370 uint8_t numbered_parameter = 0;
15371 bool it_parameter = false;
15372
15373 for (size_t index = 0; index < implicit_parameters->size; index++) {
15374 pm_node_t *node = implicit_parameters->nodes[index];
15375
15377 if (it_parameter) {
15378 pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_IT);
15379 } else if (outer_scope_using_numbered_parameters_p(parser)) {
15380 pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_OUTER_BLOCK);
15381 } else if (parser->current_scope->parameters & PM_SCOPE_PARAMETERS_NUMBERED_INNER) {
15382 pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_INNER_BLOCK);
15383 } else if (pm_token_is_numbered_parameter(node->location.start, node->location.end)) {
15384 numbered_parameter = MAX(numbered_parameter, (uint8_t) (node->location.start[1] - '0'));
15385 } else {
15386 assert(false && "unreachable");
15387 }
15389 if (numbered_parameter > 0) {
15390 pm_parser_err_node(parser, node, PM_ERR_IT_NOT_ALLOWED_NUMBERED);
15391 } else {
15392 it_parameter = true;
15393 }
15394 }
15395 }
15396
15397 if (numbered_parameter > 0) {
15398 // Go through the parent scopes and mark them as being disallowed from
15399 // using numbered parameters because this inner scope is using them.
15400 for (pm_scope_t *scope = parser->current_scope->previous; scope != NULL && !scope->closed; scope = scope->previous) {
15401 scope->parameters |= PM_SCOPE_PARAMETERS_NUMBERED_INNER;
15402 }
15403
15404 const pm_location_t location = { .start = opening->start, .end = closing->end };
15405 return (pm_node_t *) pm_numbered_parameters_node_create(parser, &location, numbered_parameter);
15406 }
15407
15408 if (it_parameter) {
15409 return (pm_node_t *) pm_it_parameters_node_create(parser, opening, closing);
15410 }
15411
15412 return NULL;
15413}
15414
15418static pm_block_node_t *
15419parse_block(pm_parser_t *parser, uint16_t depth) {
15420 pm_token_t opening = parser->previous;
15421 accept1(parser, PM_TOKEN_NEWLINE);
15422
15423 pm_accepts_block_stack_push(parser, true);
15424 pm_parser_scope_push(parser, false);
15425
15426 pm_block_parameters_node_t *block_parameters = NULL;
15427
15428 if (accept1(parser, PM_TOKEN_PIPE)) {
15429 pm_token_t block_parameters_opening = parser->previous;
15430 if (match1(parser, PM_TOKEN_PIPE)) {
15431 block_parameters = pm_block_parameters_node_create(parser, NULL, &block_parameters_opening);
15432 parser->command_start = true;
15433 parser_lex(parser);
15434 } else {
15435 block_parameters = parse_block_parameters(parser, true, &block_parameters_opening, false, true, (uint16_t) (depth + 1));
15436 accept1(parser, PM_TOKEN_NEWLINE);
15437 parser->command_start = true;
15438 expect1(parser, PM_TOKEN_PIPE, PM_ERR_BLOCK_PARAM_PIPE_TERM);
15439 }
15440
15441 pm_block_parameters_node_closing_set(block_parameters, &parser->previous);
15442 }
15443
15444 accept1(parser, PM_TOKEN_NEWLINE);
15445 pm_node_t *statements = NULL;
15446
15447 if (opening.type == PM_TOKEN_BRACE_LEFT) {
15448 if (!match1(parser, PM_TOKEN_BRACE_RIGHT)) {
15449 statements = (pm_node_t *) parse_statements(parser, PM_CONTEXT_BLOCK_BRACES, (uint16_t) (depth + 1));
15450 }
15451
15452 expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_BLOCK_TERM_BRACE);
15453 } else {
15454 if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
15456 pm_accepts_block_stack_push(parser, true);
15457 statements = (pm_node_t *) parse_statements(parser, PM_CONTEXT_BLOCK_KEYWORDS, (uint16_t) (depth + 1));
15458 pm_accepts_block_stack_pop(parser);
15459 }
15460
15461 if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
15462 assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
15463 statements = (pm_node_t *) parse_rescues_implicit_begin(parser, 0, NULL, opening.start, (pm_statements_node_t *) statements, PM_RESCUES_BLOCK, (uint16_t) (depth + 1));
15464 }
15465 }
15466
15467 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_BLOCK_TERM_END);
15468 }
15469
15470 pm_constant_id_list_t locals;
15471 pm_locals_order(parser, &parser->current_scope->locals, &locals, pm_parser_scope_toplevel_p(parser));
15472 pm_node_t *parameters = parse_blocklike_parameters(parser, (pm_node_t *) block_parameters, &opening, &parser->previous);
15473
15474 pm_parser_scope_pop(parser);
15475 pm_accepts_block_stack_pop(parser);
15476
15477 return pm_block_node_create(parser, &locals, &opening, parameters, statements, &parser->previous);
15478}
15479
15485static bool
15486parse_arguments_list(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_block, bool accepts_command_call, uint16_t depth) {
15487 bool found = false;
15488
15489 if (accept1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
15490 found |= true;
15491 arguments->opening_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
15492
15493 if (accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
15494 arguments->closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
15495 } else {
15496 pm_accepts_block_stack_push(parser, true);
15497 parse_arguments(parser, arguments, accepts_block, PM_TOKEN_PARENTHESIS_RIGHT, (uint16_t) (depth + 1));
15498
15499 if (!accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
15500 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_ARGUMENT_TERM_PAREN, pm_token_type_human(parser->current.type));
15501 parser->previous.start = parser->previous.end;
15502 parser->previous.type = PM_TOKEN_MISSING;
15503 }
15504
15505 pm_accepts_block_stack_pop(parser);
15506 arguments->closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
15507 }
15508 } else if (accepts_command_call && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR, PM_TOKEN_UAMPERSAND)) && !match1(parser, PM_TOKEN_BRACE_LEFT)) {
15509 found |= true;
15510 pm_accepts_block_stack_push(parser, false);
15511
15512 // If we get here, then the subsequent token cannot be used as an infix
15513 // operator. In this case we assume the subsequent token is part of an
15514 // argument to this method call.
15515 parse_arguments(parser, arguments, accepts_block, PM_TOKEN_EOF, (uint16_t) (depth + 1));
15516
15517 // If we have done with the arguments and still not consumed the comma,
15518 // then we have a trailing comma where we need to check whether it is
15519 // allowed or not.
15520 if (parser->previous.type == PM_TOKEN_COMMA && !match1(parser, PM_TOKEN_SEMICOLON)) {
15521 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_EXPECT_ARGUMENT, pm_token_type_human(parser->current.type));
15522 }
15523
15524 pm_accepts_block_stack_pop(parser);
15525 }
15526
15527 // If we're at the end of the arguments, we can now check if there is a block
15528 // node that starts with a {. If there is, then we can parse it and add it to
15529 // the arguments.
15530 if (accepts_block) {
15531 pm_block_node_t *block = NULL;
15532
15533 if (accept1(parser, PM_TOKEN_BRACE_LEFT)) {
15534 found |= true;
15535 block = parse_block(parser, (uint16_t) (depth + 1));
15536 pm_arguments_validate_block(parser, arguments, block);
15537 } else if (pm_accepts_block_stack_p(parser) && accept1(parser, PM_TOKEN_KEYWORD_DO)) {
15538 found |= true;
15539 block = parse_block(parser, (uint16_t) (depth + 1));
15540 }
15541
15542 if (block != NULL) {
15543 if (arguments->block == NULL && !arguments->has_forwarding) {
15544 arguments->block = (pm_node_t *) block;
15545 } else {
15546 pm_parser_err_node(parser, (pm_node_t *) block, PM_ERR_ARGUMENT_BLOCK_MULTI);
15547
15548 if (arguments->block != NULL) {
15549 if (arguments->arguments == NULL) {
15550 arguments->arguments = pm_arguments_node_create(parser);
15551 }
15552 pm_arguments_node_arguments_append(arguments->arguments, arguments->block);
15553 }
15554 arguments->block = (pm_node_t *) block;
15555 }
15556 }
15557 }
15558
15559 return found;
15560}
15561
/**
 * Validate the placement of the given node (presumably a return node, judging
 * by the PM_ERR_RETURN_INVALID diagnostic) by walking the parser's context
 * stack from the innermost context outward.
 *
 * An error is added to the node when a class or module context is reached
 * first, or when the whole stack is walked without reaching a def context and
 * a singleton class context was crossed on the way.
 *
 * NOTE(review): the case lists below appear to be incomplete relative to the
 * context values used elsewhere in this file (e.g. PM_CONTEXT_BLOCK_BRACES,
 * PM_CONTEXT_BEGIN_RESCUE, PM_CONTEXT_PREDICATE). Any context without a label
 * here falls out of the switch and the walk simply continues. Confirm against
 * the pm_context_t declaration.
 */
static void
parse_return(pm_parser_t *parser, pm_node_t *node) {
    // Remembers that a singleton class context was crossed; only consulted if
    // the walk reaches the end of the context stack.
    bool in_sclass = false;
    for (pm_context_node_t *context_node = parser->current_context; context_node != NULL; context_node = context_node->prev) {
        switch (context_node->context) {
            case PM_CONTEXT_BEGIN:
            case PM_CONTEXT_CASE_IN:
            case PM_CONTEXT_DEFINED:
            case PM_CONTEXT_ELSE:
            case PM_CONTEXT_ELSIF:
            case PM_CONTEXT_EMBEXPR:
            case PM_CONTEXT_FOR:
            case PM_CONTEXT_IF:
            case PM_CONTEXT_MAIN:
            case PM_CONTEXT_PARENS:
            case PM_CONTEXT_POSTEXE:
            case PM_CONTEXT_PREEXE:
            case PM_CONTEXT_TERNARY:
            case PM_CONTEXT_UNLESS:
            case PM_CONTEXT_UNTIL:
            case PM_CONTEXT_WHILE:
                // Keep iterating up the lists of contexts, because returns can
                // see through these.
                continue;
            case PM_CONTEXT_SCLASS:
                // Note the singleton class and keep walking; whether this is
                // an error depends on what encloses it.
                in_sclass = true;
                continue;
            case PM_CONTEXT_CLASS:
            case PM_CONTEXT_MODULE:
                // These contexts are invalid for a return.
                pm_parser_err_node(parser, node, PM_ERR_RETURN_INVALID);
                return;
            case PM_CONTEXT_DEF:
                // These contexts are valid for a return, and we should not
                // continue to loop.
                return;
            case PM_CONTEXT_NONE:
                // This case should never happen.
                assert(false && "unreachable");
                break;
        }
    }
    // The walk ran off the end of the context stack. That is only an error if
    // a singleton class context was crossed along the way.
    if (in_sclass) {
        pm_parser_err_node(parser, node, PM_ERR_RETURN_INVALID);
    }
}
15645
/**
 * Decide whether the given block exit node is in a position where it is
 * allowed, by walking the parser's context stack from the innermost context
 * outward. (flush_block_exits reports such nodes as break, next, or redo.)
 *
 * If a permitting context is found first, nothing happens. If a forbidding
 * context is found first, the node is appended to parser->current_block_exits
 * so that it can be validated later rather than reported immediately.
 *
 * NOTE(review): the case lists below appear to be incomplete relative to the
 * context values used elsewhere in this file (e.g. PM_CONTEXT_BLOCK_BRACES,
 * PM_CONTEXT_BLOCK_KEYWORDS, PM_CONTEXT_LAMBDA_RESCUE). Any context without a
 * label here falls out of the switch and the walk simply continues. Confirm
 * against the pm_context_t declaration.
 */
static void
parse_block_exit(pm_parser_t *parser, pm_node_t *node) {
    for (pm_context_node_t *context_node = parser->current_context; context_node != NULL; context_node = context_node->prev) {
        switch (context_node->context) {
            case PM_CONTEXT_DEFINED:
            case PM_CONTEXT_FOR:
            case PM_CONTEXT_POSTEXE:
            case PM_CONTEXT_UNTIL:
            case PM_CONTEXT_WHILE:
                // These are the good cases. We're allowed to have a block exit
                // in these contexts.
                return;
            case PM_CONTEXT_DEF:
            case PM_CONTEXT_MAIN:
            case PM_CONTEXT_PREEXE:
            case PM_CONTEXT_SCLASS:
                // These are the bad cases. We're not allowed to have a block
                // exit in these contexts.
                //
                // If we get here, then we're about to mark this block exit
                // as invalid. However, it could later _become_ valid if we
                // find a trailing while/until on the expression. In this
                // case instead of adding the error here, we'll add the
                // block exit to the list of exits for the expression, and
                // the node parsing will handle validating it instead.
                assert(parser->current_block_exits != NULL);
                pm_node_list_append(parser->current_block_exits, node);
                return;
            case PM_CONTEXT_BEGIN:
            case PM_CONTEXT_CASE_IN:
            case PM_CONTEXT_CLASS:
            case PM_CONTEXT_ELSE:
            case PM_CONTEXT_ELSIF:
            case PM_CONTEXT_EMBEXPR:
            case PM_CONTEXT_IF:
            case PM_CONTEXT_MODULE:
            case PM_CONTEXT_PARENS:
            case PM_CONTEXT_TERNARY:
            case PM_CONTEXT_UNLESS:
                // In these contexts we should continue walking up the list of
                // contexts.
                break;
            case PM_CONTEXT_NONE:
                // This case should never happen.
                assert(false && "unreachable");
                break;
        }
    }
}
15732
15737static pm_node_list_t *
15738push_block_exits(pm_parser_t *parser, pm_node_list_t *current_block_exits) {
15739 pm_node_list_t *previous_block_exits = parser->current_block_exits;
15740 parser->current_block_exits = current_block_exits;
15741 return previous_block_exits;
15742}
15743
15749static void
15750flush_block_exits(pm_parser_t *parser, pm_node_list_t *previous_block_exits) {
15751 pm_node_t *block_exit;
15752 PM_NODE_LIST_FOREACH(parser->current_block_exits, index, block_exit) {
15753 const char *type;
15754
15755 switch (PM_NODE_TYPE(block_exit)) {
15756 case PM_BREAK_NODE: type = "break"; break;
15757 case PM_NEXT_NODE: type = "next"; break;
15758 case PM_REDO_NODE: type = "redo"; break;
15759 default: assert(false && "unreachable"); type = ""; break;
15760 }
15761
15762 PM_PARSER_ERR_NODE_FORMAT(parser, block_exit, PM_ERR_INVALID_BLOCK_EXIT, type);
15763 }
15764
15765 parser->current_block_exits = previous_block_exits;
15766}
15767
15772static void
15773pop_block_exits(pm_parser_t *parser, pm_node_list_t *previous_block_exits) {
15775 // If we matched a trailing while/until, then all of the block exits in
15776 // the contained list are valid. In this case we do not need to do
15777 // anything.
15778 parser->current_block_exits = previous_block_exits;
15779 } else if (previous_block_exits != NULL) {
15780 // If we did not matching a trailing while/until, then all of the block
15781 // exits contained in the list are invalid for this specific context.
15782 // However, they could still become valid in a higher level context if
15783 // there is another list above this one. In this case we'll push all of
15784 // the block exits up to the previous list.
15785 pm_node_list_concat(previous_block_exits, parser->current_block_exits);
15786 parser->current_block_exits = previous_block_exits;
15787 } else {
15788 // If we did not match a trailing while/until and this was the last
15789 // chance to do so, then all of the block exits in the list are invalid
15790 // and we need to add an error for each of them.
15791 flush_block_exits(parser, previous_block_exits);
15792 }
15793}
15794
15795static inline pm_node_t *
15796parse_predicate(pm_parser_t *parser, pm_binding_power_t binding_power, pm_context_t context, pm_token_t *then_keyword, uint16_t depth) {
15797 context_push(parser, PM_CONTEXT_PREDICATE);
15798 pm_diagnostic_id_t error_id = context == PM_CONTEXT_IF ? PM_ERR_CONDITIONAL_IF_PREDICATE : PM_ERR_CONDITIONAL_UNLESS_PREDICATE;
15799 pm_node_t *predicate = parse_value_expression(parser, binding_power, true, false, error_id, (uint16_t) (depth + 1));
15800
15801 // Predicates are closed by a term, a "then", or a term and then a "then".
15802 bool predicate_closed = accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15803
15804 if (accept1(parser, PM_TOKEN_KEYWORD_THEN)) {
15805 predicate_closed = true;
15806 *then_keyword = parser->previous;
15807 }
15808
15809 if (!predicate_closed) {
15810 pm_parser_err_current(parser, PM_ERR_CONDITIONAL_PREDICATE_TERM);
15811 }
15812
15813 context_pop(parser);
15814 return predicate;
15815}
15816
15817static inline pm_node_t *
15818parse_conditional(pm_parser_t *parser, pm_context_t context, size_t opening_newline_index, bool if_after_else, uint16_t depth) {
15819 pm_node_list_t current_block_exits = { 0 };
15820 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
15821
15822 pm_token_t keyword = parser->previous;
15823 pm_token_t then_keyword = not_provided(parser);
15824
15825 pm_node_t *predicate = parse_predicate(parser, PM_BINDING_POWER_MODIFIER, context, &then_keyword, (uint16_t) (depth + 1));
15826 pm_statements_node_t *statements = NULL;
15827
15829 pm_accepts_block_stack_push(parser, true);
15830 statements = parse_statements(parser, context, (uint16_t) (depth + 1));
15831 pm_accepts_block_stack_pop(parser);
15832 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15833 }
15834
15835 pm_token_t end_keyword = not_provided(parser);
15836 pm_node_t *parent = NULL;
15837
15838 switch (context) {
15839 case PM_CONTEXT_IF:
15840 parent = (pm_node_t *) pm_if_node_create(parser, &keyword, predicate, &then_keyword, statements, NULL, &end_keyword);
15841 break;
15842 case PM_CONTEXT_UNLESS:
15843 parent = (pm_node_t *) pm_unless_node_create(parser, &keyword, predicate, &then_keyword, statements);
15844 break;
15845 default:
15846 assert(false && "unreachable");
15847 break;
15848 }
15849
15850 pm_node_t *current = parent;
15851
15852 // Parse any number of elsif clauses. This will form a linked list of if
15853 // nodes pointing to each other from the top.
15854 if (context == PM_CONTEXT_IF) {
15855 while (match1(parser, PM_TOKEN_KEYWORD_ELSIF)) {
15856 if (parser_end_of_line_p(parser)) {
15857 PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser, parser->current, PM_WARN_KEYWORD_EOL);
15858 }
15859
15860 parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, false, false);
15861 pm_token_t elsif_keyword = parser->current;
15862 parser_lex(parser);
15863
15864 pm_node_t *predicate = parse_predicate(parser, PM_BINDING_POWER_MODIFIER, PM_CONTEXT_ELSIF, &then_keyword, (uint16_t) (depth + 1));
15865 pm_accepts_block_stack_push(parser, true);
15866
15867 pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_ELSIF, (uint16_t) (depth + 1));
15868 pm_accepts_block_stack_pop(parser);
15869 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15870
15871 pm_node_t *elsif = (pm_node_t *) pm_if_node_create(parser, &elsif_keyword, predicate, &then_keyword, statements, NULL, &end_keyword);
15872 ((pm_if_node_t *) current)->subsequent = elsif;
15873 current = elsif;
15874 }
15875 }
15876
15877 if (match1(parser, PM_TOKEN_KEYWORD_ELSE)) {
15878 parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, false, false);
15879 opening_newline_index = token_newline_index(parser);
15880
15881 parser_lex(parser);
15882 pm_token_t else_keyword = parser->previous;
15883
15884 pm_accepts_block_stack_push(parser, true);
15885 pm_statements_node_t *else_statements = parse_statements(parser, PM_CONTEXT_ELSE, (uint16_t) (depth + 1));
15886 pm_accepts_block_stack_pop(parser);
15887
15888 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15889 parser_warn_indentation_mismatch(parser, opening_newline_index, &else_keyword, false, false);
15890 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CONDITIONAL_TERM_ELSE);
15891
15892 pm_else_node_t *else_node = pm_else_node_create(parser, &else_keyword, else_statements, &parser->previous);
15893
15894 switch (context) {
15895 case PM_CONTEXT_IF:
15896 ((pm_if_node_t *) current)->subsequent = (pm_node_t *) else_node;
15897 break;
15898 case PM_CONTEXT_UNLESS:
15899 ((pm_unless_node_t *) parent)->else_clause = else_node;
15900 break;
15901 default:
15902 assert(false && "unreachable");
15903 break;
15904 }
15905 } else {
15906 parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, if_after_else, false);
15907 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CONDITIONAL_TERM);
15908 }
15909
15910 // Set the appropriate end location for all of the nodes in the subtree.
15911 switch (context) {
15912 case PM_CONTEXT_IF: {
15913 pm_node_t *current = parent;
15914 bool recursing = true;
15915
15916 while (recursing) {
15917 switch (PM_NODE_TYPE(current)) {
15918 case PM_IF_NODE:
15919 pm_if_node_end_keyword_loc_set((pm_if_node_t *) current, &parser->previous);
15920 current = ((pm_if_node_t *) current)->subsequent;
15921 recursing = current != NULL;
15922 break;
15923 case PM_ELSE_NODE:
15924 pm_else_node_end_keyword_loc_set((pm_else_node_t *) current, &parser->previous);
15925 recursing = false;
15926 break;
15927 default: {
15928 recursing = false;
15929 break;
15930 }
15931 }
15932 }
15933 break;
15934 }
15935 case PM_CONTEXT_UNLESS:
15936 pm_unless_node_end_keyword_loc_set((pm_unless_node_t *) parent, &parser->previous);
15937 break;
15938 default:
15939 assert(false && "unreachable");
15940 break;
15941 }
15942
15943 pop_block_exits(parser, previous_block_exits);
15944 pm_node_list_free(&current_block_exits);
15945
15946 return parent;
15947}
15948
/**
 * Expands to a run of case labels covering the keyword token types listed
 * below. The leading `case` and the trailing colon are supplied at the use
 * site, i.e. `case PM_CASE_KEYWORD:`.
 */
#define PM_CASE_KEYWORD \
    PM_TOKEN_KEYWORD___ENCODING__: \
    case PM_TOKEN_KEYWORD___FILE__: \
    case PM_TOKEN_KEYWORD___LINE__: \
    case PM_TOKEN_KEYWORD_ALIAS: \
    case PM_TOKEN_KEYWORD_AND: \
    case PM_TOKEN_KEYWORD_BEGIN: \
    case PM_TOKEN_KEYWORD_BEGIN_UPCASE: \
    case PM_TOKEN_KEYWORD_BREAK: \
    case PM_TOKEN_KEYWORD_CASE: \
    case PM_TOKEN_KEYWORD_CLASS: \
    case PM_TOKEN_KEYWORD_DEF: \
    case PM_TOKEN_KEYWORD_DEFINED: \
    case PM_TOKEN_KEYWORD_DO: \
    case PM_TOKEN_KEYWORD_DO_LOOP: \
    case PM_TOKEN_KEYWORD_ELSE: \
    case PM_TOKEN_KEYWORD_ELSIF: \
    case PM_TOKEN_KEYWORD_END: \
    case PM_TOKEN_KEYWORD_END_UPCASE: \
    case PM_TOKEN_KEYWORD_ENSURE: \
    case PM_TOKEN_KEYWORD_FALSE: \
    case PM_TOKEN_KEYWORD_FOR: \
    case PM_TOKEN_KEYWORD_IF: \
    case PM_TOKEN_KEYWORD_IN: \
    case PM_TOKEN_KEYWORD_MODULE: \
    case PM_TOKEN_KEYWORD_NEXT: \
    case PM_TOKEN_KEYWORD_NIL: \
    case PM_TOKEN_KEYWORD_NOT: \
    case PM_TOKEN_KEYWORD_OR: \
    case PM_TOKEN_KEYWORD_REDO: \
    case PM_TOKEN_KEYWORD_RESCUE: \
    case PM_TOKEN_KEYWORD_RETRY: \
    case PM_TOKEN_KEYWORD_RETURN: \
    case PM_TOKEN_KEYWORD_SELF: \
    case PM_TOKEN_KEYWORD_SUPER: \
    case PM_TOKEN_KEYWORD_THEN: \
    case PM_TOKEN_KEYWORD_TRUE: \
    case PM_TOKEN_KEYWORD_UNDEF: \
    case PM_TOKEN_KEYWORD_UNLESS: \
    case PM_TOKEN_KEYWORD_UNTIL: \
    case PM_TOKEN_KEYWORD_WHEN: \
    case PM_TOKEN_KEYWORD_WHILE: \
    case PM_TOKEN_KEYWORD_YIELD
15964
/**
 * Expands to a run of case labels covering the operator token types listed
 * below. The leading `case` and the trailing colon are supplied at the use
 * site, i.e. `case PM_CASE_OPERATOR:`.
 */
#define PM_CASE_OPERATOR \
    PM_TOKEN_AMPERSAND: \
    case PM_TOKEN_BACKTICK: \
    case PM_TOKEN_BANG_EQUAL: \
    case PM_TOKEN_BANG_TILDE: \
    case PM_TOKEN_BANG: \
    case PM_TOKEN_BRACKET_LEFT_RIGHT_EQUAL: \
    case PM_TOKEN_BRACKET_LEFT_RIGHT: \
    case PM_TOKEN_CARET: \
    case PM_TOKEN_EQUAL_EQUAL_EQUAL: \
    case PM_TOKEN_EQUAL_EQUAL: \
    case PM_TOKEN_EQUAL_TILDE: \
    case PM_TOKEN_GREATER_EQUAL: \
    case PM_TOKEN_GREATER_GREATER: \
    case PM_TOKEN_GREATER: \
    case PM_TOKEN_LESS_EQUAL_GREATER: \
    case PM_TOKEN_LESS_EQUAL: \
    case PM_TOKEN_LESS_LESS: \
    case PM_TOKEN_LESS: \
    case PM_TOKEN_MINUS: \
    case PM_TOKEN_PERCENT: \
    case PM_TOKEN_PIPE: \
    case PM_TOKEN_PLUS: \
    case PM_TOKEN_SLASH: \
    case PM_TOKEN_STAR_STAR: \
    case PM_TOKEN_STAR: \
    case PM_TOKEN_TILDE: \
    case PM_TOKEN_UAMPERSAND: \
    case PM_TOKEN_UMINUS: \
    case PM_TOKEN_UMINUS_NUM: \
    case PM_TOKEN_UPLUS: \
    case PM_TOKEN_USTAR: \
    case PM_TOKEN_USTAR_STAR
15977
/**
 * A list of case labels, to be written as `case PM_CASE_PRIMITIVE:`, covering
 * numeric literal tokens, the tokens that open symbols, regexps, command
 * strings, %-lists, strings and heredocs, character literals, the `->` token,
 * and the nil/self/true/false/__FILE__/__LINE__/__ENCODING__ keywords.
 *
 * NOTE(review): no use of this macro is visible in this part of the file;
 * judging by its name it presumably identifies tokens that can begin a
 * primitive value — confirm against the call sites.
 *
 * It shares labels with PM_CASE_KEYWORD (the keywords above) and with
 * PM_CASE_OPERATOR (PM_TOKEN_BACKTICK, PM_TOKEN_UMINUS_NUM), so it cannot be
 * combined with either of them in one switch.
 */
15983#define PM_CASE_PRIMITIVE PM_TOKEN_INTEGER: case PM_TOKEN_INTEGER_IMAGINARY: case PM_TOKEN_INTEGER_RATIONAL: \
15984 case PM_TOKEN_INTEGER_RATIONAL_IMAGINARY: case PM_TOKEN_FLOAT: case PM_TOKEN_FLOAT_IMAGINARY: \
15985 case PM_TOKEN_FLOAT_RATIONAL: case PM_TOKEN_FLOAT_RATIONAL_IMAGINARY: case PM_TOKEN_SYMBOL_BEGIN: \
15986 case PM_TOKEN_REGEXP_BEGIN: case PM_TOKEN_BACKTICK: case PM_TOKEN_PERCENT_LOWER_X: case PM_TOKEN_PERCENT_LOWER_I: \
15987 case PM_TOKEN_PERCENT_LOWER_W: case PM_TOKEN_PERCENT_UPPER_I: case PM_TOKEN_PERCENT_UPPER_W: \
15988 case PM_TOKEN_STRING_BEGIN: case PM_TOKEN_KEYWORD_NIL: case PM_TOKEN_KEYWORD_SELF: case PM_TOKEN_KEYWORD_TRUE: \
15989 case PM_TOKEN_KEYWORD_FALSE: case PM_TOKEN_KEYWORD___FILE__: case PM_TOKEN_KEYWORD___LINE__: \
15990 case PM_TOKEN_KEYWORD___ENCODING__: case PM_TOKEN_MINUS_GREATER: case PM_TOKEN_HEREDOC_START: \
15991 case PM_TOKEN_UMINUS_NUM: case PM_TOKEN_CHARACTER_LITERAL
15992
/**
 * A list of case labels, to be written as `case PM_CASE_PARAMETER:`, covering
 * the ampersand, star, double-star and `...` tokens together with identifier,
 * label, constant, and instance/global/class variable tokens.
 *
 * NOTE(review): no use of this macro is visible in this part of the file;
 * judging by its name it presumably identifies tokens that can begin a
 * parameter — confirm against the call sites.
 *
 * The ampersand and star entries are also part of PM_CASE_OPERATOR, so the two
 * lists cannot be used in the same switch.
 */
15997#define PM_CASE_PARAMETER PM_TOKEN_UAMPERSAND: case PM_TOKEN_AMPERSAND: case PM_TOKEN_UDOT_DOT_DOT: \
15998 case PM_TOKEN_IDENTIFIER: case PM_TOKEN_LABEL: case PM_TOKEN_USTAR: case PM_TOKEN_STAR: case PM_TOKEN_STAR_STAR: \
15999 case PM_TOKEN_USTAR_STAR: case PM_TOKEN_CONSTANT: case PM_TOKEN_INSTANCE_VARIABLE: case PM_TOKEN_GLOBAL_VARIABLE: \
16000 case PM_TOKEN_CLASS_VARIABLE
16001
/**
 * A list of case labels, to be written as `case PM_CASE_WRITABLE:`. Unlike the
 * token lists above, these are node types (variable/constant read nodes, the
 * constant path node, the multi target node, and the back/numbered reference
 * read nodes), so the macro belongs in a switch over a node type.
 *
 * NOTE(review): no use of this macro is visible in this part of the file;
 * judging by its name it presumably identifies nodes that may be converted
 * into write targets — confirm against the call sites.
 */
16006#define PM_CASE_WRITABLE PM_CLASS_VARIABLE_READ_NODE: case PM_CONSTANT_PATH_NODE: \
16007 case PM_CONSTANT_READ_NODE: case PM_GLOBAL_VARIABLE_READ_NODE: case PM_LOCAL_VARIABLE_READ_NODE: \
16008 case PM_INSTANCE_VARIABLE_READ_NODE: case PM_MULTI_TARGET_NODE: case PM_BACK_REFERENCE_READ_NODE: \
16009 case PM_NUMBERED_REFERENCE_READ_NODE: case PM_IT_LOCAL_VARIABLE_READ_NODE
16010
16011// Assert at compile time that the forced-UTF-8 flag of string nodes has the
16012// same value as the generic forced-UTF-8 encoding flag, so that the type of a
// node can be switched without having to move its flags.
16013PM_STATIC_ASSERT(__LINE__, ((int) PM_STRING_FLAGS_FORCED_UTF8_ENCODING) == ((int) PM_ENCODING_FLAGS_FORCED_UTF8_ENCODING), "Expected the flags to match.");
16014
/**
 * Compute the encoding-related node flags for string content that has just
 * been unescaped, based on parser->explicit_encoding (set when an escape
 * sequence dictated an encoding) and parser->encoding (the source encoding).
 * When no explicit encoding is recorded the function returns 0, i.e. no flags.
 *
 * NOTE(review): the embedded line numbers skip 16022, 16025 and 16031, and the
 * braces below do not balance as shown. The inner condition and the two
 * `return` statements that the comments describe (forced UTF-8 and forced
 * ASCII-8BIT) appear to be missing from this listing — restore them from the
 * original file before relying on this text.
 */
16019static inline pm_node_flags_t
16020parse_unescaped_encoding(const pm_parser_t *parser) {
16021 if (parser->explicit_encoding != NULL) {
16023 // If there's an explicit encoding and it's using a UTF-8 escape
16024 // sequence, then mark the string as UTF-8.
16026 } else if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
16027 // If there's a non-UTF-8 escape sequence being used, then the
16028 // string uses the source encoding, unless the source is marked as
16029 // US-ASCII. In that case the string is forced as ASCII-8BIT in
16030 // order to keep the string valid.
16032 }
16033 }
16034 return 0;
16035}
16036
/**
 * Parse one part of an interpolated, string-like construct and return the node
 * for it. The kind of part is chosen by switching on parser->current.type:
 *
 * - plain string content becomes a string node without opening or closing,
 *   flagged with the result of parse_unescaped_encoding;
 * - an embedded expression is parsed with parse_statements (context
 *   PM_CONTEXT_EMBEXPR) and wrapped in an embedded statements node;
 * - PM_TOKEN_EMBVAR becomes an embedded variable node wrapping the variable
 *   that follows, or wrapping a missing node (after reporting
 *   PM_ERR_EMBVAR_INVALID) when no supported variable token follows;
 * - any other token is consumed, PM_ERR_CANNOT_PARSE_STRING_PART is reported
 *   on it, and NULL is returned.
 *
 * Both interpolation branches clear parser->explicit_encoding.
 *
 * @param parser The parser; its current token selects the kind of part.
 * @param depth The current nesting depth, forwarded (plus one) to
 *     parse_statements.
 * @return The node for the part, or NULL for an unexpected token.
 *
 * NOTE(review): the embedded line numbers skip 16050, 16066, 16123, 16129,
 * 16135, 16141 and 16147. Every gap sits exactly where a `case` label would be
 * expected, so those labels appear to be missing from this listing — restore
 * them from the original file before compiling.
 */
16041static pm_node_t *
16042parse_string_part(pm_parser_t *parser, uint16_t depth) {
16043 switch (parser->current.type) {
16044 // Here the lexer has returned to us plain string content. In this case
16045 // we'll create a string node that has no opening or closing and return that
16046 // as the part. These kinds of parts look like:
16047 //
16048 // "aaa #{bbb} #@ccc ddd"
16049 // ^^^^ ^ ^^^^
// NOTE(review): line 16050 (presumably the case label and opening brace for
// string content) is absent here.
16051 pm_token_t opening = not_provided(parser);
16052 pm_token_t closing = not_provided(parser);
16053
16054 pm_node_t *node = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &parser->current, &closing);
16055 pm_node_flag_set(node, parse_unescaped_encoding(parser));
16056
16057 parser_lex(parser);
16058 return node;
16059 }
16060 // Here the lexer has returned the beginning of an embedded expression. In
16061 // that case we'll parse the inner statements and return that as the part.
16062 // These kinds of parts look like:
16063 //
16064 // "aaa #{bbb} #@ccc ddd"
16065 // ^^^^^^
// NOTE(review): line 16066 (presumably the case label and opening brace for
// the start of an embedded expression) is absent here.
16067 // Ruby disallows seeing encoding around interpolation in strings,
16068 // even though it is known at parse time.
16069 parser->explicit_encoding = NULL;
16070
// Save the lexer state and brace nesting so that the embedded expression is
// lexed from a fresh state; both are restored once the statements are parsed.
16071 pm_lex_state_t state = parser->lex_state;
16072 int brace_nesting = parser->brace_nesting;
16073
16074 parser->brace_nesting = 0;
16075 lex_state_set(parser, PM_LEX_STATE_BEG);
16076 parser_lex(parser);
16077
16078 pm_token_t opening = parser->previous;
16079 pm_statements_node_t *statements = NULL;
16080
// An immediately closed expression leaves `statements` as NULL.
16081 if (!match1(parser, PM_TOKEN_EMBEXPR_END)) {
16082 pm_accepts_block_stack_push(parser, true);
16083 statements = parse_statements(parser, PM_CONTEXT_EMBEXPR, (uint16_t) (depth + 1));
16084 pm_accepts_block_stack_pop(parser);
16085 }
16086
16087 parser->brace_nesting = brace_nesting;
16088 lex_state_set(parser, state);
16089
16090 expect1(parser, PM_TOKEN_EMBEXPR_END, PM_ERR_EMBEXPR_END);
16091 pm_token_t closing = parser->previous;
16092
16093 // If this set of embedded statements only contains a single
16094 // statement, then Ruby does not consider it as a possible statement
16095 // that could emit a line event.
16096 if (statements != NULL && statements->body.size == 1) {
16097 pm_node_flag_unset(statements->body.nodes[0], PM_NODE_FLAG_NEWLINE);
16098 }
16099
16100 return (pm_node_t *) pm_embedded_statements_node_create(parser, &opening, statements, &closing);
16101 }
16102
16103 // Here the lexer has returned the beginning of an embedded variable.
16104 // In that case we'll parse the variable and create an appropriate node
16105 // for it and then return that node. These kinds of parts look like:
16106 //
16107 // "aaa #{bbb} #@ccc ddd"
16108 // ^^^^^
16109 case PM_TOKEN_EMBVAR: {
16110 // Ruby disallows seeing encoding around interpolation in strings,
16111 // even though it is known at parse time.
16112 parser->explicit_encoding = NULL;
16113
16114 lex_state_set(parser, PM_LEX_STATE_BEG);
16115 parser_lex(parser);
16116
16117 pm_token_t operator = parser->previous;
16118 pm_node_t *variable;
16119
// NOTE(review): the case labels of this inner switch (lines 16123, 16129,
// 16135, 16141 and 16147) are absent from this listing; only the comments
// and bodies of the branches remain.
16120 switch (parser->current.type) {
16121 // In this case a back reference is being interpolated. We'll
16122 // create a back reference read node.
16124 parser_lex(parser);
16125 variable = (pm_node_t *) pm_back_reference_read_node_create(parser, &parser->previous);
16126 break;
16127 // In this case an nth reference is being interpolated. We'll
16128 // create a numbered reference read node.
16130 parser_lex(parser);
16131 variable = (pm_node_t *) pm_numbered_reference_read_node_create(parser, &parser->previous);
16132 break;
16133 // In this case a global variable is being interpolated. We'll
16134 // create a global variable read node.
16136 parser_lex(parser);
16137 variable = (pm_node_t *) pm_global_variable_read_node_create(parser, &parser->previous);
16138 break;
16139 // In this case an instance variable is being interpolated.
16140 // We'll create an instance variable read node.
16142 parser_lex(parser);
16143 variable = (pm_node_t *) pm_instance_variable_read_node_create(parser, &parser->previous);
16144 break;
16145 // In this case a class variable is being interpolated. We'll
16146 // create a class variable read node.
16148 parser_lex(parser);
16149 variable = (pm_node_t *) pm_class_variable_read_node_create(parser, &parser->previous);
16150 break;
16151 // We can hit here if we got an invalid token. In that case
16152 // we'll not attempt to lex this token and instead just return a
16153 // missing node.
16154 default:
16155 expect1(parser, PM_TOKEN_IDENTIFIER, PM_ERR_EMBVAR_INVALID);
16156 variable = (pm_node_t *) pm_missing_node_create(parser, parser->current.start, parser->current.end);
16157 break;
16158 }
16159
16160 return (pm_node_t *) pm_embedded_variable_node_create(parser, &operator, variable);
16161 }
// Any other token cannot start a string part: consume it, report the error on
// it, and hand back NULL so that the caller can skip it.
16162 default:
16163 parser_lex(parser);
16164 pm_parser_err_previous(parser, PM_ERR_CANNOT_PARSE_STRING_PART);
16165 return NULL;
16166 }
16167}
16168
16174static const uint8_t *
16175parse_operator_symbol_name(const pm_token_t *name) {
16176 switch (name->type) {
16177 case PM_TOKEN_TILDE:
16178 case PM_TOKEN_BANG:
16179 if (name->end[-1] == '@') return name->end - 1;
16180 /* fallthrough */
16181 default:
16182 return name->end;
16183 }
16184}
16185
16186static pm_node_t *
16187parse_operator_symbol(pm_parser_t *parser, const pm_token_t *opening, pm_lex_state_t next_state) {
16188 pm_token_t closing = not_provided(parser);
16189 pm_symbol_node_t *symbol = pm_symbol_node_create(parser, opening, &parser->current, &closing);
16190
16191 const uint8_t *end = parse_operator_symbol_name(&parser->current);
16192
16193 if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
16194 parser_lex(parser);
16195
16196 pm_string_shared_init(&symbol->unescaped, parser->previous.start, end);
16197 pm_node_flag_set((pm_node_t *) symbol, PM_SYMBOL_FLAGS_FORCED_US_ASCII_ENCODING);
16198
16199 return (pm_node_t *) symbol;
16200}
16201
/**
 * Parse a symbol whose opening token has already been consumed (the opening is
 * taken from parser->previous).
 *
 * Three shapes are handled, selected by the given lex mode:
 *
 * - when the mode is not PM_LEX_STRING the symbol is a bare name: an operator
 *   (delegated to parse_operator_symbol), a constant, a keyword, or otherwise
 *   an identifier/method name (PM_ERR_SYMBOL_INVALID is reported if neither);
 * - when the string mode allows interpolation, parts are read with
 *   parse_string_part; a single plain string part followed by the terminator
 *   is converted into a regular symbol node, anything else becomes an
 *   interpolated symbol node;
 * - otherwise the symbol holds at most plain string content, producing a
 *   symbol node, or an interpolated symbol node when the content is split in
 *   two by a heredoc body.
 *
 * @param parser The parser.
 * @param lex_mode The lex mode describing the symbol being parsed.
 * @param next_state The lex state to install before the last token of the
 *     symbol is consumed, or PM_LEX_STATE_NONE to keep the current one (for
 *     operator symbols PM_LEX_STATE_ENDFN is used in that case).
 * @param depth The current nesting depth, forwarded (plus one) to
 *     parse_string_part.
 * @return A symbol node or an interpolated symbol node.
 *
 * NOTE(review): the embedded line numbers skip 16217 and 16219-16224, between
 * the operator case and `case PM_CASE_KEYWORD`. Further case labels appear to
 * be missing from this listing — restore them from the original file.
 */
16207static pm_node_t *
16208parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_state, uint16_t depth) {
16209 const pm_token_t opening = parser->previous;
16210
// Bare symbols: the name is a single token directly after the opening.
16211 if (lex_mode->mode != PM_LEX_STRING) {
16212 if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
16213
16214 switch (parser->current.type) {
16215 case PM_CASE_OPERATOR:
16216 return parse_operator_symbol(parser, &opening, next_state == PM_LEX_STATE_NONE ? PM_LEX_STATE_ENDFN : next_state);
16218 case PM_TOKEN_CONSTANT:
16225 case PM_CASE_KEYWORD:
16226 parser_lex(parser);
16227 break;
16228 default:
16229 expect2(parser, PM_TOKEN_IDENTIFIER, PM_TOKEN_METHOD_NAME, PM_ERR_SYMBOL_INVALID);
16230 break;
16231 }
16232
16233 pm_token_t closing = not_provided(parser);
16234 pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, &closing);
16235
16236 pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
16237 pm_node_flag_set((pm_node_t *) symbol, parse_symbol_encoding(parser, &parser->previous, &symbol->unescaped, false));
16238
16239 return (pm_node_t *) symbol;
16240 }
16241
// Quoted symbols that allow interpolation.
16242 if (lex_mode->as.string.interpolation) {
16243 // If we have the end of the symbol, then we can return an empty symbol.
16244 if (match1(parser, PM_TOKEN_STRING_END)) {
16245 if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
16246 parser_lex(parser);
16247
16248 pm_token_t content = not_provided(parser);
16249 pm_token_t closing = parser->previous;
16250 return (pm_node_t *) pm_symbol_node_create(parser, &opening, &content, &closing);
16251 }
16252
16253 // Now we can parse the first part of the symbol.
16254 pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
16255
16256 // If we got a string part, then it's possible that we could transform
16257 // what looks like an interpolated symbol into a regular symbol.
16258 if (part && PM_NODE_TYPE_P(part, PM_STRING_NODE) && match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
16259 if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
16260 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_INTERPOLATED);
16261
16262 return (pm_node_t *) pm_string_node_to_symbol_node(parser, (pm_string_node_t *) part, &opening, &parser->previous);
16263 }
16264
// The opening token is passed as a placeholder closing here; the real closing
// location is set once the terminator has been handled below.
16265 pm_interpolated_symbol_node_t *symbol = pm_interpolated_symbol_node_create(parser, &opening, NULL, &opening);
16266 if (part) pm_interpolated_symbol_node_append(symbol, part);
16267
16268 while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
16269 if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
16270 pm_interpolated_symbol_node_append(symbol, part);
16271 }
16272 }
16273
16274 if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
// At the end of the file the error is reported on the opening token rather
// than through expect1.
16275 if (match1(parser, PM_TOKEN_EOF)) {
16276 pm_parser_err_token(parser, &opening, PM_ERR_SYMBOL_TERM_INTERPOLATED);
16277 } else {
16278 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_INTERPOLATED);
16279 }
16280
16281 pm_interpolated_symbol_node_closing_loc_set(symbol, &parser->previous);
16282 return (pm_node_t *) symbol;
16283 }
16284
// Quoted symbols without interpolation.
16285 pm_token_t content;
16286 pm_string_t unescaped;
16287
16288 if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
16289 content = parser->current;
16290 unescaped = parser->current_string;
16291 parser_lex(parser);
16292
16293 // If we have two string contents in a row, then the content of this
16294 // symbol is split because of heredoc contents. This looks like:
16295 //
16296 // <<A; :'a
16297 // A
16298 // b'
16299 //
16300 // In this case, the best way we have to represent this is as an
16301 // interpolated symbol node, so that's what we'll do here.
16302 if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
16303 pm_interpolated_symbol_node_t *symbol = pm_interpolated_symbol_node_create(parser, &opening, NULL, &opening);
16304 pm_token_t bounds = not_provided(parser);
16305
16306 pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &bounds, &content, &bounds, &unescaped);
16307 pm_interpolated_symbol_node_append(symbol, part);
16308
16309 part = (pm_node_t *) pm_string_node_create_unescaped(parser, &bounds, &parser->current, &bounds, &parser->current_string);
16310 pm_interpolated_symbol_node_append(symbol, part);
16311
16312 if (next_state != PM_LEX_STATE_NONE) {
16313 lex_state_set(parser, next_state);
16314 }
16315
16316 parser_lex(parser);
16317 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_DYNAMIC);
16318
16319 pm_interpolated_symbol_node_closing_loc_set(symbol, &parser->previous);
16320 return (pm_node_t *) symbol;
16321 }
16322 } else {
// No content token: synthesize an empty one located right after the previous
// token.
16323 content = (pm_token_t) { .type = PM_TOKEN_STRING_CONTENT, .start = parser->previous.end, .end = parser->previous.end };
16324 pm_string_shared_init(&unescaped, content.start, content.end);
16325 }
16326
16327 if (next_state != PM_LEX_STATE_NONE) {
16328 lex_state_set(parser, next_state);
16329 }
16330
16331 if (match1(parser, PM_TOKEN_EOF)) {
16332 pm_parser_err_token(parser, &opening, PM_ERR_SYMBOL_TERM_DYNAMIC);
16333 } else {
16334 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_DYNAMIC);
16335 }
16336
16337 return (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, false));
16338}
16339
/**
 * Parse a single argument to `undef` (going by the function name and the
 * PM_ERR_UNDEF_ARGUMENT error), starting at the parser's current token.
 *
 * - operators become operator symbols via parse_operator_symbol;
 * - keywords, constants and method names are consumed and become symbol nodes
 *   without opening or closing tokens;
 * - PM_TOKEN_SYMBOL_BEGIN is handed to parse_symbol using a copy of the lex
 *   mode that was current before the token was consumed;
 * - anything else reports PM_ERR_UNDEF_ARGUMENT on the current token and
 *   yields a missing node, without consuming the token.
 *
 * @param parser The parser.
 * @param depth The current nesting depth, forwarded (plus one) to
 *     parse_symbol.
 * @return The node for the argument.
 *
 * NOTE(review): the embedded line numbers skip 16353, between
 * `case PM_TOKEN_CONSTANT` and `case PM_TOKEN_METHOD_NAME`; one more case label
 * appears to be missing from this listing.
 */
16344static inline pm_node_t *
16345parse_undef_argument(pm_parser_t *parser, uint16_t depth) {
16346 switch (parser->current.type) {
16347 case PM_CASE_OPERATOR: {
16348 const pm_token_t opening = not_provided(parser);
16349 return parse_operator_symbol(parser, &opening, PM_LEX_STATE_NONE);
16350 }
16351 case PM_CASE_KEYWORD:
16352 case PM_TOKEN_CONSTANT:
16354 case PM_TOKEN_METHOD_NAME: {
16355 parser_lex(parser);
16356
16357 pm_token_t opening = not_provided(parser);
16358 pm_token_t closing = not_provided(parser);
16359 pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, &closing);
16360
16361 pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
16362 pm_node_flag_set((pm_node_t *) symbol, parse_symbol_encoding(parser, &parser->previous, &symbol->unescaped, false));
16363
16364 return (pm_node_t *) symbol;
16365 }
16366 case PM_TOKEN_SYMBOL_BEGIN: {
// Copy the lex mode before lexing, since parse_symbol needs the mode that
// applied to the symbol's opening.
16367 pm_lex_mode_t lex_mode = *parser->lex_modes.current;
16368 parser_lex(parser);
16369
16370 return parse_symbol(parser, &lex_mode, PM_LEX_STATE_NONE, (uint16_t) (depth + 1));
16371 }
16372 default:
16373 pm_parser_err_current(parser, PM_ERR_UNDEF_ARGUMENT);
16374 return (pm_node_t *) pm_missing_node_create(parser, parser->current.start, parser->current.end);
16375 }
16376}
16377
/**
 * Parse a single argument to `alias` (going by the function name and the
 * PM_ERR_ALIAS_ARGUMENT error), starting at the parser's current token.
 *
 * When `first` is true, the lex state is set to
 * PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM before the argument's final token is
 * consumed; otherwise the lex state is left alone.
 *
 * - operators become operator symbols via parse_operator_symbol;
 * - keywords, constants and method names are consumed and become symbol nodes
 *   without opening or closing tokens;
 * - PM_TOKEN_SYMBOL_BEGIN is handed to parse_symbol;
 * - three further branches produce back reference, numbered reference and
 *   global variable read nodes;
 * - anything else reports PM_ERR_ALIAS_ARGUMENT on the current token and
 *   yields a missing node, without consuming the token.
 *
 * @param parser The parser.
 * @param first Whether this is the first of the alias arguments.
 * @param depth The current nesting depth, forwarded (plus one) to
 *     parse_symbol.
 * @return The node for the argument.
 *
 * NOTE(review): the embedded line numbers skip 16393, 16413, 16416 and 16419.
 * The case labels for the three variable-read branches, and one label before
 * `case PM_TOKEN_METHOD_NAME`, appear to be missing from this listing —
 * restore them from the original file before compiling.
 */
16384static inline pm_node_t *
16385parse_alias_argument(pm_parser_t *parser, bool first, uint16_t depth) {
16386 switch (parser->current.type) {
16387 case PM_CASE_OPERATOR: {
16388 const pm_token_t opening = not_provided(parser);
16389 return parse_operator_symbol(parser, &opening, first ? PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM : PM_LEX_STATE_NONE);
16390 }
16391 case PM_CASE_KEYWORD:
16392 case PM_TOKEN_CONSTANT:
16394 case PM_TOKEN_METHOD_NAME: {
16395 if (first) lex_state_set(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM);
16396 parser_lex(parser);
16397
16398 pm_token_t opening = not_provided(parser);
16399 pm_token_t closing = not_provided(parser);
16400 pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, &closing);
16401
16402 pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
16403 pm_node_flag_set((pm_node_t *) symbol, parse_symbol_encoding(parser, &parser->previous, &symbol->unescaped, false));
16404
16405 return (pm_node_t *) symbol;
16406 }
16407 case PM_TOKEN_SYMBOL_BEGIN: {
// Copy the lex mode before lexing, since parse_symbol needs the mode that
// applied to the symbol's opening.
16408 pm_lex_mode_t lex_mode = *parser->lex_modes.current;
16409 parser_lex(parser);
16410
16411 return parse_symbol(parser, &lex_mode, first ? PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM : PM_LEX_STATE_NONE, (uint16_t) (depth + 1));
16412 }
// NOTE(review): the case labels of the next three branches are absent from
// this listing (lines 16413, 16416 and 16419).
16414 parser_lex(parser);
16415 return (pm_node_t *) pm_back_reference_read_node_create(parser, &parser->previous);
16417 parser_lex(parser);
16418 return (pm_node_t *) pm_numbered_reference_read_node_create(parser, &parser->previous);
16420 parser_lex(parser);
16421 return (pm_node_t *) pm_global_variable_read_node_create(parser, &parser->previous);
16422 default:
16423 pm_parser_err_current(parser, PM_ERR_ALIAS_ARGUMENT);
16424 return (pm_node_t *) pm_missing_node_create(parser, parser->current.start, parser->current.end);
16425 }
16426}
16427
16432static pm_node_t *
16433parse_variable(pm_parser_t *parser) {
16434 pm_constant_id_t name_id = pm_parser_constant_id_token(parser, &parser->previous);
16435 int depth;
16436 bool is_numbered_param = pm_token_is_numbered_parameter(parser->previous.start, parser->previous.end);
16437
16438 if (!is_numbered_param && ((depth = pm_parser_local_depth_constant_id(parser, name_id)) != -1)) {
16439 return (pm_node_t *) pm_local_variable_read_node_create_constant_id(parser, &parser->previous, name_id, (uint32_t) depth, false);
16440 }
16441
16442 pm_scope_t *current_scope = parser->current_scope;
16443 if (!current_scope->closed && !(current_scope->parameters & PM_SCOPE_PARAMETERS_IMPLICIT_DISALLOWED)) {
16444 if (is_numbered_param) {
16445 // When you use a numbered parameter, it implies the existence of
16446 // all of the locals that exist before it. For example, referencing
16447 // _2 means that _1 must exist. Therefore here we loop through all
16448 // of the possibilities and add them into the constant pool.
16449 uint8_t maximum = (uint8_t) (parser->previous.start[1] - '0');
16450 for (uint8_t number = 1; number <= maximum; number++) {
16451 pm_parser_local_add_constant(parser, pm_numbered_parameter_names[number - 1], 2);
16452 }
16453
16454 if (!match1(parser, PM_TOKEN_EQUAL)) {
16455 parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_NUMBERED_FOUND;
16456 }
16457
16458 pm_node_t *node = (pm_node_t *) pm_local_variable_read_node_create_constant_id(parser, &parser->previous, name_id, 0, false);
16459 pm_node_list_append(&current_scope->implicit_parameters, node);
16460
16461 return node;
16462 } else if ((parser->version != PM_OPTIONS_VERSION_CRUBY_3_3) && pm_token_is_it(parser->previous.start, parser->previous.end)) {
16463 pm_node_t *node = (pm_node_t *) pm_it_local_variable_read_node_create(parser, &parser->previous);
16464 pm_node_list_append(&current_scope->implicit_parameters, node);
16465
16466 return node;
16467 }
16468 }
16469
16470 return NULL;
16471}
16472
/**
 * Parse the identifier held in parser->previous as either a variable read or a
 * call without arguments.
 *
 * parse_variable is tried first, but only when the identifier is not followed
 * by an opening parenthesis and does not end in `!` or `?`. If that yields a
 * node it is returned; otherwise a call node is created for the identifier
 * with pm_call_node_variable_call_create.
 *
 * @param parser The parser; parser->previous must be the identifier.
 * @return The variable read node or the call node.
 *
 * NOTE(review): the embedded line numbers skip 16483, right after the early
 * return inside the `if`. As shown, `flags` is never modified and is always 0
 * when it is applied to the call node, so a statement updating `flags`
 * appears to be missing from this listing.
 */
16476static pm_node_t *
16477parse_variable_call(pm_parser_t *parser) {
16478 pm_node_flags_t flags = 0;
16479
16480 if (!match1(parser, PM_TOKEN_PARENTHESIS_LEFT) && (parser->previous.end[-1] != '!') && (parser->previous.end[-1] != '?')) {
16481 pm_node_t *node = parse_variable(parser);
16482 if (node != NULL) return node;
16484 }
16485
16486 pm_call_node_t *node = pm_call_node_variable_call_create(parser, &parser->previous);
16487 pm_node_flag_set((pm_node_t *)node, flags);
16488
16489 return (pm_node_t *) node;
16490}
16491
/**
 * Consume and return the token that names a method in a method definition
 * (going by the function name and the PM_ERR_DEF_NAME error).
 *
 * Keywords and constants are consumed as they are. One branch first runs
 * pm_refute_numbered_parameter on the current token, and operators are
 * consumed after switching the lex state to PM_LEX_STATE_ENDFN. Any other
 * token reports PM_ERR_DEF_NAME and yields a PM_TOKEN_MISSING token spanning
 * the current token, which is not consumed.
 *
 * @param parser The parser.
 * @return The name token, or a missing token on error.
 *
 * NOTE(review): the embedded line numbers skip 16502 and 16505. One case label
 * after `case PM_TOKEN_CONSTANT`, and the label of the branch that refutes
 * numbered parameters, appear to be missing from this listing.
 */
16497static inline pm_token_t
16498parse_method_definition_name(pm_parser_t *parser) {
16499 switch (parser->current.type) {
16500 case PM_CASE_KEYWORD:
16501 case PM_TOKEN_CONSTANT:
16503 parser_lex(parser);
16504 return parser->previous;
16506 pm_refute_numbered_parameter(parser, parser->current.start, parser->current.end);
16507 parser_lex(parser);
16508 return parser->previous;
16509 case PM_CASE_OPERATOR:
16510 lex_state_set(parser, PM_LEX_STATE_ENDFN);
16511 parser_lex(parser);
16512 return parser->previous;
16513 default:
16514 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_DEF_NAME, pm_token_type_human(parser->current.type));
16515 return (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->current.start, .end = parser->current.end };
16516 }
16517}
16518
16519static void
16520parse_heredoc_dedent_string(pm_string_t *string, size_t common_whitespace) {
16521 // Get a reference to the string struct that is being held by the string
16522 // node. This is the value we're going to actually manipulate.
16523 pm_string_ensure_owned(string);
16524
16525 // Now get the bounds of the existing string. We'll use this as a
16526 // destination to move bytes into. We'll also use it for bounds checking
16527 // since we don't require that these strings be null terminated.
16528 size_t dest_length = pm_string_length(string);
16529 const uint8_t *source_cursor = (uint8_t *) string->source;
16530 const uint8_t *source_end = source_cursor + dest_length;
16531
16532 // We're going to move bytes backward in the string when we get leading
16533 // whitespace, so we'll maintain a pointer to the current position in the
16534 // string that we're writing to.
16535 size_t trimmed_whitespace = 0;
16536
16537 // While we haven't reached the amount of common whitespace that we need to
16538 // trim and we haven't reached the end of the string, we'll keep trimming
16539 // whitespace. Trimming in this context means skipping over these bytes such
16540 // that they aren't copied into the new string.
16541 while ((source_cursor < source_end) && pm_char_is_inline_whitespace(*source_cursor) && trimmed_whitespace < common_whitespace) {
16542 if (*source_cursor == '\t') {
16543 trimmed_whitespace = (trimmed_whitespace / PM_TAB_WHITESPACE_SIZE + 1) * PM_TAB_WHITESPACE_SIZE;
16544 if (trimmed_whitespace > common_whitespace) break;
16545 } else {
16546 trimmed_whitespace++;
16547 }
16548
16549 source_cursor++;
16550 dest_length--;
16551 }
16552
16553 memmove((uint8_t *) string->source, source_cursor, (size_t) (source_end - source_cursor));
16554 string->length = dest_length;
16555}
16556
16560static void
16561parse_heredoc_dedent(pm_parser_t *parser, pm_node_list_t *nodes, size_t common_whitespace) {
16562 // The next node should be dedented if it's the first node in the list or if
16563 // it follows a string node.
16564 bool dedent_next = true;
16565
16566 // Iterate over all nodes, and trim whitespace accordingly. We're going to
16567 // keep around two indices: a read and a write. If we end up trimming all of
16568 // the whitespace from a node, then we'll drop it from the list entirely.
16569 size_t write_index = 0;
16570
16571 pm_node_t *node;
16572 PM_NODE_LIST_FOREACH(nodes, read_index, node) {
16573 // We're not manipulating child nodes that aren't strings. In this case
16574 // we'll skip past it and indicate that the subsequent node should not
16575 // be dedented.
16576 if (!PM_NODE_TYPE_P(node, PM_STRING_NODE)) {
16577 nodes->nodes[write_index++] = node;
16578 dedent_next = false;
16579 continue;
16580 }
16581
16582 pm_string_node_t *string_node = ((pm_string_node_t *) node);
16583 if (dedent_next) {
16584 parse_heredoc_dedent_string(&string_node->unescaped, common_whitespace);
16585 }
16586
16587 if (string_node->unescaped.length == 0) {
16588 pm_node_destroy(parser, node);
16589 } else {
16590 nodes->nodes[write_index++] = node;
16591 }
16592
16593 // We always dedent the next node if it follows a string node.
16594 dedent_next = true;
16595 }
16596
16597 nodes->size = write_index;
16598}
16599
16603static pm_token_t
16604parse_strings_empty_content(const uint8_t *location) {
16605 return (pm_token_t) { .type = PM_TOKEN_STRING_CONTENT, .start = location, .end = location };
16606}
16607
16611static inline pm_node_t *
16612parse_strings(pm_parser_t *parser, pm_node_t *current, bool accepts_label, uint16_t depth) {
16613 assert(parser->current.type == PM_TOKEN_STRING_BEGIN);
16614 bool concating = false;
16615
16616 while (match1(parser, PM_TOKEN_STRING_BEGIN)) {
16617 pm_node_t *node = NULL;
16618
16619 // Here we have found a string literal. We'll parse it and add it to
16620 // the list of strings.
16621 const pm_lex_mode_t *lex_mode = parser->lex_modes.current;
16622 assert(lex_mode->mode == PM_LEX_STRING);
16623 bool lex_interpolation = lex_mode->as.string.interpolation;
16624 bool label_allowed = lex_mode->as.string.label_allowed && accepts_label;
16625
16626 pm_token_t opening = parser->current;
16627 parser_lex(parser);
16628
16629 if (match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
16630 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_EOF);
16631 // If we get here, then we have an end immediately after a
16632 // start. In that case we'll create an empty content token and
16633 // return an uninterpolated string.
16634 pm_token_t content = parse_strings_empty_content(parser->previous.start);
16635 pm_string_node_t *string = pm_string_node_create(parser, &opening, &content, &parser->previous);
16636
16637 pm_string_shared_init(&string->unescaped, content.start, content.end);
16638 node = (pm_node_t *) string;
16639 } else if (accept1(parser, PM_TOKEN_LABEL_END)) {
16640 // If we get here, then we have an end of a label immediately
16641 // after a start. In that case we'll create an empty symbol
16642 // node.
16643 pm_token_t content = parse_strings_empty_content(parser->previous.start);
16644 pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &content, &parser->previous);
16645
16646 pm_string_shared_init(&symbol->unescaped, content.start, content.end);
16647 node = (pm_node_t *) symbol;
16648
16649 if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL);
16650 } else if (!lex_interpolation) {
16651 // If we don't accept interpolation then we expect the string to
16652 // start with a single string content node.
16653 pm_string_t unescaped;
16654 pm_token_t content;
16655
16656 if (match1(parser, PM_TOKEN_EOF)) {
16657 unescaped = PM_STRING_EMPTY;
16658 content = not_provided(parser);
16659 } else {
16660 unescaped = parser->current_string;
16661 expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_EXPECT_STRING_CONTENT);
16662 content = parser->previous;
16663 }
16664
16665 // It is unfortunately possible to have multiple string content
16666 // nodes in a row in the case that there's heredoc content in
16667 // the middle of the string, like this cursed example:
16668 //
16669 // <<-END+'b
16670 // a
16671 // END
16672 // c'+'d'
16673 //
16674 // In that case we need to switch to an interpolated string to
16675 // be able to contain all of the parts.
16676 if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
16677 pm_node_list_t parts = { 0 };
16678
16679 pm_token_t delimiters = not_provided(parser);
16680 pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &delimiters, &content, &delimiters, &unescaped);
16681 pm_node_list_append(&parts, part);
16682
16683 do {
16684 part = (pm_node_t *) pm_string_node_create_current_string(parser, &delimiters, &parser->current, &delimiters);
16685 pm_node_list_append(&parts, part);
16686 parser_lex(parser);
16687 } while (match1(parser, PM_TOKEN_STRING_CONTENT));
16688
16689 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_EOF);
16690 node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
16691
16692 pm_node_list_free(&parts);
16693 } else if (accept1(parser, PM_TOKEN_LABEL_END)) {
16694 node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, true));
16695 if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL);
16696 } else if (match1(parser, PM_TOKEN_EOF)) {
16697 pm_parser_err_token(parser, &opening, PM_ERR_STRING_LITERAL_EOF);
16698 node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped);
16699 } else if (accept1(parser, PM_TOKEN_STRING_END)) {
16700 node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
16701 } else {
16702 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_STRING_LITERAL_TERM, pm_token_type_human(parser->previous.type));
16703 parser->previous.start = parser->previous.end;
16704 parser->previous.type = PM_TOKEN_MISSING;
16705 node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
16706 }
16707 } else if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
16708 // In this case we've hit string content so we know the string
16709 // at least has something in it. We'll need to check if the
16710 // following token is the end (in which case we can return a
16711 // plain string) or if it's not then it has interpolation.
16712 pm_token_t content = parser->current;
16713 pm_string_t unescaped = parser->current_string;
16714 parser_lex(parser);
16715
16716 if (match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
16717 node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped);
16718 pm_node_flag_set(node, parse_unescaped_encoding(parser));
16719
16720 // Kind of odd behavior, but basically if we have an
16721 // unterminated string and it ends in a newline, we back up one
16722 // character so that the error message is on the last line of
16723 // content in the string.
16724 if (!accept1(parser, PM_TOKEN_STRING_END)) {
16725 const uint8_t *location = parser->previous.end;
16726 if (location > parser->start && location[-1] == '\n') location--;
16727 pm_parser_err(parser, location, location, PM_ERR_STRING_LITERAL_EOF);
16728
16729 parser->previous.start = parser->previous.end;
16730 parser->previous.type = PM_TOKEN_MISSING;
16731 }
16732 } else if (accept1(parser, PM_TOKEN_LABEL_END)) {
16733 node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, true));
16734 if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL);
16735 } else {
16736 // If we get here, then we have interpolation so we'll need
16737 // to create a string or symbol node with interpolation.
16738 pm_node_list_t parts = { 0 };
16739 pm_token_t string_opening = not_provided(parser);
16740 pm_token_t string_closing = not_provided(parser);
16741
16742 pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &string_opening, &parser->previous, &string_closing, &unescaped);
16743 pm_node_flag_set(part, parse_unescaped_encoding(parser));
16744 pm_node_list_append(&parts, part);
16745
16746 while (!match3(parser, PM_TOKEN_STRING_END, PM_TOKEN_LABEL_END, PM_TOKEN_EOF)) {
16747 if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
16748 pm_node_list_append(&parts, part);
16749 }
16750 }
16751
16752 if (accept1(parser, PM_TOKEN_LABEL_END)) {
16753 node = (pm_node_t *) pm_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous);
16754 if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL);
16755 } else if (match1(parser, PM_TOKEN_EOF)) {
16756 pm_parser_err_token(parser, &opening, PM_ERR_STRING_INTERPOLATED_TERM);
16757 node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->current);
16758 } else {
16759 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_INTERPOLATED_TERM);
16760 node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
16761 }
16762
16763 pm_node_list_free(&parts);
16764 }
16765 } else {
16766 // If we get here, then the first part of the string is not plain
16767 // string content, in which case we need to parse the string as an
16768 // interpolated string.
16769 pm_node_list_t parts = { 0 };
16770 pm_node_t *part;
16771
16772 while (!match3(parser, PM_TOKEN_STRING_END, PM_TOKEN_LABEL_END, PM_TOKEN_EOF)) {
16773 if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
16774 pm_node_list_append(&parts, part);
16775 }
16776 }
16777
16778 if (accept1(parser, PM_TOKEN_LABEL_END)) {
16779 node = (pm_node_t *) pm_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous);
16780 if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL);
16781 } else if (match1(parser, PM_TOKEN_EOF)) {
16782 pm_parser_err_token(parser, &opening, PM_ERR_STRING_INTERPOLATED_TERM);
16783 node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->current);
16784 } else {
16785 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_INTERPOLATED_TERM);
16786 node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
16787 }
16788
16789 pm_node_list_free(&parts);
16790 }
16791
16792 if (current == NULL) {
16793 // If the node we just parsed is a symbol node, then we can't
16794 // concatenate it with anything else, so we can now return that
16795 // node.
16797 return node;
16798 }
16799
16800 // If we don't already have a node, then it's fine and we can just
16801 // set the result to be the node we just parsed.
16802 current = node;
16803 } else {
16804 // Otherwise we need to check the type of the node we just parsed.
16805 // If it cannot be concatenated with the previous node, then we'll
16806 // need to add a syntax error.
16808 pm_parser_err_node(parser, node, PM_ERR_STRING_CONCATENATION);
16809 }
16810
16811 // If we haven't already created our container for concatenation,
16812 // we'll do that now.
16813 if (!concating) {
16814 concating = true;
16815 pm_token_t bounds = not_provided(parser);
16816
16817 pm_interpolated_string_node_t *container = pm_interpolated_string_node_create(parser, &bounds, NULL, &bounds);
16818 pm_interpolated_string_node_append(container, current);
16819 current = (pm_node_t *) container;
16820 }
16821
16822 pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, node);
16823 }
16824 }
16825
16826 return current;
16827}
16828
// Flags for the `flags` argument of parse_pattern. PM_PARSE_PATTERN_SINGLE is
// zero (no flag set); the other two are distinct bits that callers combine
// with bitwise OR.
16829 #define PM_PARSE_PATTERN_SINGLE 0
// Without this bit, parse_pattern reports PM_ERR_PATTERN_HASH_IMPLICIT for a
// hash pattern that begins directly with a label, a `**`, or a string label.
16830 #define PM_PARSE_PATTERN_TOP 1
// With this bit (or PM_PARSE_PATTERN_TOP), a leading `*` in parse_pattern is
// parsed as a rest pattern through parse_pattern_rest.
16831 #define PM_PARSE_PATTERN_MULTI 2
16832
// Forward declaration: the helpers that follow call parse_pattern for nested
// patterns before its definition appears.
16833 static pm_node_t *
16834 parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flags, pm_diagnostic_id_t diag_id, uint16_t depth);
16835
/**
 * Record a name captured by a pattern in the captures list. If the same
 * constant id is already in the list, PM_ERR_PATTERN_CAPTURE_DUPLICATE is
 * reported over the given location instead of appending it again.
 *
 * Names whose first byte is `_` are skipped entirely: they are neither
 * recorded nor checked for duplicates.
 *
 * @param parser The parser that receives the diagnostic.
 * @param captures The capture ids collected so far; appended to when the name is new.
 * @param capture The constant id of the captured name.
 * @param location The source range of the name, used for the underscore check
 *     and as the range of the duplicate error.
 */
16841 static void
16842 parse_pattern_capture(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_constant_id_t capture, const pm_location_t *location) {
16843 // Skip this capture if it starts with an underscore.
// NOTE(review): the first byte is read without checking that the location is
// non-empty — confirm every caller passes a range with at least one byte.
16844 if (*location->start == '_') return;
16845
16846 if (pm_constant_id_list_includes(captures, capture)) {
16847 pm_parser_err(parser, location->start, location->end, PM_ERR_PATTERN_CAPTURE_DUPLICATE);
16848 } else {
16849 pm_constant_id_list_append(captures, capture);
16850 }
16851 }
16852
/**
 * Continue parsing a pattern that begins with a constant. Any following
 * `::Constant` segments are consumed and wrapped around `node` as constant
 * path nodes. If a `[` or `(` then follows, the delimited inner pattern is
 * parsed and the constant is attached to it.
 *
 * @param parser The parser.
 * @param captures The captures list, forwarded to the nested parse_pattern call.
 * @param node The constant (or constant path) node parsed so far.
 * @param depth The current recursion depth; the nested parse receives depth + 1.
 * @return The constant path itself when no `[` or `(` follows; otherwise an
 *     array, find, or hash pattern node that carries the constant.
 */
16856 static pm_node_t *
16857 parse_pattern_constant_path(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node_t *node, uint16_t depth) {
16858 // Now, if there are any :: operators that follow, parse them as constant
16859 // path nodes.
16860 while (accept1(parser, PM_TOKEN_COLON_COLON)) {
16861 pm_token_t delimiter = parser->previous;
16862 expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
16863 node = (pm_node_t *) pm_constant_path_node_create(parser, node, &delimiter, &parser->previous);
16864 }
16865
16866 // If there is a [ or ( that follows, then this is part of a larger pattern
16867 // expression. We'll parse the inner pattern here, then modify the returned
16868 // inner pattern with our constant path attached.
16869 if (!match2(parser, PM_TOKEN_BRACKET_LEFT, PM_TOKEN_PARENTHESIS_LEFT)) {
16870 return node;
16871 }
16872
16873 pm_token_t opening;
16874 pm_token_t closing;
16875 pm_node_t *inner = NULL;
16876
16877 if (accept1(parser, PM_TOKEN_BRACKET_LEFT)) {
16878 opening = parser->previous;
16879 accept1(parser, PM_TOKEN_NEWLINE);
16880
// An immediately closing bracket leaves inner as NULL (the Foo[] case).
16881 if (!accept1(parser, PM_TOKEN_BRACKET_RIGHT)) {
16882 inner = parse_pattern(parser, captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_BRACKET, (uint16_t) (depth + 1));
16883 accept1(parser, PM_TOKEN_NEWLINE);
16884 expect1(parser, PM_TOKEN_BRACKET_RIGHT, PM_ERR_PATTERN_TERM_BRACKET);
16885 }
16886
16887 closing = parser->previous;
16888 } else {
// The match2 check above guarantees that the current token is a ( here, so
// it is consumed unconditionally.
16889 parser_lex(parser);
16890 opening = parser->previous;
16891 accept1(parser, PM_TOKEN_NEWLINE);
16892
// An immediately closing parenthesis leaves inner as NULL (the Foo() case).
16893 if (!accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
16894 inner = parse_pattern(parser, captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_PAREN, (uint16_t) (depth + 1));
16895 accept1(parser, PM_TOKEN_NEWLINE);
16896 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN);
16897 }
16898
16899 closing = parser->previous;
16900 }
16901
16902 if (!inner) {
16903 // If there was no inner pattern, then we have something like Foo() or
16904 // Foo[]. In that case we'll create an array pattern with no requireds.
16905 return (pm_node_t *) pm_array_pattern_node_constant_create(parser, node, &opening, &closing);
16906 }
16907
16908 // Now that we have the inner pattern, check to see if it's an array, find,
16909 // or hash pattern. If it is, then we'll attach our constant path to it if
16910 // it doesn't already have a constant. If it's not one of those node types
16911 // or it does have a constant, then we'll create an array pattern.
16912 switch (PM_NODE_TYPE(inner)) {
16913 case PM_ARRAY_PATTERN_NODE: {
16914 pm_array_pattern_node_t *pattern_node = (pm_array_pattern_node_t *) inner;
16915
// Reuse the inner node only when it has neither a constant nor an opening
// delimiter recorded; its location is widened to span from the constant to
// the closing delimiter.
16916 if (pattern_node->constant == NULL && pattern_node->opening_loc.start == NULL) {
16917 pattern_node->base.location.start = node->location.start;
16918 pattern_node->base.location.end = closing.end;
16919
16920 pattern_node->constant = node;
16921 pattern_node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
16922 pattern_node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
16923
16924 return (pm_node_t *) pattern_node;
16925 }
16926
16927 break;
16928 }
16929 case PM_FIND_PATTERN_NODE: {
16930 pm_find_pattern_node_t *pattern_node = (pm_find_pattern_node_t *) inner;
16931
// Same reuse rule as the array pattern case above.
16932 if (pattern_node->constant == NULL && pattern_node->opening_loc.start == NULL) {
16933 pattern_node->base.location.start = node->location.start;
16934 pattern_node->base.location.end = closing.end;
16935
16936 pattern_node->constant = node;
16937 pattern_node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
16938 pattern_node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
16939
16940 return (pm_node_t *) pattern_node;
16941 }
16942
16943 break;
16944 }
16945 case PM_HASH_PATTERN_NODE: {
16946 pm_hash_pattern_node_t *pattern_node = (pm_hash_pattern_node_t *) inner;
16947
// Same reuse rule as the array pattern case above.
16948 if (pattern_node->constant == NULL && pattern_node->opening_loc.start == NULL) {
16949 pattern_node->base.location.start = node->location.start;
16950 pattern_node->base.location.end = closing.end;
16951
16952 pattern_node->constant = node;
16953 pattern_node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
16954 pattern_node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
16955
16956 return (pm_node_t *) pattern_node;
16957 }
16958
16959 break;
16960 }
16961 default:
16962 break;
16963 }
16964
16965 // If we got here, then we didn't return one of the inner patterns by
16966 // attaching its constant. In this case we'll create an array pattern and
16967 // attach our constant to it.
16968 pm_array_pattern_node_t *pattern_node = pm_array_pattern_node_constant_create(parser, node, &opening, &closing);
16969 pm_array_pattern_node_requireds_append(pattern_node, inner);
16970 return (pm_node_t *) pattern_node;
16971 }
16972
/**
 * Parse a rest pattern. The `*` operator must already have been consumed (it
 * is read from parser->previous, and this is asserted). An identifier after
 * the operator is optional; when present it is registered as a local variable
 * if it is not one already, recorded as a capture, and becomes the splat's
 * target.
 *
 * @param parser The parser.
 * @param captures The captures list that the optional name is recorded in.
 * @return A splat node whose expression is a local variable target node, or
 *     NULL when the rest pattern is anonymous.
 */
16976 static pm_splat_node_t *
16977 parse_pattern_rest(pm_parser_t *parser, pm_constant_id_list_t *captures) {
16978 assert(parser->previous.type == PM_TOKEN_USTAR);
16979 pm_token_t operator = parser->previous;
16980 pm_node_t *name = NULL;
16981
16982 // Rest patterns don't necessarily have a name associated with them. So we
16983 // will check for that here. If they do, then we'll add it to the local
16984 // table since this pattern will cause it to become a local variable.
16985 if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
16986 pm_token_t identifier = parser->previous;
16987 pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, &identifier);
16988
// A depth of -1 means the lookup did not find the name; it is then added as
// a new local and the target node below is created with depth 0.
16989 int depth;
16990 if ((depth = pm_parser_local_depth_constant_id(parser, constant_id)) == -1) {
16991 pm_parser_local_add(parser, constant_id, identifier.start, identifier.end, 0);
16992 }
16993
16994 parse_pattern_capture(parser, captures, constant_id, &PM_LOCATION_TOKEN_VALUE(&identifier));
16995 name = (pm_node_t *) pm_local_variable_target_node_create(
16996 parser,
16997 &PM_LOCATION_TOKEN_VALUE(&identifier),
16998 constant_id,
16999 (uint32_t) (depth == -1 ? 0 : depth)
17000 );
17001 }
17002
17003 // Finally we can return the created node.
17004 return pm_splat_node_create(parser, &operator, name);
17005 }
17006
/**
 * Parse a keyword rest pattern. The current token must be the `**` operator
 * (asserted); it is consumed here. Three forms are handled:
 *
 * - `**nil` produces a no-keywords parameter node.
 * - `**name` registers the name as a local variable if it is not one
 *   already, records it as a capture, and produces an assoc splat node whose
 *   value is a local variable target node.
 * - a bare `**` produces an assoc splat node with a NULL value.
 *
 * @param parser The parser.
 * @param captures The captures list that the optional name is recorded in.
 * @return The node for whichever of the three forms was parsed.
 */
17010 static pm_node_t *
17011 parse_pattern_keyword_rest(pm_parser_t *parser, pm_constant_id_list_t *captures) {
17012 assert(parser->current.type == PM_TOKEN_USTAR_STAR);
17013 parser_lex(parser);
17014
17015 pm_token_t operator = parser->previous;
17016 pm_node_t *value = NULL;
17017
17018 if (accept1(parser, PM_TOKEN_KEYWORD_NIL)) {
17019 return (pm_node_t *) pm_no_keywords_parameter_node_create(parser, &operator, &parser->previous);
17020 }
17021
17022 if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
17023 pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, &parser->previous);
17024
// A depth of -1 means the lookup did not find the name; it is then added as
// a new local and the target node below is created with depth 0.
17025 int depth;
17026 if ((depth = pm_parser_local_depth_constant_id(parser, constant_id)) == -1) {
17027 pm_parser_local_add(parser, constant_id, parser->previous.start, parser->previous.end, 0);
17028 }
17029
17030 parse_pattern_capture(parser, captures, constant_id, &PM_LOCATION_TOKEN_VALUE(&parser->previous));
17031 value = (pm_node_t *) pm_local_variable_target_node_create(
17032 parser,
17033 &PM_LOCATION_TOKEN_VALUE(&parser->previous),
17034 constant_id,
17035 (uint32_t) (depth == -1 ? 0 : depth)
17036 );
17037 }
17038
17039 return (pm_node_t *) pm_assoc_splat_node_create(parser, value, &operator);
17040 }
17041
/**
 * Check whether the bytes in [start, end) form a valid local variable name.
 * The slice must be non-empty, must begin with an identifier-start character
 * that is not uppercase, and every remaining character must be an identifier
 * character.
 *
 * @param parser The parser, used for its encoding-aware character checks.
 * @param start The first byte of the slice.
 * @param end One past the last byte of the slice.
 * @return true if the whole slice is a valid local variable name.
 */
17046 static bool
17047 pm_slice_is_valid_local(const pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
17048 ptrdiff_t length = end - start;
17049 if (length == 0) return false;
17050
17051 // First ensure that it starts with a valid identifier starting character.
17052 size_t width = char_is_identifier_start(parser, start);
17053 if (width == 0) return false;
17054
17055 // Next, ensure that it's not an uppercase character.
// The parser's own encoding is consulted only when encoding_changed is set;
// otherwise the UTF-8 check is called directly.
17056 if (parser->encoding_changed) {
17057 if (parser->encoding->isupper_char(start, length)) return false;
17058 } else {
17059 if (pm_encoding_utf_8_isupper_char(start, length)) return false;
17060 }
17061
17062 // Next, iterate through all of the bytes of the string to ensure that they
17063 // are all valid identifier characters.
// The loop stops early on the first non-identifier character (width 0), in
// which case cursor never reaches end and the result is false.
17064 const uint8_t *cursor = start + width;
17065 while ((cursor < end) && (width = char_is_identifier(parser, cursor))) cursor += width;
17066 return cursor == end;
17067 }
17068
/**
 * Build the implicit value for a hash pattern key that has no explicit value
 * pattern: the key's own text is used as the name of a local variable to
 * capture into.
 *
 * If the key text is not a valid local variable name,
 * PM_ERR_PATTERN_HASH_KEY_LOCALS is reported on the key. When the text also
 * ends in `!` or `?`, PM_ERR_INVALID_LOCAL_VARIABLE_WRITE is reported as well.
 * The local is still added and the target node is still created in that case.
 *
 * @param parser The parser.
 * @param captures The captures list that the name is recorded in.
 * @param key The symbol node for the key; its value_loc supplies the name.
 * @return An implicit node wrapping a local variable target node.
 */
17073 static pm_node_t *
17074 parse_pattern_hash_implicit_value(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_symbol_node_t *key) {
17075 const pm_location_t *value_loc = &((pm_symbol_node_t *) key)->value_loc;
17076
17077 pm_constant_id_t constant_id = pm_parser_constant_id_location(parser, value_loc->start, value_loc->end);
17078 int depth = -1;
17079
// The depth lookup only happens for valid names; for invalid ones depth stays
// -1, so the name is unconditionally added as a local below.
17080 if (pm_slice_is_valid_local(parser, value_loc->start, value_loc->end)) {
17081 depth = pm_parser_local_depth_constant_id(parser, constant_id);
17082 } else {
17083 pm_parser_err(parser, key->base.location.start, key->base.location.end, PM_ERR_PATTERN_HASH_KEY_LOCALS);
17084
17085 if ((value_loc->end > value_loc->start) && ((value_loc->end[-1] == '!') || (value_loc->end[-1] == '?'))) {
17086 PM_PARSER_ERR_LOCATION_FORMAT(parser, value_loc, PM_ERR_INVALID_LOCAL_VARIABLE_WRITE, (int) (value_loc->end - value_loc->start), (const char *) value_loc->start);
17087 }
17088 }
17089
17090 if (depth == -1) {
17091 pm_parser_local_add(parser, constant_id, value_loc->start, value_loc->end, 0);
17092 }
17093
17094 parse_pattern_capture(parser, captures, constant_id, value_loc);
17095 pm_local_variable_target_node_t *target = pm_local_variable_target_node_create(
17096 parser,
17097 value_loc,
17098 constant_id,
17099 (uint32_t) (depth == -1 ? 0 : depth)
17100 );
17101
17102 return (pm_node_t *) pm_implicit_node_create(parser, (pm_node_t *) target);
17103 }
17104
/**
 * Register a hash pattern key in the set of keys seen so far, reporting
 * PM_ERR_PATTERN_HASH_KEY_DUPLICATE on the node when pm_static_literals_add
 * returns non-NULL.
 *
 * @param parser The parser that receives the diagnostic.
 * @param keys The static literal set holding the keys of the current hash pattern.
 * @param node The key node to register.
 */
17109 static void
17110 parse_pattern_hash_key(pm_parser_t *parser, pm_static_literals_t *keys, pm_node_t *node) {
// NOTE(review): presumably a non-NULL result is the previously stored equal
// key — confirm against pm_static_literals_add.
17111 if (pm_static_literals_add(&parser->newline_list, parser->start_line, keys, node, true) != NULL) {
17112 pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_KEY_DUPLICATE);
17113 }
17114 }
17115
/**
 * Parse the body of a hash pattern, given its already-parsed first element.
 * The first element is either a keyword rest node, a label symbol that starts
 * the first key/value pair, or something invalid (which is reported and
 * paired with a missing node). Further comma-separated entries are then
 * parsed: `**` rests, label keys, and string keys, each with an explicit value
 * pattern or an implicit local variable target.
 *
 * Duplicate keys are reported through parse_pattern_hash_key, and entries
 * that follow a rest pattern get PM_ERR_PATTERN_EXPRESSION_AFTER_REST.
 *
 * @param parser The parser.
 * @param captures The captures list shared by the enclosing pattern.
 * @param first_node The first element of the hash pattern.
 * @param depth The current recursion depth; nested parses receive depth + 1.
 * @return The pm_hash_pattern_node_t * built from the collected assocs and
 *     the optional rest node.
 *
 * NOTE(review): this listing skips several source lines of this function (the
 * return-type line above the signature, some case labels, and some `if`
 * headers), so the text below is not complete C on its own. The gaps are
 * marked where they occur.
 */
17120 parse_pattern_hash(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node_t *first_node, uint16_t depth) {
17121 pm_node_list_t assocs = { 0 };
17122 pm_static_literals_t keys = { 0 };
17123 pm_node_t *rest = NULL;
17124
17125 switch (PM_NODE_TYPE(first_node)) {
// NOTE(review): the case label(s) guarding this assignment are not visible in
// this listing; this branch treats first_node as the rest of the pattern.
17128 rest = first_node;
17129 break;
17130 case PM_SYMBOL_NODE: {
17131 if (pm_symbol_node_label_p(first_node)) {
17132 parse_pattern_hash_key(parser, &keys, first_node);
17133 pm_node_t *value;
17134
// NOTE(review): the `if` header that chooses between the implicit and the
// explicit value is not visible in this listing.
17136 // Otherwise, we will create an implicit local variable
17137 // target for the value.
17138 value = parse_pattern_hash_implicit_value(parser, captures, (pm_symbol_node_t *) first_node);
17139 } else {
17140 // Here we have a value for the first assoc in the list, so
17141 // we will parse it now.
17142 value = parse_pattern(parser, captures, PM_PARSE_PATTERN_SINGLE, PM_ERR_PATTERN_EXPRESSION_AFTER_KEY, (uint16_t) (depth + 1));
17143 }
17144
17145 pm_token_t operator = not_provided(parser);
17146 pm_node_t *assoc = (pm_node_t *) pm_assoc_node_create(parser, first_node, &operator, value);
17147
17148 pm_node_list_append(&assocs, assoc);
17149 break;
17150 }
17151 }
// A symbol node that is not a label falls into the error handling below.
17152 /* fallthrough */
17153 default: {
17154 // If we get anything else, then this is an error. For this we'll
17155 // create a missing node for the value and create an assoc node for
17156 // the first node in the list.
17157 pm_diagnostic_id_t diag_id = PM_NODE_TYPE_P(first_node, PM_INTERPOLATED_SYMBOL_NODE) ? PM_ERR_PATTERN_HASH_KEY_INTERPOLATED : PM_ERR_PATTERN_HASH_KEY_LABEL;
17158 pm_parser_err_node(parser, first_node, diag_id);
17159
17160 pm_token_t operator = not_provided(parser);
17161 pm_node_t *value = (pm_node_t *) pm_missing_node_create(parser, first_node->location.start, first_node->location.end);
17162 pm_node_t *assoc = (pm_node_t *) pm_assoc_node_create(parser, first_node, &operator, value);
17163
17164 pm_node_list_append(&assocs, assoc);
17165 break;
17166 }
17167 }
17168
17169 // If there are any other assocs, then we'll parse them now.
17170 while (accept1(parser, PM_TOKEN_COMMA)) {
17171 // Here we need to break to support trailing commas.
// NOTE(review): the `if` header that detects a trailing comma is not visible
// in this listing; the lines down to the `break` belong to its body.
17173 // Trailing commas are not allowed to follow a rest pattern.
17174 if (rest != NULL) {
17175 pm_parser_err_token(parser, &parser->current, PM_ERR_PATTERN_EXPRESSION_AFTER_REST);
17176 }
17177
17178 break;
17179 }
17180
17181 if (match1(parser, PM_TOKEN_USTAR_STAR)) {
17182 pm_node_t *assoc = parse_pattern_keyword_rest(parser, captures);
17183
// Only the first rest is kept as the pattern's rest. A later one is reported
// and stored among the assocs instead.
17184 if (rest == NULL) {
17185 rest = assoc;
17186 } else {
17187 pm_parser_err_node(parser, assoc, PM_ERR_PATTERN_EXPRESSION_AFTER_REST);
17188 pm_node_list_append(&assocs, assoc);
17189 }
17190 } else {
17191 pm_node_t *key;
17192
17193 if (match1(parser, PM_TOKEN_STRING_BEGIN)) {
17194 key = parse_strings(parser, NULL, true, (uint16_t) (depth + 1));
17195
// NOTE(review): the `if` header paired with the `else if` below is not
// visible in this listing.
17197 pm_parser_err_node(parser, key, PM_ERR_PATTERN_HASH_KEY_INTERPOLATED);
17198 } else if (!pm_symbol_node_label_p(key)) {
17199 pm_parser_err_node(parser, key, PM_ERR_PATTERN_LABEL_AFTER_COMMA);
17200 }
17201 } else {
17202 expect1(parser, PM_TOKEN_LABEL, PM_ERR_PATTERN_LABEL_AFTER_COMMA);
17203 key = (pm_node_t *) pm_symbol_node_label_create(parser, &parser->previous);
17204 }
17205
17206 parse_pattern_hash_key(parser, &keys, key);
17207 pm_node_t *value = NULL;
17208
// NOTE(review): the `if` header that chooses between the implicit and the
// explicit value is not visible in this listing.
17210 value = parse_pattern_hash_implicit_value(parser, captures, (pm_symbol_node_t *) key);
17211 } else {
17212 value = parse_pattern(parser, captures, PM_PARSE_PATTERN_SINGLE, PM_ERR_PATTERN_EXPRESSION_AFTER_KEY, (uint16_t) (depth + 1));
17213 }
17214
17215 pm_token_t operator = not_provided(parser);
17216 pm_node_t *assoc = (pm_node_t *) pm_assoc_node_create(parser, key, &operator, value);
17217
17218 if (rest != NULL) {
17219 pm_parser_err_node(parser, assoc, PM_ERR_PATTERN_EXPRESSION_AFTER_REST);
17220 }
17221
17222 pm_node_list_append(&assocs, assoc);
17223 }
17224 }
17225
// The temporary list storage and the key set are released here, after the
// hash pattern node has been built from the list.
17226 pm_hash_pattern_node_t *node = pm_hash_pattern_node_node_list_create(parser, &assocs, rest);
17227 xfree(assocs.nodes);
17228
17229 pm_static_literals_free(&keys);
17230 return node;
17231 }
17232
/**
 * Parse a single primitive pattern, dispatching on the type of the current
 * token. The visible branches handle:
 *
 * - a name that becomes a local variable capture,
 * - a bracketed array or find pattern,
 * - a braced hash pattern,
 * - a beginless range (`..x` / `...x`),
 * - a primitive expression, optionally followed by `..` or `...`,
 * - a pin (`^`) followed by a variable or a parenthesized expression,
 * - a constant or a `::`-prefixed constant path.
 *
 * Any other token reports diag_id and yields a missing node.
 *
 * @param parser The parser.
 * @param captures The captures list shared by the enclosing pattern.
 * @param diag_id The diagnostic to report when no pattern can be parsed here.
 * @param depth The current recursion depth; nested parses receive depth + 1.
 * @return The node for the parsed primitive pattern.
 *
 * NOTE(review): this listing skips a number of source lines of this function
 * (several case labels and one local declaration), so the text below is not
 * complete C on its own. The gaps are marked where they occur.
 */
17236 static pm_node_t *
17237 parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_diagnostic_id_t diag_id, uint16_t depth) {
17238 switch (parser->current.type) {
// NOTE(review): at least one case label before this one is not visible in
// this listing.
17240 case PM_TOKEN_METHOD_NAME: {
17241 parser_lex(parser);
17242 pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, &parser->previous);
17243
// NOTE(review): this local `depth` (the local variable's scope depth) shadows
// the uint16_t `depth` parameter (the recursion depth) inside this case.
17244 int depth;
17245 if ((depth = pm_parser_local_depth_constant_id(parser, constant_id)) == -1) {
17246 pm_parser_local_add(parser, constant_id, parser->previous.start, parser->previous.end, 0);
17247 }
17248
17249 parse_pattern_capture(parser, captures, constant_id, &PM_LOCATION_TOKEN_VALUE(&parser->previous));
17250 return (pm_node_t *) pm_local_variable_target_node_create(
17251 parser,
17252 &PM_LOCATION_TOKEN_VALUE(&parser->previous),
17253 constant_id,
17254 (uint32_t) (depth == -1 ? 0 : depth)
17255 );
17256 }
// NOTE(review): the case label opening this bracketed-pattern branch is not
// visible in this listing.
17258 pm_token_t opening = parser->current;
17259 parser_lex(parser);
17260
17261 if (accept1(parser, PM_TOKEN_BRACKET_RIGHT)) {
17262 // If we have an empty array pattern, then we'll just return a new
17263 // array pattern node.
17264 return (pm_node_t *) pm_array_pattern_node_empty_create(parser, &opening, &parser->previous);
17265 }
17266
17267 // Otherwise, we'll parse the inner pattern, then deal with it depending
17268 // on the type it returns.
17269 pm_node_t *inner = parse_pattern(parser, captures, PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_BRACKET, (uint16_t) (depth + 1));
17270
17271 accept1(parser, PM_TOKEN_NEWLINE);
17272 expect1(parser, PM_TOKEN_BRACKET_RIGHT, PM_ERR_PATTERN_TERM_BRACKET);
17273 pm_token_t closing = parser->previous;
17274
// An inner array or find pattern with no opening delimiter recorded adopts
// these brackets as its own. Anything else is wrapped in a new array pattern
// after the switch.
17275 switch (PM_NODE_TYPE(inner)) {
17276 case PM_ARRAY_PATTERN_NODE: {
17277 pm_array_pattern_node_t *pattern_node = (pm_array_pattern_node_t *) inner;
17278 if (pattern_node->opening_loc.start == NULL) {
17279 pattern_node->base.location.start = opening.start;
17280 pattern_node->base.location.end = closing.end;
17281
17282 pattern_node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
17283 pattern_node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
17284
17285 return (pm_node_t *) pattern_node;
17286 }
17287
17288 break;
17289 }
17290 case PM_FIND_PATTERN_NODE: {
17291 pm_find_pattern_node_t *pattern_node = (pm_find_pattern_node_t *) inner;
17292 if (pattern_node->opening_loc.start == NULL) {
17293 pattern_node->base.location.start = opening.start;
17294 pattern_node->base.location.end = closing.end;
17295
17296 pattern_node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
17297 pattern_node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
17298
17299 return (pm_node_t *) pattern_node;
17300 }
17301
17302 break;
17303 }
17304 default:
17305 break;
17306 }
17307
17308 pm_array_pattern_node_t *node = pm_array_pattern_node_empty_create(parser, &opening, &closing);
17309 pm_array_pattern_node_requireds_append(node, inner);
17310 return (pm_node_t *) node;
17311 }
17312 case PM_TOKEN_BRACE_LEFT: {
// pattern_matching_newlines is switched off while inside the braces and put
// back to its previous value before returning.
17313 bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
17314 parser->pattern_matching_newlines = false;
17315
// NOTE(review): the declaration of `node` used below is not visible in this
// listing.
17317 pm_token_t opening = parser->current;
17318 parser_lex(parser);
17319
17320 if (accept1(parser, PM_TOKEN_BRACE_RIGHT)) {
17321 // If we have an empty hash pattern, then we'll just return a new hash
17322 // pattern node.
17323 node = pm_hash_pattern_node_empty_create(parser, &opening, &parser->previous);
17324 } else {
17325 pm_node_t *first_node;
17326
17327 switch (parser->current.type) {
17328 case PM_TOKEN_LABEL:
17329 parser_lex(parser);
17330 first_node = (pm_node_t *) pm_symbol_node_label_create(parser, &parser->previous);
17331 break;
// NOTE(review): the case labels for the next two branches are not visible in
// this listing.
17333 first_node = parse_pattern_keyword_rest(parser, captures);
17334 break;
17336 first_node = parse_expression(parser, PM_BINDING_POWER_MAX, false, true, PM_ERR_PATTERN_HASH_KEY_LABEL, (uint16_t) (depth + 1));
17337 break;
17338 default: {
17339 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_PATTERN_HASH_KEY, pm_token_type_human(parser->current.type));
17340 parser_lex(parser);
17341
17342 first_node = (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
17343 break;
17344 }
17345 }
17346
17347 node = parse_pattern_hash(parser, captures, first_node, (uint16_t) (depth + 1));
17348
17349 accept1(parser, PM_TOKEN_NEWLINE);
17350 expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_PATTERN_TERM_BRACE);
17351 pm_token_t closing = parser->previous;
17352
// Widen the hash pattern to cover the braces and record their locations.
17353 node->base.location.start = opening.start;
17354 node->base.location.end = closing.end;
17355
17356 node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
17357 node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
17358 }
17359
17360 parser->pattern_matching_newlines = previous_pattern_matching_newlines;
17361 return (pm_node_t *) node;
17362 }
17363 case PM_TOKEN_UDOT_DOT:
17364 case PM_TOKEN_UDOT_DOT_DOT: {
17365 pm_token_t operator = parser->current;
17366 parser_lex(parser);
17367
17368 // Since we have a unary range operator, we need to parse the subsequent
17369 // expression as the right side of the range.
17370 switch (parser->current.type) {
17371 case PM_CASE_PRIMITIVE: {
17372 pm_node_t *right = parse_expression(parser, PM_BINDING_POWER_MAX, false, false, PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE, (uint16_t) (depth + 1));
17373 return (pm_node_t *) pm_range_node_create(parser, NULL, &operator, right);
17374 }
17375 default: {
// A beginless range needs a right-hand side: report the error on the
// operator and use a missing node in its place.
17376 pm_parser_err_token(parser, &operator, PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE);
17377 pm_node_t *right = (pm_node_t *) pm_missing_node_create(parser, operator.start, operator.end);
17378 return (pm_node_t *) pm_range_node_create(parser, NULL, &operator, right);
17379 }
17380 }
17381 }
17382 case PM_CASE_PRIMITIVE: {
17383 pm_node_t *node = parse_expression(parser, PM_BINDING_POWER_MAX, false, true, diag_id, (uint16_t) (depth + 1));
17384
17385 // If we found a label, we need to immediately return to the caller.
17386 if (pm_symbol_node_label_p(node)) return node;
17387
17388 // Now that we have a primitive, we need to check if it's part of a range.
17389 if (accept2(parser, PM_TOKEN_DOT_DOT, PM_TOKEN_DOT_DOT_DOT)) {
17390 pm_token_t operator = parser->previous;
17391
17392 // Now that we have the operator, we need to check if this is followed
17393 // by another expression. If it is, then we will create a full range
17394 // node. Otherwise, we'll create an endless range.
17395 switch (parser->current.type) {
17396 case PM_CASE_PRIMITIVE: {
17397 pm_node_t *right = parse_expression(parser, PM_BINDING_POWER_MAX, false, false, PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE, (uint16_t) (depth + 1));
17398 return (pm_node_t *) pm_range_node_create(parser, node, &operator, right);
17399 }
17400 default:
17401 return (pm_node_t *) pm_range_node_create(parser, node, &operator, NULL);
17402 }
17403 }
17404
17405 return node;
17406 }
17407 case PM_TOKEN_CARET: {
17408 parser_lex(parser);
17409 pm_token_t operator = parser->previous;
17410
17411 // At this point we have a pin operator. We need to check the subsequent
17412 // expression to determine if it's a variable or an expression.
17413 switch (parser->current.type) {
17414 case PM_TOKEN_IDENTIFIER: {
17415 parser_lex(parser);
17416 pm_node_t *variable = (pm_node_t *) parse_variable(parser);
17417
// When parse_variable returns NULL for the identifier, report
// PM_ERR_NO_LOCAL_VARIABLE and pin a placeholder read node instead.
17418 if (variable == NULL) {
17419 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->previous, PM_ERR_NO_LOCAL_VARIABLE);
17420 variable = (pm_node_t *) pm_local_variable_read_node_missing_create(parser, &parser->previous, 0);
17421 }
17422
17423 return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable);
17424 }
// NOTE(review): the case labels for the following pin branches are not
// visible in this listing. Each consumes one token and pins the matching read
// node (instance variable, class variable, global variable, numbered
// reference, back reference), then the last one handles a parenthesized
// expression.
17426 parser_lex(parser);
17427 pm_node_t *variable = (pm_node_t *) pm_instance_variable_read_node_create(parser, &parser->previous);
17428
17429 return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable);
17430 }
17432 parser_lex(parser);
17433 pm_node_t *variable = (pm_node_t *) pm_class_variable_read_node_create(parser, &parser->previous);
17434
17435 return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable);
17436 }
17438 parser_lex(parser);
17439 pm_node_t *variable = (pm_node_t *) pm_global_variable_read_node_create(parser, &parser->previous);
17440
17441 return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable);
17442 }
17444 parser_lex(parser);
17445 pm_node_t *variable = (pm_node_t *) pm_numbered_reference_read_node_create(parser, &parser->previous);
17446
17447 return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable);
17448 }
17450 parser_lex(parser);
17451 pm_node_t *variable = (pm_node_t *) pm_back_reference_read_node_create(parser, &parser->previous);
17452
17453 return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable);
17454 }
// pattern_matching_newlines is switched off while the pinned expression is
// parsed and put back before the closing parenthesis is expected.
17456 bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
17457 parser->pattern_matching_newlines = false;
17458
17459 pm_token_t lparen = parser->current;
17460 parser_lex(parser);
17461
17462 pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_STATEMENT, true, false, PM_ERR_PATTERN_EXPRESSION_AFTER_PIN, (uint16_t) (depth + 1));
17463 parser->pattern_matching_newlines = previous_pattern_matching_newlines;
17464
17465 accept1(parser, PM_TOKEN_NEWLINE);
17466 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN);
17467 return (pm_node_t *) pm_pinned_expression_node_create(parser, expression, &operator, &lparen, &parser->previous);
17468 }
17469 default: {
17470 // If we get here, then we have a pin operator followed by something
17471 // not understood. We'll create a missing node and return that.
17472 pm_parser_err_token(parser, &operator, PM_ERR_PATTERN_EXPRESSION_AFTER_PIN);
17473 pm_node_t *variable = (pm_node_t *) pm_missing_node_create(parser, operator.start, operator.end);
17474 return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable);
17475 }
17476 }
17477 }
17478 case PM_TOKEN_UCOLON_COLON: {
17479 pm_token_t delimiter = parser->current;
17480 parser_lex(parser);
17481
// A leading :: creates a constant path with a NULL parent.
17482 expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
17483 pm_constant_path_node_t *node = pm_constant_path_node_create(parser, NULL, &delimiter, &parser->previous);
17484
17485 return parse_pattern_constant_path(parser, captures, (pm_node_t *) node, (uint16_t) (depth + 1));
17486 }
17487 case PM_TOKEN_CONSTANT: {
17488 pm_token_t constant = parser->current;
17489 parser_lex(parser);
17490
17491 pm_node_t *node = (pm_node_t *) pm_constant_read_node_create(parser, &constant);
17492 return parse_pattern_constant_path(parser, captures, node, (uint16_t) (depth + 1));
17493 }
17494 default:
// Nothing here can start a pattern: report the caller's diagnostic on the
// current token and return a missing node without consuming it.
17495 pm_parser_err_current(parser, diag_id);
17496 return (pm_node_t *) pm_missing_node_create(parser, parser->current.start, parser->current.end);
17497 }
17498 }
17499
/**
 * Parse one or more primitive patterns joined by `|` into a chain of
 * alternation pattern nodes, then wrap the result in a capture pattern node
 * for every trailing `=> name`.
 *
 * @param parser The parser.
 * @param captures The captures list shared by the enclosing pattern.
 * @param first_node An already-parsed first alternative, or NULL to parse the
 *     first alternative here.
 * @param diag_id The diagnostic to report when no pattern can be parsed.
 * @param depth The current recursion depth; nested parses receive depth + 1.
 * @return The resulting pattern node.
 *
 * NOTE(review): this listing skips several case labels of this function, so
 * the text below is not complete C on its own. The gaps are marked where they
 * occur.
 */
17504 static pm_node_t *
17505 parse_pattern_primitives(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node_t *first_node, pm_diagnostic_id_t diag_id, uint16_t depth) {
17506 pm_node_t *node = first_node;
17507
// The loop runs at least once when no first node was supplied, and after that
// once per `|` that follows.
17508 while ((node == NULL) || accept1(parser, PM_TOKEN_PIPE)) {
// On an iteration entered because node was NULL, no `|` has been consumed and
// `operator` is not used: every branch below only reads it when node is
// non-NULL.
17509 pm_token_t operator = parser->previous;
17510
17511 switch (parser->current.type) {
// NOTE(review): some of the case labels in this group are not visible in this
// listing.
17515 case PM_TOKEN_CARET:
17516 case PM_TOKEN_CONSTANT:
17518 case PM_TOKEN_UDOT_DOT:
17520 case PM_CASE_PRIMITIVE: {
17521 if (node == NULL) {
17522 node = parse_pattern_primitive(parser, captures, diag_id, (uint16_t) (depth + 1));
17523 } else {
17524 pm_node_t *right = parse_pattern_primitive(parser, captures, PM_ERR_PATTERN_EXPRESSION_AFTER_PIPE, (uint16_t) (depth + 1));
17525 node = (pm_node_t *) pm_alternation_pattern_node_create(parser, node, right, &operator);
17526 }
17527
17528 break;
17529 }
// NOTE(review): the case label opening this parenthesized-pattern branch is
// not visible in this listing.
17532 pm_token_t opening = parser->current;
17533 parser_lex(parser);
17534
17535 pm_node_t *body = parse_pattern(parser, captures, PM_PARSE_PATTERN_SINGLE, PM_ERR_PATTERN_EXPRESSION_AFTER_PAREN, (uint16_t) (depth + 1));
17536 accept1(parser, PM_TOKEN_NEWLINE);
17537 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN);
17538 pm_node_t *right = (pm_node_t *) pm_parentheses_node_create(parser, &opening, body, &parser->previous);
17539
17540 if (node == NULL) {
17541 node = right;
17542 } else {
17543 node = (pm_node_t *) pm_alternation_pattern_node_create(parser, node, right, &operator);
17544 }
17545
17546 break;
17547 }
17548 default: {
// Nothing here can start a pattern: report the diagnostic and use a missing
// node so that node becomes non-NULL and the loop can end.
17549 pm_parser_err_current(parser, diag_id);
17550 pm_node_t *right = (pm_node_t *) pm_missing_node_create(parser, parser->current.start, parser->current.end);
17551
17552 if (node == NULL) {
17553 node = right;
17554 } else {
17555 node = (pm_node_t *) pm_alternation_pattern_node_create(parser, node, right, &operator);
17556 }
17557
17558 break;
17559 }
17560 }
17561 }
17562
17563 // If we have an =>, then we are assigning this pattern to a variable.
17564 // In this case we should create an assignment node.
17565 while (accept1(parser, PM_TOKEN_EQUAL_GREATER)) {
17566 pm_token_t operator = parser->previous;
17567 expect1(parser, PM_TOKEN_IDENTIFIER, PM_ERR_PATTERN_IDENT_AFTER_HROCKET);
17568
17569 pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, &parser->previous);
// NOTE(review): this local `depth` (the local variable's scope depth) shadows
// the uint16_t `depth` parameter (the recursion depth) inside this loop body.
17570 int depth;
17571
17572 if ((depth = pm_parser_local_depth_constant_id(parser, constant_id)) == -1) {
17573 pm_parser_local_add(parser, constant_id, parser->previous.start, parser->previous.end, 0);
17574 }
17575
17576 parse_pattern_capture(parser, captures, constant_id, &PM_LOCATION_TOKEN_VALUE(&parser->previous));
17577 pm_local_variable_target_node_t *target = pm_local_variable_target_node_create(
17578 parser,
17579 &PM_LOCATION_TOKEN_VALUE(&parser->previous),
17580 constant_id,
17581 (uint32_t) (depth == -1 ? 0 : depth)
17582 );
17583
17584 node = (pm_node_t *) pm_capture_pattern_node_create(parser, node, target, &operator);
17585 }
17586
17587 return node;
17588 }
17589
/**
 * Parse a pattern starting at the parser's current token and return the
 * resulting node.
 *
 * The current token type selects how parsing begins:
 *   - PM_TOKEN_LABEL and PM_TOKEN_USTAR_STAR start a hash pattern through
 *     parse_pattern_hash. Unless flags contains PM_PARSE_PATTERN_TOP, the
 *     resulting node is reported with PM_ERR_PATTERN_HASH_IMPLICIT.
 *   - PM_TOKEN_STRING_BEGIN is parsed as a primitive first; if that turns out
 *     to be a label symbol it is handled like the hash pattern case above,
 *     otherwise parsing continues through parse_pattern_primitives.
 *   - PM_TOKEN_USTAR is parsed as a leading rest pattern only when flags
 *     contains PM_PARSE_PATTERN_TOP or PM_PARSE_PATTERN_MULTI; otherwise it
 *     falls through to the default case.
 *   - Anything else goes through parse_pattern_primitives.
 *
 * When flags contains PM_PARSE_PATTERN_MULTI and a comma follows, the
 * comma-separated patterns are collected into a list and turned into either a
 * find pattern or an array pattern.
 *
 * captures and diag_id are forwarded to the helper parsers. depth is
 * forwarded, incremented by one, to every nested parse call.
 */
17593static pm_node_t *
17594parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flags, pm_diagnostic_id_t diag_id, uint16_t depth) {
17595 pm_node_t *node = NULL;
17596
      // leading_rest: the pattern started with a rest (splat) pattern.
      // trailing_rest: a rest pattern (explicit or implicit) was seen after a
      // comma.
17597 bool leading_rest = false;
17598 bool trailing_rest = false;
17599
17600 switch (parser->current.type) {
17601 case PM_TOKEN_LABEL: {
17602 parser_lex(parser);
17603 pm_node_t *key = (pm_node_t *) pm_symbol_node_label_create(parser, &parser->previous);
17604 node = (pm_node_t *) parse_pattern_hash(parser, captures, key, (uint16_t) (depth + 1));
17605
17606 if (!(flags & PM_PARSE_PATTERN_TOP)) {
17607 pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_IMPLICIT);
17608 }
17609
17610 return node;
17611 }
17612 case PM_TOKEN_USTAR_STAR: {
17613 node = parse_pattern_keyword_rest(parser, captures);
17614 node = (pm_node_t *) parse_pattern_hash(parser, captures, node, (uint16_t) (depth + 1));
17615
17616 if (!(flags & PM_PARSE_PATTERN_TOP)) {
17617 pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_IMPLICIT);
17618 }
17619
17620 return node;
17621 }
17622 case PM_TOKEN_STRING_BEGIN: {
17623 // We need special handling for string beginnings because they could
17624 // be dynamic symbols leading to hash patterns.
17625 node = parse_pattern_primitive(parser, captures, diag_id, (uint16_t) (depth + 1));
17626
17627 if (pm_symbol_node_label_p(node)) {
17628 node = (pm_node_t *) parse_pattern_hash(parser, captures, node, (uint16_t) (depth + 1));
17629
17630 if (!(flags & PM_PARSE_PATTERN_TOP)) {
17631 pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_IMPLICIT);
17632 }
17633
17634 return node;
17635 }
17636
17637 node = parse_pattern_primitives(parser, captures, node, diag_id, (uint16_t) (depth + 1));
17638 break;
17639 }
17640 case PM_TOKEN_USTAR: {
      // Only consume the token as a leading rest pattern when the caller
      // allows it; otherwise control intentionally drops into the default
      // case below.
17641 if (flags & (PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI)) {
17642 parser_lex(parser);
17643 node = (pm_node_t *) parse_pattern_rest(parser, captures);
17644 leading_rest = true;
17645 break;
17646 }
17647 }
17648 /* fallthrough */
17649 default:
17650 node = parse_pattern_primitives(parser, captures, NULL, diag_id, (uint16_t) (depth + 1));
17651 break;
17652 }
17653
17654 // If we got a dynamic label symbol, then we need to treat it like the
17655 // beginning of a hash pattern.
17656 if (pm_symbol_node_label_p(node)) {
17657 return (pm_node_t *) parse_pattern_hash(parser, captures, node, (uint16_t) (depth + 1));
17658 }
17659
17660 if ((flags & PM_PARSE_PATTERN_MULTI) && match1(parser, PM_TOKEN_COMMA)) {
17661 // If we have a comma, then we are now parsing either an array pattern
17662 // or a find pattern. We need to parse all of the patterns, put them
17663 // into a big list, and then determine which type of node we have.
17664 pm_node_list_t nodes = { 0 };
17665 pm_node_list_append(&nodes, node);
17666
17667 // Gather up all of the patterns into the list.
17668 while (accept1(parser, PM_TOKEN_COMMA)) {
17669 // Break early here in case we have a trailing comma.
      // NOTE(review): the condition that opens this early-break branch
      // (listing line 17670) is absent from this listing; the statements
      // below up to the closing brace belong to that branch. Restore the
      // line from the upstream source before compiling.
17671 node = (pm_node_t *) pm_implicit_rest_node_create(parser, &parser->previous);
17672 pm_node_list_append(&nodes, node);
17673 trailing_rest = true;
17674 break;
17675 }
17676
17677 if (accept1(parser, PM_TOKEN_USTAR)) {
17678 node = (pm_node_t *) parse_pattern_rest(parser, captures);
17679
17680 // If we have already parsed a splat pattern, then this is an
17681 // error. We will continue to parse the rest of the patterns,
17682 // but we will indicate it as an error.
17683 if (trailing_rest) {
17684 pm_parser_err_previous(parser, PM_ERR_PATTERN_REST);
17685 }
17686
17687 trailing_rest = true;
17688 } else {
17689 node = parse_pattern_primitives(parser, captures, NULL, PM_ERR_PATTERN_EXPRESSION_AFTER_COMMA, (uint16_t) (depth + 1));
17690 }
17691
17692 pm_node_list_append(&nodes, node);
17693 }
17694
17695 // If the first pattern and the last pattern are rest patterns, then we
17696 // will call this a find pattern, regardless of how many rest patterns
17697 // are in between because we know we already added the appropriate
17698 // errors. Otherwise we will create an array pattern.
17699 if (leading_rest && PM_NODE_TYPE_P(nodes.nodes[nodes.size - 1], PM_SPLAT_NODE)) {
17700 node = (pm_node_t *) pm_find_pattern_node_create(parser, &nodes);
17701
      // Exactly two entries means the list holds only the leading and the
      // trailing rest pattern, with nothing between them.
17702 if (nodes.size == 2) {
17703 pm_parser_err_node(parser, node, PM_ERR_PATTERN_FIND_MISSING_INNER);
17704 }
17705 } else {
17706 node = (pm_node_t *) pm_array_pattern_node_node_list_create(parser, &nodes);
17707
17708 if (leading_rest && trailing_rest) {
17709 pm_parser_err_node(parser, node, PM_ERR_PATTERN_ARRAY_MULTIPLE_RESTS);
17710 }
17711 }
17712
      // Release the backing storage of the temporary list.
17713 xfree(nodes.nodes);
17714 } else if (leading_rest) {
17715 // Otherwise, if we parsed a single splat pattern, then we know we have
17716 // an array pattern, so we can go ahead and create that node.
17717 node = (pm_node_t *) pm_array_pattern_node_rest_create(parser, node);
17718 }
17719
17720 return node;
17721}
17722
17728static inline void
17729parse_negative_numeric(pm_node_t *node) {
17730 switch (PM_NODE_TYPE(node)) {
17731 case PM_INTEGER_NODE: {
17732 pm_integer_node_t *cast = (pm_integer_node_t *) node;
17733 cast->base.location.start--;
17734 cast->value.negative = true;
17735 break;
17736 }
17737 case PM_FLOAT_NODE: {
17738 pm_float_node_t *cast = (pm_float_node_t *) node;
17739 cast->base.location.start--;
17740 cast->value = -cast->value;
17741 break;
17742 }
17743 case PM_RATIONAL_NODE: {
17744 pm_rational_node_t *cast = (pm_rational_node_t *) node;
17745 cast->base.location.start--;
17746 cast->numerator.negative = true;
17747 break;
17748 }
17749 case PM_IMAGINARY_NODE:
17750 node->location.start--;
17751 parse_negative_numeric(((pm_imaginary_node_t *) node)->numeric);
17752 break;
17753 default:
17754 assert(false && "unreachable");
17755 break;
17756 }
17757}
17758
17764static void
17765pm_parser_err_prefix(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
17766 switch (diag_id) {
17767 case PM_ERR_HASH_KEY: {
17768 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, diag_id, pm_token_type_human(parser->previous.type));
17769 break;
17770 }
17771 case PM_ERR_HASH_VALUE:
17772 case PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR: {
17773 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, diag_id, pm_token_type_human(parser->current.type));
17774 break;
17775 }
17776 case PM_ERR_UNARY_RECEIVER: {
17777 const char *human = (parser->current.type == PM_TOKEN_EOF ? "end-of-input" : pm_token_type_human(parser->current.type));
17778 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, diag_id, human, parser->previous.start[0]);
17779 break;
17780 }
17781 case PM_ERR_UNARY_DISALLOWED:
17782 case PM_ERR_EXPECT_ARGUMENT: {
17783 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, diag_id, pm_token_type_human(parser->current.type));
17784 break;
17785 }
17786 default:
17787 pm_parser_err_previous(parser, diag_id);
17788 break;
17789 }
17790}
17791
/**
 * Check that a retry (node) appears somewhere it is allowed, adding an error
 * to the parser if it does not.
 *
 * The function walks outward from parser->current_context. Reaching one of
 * the "good" contexts ends the walk without an error. Reaching one of the
 * "bad" contexts adds an error on node; which error depends on whether the
 * walk passed through an else or an ensure on the way up, tracked in
 * `context`. If the walk runs off the end of the list nothing is reported.
 *
 * The CONTEXT_* macros are helpers local to this function. They are defined
 * at the top and undefined again at the bottom.
 *
 * NOTE(review): several `case` labels of the switch below are absent from
 * this listing (the embedded line numbers skip), so some statements appear
 * without the labels that select them. Restore them from the upstream source
 * before compiling.
 */
17795static void
17796parse_retry(pm_parser_t *parser, const pm_node_t *node) {
17797#define CONTEXT_NONE 0
17798#define CONTEXT_THROUGH_ENSURE 1
17799#define CONTEXT_THROUGH_ELSE 2
17800
17801 pm_context_node_t *context_node = parser->current_context;
17802 int context = CONTEXT_NONE;
17803
17804 while (context_node != NULL) {
17805 switch (context_node->context) {
17813 case PM_CONTEXT_DEFINED:
17815 // These are the good cases. We're allowed to have a retry here.
17816 return;
17817 case PM_CONTEXT_CLASS:
17818 case PM_CONTEXT_DEF:
17820 case PM_CONTEXT_MAIN:
17821 case PM_CONTEXT_MODULE:
17822 case PM_CONTEXT_PREEXE:
17823 case PM_CONTEXT_SCLASS:
17824 // These are the bad cases. We're not allowed to have a retry in
17825 // these contexts.
17826 if (context == CONTEXT_NONE) {
17827 pm_parser_err_node(parser, node, PM_ERR_INVALID_RETRY_WITHOUT_RESCUE);
17828 } else if (context == CONTEXT_THROUGH_ENSURE) {
17829 pm_parser_err_node(parser, node, PM_ERR_INVALID_RETRY_AFTER_ENSURE);
17830 } else if (context == CONTEXT_THROUGH_ELSE) {
17831 pm_parser_err_node(parser, node, PM_ERR_INVALID_RETRY_AFTER_ELSE);
17832 }
17833 return;
17841 // These are also bad cases, but with a more specific error
17842 // message indicating the else.
17843 context = CONTEXT_THROUGH_ELSE;
17844 break;
17852 // These are also bad cases, but with a more specific error
17853 // message indicating the ensure.
17854 context = CONTEXT_THROUGH_ENSURE;
17855 break;
17856 case PM_CONTEXT_NONE:
17857 // This case should never happen.
17858 assert(false && "unreachable");
17859 break;
17860 case PM_CONTEXT_BEGIN:
17863 case PM_CONTEXT_CASE_IN:
17866 case PM_CONTEXT_ELSE:
17867 case PM_CONTEXT_ELSIF:
17868 case PM_CONTEXT_EMBEXPR:
17870 case PM_CONTEXT_FOR:
17871 case PM_CONTEXT_IF:
17876 case PM_CONTEXT_PARENS:
17877 case PM_CONTEXT_POSTEXE:
17879 case PM_CONTEXT_TERNARY:
17880 case PM_CONTEXT_UNLESS:
17881 case PM_CONTEXT_UNTIL:
17882 case PM_CONTEXT_WHILE:
17883 // In these contexts we should continue walking up the list of
17884 // contexts.
17885 break;
17886 }
17887
17888 context_node = context_node->prev;
17889 }
17890
      // Undefine exactly the names defined at the top of the function so that
      // none of the helper macros stay visible in the rest of the file.
17891#undef CONTEXT_NONE
17892#undef CONTEXT_THROUGH_ENSURE
17893#undef CONTEXT_THROUGH_ELSE
17894}
17895
/**
 * Check that a yield (node) appears somewhere it is allowed, adding
 * PM_ERR_INVALID_YIELD to the parser if it does not.
 *
 * The function walks outward from parser->current_context. Reaching one of
 * the "good" contexts ends the walk without an error; reaching one of the
 * "bad" contexts adds the error on node and ends the walk. All other contexts
 * are skipped. If the walk runs off the end of the list nothing is reported.
 *
 * NOTE(review): several `case` labels of the switch below are absent from
 * this listing (the embedded line numbers skip), so the visible labels are
 * only part of each group.
 */
17899static void
17900parse_yield(pm_parser_t *parser, const pm_node_t *node) {
17901 pm_context_node_t *context_node = parser->current_context;
17902
17903 while (context_node != NULL) {
17904 switch (context_node->context) {
17905 case PM_CONTEXT_DEF:
17907 case PM_CONTEXT_DEFINED:
17911 // These are the good cases. We're allowed to have a yield in
17912 // these contexts.
17913 return;
17914 case PM_CONTEXT_CLASS:
17918 case PM_CONTEXT_MAIN:
17919 case PM_CONTEXT_MODULE:
17923 case PM_CONTEXT_SCLASS:
17927 // These are the bad cases. We're not allowed to have a yield in
17928 // these contexts.
17929 pm_parser_err_node(parser, node, PM_ERR_INVALID_YIELD);
17930 return;
17931 case PM_CONTEXT_NONE:
17932 // This case should never happen.
17933 assert(false && "unreachable");
17934 break;
17935 case PM_CONTEXT_BEGIN:
17944 case PM_CONTEXT_CASE_IN:
17947 case PM_CONTEXT_ELSE:
17948 case PM_CONTEXT_ELSIF:
17949 case PM_CONTEXT_EMBEXPR:
17951 case PM_CONTEXT_FOR:
17952 case PM_CONTEXT_IF:
17960 case PM_CONTEXT_PARENS:
17961 case PM_CONTEXT_POSTEXE:
17963 case PM_CONTEXT_PREEXE:
17965 case PM_CONTEXT_TERNARY:
17966 case PM_CONTEXT_UNLESS:
17967 case PM_CONTEXT_UNTIL:
17968 case PM_CONTEXT_WHILE:
17969 // In these contexts we should continue walking up the list of
17970 // contexts.
17971 break;
17972 }
17973
      // Move on to the enclosing context.
17974 context_node = context_node->prev;
17975 }
17976}
17977
/**
 * Presumably the data handed to the regular expression error callback
 * (parse_regular_expression_error) -- TODO confirm.
 *
 * NOTE(review): this definition is truncated in this listing. Only the two
 * fields below are visible; the remaining fields, the closing brace, and the
 * type name are absent. Restore them from the upstream source before
 * compiling.
 */
17982typedef struct {
17985
      // Presumably the start of the fallback error location; the callback
      // reads a `start` member when it cannot use the location it was given.
17987 const uint8_t *start;
17988
      // Presumably the end of the fallback error location; see `start`.
17990 const uint8_t *end;
17991
18000
/**
 * Error callback for regular expression parsing: adds a
 * PM_ERR_REGEXP_PARSE_ERROR to the parser, formatted with message.
 *
 * start and end delimit the error as reported by the caller. They are used as
 * the error location only when callback_data->shared is set; otherwise the
 * location stored in callback_data is used instead.
 *
 * NOTE(review): the declaration of callback_data (listing line 18007) is
 * absent from this listing; presumably it is obtained from the opaque data
 * argument -- TODO confirm against the upstream source.
 */
18005static void
18006parse_regular_expression_error(const uint8_t *start, const uint8_t *end, const char *message, void *data) {
18008 pm_location_t location;
18009
18010 if (callback_data->shared) {
18011 location = (pm_location_t) { .start = start, .end = end };
18012 } else {
18013 location = (pm_location_t) { .start = callback_data->start, .end = callback_data->end };
18014 }
18015
18016 PM_PARSER_ERR_FORMAT(callback_data->parser, location.start, location.end, PM_ERR_REGEXP_PARSE_ERROR, message);
18017}
18018
/**
 * Run the regular expression parser over the unescaped content of node so
 * that any errors it finds are added to the parser through
 * parse_regular_expression_error.
 *
 * The callback data carries the parser, the node's own location, and whether
 * the unescaped string has type PM_STRING_SHARED. Whether the node has the
 * PM_REGULAR_EXPRESSION_FLAGS_EXTENDED flag is passed on to pm_regexp_parse.
 *
 * NOTE(review): the line that opens the error_data initializer (listing line
 * 18025) is absent from this listing; the designated initializers below
 * belong to it.
 */
18022static void
18023parse_regular_expression_errors(pm_parser_t *parser, pm_regular_expression_node_t *node) {
18024 const pm_string_t *unescaped = &node->unescaped;
18026 .parser = parser,
18027 .start = node->base.location.start,
18028 .end = node->base.location.end,
18029 .shared = unescaped->type == PM_STRING_SHARED
18030 };
18031
      // The two NULL arguments are left unset here; only the error callback
      // and its data are supplied.
18032 pm_regexp_parse(parser, pm_string_source(unescaped), pm_string_length(unescaped), PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_EXTENDED), NULL, NULL, parse_regular_expression_error, &error_data);
18033}
18034
18038static inline pm_node_t *
18039parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, bool accepts_label, pm_diagnostic_id_t diag_id, uint16_t depth) {
18040 switch (parser->current.type) {
18042 parser_lex(parser);
18043
18044 pm_array_node_t *array = pm_array_node_create(parser, &parser->previous);
18045 pm_accepts_block_stack_push(parser, true);
18046 bool parsed_bare_hash = false;
18047
18048 while (!match2(parser, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_EOF)) {
18049 bool accepted_newline = accept1(parser, PM_TOKEN_NEWLINE);
18050
18051 // Handle the case where we don't have a comma and we have a
18052 // newline followed by a right bracket.
18053 if (accepted_newline && match1(parser, PM_TOKEN_BRACKET_RIGHT)) {
18054 break;
18055 }
18056
18057 // Ensure that we have a comma between elements in the array.
18058 if (array->elements.size > 0) {
18059 if (accept1(parser, PM_TOKEN_COMMA)) {
18060 // If there was a comma but we also accepts a newline,
18061 // then this is a syntax error.
18062 if (accepted_newline) {
18063 pm_parser_err_previous(parser, PM_ERR_INVALID_COMMA);
18064 }
18065 } else {
18066 // If there was no comma, then we need to add a syntax
18067 // error.
18068 const uint8_t *location = parser->previous.end;
18069 PM_PARSER_ERR_FORMAT(parser, location, location, PM_ERR_ARRAY_SEPARATOR, pm_token_type_human(parser->current.type));
18070
18071 parser->previous.start = location;
18072 parser->previous.type = PM_TOKEN_MISSING;
18073 }
18074 }
18075
18076 // If we have a right bracket immediately following a comma,
18077 // this is allowed since it's a trailing comma. In this case we
18078 // can break out of the loop.
18079 if (match1(parser, PM_TOKEN_BRACKET_RIGHT)) break;
18080
18081 pm_node_t *element;
18082
18083 if (accept1(parser, PM_TOKEN_USTAR)) {
18084 pm_token_t operator = parser->previous;
18085 pm_node_t *expression = NULL;
18086
18087 if (match3(parser, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_COMMA, PM_TOKEN_EOF)) {
18088 pm_parser_scope_forwarding_positionals_check(parser, &operator);
18089 } else {
18090 expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_ARRAY_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
18091 }
18092
18093 element = (pm_node_t *) pm_splat_node_create(parser, &operator, expression);
18094 } else if (match2(parser, PM_TOKEN_LABEL, PM_TOKEN_USTAR_STAR)) {
18095 if (parsed_bare_hash) {
18096 pm_parser_err_current(parser, PM_ERR_EXPRESSION_BARE_HASH);
18097 }
18098
18099 element = (pm_node_t *) pm_keyword_hash_node_create(parser);
18100 pm_static_literals_t hash_keys = { 0 };
18101
18103 parse_assocs(parser, &hash_keys, element, (uint16_t) (depth + 1));
18104 }
18105
18106 pm_static_literals_free(&hash_keys);
18107 parsed_bare_hash = true;
18108 } else {
18109 element = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, true, PM_ERR_ARRAY_EXPRESSION, (uint16_t) (depth + 1));
18110
18111 if (pm_symbol_node_label_p(element) || accept1(parser, PM_TOKEN_EQUAL_GREATER)) {
18112 if (parsed_bare_hash) {
18113 pm_parser_err_previous(parser, PM_ERR_EXPRESSION_BARE_HASH);
18114 }
18115
18116 pm_keyword_hash_node_t *hash = pm_keyword_hash_node_create(parser);
18117 pm_static_literals_t hash_keys = { 0 };
18118 pm_hash_key_static_literals_add(parser, &hash_keys, element);
18119
18120 pm_token_t operator;
18121 if (parser->previous.type == PM_TOKEN_EQUAL_GREATER) {
18122 operator = parser->previous;
18123 } else {
18124 operator = not_provided(parser);
18125 }
18126
18127 pm_node_t *value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_HASH_VALUE, (uint16_t) (depth + 1));
18128 pm_node_t *assoc = (pm_node_t *) pm_assoc_node_create(parser, element, &operator, value);
18129 pm_keyword_hash_node_elements_append(hash, assoc);
18130
18131 element = (pm_node_t *) hash;
18132 if (accept1(parser, PM_TOKEN_COMMA) && !match1(parser, PM_TOKEN_BRACKET_RIGHT)) {
18133 parse_assocs(parser, &hash_keys, element, (uint16_t) (depth + 1));
18134 }
18135
18136 pm_static_literals_free(&hash_keys);
18137 parsed_bare_hash = true;
18138 }
18139 }
18140
18141 pm_array_node_elements_append(array, element);
18142 if (PM_NODE_TYPE_P(element, PM_MISSING_NODE)) break;
18143 }
18144
18145 accept1(parser, PM_TOKEN_NEWLINE);
18146
18147 if (!accept1(parser, PM_TOKEN_BRACKET_RIGHT)) {
18148 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_ARRAY_TERM, pm_token_type_human(parser->current.type));
18149 parser->previous.start = parser->previous.end;
18150 parser->previous.type = PM_TOKEN_MISSING;
18151 }
18152
18153 pm_array_node_close_set(array, &parser->previous);
18154 pm_accepts_block_stack_pop(parser);
18155
18156 return (pm_node_t *) array;
18157 }
18160 pm_token_t opening = parser->current;
18161
18162 pm_node_list_t current_block_exits = { 0 };
18163 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
18164
18165 parser_lex(parser);
18166 while (accept2(parser, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE));
18167
18168 // If this is the end of the file or we match a right parenthesis, then
18169 // we have an empty parentheses node, and we can immediately return.
18170 if (match2(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_EOF)) {
18171 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
18172
18173 pop_block_exits(parser, previous_block_exits);
18174 pm_node_list_free(&current_block_exits);
18175
18176 return (pm_node_t *) pm_parentheses_node_create(parser, &opening, NULL, &parser->previous);
18177 }
18178
18179 // Otherwise, we're going to parse the first statement in the list
18180 // of statements within the parentheses.
18181 pm_accepts_block_stack_push(parser, true);
18182 context_push(parser, PM_CONTEXT_PARENS);
18183 pm_node_t *statement = parse_expression(parser, PM_BINDING_POWER_STATEMENT, true, false, PM_ERR_CANNOT_PARSE_EXPRESSION, (uint16_t) (depth + 1));
18184 context_pop(parser);
18185
18186 // Determine if this statement is followed by a terminator. In the
18187 // case of a single statement, this is fine. But in the case of
18188 // multiple statements it's required.
18189 bool terminator_found = accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
18190 if (terminator_found) {
18191 while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
18192 }
18193
18194 // If we hit a right parenthesis, then we're done parsing the
18195 // parentheses node, and we can check which kind of node we should
18196 // return.
18197 if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
18199 lex_state_set(parser, PM_LEX_STATE_ENDARG);
18200 }
18201
18202 parser_lex(parser);
18203 pm_accepts_block_stack_pop(parser);
18204
18205 pop_block_exits(parser, previous_block_exits);
18206 pm_node_list_free(&current_block_exits);
18207
18208 if (PM_NODE_TYPE_P(statement, PM_MULTI_TARGET_NODE) || PM_NODE_TYPE_P(statement, PM_SPLAT_NODE)) {
18209 // If we have a single statement and are ending on a right
18210 // parenthesis, then we need to check if this is possibly a
18211 // multiple target node.
18212 pm_multi_target_node_t *multi_target;
18213
18214 if (PM_NODE_TYPE_P(statement, PM_MULTI_TARGET_NODE) && ((pm_multi_target_node_t *) statement)->lparen_loc.start == NULL) {
18215 multi_target = (pm_multi_target_node_t *) statement;
18216 } else {
18217 multi_target = pm_multi_target_node_create(parser);
18218 pm_multi_target_node_targets_append(parser, multi_target, statement);
18219 }
18220
18221 pm_location_t lparen_loc = PM_LOCATION_TOKEN_VALUE(&opening);
18222 pm_location_t rparen_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
18223
18224 multi_target->lparen_loc = lparen_loc;
18225 multi_target->rparen_loc = rparen_loc;
18226 multi_target->base.location.start = lparen_loc.start;
18227 multi_target->base.location.end = rparen_loc.end;
18228
18229 pm_node_t *result;
18230 if (match1(parser, PM_TOKEN_COMMA) && (binding_power == PM_BINDING_POWER_STATEMENT)) {
18231 result = parse_targets(parser, (pm_node_t *) multi_target, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
18232 accept1(parser, PM_TOKEN_NEWLINE);
18233 } else {
18234 result = (pm_node_t *) multi_target;
18235 }
18236
18237 if (context_p(parser, PM_CONTEXT_MULTI_TARGET)) {
18238 // All set, this is explicitly allowed by the parent
18239 // context.
18240 } else if (context_p(parser, PM_CONTEXT_FOR_INDEX) && match1(parser, PM_TOKEN_KEYWORD_IN)) {
18241 // All set, we're inside a for loop and we're parsing
18242 // multiple targets.
18243 } else if (binding_power != PM_BINDING_POWER_STATEMENT) {
18244 // Multi targets are not allowed when it's not a
18245 // statement level.
18246 pm_parser_err_node(parser, result, PM_ERR_WRITE_TARGET_UNEXPECTED);
18247 } else if (!match2(parser, PM_TOKEN_EQUAL, PM_TOKEN_PARENTHESIS_RIGHT)) {
18248 // Multi targets must be followed by an equal sign in
18249 // order to be valid (or a right parenthesis if they are
18250 // nested).
18251 pm_parser_err_node(parser, result, PM_ERR_WRITE_TARGET_UNEXPECTED);
18252 }
18253
18254 return result;
18255 }
18256
18257 // If we have a single statement and are ending on a right parenthesis
18258 // and we didn't return a multiple assignment node, then we can return a
18259 // regular parentheses node now.
18260 pm_statements_node_t *statements = pm_statements_node_create(parser);
18261 pm_statements_node_body_append(parser, statements, statement, true);
18262
18263 return (pm_node_t *) pm_parentheses_node_create(parser, &opening, (pm_node_t *) statements, &parser->previous);
18264 }
18265
18266 // If we have more than one statement in the set of parentheses,
18267 // then we are going to parse all of them as a list of statements.
18268 // We'll do that here.
18269 context_push(parser, PM_CONTEXT_PARENS);
18270 pm_statements_node_t *statements = pm_statements_node_create(parser);
18271 pm_statements_node_body_append(parser, statements, statement, true);
18272
18273 // If we didn't find a terminator and we didn't find a right
18274 // parenthesis, then this is a syntax error.
18275 if (!terminator_found && !match1(parser, PM_TOKEN_EOF)) {
18276 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(parser->current.type));
18277 }
18278
18279 // Parse each statement within the parentheses.
18280 while (true) {
18281 pm_node_t *node = parse_expression(parser, PM_BINDING_POWER_STATEMENT, true, false, PM_ERR_CANNOT_PARSE_EXPRESSION, (uint16_t) (depth + 1));
18282 pm_statements_node_body_append(parser, statements, node, true);
18283
18284 // If we're recovering from a syntax error, then we need to stop
18285 // parsing the statements now.
18286 if (parser->recovering) {
18287 // If this is the level of context where the recovery has
18288 // happened, then we can mark the parser as done recovering.
18289 if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) parser->recovering = false;
18290 break;
18291 }
18292
18293 // If we couldn't parse an expression at all, then we need to
18294 // bail out of the loop.
18295 if (PM_NODE_TYPE_P(node, PM_MISSING_NODE)) break;
18296
18297 // If we successfully parsed a statement, then we are going to
18298 // need terminator to delimit them.
18299 if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
18300 while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
18301 if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) break;
18302 } else if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
18303 break;
18304 } else if (!match1(parser, PM_TOKEN_EOF)) {
18305 // If we're at the end of the file, then we're going to add
18306 // an error after this for the ) anyway.
18307 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(parser->current.type));
18308 }
18309 }
18310
18311 context_pop(parser);
18312 pm_accepts_block_stack_pop(parser);
18313 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
18314
18315 // When we're parsing multi targets, we allow them to be followed by
18316 // a right parenthesis if they are at the statement level. This is
18317 // only possible if they are the final statement in a parentheses.
18318 // We need to explicitly reject that here.
18319 {
18320 pm_node_t *statement = statements->body.nodes[statements->body.size - 1];
18321
18322 if (PM_NODE_TYPE_P(statement, PM_SPLAT_NODE)) {
18323 pm_multi_target_node_t *multi_target = pm_multi_target_node_create(parser);
18324 pm_multi_target_node_targets_append(parser, multi_target, statement);
18325
18326 statement = (pm_node_t *) multi_target;
18327 statements->body.nodes[statements->body.size - 1] = statement;
18328 }
18329
18330 if (PM_NODE_TYPE_P(statement, PM_MULTI_TARGET_NODE)) {
18331 const uint8_t *offset = statement->location.end;
18332 pm_token_t operator = { .type = PM_TOKEN_EQUAL, .start = offset, .end = offset };
18333 pm_node_t *value = (pm_node_t *) pm_missing_node_create(parser, offset, offset);
18334
18335 statement = (pm_node_t *) pm_multi_write_node_create(parser, (pm_multi_target_node_t *) statement, &operator, value);
18336 statements->body.nodes[statements->body.size - 1] = statement;
18337
18338 pm_parser_err_node(parser, statement, PM_ERR_WRITE_TARGET_UNEXPECTED);
18339 }
18340 }
18341
18342 pop_block_exits(parser, previous_block_exits);
18343 pm_node_list_free(&current_block_exits);
18344
18345 pm_void_statements_check(parser, statements, true);
18346 return (pm_node_t *) pm_parentheses_node_create(parser, &opening, (pm_node_t *) statements, &parser->previous);
18347 }
18348 case PM_TOKEN_BRACE_LEFT: {
18349 // If we were passed a current_hash_keys via the parser, then that
18350 // means we're already parsing a hash and we want to share the set
18351 // of hash keys with this inner hash we're about to parse for the
18352 // sake of warnings. We'll set it to NULL after we grab it to make
18353 // sure subsequent expressions don't use it. Effectively this is a
18354 // way of getting around passing it to every call to
18355 // parse_expression.
18356 pm_static_literals_t *current_hash_keys = parser->current_hash_keys;
18357 parser->current_hash_keys = NULL;
18358
18359 pm_accepts_block_stack_push(parser, true);
18360 parser_lex(parser);
18361
18362 pm_hash_node_t *node = pm_hash_node_create(parser, &parser->previous);
18363
18364 if (!match2(parser, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_EOF)) {
18365 if (current_hash_keys != NULL) {
18366 parse_assocs(parser, current_hash_keys, (pm_node_t *) node, (uint16_t) (depth + 1));
18367 } else {
18368 pm_static_literals_t hash_keys = { 0 };
18369 parse_assocs(parser, &hash_keys, (pm_node_t *) node, (uint16_t) (depth + 1));
18370 pm_static_literals_free(&hash_keys);
18371 }
18372
18373 accept1(parser, PM_TOKEN_NEWLINE);
18374 }
18375
18376 pm_accepts_block_stack_pop(parser);
18377 expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_HASH_TERM);
18378 pm_hash_node_closing_loc_set(node, &parser->previous);
18379
18380 return (pm_node_t *) node;
18381 }
18383 parser_lex(parser);
18384
18385 pm_token_t opening = parser->previous;
18386 opening.type = PM_TOKEN_STRING_BEGIN;
18387 opening.end = opening.start + 1;
18388
18389 pm_token_t content = parser->previous;
18390 content.type = PM_TOKEN_STRING_CONTENT;
18391 content.start = content.start + 1;
18392
18393 pm_token_t closing = not_provided(parser);
18394 pm_node_t *node = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &content, &closing);
18395 pm_node_flag_set(node, parse_unescaped_encoding(parser));
18396
18397 // Characters can be followed by strings in which case they are
18398 // automatically concatenated.
18399 if (match1(parser, PM_TOKEN_STRING_BEGIN)) {
18400 return parse_strings(parser, node, false, (uint16_t) (depth + 1));
18401 }
18402
18403 return node;
18404 }
18406 parser_lex(parser);
18407 pm_node_t *node = (pm_node_t *) pm_class_variable_read_node_create(parser, &parser->previous);
18408
18409 if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
18410 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
18411 }
18412
18413 return node;
18414 }
18415 case PM_TOKEN_CONSTANT: {
18416 parser_lex(parser);
18417 pm_token_t constant = parser->previous;
18418
18419 // If a constant is immediately followed by parentheses, then this is in
18420 // fact a method call, not a constant read.
18421 if (
18422 match1(parser, PM_TOKEN_PARENTHESIS_LEFT) ||
18423 (accepts_command_call && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR))) ||
18424 (pm_accepts_block_stack_p(parser) && match1(parser, PM_TOKEN_KEYWORD_DO)) ||
18425 match1(parser, PM_TOKEN_BRACE_LEFT)
18426 ) {
18427 pm_arguments_t arguments = { 0 };
18428 parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1));
18429 return (pm_node_t *) pm_call_node_fcall_create(parser, &constant, &arguments);
18430 }
18431
18432 pm_node_t *node = (pm_node_t *) pm_constant_read_node_create(parser, &parser->previous);
18433
18434 if ((binding_power == PM_BINDING_POWER_STATEMENT) && match1(parser, PM_TOKEN_COMMA)) {
18435 // If we get here, then we have a comma immediately following a
18436 // constant, so we're going to parse this as a multiple assignment.
18437 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
18438 }
18439
18440 return node;
18441 }
18442 case PM_TOKEN_UCOLON_COLON: {
18443 parser_lex(parser);
18444 pm_token_t delimiter = parser->previous;
18445
18446 expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
18447 pm_node_t *node = (pm_node_t *) pm_constant_path_node_create(parser, NULL, &delimiter, &parser->previous);
18448
18449 if ((binding_power == PM_BINDING_POWER_STATEMENT) && match1(parser, PM_TOKEN_COMMA)) {
18450 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
18451 }
18452
18453 return node;
18454 }
18455 case PM_TOKEN_UDOT_DOT:
18456 case PM_TOKEN_UDOT_DOT_DOT: {
18457 pm_token_t operator = parser->current;
18458 parser_lex(parser);
18459
18460 pm_node_t *right = parse_expression(parser, pm_binding_powers[operator.type].left, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
18461
18462 // Unary .. and ... are special because these are non-associative
18463 // operators that can also be unary operators. In this case we need
18464 // to explicitly reject code that has a .. or ... that follows this
18465 // expression.
18466 if (match2(parser, PM_TOKEN_DOT_DOT, PM_TOKEN_DOT_DOT_DOT)) {
18467 pm_parser_err_current(parser, PM_ERR_UNEXPECTED_RANGE_OPERATOR);
18468 }
18469
18470 return (pm_node_t *) pm_range_node_create(parser, NULL, &operator, right);
18471 }
18472 case PM_TOKEN_FLOAT:
18473 parser_lex(parser);
18474 return (pm_node_t *) pm_float_node_create(parser, &parser->previous);
18476 parser_lex(parser);
18477 return (pm_node_t *) pm_float_node_imaginary_create(parser, &parser->previous);
18479 parser_lex(parser);
18480 return (pm_node_t *) pm_float_node_rational_create(parser, &parser->previous);
18482 parser_lex(parser);
18483 return (pm_node_t *) pm_float_node_rational_imaginary_create(parser, &parser->previous);
18485 parser_lex(parser);
18486 pm_node_t *node = (pm_node_t *) pm_numbered_reference_read_node_create(parser, &parser->previous);
18487
18488 if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
18489 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
18490 }
18491
18492 return node;
18493 }
18495 parser_lex(parser);
18496 pm_node_t *node = (pm_node_t *) pm_global_variable_read_node_create(parser, &parser->previous);
18497
18498 if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
18499 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
18500 }
18501
18502 return node;
18503 }
18505 parser_lex(parser);
18506 pm_node_t *node = (pm_node_t *) pm_back_reference_read_node_create(parser, &parser->previous);
18507
18508 if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
18509 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
18510 }
18511
18512 return node;
18513 }
18515 case PM_TOKEN_METHOD_NAME: {
18516 parser_lex(parser);
18517 pm_token_t identifier = parser->previous;
18518 pm_node_t *node = parse_variable_call(parser);
18519
18520 if (PM_NODE_TYPE_P(node, PM_CALL_NODE)) {
18521 // If parse_variable_call returned with a call node, then we
18522 // know the identifier is not in the local table. In that case
18523 // we need to check if there are arguments following the
18524 // identifier.
18525 pm_call_node_t *call = (pm_call_node_t *) node;
18526 pm_arguments_t arguments = { 0 };
18527
18528 if (parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1))) {
18529 // Since we found arguments, we need to turn off the
18530 // variable call bit in the flags.
18531 pm_node_flag_unset((pm_node_t *)call, PM_CALL_NODE_FLAGS_VARIABLE_CALL);
18532
18533 call->opening_loc = arguments.opening_loc;
18534 call->arguments = arguments.arguments;
18535 call->closing_loc = arguments.closing_loc;
18536 call->block = arguments.block;
18537
18538 if (arguments.block != NULL) {
18539 call->base.location.end = arguments.block->location.end;
18540 } else if (arguments.closing_loc.start == NULL) {
18541 if (arguments.arguments != NULL) {
18542 call->base.location.end = arguments.arguments->base.location.end;
18543 } else {
18544 call->base.location.end = call->message_loc.end;
18545 }
18546 } else {
18547 call->base.location.end = arguments.closing_loc.end;
18548 }
18549 }
18550 } else {
18551 // Otherwise, we know the identifier is in the local table. This
18552 // can still be a method call if it is followed by arguments or
18553 // a block, so we need to check for that here.
18554 if (
18555 (accepts_command_call && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR))) ||
18556 (pm_accepts_block_stack_p(parser) && match1(parser, PM_TOKEN_KEYWORD_DO)) ||
18557 match1(parser, PM_TOKEN_BRACE_LEFT)
18558 ) {
18559 pm_arguments_t arguments = { 0 };
18560 parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1));
18561 pm_call_node_t *fcall = pm_call_node_fcall_create(parser, &identifier, &arguments);
18562
18564 // If we're about to convert an 'it' implicit local
18565 // variable read into a method call, we need to remove
18566 // it from the list of implicit local variables.
18567 parse_target_implicit_parameter(parser, node);
18568 } else {
18569 // Otherwise, we're about to convert a regular local
18570 // variable read into a method call, in which case we
18571 // need to indicate that this was not a read for the
18572 // purposes of warnings.
18574
18575 if (pm_token_is_numbered_parameter(identifier.start, identifier.end)) {
18576 parse_target_implicit_parameter(parser, node);
18577 } else {
18579 pm_locals_unread(&pm_parser_scope_find(parser, cast->depth)->locals, cast->name);
18580 }
18581 }
18582
18583 pm_node_destroy(parser, node);
18584 return (pm_node_t *) fcall;
18585 }
18586 }
18587
18588 if ((binding_power == PM_BINDING_POWER_STATEMENT) && match1(parser, PM_TOKEN_COMMA)) {
18589 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
18590 }
18591
18592 return node;
18593 }
18595 // Here we have found a heredoc. We'll parse it and add it to the
18596 // list of strings.
18597 assert(parser->lex_modes.current->mode == PM_LEX_HEREDOC);
18598 pm_heredoc_lex_mode_t lex_mode = parser->lex_modes.current->as.heredoc.base;
18599
18600 size_t common_whitespace = (size_t) -1;
18601 parser->lex_modes.current->as.heredoc.common_whitespace = &common_whitespace;
18602
18603 parser_lex(parser);
18604 pm_token_t opening = parser->previous;
18605
18606 pm_node_t *node;
18607 pm_node_t *part;
18608
18609 if (match2(parser, PM_TOKEN_HEREDOC_END, PM_TOKEN_EOF)) {
18610 // If we get here, then we have an empty heredoc. We'll create
18611 // an empty content token and return an empty string node.
18612 expect1_heredoc_term(parser, lex_mode.ident_start, lex_mode.ident_length);
18613 pm_token_t content = parse_strings_empty_content(parser->previous.start);
18614
18615 if (lex_mode.quote == PM_HEREDOC_QUOTE_BACKTICK) {
18616 node = (pm_node_t *) pm_xstring_node_create_unescaped(parser, &opening, &content, &parser->previous, &PM_STRING_EMPTY);
18617 } else {
18618 node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &PM_STRING_EMPTY);
18619 }
18620
18621 node->location.end = opening.end;
18622 } else if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) == NULL) {
18623 // If we get here, then we tried to find something in the
18624 // heredoc but couldn't actually parse anything, so we'll just
18625 // return a missing node.
18626 //
18627 // parse_string_part handles its own errors, so there is no need
18628 // for us to add one here.
18629 node = (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
18630 } else if (PM_NODE_TYPE_P(part, PM_STRING_NODE) && match2(parser, PM_TOKEN_HEREDOC_END, PM_TOKEN_EOF)) {
18631 // If we get here, then the part that we parsed was plain string
18632 // content and we're at the end of the heredoc, so we can return
18633 // just a string node with the heredoc opening and closing as
18634 // its opening and closing.
18635 pm_node_flag_set(part, parse_unescaped_encoding(parser));
18636 pm_string_node_t *cast = (pm_string_node_t *) part;
18637
18638 cast->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
18639 cast->closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->current);
18640 cast->base.location = cast->opening_loc;
18641
18642 if (lex_mode.quote == PM_HEREDOC_QUOTE_BACKTICK) {
18643 assert(sizeof(pm_string_node_t) == sizeof(pm_x_string_node_t));
18644 cast->base.type = PM_X_STRING_NODE;
18645 }
18646
18647 if (lex_mode.indent == PM_HEREDOC_INDENT_TILDE && (common_whitespace != (size_t) -1) && (common_whitespace != 0)) {
18648 parse_heredoc_dedent_string(&cast->unescaped, common_whitespace);
18649 }
18650
18651 node = (pm_node_t *) cast;
18652 expect1_heredoc_term(parser, lex_mode.ident_start, lex_mode.ident_length);
18653 } else {
18654 // If we get here, then we have multiple parts in the heredoc,
18655 // so we'll need to create an interpolated string node to hold
18656 // them all.
18657 pm_node_list_t parts = { 0 };
18658 pm_node_list_append(&parts, part);
18659
18660 while (!match2(parser, PM_TOKEN_HEREDOC_END, PM_TOKEN_EOF)) {
18661 if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
18662 pm_node_list_append(&parts, part);
18663 }
18664 }
18665
18666 // Now that we have all of the parts, create the correct type of
18667 // interpolated node.
18668 if (lex_mode.quote == PM_HEREDOC_QUOTE_BACKTICK) {
18669 pm_interpolated_x_string_node_t *cast = pm_interpolated_xstring_node_create(parser, &opening, &opening);
18670 cast->parts = parts;
18671
18672 expect1_heredoc_term(parser, lex_mode.ident_start, lex_mode.ident_length);
18673 pm_interpolated_xstring_node_closing_set(cast, &parser->previous);
18674
18675 cast->base.location = cast->opening_loc;
18676 node = (pm_node_t *) cast;
18677 } else {
18678 pm_interpolated_string_node_t *cast = pm_interpolated_string_node_create(parser, &opening, &parts, &opening);
18679 pm_node_list_free(&parts);
18680
18681 expect1_heredoc_term(parser, lex_mode.ident_start, lex_mode.ident_length);
18682 pm_interpolated_string_node_closing_set(cast, &parser->previous);
18683
18684 cast->base.location = cast->opening_loc;
18685 node = (pm_node_t *) cast;
18686 }
18687
18688 // If this is a heredoc that is indented with a ~, then we need
18689 // to dedent each line by the common leading whitespace.
18690 if (lex_mode.indent == PM_HEREDOC_INDENT_TILDE && (common_whitespace != (size_t) -1) && (common_whitespace != 0)) {
18691 pm_node_list_t *nodes;
18692 if (lex_mode.quote == PM_HEREDOC_QUOTE_BACKTICK) {
18693 nodes = &((pm_interpolated_x_string_node_t *) node)->parts;
18694 } else {
18695 nodes = &((pm_interpolated_string_node_t *) node)->parts;
18696 }
18697
18698 parse_heredoc_dedent(parser, nodes, common_whitespace);
18699 }
18700 }
18701
18702 if (match1(parser, PM_TOKEN_STRING_BEGIN)) {
18703 return parse_strings(parser, node, false, (uint16_t) (depth + 1));
18704 }
18705
18706 return node;
18707 }
18709 parser_lex(parser);
18710 pm_node_t *node = (pm_node_t *) pm_instance_variable_read_node_create(parser, &parser->previous);
18711
18712 if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
18713 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
18714 }
18715
18716 return node;
18717 }
18718 case PM_TOKEN_INTEGER: {
18719 pm_node_flags_t base = parser->integer_base;
18720 parser_lex(parser);
18721 return (pm_node_t *) pm_integer_node_create(parser, base, &parser->previous);
18722 }
18724 pm_node_flags_t base = parser->integer_base;
18725 parser_lex(parser);
18726 return (pm_node_t *) pm_integer_node_imaginary_create(parser, base, &parser->previous);
18727 }
18729 pm_node_flags_t base = parser->integer_base;
18730 parser_lex(parser);
18731 return (pm_node_t *) pm_integer_node_rational_create(parser, base, &parser->previous);
18732 }
18734 pm_node_flags_t base = parser->integer_base;
18735 parser_lex(parser);
18736 return (pm_node_t *) pm_integer_node_rational_imaginary_create(parser, base, &parser->previous);
18737 }
18739 parser_lex(parser);
18740 return (pm_node_t *) pm_source_encoding_node_create(parser, &parser->previous);
18742 parser_lex(parser);
18743 return (pm_node_t *) pm_source_file_node_create(parser, &parser->previous);
18745 parser_lex(parser);
18746 return (pm_node_t *) pm_source_line_node_create(parser, &parser->previous);
18748 if (binding_power != PM_BINDING_POWER_STATEMENT) {
18749 pm_parser_err_current(parser, PM_ERR_STATEMENT_ALIAS);
18750 }
18751
18752 parser_lex(parser);
18753 pm_token_t keyword = parser->previous;
18754
18755 pm_node_t *new_name = parse_alias_argument(parser, true, (uint16_t) (depth + 1));
18756 pm_node_t *old_name = parse_alias_argument(parser, false, (uint16_t) (depth + 1));
18757
18758 switch (PM_NODE_TYPE(new_name)) {
18764 pm_parser_err_node(parser, old_name, PM_ERR_ALIAS_ARGUMENT_NUMBERED_REFERENCE);
18765 }
18766 } else {
18767 pm_parser_err_node(parser, old_name, PM_ERR_ALIAS_ARGUMENT);
18768 }
18769
18770 return (pm_node_t *) pm_alias_global_variable_node_create(parser, &keyword, new_name, old_name);
18771 }
18772 case PM_SYMBOL_NODE:
18775 pm_parser_err_node(parser, old_name, PM_ERR_ALIAS_ARGUMENT);
18776 }
18777 }
18778 /* fallthrough */
18779 default:
18780 return (pm_node_t *) pm_alias_method_node_create(parser, &keyword, new_name, old_name);
18781 }
18782 }
18783 case PM_TOKEN_KEYWORD_CASE: {
18784 size_t opening_newline_index = token_newline_index(parser);
18785 parser_lex(parser);
18786
18787 pm_token_t case_keyword = parser->previous;
18788 pm_node_t *predicate = NULL;
18789
18790 pm_node_list_t current_block_exits = { 0 };
18791 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
18792
18793 if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
18794 while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
18795 predicate = NULL;
18796 } else if (match3(parser, PM_TOKEN_KEYWORD_WHEN, PM_TOKEN_KEYWORD_IN, PM_TOKEN_KEYWORD_END)) {
18797 predicate = NULL;
18798 } else if (!token_begins_expression_p(parser->current.type)) {
18799 predicate = NULL;
18800 } else {
18801 predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CASE_EXPRESSION_AFTER_CASE, (uint16_t) (depth + 1));
18802 while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
18803 }
18804
18805 if (match1(parser, PM_TOKEN_KEYWORD_END)) {
18806 parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, false);
18807 parser_lex(parser);
18808
18809 pop_block_exits(parser, previous_block_exits);
18810 pm_node_list_free(&current_block_exits);
18811
18812 pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MISSING_CONDITIONS);
18813 return (pm_node_t *) pm_case_node_create(parser, &case_keyword, predicate, &parser->previous);
18814 }
18815
18816 // At this point we can create a case node, though we don't yet know
18817 // if it is a case-in or case-when node.
18818 pm_token_t end_keyword = not_provided(parser);
18819 pm_node_t *node;
18820
18821 if (match1(parser, PM_TOKEN_KEYWORD_WHEN)) {
18822 pm_case_node_t *case_node = pm_case_node_create(parser, &case_keyword, predicate, &end_keyword);
18823 pm_static_literals_t literals = { 0 };
18824
18825 // At this point we've seen a when keyword, so we know this is a
18826 // case-when node. We will continue to parse the when nodes
18827 // until we hit the end of the list.
18828 while (match1(parser, PM_TOKEN_KEYWORD_WHEN)) {
18829 parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, true);
18830 parser_lex(parser);
18831
18832 pm_token_t when_keyword = parser->previous;
18833 pm_when_node_t *when_node = pm_when_node_create(parser, &when_keyword);
18834
18835 do {
18836 if (accept1(parser, PM_TOKEN_USTAR)) {
18837 pm_token_t operator = parser->previous;
18838 pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
18839
18840 pm_splat_node_t *splat_node = pm_splat_node_create(parser, &operator, expression);
18841 pm_when_node_conditions_append(when_node, (pm_node_t *) splat_node);
18842
18843 if (PM_NODE_TYPE_P(expression, PM_MISSING_NODE)) break;
18844 } else {
18845 pm_node_t *condition = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_CASE_EXPRESSION_AFTER_WHEN, (uint16_t) (depth + 1));
18846 pm_when_node_conditions_append(when_node, condition);
18847
18848 // If we found a missing node, then this is a syntax
18849 // error and we should stop looping.
18850 if (PM_NODE_TYPE_P(condition, PM_MISSING_NODE)) break;
18851
18852 // If this is a string node, then we need to mark it
18853 // as frozen because when clause strings are frozen.
18854 if (PM_NODE_TYPE_P(condition, PM_STRING_NODE)) {
18855 pm_node_flag_set(condition, PM_STRING_FLAGS_FROZEN | PM_NODE_FLAG_STATIC_LITERAL);
18856 } else if (PM_NODE_TYPE_P(condition, PM_SOURCE_FILE_NODE)) {
18857 pm_node_flag_set(condition, PM_NODE_FLAG_STATIC_LITERAL);
18858 }
18859
18860 pm_when_clause_static_literals_add(parser, &literals, condition);
18861 }
18862 } while (accept1(parser, PM_TOKEN_COMMA));
18863
18864 if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
18865 if (accept1(parser, PM_TOKEN_KEYWORD_THEN)) {
18866 pm_when_node_then_keyword_loc_set(when_node, &parser->previous);
18867 }
18868 } else {
18869 expect1(parser, PM_TOKEN_KEYWORD_THEN, PM_ERR_EXPECT_WHEN_DELIMITER);
18870 pm_when_node_then_keyword_loc_set(when_node, &parser->previous);
18871 }
18872
18874 pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_CASE_WHEN, (uint16_t) (depth + 1));
18875 if (statements != NULL) {
18876 pm_when_node_statements_set(when_node, statements);
18877 }
18878 }
18879
18880 pm_case_node_condition_append(case_node, (pm_node_t *) when_node);
18881 }
18882
18883 // If we didn't parse any conditions (in or when) then we need
18884 // to indicate that we have an error.
18885 if (case_node->conditions.size == 0) {
18886 pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MISSING_CONDITIONS);
18887 }
18888
18889 pm_static_literals_free(&literals);
18890 node = (pm_node_t *) case_node;
18891 } else {
18892 pm_case_match_node_t *case_node = pm_case_match_node_create(parser, &case_keyword, predicate, &end_keyword);
18893
18894 // If this is a case-match node (i.e., it is a pattern matching
18895 // case statement) then we must have a predicate.
18896 if (predicate == NULL) {
18897 pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MATCH_MISSING_PREDICATE);
18898 }
18899
18900 // At this point we expect that we're parsing a case-in node. We
18901 // will continue to parse the in nodes until we hit the end of
18902 // the list.
18903 while (match1(parser, PM_TOKEN_KEYWORD_IN)) {
18904 parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, true);
18905
18906 bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
18907 parser->pattern_matching_newlines = true;
18908
18909 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
18910 parser->command_start = false;
18911 parser_lex(parser);
18912
18913 pm_token_t in_keyword = parser->previous;
18914
18915 pm_constant_id_list_t captures = { 0 };
18916 pm_node_t *pattern = parse_pattern(parser, &captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_IN, (uint16_t) (depth + 1));
18917
18918 parser->pattern_matching_newlines = previous_pattern_matching_newlines;
18919 pm_constant_id_list_free(&captures);
18920
18921 // Since we're in the top-level of the case-in node we need
18922 // to check for guard clauses in the form of `if` or
18923 // `unless` statements.
18924 if (accept1(parser, PM_TOKEN_KEYWORD_IF_MODIFIER)) {
18925 pm_token_t keyword = parser->previous;
18926 pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CONDITIONAL_IF_PREDICATE, (uint16_t) (depth + 1));
18927 pattern = (pm_node_t *) pm_if_node_modifier_create(parser, pattern, &keyword, predicate);
18928 } else if (accept1(parser, PM_TOKEN_KEYWORD_UNLESS_MODIFIER)) {
18929 pm_token_t keyword = parser->previous;
18930 pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CONDITIONAL_UNLESS_PREDICATE, (uint16_t) (depth + 1));
18931 pattern = (pm_node_t *) pm_unless_node_modifier_create(parser, pattern, &keyword, predicate);
18932 }
18933
18934 // Now we need to check for the terminator of the in node's
18935 // pattern. It can be a newline or semicolon optionally
18936 // followed by a `then` keyword.
18937 pm_token_t then_keyword;
18938 if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
18939 if (accept1(parser, PM_TOKEN_KEYWORD_THEN)) {
18940 then_keyword = parser->previous;
18941 } else {
18942 then_keyword = not_provided(parser);
18943 }
18944 } else {
18945 expect1(parser, PM_TOKEN_KEYWORD_THEN, PM_ERR_EXPECT_IN_DELIMITER);
18946 then_keyword = parser->previous;
18947 }
18948
18949 // Now we can actually parse the statements associated with
18950 // the in node.
18951 pm_statements_node_t *statements;
18953 statements = NULL;
18954 } else {
18955 statements = parse_statements(parser, PM_CONTEXT_CASE_IN, (uint16_t) (depth + 1));
18956 }
18957
18958 // Now that we have the full pattern and statements, we can
18959 // create the node and attach it to the case node.
18960 pm_node_t *condition = (pm_node_t *) pm_in_node_create(parser, pattern, statements, &in_keyword, &then_keyword);
18961 pm_case_match_node_condition_append(case_node, condition);
18962 }
18963
18964 // If we didn't parse any conditions (in or when) then we need
18965 // to indicate that we have an error.
18966 if (case_node->conditions.size == 0) {
18967 pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MISSING_CONDITIONS);
18968 }
18969
18970 node = (pm_node_t *) case_node;
18971 }
18972
18973 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
18974 if (accept1(parser, PM_TOKEN_KEYWORD_ELSE)) {
18975 pm_token_t else_keyword = parser->previous;
18976 pm_else_node_t *else_node;
18977
18978 if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
18979 else_node = pm_else_node_create(parser, &else_keyword, parse_statements(parser, PM_CONTEXT_ELSE, (uint16_t) (depth + 1)), &parser->current);
18980 } else {
18981 else_node = pm_else_node_create(parser, &else_keyword, NULL, &parser->current);
18982 }
18983
18984 if (PM_NODE_TYPE_P(node, PM_CASE_NODE)) {
18985 pm_case_node_else_clause_set((pm_case_node_t *) node, else_node);
18986 } else {
18987 pm_case_match_node_else_clause_set((pm_case_match_node_t *) node, else_node);
18988 }
18989 }
18990
18991 parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, false);
18992 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CASE_TERM);
18993
18994 if (PM_NODE_TYPE_P(node, PM_CASE_NODE)) {
18995 pm_case_node_end_keyword_loc_set((pm_case_node_t *) node, &parser->previous);
18996 } else {
18997 pm_case_match_node_end_keyword_loc_set((pm_case_match_node_t *) node, &parser->previous);
18998 }
18999
19000 pop_block_exits(parser, previous_block_exits);
19001 pm_node_list_free(&current_block_exits);
19002
19003 return node;
19004 }
19006 size_t opening_newline_index = token_newline_index(parser);
19007 parser_lex(parser);
19008
19009 pm_token_t begin_keyword = parser->previous;
19010 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
19011
19012 pm_node_list_t current_block_exits = { 0 };
19013 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
19014 pm_statements_node_t *begin_statements = NULL;
19015
19017 pm_accepts_block_stack_push(parser, true);
19018 begin_statements = parse_statements(parser, PM_CONTEXT_BEGIN, (uint16_t) (depth + 1));
19019 pm_accepts_block_stack_pop(parser);
19020 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
19021 }
19022
19023 pm_begin_node_t *begin_node = pm_begin_node_create(parser, &begin_keyword, begin_statements);
19024 parse_rescues(parser, opening_newline_index, &begin_keyword, begin_node, PM_RESCUES_BEGIN, (uint16_t) (depth + 1));
19025 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_BEGIN_TERM);
19026
19027 begin_node->base.location.end = parser->previous.end;
19028 pm_begin_node_end_keyword_set(begin_node, &parser->previous);
19029
19030 pop_block_exits(parser, previous_block_exits);
19031 pm_node_list_free(&current_block_exits);
19032
19033 return (pm_node_t *) begin_node;
19034 }
19036 pm_node_list_t current_block_exits = { 0 };
19037 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
19038
19039 if (binding_power != PM_BINDING_POWER_STATEMENT) {
19040 pm_parser_err_current(parser, PM_ERR_STATEMENT_PREEXE_BEGIN);
19041 }
19042
19043 parser_lex(parser);
19044 pm_token_t keyword = parser->previous;
19045
19046 expect1(parser, PM_TOKEN_BRACE_LEFT, PM_ERR_BEGIN_UPCASE_BRACE);
19047 pm_token_t opening = parser->previous;
19048 pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_PREEXE, (uint16_t) (depth + 1));
19049
19050 expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_BEGIN_UPCASE_TERM);
19051 pm_context_t context = parser->current_context->context;
19052 if ((context != PM_CONTEXT_MAIN) && (context != PM_CONTEXT_PREEXE)) {
19053 pm_parser_err_token(parser, &keyword, PM_ERR_BEGIN_UPCASE_TOPLEVEL);
19054 }
19055
19056 flush_block_exits(parser, previous_block_exits);
19057 pm_node_list_free(&current_block_exits);
19058
19059 return (pm_node_t *) pm_pre_execution_node_create(parser, &keyword, &opening, statements, &parser->previous);
19060 }
19064 parser_lex(parser);
19065
19066 pm_token_t keyword = parser->previous;
19067 pm_arguments_t arguments = { 0 };
19068
19069 if (
19070 token_begins_expression_p(parser->current.type) ||
19071 match2(parser, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR)
19072 ) {
19073 pm_binding_power_t binding_power = pm_binding_powers[parser->current.type].left;
19074
19075 if (binding_power == PM_BINDING_POWER_UNSET || binding_power >= PM_BINDING_POWER_RANGE) {
19076 parse_arguments(parser, &arguments, false, PM_TOKEN_EOF, (uint16_t) (depth + 1));
19077 }
19078 }
19079
19080 switch (keyword.type) {
19082 pm_node_t *node = (pm_node_t *) pm_break_node_create(parser, &keyword, arguments.arguments);
19083 if (!parser->partial_script) parse_block_exit(parser, node);
19084 return node;
19085 }
19086 case PM_TOKEN_KEYWORD_NEXT: {
19087 pm_node_t *node = (pm_node_t *) pm_next_node_create(parser, &keyword, arguments.arguments);
19088 if (!parser->partial_script) parse_block_exit(parser, node);
19089 return node;
19090 }
19092 pm_node_t *node = (pm_node_t *) pm_return_node_create(parser, &keyword, arguments.arguments);
19093 parse_return(parser, node);
19094 return node;
19095 }
19096 default:
19097 assert(false && "unreachable");
19098 return (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
19099 }
19100 }
19102 parser_lex(parser);
19103
19104 pm_token_t keyword = parser->previous;
19105 pm_arguments_t arguments = { 0 };
19106 parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1));
19107
19108 if (
19109 arguments.opening_loc.start == NULL &&
19110 arguments.arguments == NULL &&
19111 ((arguments.block == NULL) || PM_NODE_TYPE_P(arguments.block, PM_BLOCK_NODE))
19112 ) {
19113 return (pm_node_t *) pm_forwarding_super_node_create(parser, &keyword, &arguments);
19114 }
19115
19116 return (pm_node_t *) pm_super_node_create(parser, &keyword, &arguments);
19117 }
19119 parser_lex(parser);
19120
19121 pm_token_t keyword = parser->previous;
19122 pm_arguments_t arguments = { 0 };
19123 parse_arguments_list(parser, &arguments, false, accepts_command_call, (uint16_t) (depth + 1));
19124
19125 // It's possible that we've parsed a block argument through our
19126 // call to parse_arguments_list. If we found one, we should mark it
19127 // as invalid and destroy it, as we don't have a place for it on the
19128 // yield node.
19129 if (arguments.block != NULL) {
19130 pm_parser_err_node(parser, arguments.block, PM_ERR_UNEXPECTED_BLOCK_ARGUMENT);
19131 pm_node_destroy(parser, arguments.block);
19132 arguments.block = NULL;
19133 }
19134
19135 pm_node_t *node = (pm_node_t *) pm_yield_node_create(parser, &keyword, &arguments.opening_loc, arguments.arguments, &arguments.closing_loc);
19136 if (!parser->parsing_eval && !parser->partial_script) parse_yield(parser, node);
19137
19138 return node;
19139 }
19141 size_t opening_newline_index = token_newline_index(parser);
19142 parser_lex(parser);
19143
19144 pm_token_t class_keyword = parser->previous;
19145 pm_do_loop_stack_push(parser, false);
19146
19147 pm_node_list_t current_block_exits = { 0 };
19148 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
19149
19150 if (accept1(parser, PM_TOKEN_LESS_LESS)) {
19151 pm_token_t operator = parser->previous;
19152 pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_EXPECT_EXPRESSION_AFTER_LESS_LESS, (uint16_t) (depth + 1));
19153
19154 pm_parser_scope_push(parser, true);
19155 if (!match2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
19156 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_SINGLETON_CLASS_DELIMITER, pm_token_type_human(parser->current.type));
19157 }
19158
19159 pm_node_t *statements = NULL;
19161 pm_accepts_block_stack_push(parser, true);
19162 statements = (pm_node_t *) parse_statements(parser, PM_CONTEXT_SCLASS, (uint16_t) (depth + 1));
19163 pm_accepts_block_stack_pop(parser);
19164 }
19165
19166 if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
19167 assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
19168 statements = (pm_node_t *) parse_rescues_implicit_begin(parser, opening_newline_index, &class_keyword, class_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_SCLASS, (uint16_t) (depth + 1));
19169 } else {
19170 parser_warn_indentation_mismatch(parser, opening_newline_index, &class_keyword, false, false);
19171 }
19172
19173 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CLASS_TERM);
19174
19175 pm_constant_id_list_t locals;
19176 pm_locals_order(parser, &parser->current_scope->locals, &locals, false);
19177
19178 pm_parser_scope_pop(parser);
19179 pm_do_loop_stack_pop(parser);
19180
19181 flush_block_exits(parser, previous_block_exits);
19182 pm_node_list_free(&current_block_exits);
19183
19184 return (pm_node_t *) pm_singleton_class_node_create(parser, &locals, &class_keyword, &operator, expression, statements, &parser->previous);
19185 }
19186
19187 pm_node_t *constant_path = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_CLASS_NAME, (uint16_t) (depth + 1));
19188 pm_token_t name = parser->previous;
19189 if (name.type != PM_TOKEN_CONSTANT) {
19190 pm_parser_err_token(parser, &name, PM_ERR_CLASS_NAME);
19191 }
19192
19193 pm_token_t inheritance_operator;
19194 pm_node_t *superclass;
19195
19196 if (match1(parser, PM_TOKEN_LESS)) {
19197 inheritance_operator = parser->current;
19198 lex_state_set(parser, PM_LEX_STATE_BEG);
19199
19200 parser->command_start = true;
19201 parser_lex(parser);
19202
19203 superclass = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CLASS_SUPERCLASS, (uint16_t) (depth + 1));
19204 } else {
19205 inheritance_operator = not_provided(parser);
19206 superclass = NULL;
19207 }
19208
19209 pm_parser_scope_push(parser, true);
19210
19211 if (inheritance_operator.type != PM_TOKEN_NOT_PROVIDED) {
19212 expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CLASS_UNEXPECTED_END);
19213 } else {
19214 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
19215 }
19216 pm_node_t *statements = NULL;
19217
19219 pm_accepts_block_stack_push(parser, true);
19220 statements = (pm_node_t *) parse_statements(parser, PM_CONTEXT_CLASS, (uint16_t) (depth + 1));
19221 pm_accepts_block_stack_pop(parser);
19222 }
19223
19224 if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
19225 assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
19226 statements = (pm_node_t *) parse_rescues_implicit_begin(parser, opening_newline_index, &class_keyword, class_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_CLASS, (uint16_t) (depth + 1));
19227 } else {
19228 parser_warn_indentation_mismatch(parser, opening_newline_index, &class_keyword, false, false);
19229 }
19230
19231 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CLASS_TERM);
19232
19233 if (context_def_p(parser)) {
19234 pm_parser_err_token(parser, &class_keyword, PM_ERR_CLASS_IN_METHOD);
19235 }
19236
19237 pm_constant_id_list_t locals;
19238 pm_locals_order(parser, &parser->current_scope->locals, &locals, false);
19239
19240 pm_parser_scope_pop(parser);
19241 pm_do_loop_stack_pop(parser);
19242
19243 if (!PM_NODE_TYPE_P(constant_path, PM_CONSTANT_PATH_NODE) && !(PM_NODE_TYPE_P(constant_path, PM_CONSTANT_READ_NODE))) {
19244 pm_parser_err_node(parser, constant_path, PM_ERR_CLASS_NAME);
19245 }
19246
19247 pop_block_exits(parser, previous_block_exits);
19248 pm_node_list_free(&current_block_exits);
19249
19250 return (pm_node_t *) pm_class_node_create(parser, &locals, &class_keyword, constant_path, &name, &inheritance_operator, superclass, statements, &parser->previous);
19251 }
19252 case PM_TOKEN_KEYWORD_DEF: {
19253 pm_node_list_t current_block_exits = { 0 };
19254 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
19255
19256 pm_token_t def_keyword = parser->current;
19257 size_t opening_newline_index = token_newline_index(parser);
19258
19259 pm_node_t *receiver = NULL;
19260 pm_token_t operator = not_provided(parser);
19261 pm_token_t name;
19262
19263 // This context is necessary for lexing `...` in a bare params
19264 // correctly. It must be pushed before lexing the first param, so it
19265 // is here.
19266 context_push(parser, PM_CONTEXT_DEF_PARAMS);
19267 parser_lex(parser);
19268
19269 // This will be false if the method name is not a valid identifier
19270 // but could be followed by an operator.
19271 bool valid_name = true;
19272
19273 switch (parser->current.type) {
19274 case PM_CASE_OPERATOR:
19275 pm_parser_scope_push(parser, true);
19276 lex_state_set(parser, PM_LEX_STATE_ENDFN);
19277 parser_lex(parser);
19278
19279 name = parser->previous;
19280 break;
19281 case PM_TOKEN_IDENTIFIER: {
19282 parser_lex(parser);
19283
19284 if (match2(parser, PM_TOKEN_DOT, PM_TOKEN_COLON_COLON)) {
19285 receiver = parse_variable_call(parser);
19286
19287 pm_parser_scope_push(parser, true);
19288 lex_state_set(parser, PM_LEX_STATE_FNAME);
19289 parser_lex(parser);
19290
19291 operator = parser->previous;
19292 name = parse_method_definition_name(parser);
19293 } else {
19294 pm_refute_numbered_parameter(parser, parser->previous.start, parser->previous.end);
19295 pm_parser_scope_push(parser, true);
19296
19297 name = parser->previous;
19298 }
19299
19300 break;
19301 }
19305 valid_name = false;
19306 /* fallthrough */
19307 case PM_TOKEN_CONSTANT:
19315 pm_parser_scope_push(parser, true);
19316 parser_lex(parser);
19317
19318 pm_token_t identifier = parser->previous;
19319
19320 if (match2(parser, PM_TOKEN_DOT, PM_TOKEN_COLON_COLON)) {
19321 lex_state_set(parser, PM_LEX_STATE_FNAME);
19322 parser_lex(parser);
19323 operator = parser->previous;
19324
19325 switch (identifier.type) {
19326 case PM_TOKEN_CONSTANT:
19327 receiver = (pm_node_t *) pm_constant_read_node_create(parser, &identifier);
19328 break;
19330 receiver = (pm_node_t *) pm_instance_variable_read_node_create(parser, &identifier);
19331 break;
19333 receiver = (pm_node_t *) pm_class_variable_read_node_create(parser, &identifier);
19334 break;
19336 receiver = (pm_node_t *) pm_global_variable_read_node_create(parser, &identifier);
19337 break;
19339 receiver = (pm_node_t *) pm_nil_node_create(parser, &identifier);
19340 break;
19342 receiver = (pm_node_t *) pm_self_node_create(parser, &identifier);
19343 break;
19345 receiver = (pm_node_t *) pm_true_node_create(parser, &identifier);
19346 break;
19348 receiver = (pm_node_t *) pm_false_node_create(parser, &identifier);
19349 break;
19351 receiver = (pm_node_t *) pm_source_file_node_create(parser, &identifier);
19352 break;
19354 receiver = (pm_node_t *) pm_source_line_node_create(parser, &identifier);
19355 break;
19357 receiver = (pm_node_t *) pm_source_encoding_node_create(parser, &identifier);
19358 break;
19359 default:
19360 break;
19361 }
19362
19363 name = parse_method_definition_name(parser);
19364 } else {
19365 if (!valid_name) {
19366 PM_PARSER_ERR_TOKEN_FORMAT(parser, identifier, PM_ERR_DEF_NAME, pm_token_type_human(identifier.type));
19367 }
19368
19369 name = identifier;
19370 }
19371 break;
19372 }
19374 // The current context is `PM_CONTEXT_DEF_PARAMS`, however
19375 // the inner expression of this parenthesis should not be
19376 // processed under this context. Thus, the context is popped
19377 // here.
19378 context_pop(parser);
19379 parser_lex(parser);
19380
19381 pm_token_t lparen = parser->previous;
19382 pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_DEF_RECEIVER, (uint16_t) (depth + 1));
19383
19384 accept1(parser, PM_TOKEN_NEWLINE);
19385 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
19386 pm_token_t rparen = parser->previous;
19387
19388 lex_state_set(parser, PM_LEX_STATE_FNAME);
19389 expect2(parser, PM_TOKEN_DOT, PM_TOKEN_COLON_COLON, PM_ERR_DEF_RECEIVER_TERM);
19390
19391 operator = parser->previous;
19392 receiver = (pm_node_t *) pm_parentheses_node_create(parser, &lparen, expression, &rparen);
19393
19394 // `PM_CONTEXT_DEF_PARAMS` is pushed again here for the same
19395 // reason as described above.
19396 pm_parser_scope_push(parser, true);
19397 context_push(parser, PM_CONTEXT_DEF_PARAMS);
19398 name = parse_method_definition_name(parser);
19399 break;
19400 }
19401 default:
19402 pm_parser_scope_push(parser, true);
19403 name = parse_method_definition_name(parser);
19404 break;
19405 }
19406
19407 pm_token_t lparen;
19408 pm_token_t rparen;
19409 pm_parameters_node_t *params;
19410
19411 switch (parser->current.type) {
19413 parser_lex(parser);
19414 lparen = parser->previous;
19415
19416 if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
19417 params = NULL;
19418 } else {
19419 params = parse_parameters(parser, PM_BINDING_POWER_DEFINED, true, false, true, true, false, (uint16_t) (depth + 1));
19420 }
19421
19422 lex_state_set(parser, PM_LEX_STATE_BEG);
19423 parser->command_start = true;
19424
19425 context_pop(parser);
19426 if (!accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
19427 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_DEF_PARAMS_TERM_PAREN, pm_token_type_human(parser->current.type));
19428 parser->previous.start = parser->previous.end;
19429 parser->previous.type = PM_TOKEN_MISSING;
19430 }
19431
19432 rparen = parser->previous;
19433 break;
19434 }
19435 case PM_CASE_PARAMETER: {
19436 // If we're about to lex a label, we need to add the label
19437 // state to make sure the next newline is ignored.
19438 if (parser->current.type == PM_TOKEN_LABEL) {
19439 lex_state_set(parser, parser->lex_state | PM_LEX_STATE_LABEL);
19440 }
19441
19442 lparen = not_provided(parser);
19443 rparen = not_provided(parser);
19444 params = parse_parameters(parser, PM_BINDING_POWER_DEFINED, false, false, true, true, false, (uint16_t) (depth + 1));
19445
19446 context_pop(parser);
19447 break;
19448 }
19449 default: {
19450 lparen = not_provided(parser);
19451 rparen = not_provided(parser);
19452 params = NULL;
19453
19454 context_pop(parser);
19455 break;
19456 }
19457 }
19458
19459 pm_node_t *statements = NULL;
19460 pm_token_t equal;
19461 pm_token_t end_keyword;
19462
19463 if (accept1(parser, PM_TOKEN_EQUAL)) {
19464 if (token_is_setter_name(&name)) {
19465 pm_parser_err_token(parser, &name, PM_ERR_DEF_ENDLESS_SETTER);
19466 }
19467 equal = parser->previous;
19468
19469 context_push(parser, PM_CONTEXT_DEF);
19470 pm_do_loop_stack_push(parser, false);
19471 statements = (pm_node_t *) pm_statements_node_create(parser);
19472
19473 pm_node_t *statement = parse_expression(parser, PM_BINDING_POWER_DEFINED + 1, binding_power < PM_BINDING_POWER_COMPOSITION, false, PM_ERR_DEF_ENDLESS, (uint16_t) (depth + 1));
19474
19475 if (accept1(parser, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) {
19476 context_push(parser, PM_CONTEXT_RESCUE_MODIFIER);
19477
19478 pm_token_t rescue_keyword = parser->previous;
19479 pm_node_t *value = parse_expression(parser, pm_binding_powers[PM_TOKEN_KEYWORD_RESCUE_MODIFIER].right, false, false, PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1));
19480 context_pop(parser);
19481
19482 statement = (pm_node_t *) pm_rescue_modifier_node_create(parser, statement, &rescue_keyword, value);
19483 }
19484
19485 pm_statements_node_body_append(parser, (pm_statements_node_t *) statements, statement, false);
19486 pm_do_loop_stack_pop(parser);
19487 context_pop(parser);
19488 end_keyword = not_provided(parser);
19489 } else {
19490 equal = not_provided(parser);
19491
19492 if (lparen.type == PM_TOKEN_NOT_PROVIDED) {
19493 lex_state_set(parser, PM_LEX_STATE_BEG);
19494 parser->command_start = true;
19495 expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_DEF_PARAMS_TERM);
19496 } else {
19497 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
19498 }
19499
19500 pm_accepts_block_stack_push(parser, true);
19501 pm_do_loop_stack_push(parser, false);
19502
19504 pm_accepts_block_stack_push(parser, true);
19505 statements = (pm_node_t *) parse_statements(parser, PM_CONTEXT_DEF, (uint16_t) (depth + 1));
19506 pm_accepts_block_stack_pop(parser);
19507 }
19508
19510 assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
19511 statements = (pm_node_t *) parse_rescues_implicit_begin(parser, opening_newline_index, &def_keyword, def_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_DEF, (uint16_t) (depth + 1));
19512 } else {
19513 parser_warn_indentation_mismatch(parser, opening_newline_index, &def_keyword, false, false);
19514 }
19515
19516 pm_accepts_block_stack_pop(parser);
19517 pm_do_loop_stack_pop(parser);
19518
19519 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_DEF_TERM);
19520 end_keyword = parser->previous;
19521 }
19522
19523 pm_constant_id_list_t locals;
19524 pm_locals_order(parser, &parser->current_scope->locals, &locals, false);
19525 pm_parser_scope_pop(parser);
19526
19532 pm_constant_id_t name_id = pm_parser_constant_id_location(parser, name.start, parse_operator_symbol_name(&name));
19533
19534 flush_block_exits(parser, previous_block_exits);
19535 pm_node_list_free(&current_block_exits);
19536
19537 return (pm_node_t *) pm_def_node_create(
19538 parser,
19539 name_id,
19540 &name,
19541 receiver,
19542 params,
19543 statements,
19544 &locals,
19545 &def_keyword,
19546 &operator,
19547 &lparen,
19548 &rparen,
19549 &equal,
19550 &end_keyword
19551 );
19552 }
19554 parser_lex(parser);
19555 pm_token_t keyword = parser->previous;
19556
19557 pm_token_t lparen;
19558 pm_token_t rparen;
19559 pm_node_t *expression;
19560 context_push(parser, PM_CONTEXT_DEFINED);
19561
19562 if (accept1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
19563 lparen = parser->previous;
19564 expression = parse_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_DEFINED_EXPRESSION, (uint16_t) (depth + 1));
19565
19566 if (parser->recovering) {
19567 rparen = not_provided(parser);
19568 } else {
19569 accept1(parser, PM_TOKEN_NEWLINE);
19570 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
19571 rparen = parser->previous;
19572 }
19573 } else {
19574 lparen = not_provided(parser);
19575 rparen = not_provided(parser);
19576 expression = parse_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_DEFINED_EXPRESSION, (uint16_t) (depth + 1));
19577 }
19578
19579 context_pop(parser);
19580 return (pm_node_t *) pm_defined_node_create(
19581 parser,
19582 &lparen,
19583 expression,
19584 &rparen,
19585 &PM_LOCATION_TOKEN_VALUE(&keyword)
19586 );
19587 }
19589 if (binding_power != PM_BINDING_POWER_STATEMENT) {
19590 pm_parser_err_current(parser, PM_ERR_STATEMENT_POSTEXE_END);
19591 }
19592
19593 parser_lex(parser);
19594 pm_token_t keyword = parser->previous;
19595
19596 if (context_def_p(parser)) {
19597 pm_parser_warn_token(parser, &keyword, PM_WARN_END_IN_METHOD);
19598 }
19599
19600 expect1(parser, PM_TOKEN_BRACE_LEFT, PM_ERR_END_UPCASE_BRACE);
19601 pm_token_t opening = parser->previous;
19602 pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_POSTEXE, (uint16_t) (depth + 1));
19603
19604 expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_END_UPCASE_TERM);
19605 return (pm_node_t *) pm_post_execution_node_create(parser, &keyword, &opening, statements, &parser->previous);
19606 }
19608 parser_lex(parser);
19609 return (pm_node_t *) pm_false_node_create(parser, &parser->previous);
19610 case PM_TOKEN_KEYWORD_FOR: {
19611 size_t opening_newline_index = token_newline_index(parser);
19612 parser_lex(parser);
19613
19614 pm_token_t for_keyword = parser->previous;
19615 pm_node_t *index;
19616
19617 context_push(parser, PM_CONTEXT_FOR_INDEX);
19618
19619 // First, parse out the first index expression.
19620 if (accept1(parser, PM_TOKEN_USTAR)) {
19621 pm_token_t star_operator = parser->previous;
19622 pm_node_t *name = NULL;
19623
19624 if (token_begins_expression_p(parser->current.type)) {
19625 name = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
19626 }
19627
19628 index = (pm_node_t *) pm_splat_node_create(parser, &star_operator, name);
19629 } else if (token_begins_expression_p(parser->current.type)) {
19630 index = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA, (uint16_t) (depth + 1));
19631 } else {
19632 pm_parser_err_token(parser, &for_keyword, PM_ERR_FOR_INDEX);
19633 index = (pm_node_t *) pm_missing_node_create(parser, for_keyword.start, for_keyword.end);
19634 }
19635
19636 // Now, if there are multiple index expressions, parse them out.
19637 if (match1(parser, PM_TOKEN_COMMA)) {
19638 index = parse_targets(parser, index, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
19639 } else {
19640 index = parse_target(parser, index, false, false);
19641 }
19642
19643 context_pop(parser);
19644 pm_do_loop_stack_push(parser, true);
19645
19646 expect1(parser, PM_TOKEN_KEYWORD_IN, PM_ERR_FOR_IN);
19647 pm_token_t in_keyword = parser->previous;
19648
19649 pm_node_t *collection = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_FOR_COLLECTION, (uint16_t) (depth + 1));
19650 pm_do_loop_stack_pop(parser);
19651
19652 pm_token_t do_keyword;
19653 if (accept1(parser, PM_TOKEN_KEYWORD_DO_LOOP)) {
19654 do_keyword = parser->previous;
19655 } else {
19656 do_keyword = not_provided(parser);
19657 if (!match2(parser, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE)) {
19658 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_FOR_DELIMITER, pm_token_type_human(parser->current.type));
19659 }
19660 }
19661
19662 pm_statements_node_t *statements = NULL;
19663 if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
19664 statements = parse_statements(parser, PM_CONTEXT_FOR, (uint16_t) (depth + 1));
19665 }
19666
19667 parser_warn_indentation_mismatch(parser, opening_newline_index, &for_keyword, false, false);
19668 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_FOR_TERM);
19669
19670 return (pm_node_t *) pm_for_node_create(parser, index, collection, statements, &for_keyword, &in_keyword, &do_keyword, &parser->previous);
19671 }
19673 if (parser_end_of_line_p(parser)) {
19674 PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser, parser->current, PM_WARN_KEYWORD_EOL);
19675 }
19676
19677 size_t opening_newline_index = token_newline_index(parser);
19678 bool if_after_else = parser->previous.type == PM_TOKEN_KEYWORD_ELSE;
19679 parser_lex(parser);
19680
19681 return parse_conditional(parser, PM_CONTEXT_IF, opening_newline_index, if_after_else, (uint16_t) (depth + 1));
19683 if (binding_power != PM_BINDING_POWER_STATEMENT) {
19684 pm_parser_err_current(parser, PM_ERR_STATEMENT_UNDEF);
19685 }
19686
19687 parser_lex(parser);
19688 pm_undef_node_t *undef = pm_undef_node_create(parser, &parser->previous);
19689 pm_node_t *name = parse_undef_argument(parser, (uint16_t) (depth + 1));
19690
19691 if (PM_NODE_TYPE_P(name, PM_MISSING_NODE)) {
19692 pm_node_destroy(parser, name);
19693 } else {
19694 pm_undef_node_append(undef, name);
19695
19696 while (match1(parser, PM_TOKEN_COMMA)) {
19697 lex_state_set(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM);
19698 parser_lex(parser);
19699 name = parse_undef_argument(parser, (uint16_t) (depth + 1));
19700
19701 if (PM_NODE_TYPE_P(name, PM_MISSING_NODE)) {
19702 pm_node_destroy(parser, name);
19703 break;
19704 }
19705
19706 pm_undef_node_append(undef, name);
19707 }
19708 }
19709
19710 return (pm_node_t *) undef;
19711 }
19712 case PM_TOKEN_KEYWORD_NOT: {
19713 parser_lex(parser);
19714
19715 pm_token_t message = parser->previous;
19716 pm_arguments_t arguments = { 0 };
19717 pm_node_t *receiver = NULL;
19718
19719 accept1(parser, PM_TOKEN_NEWLINE);
19720
19721 if (accept1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
19722 pm_token_t lparen = parser->previous;
19723
19724 if (accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
19725 receiver = (pm_node_t *) pm_parentheses_node_create(parser, &lparen, NULL, &parser->previous);
19726 } else {
19727 arguments.opening_loc = PM_LOCATION_TOKEN_VALUE(&lparen);
19728 receiver = parse_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_NOT_EXPRESSION, (uint16_t) (depth + 1));
19729
19730 if (!parser->recovering) {
19731 accept1(parser, PM_TOKEN_NEWLINE);
19732 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
19733 arguments.closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
19734 }
19735 }
19736 } else {
19737 receiver = parse_expression(parser, PM_BINDING_POWER_NOT, true, false, PM_ERR_NOT_EXPRESSION, (uint16_t) (depth + 1));
19738 }
19739
19740 return (pm_node_t *) pm_call_node_not_create(parser, receiver, &message, &arguments);
19741 }
19743 size_t opening_newline_index = token_newline_index(parser);
19744 parser_lex(parser);
19745
19746 return parse_conditional(parser, PM_CONTEXT_UNLESS, opening_newline_index, false, (uint16_t) (depth + 1));
19747 }
19749 pm_node_list_t current_block_exits = { 0 };
19750 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
19751
19752 size_t opening_newline_index = token_newline_index(parser);
19753 parser_lex(parser);
19754 pm_token_t module_keyword = parser->previous;
19755
19756 pm_node_t *constant_path = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_MODULE_NAME, (uint16_t) (depth + 1));
19757 pm_token_t name;
19758
19759 // If we can recover from a syntax error that occurred while parsing
19760 // the name of the module, then we'll handle that here.
19761 if (PM_NODE_TYPE_P(constant_path, PM_MISSING_NODE)) {
19762 pop_block_exits(parser, previous_block_exits);
19763 pm_node_list_free(&current_block_exits);
19764
19765 pm_token_t missing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
19766 return (pm_node_t *) pm_module_node_create(parser, NULL, &module_keyword, constant_path, &missing, NULL, &missing);
19767 }
19768
19769 while (accept1(parser, PM_TOKEN_COLON_COLON)) {
19770 pm_token_t double_colon = parser->previous;
19771
19772 expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
19773 constant_path = (pm_node_t *) pm_constant_path_node_create(parser, constant_path, &double_colon, &parser->previous);
19774 }
19775
19776 // Here we retrieve the name of the module. If it wasn't a constant,
19777 // then it's possible that `module foo` was passed, which is a
19778 // syntax error. We handle that here as well.
19779 name = parser->previous;
19780 if (name.type != PM_TOKEN_CONSTANT) {
19781 pm_parser_err_token(parser, &name, PM_ERR_MODULE_NAME);
19782 }
19783
19784 pm_parser_scope_push(parser, true);
19785 accept2(parser, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE);
19786 pm_node_t *statements = NULL;
19787
19789 pm_accepts_block_stack_push(parser, true);
19790 statements = (pm_node_t *) parse_statements(parser, PM_CONTEXT_MODULE, (uint16_t) (depth + 1));
19791 pm_accepts_block_stack_pop(parser);
19792 }
19793
19795 assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
19796 statements = (pm_node_t *) parse_rescues_implicit_begin(parser, opening_newline_index, &module_keyword, module_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_MODULE, (uint16_t) (depth + 1));
19797 } else {
19798 parser_warn_indentation_mismatch(parser, opening_newline_index, &module_keyword, false, false);
19799 }
19800
19801 pm_constant_id_list_t locals;
19802 pm_locals_order(parser, &parser->current_scope->locals, &locals, false);
19803
19804 pm_parser_scope_pop(parser);
19805 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_MODULE_TERM);
19806
19807 if (context_def_p(parser)) {
19808 pm_parser_err_token(parser, &module_keyword, PM_ERR_MODULE_IN_METHOD);
19809 }
19810
19811 pop_block_exits(parser, previous_block_exits);
19812 pm_node_list_free(&current_block_exits);
19813
19814 return (pm_node_t *) pm_module_node_create(parser, &locals, &module_keyword, constant_path, &name, statements, &parser->previous);
19815 }
19817 parser_lex(parser);
19818 return (pm_node_t *) pm_nil_node_create(parser, &parser->previous);
19819 case PM_TOKEN_KEYWORD_REDO: {
19820 parser_lex(parser);
19821
19822 pm_node_t *node = (pm_node_t *) pm_redo_node_create(parser, &parser->previous);
19823 if (!parser->partial_script) parse_block_exit(parser, node);
19824
19825 return node;
19826 }
19828 parser_lex(parser);
19829
19830 pm_node_t *node = (pm_node_t *) pm_retry_node_create(parser, &parser->previous);
19831 parse_retry(parser, node);
19832
19833 return node;
19834 }
19836 parser_lex(parser);
19837 return (pm_node_t *) pm_self_node_create(parser, &parser->previous);
19839 parser_lex(parser);
19840 return (pm_node_t *) pm_true_node_create(parser, &parser->previous);
19842 size_t opening_newline_index = token_newline_index(parser);
19843
19844 context_push(parser, PM_CONTEXT_LOOP_PREDICATE);
19845 pm_do_loop_stack_push(parser, true);
19846
19847 parser_lex(parser);
19848 pm_token_t keyword = parser->previous;
19849 pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CONDITIONAL_UNTIL_PREDICATE, (uint16_t) (depth + 1));
19850
19851 pm_do_loop_stack_pop(parser);
19852 context_pop(parser);
19853
19854 pm_token_t do_keyword;
19855 if (accept1(parser, PM_TOKEN_KEYWORD_DO_LOOP)) {
19856 do_keyword = parser->previous;
19857 } else {
19858 do_keyword = not_provided(parser);
19859 expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CONDITIONAL_UNTIL_PREDICATE);
19860 }
19861
19862 pm_statements_node_t *statements = NULL;
19863 if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
19864 pm_accepts_block_stack_push(parser, true);
19865 statements = parse_statements(parser, PM_CONTEXT_UNTIL, (uint16_t) (depth + 1));
19866 pm_accepts_block_stack_pop(parser);
19867 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
19868 }
19869
19870 parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, false, false);
19871 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_UNTIL_TERM);
19872
19873 return (pm_node_t *) pm_until_node_create(parser, &keyword, &do_keyword, &parser->previous, predicate, statements, 0);
19874 }
19876 size_t opening_newline_index = token_newline_index(parser);
19877
19878 context_push(parser, PM_CONTEXT_LOOP_PREDICATE);
19879 pm_do_loop_stack_push(parser, true);
19880
19881 parser_lex(parser);
19882 pm_token_t keyword = parser->previous;
19883 pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CONDITIONAL_WHILE_PREDICATE, (uint16_t) (depth + 1));
19884
19885 pm_do_loop_stack_pop(parser);
19886 context_pop(parser);
19887
19888 pm_token_t do_keyword;
19889 if (accept1(parser, PM_TOKEN_KEYWORD_DO_LOOP)) {
19890 do_keyword = parser->previous;
19891 } else {
19892 do_keyword = not_provided(parser);
19893 expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CONDITIONAL_WHILE_PREDICATE);
19894 }
19895
19896 pm_statements_node_t *statements = NULL;
19897 if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
19898 pm_accepts_block_stack_push(parser, true);
19899 statements = parse_statements(parser, PM_CONTEXT_WHILE, (uint16_t) (depth + 1));
19900 pm_accepts_block_stack_pop(parser);
19901 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
19902 }
19903
19904 parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, false, false);
19905 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_WHILE_TERM);
19906
19907 return (pm_node_t *) pm_while_node_create(parser, &keyword, &do_keyword, &parser->previous, predicate, statements, 0);
19908 }
19910 parser_lex(parser);
19911 pm_token_t opening = parser->previous;
19912 pm_array_node_t *array = pm_array_node_create(parser, &opening);
19913
19914 while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
19915 accept1(parser, PM_TOKEN_WORDS_SEP);
19916 if (match1(parser, PM_TOKEN_STRING_END)) break;
19917
19918 if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
19919 pm_token_t opening = not_provided(parser);
19920 pm_token_t closing = not_provided(parser);
19921 pm_array_node_elements_append(array, (pm_node_t *) pm_symbol_node_create_current_string(parser, &opening, &parser->current, &closing));
19922 }
19923
19924 expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_I_LOWER_ELEMENT);
19925 }
19926
19927 pm_token_t closing = parser->current;
19928 if (match1(parser, PM_TOKEN_EOF)) {
19929 pm_parser_err_token(parser, &opening, PM_ERR_LIST_I_LOWER_TERM);
19930 closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
19931 } else {
19932 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_I_LOWER_TERM);
19933 }
19934 pm_array_node_close_set(array, &closing);
19935
19936 return (pm_node_t *) array;
19937 }
19939 parser_lex(parser);
19940 pm_token_t opening = parser->previous;
19941 pm_array_node_t *array = pm_array_node_create(parser, &opening);
19942
19943 // This is the current node that we are parsing that will be added to the
19944 // list of elements.
19945 pm_node_t *current = NULL;
19946
19947 while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
19948 switch (parser->current.type) {
19949 case PM_TOKEN_WORDS_SEP: {
19950 if (current == NULL) {
19951 // If we hit a separator before we have any content, then we don't
19952 // need to do anything.
19953 } else {
19954 // If we hit a separator after we've hit content, then we need to
19955 // append that content to the list and reset the current node.
19956 pm_array_node_elements_append(array, current);
19957 current = NULL;
19958 }
19959
19960 parser_lex(parser);
19961 break;
19962 }
19964 pm_token_t opening = not_provided(parser);
19965 pm_token_t closing = not_provided(parser);
19966
19967 if (current == NULL) {
19968 // If we hit content and the current node is NULL, then this is
19969 // the first string content we've seen. In that case we're going
19970 // to create a new symbol node and set that to the current.
19971 current = (pm_node_t *) pm_symbol_node_create_current_string(parser, &opening, &parser->current, &closing);
19972 parser_lex(parser);
19973 } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_SYMBOL_NODE)) {
19974 // If we hit string content and the current node is an
19975 // interpolated symbol, then we need to append the string content
19976 // to the list of child nodes.
19977 pm_node_t *string = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &parser->current, &closing);
19978 parser_lex(parser);
19979
19980 pm_interpolated_symbol_node_append((pm_interpolated_symbol_node_t *) current, string);
19981 } else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) {
19982 // If we hit string content and the current node is a symbol node,
19983 // then we need to convert the current node into an interpolated
19984 // symbol and add the string content to the list of child nodes.
19985 pm_symbol_node_t *cast = (pm_symbol_node_t *) current;
19986 pm_token_t bounds = not_provided(parser);
19987
19988 pm_token_t content = { .type = PM_TOKEN_STRING_CONTENT, .start = cast->value_loc.start, .end = cast->value_loc.end };
19989 pm_node_t *first_string = (pm_node_t *) pm_string_node_create_unescaped(parser, &bounds, &content, &bounds, &cast->unescaped);
19990 pm_node_t *second_string = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &parser->previous, &closing);
19991 parser_lex(parser);
19992
19993 pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing);
19994 pm_interpolated_symbol_node_append(interpolated, first_string);
19995 pm_interpolated_symbol_node_append(interpolated, second_string);
19996
19997 xfree(current);
19998 current = (pm_node_t *) interpolated;
19999 } else {
20000 assert(false && "unreachable");
20001 }
20002
20003 break;
20004 }
20005 case PM_TOKEN_EMBVAR: {
20006 bool start_location_set = false;
20007 if (current == NULL) {
20008 // If we hit an embedded variable and the current node is NULL,
20009 // then this is the start of a new symbol. We'll set the current
20010 // node to a new interpolated symbol.
20011 pm_token_t opening = not_provided(parser);
20012 pm_token_t closing = not_provided(parser);
20013 current = (pm_node_t *) pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing);
20014 } else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) {
20015 // If we hit an embedded variable and the current node is a symbol
20016 // node, then we'll convert the current into an interpolated
20017 // symbol and add its string-node form to the list of parts.
20018 pm_token_t opening = not_provided(parser);
20019 pm_token_t closing = not_provided(parser);
20020 pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing);
20021
20022 current = (pm_node_t *) pm_symbol_node_to_string_node(parser, (pm_symbol_node_t *) current);
20023 pm_interpolated_symbol_node_append(interpolated, current);
20024 interpolated->base.location.start = current->location.start;
20025 start_location_set = true;
20026 current = (pm_node_t *) interpolated;
20027 } else {
20028 // If we hit an embedded variable and the current node is an
20029 // interpolated symbol, then we'll just add the embedded variable.
20030 }
20031
20032 pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
20033 pm_interpolated_symbol_node_append((pm_interpolated_symbol_node_t *) current, part);
20034 if (!start_location_set) {
20035 current->location.start = part->location.start;
20036 }
20037 break;
20038 }
20040 bool start_location_set = false;
20041 if (current == NULL) {
20042 // If we hit an embedded expression and the current node is NULL,
20043 // then this is the start of a new symbol. We'll set the current
20044 // node to a new interpolated symbol.
20045 pm_token_t opening = not_provided(parser);
20046 pm_token_t closing = not_provided(parser);
20047 current = (pm_node_t *) pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing);
20048 } else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) {
20049 // If we hit an embedded expression and the current node is a
20050 // symbol node, then we'll convert the current into an
20051 // interpolated symbol and add its string-node form to the list of
20052 // parts.
20053 pm_token_t opening = not_provided(parser);
20054 pm_token_t closing = not_provided(parser);
20055 pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing);
20056
20057 current = (pm_node_t *) pm_symbol_node_to_string_node(parser, (pm_symbol_node_t *) current);
20058 pm_interpolated_symbol_node_append(interpolated, current);
20059 interpolated->base.location.start = current->location.start;
20060 start_location_set = true;
20061 current = (pm_node_t *) interpolated;
20062 } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_SYMBOL_NODE)) {
20063 // If we hit an embedded expression and the current node is an
20064 // interpolated symbol, then we'll just continue on.
20065 } else {
20066 assert(false && "unreachable");
20067 }
20068
20069 pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
20070 pm_interpolated_symbol_node_append((pm_interpolated_symbol_node_t *) current, part);
20071 if (!start_location_set) {
20072 current->location.start = part->location.start;
20073 }
20074 break;
20075 }
20076 default:
20077 expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_I_UPPER_ELEMENT);
20078 parser_lex(parser);
20079 break;
20080 }
20081 }
20082
20083 // If we have a current node, then we need to append it to the list.
20084 if (current) {
20085 pm_array_node_elements_append(array, current);
20086 }
20087
20088 pm_token_t closing = parser->current;
20089 if (match1(parser, PM_TOKEN_EOF)) {
20090 pm_parser_err_token(parser, &opening, PM_ERR_LIST_I_UPPER_TERM);
20091 closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
20092 } else {
20093 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_I_UPPER_TERM);
20094 }
20095 pm_array_node_close_set(array, &closing);
20096
20097 return (pm_node_t *) array;
20098 }
20100 parser_lex(parser);
20101 pm_token_t opening = parser->previous;
20102 pm_array_node_t *array = pm_array_node_create(parser, &opening);
20103
20104 // skip all leading whitespaces
20105 accept1(parser, PM_TOKEN_WORDS_SEP);
20106
20107 while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
20108 accept1(parser, PM_TOKEN_WORDS_SEP);
20109 if (match1(parser, PM_TOKEN_STRING_END)) break;
20110
20111 if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
20112 pm_token_t opening = not_provided(parser);
20113 pm_token_t closing = not_provided(parser);
20114
20115 pm_node_t *string = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &parser->current, &closing);
20116 pm_array_node_elements_append(array, string);
20117 }
20118
20119 expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_W_LOWER_ELEMENT);
20120 }
20121
20122 pm_token_t closing = parser->current;
20123 if (match1(parser, PM_TOKEN_EOF)) {
20124 pm_parser_err_token(parser, &opening, PM_ERR_LIST_W_LOWER_TERM);
20125 closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
20126 } else {
20127 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_W_LOWER_TERM);
20128 }
20129
20130 pm_array_node_close_set(array, &closing);
20131 return (pm_node_t *) array;
20132 }
20134 parser_lex(parser);
20135 pm_token_t opening = parser->previous;
20136 pm_array_node_t *array = pm_array_node_create(parser, &opening);
20137
20138 // This is the current node that we are parsing that will be added
20139 // to the list of elements.
20140 pm_node_t *current = NULL;
20141
20142 while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
20143 switch (parser->current.type) {
20144 case PM_TOKEN_WORDS_SEP: {
20145 // Reset the explicit encoding if we hit a separator
20146 // since each element can have its own encoding.
20147 parser->explicit_encoding = NULL;
20148
20149 if (current == NULL) {
20150 // If we hit a separator before we have any content,
20151 // then we don't need to do anything.
20152 } else {
20153 // If we hit a separator after we've hit content,
20154 // then we need to append that content to the list
20155 // and reset the current node.
20156 pm_array_node_elements_append(array, current);
20157 current = NULL;
20158 }
20159
20160 parser_lex(parser);
20161 break;
20162 }
20164 pm_token_t opening = not_provided(parser);
20165 pm_token_t closing = not_provided(parser);
20166
20167 pm_node_t *string = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &parser->current, &closing);
20168 pm_node_flag_set(string, parse_unescaped_encoding(parser));
20169 parser_lex(parser);
20170
20171 if (current == NULL) {
20172 // If we hit content and the current node is NULL,
20173 // then this is the first string content we've seen.
20174 // In that case we're going to create a new string
20175 // node and set that to the current.
20176 current = string;
20177 } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_STRING_NODE)) {
20178 // If we hit string content and the current node is
20179 // an interpolated string, then we need to append
20180 // the string content to the list of child nodes.
20181 pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, string);
20182 } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) {
20183 // If we hit string content and the current node is
20184 // a string node, then we need to convert the
20185 // current node into an interpolated string and add
20186 // the string content to the list of child nodes.
20187 pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
20188 pm_interpolated_string_node_append(interpolated, current);
20189 pm_interpolated_string_node_append(interpolated, string);
20190 current = (pm_node_t *) interpolated;
20191 } else {
20192 assert(false && "unreachable");
20193 }
20194
20195 break;
20196 }
20197 case PM_TOKEN_EMBVAR: {
20198 if (current == NULL) {
20199 // If we hit an embedded variable and the current
20200 // node is NULL, then this is the start of a new
20201 // string. We'll set the current node to a new
20202 // interpolated string.
20203 pm_token_t opening = not_provided(parser);
20204 pm_token_t closing = not_provided(parser);
20205 current = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
20206 } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) {
20207 // If we hit an embedded variable and the current
20208 // node is a string node, then we'll convert the
20209 // current into an interpolated string and add the
20210 // string node to the list of parts.
20211 pm_token_t opening = not_provided(parser);
20212 pm_token_t closing = not_provided(parser);
20213 pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
20214 pm_interpolated_string_node_append(interpolated, current);
20215 current = (pm_node_t *) interpolated;
20216 } else {
20217 // If we hit an embedded variable and the current
20218 // node is an interpolated string, then we'll just
20219 // add the embedded variable.
20220 }
20221
20222 pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
20223 pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, part);
20224 break;
20225 }
20227 if (current == NULL) {
20228 // If we hit an embedded expression and the current
20229 // node is NULL, then this is the start of a new
20230 // string. We'll set the current node to a new
20231 // interpolated string.
20232 pm_token_t opening = not_provided(parser);
20233 pm_token_t closing = not_provided(parser);
20234 current = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
20235 } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) {
20236 // If we hit an embedded expression and the current
20237 // node is a string node, then we'll convert the
20238 // current into an interpolated string and add the
20239 // string node to the list of parts.
20240 pm_token_t opening = not_provided(parser);
20241 pm_token_t closing = not_provided(parser);
20242 pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
20243 pm_interpolated_string_node_append(interpolated, current);
20244 current = (pm_node_t *) interpolated;
20245 } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_STRING_NODE)) {
20246 // If we hit an embedded expression and the current
20247 // node is an interpolated string, then we'll just
20248 // continue on.
20249 } else {
20250 assert(false && "unreachable");
20251 }
20252
20253 pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
20254 pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, part);
20255 break;
20256 }
20257 default:
20258 expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_W_UPPER_ELEMENT);
20259 parser_lex(parser);
20260 break;
20261 }
20262 }
20263
20264 // If we have a current node, then we need to append it to the list.
20265 if (current) {
20266 pm_array_node_elements_append(array, current);
20267 }
20268
20269 pm_token_t closing = parser->current;
20270 if (match1(parser, PM_TOKEN_EOF)) {
20271 pm_parser_err_token(parser, &opening, PM_ERR_LIST_W_UPPER_TERM);
20272 closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
20273 } else {
20274 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_W_UPPER_TERM);
20275 }
20276
20277 pm_array_node_close_set(array, &closing);
20278 return (pm_node_t *) array;
20279 }
20280 case PM_TOKEN_REGEXP_BEGIN: {
20281 pm_token_t opening = parser->current;
20282 parser_lex(parser);
20283
20284 if (match1(parser, PM_TOKEN_REGEXP_END)) {
20285 // If we get here, then we have an end immediately after a start. In
20286 // that case we'll create an empty content token and return an
20287 // uninterpolated regular expression.
20288 pm_token_t content = (pm_token_t) {
20290 .start = parser->previous.end,
20291 .end = parser->previous.end
20292 };
20293
20294 parser_lex(parser);
20295
20296 pm_node_t *node = (pm_node_t *) pm_regular_expression_node_create(parser, &opening, &content, &parser->previous);
20298
20299 return node;
20300 }
20301
20303
20304 if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
20305 // In this case we've hit string content so we know the regular
20306 // expression at least has something in it. We'll need to check if the
20307 // following token is the end (in which case we can return a plain
20308 // regular expression) or if it's not then it has interpolation.
20309 pm_string_t unescaped = parser->current_string;
20310 pm_token_t content = parser->current;
20311 bool ascii_only = parser->current_regular_expression_ascii_only;
20312 parser_lex(parser);
20313
20314 // If we hit an end, then we can create a regular expression
20315 // node without interpolation, which can be represented more
20316 // succinctly and more easily compiled.
20317 if (accept1(parser, PM_TOKEN_REGEXP_END)) {
20318 pm_regular_expression_node_t *node = (pm_regular_expression_node_t *) pm_regular_expression_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
20319
20320 // If we're not immediately followed by a =~, then we want
20321 // to parse all of the errors at this point. If it is
20322 // followed by a =~, then it will get parsed higher up while
20323 // parsing the named captures as well.
20324 if (!match1(parser, PM_TOKEN_EQUAL_TILDE)) {
20325 parse_regular_expression_errors(parser, node);
20326 }
20327
20328 pm_node_flag_set((pm_node_t *) node, parse_and_validate_regular_expression_encoding(parser, &unescaped, ascii_only, node->base.flags));
20329 return (pm_node_t *) node;
20330 }
20331
20332 // If we get here, then we have interpolation so we'll need to create
20333 // a regular expression node with interpolation.
20334 interpolated = pm_interpolated_regular_expression_node_create(parser, &opening);
20335
20336 pm_token_t opening = not_provided(parser);
20337 pm_token_t closing = not_provided(parser);
20338 pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &parser->previous, &closing, &unescaped);
20339
20340 if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
20341 // This is extremely strange, but the first string part of a
20342 // regular expression will always be tagged as binary if we
20343 // are in a US-ASCII file, no matter its contents.
20344 pm_node_flag_set(part, PM_STRING_FLAGS_FORCED_BINARY_ENCODING);
20345 }
20346
20347 pm_interpolated_regular_expression_node_append(interpolated, part);
20348 } else {
20349 // If the first part of the body of the regular expression is not a
20350 // string content, then we have interpolation and we need to create an
20351 // interpolated regular expression node.
20352 interpolated = pm_interpolated_regular_expression_node_create(parser, &opening);
20353 }
20354
20355 // Now that we're here and we have interpolation, we'll parse all of the
20356 // parts into the list.
20357 pm_node_t *part;
20358 while (!match2(parser, PM_TOKEN_REGEXP_END, PM_TOKEN_EOF)) {
20359 if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
20360 pm_interpolated_regular_expression_node_append(interpolated, part);
20361 }
20362 }
20363
20364 pm_token_t closing = parser->current;
20365 if (match1(parser, PM_TOKEN_EOF)) {
20366 pm_parser_err_token(parser, &opening, PM_ERR_REGEXP_TERM);
20367 closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
20368 } else {
20369 expect1(parser, PM_TOKEN_REGEXP_END, PM_ERR_REGEXP_TERM);
20370 }
20371
20372 pm_interpolated_regular_expression_node_closing_set(parser, interpolated, &closing);
20373 return (pm_node_t *) interpolated;
20374 }
20375 case PM_TOKEN_BACKTICK:
20377 parser_lex(parser);
20378 pm_token_t opening = parser->previous;
20379
20380 // When we get here, we don't know if this string is going to have
20381 // interpolation or not, even though it is allowed. Still, we want to be
20382 // able to return a string node without interpolation if we can since
20383 // it'll be faster.
20384 if (match1(parser, PM_TOKEN_STRING_END)) {
20385 // If we get here, then we have an end immediately after a start. In
20386 // that case we'll create an empty content token and return an
20387 // uninterpolated string.
20388 pm_token_t content = (pm_token_t) {
20390 .start = parser->previous.end,
20391 .end = parser->previous.end
20392 };
20393
20394 parser_lex(parser);
20395 return (pm_node_t *) pm_xstring_node_create(parser, &opening, &content, &parser->previous);
20396 }
20397
20399
20400 if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
20401 // In this case we've hit string content so we know the string
20402 // at least has something in it. We'll need to check if the
20403 // following token is the end (in which case we can return a
20404 // plain string) or if it's not then it has interpolation.
20405 pm_string_t unescaped = parser->current_string;
20406 pm_token_t content = parser->current;
20407 parser_lex(parser);
20408
20409 if (match1(parser, PM_TOKEN_STRING_END)) {
20410 pm_node_t *node = (pm_node_t *) pm_xstring_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped);
20411 pm_node_flag_set(node, parse_unescaped_encoding(parser));
20412 parser_lex(parser);
20413 return node;
20414 }
20415
20416 // If we get here, then we have interpolation so we'll need to
20417 // create a string node with interpolation.
20418 node = pm_interpolated_xstring_node_create(parser, &opening, &opening);
20419
20420 pm_token_t opening = not_provided(parser);
20421 pm_token_t closing = not_provided(parser);
20422
20423 pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &parser->previous, &closing, &unescaped);
20424 pm_node_flag_set(part, parse_unescaped_encoding(parser));
20425
20426 pm_interpolated_xstring_node_append(node, part);
20427 } else {
20428 // If the first part of the body of the string is not a string
20429 // content, then we have interpolation and we need to create an
20430 // interpolated string node.
20431 node = pm_interpolated_xstring_node_create(parser, &opening, &opening);
20432 }
20433
20434 pm_node_t *part;
20435 while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
20436 if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
20437 pm_interpolated_xstring_node_append(node, part);
20438 }
20439 }
20440
20441 pm_token_t closing = parser->current;
20442 if (match1(parser, PM_TOKEN_EOF)) {
20443 pm_parser_err_token(parser, &opening, PM_ERR_XSTRING_TERM);
20444 closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
20445 } else {
20446 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_XSTRING_TERM);
20447 }
20448 pm_interpolated_xstring_node_closing_set(node, &closing);
20449
20450 return (pm_node_t *) node;
20451 }
20452 case PM_TOKEN_USTAR: {
20453 parser_lex(parser);
20454
20455 // * operators at the beginning of expressions are only valid in the
20456 // context of a multiple assignment. We enforce that here. We'll
20457 // still lex past it though and create a missing node place.
20458 if (binding_power != PM_BINDING_POWER_STATEMENT) {
20459 pm_parser_err_prefix(parser, diag_id);
20460 return (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
20461 }
20462
20463 pm_token_t operator = parser->previous;
20464 pm_node_t *name = NULL;
20465
20466 if (token_begins_expression_p(parser->current.type)) {
20467 name = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
20468 }
20469
20470 pm_node_t *splat = (pm_node_t *) pm_splat_node_create(parser, &operator, name);
20471
20472 if (match1(parser, PM_TOKEN_COMMA)) {
20473 return parse_targets_validate(parser, splat, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
20474 } else {
20475 return parse_target_validate(parser, splat, true);
20476 }
20477 }
20478 case PM_TOKEN_BANG: {
20479 if (binding_power > PM_BINDING_POWER_UNARY) {
20480 pm_parser_err_prefix(parser, PM_ERR_UNARY_DISALLOWED);
20481 }
20482
20483 parser_lex(parser);
20484
20485 pm_token_t operator = parser->previous;
20486 pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, binding_power < PM_BINDING_POWER_MATCH, false, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
20487 pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "!");
20488
20489 pm_conditional_predicate(parser, receiver, PM_CONDITIONAL_PREDICATE_TYPE_NOT);
20490 return (pm_node_t *) node;
20491 }
20492 case PM_TOKEN_TILDE: {
20493 if (binding_power > PM_BINDING_POWER_UNARY) {
20494 pm_parser_err_prefix(parser, PM_ERR_UNARY_DISALLOWED);
20495 }
20496 parser_lex(parser);
20497
20498 pm_token_t operator = parser->previous;
20499 pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, false, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
20500 pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "~");
20501
20502 return (pm_node_t *) node;
20503 }
20504 case PM_TOKEN_UMINUS: {
20505 if (binding_power > PM_BINDING_POWER_UNARY) {
20506 pm_parser_err_prefix(parser, PM_ERR_UNARY_DISALLOWED);
20507 }
20508 parser_lex(parser);
20509
20510 pm_token_t operator = parser->previous;
20511 pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, false, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
20512 pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "-@");
20513
20514 return (pm_node_t *) node;
20515 }
20516 case PM_TOKEN_UMINUS_NUM: {
20517 parser_lex(parser);
20518
20519 pm_token_t operator = parser->previous;
20520 pm_node_t *node = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, false, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
20521
20522 if (accept1(parser, PM_TOKEN_STAR_STAR)) {
20523 pm_token_t exponent_operator = parser->previous;
20524 pm_node_t *exponent = parse_expression(parser, pm_binding_powers[exponent_operator.type].right, false, false, PM_ERR_EXPECT_ARGUMENT, (uint16_t) (depth + 1));
20525 node = (pm_node_t *) pm_call_node_binary_create(parser, node, &exponent_operator, exponent, 0);
20526 node = (pm_node_t *) pm_call_node_unary_create(parser, &operator, node, "-@");
20527 } else {
20528 switch (PM_NODE_TYPE(node)) {
20529 case PM_INTEGER_NODE:
20530 case PM_FLOAT_NODE:
20531 case PM_RATIONAL_NODE:
20532 case PM_IMAGINARY_NODE:
20533 parse_negative_numeric(node);
20534 break;
20535 default:
20536 node = (pm_node_t *) pm_call_node_unary_create(parser, &operator, node, "-@");
20537 break;
20538 }
20539 }
20540
20541 return node;
20542 }
20544 int previous_lambda_enclosure_nesting = parser->lambda_enclosure_nesting;
20546
20547 size_t opening_newline_index = token_newline_index(parser);
20548 pm_accepts_block_stack_push(parser, true);
20549 parser_lex(parser);
20550
20551 pm_token_t operator = parser->previous;
20552 pm_parser_scope_push(parser, false);
20553
20554 pm_block_parameters_node_t *block_parameters;
20555
20556 switch (parser->current.type) {
20558 pm_token_t opening = parser->current;
20559 parser_lex(parser);
20560
20561 if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
20562 block_parameters = pm_block_parameters_node_create(parser, NULL, &opening);
20563 } else {
20564 block_parameters = parse_block_parameters(parser, false, &opening, true, true, (uint16_t) (depth + 1));
20565 }
20566
20567 accept1(parser, PM_TOKEN_NEWLINE);
20568 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
20569
20570 pm_block_parameters_node_closing_set(block_parameters, &parser->previous);
20571 break;
20572 }
20573 case PM_CASE_PARAMETER: {
20574 pm_accepts_block_stack_push(parser, false);
20575 pm_token_t opening = not_provided(parser);
20576 block_parameters = parse_block_parameters(parser, false, &opening, true, false, (uint16_t) (depth + 1));
20577 pm_accepts_block_stack_pop(parser);
20578 break;
20579 }
20580 default: {
20581 block_parameters = NULL;
20582 break;
20583 }
20584 }
20585
20586 pm_token_t opening;
20587 pm_node_t *body = NULL;
20588 parser->lambda_enclosure_nesting = previous_lambda_enclosure_nesting;
20589
20590 if (accept1(parser, PM_TOKEN_LAMBDA_BEGIN)) {
20591 opening = parser->previous;
20592
20593 if (!match1(parser, PM_TOKEN_BRACE_RIGHT)) {
20594 body = (pm_node_t *) parse_statements(parser, PM_CONTEXT_LAMBDA_BRACES, (uint16_t) (depth + 1));
20595 }
20596
20597 parser_warn_indentation_mismatch(parser, opening_newline_index, &operator, false, false);
20598 expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_LAMBDA_TERM_BRACE);
20599 } else {
20600 expect1(parser, PM_TOKEN_KEYWORD_DO, PM_ERR_LAMBDA_OPEN);
20601 opening = parser->previous;
20602
20604 pm_accepts_block_stack_push(parser, true);
20605 body = (pm_node_t *) parse_statements(parser, PM_CONTEXT_LAMBDA_DO_END, (uint16_t) (depth + 1));
20606 pm_accepts_block_stack_pop(parser);
20607 }
20608
20609 if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
20610 assert(body == NULL || PM_NODE_TYPE_P(body, PM_STATEMENTS_NODE));
20611 body = (pm_node_t *) parse_rescues_implicit_begin(parser, opening_newline_index, &operator, opening.start, (pm_statements_node_t *) body, PM_RESCUES_LAMBDA, (uint16_t) (depth + 1));
20612 } else {
20613 parser_warn_indentation_mismatch(parser, opening_newline_index, &operator, false, false);
20614 }
20615
20616 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_LAMBDA_TERM_END);
20617 }
20618
20619 pm_constant_id_list_t locals;
20620 pm_locals_order(parser, &parser->current_scope->locals, &locals, pm_parser_scope_toplevel_p(parser));
20621 pm_node_t *parameters = parse_blocklike_parameters(parser, (pm_node_t *) block_parameters, &operator, &parser->previous);
20622
20623 pm_parser_scope_pop(parser);
20624 pm_accepts_block_stack_pop(parser);
20625
20626 return (pm_node_t *) pm_lambda_node_create(parser, &locals, &operator, &opening, &parser->previous, parameters, body);
20627 }
20628 case PM_TOKEN_UPLUS: {
20629 if (binding_power > PM_BINDING_POWER_UNARY) {
20630 pm_parser_err_prefix(parser, PM_ERR_UNARY_DISALLOWED);
20631 }
20632 parser_lex(parser);
20633
20634 pm_token_t operator = parser->previous;
20635 pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, false, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
20636 pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "+@");
20637
20638 return (pm_node_t *) node;
20639 }
20641 return parse_strings(parser, NULL, accepts_label, (uint16_t) (depth + 1));
20642 case PM_TOKEN_SYMBOL_BEGIN: {
20643 pm_lex_mode_t lex_mode = *parser->lex_modes.current;
20644 parser_lex(parser);
20645
20646 return parse_symbol(parser, &lex_mode, PM_LEX_STATE_END, (uint16_t) (depth + 1));
20647 }
20648 default: {
20649 pm_context_t recoverable = context_recoverable(parser, &parser->current);
20650
20651 if (recoverable != PM_CONTEXT_NONE) {
20652 parser->recovering = true;
20653
20654 // If the given error is not the generic one, then we'll add it
20655 // here because it will provide more context in addition to the
20656 // recoverable error that we will also add.
20657 if (diag_id != PM_ERR_CANNOT_PARSE_EXPRESSION) {
20658 pm_parser_err_prefix(parser, diag_id);
20659 }
20660
20661 // If we get here, then we are assuming this token is closing a
20662 // parent context, so we'll indicate that to the user so that
20663 // they know how we behaved.
20664 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_CLOSE_CONTEXT, pm_token_type_human(parser->current.type), context_human(recoverable));
20665 } else if (diag_id == PM_ERR_CANNOT_PARSE_EXPRESSION) {
20666 // We're going to make a special case here, because "cannot
20667 // parse expression" is pretty generic, and we know here that we
20668 // have an unexpected token.
20669 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, pm_token_type_human(parser->current.type));
20670 } else {
20671 pm_parser_err_prefix(parser, diag_id);
20672 }
20673
20674 return (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
20675 }
20676 }
20677}
20678
20688static pm_node_t *
20689parse_assignment_value(pm_parser_t *parser, pm_binding_power_t previous_binding_power, pm_binding_power_t binding_power, bool accepts_command_call, pm_diagnostic_id_t diag_id, uint16_t depth) {
20690 pm_node_t *value = parse_value_expression(parser, binding_power, previous_binding_power == PM_BINDING_POWER_ASSIGNMENT ? accepts_command_call : previous_binding_power < PM_BINDING_POWER_MATCH, false, diag_id, (uint16_t) (depth + 1));
20691
20692 // Contradicting binding powers, the right-hand-side value of the assignment
20693 // allows the `rescue` modifier.
20694 if (match1(parser, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) {
20695 context_push(parser, PM_CONTEXT_RESCUE_MODIFIER);
20696
20697 pm_token_t rescue = parser->current;
20698 parser_lex(parser);
20699
20700 pm_node_t *right = parse_expression(parser, pm_binding_powers[PM_TOKEN_KEYWORD_RESCUE_MODIFIER].right, false, false, PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1));
20701 context_pop(parser);
20702
20703 return (pm_node_t *) pm_rescue_modifier_node_create(parser, value, &rescue, right);
20704 }
20705
20706 return value;
20707}
20708
20713static void
20714parse_assignment_value_local(pm_parser_t *parser, const pm_node_t *node) {
20715 switch (PM_NODE_TYPE(node)) {
20716 case PM_BEGIN_NODE: {
20717 const pm_begin_node_t *cast = (const pm_begin_node_t *) node;
20718 if (cast->statements != NULL) parse_assignment_value_local(parser, (const pm_node_t *) cast->statements);
20719 break;
20720 }
20723 pm_locals_read(&pm_parser_scope_find(parser, cast->depth)->locals, cast->name);
20724 break;
20725 }
20726 case PM_PARENTHESES_NODE: {
20727 const pm_parentheses_node_t *cast = (const pm_parentheses_node_t *) node;
20728 if (cast->body != NULL) parse_assignment_value_local(parser, cast->body);
20729 break;
20730 }
20731 case PM_STATEMENTS_NODE: {
20732 const pm_statements_node_t *cast = (const pm_statements_node_t *) node;
20733 const pm_node_t *statement;
20734
20735 PM_NODE_LIST_FOREACH(&cast->body, index, statement) {
20736 parse_assignment_value_local(parser, statement);
20737 }
20738 break;
20739 }
20740 default:
20741 break;
20742 }
20743}
20744
20757static pm_node_t *
20758parse_assignment_values(pm_parser_t *parser, pm_binding_power_t previous_binding_power, pm_binding_power_t binding_power, bool accepts_command_call, pm_diagnostic_id_t diag_id, uint16_t depth) {
20759 bool permitted = true;
20760 if (previous_binding_power != PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_USTAR)) permitted = false;
20761
20762 pm_node_t *value = parse_starred_expression(parser, binding_power, previous_binding_power == PM_BINDING_POWER_ASSIGNMENT ? accepts_command_call : previous_binding_power < PM_BINDING_POWER_MATCH, diag_id, (uint16_t) (depth + 1));
20763 if (!permitted) pm_parser_err_node(parser, value, PM_ERR_UNEXPECTED_MULTI_WRITE);
20764
20765 parse_assignment_value_local(parser, value);
20766 bool single_value = true;
20767
20768 if (previous_binding_power == PM_BINDING_POWER_STATEMENT && (PM_NODE_TYPE_P(value, PM_SPLAT_NODE) || match1(parser, PM_TOKEN_COMMA))) {
20769 single_value = false;
20770
20771 pm_token_t opening = not_provided(parser);
20772 pm_array_node_t *array = pm_array_node_create(parser, &opening);
20773
20774 pm_array_node_elements_append(array, value);
20775 value = (pm_node_t *) array;
20776
20777 while (accept1(parser, PM_TOKEN_COMMA)) {
20778 pm_node_t *element = parse_starred_expression(parser, binding_power, false, PM_ERR_ARRAY_ELEMENT, (uint16_t) (depth + 1));
20779
20780 pm_array_node_elements_append(array, element);
20781 if (PM_NODE_TYPE_P(element, PM_MISSING_NODE)) break;
20782
20783 parse_assignment_value_local(parser, element);
20784 }
20785 }
20786
20787 // Contradicting binding powers, the right-hand-side value of the assignment
20788 // allows the `rescue` modifier.
20789 if ((single_value || (binding_power == (PM_BINDING_POWER_MULTI_ASSIGNMENT + 1))) && match1(parser, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) {
20790 context_push(parser, PM_CONTEXT_RESCUE_MODIFIER);
20791
20792 pm_token_t rescue = parser->current;
20793 parser_lex(parser);
20794
20795 bool accepts_command_call_inner = false;
20796
20797 // RHS can accept command call iff the value is a call with arguments
20798 // but without parenthesis.
20799 if (PM_NODE_TYPE_P(value, PM_CALL_NODE)) {
20800 pm_call_node_t *call_node = (pm_call_node_t *) value;
20801 if ((call_node->arguments != NULL) && (call_node->opening_loc.start == NULL)) {
20802 accepts_command_call_inner = true;
20803 }
20804 }
20805
20806 pm_node_t *right = parse_expression(parser, pm_binding_powers[PM_TOKEN_KEYWORD_RESCUE_MODIFIER].right, accepts_command_call_inner, false, PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1));
20807 context_pop(parser);
20808
20809 return (pm_node_t *) pm_rescue_modifier_node_create(parser, value, &rescue, right);
20810 }
20811
20812 return value;
20813}
20814
20822static void
20823parse_call_operator_write(pm_parser_t *parser, pm_call_node_t *call_node, const pm_token_t *operator) {
20824 if (call_node->arguments != NULL) {
20825 pm_parser_err_token(parser, operator, PM_ERR_OPERATOR_WRITE_ARGUMENTS);
20826 pm_node_destroy(parser, (pm_node_t *) call_node->arguments);
20827 call_node->arguments = NULL;
20828 }
20829
20830 if (call_node->block != NULL) {
20831 pm_parser_err_token(parser, operator, PM_ERR_OPERATOR_WRITE_BLOCK);
20832 pm_node_destroy(parser, (pm_node_t *) call_node->block);
20833 call_node->block = NULL;
20834 }
20835}
20836
20861
20866static void
20867parse_regular_expression_named_capture(const pm_string_t *capture, void *data) {
20869
20870 pm_parser_t *parser = callback_data->parser;
20871 pm_call_node_t *call = callback_data->call;
20872 pm_constant_id_list_t *names = &callback_data->names;
20873
20874 const uint8_t *source = pm_string_source(capture);
20875 size_t length = pm_string_length(capture);
20876
20877 pm_location_t location;
20878 pm_constant_id_t name;
20879
20880 // If the name of the capture group isn't a valid identifier, we do
20881 // not add it to the local table.
20882 if (!pm_slice_is_valid_local(parser, source, source + length)) return;
20883
20884 if (callback_data->shared) {
20885 // If the unescaped string is a slice of the source, then we can
20886 // copy the names directly. The pointers will line up.
20887 location = (pm_location_t) { .start = source, .end = source + length };
20888 name = pm_parser_constant_id_location(parser, location.start, location.end);
20889 } else {
20890 // Otherwise, the name is a slice of the malloc-ed owned string,
20891 // in which case we need to copy it out into a new string.
20892 location = (pm_location_t) { .start = call->receiver->location.start, .end = call->receiver->location.end };
20893
20894 void *memory = xmalloc(length);
20895 if (memory == NULL) abort();
20896
20897 memcpy(memory, source, length);
20898 name = pm_parser_constant_id_owned(parser, (uint8_t *) memory, length);
20899 }
20900
20901 // Add this name to the list of constants if it is valid, not duplicated,
20902 // and not a keyword.
20903 if (name != 0 && !pm_constant_id_list_includes(names, name)) {
20904 pm_constant_id_list_append(names, name);
20905
20906 int depth;
20907 if ((depth = pm_parser_local_depth_constant_id(parser, name)) == -1) {
20908 // If the local is not already a local but it is a keyword, then we
20909 // do not want to add a capture for this.
20910 if (pm_local_is_keyword((const char *) source, length)) return;
20911
20912 // If the identifier is not already a local, then we will add it to
20913 // the local table.
20914 pm_parser_local_add(parser, name, location.start, location.end, 0);
20915 }
20916
20917 // Here we lazily create the MatchWriteNode since we know we're
20918 // about to add a target.
20919 if (callback_data->match == NULL) {
20920 callback_data->match = pm_match_write_node_create(parser, call);
20921 }
20922
20923 // Next, create the local variable target and add it to the list of
20924 // targets for the match.
20925 pm_node_t *target = (pm_node_t *) pm_local_variable_target_node_create(parser, &location, name, depth == -1 ? 0 : (uint32_t) depth);
20926 pm_node_list_append(&callback_data->match->targets, target);
20927 }
20928}
20929
20934static pm_node_t *
20935parse_regular_expression_named_captures(pm_parser_t *parser, const pm_string_t *content, pm_call_node_t *call, bool extended_mode) {
20937 .parser = parser,
20938 .call = call,
20939 .names = { 0 },
20940 .shared = content->type == PM_STRING_SHARED
20941 };
20942
20944 .parser = parser,
20945 .start = call->receiver->location.start,
20946 .end = call->receiver->location.end,
20947 .shared = content->type == PM_STRING_SHARED
20948 };
20949
20950 pm_regexp_parse(parser, pm_string_source(content), pm_string_length(content), extended_mode, parse_regular_expression_named_capture, &callback_data, parse_regular_expression_error, &error_data);
20951 pm_constant_id_list_free(&callback_data.names);
20952
20953 if (callback_data.match != NULL) {
20954 return (pm_node_t *) callback_data.match;
20955 } else {
20956 return (pm_node_t *) call;
20957 }
20958}
20959
20960static inline pm_node_t *
20961parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t previous_binding_power, pm_binding_power_t binding_power, bool accepts_command_call, uint16_t depth) {
20962 pm_token_t token = parser->current;
20963
20964 switch (token.type) {
20965 case PM_TOKEN_EQUAL: {
20966 switch (PM_NODE_TYPE(node)) {
20967 case PM_CALL_NODE: {
20968 // If we have no arguments to the call node and we need this
20969 // to be a target then this is either a method call or a
20970 // local variable write. This _must_ happen before the value
20971 // is parsed because it could be referenced in the value.
20972 pm_call_node_t *call_node = (pm_call_node_t *) node;
20974 pm_parser_local_add_location(parser, call_node->message_loc.start, call_node->message_loc.end, 0);
20975 }
20976 }
20977 /* fallthrough */
20978 case PM_CASE_WRITABLE: {
20979 parser_lex(parser);
20980 pm_node_t *value = parse_assignment_values(parser, previous_binding_power, PM_NODE_TYPE_P(node, PM_MULTI_TARGET_NODE) ? PM_BINDING_POWER_MULTI_ASSIGNMENT + 1 : binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL, (uint16_t) (depth + 1));
20981
20982 if (PM_NODE_TYPE_P(node, PM_MULTI_TARGET_NODE) && previous_binding_power != PM_BINDING_POWER_STATEMENT) {
20983 pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_MULTI_WRITE);
20984 }
20985
20986 return parse_write(parser, node, &token, value);
20987 }
20988 case PM_SPLAT_NODE: {
20989 pm_multi_target_node_t *multi_target = pm_multi_target_node_create(parser);
20990 pm_multi_target_node_targets_append(parser, multi_target, node);
20991
20992 parser_lex(parser);
20993 pm_node_t *value = parse_assignment_values(parser, previous_binding_power, PM_BINDING_POWER_MULTI_ASSIGNMENT + 1, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL, (uint16_t) (depth + 1));
20994 return parse_write(parser, (pm_node_t *) multi_target, &token, value);
20995 }
20997 case PM_FALSE_NODE:
21000 case PM_NIL_NODE:
21001 case PM_SELF_NODE:
21002 case PM_TRUE_NODE: {
21003 // In these special cases, we have specific error messages
21004 // and we will replace them with local variable writes.
21005 parser_lex(parser);
21006 pm_node_t *value = parse_assignment_values(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL, (uint16_t) (depth + 1));
21007 return parse_unwriteable_write(parser, node, &token, value);
21008 }
21009 default:
21010 // In this case we have an = sign, but we don't know what
21011 // it's for. We need to treat it as an error. We'll mark it
21012 // as an error and skip past it.
21013 parser_lex(parser);
21014 pm_parser_err_token(parser, &token, PM_ERR_EXPRESSION_NOT_WRITABLE);
21015 return node;
21016 }
21017 }
21019 switch (PM_NODE_TYPE(node)) {
21022 PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, PM_ERR_WRITE_TARGET_READONLY);
21023 /* fallthrough */
21025 parser_lex(parser);
21026
21027 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
21028 pm_node_t *result = (pm_node_t *) pm_global_variable_and_write_node_create(parser, node, &token, value);
21029
21030 pm_node_destroy(parser, node);
21031 return result;
21032 }
21034 parser_lex(parser);
21035
21036 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
21037 pm_node_t *result = (pm_node_t *) pm_class_variable_and_write_node_create(parser, (pm_class_variable_read_node_t *) node, &token, value);
21038
21039 pm_node_destroy(parser, node);
21040 return result;
21041 }
21042 case PM_CONSTANT_PATH_NODE: {
21043 parser_lex(parser);
21044
21045 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
21046 pm_node_t *write = (pm_node_t *) pm_constant_path_and_write_node_create(parser, (pm_constant_path_node_t *) node, &token, value);
21047
21048 return parse_shareable_constant_write(parser, write);
21049 }
21050 case PM_CONSTANT_READ_NODE: {
21051 parser_lex(parser);
21052
21053 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
21054 pm_node_t *write = (pm_node_t *) pm_constant_and_write_node_create(parser, (pm_constant_read_node_t *) node, &token, value);
21055
21056 pm_node_destroy(parser, node);
21057 return parse_shareable_constant_write(parser, write);
21058 }
21060 parser_lex(parser);
21061
21062 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
21063 pm_node_t *result = (pm_node_t *) pm_instance_variable_and_write_node_create(parser, (pm_instance_variable_read_node_t *) node, &token, value);
21064
21065 pm_node_destroy(parser, node);
21066 return result;
21067 }
21069 pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
21070 parser_lex(parser);
21071
21072 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
21073 pm_node_t *result = (pm_node_t *) pm_local_variable_and_write_node_create(parser, node, &token, value, name, 0);
21074
21075 parse_target_implicit_parameter(parser, node);
21076 pm_node_destroy(parser, node);
21077 return result;
21078 }
21080 if (pm_token_is_numbered_parameter(node->location.start, node->location.end)) {
21081 PM_PARSER_ERR_FORMAT(parser, node->location.start, node->location.end, PM_ERR_PARAMETER_NUMBERED_RESERVED, node->location.start);
21082 parse_target_implicit_parameter(parser, node);
21083 }
21084
21086 parser_lex(parser);
21087
21088 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
21089 pm_node_t *result = (pm_node_t *) pm_local_variable_and_write_node_create(parser, node, &token, value, cast->name, cast->depth);
21090
21091 pm_node_destroy(parser, node);
21092 return result;
21093 }
21094 case PM_CALL_NODE: {
21095 pm_call_node_t *cast = (pm_call_node_t *) node;
21096
21097 // If we have a vcall (a method with no arguments and no
21098 // receiver that could have been a local variable) then we
21099 // will transform it into a local variable write.
21101 pm_location_t *message_loc = &cast->message_loc;
21102 pm_refute_numbered_parameter(parser, message_loc->start, message_loc->end);
21103
21104 pm_constant_id_t constant_id = pm_parser_local_add_location(parser, message_loc->start, message_loc->end, 1);
21105 parser_lex(parser);
21106
21107 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
21108 pm_node_t *result = (pm_node_t *) pm_local_variable_and_write_node_create(parser, (pm_node_t *) cast, &token, value, constant_id, 0);
21109
21110 pm_node_destroy(parser, (pm_node_t *) cast);
21111 return result;
21112 }
21113
21114 // Move past the token here so that we have already added
21115 // the local variable by this point.
21116 parser_lex(parser);
21117
21118 // If there is no call operator and the message is "[]" then
21119 // this is an aref expression, and we can transform it into
21120 // an aset expression.
21121 if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_INDEX)) {
21122 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
21123 return (pm_node_t *) pm_index_and_write_node_create(parser, cast, &token, value);
21124 }
21125
21126 // If this node cannot be writable, then we have an error.
21127 if (pm_call_node_writable_p(parser, cast)) {
21128 parse_write_name(parser, &cast->name);
21129 } else {
21130 pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_UNEXPECTED);
21131 }
21132
21133 parse_call_operator_write(parser, cast, &token);
21134 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
21135 return (pm_node_t *) pm_call_and_write_node_create(parser, cast, &token, value);
21136 }
21137 case PM_MULTI_WRITE_NODE: {
21138 parser_lex(parser);
21139 pm_parser_err_token(parser, &token, PM_ERR_AMPAMPEQ_MULTI_ASSIGN);
21140 return node;
21141 }
21142 default:
21143 parser_lex(parser);
21144
21145 // In this case we have an &&= sign, but we don't know what it's for.
21146 // We need to treat it as an error. For now, we'll mark it as an error
21147 // and just skip right past it.
21148 pm_parser_err_token(parser, &token, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ);
21149 return node;
21150 }
21151 }
21153 switch (PM_NODE_TYPE(node)) {
21156 PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, PM_ERR_WRITE_TARGET_READONLY);
21157 /* fallthrough */
21159 parser_lex(parser);
21160
21161 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21162 pm_node_t *result = (pm_node_t *) pm_global_variable_or_write_node_create(parser, node, &token, value);
21163
21164 pm_node_destroy(parser, node);
21165 return result;
21166 }
21168 parser_lex(parser);
21169
21170 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21171 pm_node_t *result = (pm_node_t *) pm_class_variable_or_write_node_create(parser, (pm_class_variable_read_node_t *) node, &token, value);
21172
21173 pm_node_destroy(parser, node);
21174 return result;
21175 }
21176 case PM_CONSTANT_PATH_NODE: {
21177 parser_lex(parser);
21178
21179 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21180 pm_node_t *write = (pm_node_t *) pm_constant_path_or_write_node_create(parser, (pm_constant_path_node_t *) node, &token, value);
21181
21182 return parse_shareable_constant_write(parser, write);
21183 }
21184 case PM_CONSTANT_READ_NODE: {
21185 parser_lex(parser);
21186
21187 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21188 pm_node_t *write = (pm_node_t *) pm_constant_or_write_node_create(parser, (pm_constant_read_node_t *) node, &token, value);
21189
21190 pm_node_destroy(parser, node);
21191 return parse_shareable_constant_write(parser, write);
21192 }
21194 parser_lex(parser);
21195
21196 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21197 pm_node_t *result = (pm_node_t *) pm_instance_variable_or_write_node_create(parser, (pm_instance_variable_read_node_t *) node, &token, value);
21198
21199 pm_node_destroy(parser, node);
21200 return result;
21201 }
21203 pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
21204 parser_lex(parser);
21205
21206 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21207 pm_node_t *result = (pm_node_t *) pm_local_variable_or_write_node_create(parser, node, &token, value, name, 0);
21208
21209 parse_target_implicit_parameter(parser, node);
21210 pm_node_destroy(parser, node);
21211 return result;
21212 }
21214 if (pm_token_is_numbered_parameter(node->location.start, node->location.end)) {
21215 PM_PARSER_ERR_FORMAT(parser, node->location.start, node->location.end, PM_ERR_PARAMETER_NUMBERED_RESERVED, node->location.start);
21216 parse_target_implicit_parameter(parser, node);
21217 }
21218
21220 parser_lex(parser);
21221
21222 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21223 pm_node_t *result = (pm_node_t *) pm_local_variable_or_write_node_create(parser, node, &token, value, cast->name, cast->depth);
21224
21225 pm_node_destroy(parser, node);
21226 return result;
21227 }
21228 case PM_CALL_NODE: {
21229 pm_call_node_t *cast = (pm_call_node_t *) node;
21230
21231 // If we have a vcall (a method with no arguments and no
21232 // receiver that could have been a local variable) then we
21233 // will transform it into a local variable write.
21235 pm_location_t *message_loc = &cast->message_loc;
21236 pm_refute_numbered_parameter(parser, message_loc->start, message_loc->end);
21237
21238 pm_constant_id_t constant_id = pm_parser_local_add_location(parser, message_loc->start, message_loc->end, 1);
21239 parser_lex(parser);
21240
21241 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21242 pm_node_t *result = (pm_node_t *) pm_local_variable_or_write_node_create(parser, (pm_node_t *) cast, &token, value, constant_id, 0);
21243
21244 pm_node_destroy(parser, (pm_node_t *) cast);
21245 return result;
21246 }
21247
21248 // Move past the token here so that we have already added
21249 // the local variable by this point.
21250 parser_lex(parser);
21251
21252 // If there is no call operator and the message is "[]" then
21253 // this is an aref expression, and we can transform it into
21254 // an aset expression.
21255 if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_INDEX)) {
21256 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21257 return (pm_node_t *) pm_index_or_write_node_create(parser, cast, &token, value);
21258 }
21259
21260 // If this node cannot be writable, then we have an error.
21261 if (pm_call_node_writable_p(parser, cast)) {
21262 parse_write_name(parser, &cast->name);
21263 } else {
21264 pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_UNEXPECTED);
21265 }
21266
21267 parse_call_operator_write(parser, cast, &token);
21268 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21269 return (pm_node_t *) pm_call_or_write_node_create(parser, cast, &token, value);
21270 }
21271 case PM_MULTI_WRITE_NODE: {
21272 parser_lex(parser);
21273 pm_parser_err_token(parser, &token, PM_ERR_PIPEPIPEEQ_MULTI_ASSIGN);
21274 return node;
21275 }
21276 default:
21277 parser_lex(parser);
21278
21279 // In this case we have an ||= sign, but we don't know what it's for.
21280 // We need to treat it as an error. For now, we'll mark it as an error
21281 // and just skip right past it.
21282 pm_parser_err_token(parser, &token, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ);
21283 return node;
21284 }
21285 }
21297 switch (PM_NODE_TYPE(node)) {
21300 PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, PM_ERR_WRITE_TARGET_READONLY);
21301 /* fallthrough */
21303 parser_lex(parser);
21304
21305 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21306 pm_node_t *result = (pm_node_t *) pm_global_variable_operator_write_node_create(parser, node, &token, value);
21307
21308 pm_node_destroy(parser, node);
21309 return result;
21310 }
21312 parser_lex(parser);
21313
21314 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21315 pm_node_t *result = (pm_node_t *) pm_class_variable_operator_write_node_create(parser, (pm_class_variable_read_node_t *) node, &token, value);
21316
21317 pm_node_destroy(parser, node);
21318 return result;
21319 }
21320 case PM_CONSTANT_PATH_NODE: {
21321 parser_lex(parser);
21322
21323 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21324 pm_node_t *write = (pm_node_t *) pm_constant_path_operator_write_node_create(parser, (pm_constant_path_node_t *) node, &token, value);
21325
21326 return parse_shareable_constant_write(parser, write);
21327 }
21328 case PM_CONSTANT_READ_NODE: {
21329 parser_lex(parser);
21330
21331 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21332 pm_node_t *write = (pm_node_t *) pm_constant_operator_write_node_create(parser, (pm_constant_read_node_t *) node, &token, value);
21333
21334 pm_node_destroy(parser, node);
21335 return parse_shareable_constant_write(parser, write);
21336 }
21338 parser_lex(parser);
21339
21340 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21341 pm_node_t *result = (pm_node_t *) pm_instance_variable_operator_write_node_create(parser, (pm_instance_variable_read_node_t *) node, &token, value);
21342
21343 pm_node_destroy(parser, node);
21344 return result;
21345 }
21347 pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
21348 parser_lex(parser);
21349
21350 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21351 pm_node_t *result = (pm_node_t *) pm_local_variable_operator_write_node_create(parser, node, &token, value, name, 0);
21352
21353 parse_target_implicit_parameter(parser, node);
21354 pm_node_destroy(parser, node);
21355 return result;
21356 }
21358 if (pm_token_is_numbered_parameter(node->location.start, node->location.end)) {
21359 PM_PARSER_ERR_FORMAT(parser, node->location.start, node->location.end, PM_ERR_PARAMETER_NUMBERED_RESERVED, node->location.start);
21360 parse_target_implicit_parameter(parser, node);
21361 }
21362
21364 parser_lex(parser);
21365
21366 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21367 pm_node_t *result = (pm_node_t *) pm_local_variable_operator_write_node_create(parser, node, &token, value, cast->name, cast->depth);
21368
21369 pm_node_destroy(parser, node);
21370 return result;
21371 }
21372 case PM_CALL_NODE: {
21373 parser_lex(parser);
21374 pm_call_node_t *cast = (pm_call_node_t *) node;
21375
21376 // If we have a vcall (a method with no arguments and no
21377 // receiver that could have been a local variable) then we
21378 // will transform it into a local variable write.
21380 pm_location_t *message_loc = &cast->message_loc;
21381 pm_refute_numbered_parameter(parser, message_loc->start, message_loc->end);
21382
21383 pm_constant_id_t constant_id = pm_parser_local_add_location(parser, message_loc->start, message_loc->end, 1);
21384 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21385 pm_node_t *result = (pm_node_t *) pm_local_variable_operator_write_node_create(parser, (pm_node_t *) cast, &token, value, constant_id, 0);
21386
21387 pm_node_destroy(parser, (pm_node_t *) cast);
21388 return result;
21389 }
21390
21391 // If there is no call operator and the message is "[]" then
21392 // this is an aref expression, and we can transform it into
21393 // an aset expression.
21394 if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_INDEX)) {
21395 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21396 return (pm_node_t *) pm_index_operator_write_node_create(parser, cast, &token, value);
21397 }
21398
21399 // If this node cannot be writable, then we have an error.
21400 if (pm_call_node_writable_p(parser, cast)) {
21401 parse_write_name(parser, &cast->name);
21402 } else {
21403 pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_UNEXPECTED);
21404 }
21405
21406 parse_call_operator_write(parser, cast, &token);
21407 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21408 return (pm_node_t *) pm_call_operator_write_node_create(parser, cast, &token, value);
21409 }
21410 case PM_MULTI_WRITE_NODE: {
21411 parser_lex(parser);
21412 pm_parser_err_token(parser, &token, PM_ERR_OPERATOR_MULTI_ASSIGN);
21413 return node;
21414 }
21415 default:
21416 parser_lex(parser);
21417
21418 // In this case we have an operator but we don't know what it's for.
21419 // We need to treat it as an error. For now, we'll mark it as an error
21420 // and just skip right past it.
21421 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, pm_token_type_human(parser->current.type));
21422 return node;
21423 }
21424 }
21426 case PM_TOKEN_KEYWORD_AND: {
21427 parser_lex(parser);
21428
21429 pm_node_t *right = parse_expression(parser, binding_power, parser->previous.type == PM_TOKEN_KEYWORD_AND, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21430 return (pm_node_t *) pm_and_node_create(parser, node, &token, right);
21431 }
21433 case PM_TOKEN_PIPE_PIPE: {
21434 parser_lex(parser);
21435
21436 pm_node_t *right = parse_expression(parser, binding_power, parser->previous.type == PM_TOKEN_KEYWORD_OR, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21437 return (pm_node_t *) pm_or_node_create(parser, node, &token, right);
21438 }
21439 case PM_TOKEN_EQUAL_TILDE: {
21440 // Note that we _must_ parse the value before adding the local
21441 // variables in order to properly mirror the behavior of Ruby. For
21442 // example,
21443 //
21444 // /(?<foo>bar)/ =~ foo
21445 //
21446 // In this case, `foo` should be a method call and not a local yet.
21447 parser_lex(parser);
21448 pm_node_t *argument = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21449
21450 // By default, we're going to create a call node and then return it.
21451 pm_call_node_t *call = pm_call_node_binary_create(parser, node, &token, argument, 0);
21452 pm_node_t *result = (pm_node_t *) call;
21453
21454 // If the receiver of this =~ is a regular expression node, then we
21455 // need to introduce local variables for it based on its named
21456 // capture groups.
21458 // It's possible to have an interpolated regular expression node
21459 // that only contains strings. This is because it can be split
21460 // up by a heredoc. In this case we need to concat the unescaped
21461 // strings together and then parse them as a regular expression.
21463
21464 bool interpolated = false;
21465 size_t total_length = 0;
21466
21467 pm_node_t *part;
21468 PM_NODE_LIST_FOREACH(parts, index, part) {
21469 if (PM_NODE_TYPE_P(part, PM_STRING_NODE)) {
21470 total_length += pm_string_length(&((pm_string_node_t *) part)->unescaped);
21471 } else {
21472 interpolated = true;
21473 break;
21474 }
21475 }
21476
21477 if (!interpolated && total_length > 0) {
21478 void *memory = xmalloc(total_length);
21479 if (!memory) abort();
21480
21481 uint8_t *cursor = memory;
21482 PM_NODE_LIST_FOREACH(parts, index, part) {
21483 pm_string_t *unescaped = &((pm_string_node_t *) part)->unescaped;
21484 size_t length = pm_string_length(unescaped);
21485
21486 memcpy(cursor, pm_string_source(unescaped), length);
21487 cursor += length;
21488 }
21489
21490 pm_string_t owned;
21491 pm_string_owned_init(&owned, (uint8_t *) memory, total_length);
21492
21493 result = parse_regular_expression_named_captures(parser, &owned, call, PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_EXTENDED));
21494 pm_string_free(&owned);
21495 }
21496 } else if (PM_NODE_TYPE_P(node, PM_REGULAR_EXPRESSION_NODE)) {
21497 // If we have a regular expression node, then we can just parse
21498 // the named captures directly off the unescaped string.
21499 const pm_string_t *content = &((pm_regular_expression_node_t *) node)->unescaped;
21500 result = parse_regular_expression_named_captures(parser, content, call, PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_EXTENDED));
21501 }
21502
21503 return result;
21504 }
21506 case PM_TOKEN_USTAR:
21508 // The only times this will occur are when we are in an error state,
21509 // but we'll put them in here so that errors can propagate.
21515 case PM_TOKEN_CARET:
21516 case PM_TOKEN_PIPE:
21517 case PM_TOKEN_AMPERSAND:
21519 case PM_TOKEN_LESS_LESS:
21520 case PM_TOKEN_MINUS:
21521 case PM_TOKEN_PLUS:
21522 case PM_TOKEN_PERCENT:
21523 case PM_TOKEN_SLASH:
21524 case PM_TOKEN_STAR:
21525 case PM_TOKEN_STAR_STAR: {
21526 parser_lex(parser);
21527 pm_token_t operator = parser->previous;
21528 switch (PM_NODE_TYPE(node)) {
21532 PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
21533 }
21534 break;
21535 }
21536 case PM_AND_NODE: {
21537 pm_and_node_t *cast = (pm_and_node_t *) node;
21539 PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
21540 }
21541 break;
21542 }
21543 case PM_OR_NODE: {
21544 pm_or_node_t *cast = (pm_or_node_t *) node;
21546 PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
21547 }
21548 break;
21549 }
21550 default:
21551 break;
21552 }
21553
21554 pm_node_t *argument = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21555 return (pm_node_t *) pm_call_node_binary_create(parser, node, &token, argument, 0);
21556 }
21557 case PM_TOKEN_GREATER:
21559 case PM_TOKEN_LESS:
21560 case PM_TOKEN_LESS_EQUAL: {
21561 if (PM_NODE_TYPE_P(node, PM_CALL_NODE) && PM_NODE_FLAG_P(node, PM_CALL_NODE_FLAGS_COMPARISON)) {
21562 PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser, parser->current, PM_WARN_COMPARISON_AFTER_COMPARISON);
21563 }
21564
21565 parser_lex(parser);
21566 pm_node_t *argument = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21567 return (pm_node_t *) pm_call_node_binary_create(parser, node, &token, argument, PM_CALL_NODE_FLAGS_COMPARISON);
21568 }
21570 case PM_TOKEN_DOT: {
21571 parser_lex(parser);
21572 pm_token_t operator = parser->previous;
21573 pm_arguments_t arguments = { 0 };
21574
21575 // This if statement handles the foo.() syntax.
21576 if (match1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
21577 parse_arguments_list(parser, &arguments, true, false, (uint16_t) (depth + 1));
21578 return (pm_node_t *) pm_call_node_shorthand_create(parser, node, &operator, &arguments);
21579 }
21580
21581 switch (PM_NODE_TYPE(node)) {
21585 PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
21586 }
21587 break;
21588 }
21589 case PM_AND_NODE: {
21590 pm_and_node_t *cast = (pm_and_node_t *) node;
21592 PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
21593 }
21594 break;
21595 }
21596 case PM_OR_NODE: {
21597 pm_or_node_t *cast = (pm_or_node_t *) node;
21599 PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
21600 }
21601 break;
21602 }
21603 default:
21604 break;
21605 }
21606
21607 pm_token_t message;
21608
21609 switch (parser->current.type) {
21610 case PM_CASE_OPERATOR:
21611 case PM_CASE_KEYWORD:
21612 case PM_TOKEN_CONSTANT:
21614 case PM_TOKEN_METHOD_NAME: {
21615 parser_lex(parser);
21616 message = parser->previous;
21617 break;
21618 }
21619 default: {
21620 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_MESSAGE, pm_token_type_human(parser->current.type));
21621 message = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
21622 }
21623 }
21624
21625 parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1));
21626 pm_call_node_t *call = pm_call_node_call_create(parser, node, &operator, &message, &arguments);
21627
21628 if (
21629 (previous_binding_power == PM_BINDING_POWER_STATEMENT) &&
21630 arguments.arguments == NULL &&
21631 arguments.opening_loc.start == NULL &&
21632 match1(parser, PM_TOKEN_COMMA)
21633 ) {
21634 return parse_targets_validate(parser, (pm_node_t *) call, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
21635 } else {
21636 return (pm_node_t *) call;
21637 }
21638 }
21639 case PM_TOKEN_DOT_DOT:
21640 case PM_TOKEN_DOT_DOT_DOT: {
21641 parser_lex(parser);
21642
21643 pm_node_t *right = NULL;
21644 if (token_begins_expression_p(parser->current.type)) {
21645 right = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21646 }
21647
21648 return (pm_node_t *) pm_range_node_create(parser, node, &token, right);
21649 }
21651 pm_token_t keyword = parser->current;
21652 parser_lex(parser);
21653
21654 pm_node_t *predicate = parse_value_expression(parser, binding_power, true, false, PM_ERR_CONDITIONAL_IF_PREDICATE, (uint16_t) (depth + 1));
21655 return (pm_node_t *) pm_if_node_modifier_create(parser, node, &keyword, predicate);
21656 }
21658 pm_token_t keyword = parser->current;
21659 parser_lex(parser);
21660
21661 pm_node_t *predicate = parse_value_expression(parser, binding_power, true, false, PM_ERR_CONDITIONAL_UNLESS_PREDICATE, (uint16_t) (depth + 1));
21662 return (pm_node_t *) pm_unless_node_modifier_create(parser, node, &keyword, predicate);
21663 }
21665 parser_lex(parser);
21666 pm_statements_node_t *statements = pm_statements_node_create(parser);
21667 pm_statements_node_body_append(parser, statements, node, true);
21668
21669 pm_node_t *predicate = parse_value_expression(parser, binding_power, true, false, PM_ERR_CONDITIONAL_UNTIL_PREDICATE, (uint16_t) (depth + 1));
21670 return (pm_node_t *) pm_until_node_modifier_create(parser, &token, predicate, statements, PM_NODE_TYPE_P(node, PM_BEGIN_NODE) ? PM_LOOP_FLAGS_BEGIN_MODIFIER : 0);
21671 }
21673 parser_lex(parser);
21674 pm_statements_node_t *statements = pm_statements_node_create(parser);
21675 pm_statements_node_body_append(parser, statements, node, true);
21676
21677 pm_node_t *predicate = parse_value_expression(parser, binding_power, true, false, PM_ERR_CONDITIONAL_WHILE_PREDICATE, (uint16_t) (depth + 1));
21678 return (pm_node_t *) pm_while_node_modifier_create(parser, &token, predicate, statements, PM_NODE_TYPE_P(node, PM_BEGIN_NODE) ? PM_LOOP_FLAGS_BEGIN_MODIFIER : 0);
21679 }
21681 context_push(parser, PM_CONTEXT_TERNARY);
21682 pm_node_list_t current_block_exits = { 0 };
21683 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
21684
21685 pm_token_t qmark = parser->current;
21686 parser_lex(parser);
21687
21688 pm_node_t *true_expression = parse_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_TERNARY_EXPRESSION_TRUE, (uint16_t) (depth + 1));
21689
21690 if (parser->recovering) {
21691 // If parsing the true expression of this ternary resulted in a syntax
21692 // error that we can recover from, then we're going to put missing nodes
21693 // and tokens into the remaining places. We want to be sure to do this
21694 // before the `expect` function call to make sure it doesn't
21695 // accidentally move past a ':' token that occurs after the syntax
21696 // error.
21697 pm_token_t colon = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
21698 pm_node_t *false_expression = (pm_node_t *) pm_missing_node_create(parser, colon.start, colon.end);
21699
21700 context_pop(parser);
21701 pop_block_exits(parser, previous_block_exits);
21702 pm_node_list_free(&current_block_exits);
21703
21704 return (pm_node_t *) pm_if_node_ternary_create(parser, node, &qmark, true_expression, &colon, false_expression);
21705 }
21706
21707 accept1(parser, PM_TOKEN_NEWLINE);
21708 expect1(parser, PM_TOKEN_COLON, PM_ERR_TERNARY_COLON);
21709
21710 pm_token_t colon = parser->previous;
21711 pm_node_t *false_expression = parse_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_TERNARY_EXPRESSION_FALSE, (uint16_t) (depth + 1));
21712
21713 context_pop(parser);
21714 pop_block_exits(parser, previous_block_exits);
21715 pm_node_list_free(&current_block_exits);
21716
21717 return (pm_node_t *) pm_if_node_ternary_create(parser, node, &qmark, true_expression, &colon, false_expression);
21718 }
21719 case PM_TOKEN_COLON_COLON: {
21720 parser_lex(parser);
21721 pm_token_t delimiter = parser->previous;
21722
21723 switch (parser->current.type) {
21724 case PM_TOKEN_CONSTANT: {
21725 parser_lex(parser);
21726 pm_node_t *path;
21727
21728 if (
21729 (parser->current.type == PM_TOKEN_PARENTHESIS_LEFT) ||
21730 (accepts_command_call && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR)))
21731 ) {
21732 // If we have a constant immediately following a '::' operator, then
21733 // this can either be a constant path or a method call, depending on
21734 // what follows the constant.
21735 //
21736 // If we have parentheses, then this is a method call. That would
21737 // look like Foo::Bar().
21738 pm_token_t message = parser->previous;
21739 pm_arguments_t arguments = { 0 };
21740
21741 parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1));
21742 path = (pm_node_t *) pm_call_node_call_create(parser, node, &delimiter, &message, &arguments);
21743 } else {
21744 // Otherwise, this is a constant path. That would look like Foo::Bar.
21745 path = (pm_node_t *) pm_constant_path_node_create(parser, node, &delimiter, &parser->previous);
21746 }
21747
21748 // If this is followed by a comma then it is a multiple assignment.
21749 if (previous_binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
21750 return parse_targets_validate(parser, path, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
21751 }
21752
21753 return path;
21754 }
21755 case PM_CASE_OPERATOR:
21756 case PM_CASE_KEYWORD:
21758 case PM_TOKEN_METHOD_NAME: {
21759 parser_lex(parser);
21760 pm_token_t message = parser->previous;
21761
21762 // If we have an identifier following a '::' operator, then it is for
21763 // sure a method call.
21764 pm_arguments_t arguments = { 0 };
21765 parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1));
21766 pm_call_node_t *call = pm_call_node_call_create(parser, node, &delimiter, &message, &arguments);
21767
21768 // If this is followed by a comma then it is a multiple assignment.
21769 if (previous_binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
21770 return parse_targets_validate(parser, (pm_node_t *) call, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
21771 }
21772
21773 return (pm_node_t *) call;
21774 }
21776 // If we have a parenthesis following a '::' operator, then it is the
21777 // method call shorthand. That would look like Foo::(bar).
21778 pm_arguments_t arguments = { 0 };
21779 parse_arguments_list(parser, &arguments, true, false, (uint16_t) (depth + 1));
21780
21781 return (pm_node_t *) pm_call_node_shorthand_create(parser, node, &delimiter, &arguments);
21782 }
21783 default: {
21784 expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
21785 return (pm_node_t *) pm_constant_path_node_create(parser, node, &delimiter, &parser->previous);
21786 }
21787 }
21788 }
21790 context_push(parser, PM_CONTEXT_RESCUE_MODIFIER);
21791 parser_lex(parser);
21792 accept1(parser, PM_TOKEN_NEWLINE);
21793
21794 pm_node_t *value = parse_expression(parser, binding_power, true, false, PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1));
21795 context_pop(parser);
21796
21797 return (pm_node_t *) pm_rescue_modifier_node_create(parser, node, &token, value);
21798 }
21799 case PM_TOKEN_BRACKET_LEFT: {
21800 parser_lex(parser);
21801
21802 pm_arguments_t arguments = { 0 };
21803 arguments.opening_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
21804
21805 if (!accept1(parser, PM_TOKEN_BRACKET_RIGHT)) {
21806 pm_accepts_block_stack_push(parser, true);
21807 parse_arguments(parser, &arguments, false, PM_TOKEN_BRACKET_RIGHT, (uint16_t) (depth + 1));
21808 pm_accepts_block_stack_pop(parser);
21809 expect1(parser, PM_TOKEN_BRACKET_RIGHT, PM_ERR_EXPECT_RBRACKET);
21810 }
21811
21812 arguments.closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
21813
21814 // If we have a comma after the closing bracket then this is a multiple
21815 // assignment and we should parse the targets.
21816 if (previous_binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
21817 pm_call_node_t *aref = pm_call_node_aref_create(parser, node, &arguments);
21818 return parse_targets_validate(parser, (pm_node_t *) aref, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
21819 }
21820
21821 // If we're at the end of the arguments, we can now check if there is a
21822 // block node that starts with a {. If there is, then we can parse it and
21823 // add it to the arguments.
21824 pm_block_node_t *block = NULL;
21825 if (accept1(parser, PM_TOKEN_BRACE_LEFT)) {
21826 block = parse_block(parser, (uint16_t) (depth + 1));
21827 pm_arguments_validate_block(parser, &arguments, block);
21828 } else if (pm_accepts_block_stack_p(parser) && accept1(parser, PM_TOKEN_KEYWORD_DO)) {
21829 block = parse_block(parser, (uint16_t) (depth + 1));
21830 }
21831
21832 if (block != NULL) {
21833 if (arguments.block != NULL) {
21834 pm_parser_err_node(parser, (pm_node_t *) block, PM_ERR_ARGUMENT_AFTER_BLOCK);
21835 if (arguments.arguments == NULL) {
21836 arguments.arguments = pm_arguments_node_create(parser);
21837 }
21838 pm_arguments_node_arguments_append(arguments.arguments, arguments.block);
21839 }
21840
21841 arguments.block = (pm_node_t *) block;
21842 }
21843
21844 return (pm_node_t *) pm_call_node_aref_create(parser, node, &arguments);
21845 }
21846 case PM_TOKEN_KEYWORD_IN: {
21847 bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
21848 parser->pattern_matching_newlines = true;
21849
21850 pm_token_t operator = parser->current;
21851 parser->command_start = false;
21852 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
21853 parser_lex(parser);
21854
21855 pm_constant_id_list_t captures = { 0 };
21856 pm_node_t *pattern = parse_pattern(parser, &captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_IN, (uint16_t) (depth + 1));
21857
21858 parser->pattern_matching_newlines = previous_pattern_matching_newlines;
21859 pm_constant_id_list_free(&captures);
21860
21861 return (pm_node_t *) pm_match_predicate_node_create(parser, node, pattern, &operator);
21862 }
21864 bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
21865 parser->pattern_matching_newlines = true;
21866
21867 pm_token_t operator = parser->current;
21868 parser->command_start = false;
21869 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
21870 parser_lex(parser);
21871
21872 pm_constant_id_list_t captures = { 0 };
21873 pm_node_t *pattern = parse_pattern(parser, &captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_HROCKET, (uint16_t) (depth + 1));
21874
21875 parser->pattern_matching_newlines = previous_pattern_matching_newlines;
21876 pm_constant_id_list_free(&captures);
21877
21878 return (pm_node_t *) pm_match_required_node_create(parser, node, pattern, &operator);
21879 }
21880 default:
21881 assert(false && "unreachable");
21882 return NULL;
21883 }
21884}
21885
21886#undef PM_PARSE_PATTERN_SINGLE
21887#undef PM_PARSE_PATTERN_TOP
21888#undef PM_PARSE_PATTERN_MULTI
21889
21894static inline bool
21895pm_call_node_command_p(const pm_call_node_t *node) {
21896 return (
21897 (node->opening_loc.start == NULL) &&
21898 (node->block == NULL || PM_NODE_TYPE_P(node->block, PM_BLOCK_ARGUMENT_NODE)) &&
21899 (node->arguments != NULL || node->block != NULL)
21900 );
21901}
21902
/**
 * Parse an expression at the given binding power.
 *
 * A prefix expression is parsed first with parse_expression_prefix. Then, for
 * as long as the current token is a binary operator whose left binding power
 * is at least `binding_power`, the expression parsed so far is handed to
 * parse_expression_infix and replaced by its result.
 *
 * If `depth` has reached PRISM_DEPTH_MAXIMUM, PM_ERR_NESTING_TOO_DEEP is
 * recorded on the current token and a missing node is returned instead.
 *
 * NOTE(review): this listing carries the original file line numbers as a
 * prefix on every line, and several numbers are skipped inside this function.
 * Each gap is marked below; the code on those lines is not visible here.
 */
21911 static pm_node_t *
21912 parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, bool accepts_label, pm_diagnostic_id_t diag_id, uint16_t depth) {
21913 if (PRISM_UNLIKELY(depth >= PRISM_DEPTH_MAXIMUM)) {
21914 pm_parser_err_current(parser, PM_ERR_NESTING_TOO_DEEP);
21915 return (pm_node_t *) pm_missing_node_create(parser, parser->current.start, parser->current.end);
21916 }
21917
21918 pm_node_t *node = parse_expression_prefix(parser, binding_power, accepts_command_call, accepts_label, diag_id, depth);
21919
// Some prefix results must not be followed by (most) infix operators; those
// cases return early from this switch.
21920 switch (PM_NODE_TYPE(node)) {
21921 case PM_MISSING_NODE:
21922 // If we found a syntax error, then the type of node returned by
21923 // parse_expression_prefix is going to be a missing node.
21924 return node;
// NOTE(review): numbering jumps from 21924 to 21930; any case labels that
// share the PM_UNDEF_NODE body below are not visible in this listing.
21930 case PM_UNDEF_NODE:
21931 // These expressions are statements, and cannot be followed by
21932 // operators (except modifiers).
21933 if (pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_MODIFIER) {
21934 return node;
21935 }
21936 break;
21937 case PM_CALL_NODE:
21938 // If we have a call node, then we need to check if it looks like a
21939 // method call without parentheses that contains arguments. If it
21940 // does, then it has different rules for parsing infix operators,
21941 // namely that it only accepts composition (and/or) and modifiers
21942 // (if/unless/etc.).
21943 if ((pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_COMPOSITION) && pm_call_node_command_p((pm_call_node_t *) node)) {
21944 return node;
21945 }
21946 break;
21947 case PM_SYMBOL_NODE:
21948 // If we have a symbol node that is being parsed as a label, then we
21949 // need to immediately return, because there should never be an
21950 // infix operator following this node.
21951 if (pm_symbol_node_label_p(node)) {
21952 return node;
21953 }
// Deliberate fall through: a non-label symbol is handled like any other node.
21954 default:
21955 break;
21956 }
21957
21958 // Otherwise we'll look and see if the next token can be parsed as an infix
21959 // operator. If it can, then we'll parse it using parse_expression_infix.
21960 pm_binding_powers_t current_binding_powers;
21961 pm_token_type_t current_token_type;
21962
// The loop condition uses the comma operator to refresh both locals from the
// current token on every iteration before the binding power test is made.
21963 while (
21964 current_token_type = parser->current.type,
21965 current_binding_powers = pm_binding_powers[current_token_type],
21966 binding_power <= current_binding_powers.left &&
21967 current_binding_powers.binary
21968 ) {
21969 node = parse_expression_infix(parser, node, binding_power, current_binding_powers.right, accepts_command_call, (uint16_t) (depth + 1));
21970
21971 switch (PM_NODE_TYPE(node)) {
// NOTE(review): line 21972 (the case label for the body below) is not visible
// in this listing.
21973 // Multi-write nodes are statements, and cannot be followed by
21974 // operators except modifiers.
21975 if (pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_MODIFIER) {
21976 return node;
21977 }
21978 break;
// NOTE(review): lines 21979-21984 (the case labels for the body below) are not
// visible in this listing.
21985 // These expressions are statements, by virtue of the right-hand
21986 // side of their write being an implicit array.
21987 if (PM_NODE_FLAG_P(node, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY) && pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_MODIFIER) {
21988 return node;
21989 }
21990 break;
21991 case PM_CALL_NODE:
21992 // These expressions are also statements, by virtue of the
21993 // right-hand side of the expression (i.e., the last argument to
21994 // the call node) being an implicit array.
21995 if (PM_NODE_FLAG_P(node, PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY) && pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_MODIFIER) {
21996 return node;
21997 }
21998 break;
21999 default:
22000 break;
22001 }
22002
22003 // If the operator is nonassoc and we should not be able to parse the
22004 // upcoming infix operator, break.
22005 if (current_binding_powers.nonassoc) {
22006 // If this is a non-assoc operator and we are about to parse the
22007 // exact same operator, then we need to add an error.
22008 if (match1(parser, current_token_type)) {
22009 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_NON_ASSOCIATIVE_OPERATOR, pm_token_type_human(parser->current.type), pm_token_type_human(current_token_type));
22010 break;
22011 }
22012
22013 // If this is an endless range, then we need to reject a couple of
22014 // additional operators because it violates the normal operator
22015 // precedence rules. Those patterns are:
22016 //
22017 // 1.. & 2
22018 // 1.. * 2
22019 //
22020 if (PM_NODE_TYPE_P(node, PM_RANGE_NODE) && ((pm_range_node_t *) node)->right == NULL) {
// NOTE(review): line 22021 is not visible in this listing; the closing brace
// on 22024 has no visible opener, so a nested condition presumably starts
// there.
22022 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_NON_ASSOCIATIVE_OPERATOR, pm_token_type_human(parser->current.type), pm_token_type_human(current_token_type));
22023 break;
22024 }
22025
22026 if (PM_BINDING_POWER_TERM <= pm_binding_powers[parser->current.type].left) {
22027 break;
22028 }
22029 } else if (current_binding_powers.left <= pm_binding_powers[parser->current.type].left) {
22030 break;
22031 }
22032 }
22033
// This check runs inside the loop, after each infix parse: once the node can
// no longer continue a method chain, accepts_command_call is cleared for the
// remaining iterations.
22034 if (accepts_command_call) {
22035 // A command-style method call is only accepted on method chains.
22036 // Thus, we check whether the parsed node can continue method chains.
22037 // The method chain can continue if the parsed node is one of the following five kinds:
22038 // (1) index access: foo[1]
22039 // (2) attribute access: foo.bar
22040 // (3) method call with parenthesis: foo.bar(1)
22041 // (4) method call with a block: foo.bar do end
22042 // (5) constant path: foo::Bar
22043 switch (node->type) {
22044 case PM_CALL_NODE: {
22045 pm_call_node_t *cast = (pm_call_node_t *)node;
22046 if (
22047 // (1) foo[1]
22048 !(
22049 cast->call_operator_loc.start == NULL &&
22050 cast->message_loc.start != NULL &&
22051 cast->message_loc.start[0] == '[' &&
22052 cast->message_loc.end[-1] == ']'
22053 ) &&
22054 // (2) foo.bar
22055 !(
22056 cast->call_operator_loc.start != NULL &&
22057 cast->arguments == NULL &&
22058 cast->block == NULL &&
22059 cast->opening_loc.start == NULL
22060 ) &&
22061 // (3) foo.bar(1)
22062 !(
22063 cast->call_operator_loc.start != NULL &&
22064 cast->opening_loc.start != NULL
22065 ) &&
22066 // (4) foo.bar do end
22067 !(
22068 cast->block != NULL && PM_NODE_TYPE_P(cast->block, PM_BLOCK_NODE)
22069 )
22070 ) {
22071 accepts_command_call = false;
22072 }
22073 break;
22074 }
22075 // (5) foo::Bar
// NOTE(review): line 22076 (the case label for kind (5)) is not visible in
// this listing.
22077 break;
22078 default:
22079 accepts_command_call = false;
22080 break;
22081 }
22082 }
22083 }
22084
22085 return node;
22086 }
22087
22092static pm_statements_node_t *
22093wrap_statements(pm_parser_t *parser, pm_statements_node_t *statements) {
22094 if (PM_PARSER_COMMAND_LINE_OPTION_P(parser)) {
22095 if (statements == NULL) {
22096 statements = pm_statements_node_create(parser);
22097 }
22098
22099 pm_arguments_node_t *arguments = pm_arguments_node_create(parser);
22100 pm_arguments_node_arguments_append(
22101 arguments,
22102 (pm_node_t *) pm_global_variable_read_node_synthesized_create(parser, pm_parser_constant_id_constant(parser, "$_", 2))
22103 );
22104
22105 pm_statements_node_body_append(parser, statements, (pm_node_t *) pm_call_node_fcall_synthesized_create(
22106 parser,
22107 arguments,
22108 pm_parser_constant_id_constant(parser, "print", 5)
22109 ), true);
22110 }
22111
22112 if (PM_PARSER_COMMAND_LINE_OPTION_N(parser)) {
22113 if (PM_PARSER_COMMAND_LINE_OPTION_A(parser)) {
22114 if (statements == NULL) {
22115 statements = pm_statements_node_create(parser);
22116 }
22117
22118 pm_arguments_node_t *arguments = pm_arguments_node_create(parser);
22119 pm_arguments_node_arguments_append(
22120 arguments,
22121 (pm_node_t *) pm_global_variable_read_node_synthesized_create(parser, pm_parser_constant_id_constant(parser, "$;", 2))
22122 );
22123
22124 pm_global_variable_read_node_t *receiver = pm_global_variable_read_node_synthesized_create(parser, pm_parser_constant_id_constant(parser, "$_", 2));
22125 pm_call_node_t *call = pm_call_node_call_synthesized_create(parser, (pm_node_t *) receiver, "split", arguments);
22126
22127 pm_global_variable_write_node_t *write = pm_global_variable_write_node_synthesized_create(
22128 parser,
22129 pm_parser_constant_id_constant(parser, "$F", 2),
22130 (pm_node_t *) call
22131 );
22132
22133 pm_statements_node_body_prepend(statements, (pm_node_t *) write);
22134 }
22135
22136 pm_arguments_node_t *arguments = pm_arguments_node_create(parser);
22137 pm_arguments_node_arguments_append(
22138 arguments,
22139 (pm_node_t *) pm_global_variable_read_node_synthesized_create(parser, pm_parser_constant_id_constant(parser, "$/", 2))
22140 );
22141
22142 if (PM_PARSER_COMMAND_LINE_OPTION_L(parser)) {
22143 pm_keyword_hash_node_t *keywords = pm_keyword_hash_node_create(parser);
22144 pm_keyword_hash_node_elements_append(keywords, (pm_node_t *) pm_assoc_node_create(
22145 parser,
22146 (pm_node_t *) pm_symbol_node_synthesized_create(parser, "chomp"),
22147 &(pm_token_t) { .type = PM_TOKEN_NOT_PROVIDED, .start = parser->start, .end = parser->start },
22148 (pm_node_t *) pm_true_node_synthesized_create(parser)
22149 ));
22150
22151 pm_arguments_node_arguments_append(arguments, (pm_node_t *) keywords);
22152 pm_node_flag_set((pm_node_t *) arguments, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS);
22153 }
22154
22155 pm_statements_node_t *wrapped_statements = pm_statements_node_create(parser);
22156 pm_statements_node_body_append(parser, wrapped_statements, (pm_node_t *) pm_while_node_synthesized_create(
22157 parser,
22158 (pm_node_t *) pm_call_node_fcall_synthesized_create(parser, arguments, pm_parser_constant_id_constant(parser, "gets", 4)),
22159 statements
22160 ), true);
22161
22162 statements = wrapped_statements;
22163 }
22164
22165 return statements;
22166}
22167
/**
 * Parse the whole program and return a program node built from the parsed
 * statements and the ordered locals of the top-level scope.
 *
 * A top-level scope is pushed first unless one already exists. After the
 * statements are parsed, the last statement gets a void-statement check
 * (skipped when parsing an eval), the scope is popped, and the statements are
 * either wrapped by wrap_statements or have their block exits flushed.
 *
 * NOTE(review): this listing carries the original file line numbers as a
 * prefix on every line; line 22200 is skipped, see below.
 */
22171 static pm_node_t *
22172 parse_program(pm_parser_t *parser) {
22173 // If the current scope is NULL, then we want to push a new top level scope.
22174 // The current scope could exist in the event that we are parsing an eval
22175 // and the user has passed into scopes that already exist.
22176 if (parser->current_scope == NULL) {
22177 pm_parser_scope_push(parser, true);
22178 }
22179
22180 pm_node_list_t current_block_exits = { 0 };
22181 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
22182
// Load the first token, then parse every top-level statement at depth 0.
22183 parser_lex(parser);
22184 pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_MAIN, 0);
22185
22186 if (statements != NULL && !parser->parsing_eval) {
22187 // If we have statements, then the top-level statement should be
22188 // explicitly checked as well. We have to do this here because
22189 // everywhere else we check all but the last statement.
22190 assert(statements->body.size > 0);
22191 pm_void_statement_check(parser, statements->body.nodes[statements->body.size - 1]);
22192 }
22193
// The locals are collected before the scope is popped, because they are read
// from parser->current_scope.
22194 pm_constant_id_list_t locals;
22195 pm_locals_order(parser, &parser->current_scope->locals, &locals, true);
22196 pm_parser_scope_pop(parser);
22197
22198 // At the top level, see if we need to wrap the statements in a program
22199 // node with a while loop based on the options.
// NOTE(review): line 22200 is not visible in this listing. The `} else {` on
// 22202 implies it opens the `if` that decides whether wrap_statements is
// applied; confirm the exact condition against the original file.
22201 statements = wrap_statements(parser, statements);
22202 } else {
22203 flush_block_exits(parser, previous_block_exits);
22204 pm_node_list_free(&current_block_exits);
22205 }
22206
22207 // If this is an empty file, then we're still going to parse all of the
22208 // statements in order to gather up all of the comments and such. Here we'll
22209 // correct the location information.
22210 if (statements == NULL) {
22211 statements = pm_statements_node_create(parser);
22212 pm_statements_node_location_set(statements, parser->start, parser->start);
22213 }
22214
22215 return (pm_node_t *) pm_program_node_create(parser, &locals, statements);
22216 }
22217
22218/******************************************************************************/
22219/* External functions */
22220/******************************************************************************/
22221
/**
 * Find the first occurrence of the NUL-terminated string `little` that lies
 * entirely within the first `big_length` bytes of `big`.
 *
 * `big` does not need to be NUL-terminated. No byte at or beyond
 * `big + big_length` is ever read, and a match is only reported when the whole
 * of `little` fits inside the window.
 *
 * @param big The start of the buffer to search.
 * @param little The NUL-terminated string to search for.
 * @param big_length The number of bytes of `big` that may be inspected.
 * @return A pointer to the start of the match, or NULL if there is none. For
 *     an empty `little`, the first NUL byte inside the window is returned (or
 *     NULL if there is none), which is what this helper has always done.
 */
static const char *
pm_strnstr(const char *big, const char *little, size_t big_length) {
    size_t little_length = strlen(little);

    // Keep the long-standing result for an empty needle without touching any
    // byte outside of the window.
    if (little_length == 0) return (big_length == 0) ? NULL : memchr(big, '\0', big_length);

    // A needle longer than the window can never match. Checking this first
    // also keeps the subtraction below from wrapping around.
    if (little_length > big_length) return NULL;

    // Only scan positions where the whole needle still fits in the window, so
    // that memcmp never reads past big + big_length.
    const char *last = big + (big_length - little_length);

    for (const char *cursor = big; cursor <= last; cursor++) {
        if (*cursor == *little && memcmp(cursor, little, little_length) == 0) return cursor;
    }

    return NULL;
}
22241
22242#ifdef _WIN32
22243#define pm_parser_warn_shebang_carriage_return(parser, start, length) ((void) 0)
22244#else
22250static void
22251pm_parser_warn_shebang_carriage_return(pm_parser_t *parser, const uint8_t *start, size_t length) {
22252 if (length > 2 && start[length - 2] == '\r' && start[length - 1] == '\n') {
22253 pm_parser_warn(parser, start, start + length, PM_WARN_SHEBANG_CARRIAGE_RETURN);
22254 }
22255}
22256#endif
22257
22262static void
22263pm_parser_init_shebang(pm_parser_t *parser, const pm_options_t *options, const char *engine, size_t length) {
22264 const char *switches = pm_strnstr(engine, " -", length);
22265 if (switches == NULL) return;
22266
22267 pm_options_t next_options = *options;
22268 options->shebang_callback(
22269 &next_options,
22270 (const uint8_t *) (switches + 1),
22271 length - ((size_t) (switches - engine)) - 1,
22272 options->shebang_callback_data
22273 );
22274
22275 size_t encoding_length;
22276 if ((encoding_length = pm_string_length(&next_options.encoding)) > 0) {
22277 const uint8_t *encoding_source = pm_string_source(&next_options.encoding);
22278 parser_lex_magic_comment_encoding_value(parser, encoding_source, encoding_source + encoding_length);
22279 }
22280
22281 parser->command_line = next_options.command_line;
22282 parser->frozen_string_literal = next_options.frozen_string_literal;
22283}
22284
/**
 * Initialize a parser over the `size` bytes starting at `source`.
 *
 * Every parser field is reset, the constant pool and newline list are sized
 * from the input length, and the given options (which may be NULL) are
 * applied. A leading UTF-8 BOM is skipped. If the first line is a shebang, or
 * the X command-line option is set, the function looks for a shebang line that
 * contains "ruby" and adjusts where parsing and the encoding comment start.
 *
 * @param parser The parser to initialize.
 * @param source The source to parse; must not be NULL.
 * @param size The number of bytes in the source.
 * @param options Optional parse options; NULL means all defaults.
 *
 * NOTE(review): this listing carries the original file line numbers as a
 * prefix on every line. Lines 22288 (the line before the function name, where
 * the return type would be), 22387 and 22433 are not visible; they are marked
 * below and are left untouched by this change.
 */
22289 pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm_options_t *options) {
22290 assert(source != NULL);
22291
22292 *parser = (pm_parser_t) {
22293 .node_id = 0,
22294 .lex_state = PM_LEX_STATE_BEG,
22295 .enclosure_nesting = 0,
22296 .lambda_enclosure_nesting = -1,
22297 .brace_nesting = 0,
22298 .do_loop_stack = 0,
22299 .accepts_block_stack = 0,
22300 .lex_modes = {
22301 .index = 0,
22302 .stack = {{ .mode = PM_LEX_DEFAULT }},
22303 .current = &parser->lex_modes.stack[0],
22304 },
22305 .start = source,
22306 .end = source + size,
22307 .previous = { .type = PM_TOKEN_EOF, .start = source, .end = source },
22308 .current = { .type = PM_TOKEN_EOF, .start = source, .end = source },
22309 .next_start = NULL,
22310 .heredoc_end = NULL,
22311 .data_loc = { .start = NULL, .end = NULL },
22312 .comment_list = { 0 },
22313 .magic_comment_list = { 0 },
22314 .warning_list = { 0 },
22315 .error_list = { 0 },
22316 .current_scope = NULL,
22317 .current_context = NULL,
22318 .encoding = PM_ENCODING_UTF_8_ENTRY,
22319 .encoding_changed_callback = NULL,
22320 .encoding_comment_start = source,
22321 .lex_callback = NULL,
22322 .filepath = { 0 },
22323 .constant_pool = { 0 },
22324 .newline_list = { 0 },
22325 .integer_base = 0,
22326 .current_string = PM_STRING_EMPTY,
22327 .start_line = 1,
22328 .explicit_encoding = NULL,
22329 .command_line = 0,
22330 .parsing_eval = false,
22331 .partial_script = false,
22332 .command_start = true,
22333 .recovering = false,
22334 .encoding_locked = false,
22335 .encoding_changed = false,
22336 .pattern_matching_newlines = false,
22337 .in_keyword_arg = false,
22338 .current_block_exits = NULL,
22339 .semantic_token_seen = false,
22340 .frozen_string_literal = PM_OPTIONS_FROZEN_STRING_LITERAL_UNSET,
22341 .current_regular_expression_ascii_only = false,
22342 .warn_mismatched_indentation = true
22343 };
22344
22345 // Initialize the constant pool. We're going to completely guess as to the
22346 // number of constants that we'll need based on the size of the input. The
22347 // ratio we chose here is actually less arbitrary than you might think.
22348 //
22349 // We took ~50K Ruby files and measured the size of the file versus the
22350 // number of constants that were found in those files. Then we found the
22351 // average and standard deviation of the ratios of constants/bytesize. Then
22352 // we added 1.34 standard deviations to the average to get a ratio that
22353 // would fit 75% of the files (for a two-tailed distribution). This works
22354 // because there was about a 0.77 correlation and the distribution was
22355 // roughly normal.
22356 //
22357 // This ratio will need to change if we add more constants to the constant
22358 // pool for another node type.
22359 uint32_t constant_size = ((uint32_t) size) / 95;
22360 pm_constant_pool_init(&parser->constant_pool, constant_size < 4 ? 4 : constant_size);
22361
22362 // Initialize the newline list. Similar to the constant pool, we're going to
22363 // guess at the number of newlines that we'll need based on the size of the
22364 // input.
22365 size_t newline_size = size / 22;
22366 pm_newline_list_init(&parser->newline_list, source, newline_size < 4 ? 4 : newline_size);
22367
22368 // If options were provided to this parse, establish them here.
22369 if (options != NULL) {
22370 // filepath option
22371 parser->filepath = options->filepath;
22372
22373 // line option
22374 parser->start_line = options->line;
22375
22376 // encoding option
22377 size_t encoding_length = pm_string_length(&options->encoding);
22378 if (encoding_length > 0) {
22379 const uint8_t *encoding_source = pm_string_source(&options->encoding);
22380 parser_lex_magic_comment_encoding_value(parser, encoding_source, encoding_source + encoding_length);
22381 }
22382
22383 // encoding_locked option
22384 parser->encoding_locked = options->encoding_locked;
22385
22386 // frozen_string_literal option
// NOTE(review): line 22387 is not visible in this listing; presumably it is
// the statement that applies the frozen_string_literal option. Confirm
// against the original file.
22388
22389 // command_line option
22390 parser->command_line = options->command_line;
22391
22392 // version option
22393 parser->version = options->version;
22394
22395 // partial_script
22396 parser->partial_script = options->partial_script;
22397
22398 // scopes option
22399 parser->parsing_eval = options->scopes_count > 0;
22400 if (parser->parsing_eval) parser->warn_mismatched_indentation = false;
22401
22402 for (size_t scope_index = 0; scope_index < options->scopes_count; scope_index++) {
22403 const pm_options_scope_t *scope = pm_options_scope_get(options, scope_index);
22404 pm_parser_scope_push(parser, scope_index == 0);
22405
22406 // Scopes given from the outside are not allowed to have numbered
22407 // parameters.
22408 parser->current_scope->parameters = ((pm_scope_parameters_t) scope->forwarding) | PM_SCOPE_PARAMETERS_IMPLICIT_DISALLOWED;
22409
22410 for (size_t local_index = 0; local_index < scope->locals_count; local_index++) {
22411 const pm_string_t *local = pm_options_scope_local_get(scope, local_index);
22412
22413 const uint8_t *source = pm_string_source(local);
22414 size_t length = pm_string_length(local);
22415
// The local's name is copied so that the parser owns its own allocation; a
// local whose allocation fails is skipped.
22416 void *allocated = xmalloc(length);
22417 if (allocated == NULL) continue;
22418
22419 memcpy(allocated, source, length);
22420 pm_parser_local_add_owned(parser, (uint8_t *) allocated, length);
22421 }
22422 }
22423 }
22424
22425 pm_accepts_block_stack_push(parser, true);
22426
22427 // Skip past the UTF-8 BOM if it exists.
22428 if (size >= 3 && source[0] == 0xef && source[1] == 0xbb && source[2] == 0xbf) {
22429 parser->current.end += 3;
22430 parser->encoding_comment_start += 3;
22431
22432 if (parser->encoding != PM_ENCODING_UTF_8_ENTRY) {
// NOTE(review): line 22433 is not visible in this listing.
22434 if (parser->encoding_changed_callback != NULL) parser->encoding_changed_callback(parser);
22435 }
22436 }
22437
22438 // If the -x command line flag is set, or the first shebang of the file does
22439 // not include "ruby", then we'll search for a shebang that does include
22440 // "ruby" and start parsing from there.
22441 bool search_shebang = PM_PARSER_COMMAND_LINE_OPTION_X(parser);
22442
22443 // If the first two bytes of the source are a shebang, then we will do a bit
22444 // of extra processing.
22445 //
22446 // First, we'll indicate that the encoding comment is at the end of the
22447 // shebang. This means that when a shebang is present the encoding comment
22448 // can begin on the second line.
22449 //
22450 // Second, we will check if the shebang includes "ruby". If it does, then we
22451 // will start parsing from there. We will also potentially warn the
22452 // user if there is a carriage return at the end of the shebang. We will
22453 // also potentially call the shebang callback if this is the main script to
22454 // allow the caller to parse the shebang and find any command-line options.
22455 // If the shebang does not include "ruby" and this is the main script being
22456 // parsed, then we will start searching the file for a shebang that does
22457 // contain "ruby" as if -x were passed on the command line.
22458 const uint8_t *newline = next_newline(parser->start, parser->end - parser->start);
22459 size_t length = (size_t) ((newline != NULL ? newline : parser->end) - parser->start);
22460
22461 if (length > 2 && parser->current.end[0] == '#' && parser->current.end[1] == '!') {
22462 const char *engine;
22463
22464 if ((engine = pm_strnstr((const char *) parser->start, "ruby", length)) != NULL) {
22465 if (newline != NULL) {
22466 parser->encoding_comment_start = newline + 1;
22467
22468 if (options == NULL || options->main_script) {
22469 pm_parser_warn_shebang_carriage_return(parser, parser->start, length + 1);
22470 }
22471 }
22472
22473 if (options != NULL && options->main_script && options->shebang_callback != NULL) {
22474 pm_parser_init_shebang(parser, options, engine, length - ((size_t) (engine - (const char *) parser->start)));
22475 }
22476
22477 search_shebang = false;
// options may be NULL here (see the checks above), so it has to be tested
// before main_script is read. Without options there is no main script, and
// the search for a later shebang is not started.
22478 } else if (options != NULL && options->main_script && !parser->parsing_eval) {
22479 search_shebang = true;
22480 }
22481 }
22482
22483 // Here we're going to find the first shebang that includes "ruby" and start
22484 // parsing from there.
22485 if (search_shebang) {
22486 // If a shebang that includes "ruby" is not found, then we're going to add
22487 // a load error to the list of errors on the parser.
22488 bool found_shebang = false;
22489
22490 // This is going to point to the start of each line as we check it.
22491 // We'll maintain a moving window looking at each line as they come.
22492 const uint8_t *cursor = parser->start;
22493
22494 // The newline pointer points to the end of the current line that we're
22495 // considering. If it is NULL, then we're at the end of the file.
22496 const uint8_t *newline = next_newline(cursor, parser->end - cursor);
22497
// The first line is never examined by this loop: each iteration records the
// newline that ends the previous line and then looks at the line after it.
22498 while (newline != NULL) {
22499 pm_newline_list_append(&parser->newline_list, newline);
22500
22501 cursor = newline + 1;
22502 newline = next_newline(cursor, parser->end - cursor);
22503
22504 size_t length = (size_t) ((newline != NULL ? newline : parser->end) - cursor);
22505 if (length > 2 && cursor[0] == '#' && cursor[1] == '!') {
22506 const char *engine;
22507 if ((engine = pm_strnstr((const char *) cursor, "ruby", length)) != NULL) {
22508 found_shebang = true;
22509
22510 if (newline != NULL) {
22511 pm_parser_warn_shebang_carriage_return(parser, cursor, length + 1);
22512 parser->encoding_comment_start = newline + 1;
22513 }
22514
22515 if (options != NULL && options->shebang_callback != NULL) {
22516 pm_parser_init_shebang(parser, options, engine, length - ((size_t) (engine - (const char *) cursor)));
22517 }
22518
22519 break;
22520 }
22521 }
22522 }
22523
22524 if (found_shebang) {
22525 parser->previous = (pm_token_t) { .type = PM_TOKEN_EOF, .start = cursor, .end = cursor };
22526 parser->current = (pm_token_t) { .type = PM_TOKEN_EOF, .start = cursor, .end = cursor };
22527 } else {
22528 pm_parser_err(parser, parser->start, parser->start, PM_ERR_SCRIPT_NOT_FOUND);
22529 pm_newline_list_clear(&parser->newline_list);
22530 }
22531 }
22532
22533 // The encoding comment can start after any amount of inline whitespace, so
22534 // here we'll advance it to the first non-inline-whitespace character so
22535 // that it is ready for future comparisons.
22536 parser->encoding_comment_start += pm_strspn_inline_whitespace(parser->encoding_comment_start, parser->end - parser->encoding_comment_start);
22537 }
22538
22544pm_parser_register_encoding_changed_callback(pm_parser_t *parser, pm_encoding_changed_callback_t callback) {
22545 parser->encoding_changed_callback = callback;
22546}
22547
22551static inline void
22552pm_comment_list_free(pm_list_t *list) {
22553 pm_list_node_t *node, *next;
22554
22555 for (node = list->head; node != NULL; node = next) {
22556 next = node->next;
22557
22558 pm_comment_t *comment = (pm_comment_t *) node;
22559 xfree(comment);
22560 }
22561}
22562
22566static inline void
22567pm_magic_comment_list_free(pm_list_t *list) {
22568 pm_list_node_t *node, *next;
22569
22570 for (node = list->head; node != NULL; node = next) {
22571 next = node->next;
22572
22575 }
22576}
22577
22582pm_parser_free(pm_parser_t *parser) {
22583 pm_string_free(&parser->filepath);
22584 pm_diagnostic_list_free(&parser->error_list);
22585 pm_diagnostic_list_free(&parser->warning_list);
22586 pm_comment_list_free(&parser->comment_list);
22587 pm_magic_comment_list_free(&parser->magic_comment_list);
22588 pm_constant_pool_free(&parser->constant_pool);
22589 pm_newline_list_free(&parser->newline_list);
22590
22591 while (parser->current_scope != NULL) {
22592 // Normally, popping the scope doesn't free the locals since it is
22593 // assumed that ownership has transferred to the AST. However if we have
22594 // scopes while we're freeing the parser, it's likely they came from
22595 // eval scopes and we need to free them explicitly here.
22596 pm_parser_scope_pop(parser);
22597 }
22598
22599 while (parser->lex_modes.index >= PM_LEX_STACK_SIZE) {
22600 lex_mode_pop(parser);
22601 }
22602}
22603
22608pm_parse(pm_parser_t *parser) {
22609 return parse_program(parser);
22610}
22611
22617static bool
22618pm_parse_stream_read(pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *fgets) {
22619#define LINE_SIZE 4096
22620 char line[LINE_SIZE];
22621
22622 while (memset(line, '\n', LINE_SIZE), fgets(line, LINE_SIZE, stream) != NULL) {
22623 size_t length = LINE_SIZE;
22624 while (length > 0 && line[length - 1] == '\n') length--;
22625
22626 if (length == LINE_SIZE) {
22627 // If we read a line that is the maximum size and it doesn't end
22628 // with a newline, then we'll just append it to the buffer and
22629 // continue reading.
22630 length--;
22631 pm_buffer_append_string(buffer, line, length);
22632 continue;
22633 }
22634
22635 // Append the line to the buffer.
22636 length--;
22637 pm_buffer_append_string(buffer, line, length);
22638
22639 // Check if the line matches the __END__ marker. If it does, then stop
22640 // reading and return false. In most circumstances, this means we should
22641 // stop reading from the stream so that the DATA constant can pick it
22642 // up.
22643 switch (length) {
22644 case 7:
22645 if (strncmp(line, "__END__", 7) == 0) return false;
22646 break;
22647 case 8:
22648 if (strncmp(line, "__END__\n", 8) == 0) return false;
22649 break;
22650 case 9:
22651 if (strncmp(line, "__END__\r\n", 9) == 0) return false;
22652 break;
22653 }
22654 }
22655
22656 return true;
22657#undef LINE_SIZE
22658}
22659
22669static bool
22670pm_parse_stream_unterminated_heredoc_p(pm_parser_t *parser) {
22671 pm_diagnostic_t *diagnostic = (pm_diagnostic_t *) parser->error_list.head;
22672
22673 for (; diagnostic != NULL; diagnostic = (pm_diagnostic_t *) diagnostic->node.next) {
22674 if (diagnostic->diag_id == PM_ERR_HEREDOC_TERM) {
22675 return true;
22676 }
22677 }
22678
22679 return false;
22680}
22681
22689pm_parse_stream(pm_parser_t *parser, pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *fgets, const pm_options_t *options) {
22690 pm_buffer_init(buffer);
22691
22692 bool eof = pm_parse_stream_read(buffer, stream, fgets);
22693 pm_parser_init(parser, (const uint8_t *) pm_buffer_value(buffer), pm_buffer_length(buffer), options);
22694 pm_node_t *node = pm_parse(parser);
22695
22696 while (!eof && parser->error_list.size > 0 && (parser->lex_modes.index > 0 || pm_parse_stream_unterminated_heredoc_p(parser))) {
22697 pm_node_destroy(parser, node);
22698 eof = pm_parse_stream_read(buffer, stream, fgets);
22699
22700 pm_parser_free(parser);
22701 pm_parser_init(parser, (const uint8_t *) pm_buffer_value(buffer), pm_buffer_length(buffer), options);
22702 node = pm_parse(parser);
22703 }
22704
22705 return node;
22706}
22707
22712pm_parse_success_p(const uint8_t *source, size_t size, const char *data) {
22713 pm_options_t options = { 0 };
22714 pm_options_read(&options, data);
22715
22716 pm_parser_t parser;
22717 pm_parser_init(&parser, source, size, &options);
22718
22719 pm_node_t *node = pm_parse(&parser);
22720 pm_node_destroy(&parser, node);
22721
22722 bool result = parser.error_list.size == 0;
22723 pm_parser_free(&parser);
22724 pm_options_free(&options);
22725
22726 return result;
22727}
22728
22729#undef PM_CASE_KEYWORD
22730#undef PM_CASE_OPERATOR
22731#undef PM_CASE_WRITABLE
22732#undef PM_STRING_EMPTY
22733#undef PM_LOCATION_NODE_BASE_VALUE
22734#undef PM_LOCATION_NODE_VALUE
22735#undef PM_LOCATION_NULL_VALUE
22736#undef PM_LOCATION_TOKEN_VALUE
22737
22738// We optionally support serializing to a binary string. For systems that don't
22739// want or need this functionality, it can be turned off with the
22740// PRISM_EXCLUDE_SERIALIZATION define.
22741#ifndef PRISM_EXCLUDE_SERIALIZATION
22742
22743static inline void
22744pm_serialize_header(pm_buffer_t *buffer) {
22745 pm_buffer_append_string(buffer, "PRISM", 5);
22746 pm_buffer_append_byte(buffer, PRISM_VERSION_MAJOR);
22747 pm_buffer_append_byte(buffer, PRISM_VERSION_MINOR);
22748 pm_buffer_append_byte(buffer, PRISM_VERSION_PATCH);
22749 pm_buffer_append_byte(buffer, PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS ? 1 : 0);
22750}
22751
22756pm_serialize(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
22757 pm_serialize_header(buffer);
22758 pm_serialize_content(parser, node, buffer);
22759 pm_buffer_append_byte(buffer, '\0');
22760}
22761
22767pm_serialize_parse(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data) {
22768 pm_options_t options = { 0 };
22769 pm_options_read(&options, data);
22770
22771 pm_parser_t parser;
22772 pm_parser_init(&parser, source, size, &options);
22773
22774 pm_node_t *node = pm_parse(&parser);
22775
22776 pm_serialize_header(buffer);
22777 pm_serialize_content(&parser, node, buffer);
22778 pm_buffer_append_byte(buffer, '\0');
22779
22780 pm_node_destroy(&parser, node);
22781 pm_parser_free(&parser);
22782 pm_options_free(&options);
22783}
22784
22790pm_serialize_parse_stream(pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *fgets, const char *data) {
22791 pm_parser_t parser;
22792 pm_options_t options = { 0 };
22793 pm_options_read(&options, data);
22794
22795 pm_buffer_t parser_buffer;
22796 pm_node_t *node = pm_parse_stream(&parser, &parser_buffer, stream, fgets, &options);
22797 pm_serialize_header(buffer);
22798 pm_serialize_content(&parser, node, buffer);
22799 pm_buffer_append_byte(buffer, '\0');
22800
22801 pm_node_destroy(&parser, node);
22802 pm_buffer_free(&parser_buffer);
22803 pm_parser_free(&parser);
22804 pm_options_free(&options);
22805}
22806
22811pm_serialize_parse_comments(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data) {
22812 pm_options_t options = { 0 };
22813 pm_options_read(&options, data);
22814
22815 pm_parser_t parser;
22816 pm_parser_init(&parser, source, size, &options);
22817
22818 pm_node_t *node = pm_parse(&parser);
22819 pm_serialize_header(buffer);
22820 pm_serialize_encoding(parser.encoding, buffer);
22821 pm_buffer_append_varsint(buffer, parser.start_line);
22822 pm_serialize_comment_list(&parser, &parser.comment_list, buffer);
22823
22824 pm_node_destroy(&parser, node);
22825 pm_parser_free(&parser);
22826 pm_options_free(&options);
22827}
22828
22829#endif
22830
22831/******************************************************************************/
22832/* Slice queries for the Ruby API */
22833/******************************************************************************/
22834
/** The classification that pm_slice_type assigns to a slice of source. */
typedef enum {
    /** Returned when the requested encoding could not be found. */
    PM_SLICE_TYPE_ERROR = -1,

    /** Returned when the slice is not a valid identifier of any kind. */
    PM_SLICE_TYPE_NONE = 0,

    /** Returned when the slice is an identifier that does not start with an uppercase character. */
    PM_SLICE_TYPE_LOCAL = 1,

    /** Returned when the slice is an identifier that starts with an uppercase character. */
    PM_SLICE_TYPE_CONSTANT = 2,

    /** Returned when the slice is an identifier followed by a trailing !, ?, or =. */
    PM_SLICE_TYPE_METHOD_NAME = 3
} pm_slice_type_t;
22852
22856pm_slice_type_t
22857pm_slice_type(const uint8_t *source, size_t length, const char *encoding_name) {
22858 // first, get the right encoding object
22859 const pm_encoding_t *encoding = pm_encoding_find((const uint8_t *) encoding_name, (const uint8_t *) (encoding_name + strlen(encoding_name)));
22860 if (encoding == NULL) return PM_SLICE_TYPE_ERROR;
22861
22862 // check that there is at least one character
22863 if (length == 0) return PM_SLICE_TYPE_NONE;
22864
22865 size_t width;
22866 if ((width = encoding->alpha_char(source, (ptrdiff_t) length)) != 0) {
22867 // valid because alphabetical
22868 } else if (*source == '_') {
22869 // valid because underscore
22870 width = 1;
22871 } else if ((*source >= 0x80) && ((width = encoding->char_width(source, (ptrdiff_t) length)) > 0)) {
22872 // valid because multibyte
22873 } else {
22874 // invalid because no match
22875 return PM_SLICE_TYPE_NONE;
22876 }
22877
22878 // determine the type of the slice based on the first character
22879 const uint8_t *end = source + length;
22880 pm_slice_type_t result = encoding->isupper_char(source, end - source) ? PM_SLICE_TYPE_CONSTANT : PM_SLICE_TYPE_LOCAL;
22881
22882 // next, iterate through all of the bytes of the string to ensure that they
22883 // are all valid identifier characters
22884 source += width;
22885
22886 while (source < end) {
22887 if ((width = encoding->alnum_char(source, end - source)) != 0) {
22888 // valid because alphanumeric
22889 source += width;
22890 } else if (*source == '_') {
22891 // valid because underscore
22892 source++;
22893 } else if ((*source >= 0x80) && ((width = encoding->char_width(source, end - source)) > 0)) {
22894 // valid because multibyte
22895 source += width;
22896 } else {
22897 // invalid because no match
22898 break;
22899 }
22900 }
22901
22902 // accept a ! or ? at the end of the slice as a method name
22903 if (*source == '!' || *source == '?' || *source == '=') {
22904 source++;
22905 result = PM_SLICE_TYPE_METHOD_NAME;
22906 }
22907
22908 // valid if we are at the end of the slice
22909 return source == end ? result : PM_SLICE_TYPE_NONE;
22910}
22911
22916pm_string_query_local(const uint8_t *source, size_t length, const char *encoding_name) {
22917 switch (pm_slice_type(source, length, encoding_name)) {
22918 case PM_SLICE_TYPE_ERROR:
22919 return PM_STRING_QUERY_ERROR;
22920 case PM_SLICE_TYPE_NONE:
22921 case PM_SLICE_TYPE_CONSTANT:
22922 case PM_SLICE_TYPE_METHOD_NAME:
22923 return PM_STRING_QUERY_FALSE;
22924 case PM_SLICE_TYPE_LOCAL:
22925 return PM_STRING_QUERY_TRUE;
22926 }
22927
22928 assert(false && "unreachable");
22929 return PM_STRING_QUERY_FALSE;
22930}
22931
22936pm_string_query_constant(const uint8_t *source, size_t length, const char *encoding_name) {
22937 switch (pm_slice_type(source, length, encoding_name)) {
22938 case PM_SLICE_TYPE_ERROR:
22939 return PM_STRING_QUERY_ERROR;
22940 case PM_SLICE_TYPE_NONE:
22941 case PM_SLICE_TYPE_LOCAL:
22942 case PM_SLICE_TYPE_METHOD_NAME:
22943 return PM_STRING_QUERY_FALSE;
22944 case PM_SLICE_TYPE_CONSTANT:
22945 return PM_STRING_QUERY_TRUE;
22946 }
22947
22948 assert(false && "unreachable");
22949 return PM_STRING_QUERY_FALSE;
22950}
22951
22956pm_string_query_method_name(const uint8_t *source, size_t length, const char *encoding_name) {
22957#define B(p) ((p) ? PM_STRING_QUERY_TRUE : PM_STRING_QUERY_FALSE)
22958#define C1(c) (*source == c)
22959#define C2(s) (memcmp(source, s, 2) == 0)
22960#define C3(s) (memcmp(source, s, 3) == 0)
22961
22962 switch (pm_slice_type(source, length, encoding_name)) {
22963 case PM_SLICE_TYPE_ERROR:
22964 return PM_STRING_QUERY_ERROR;
22965 case PM_SLICE_TYPE_NONE:
22966 break;
22967 case PM_SLICE_TYPE_LOCAL:
22968 // numbered parameters are not valid method names
22969 return B((length != 2) || (source[0] != '_') || (source[1] == '0') || !pm_char_is_decimal_digit(source[1]));
22970 case PM_SLICE_TYPE_CONSTANT:
22971 // all constants are valid method names
22972 case PM_SLICE_TYPE_METHOD_NAME:
22973 // all method names are valid method names
22974 return PM_STRING_QUERY_TRUE;
22975 }
22976
22977 switch (length) {
22978 case 1:
22979 return B(C1('&') || C1('`') || C1('!') || C1('^') || C1('>') || C1('<') || C1('-') || C1('%') || C1('|') || C1('+') || C1('/') || C1('*') || C1('~'));
22980 case 2:
22981 return B(C2("!=") || C2("!~") || C2("[]") || C2("==") || C2("=~") || C2(">=") || C2(">>") || C2("<=") || C2("<<") || C2("**"));
22982 case 3:
22983 return B(C3("===") || C3("<=>") || C3("[]="));
22984 default:
22985 return PM_STRING_QUERY_FALSE;
22986 }
22987
22988#undef B
22989#undef C1
22990#undef C2
22991#undef C3
22992}
struct pm_block_parameter_node pm_block_parameter_node_t
BlockParameterNode.
struct pm_else_node pm_else_node_t
ElseNode.
struct pm_assoc_node pm_assoc_node_t
AssocNode.
struct pm_undef_node pm_undef_node_t
UndefNode.
struct pm_local_variable_target_node pm_local_variable_target_node_t
LocalVariableTargetNode.
struct pm_block_node pm_block_node_t
BlockNode.
struct pm_hash_pattern_node pm_hash_pattern_node_t
HashPatternNode.
struct pm_optional_parameter_node pm_optional_parameter_node_t
OptionalParameterNode.
struct pm_x_string_node pm_x_string_node_t
XStringNode.
struct pm_class_variable_write_node pm_class_variable_write_node_t
ClassVariableWriteNode.
struct pm_interpolated_string_node pm_interpolated_string_node_t
InterpolatedStringNode.
struct pm_call_node pm_call_node_t
CallNode.
struct pm_class_variable_read_node pm_class_variable_read_node_t
ClassVariableReadNode.
@ PM_RANGE_FLAGS_EXCLUDE_END
... operator
Definition ast.h:7854
struct pm_local_variable_read_node pm_local_variable_read_node_t
LocalVariableReadNode.
struct pm_arguments_node pm_arguments_node_t
ArgumentsNode.
@ PM_DEFINED_NODE
DefinedNode.
Definition ast.h:709
@ PM_PRE_EXECUTION_NODE
PreExecutionNode.
Definition ast.h:931
@ PM_RETRY_NODE
RetryNode.
Definition ast.h:964
@ PM_REDO_NODE
RedoNode.
Definition ast.h:943
@ PM_CONSTANT_PATH_WRITE_NODE
ConstantPathWriteNode.
Definition ast.h:694
@ PM_SOURCE_LINE_NODE
SourceLineNode.
Definition ast.h:985
@ PM_UNLESS_NODE
UnlessNode.
Definition ast.h:1009
@ PM_CALL_NODE
CallNode.
Definition ast.h:628
@ PM_NIL_NODE
NilNode.
Definition ast.h:895
@ PM_GLOBAL_VARIABLE_READ_NODE
GlobalVariableReadNode.
Definition ast.h:757
@ PM_RATIONAL_NODE
RationalNode.
Definition ast.h:940
@ PM_FIND_PATTERN_NODE
FindPatternNode.
Definition ast.h:727
@ PM_ARRAY_NODE
ArrayNode.
Definition ast.h:589
@ PM_CONSTANT_PATH_TARGET_NODE
ConstantPathTargetNode.
Definition ast.h:691
@ PM_OR_NODE
OrNode.
Definition ast.h:913
@ PM_MULTI_WRITE_NODE
MultiWriteNode.
Definition ast.h:889
@ PM_IF_NODE
IfNode.
Definition ast.h:772
@ PM_INTERPOLATED_STRING_NODE
InterpolatedStringNode.
Definition ast.h:826
@ PM_FALSE_NODE
FalseNode.
Definition ast.h:724
@ PM_HASH_NODE
HashNode.
Definition ast.h:766
@ PM_MATCH_PREDICATE_NODE
MatchPredicateNode.
Definition ast.h:871
@ PM_X_STRING_NODE
XStringNode.
Definition ast.h:1021
@ PM_GLOBAL_VARIABLE_TARGET_NODE
GlobalVariableTargetNode.
Definition ast.h:760
@ PM_AND_NODE
AndNode.
Definition ast.h:583
@ PM_CONSTANT_TARGET_NODE
ConstantTargetNode.
Definition ast.h:700
@ PM_IT_LOCAL_VARIABLE_READ_NODE
ItLocalVariableReadNode.
Definition ast.h:835
@ PM_SOURCE_FILE_NODE
SourceFileNode.
Definition ast.h:982
@ PM_NO_KEYWORDS_PARAMETER_NODE
NoKeywordsParameterNode.
Definition ast.h:898
@ PM_MULTI_TARGET_NODE
MultiTargetNode.
Definition ast.h:886
@ PM_SPLAT_NODE
SplatNode.
Definition ast.h:988
@ PM_CLASS_VARIABLE_READ_NODE
ClassVariableReadNode.
Definition ast.h:661
@ PM_ELSE_NODE
ElseNode.
Definition ast.h:712
@ PM_INTERPOLATED_MATCH_LAST_LINE_NODE
InterpolatedMatchLastLineNode.
Definition ast.h:820
@ PM_SYMBOL_NODE
SymbolNode.
Definition ast.h:1000
@ PM_RESCUE_MODIFIER_NODE
RescueModifierNode.
Definition ast.h:955
@ PM_ALIAS_METHOD_NODE
AliasMethodNode.
Definition ast.h:577
@ PM_MATCH_REQUIRED_NODE
MatchRequiredNode.
Definition ast.h:874
@ PM_BACK_REFERENCE_READ_NODE
BackReferenceReadNode.
Definition ast.h:601
@ PM_BLOCK_ARGUMENT_NODE
BlockArgumentNode.
Definition ast.h:607
@ PM_MISSING_NODE
MissingNode.
Definition ast.h:880
@ PM_SELF_NODE
SelfNode.
Definition ast.h:970
@ PM_TRUE_NODE
TrueNode.
Definition ast.h:1003
@ PM_ASSOC_SPLAT_NODE
AssocSplatNode.
Definition ast.h:598
@ PM_RANGE_NODE
RangeNode.
Definition ast.h:937
@ PM_LOCAL_VARIABLE_READ_NODE
LocalVariableReadNode.
Definition ast.h:859
@ PM_NEXT_NODE
NextNode.
Definition ast.h:892
@ PM_REGULAR_EXPRESSION_NODE
RegularExpressionNode.
Definition ast.h:946
@ PM_CONSTANT_WRITE_NODE
ConstantWriteNode.
Definition ast.h:703
@ PM_HASH_PATTERN_NODE
HashPatternNode.
Definition ast.h:769
@ PM_UNDEF_NODE
UndefNode.
Definition ast.h:1006
@ PM_ENSURE_NODE
EnsureNode.
Definition ast.h:721
@ PM_LOCAL_VARIABLE_WRITE_NODE
LocalVariableWriteNode.
Definition ast.h:865
@ PM_KEYWORD_HASH_NODE
KeywordHashNode.
Definition ast.h:841
@ PM_PARENTHESES_NODE
ParenthesesNode.
Definition ast.h:919
@ PM_CLASS_VARIABLE_WRITE_NODE
ClassVariableWriteNode.
Definition ast.h:667
@ PM_POST_EXECUTION_NODE
PostExecutionNode.
Definition ast.h:928
@ PM_RETURN_NODE
ReturnNode.
Definition ast.h:967
@ PM_ARRAY_PATTERN_NODE
ArrayPatternNode.
Definition ast.h:592
@ PM_MATCH_LAST_LINE_NODE
MatchLastLineNode.
Definition ast.h:868
@ PM_CONSTANT_PATH_NODE
ConstantPathNode.
Definition ast.h:682
@ PM_INTERPOLATED_SYMBOL_NODE
InterpolatedSymbolNode.
Definition ast.h:829
@ PM_CLASS_VARIABLE_TARGET_NODE
ClassVariableTargetNode.
Definition ast.h:664
@ PM_BREAK_NODE
BreakNode.
Definition ast.h:622
@ PM_IMAGINARY_NODE
ImaginaryNode.
Definition ast.h:775
@ PM_CONSTANT_READ_NODE
ConstantReadNode.
Definition ast.h:697
@ PM_GLOBAL_VARIABLE_WRITE_NODE
GlobalVariableWriteNode.
Definition ast.h:763
@ PM_SOURCE_ENCODING_NODE
SourceEncodingNode.
Definition ast.h:979
@ PM_BEGIN_NODE
BeginNode.
Definition ast.h:604
@ PM_INSTANCE_VARIABLE_READ_NODE
InstanceVariableReadNode.
Definition ast.h:808
@ PM_FLIP_FLOP_NODE
FlipFlopNode.
Definition ast.h:730
@ PM_INSTANCE_VARIABLE_WRITE_NODE
InstanceVariableWriteNode.
Definition ast.h:814
@ PM_INSTANCE_VARIABLE_TARGET_NODE
InstanceVariableTargetNode.
Definition ast.h:811
@ PM_CASE_NODE
CaseNode.
Definition ast.h:646
@ PM_FLOAT_NODE
FloatNode.
Definition ast.h:733
@ PM_ASSOC_NODE
AssocNode.
Definition ast.h:595
@ PM_INTEGER_NODE
IntegerNode.
Definition ast.h:817
@ PM_LOCAL_VARIABLE_TARGET_NODE
LocalVariableTargetNode.
Definition ast.h:862
@ PM_STRING_NODE
StringNode.
Definition ast.h:994
@ PM_ALIAS_GLOBAL_VARIABLE_NODE
AliasGlobalVariableNode.
Definition ast.h:574
@ PM_NUMBERED_REFERENCE_READ_NODE
NumberedReferenceReadNode.
Definition ast.h:904
@ PM_STATEMENTS_NODE
StatementsNode.
Definition ast.h:991
@ PM_BLOCK_NODE
BlockNode.
Definition ast.h:613
@ PM_INTERPOLATED_REGULAR_EXPRESSION_NODE
InterpolatedRegularExpressionNode.
Definition ast.h:823
struct pm_begin_node pm_begin_node_t
BeginNode.
struct pm_statements_node pm_statements_node_t
StatementsNode.
struct pm_instance_variable_write_node pm_instance_variable_write_node_t
InstanceVariableWriteNode.
struct pm_keyword_hash_node pm_keyword_hash_node_t
KeywordHashNode.
static const pm_node_flags_t PM_NODE_FLAG_NEWLINE
We store the flags enum in every node in the tree.
Definition ast.h:1046
@ PM_SYMBOL_FLAGS_FORCED_US_ASCII_ENCODING
internal bytes forced the encoding to US-ASCII
Definition ast.h:7937
struct pm_constant_path_node pm_constant_path_node_t
ConstantPathNode.
struct pm_local_variable_write_node pm_local_variable_write_node_t
LocalVariableWriteNode.
@ PM_STRING_FLAGS_FROZEN
frozen by virtue of a frozen_string_literal: true comment or --enable-frozen-string-literal
Definition ast.h:7920
@ PM_STRING_FLAGS_FORCED_BINARY_ENCODING
internal bytes forced the encoding to binary
Definition ast.h:7917
@ PM_STRING_FLAGS_FORCED_UTF8_ENCODING
internal bytes forced the encoding to UTF-8
Definition ast.h:7914
@ PM_ARGUMENTS_NODE_FLAGS_CONTAINS_FORWARDING
if the arguments contain forwarding
Definition ast.h:7746
@ PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS
if the arguments contain keywords
Definition ast.h:7749
@ PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORD_SPLAT
if the arguments contain a keyword splat
Definition ast.h:7752
struct pm_parameters_node pm_parameters_node_t
ParametersNode.
#define PM_NODE_FLAG_P(node, flag)
Return true if the given flag is set on the given node.
Definition ast.h:1063
struct pm_case_node pm_case_node_t
CaseNode.
struct pm_if_node pm_if_node_t
IfNode.
struct pm_rescue_modifier_node pm_rescue_modifier_node_t
RescueModifierNode.
struct pm_splat_node pm_splat_node_t
SplatNode.
struct pm_match_write_node pm_match_write_node_t
MatchWriteNode.
struct pm_multi_write_node pm_multi_write_node_t
MultiWriteNode.
struct pm_interpolated_x_string_node pm_interpolated_x_string_node_t
InterpolatedXStringNode.
struct pm_constant_write_node pm_constant_write_node_t
ConstantWriteNode.
struct pm_flip_flop_node pm_flip_flop_node_t
FlipFlopNode.
#define PM_NODE_TYPE_P(node, type)
Return true if the type of the given node matches the given type.
Definition ast.h:1058
#define PM_NODE_TYPE(node)
Cast the type to an enum to allow the compiler to provide exhaustiveness checking.
Definition ast.h:1053
struct pm_global_variable_read_node pm_global_variable_read_node_t
GlobalVariableReadNode.
struct pm_match_last_line_node pm_match_last_line_node_t
MatchLastLineNode.
struct pm_hash_node pm_hash_node_t
HashNode.
struct pm_block_local_variable_node pm_block_local_variable_node_t
BlockLocalVariableNode.
struct pm_multi_target_node pm_multi_target_node_t
MultiTargetNode.
@ PM_INTEGER_BASE_FLAGS_HEXADECIMAL
0x prefix
Definition ast.h:7811
@ PM_INTEGER_BASE_FLAGS_OCTAL
0o or 0 prefix
Definition ast.h:7808
@ PM_INTEGER_BASE_FLAGS_DECIMAL
0d or no prefix
Definition ast.h:7805
@ PM_INTEGER_BASE_FLAGS_BINARY
0b prefix
Definition ast.h:7802
struct pm_rational_node pm_rational_node_t
RationalNode.
struct pm_ensure_node pm_ensure_node_t
EnsureNode.
struct pm_forwarding_parameter_node pm_forwarding_parameter_node_t
ForwardingParameterNode.
struct pm_when_node pm_when_node_t
WhenNode.
enum pm_token_type pm_token_type_t
This enum represents every type of token in the Ruby source.
struct pm_range_node pm_range_node_t
RangeNode.
struct pm_and_node pm_and_node_t
AndNode.
#define PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS
When we're serializing to Java, we want to skip serializing the location fields as they won't be used...
Definition ast.h:7946
@ PM_CALL_NODE_FLAGS_SAFE_NAVIGATION
&.
Definition ast.h:7774
@ PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE
a call that is an attribute write, so the value being written should be returned
Definition ast.h:7780
@ PM_CALL_NODE_FLAGS_VARIABLE_CALL
a call that could have been a local variable
Definition ast.h:7777
struct pm_constant_read_node pm_constant_read_node_t
ConstantReadNode.
struct pm_or_node pm_or_node_t
OrNode.
struct pm_case_match_node pm_case_match_node_t
CaseMatchNode.
struct pm_imaginary_node pm_imaginary_node_t
ImaginaryNode.
struct pm_array_pattern_node pm_array_pattern_node_t
ArrayPatternNode.
struct pm_integer_node pm_integer_node_t
IntegerNode.
struct pm_constant_path_target_node pm_constant_path_target_node_t
ConstantPathTargetNode.
struct pm_global_variable_target_node pm_global_variable_target_node_t
GlobalVariableTargetNode.
struct pm_node_list pm_node_list_t
A list of nodes in the source, most often used for lists of children.
struct pm_required_parameter_node pm_required_parameter_node_t
RequiredParameterNode.
struct pm_symbol_node pm_symbol_node_t
SymbolNode.
struct pm_block_parameters_node pm_block_parameters_node_t
BlockParametersNode.
struct pm_parentheses_node pm_parentheses_node_t
ParenthesesNode.
@ PM_REGULAR_EXPRESSION_FLAGS_FORCED_US_ASCII_ENCODING
internal bytes forced the encoding to US-ASCII
Definition ast.h:7892
@ PM_REGULAR_EXPRESSION_FLAGS_EXTENDED
x - ignores whitespace and allows comments in regular expressions
Definition ast.h:7865
struct pm_instance_variable_read_node pm_instance_variable_read_node_t
InstanceVariableReadNode.
struct pm_constant_target_node pm_constant_target_node_t
ConstantTargetNode.
struct pm_node pm_node_t
This is the base structure that represents a node in the syntax tree.
struct pm_interpolated_symbol_node pm_interpolated_symbol_node_t
InterpolatedSymbolNode.
struct pm_class_variable_target_node pm_class_variable_target_node_t
ClassVariableTargetNode.
uint16_t pm_node_flags_t
These are the flags embedded in the node struct.
Definition ast.h:1040
struct pm_regular_expression_node pm_regular_expression_node_t
RegularExpressionNode.
@ PM_TOKEN_STAR_STAR
**
Definition ast.h:469
@ PM_TOKEN_DOT_DOT_DOT
the ... range operator or forwarding parameter
Definition ast.h:124
@ PM_TOKEN_MINUS_EQUAL
-=
Definition ast.h:385
@ PM_TOKEN_IGNORED_NEWLINE
an ignored newline
Definition ast.h:196
@ PM_TOKEN_BANG_EQUAL
!=
Definition ast.h:64
@ PM_TOKEN_KEYWORD___FILE__
__FILE__
Definition ast.h:349
@ PM_TOKEN_KEYWORD_WHEN
when
Definition ast.h:334
@ PM_TOKEN_FLOAT
a floating point number
Definition ast.h:160
@ PM_TOKEN_PLUS_EQUAL
+=
Definition ast.h:442
@ PM_TOKEN_DOT_DOT
the .. range operator
Definition ast.h:121
@ PM_TOKEN_UDOT_DOT
unary .. operator
Definition ast.h:496
@ PM_TOKEN_AMPERSAND_DOT
&.
Definition ast.h:49
@ PM_TOKEN_NEWLINE
a newline character outside of other tokens
Definition ast.h:391
@ PM_TOKEN_NUMBERED_REFERENCE
a numbered reference to a capture group in the previous regular expression match
Definition ast.h:394
@ PM_TOKEN_AMPERSAND
&
Definition ast.h:40
@ PM_TOKEN_KEYWORD_YIELD
yield
Definition ast.h:343
@ PM_TOKEN_KEYWORD_END
end
Definition ast.h:253
@ PM_TOKEN_LAMBDA_BEGIN
{
Definition ast.h:361
@ PM_TOKEN_KEYWORD_UNTIL_MODIFIER
until in the modifier form
Definition ast.h:331
@ PM_TOKEN_EQUAL_EQUAL_EQUAL
===
Definition ast.h:151
@ PM_TOKEN_INTEGER_RATIONAL
an integer with a rational suffix
Definition ast.h:208
@ PM_TOKEN_USTAR
unary *
Definition ast.h:511
@ PM_TOKEN_TILDE
~ or ~@
Definition ast.h:487
@ PM_TOKEN_KEYWORD___ENCODING__
__ENCODING__
Definition ast.h:346
@ PM_TOKEN_REGEXP_END
the end of a regular expression
Definition ast.h:451
@ PM_TOKEN_KEYWORD_UNTIL
until
Definition ast.h:328
@ PM_TOKEN_COMMA
,
Definition ast.h:109
@ PM_TOKEN_MAXIMUM
The maximum token value.
Definition ast.h:523
@ PM_TOKEN_GREATER
Definition ast.h:175
@ PM_TOKEN_INTEGER
an integer (any base)
Definition ast.h:202
@ PM_TOKEN_SLASH_EQUAL
/=
Definition ast.h:460
@ PM_TOKEN_UMINUS_NUM
-@ for a number
Definition ast.h:505
@ PM_TOKEN_EMBVAR
Definition ast.h:142
@ PM_TOKEN_KEYWORD_UNLESS_MODIFIER
unless in the modifier form
Definition ast.h:325
@ PM_TOKEN_INTEGER_RATIONAL_IMAGINARY
an integer with a rational and imaginary suffix
Definition ast.h:211
@ PM_TOKEN_FLOAT_RATIONAL_IMAGINARY
a floating pointer number with a rational and imaginary suffix
Definition ast.h:169
@ PM_TOKEN_BRACKET_LEFT_RIGHT
[]
Definition ast.h:82
@ PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL
&&=
Definition ast.h:46
@ PM_TOKEN_KEYWORD_CLASS
class
Definition ast.h:232
@ PM_TOKEN_KEYWORD_BEGIN
begin
Definition ast.h:220
@ PM_TOKEN_NOT_PROVIDED
a token that was not present but it is okay
Definition ast.h:37
@ PM_TOKEN_USTAR_STAR
unary **
Definition ast.h:514
@ PM_TOKEN_GREATER_GREATER_EQUAL
Definition ast.h:184
@ PM_TOKEN_PERCENT_EQUAL
%=
Definition ast.h:409
@ PM_TOKEN_PERCENT
%
Definition ast.h:406
@ PM_TOKEN_KEYWORD_IN
in
Definition ast.h:274
@ PM_TOKEN_BANG
!
Definition ast.h:61
@ PM_TOKEN_KEYWORD_NOT
not
Definition ast.h:286
@ PM_TOKEN_BRACKET_LEFT_ARRAY
[ for the beginning of an array
Definition ast.h:79
@ PM_TOKEN_HEREDOC_END
the end of a heredoc
Definition ast.h:187
@ PM_TOKEN_HEREDOC_START
the start of a heredoc
Definition ast.h:190
@ PM_TOKEN_KEYWORD_DEFINED
defined?
Definition ast.h:238
@ PM_TOKEN_UCOLON_COLON
unary ::
Definition ast.h:493
@ PM_TOKEN_LABEL_END
the end of a label
Definition ast.h:358
@ PM_TOKEN_EQUAL_GREATER
=>
Definition ast.h:154
@ PM_TOKEN_KEYWORD_UNLESS
unless
Definition ast.h:322
@ PM_TOKEN_KEYWORD_ENSURE
ensure
Definition ast.h:259
@ PM_TOKEN_AMPERSAND_EQUAL
&=
Definition ast.h:52
@ PM_TOKEN_EQUAL_EQUAL
==
Definition ast.h:148
@ PM_TOKEN_UPLUS
+@
Definition ast.h:508
@ PM_TOKEN_FLOAT_IMAGINARY
a floating pointer number with an imaginary suffix
Definition ast.h:163
@ PM_TOKEN_KEYWORD_BEGIN_UPCASE
BEGIN.
Definition ast.h:223
@ PM_TOKEN_LESS_EQUAL_GREATER
<=>
Definition ast.h:370
@ PM_TOKEN_KEYWORD_RESCUE_MODIFIER
rescue in the modifier form
Definition ast.h:298
@ PM_TOKEN_MISSING
a token that was expected but not found
Definition ast.h:34
@ PM_TOKEN_MINUS_GREATER
->
Definition ast.h:388
@ PM_TOKEN_KEYWORD_FALSE
false
Definition ast.h:262
@ PM_TOKEN_PIPE_PIPE_EQUAL
||=
Definition ast.h:436
@ PM_TOKEN_KEYWORD_IF
if
Definition ast.h:268
@ PM_TOKEN_EMBEXPR_BEGIN
#{
Definition ast.h:136
@ PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES
( for a parentheses node
Definition ast.h:400
@ PM_TOKEN_EMBDOC_END
=end
Definition ast.h:130
@ PM_TOKEN_KEYWORD_ELSE
else
Definition ast.h:247
@ PM_TOKEN_BACK_REFERENCE
a back reference
Definition ast.h:58
@ PM_TOKEN_BRACKET_LEFT
[
Definition ast.h:76
@ PM_TOKEN_EOF
final token in the file
Definition ast.h:31
@ PM_TOKEN_PIPE_PIPE
||
Definition ast.h:433
@ PM_TOKEN_KEYWORD_NIL
nil
Definition ast.h:283
@ PM_TOKEN_PERCENT_UPPER_W
%W
Definition ast.h:424
@ PM_TOKEN_KEYWORD_RETURN
return
Definition ast.h:304
@ PM_TOKEN_CLASS_VARIABLE
a class variable
Definition ast.h:100
@ PM_TOKEN_PIPE
|
Definition ast.h:427
@ PM_TOKEN_PARENTHESIS_LEFT
(
Definition ast.h:397
@ PM_TOKEN_BANG_TILDE
!~
Definition ast.h:67
@ PM_TOKEN_DOT
the .
Definition ast.h:118
@ PM_TOKEN_PARENTHESIS_RIGHT
)
Definition ast.h:403
@ PM_TOKEN_KEYWORD_RESCUE
rescue
Definition ast.h:295
@ PM_TOKEN_INSTANCE_VARIABLE
an instance variable
Definition ast.h:199
@ PM_TOKEN_PIPE_EQUAL
|=
Definition ast.h:430
@ PM_TOKEN_BRACKET_LEFT_RIGHT_EQUAL
[]=
Definition ast.h:85
@ PM_TOKEN_UAMPERSAND
unary &
Definition ast.h:490
@ PM_TOKEN_MINUS
Definition ast.h:382
@ PM_TOKEN_CONSTANT
a constant
Definition ast.h:115
@ PM_TOKEN_IDENTIFIER
an identifier
Definition ast.h:193
@ PM_TOKEN_EMBDOC_BEGIN
=begin
Definition ast.h:127
@ PM_TOKEN_STAR_EQUAL
*=
Definition ast.h:466
@ PM_TOKEN_KEYWORD_OR
or
Definition ast.h:289
@ PM_TOKEN_KEYWORD_AND
and
Definition ast.h:217
@ PM_TOKEN_LESS
<
Definition ast.h:364
@ PM_TOKEN_KEYWORD_BREAK
break
Definition ast.h:226
@ PM_TOKEN_PERCENT_LOWER_W
%w
Definition ast.h:415
@ PM_TOKEN_SYMBOL_BEGIN
the beginning of a symbol
Definition ast.h:484
@ PM_TOKEN_METHOD_NAME
a method name
Definition ast.h:379
@ PM_TOKEN_KEYWORD_CASE
case
Definition ast.h:229
@ PM_TOKEN_WORDS_SEP
a separator between words in a list
Definition ast.h:517
@ PM_TOKEN_FLOAT_RATIONAL
a floating point number with a rational suffix
Definition ast.h:166
@ PM_TOKEN_LESS_LESS_EQUAL
<<=
Definition ast.h:376
@ PM_TOKEN_EMBDOC_LINE
a line inside of embedded documentation
Definition ast.h:133
@ PM_TOKEN_KEYWORD_SUPER
super
Definition ast.h:310
@ PM_TOKEN_KEYWORD_DO
do
Definition ast.h:241
@ PM_TOKEN_KEYWORD_REDO
redo
Definition ast.h:292
@ PM_TOKEN_EQUAL_TILDE
=~
Definition ast.h:157
@ PM_TOKEN_EMBEXPR_END
}
Definition ast.h:139
@ PM_TOKEN_KEYWORD_END_UPCASE
END.
Definition ast.h:256
@ PM_TOKEN_KEYWORD___LINE__
__LINE__
Definition ast.h:352
@ PM_TOKEN_STRING_END
the end of a string
Definition ast.h:481
@ PM_TOKEN_STRING_CONTENT
the contents of a string
Definition ast.h:478
@ PM_TOKEN_BRACE_LEFT
{
Definition ast.h:70
@ PM_TOKEN_COLON_COLON
::
Definition ast.h:106
@ PM_TOKEN_GREATER_GREATER
>>
Definition ast.h:181
@ PM_TOKEN_PERCENT_LOWER_X
%x
Definition ast.h:418
@ PM_TOKEN_KEYWORD_SELF
self
Definition ast.h:307
@ PM_TOKEN_PERCENT_LOWER_I
%i
Definition ast.h:412
@ PM_TOKEN_KEYWORD_ALIAS
alias
Definition ast.h:214
@ PM_TOKEN_GLOBAL_VARIABLE
a global variable
Definition ast.h:172
@ PM_TOKEN_KEYWORD_IF_MODIFIER
if in the modifier form
Definition ast.h:271
@ PM_TOKEN_SLASH
/
Definition ast.h:457
@ PM_TOKEN_KEYWORD_RETRY
retry
Definition ast.h:301
@ PM_TOKEN_COLON
:
Definition ast.h:103
@ PM_TOKEN_KEYWORD_UNDEF
undef
Definition ast.h:319
@ PM_TOKEN_BRACKET_RIGHT
]
Definition ast.h:88
@ PM_TOKEN_KEYWORD_FOR
for
Definition ast.h:265
@ PM_TOKEN_KEYWORD_THEN
then
Definition ast.h:313
@ PM_TOKEN_QUESTION_MARK
?
Definition ast.h:445
@ PM_TOKEN___END__
marker for the point in the file at which the parser should stop
Definition ast.h:520
@ PM_TOKEN_KEYWORD_WHILE
while
Definition ast.h:337
@ PM_TOKEN_EQUAL
=
Definition ast.h:145
@ PM_TOKEN_KEYWORD_DEF
def
Definition ast.h:235
@ PM_TOKEN_UDOT_DOT_DOT
unary ... operator
Definition ast.h:499
@ PM_TOKEN_STAR
*
Definition ast.h:463
@ PM_TOKEN_KEYWORD_WHILE_MODIFIER
while in the modifier form
Definition ast.h:340
@ PM_TOKEN_KEYWORD_TRUE
true
Definition ast.h:316
@ PM_TOKEN_BRACE_RIGHT
}
Definition ast.h:73
@ PM_TOKEN_SEMICOLON
;
Definition ast.h:454
@ PM_TOKEN_REGEXP_BEGIN
the beginning of a regular expression
Definition ast.h:448
@ PM_TOKEN_CARET
^
Definition ast.h:91
@ PM_TOKEN_PERCENT_UPPER_I
%I
Definition ast.h:421
@ PM_TOKEN_KEYWORD_DO_LOOP
do keyword for a predicate in a while, until, or for loop
Definition ast.h:244
@ PM_TOKEN_KEYWORD_MODULE
module
Definition ast.h:277
@ PM_TOKEN_PLUS
+
Definition ast.h:439
@ PM_TOKEN_KEYWORD_NEXT
next
Definition ast.h:280
@ PM_TOKEN_BACKTICK
`
Definition ast.h:55
@ PM_TOKEN_INTEGER_IMAGINARY
an integer with an imaginary suffix
Definition ast.h:205
@ PM_TOKEN_LABEL
a label
Definition ast.h:355
@ PM_TOKEN_STAR_STAR_EQUAL
**=
Definition ast.h:472
@ PM_TOKEN_CHARACTER_LITERAL
a character literal
Definition ast.h:97
@ PM_TOKEN_AMPERSAND_AMPERSAND
&&
Definition ast.h:43
@ PM_TOKEN_UMINUS
-@
Definition ast.h:502
@ PM_TOKEN_LESS_LESS
<<
Definition ast.h:373
@ PM_TOKEN_GREATER_EQUAL
>=
Definition ast.h:178
@ PM_TOKEN_COMMENT
a comment
Definition ast.h:112
@ PM_TOKEN_CARET_EQUAL
^=
Definition ast.h:94
@ PM_TOKEN_KEYWORD_ELSIF
elsif
Definition ast.h:250
@ PM_TOKEN_STRING_BEGIN
the beginning of a string
Definition ast.h:475
@ PM_TOKEN_LESS_EQUAL
<=
Definition ast.h:367
struct pm_rescue_node pm_rescue_node_t
RescueNode.
struct pm_array_node pm_array_node_t
ArrayNode.
struct pm_global_variable_write_node pm_global_variable_write_node_t
GlobalVariableWriteNode.
@ PM_ENCODING_FLAGS_FORCED_UTF8_ENCODING
internal bytes forced the encoding to UTF-8
Definition ast.h:7791
struct pm_interpolated_match_last_line_node pm_interpolated_match_last_line_node_t
InterpolatedMatchLastLineNode.
struct pm_unless_node pm_unless_node_t
UnlessNode.
struct pm_interpolated_regular_expression_node pm_interpolated_regular_expression_node_t
InterpolatedRegularExpressionNode.
struct pm_instance_variable_target_node pm_instance_variable_target_node_t
InstanceVariableTargetNode.
struct pm_string_node pm_string_node_t
StringNode.
struct pm_float_node pm_float_node_t
FloatNode.
@ PM_LOOP_FLAGS_BEGIN_MODIFIER
a loop after a begin statement, so the body is executed first before the condition
Definition ast.h:7838
struct pm_find_pattern_node pm_find_pattern_node_t
FindPatternNode.
pm_diagnostic_id_t
The diagnostic IDs of all of the diagnostics, used to communicate the types of errors between the par...
Definition diagnostic.h:29
#define xfree
Old name of ruby_xfree.
Definition xmalloc.h:58
#define xmalloc
Old name of ruby_xmalloc.
Definition xmalloc.h:53
#define xcalloc
Old name of ruby_xcalloc.
Definition xmalloc.h:55
VALUE type(ANYARGS)
ANYARGS-ed function type.
struct pm_options_scope pm_options_scope_t
A scope of locals surrounding the code that is being parsed.
struct pm_options pm_options_t
The options that can be passed to the parser.
static const uint8_t PM_OPTIONS_COMMAND_LINE_N
A bit representing whether or not the command line -n option was set.
Definition options.h:203
#define PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED
String literals should be made mutable.
Definition options.h:20
#define PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED
String literals should be made frozen.
Definition options.h:31
#define PM_OPTIONS_FROZEN_STRING_LITERAL_UNSET
String literals may be frozen or mutable depending on the implementation default.
Definition options.h:26
static const uint8_t PM_OPTIONS_COMMAND_LINE_P
A bit representing whether or not the command line -p option was set.
Definition options.h:209
@ PM_OPTIONS_VERSION_CRUBY_3_3
The vendored version of prism in CRuby 3.3.x.
Definition options.h:89
struct pm_locals pm_locals_t
This is a set of local variables in a certain lexical context (method, class, module,...
pm_heredoc_indent_t
The type of indentation that a heredoc uses.
Definition parser.h:79
struct pm_context_node pm_context_node_t
This is a node in a linked list of contexts.
#define PM_LEX_STACK_SIZE
We pre-allocate a certain number of lex states in order to avoid having to call malloc too many times...
Definition parser.h:262
struct pm_parser pm_parser_t
The parser used to parse Ruby source.
Definition parser.h:267
struct pm_lex_mode pm_lex_mode_t
When lexing Ruby source, the lexer has a small amount of state to tell which kind of token it is curr...
struct pm_comment pm_comment_t
This is a node in the linked list of comments that we've found while parsing.
pm_lex_state_t
This enum combines the various bits from the above enum into individual values that represent the var...
Definition parser.h:46
struct pm_scope pm_scope_t
This struct represents a node in a linked list of scopes.
pm_heredoc_quote_t
The type of quote that a heredoc uses.
Definition parser.h:69
void(* pm_encoding_changed_callback_t)(pm_parser_t *parser)
When the encoding that is being used to parse the source is changed by prism, we provide the ability ...
Definition parser.h:496
pm_context_t
While parsing, we keep track of a stack of contexts.
Definition parser.h:274
@ PM_CONTEXT_CLASS_RESCUE
a rescue statement within a class statement
Definition parser.h:321
@ PM_CONTEXT_ELSIF
an elsif clause
Definition parser.h:348
@ PM_CONTEXT_DEF_RESCUE
a rescue statement within a method definition
Definition parser.h:333
@ PM_CONTEXT_ELSE
an else clause
Definition parser.h:345
@ PM_CONTEXT_FOR_INDEX
a for loop's index
Definition parser.h:357
@ PM_CONTEXT_CASE_WHEN
a case when statement
Definition parser.h:306
@ PM_CONTEXT_BLOCK_RESCUE
a rescue statement within a do..end block
Definition parser.h:303
@ PM_CONTEXT_MODULE
a module declaration
Definition parser.h:384
@ PM_CONTEXT_DEF_PARAMS
a method definition's parameters
Definition parser.h:336
@ PM_CONTEXT_CASE_IN
a case in statement
Definition parser.h:309
@ PM_CONTEXT_BLOCK_ELSE
a rescue else statement within a do..end block
Definition parser.h:300
@ PM_CONTEXT_LOOP_PREDICATE
the predicate clause of a loop statement
Definition parser.h:378
@ PM_CONTEXT_SCLASS
a singleton class definition
Definition parser.h:414
@ PM_CONTEXT_UNLESS
an unless statement
Definition parser.h:429
@ PM_CONTEXT_POSTEXE
an END block
Definition parser.h:402
@ PM_CONTEXT_IF
an if statement
Definition parser.h:360
@ PM_CONTEXT_MULTI_TARGET
a multiple target expression
Definition parser.h:396
@ PM_CONTEXT_LAMBDA_RESCUE
a rescue statement within a lambda expression
Definition parser.h:375
@ PM_CONTEXT_BEGIN_ELSE
a rescue else statement with an explicit begin
Definition parser.h:285
@ PM_CONTEXT_NONE
a null context, used for returning a value from a function
Definition parser.h:276
@ PM_CONTEXT_CLASS_ELSE
a rescue else statement within a class statement
Definition parser.h:318
@ PM_CONTEXT_LAMBDA_ENSURE
an ensure statement within a lambda expression
Definition parser.h:369
@ PM_CONTEXT_BLOCK_ENSURE
an ensure statement within a do..end block
Definition parser.h:297
@ PM_CONTEXT_CLASS_ENSURE
an ensure statement within a class statement
Definition parser.h:315
@ PM_CONTEXT_LAMBDA_BRACES
a lambda expression with braces
Definition parser.h:363
@ PM_CONTEXT_MODULE_ELSE
a rescue else statement within a module statement
Definition parser.h:390
@ PM_CONTEXT_PARENS
a parenthesized expression
Definition parser.h:399
@ PM_CONTEXT_BLOCK_BRACES
expressions in block arguments using braces
Definition parser.h:291
@ PM_CONTEXT_DEF_ENSURE
an ensure statement within a method definition
Definition parser.h:327
@ PM_CONTEXT_SCLASS_RESCUE
a rescue statement with a singleton class
Definition parser.h:423
@ PM_CONTEXT_PREEXE
a BEGIN block
Definition parser.h:408
@ PM_CONTEXT_DEFINED
a defined? expression
Definition parser.h:339
@ PM_CONTEXT_MODULE_ENSURE
an ensure statement within a module statement
Definition parser.h:387
@ PM_CONTEXT_BEGIN_RESCUE
a rescue statement with an explicit begin
Definition parser.h:288
@ PM_CONTEXT_UNTIL
an until statement
Definition parser.h:432
@ PM_CONTEXT_DEF_ELSE
a rescue else statement within a method definition
Definition parser.h:330
@ PM_CONTEXT_FOR
a for loop
Definition parser.h:354
@ PM_CONTEXT_PREDICATE
a predicate inside an if/elsif/unless statement
Definition parser.h:405
@ PM_CONTEXT_BEGIN_ENSURE
an ensure statement with an explicit begin
Definition parser.h:282
@ PM_CONTEXT_SCLASS_ENSURE
an ensure statement with a singleton class
Definition parser.h:417
@ PM_CONTEXT_DEFAULT_PARAMS
a method definition's default parameter
Definition parser.h:342
@ PM_CONTEXT_LAMBDA_ELSE
a rescue else statement within a lambda expression
Definition parser.h:372
@ PM_CONTEXT_CLASS
a class declaration
Definition parser.h:312
@ PM_CONTEXT_MAIN
the top level context
Definition parser.h:381
@ PM_CONTEXT_LAMBDA_DO_END
a lambda expression with do..end
Definition parser.h:366
@ PM_CONTEXT_BEGIN
a begin statement
Definition parser.h:279
@ PM_CONTEXT_RESCUE_MODIFIER
a modifier rescue clause
Definition parser.h:411
@ PM_CONTEXT_EMBEXPR
an interpolated expression
Definition parser.h:351
@ PM_CONTEXT_TERNARY
a ternary expression
Definition parser.h:426
@ PM_CONTEXT_DEF
a method definition
Definition parser.h:324
@ PM_CONTEXT_SCLASS_ELSE
a rescue else statement with a singleton class
Definition parser.h:420
@ PM_CONTEXT_MODULE_RESCUE
a rescue statement within a module statement
Definition parser.h:393
@ PM_CONTEXT_BLOCK_KEYWORDS
expressions in block arguments using do..end
Definition parser.h:294
@ PM_CONTEXT_WHILE
a while statement
Definition parser.h:435
uint8_t pm_scope_parameters_t
The flags about scope parameters that can be set.
Definition parser.h:566
uint8_t pm_shareable_constant_value_t
The type of shareable constant value that can be set.
Definition parser.h:522
pm_comment_type_t
This is the type of a comment that we've found while parsing.
Definition parser.h:448
#define PM_CONSTANT_ID_UNSET
When we allocate constants into the pool, we reserve 0 to mean that the slot is not yet filled.
uint32_t pm_constant_id_t
A constant id is a unique identifier for a constant in the constant pool.
struct pm_list_node pm_list_node_t
This struct represents an abstract linked list that provides common functionality.
#define PM_STRING_EMPTY
Defines an empty string.
Definition pm_string.h:70
#define PRISM_UNLIKELY(x)
The compiler should predict that this branch will not be taken.
Definition defines.h:234
#define PRISM_ATTRIBUTE_UNUSED
GCC will warn if you specify a function or parameter that is unused at runtime.
Definition defines.h:78
#define PRISM_DEPTH_MAXIMUM
When we are parsing using recursive descent, we want to protect against malicious payloads that could...
Definition defines.h:34
#define PM_STATIC_ASSERT(line, condition, message)
We want to be able to use static assertions, but they weren't standardized until C11.
Definition defines.h:113
#define PRISM_EXPORTED_FUNCTION
By default, we compile with -fvisibility=hidden.
Definition defines.h:50
#define PM_ENCODING_US_ASCII_ENTRY
This is the US-ASCII encoding.
Definition encoding.h:252
#define PM_ENCODING_UTF_8_ENTRY
This is the default UTF-8 encoding.
Definition encoding.h:245
#define PRISM_ENCODING_ALPHABETIC_BIT
All of the lookup tables use the first bit of each embedded byte to indicate whether the codepoint is...
Definition encoding.h:68
#define PRISM_ENCODING_ALPHANUMERIC_BIT
All of the lookup tables use the second bit of each embedded byte to indicate whether the codepoint i...
Definition encoding.h:74
#define PM_NODE_LIST_FOREACH(list, index, node)
Loop through each node in the node list, writing each node to the given pm_node_t pointer.
Definition node.h:17
#define PRISM_VERSION
The version of the Prism library as a constant string.
Definition version.h:27
#define PRISM_VERSION_PATCH
The patch version of the Prism library as an int.
Definition version.h:22
#define PRISM_VERSION_MINOR
The minor version of the Prism library as an int.
Definition version.h:17
#define PRISM_VERSION_MAJOR
The major version of the Prism library as an int.
Definition version.h:12
The main header file for the prism parser.
pm_string_query_t
Represents the results of a slice query.
Definition prism.h:240
@ PM_STRING_QUERY_TRUE
Returned if the result of the slice query is true.
Definition prism.h:248
@ PM_STRING_QUERY_ERROR
Returned if the encoding given to a slice query was invalid.
Definition prism.h:242
@ PM_STRING_QUERY_FALSE
Returned if the result of the slice query is false.
Definition prism.h:245
void pm_serialize_content(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer)
Serialize the encoding, metadata, nodes, and constant pool.
Definition serialize.c:2133
void pm_serialize_encoding(const pm_encoding_t *encoding, pm_buffer_t *buffer)
Serialize the name of the encoding to the buffer.
Definition serialize.c:2110
char * pm_parse_stream_fgets_t(char *string, int size, void *stream)
This function is used in pm_parse_stream to retrieve a line of input from a stream.
Definition prism.h:88
void pm_serialize_comment_list(pm_parser_t *parser, pm_list_t *list, pm_buffer_t *buffer)
Serialize the given list of comments to the given buffer.
Definition serialize.c:2040
const char * pm_token_type_human(pm_token_type_t token_type)
Returns the human name of the given token type.
Definition token_type.c:362
This struct is used to pass information between the regular expression parser and the error callback.
Definition prism.c:17982
pm_parser_t * parser
The parser that we are parsing the regular expression for.
Definition prism.c:17984
const uint8_t * start
The start of the regular expression.
Definition prism.c:17987
bool shared
Whether or not the source of the regular expression is shared.
Definition prism.c:17998
const uint8_t * end
The end of the regular expression.
Definition prism.c:17990
This struct is used to pass information between the regular expression parser and the named capture c...
Definition prism.c:20841
pm_constant_id_list_t names
The list of names that have been parsed.
Definition prism.c:20852
pm_parser_t * parser
The parser that is parsing the regular expression.
Definition prism.c:20843
pm_match_write_node_t * match
The match write node that is being created.
Definition prism.c:20849
pm_call_node_t * call
The call node wrapping the regular expression node.
Definition prism.c:20846
bool shared
Whether the content of the regular expression is shared.
Definition prism.c:20859
struct pm_node * left
AndNode#left.
Definition ast.h:1279
struct pm_node * right
AndNode#right.
Definition ast.h:1292
pm_node_t base
The embedded base node.
Definition ast.h:1326
struct pm_node_list arguments
ArgumentsNode#arguments.
Definition ast.h:1337
This is a special out parameter to the parse_arguments_list function that includes opening and closin...
Definition prism.c:1575
pm_node_t * block
The optional block attached to the call.
Definition prism.c:1586
bool has_forwarding
The flag indicating whether this arguments list has forwarding argument.
Definition prism.c:1589
pm_location_t opening_loc
The optional location of the opening parenthesis or bracket.
Definition prism.c:1577
pm_arguments_node_t * arguments
The lazily-allocated optional arguments node.
Definition prism.c:1580
pm_location_t closing_loc
The optional location of the closing parenthesis or bracket.
Definition prism.c:1583
struct pm_node_list elements
ArrayNode#elements.
Definition ast.h:1365
struct pm_node * constant
ArrayPatternNode#constant.
Definition ast.h:1424
pm_location_t opening_loc
ArrayPatternNode#opening_loc.
Definition ast.h:1464
pm_node_t base
The embedded base node.
Definition ast.h:1418
pm_location_t closing_loc
ArrayPatternNode#closing_loc.
Definition ast.h:1474
struct pm_node * value
AssocNode#value.
Definition ast.h:1521
struct pm_node * key
AssocNode#key.
Definition ast.h:1508
struct pm_ensure_node * ensure_clause
BeginNode#ensure_clause.
Definition ast.h:1668
struct pm_rescue_node * rescue_clause
BeginNode#rescue_clause.
Definition ast.h:1648
struct pm_statements_node * statements
BeginNode#statements.
Definition ast.h:1638
pm_node_t base
The embedded base node.
Definition ast.h:1617
struct pm_else_node * else_clause
BeginNode#else_clause.
Definition ast.h:1658
This struct represents a set of binding powers used for a given token.
Definition prism.c:12943
bool binary
Whether or not this token can be used as a binary operator.
Definition prism.c:12951
pm_binding_power_t left
The left binding power.
Definition prism.c:12945
bool nonassoc
Whether or not this token can be used as non-associative binary operator.
Definition prism.c:12957
pm_binding_power_t right
The right binding power.
Definition prism.c:12948
A pm_buffer_t is a simple memory buffer that stores data in a contiguous block of memory.
Definition pm_buffer.h:22
size_t length
The length of the buffer in bytes.
Definition pm_buffer.h:24
char * value
A pointer to the start of the buffer.
Definition pm_buffer.h:30
pm_location_t opening_loc
CallNode#opening_loc.
Definition ast.h:2180
pm_location_t closing_loc
CallNode#closing_loc.
Definition ast.h:2200
struct pm_node * receiver
CallNode#receiver.
Definition ast.h:2138
pm_constant_id_t name
CallNode::name.
Definition ast.h:2161
pm_node_t base
The embedded base node.
Definition ast.h:2121
pm_location_t call_operator_loc
CallNode#call_operator_loc.
Definition ast.h:2151
pm_location_t message_loc
CallNode#message_loc.
Definition ast.h:2171
struct pm_arguments_node * arguments
CallNode#arguments.
Definition ast.h:2190
struct pm_node * block
CallNode#block.
Definition ast.h:2210
struct pm_node_list conditions
CaseMatchNode#conditions.
Definition ast.h:2568
struct pm_node_list conditions
CaseNode#conditions.
Definition ast.h:2638
pm_location_t location
The location of the comment in the source.
Definition parser.h:463
A list of constant IDs.
A constant in the pool which effectively stores a string.
size_t length
The length of the string.
const uint8_t * start
A pointer to the start of the string.
pm_context_t context
The context that this node represents.
Definition parser.h:441
struct pm_context_node * prev
A pointer to the previous context in the linked list.
Definition parser.h:444
This struct represents a diagnostic generated during parsing.
Definition diagnostic.h:359
struct pm_statements_node * statements
ElseNode#statements.
Definition ast.h:3623
This struct defines the functions necessary to implement the encoding interface so we can determine h...
Definition encoding.h:23
size_t(* alpha_char)(const uint8_t *b, ptrdiff_t n)
Return the number of bytes that the next character takes if it is valid in the encoding and is alphab...
Definition encoding.h:36
size_t(* char_width)(const uint8_t *b, ptrdiff_t n)
Return the number of bytes that the next character takes if it is valid in the encoding.
Definition encoding.h:29
bool(* isupper_char)(const uint8_t *b, ptrdiff_t n)
Return true if the next character is valid in the encoding and is an uppercase character.
Definition encoding.h:50
const char * name
The name of the encoding.
Definition encoding.h:56
size_t(* alnum_char)(const uint8_t *b, ptrdiff_t n)
Return the number of bytes that the next character takes if it is valid in the encoding and is alphan...
Definition encoding.h:43
struct pm_statements_node * statements
EnsureNode#statements.
Definition ast.h:3721
struct pm_node * constant
FindPatternNode#constant.
Definition ast.h:3773
pm_location_t opening_loc
FindPatternNode#opening_loc.
Definition ast.h:3793
pm_node_t base
The embedded base node.
Definition ast.h:3767
pm_location_t closing_loc
FindPatternNode#closing_loc.
Definition ast.h:3798
double value
FloatNode#value.
Definition ast.h:3859
pm_node_t base
The embedded base node.
Definition ast.h:3851
struct pm_node_list elements
HashNode#elements.
Definition ast.h:4285
pm_location_t opening_loc
HashPatternNode#opening_loc.
Definition ast.h:4336
pm_node_t base
The embedded base node.
Definition ast.h:4315
pm_location_t closing_loc
HashPatternNode#closing_loc.
Definition ast.h:4341
struct pm_node * constant
HashPatternNode#constant.
Definition ast.h:4321
All of the information that must be stored in order to lex a heredoc.
Definition parser.h:88
size_t ident_length
The length of the heredoc identifier.
Definition parser.h:93
pm_heredoc_quote_t quote
The type of quote that the heredoc uses.
Definition parser.h:96
pm_heredoc_indent_t indent
The type of indentation that the heredoc uses.
Definition parser.h:99
const uint8_t * ident_start
A pointer to the start of the heredoc identifier.
Definition parser.h:90
struct pm_statements_node * statements
IfNode#statements.
Definition ast.h:4422
struct pm_node * subsequent
IfNode#subsequent.
Definition ast.h:4441
pm_integer_t value
IntegerNode#value.
Definition ast.h:5088
pm_node_t base
The embedded base node.
Definition ast.h:5080
bool negative
Whether or not the integer is negative.
Definition pm_integer.h:42
pm_node_t base
The embedded base node.
Definition ast.h:5201
pm_location_t opening_loc
InterpolatedStringNode#opening_loc.
Definition ast.h:5207
pm_node_t base
The embedded base node.
Definition ast.h:5234
pm_location_t opening_loc
InterpolatedXStringNode#opening_loc.
Definition ast.h:5273
pm_node_t base
The embedded base node.
Definition ast.h:5267
struct pm_node_list parts
InterpolatedXStringNode#parts.
Definition ast.h:5278
void(* callback)(void *data, pm_parser_t *parser, pm_token_t *token)
This is the callback that is called when a token is lexed.
Definition parser.h:518
void * data
This opaque pointer is used to provide whatever information the user deemed necessary to the callback...
Definition parser.h:512
uint8_t terminator
This is the terminator of the list literal.
Definition parser.h:165
size_t nesting
This keeps track of the nesting level of the list.
Definition parser.h:153
bool interpolation
Whether or not interpolation is allowed in this list.
Definition parser.h:156
uint8_t incrementor
When lexing a list, it takes into account balancing the terminator if the terminator is one of (),...
Definition parser.h:162
uint8_t breakpoints[11]
This is the character set that should be used to delimit the tokens within the list.
Definition parser.h:171
pm_heredoc_lex_mode_t base
All of the data necessary to lex a heredoc.
Definition parser.h:233
bool line_continuation
True if the previous token ended with a line continuation.
Definition parser.h:249
struct pm_lex_mode * prev
The previous lex state so that it knows how to pop.
Definition parser.h:254
union pm_lex_mode::@303336126360075302344075121136356113360170030306 as
The data associated with this type of lex mode.
bool label_allowed
Whether or not at the end of the string we should allow a :, which would indicate this was a dynamic ...
Definition parser.h:208
const uint8_t * next_start
This is the pointer to the character where lexing should resume once the heredoc has been completely ...
Definition parser.h:239
size_t * common_whitespace
This is used to track the amount of common whitespace on each line so that we know how much to dedent...
Definition parser.h:246
enum pm_lex_mode::@204051102252353332352362146052355003264223055126 mode
The type of this lex mode.
struct pm_list_node * next
A pointer to the next node in the list.
Definition pm_list.h:48
This represents the overall linked list.
Definition pm_list.h:55
pm_list_node_t * head
A pointer to the head of the list.
Definition pm_list.h:60
size_t size
The size of the list.
Definition pm_list.h:57
This tracks an individual local variable in a certain lexical context, as well as the number of times...
Definition parser.h:532
pm_constant_id_t name
The name of the local variable.
Definition parser.h:534
pm_location_t location
The location of the local variable in the source.
Definition parser.h:537
uint32_t hash
The hash of the local variable.
Definition parser.h:546
uint32_t index
The index of the local variable in the local table.
Definition parser.h:540
uint32_t reads
The number of times the local variable is read.
Definition parser.h:543
uint32_t depth
LocalVariableReadNode#depth.
Definition ast.h:5610
pm_constant_id_t name
LocalVariableReadNode#name.
Definition ast.h:5597
uint32_t depth
LocalVariableWriteNode#depth.
Definition ast.h:5680
pm_constant_id_t name
LocalVariableWriteNode#name.
Definition ast.h:5667
pm_local_t * locals
The nullable allocated memory for the local variables in the set.
Definition parser.h:562
uint32_t capacity
The capacity of the local variables set.
Definition parser.h:559
uint32_t size
The number of local variables in the set.
Definition parser.h:556
This represents a range of bytes in the source string to which a node or token corresponds.
Definition ast.h:545
const uint8_t * start
A pointer to the start location of the range in the source.
Definition ast.h:547
const uint8_t * end
A pointer to the end location of the range in the source.
Definition ast.h:550
This is a node in the linked list of magic comments that we've found while parsing.
Definition parser.h:475
struct pm_node_list targets
MatchWriteNode#targets.
Definition ast.h:5862
pm_node_t base
The embedded base node.
Definition ast.h:5947
pm_location_t lparen_loc
MultiTargetNode#lparen_loc.
Definition ast.h:6003
struct pm_node_list lefts
MultiTargetNode#lefts.
Definition ast.h:5963
pm_location_t rparen_loc
MultiTargetNode#rparen_loc.
Definition ast.h:6013
size_t * offsets
The list of offsets.
size_t size
The number of offsets in the list.
size_t size
The number of nodes in the list.
Definition ast.h:560
struct pm_node ** nodes
The nodes in the list.
Definition ast.h:566
pm_node_type_t type
This represents the type of the node.
Definition ast.h:1074
pm_node_flags_t flags
This represents any flags on the node.
Definition ast.h:1080
pm_location_t location
This is the location of the node in the source.
Definition ast.h:1092
size_t locals_count
The number of locals in the scope.
Definition options.h:38
uint8_t forwarding
Flags for the set of forwarding parameters in this scope.
Definition options.h:44
uint8_t command_line
A bitset of the various options that were set on the command line.
Definition options.h:144
void * shebang_callback_data
Any additional data that should be passed along to the shebang callback if one was set.
Definition options.h:106
bool encoding_locked
Whether or not the encoding magic comments should be respected.
Definition options.h:160
bool main_script
When the file being parsed is the main script, the shebang will be considered for command-line flags ...
Definition options.h:167
pm_string_t encoding
The name of the encoding that the source file is in.
Definition options.h:121
int32_t line
The line within the file that the parse starts on.
Definition options.h:115
pm_options_shebang_callback_t shebang_callback
The callback to call when additional switches are found in a shebang comment.
Definition options.h:100
int8_t frozen_string_literal
Whether or not the frozen string literal option has been set.
Definition options.h:153
bool partial_script
When the file being parsed is considered a "partial" script, jumps will not be marked as errors if th...
Definition options.h:177
size_t scopes_count
The number of scopes surrounding the code that is being parsed.
Definition options.h:126
pm_string_t filepath
The name of the file that is currently being parsed.
Definition options.h:109
pm_options_version_t version
The version of prism that we should be parsing with.
Definition options.h:141
struct pm_node * left
OrNode#left.
Definition ast.h:6355
struct pm_node * right
OrNode#right.
Definition ast.h:6368
struct pm_node * rest
ParametersNode#rest.
Definition ast.h:6412
struct pm_block_parameter_node * block
ParametersNode#block.
Definition ast.h:6432
pm_node_t base
The embedded base node.
Definition ast.h:6396
struct pm_node * keyword_rest
ParametersNode#keyword_rest.
Definition ast.h:6427
struct pm_node * body
ParenthesesNode#body.
Definition ast.h:6455
const pm_encoding_t * explicit_encoding
When a string-like expression is being lexed, any byte or escape sequence that resolves to a value wh...
Definition parser.h:840
pm_lex_state_t lex_state
The current state of the lexer.
Definition parser.h:649
uint8_t command_line
The command line flags given from the options.
Definition parser.h:859
const pm_encoding_t * encoding
The encoding functions for the current file are attached to the parser as it's parsing so that it can ...
Definition parser.h:755
bool partial_script
Whether or not we are parsing a "partial" script, which is a script that will be evaluated in the con...
Definition parser.h:882
bool pattern_matching_newlines
This flag indicates that we are currently parsing a pattern matching expression and impacts that calc...
Definition parser.h:909
const uint8_t * end
The pointer to the end of the source.
Definition parser.h:694
bool recovering
Whether or not we're currently recovering from a syntax error.
Definition parser.h:888
pm_node_flags_t integer_base
We want to add a flag to integer nodes that indicates their base.
Definition parser.h:797
bool warn_mismatched_indentation
By default, Ruby always warns about mismatched indentation.
Definition parser.h:930
pm_constant_pool_t constant_pool
This constant pool keeps all of the constants defined throughout the file so that we can reference th...
Definition parser.h:786
bool in_keyword_arg
This flag indicates that we are currently parsing a keyword argument.
Definition parser.h:912
const uint8_t * next_start
This is a special field set on the parser when we need the parser to jump to a specific location when...
Definition parser.h:707
pm_static_literals_t * current_hash_keys
The hash keys for the hash that is currently being parsed.
Definition parser.h:749
pm_list_t magic_comment_list
The list of magic comments that have been found while parsing.
Definition parser.h:721
int lambda_enclosure_nesting
Used to temporarily track the nesting of enclosures to determine if a { is the beginning of a lambda ...
Definition parser.h:658
pm_lex_callback_t * lex_callback
This is an optional callback that can be attached to the parser that will be called whenever a new to...
Definition parser.h:774
pm_options_version_t version
The version of prism that we should use to parse.
Definition parser.h:856
pm_token_t previous
The previous token we were considering.
Definition parser.h:697
pm_string_t current_string
This string is used to pass information from the lexer to the parser.
Definition parser.h:803
bool parsing_eval
Whether or not we are parsing an eval string.
Definition parser.h:875
bool current_regular_expression_ascii_only
True if the current regular expression being lexed contains only ASCII characters.
Definition parser.h:924
bool encoding_changed
Whether or not the encoding has been changed by a magic comment.
Definition parser.h:903
pm_location_t data_loc
An optional location that represents the location of the __END__ marker and the rest of the content of th...
Definition parser.h:728
pm_context_node_t * current_context
The current parsing context.
Definition parser.h:740
const uint8_t * start
The pointer to the start of the source.
Definition parser.h:691
int enclosure_nesting
Tracks the current nesting of (), [], and {}.
Definition parser.h:652
pm_list_t error_list
The list of errors that have been found while parsing.
Definition parser.h:734
int8_t frozen_string_literal
Whether or not we have found a frozen_string_literal magic comment with a true or false value.
Definition parser.h:869
pm_node_list_t * current_block_exits
When parsing block exits (e.g., break, next, redo), we need to validate that they are in correct cont...
Definition parser.h:853
const uint8_t * encoding_comment_start
This pointer indicates where a comment must start if it is to be considered an encoding comment.
Definition parser.h:768
pm_lex_mode_t stack[PM_LEX_STACK_SIZE]
The stack of lexer modes.
Definition parser.h:684
pm_list_t warning_list
The list of warnings that have been found while parsing.
Definition parser.h:731
const uint8_t * heredoc_end
This field indicates the end of a heredoc whose identifier was found on the current line.
Definition parser.h:715
int brace_nesting
Used to track the nesting of braces to ensure we get the correct value when we are interpolating bloc...
Definition parser.h:664
pm_encoding_changed_callback_t encoding_changed_callback
When the encoding that is being used to parse the source is changed by prism, we provide the ability ...
Definition parser.h:762
int32_t start_line
The line number at the start of the parse.
Definition parser.h:809
bool encoding_locked
This is very specialized behavior for when you want to parse in a context that does not respect encod...
Definition parser.h:896
pm_lex_mode_t * current
The current mode of the lexer.
Definition parser.h:681
struct pm_parser::@236040131255244317313236162207277265316171136011 lex_modes
A stack of lex modes.
pm_list_t comment_list
The list of comments that have been found while parsing.
Definition parser.h:718
size_t index
The current index into the lexer mode stack.
Definition parser.h:687
pm_string_t filepath
This is the path of the file being parsed.
Definition parser.h:780
pm_scope_t * current_scope
The current local scope.
Definition parser.h:737
bool command_start
Whether or not we're at the beginning of a command.
Definition parser.h:885
pm_newline_list_t newline_list
This is the list of newline offsets in the source file.
Definition parser.h:789
bool semantic_token_seen
Whether or not the parser has seen a token that has semantic meaning (i.e., a token that is not a com...
Definition parser.h:918
struct pm_node * right
RangeNode#right.
Definition ast.h:6683
struct pm_node * left
RangeNode#left.
Definition ast.h:6669
pm_node_t base
The embedded base node.
Definition ast.h:6713
pm_integer_t numerator
RationalNode#numerator.
Definition ast.h:6723
In order to properly set a regular expression's encoding and to validate the byte sequence for the un...
Definition prism.c:10369
pm_buffer_t regexp_buffer
The buffer holding the regexp source.
Definition prism.c:10374
pm_token_buffer_t base
The embedded base buffer.
Definition prism.c:10371
pm_node_t base
The embedded base node.
Definition ast.h:6780
pm_string_t unescaped
RegularExpressionNode#unescaped.
Definition ast.h:6801
struct pm_node * rescue_expression
RescueModifierNode#rescue_expression.
Definition ast.h:6893
struct pm_rescue_node * subsequent
RescueNode#subsequent.
Definition ast.h:6946
pm_node_t base
The embedded base node.
Definition ast.h:6915
struct pm_scope * previous
A pointer to the previous scope in the linked list.
Definition parser.h:582
pm_node_list_t implicit_parameters
This is a list of the implicit parameters contained within the block.
Definition parser.h:593
pm_shareable_constant_value_t shareable_constant
The current state of constant shareability for this scope.
Definition parser.h:620
pm_locals_t locals
The IDs of the locals in the given scope.
Definition parser.h:585
pm_scope_parameters_t parameters
This is a bitfield that indicates the parameters that are being used in this scope.
Definition parser.h:614
bool closed
A boolean indicating whether or not this scope can see into its parent.
Definition parser.h:626
struct pm_node * expression
SplatNode#expression.
Definition ast.h:7221
struct pm_node_list body
StatementsNode#body.
Definition ast.h:7244
Certain sets of nodes (hash keys and when clauses) check for duplicate nodes to alert the user of pot...
pm_node_t base
The embedded base node.
Definition ast.h:7273
pm_string_t unescaped
StringNode#unescaped.
Definition ast.h:7294
pm_location_t closing_loc
StringNode#closing_loc.
Definition ast.h:7289
pm_location_t opening_loc
StringNode#opening_loc.
Definition ast.h:7279
A generic string type that can have various ownership semantics.
Definition pm_string.h:33
const uint8_t * source
A pointer to the start of the string.
Definition pm_string.h:35
size_t length
The length of the string in bytes of memory.
Definition pm_string.h:38
enum pm_string_t::@346265266332060241255337121126133217326336224105 type
The type of the string.
pm_location_t value_loc
SymbolNode#value_loc.
Definition ast.h:7376
pm_string_t unescaped
SymbolNode#unescaped.
Definition ast.h:7386
When we're lexing certain types (strings, symbols, lists, etc.) we have string content associated wit...
Definition prism.c:10343
pm_buffer_t buffer
The buffer that we're using to keep track of the string content.
Definition prism.c:10348
const uint8_t * cursor
The cursor into the source string that points to how far we have currently copied into the buffer.
Definition prism.c:10354
This struct represents a token in the Ruby source.
Definition ast.h:530
const uint8_t * end
A pointer to the end location of the token in the source.
Definition ast.h:538
const uint8_t * start
A pointer to the start location of the token in the source.
Definition ast.h:535
pm_token_type_t type
The type of the token.
Definition ast.h:532
struct pm_statements_node * statements
UnlessNode#statements.
Definition ast.h:7500
struct pm_else_node * else_clause
UnlessNode#else_clause.
Definition ast.h:7510