def _apply_strategy(self, **kwargs):
    """
    Tokenize a file inclusion statement.

    Expects the text at `self._next_index` to start with
    `FILE_INCLUSION_SYM`, immediately followed by a file path (running to
    the next comment or to the end of the text). Appends a
    `file_inclusion_marker` token and a `file_path` token on success.
    Returns `True` on success; on failure sets `self.error_msg` and
    returns `False`.
    """
    if not self._text.startswith(FILE_INCLUSION_SYM, self._next_index):
        self.error_msg = \
            "Invalid token. Expected a file to be included there " + \
            "(starting with '" + FILE_INCLUSION_SYM + "')."
        return False

    # Consume the inclusion marker.
    self._next_index += 1
    self._update_furthest_matched_index()
    self._tokens.append(
        LexicalToken(
            TerminalType.file_inclusion_marker, FILE_INCLUSION_SYM
        )
    )

    # The path must follow the marker directly, with no whitespace.
    if self._text[self._next_index].isspace():
        self.error_msg = \
            "Invalid token. Expected a file path here, got a whitespace."
        return False

    # Slice up to the start of a trailing comment if there is one;
    # slicing with `None` as the stop index reads to the end of the text.
    comment_index = find_next_comment(self._text, self._next_index)
    file_path = self._text[self._next_index:comment_index].rstrip()

    self._next_index += len(file_path)
    self._update_furthest_matched_index()
    self._tokens.append(LexicalToken(TerminalType.file_path, file_path))
    return True
def _apply_strategy(self, **kwargs):
    """
    Tokenize a slot value (`= value`) at the end of a slot-definition rule.

    `kwargs` may contain a boolean under the key `parsing_slot_def`,
    `True` iff the current text belongs to a slot definition (defaults to
    `False` when absent). Raises `ValueError` when called outside a slot
    definition. Returns `True` and appends a `slot_val_marker` token and a
    `slot_val` token when a value is found; returns `False` otherwise.
    """
    parsing_slot_def = kwargs.get("parsing_slot_def", False)
    if not parsing_slot_def:
        raise ValueError(
            "Tried to extract a slot value within a rule that is not " + \
            "part of a slot definition."
        )

    # Skip the whitespace that precedes the marker.
    while self._text[self._next_index].isspace():
        self._next_index += 1
        self._update_furthest_matched_index()

    if not self._text.startswith(SLOT_VAL_SYM, self._next_index):
        return False

    self._tokens.append(
        LexicalToken(TerminalType.slot_val_marker, SLOT_VAL_SYM)
    )
    self._next_index += 1
    self._update_furthest_matched_index()

    # Skip the whitespace that follows the marker.
    while self._text[self._next_index].isspace():
        self._next_index += 1
        self._update_furthest_matched_index()

    # The value runs up to a trailing comment, or to the end of the text
    # when there is none (`None` as a slice stop means "to the end").
    comment_index = find_next_comment(self._text, self._next_index)
    slot_value = self._text[self._next_index:comment_index].rstrip()

    self._tokens.append(LexicalToken(TerminalType.slot_val, slot_value))
    self._next_index += len(slot_value)
    self._update_furthest_matched_index()
    return True
def _apply_strategy(self, **kwargs):
    """
    Tokenize a single word starting at `self._start_index`.

    `kwargs` may contain a boolean under the key `inside_choice`, `True`
    when the current word sits inside a choice (defaults to `False`), and
    a boolean under the key `parsing_slot_def`, `True` iff the current
    rule belongs to a slot definition (defaults to `False`). Appends a
    `word` token and returns `True` on success; sets `self.error_msg` and
    returns `False` otherwise.
    """
    inside_choice = kwargs.get("inside_choice", False)
    parsing_slot_def = kwargs.get("parsing_slot_def", False)

    # TODO this might be better using regexes
    if self._text[self._start_index].isspace():
        self.error_msg = \
            "Invalid token. Expected a word instead of a whitespace there."
        return False

    # Scan forward to the first whitespace or the end of the text.
    end_index = self._start_index + 1  # NOTE exclusive
    while (
        end_index < len(self._text)
        and not self._text[end_index].isspace()
    ):
        end_index += 1

    # A comment may cut the word short.
    end_index = \
        min_if_exist(
            end_index, find_next_comment(self._text, self._start_index)
        )
    if end_index == self._start_index:
        self.error_msg = "Invalid token. Expected a word to start here."
        return False

    # Unescaped special characters also terminate the word; stop early
    # once the word has shrunk to nothing.
    for special_char in RuleWord._should_be_escaped_chars:
        if end_index == self._start_index:
            break
        end_index = \
            min_if_exist(
                end_index,
                find_unescaped(self._text, special_char, self._start_index)
            )

    if inside_choice and end_index > self._start_index:
        for special_char in RuleWord._should_be_escaped_in_choices_chars:
            end_index = \
                min_if_exist(
                    end_index,
                    find_unescaped(
                        self._text, special_char, self._start_index
                    )
                )

    if parsing_slot_def and end_index > self._start_index:
        for special_char in RuleWord._should_be_escaped_in_slot_def_chars:
            end_index = \
                min_if_exist(
                    end_index,
                    find_unescaped(
                        self._text, special_char, self._start_index
                    )
                )

    if end_index == self._start_index:
        self.error_msg = "Invalid token. Expected a word to start here."
        return False

    word = self._text[self._start_index:end_index]
    self._next_index = end_index
    self._update_furthest_matched_index()
    self._tokens.append(LexicalToken(TerminalType.word, word))
    return True