def _apply_strategy(self, **kwargs):
    """
    Tries to match a file inclusion statement: the file inclusion marker
    directly followed by the path of the file to include (running to the
    end of the line, minus any trailing comment and whitespace).
    Appends a `file_inclusion_marker` token and a `file_path` token and
    returns `True` on success; sets `self.error_msg` and returns `False`
    otherwise.
    """
    if not self._text.startswith(FILE_INCLUSION_SYM, self._next_index):
        self.error_msg = \
            "Invalid token. Expected a file to be included there " + \
            "(starting with '" + FILE_INCLUSION_SYM + "')."
        return False
    self._next_index += 1
    self._update_furthest_matched_index()
    self._tokens.append(
        LexicalToken(TerminalType.file_inclusion_marker, FILE_INCLUSION_SYM))

    # Fix: guard against the line ending right after the marker, which
    # previously raised an `IndexError` instead of failing cleanly.
    if self._next_index >= len(self._text):
        self.error_msg = \
            "Didn't expect the line to end there. Expected a file path."
        return False
    if self._text[self._next_index].isspace():
        self.error_msg = \
            "Invalid token. Expected a file path here, got a whitespace."
        return False

    # The file path is everything up to the start of a trailing comment
    # (if any), with trailing whitespace stripped.
    comment_start = find_next_comment(self._text, self._next_index)
    if comment_start is not None:
        file_path = self._text[self._next_index:comment_start].rstrip()
    else:
        file_path = self._text[self._next_index:].rstrip()

    self._next_index += len(file_path)
    self._update_furthest_matched_index()
    self._tokens.append(LexicalToken(TerminalType.file_path, file_path))
    return True
def _apply_strategy(self, **kwargs):
    """
    Tries to match an argument declaration: the argument marker directly
    followed by the argument's name.
    Appends the corresponding tokens and returns `True` on success; sets
    `self.error_msg` and returns `False` otherwise.
    """
    if not self._text.startswith(ARG_SYM, self._next_index):
        self.error_msg = (
            "Invalid token. Expected an argument declaration there "
            + "(starting with '" + ARG_SYM + "')."
        )
        return False

    # Consume the marker character.
    self._next_index += 1
    self._update_furthest_matched_index()
    self._tokens.append(LexicalToken(TerminalType.arg_marker, ARG_SYM))

    name = extract_identifier(self._text, self._next_index)
    if name is None:
        # The line stopped where a name was expected.
        self.error_msg = (
            "Didn't expect the line to end there. Expected an argument name."
        )
        return False
    if len(name) == 0:
        # Something was there but no usable identifier could be read.
        self.error_msg = (
            "Couldn't extract the argument name. Arguments must have a name."
        )
        return False

    self._next_index += len(name)
    self._update_furthest_matched_index()
    self._tokens.append(LexicalToken(TerminalType.arg_name, name))
    return True
def _apply_strategy(self, **kwargs):
    """
    Tries to match a variation: the variation marker directly followed by
    the variation's name.
    Appends the corresponding tokens and returns `True` on success; sets
    `self.error_msg` and returns `False` otherwise.
    """
    if not self._text.startswith(VARIATION_SYM, self._next_index):
        self.error_msg = (
            "Invalid token. Expected a variation there (starting with '"
            + VARIATION_SYM + "')."
        )
        return False

    # Consume the marker character.
    self._next_index += 1
    self._update_furthest_matched_index()
    self._tokens.append(
        LexicalToken(TerminalType.variation_marker, VARIATION_SYM)
    )

    name = extract_identifier(self._text, self._next_index)
    if name is None:
        # The line stopped where a name was expected.
        self.error_msg = (
            "Didn't expect an end of line there. Expected a variation name."
        )
        return False
    if len(name) == 0:
        # Something was there but no usable identifier could be read.
        self.error_msg = (
            "Couldn't extract the name of the variation. Variation names "
            + "must be at least one character long."
        )
        return False

    self._next_index += len(name)
    self._update_furthest_matched_index()
    self._tokens.append(LexicalToken(TerminalType.variation_name, name))
    return True
def _apply_strategy(self, **kwargs):
    """
    Tries to match a unit reference: the unit start symbols, an optional
    case generation marker, an optional identifier, optional modifiers
    (variation, random generation, argument assignment — in any order)
    and the unit end symbol.
    Returns `True` and appends the matched tokens on success; sets
    `self.error_msg` and returns `False` otherwise.
    """
    # Match the opening of the unit as a reference (not a declaration).
    unit_start_rule = RuleUnitStart(self._text, self._next_index)
    if not unit_start_rule.matches(extracting_decl=False):
        self.error_msg = unit_start_rule.error_msg
        self._update_furthest_matched_index(unit_start_rule)
        return False
    self._next_index = unit_start_rule.get_next_index_to_match()
    self._update_furthest_matched_index(unit_start_rule)
    self._tokens.extend(unit_start_rule.get_lexical_tokens())

    # Optional case generation marker.
    if self._text.startswith(CASE_GEN_SYM, self._next_index):
        self._tokens.append(
            LexicalToken(TerminalType.casegen_marker, CASE_GEN_SYM)
        )
        self._next_index += 1
        self._update_furthest_matched_index()

    # Optional identifier of the referenced unit.
    identifier = extract_identifier(self._text, self._next_index)
    if identifier is not None:
        self._tokens.append(
            LexicalToken(TerminalType.unit_identifier, identifier)
        )
        self._next_index += len(identifier)
        self._update_furthest_matched_index()

    # Modifiers may appear in any order (`None` allows no modifier at all).
    if not self._match_any_order(
        [None, RuleVariation, RuleRandGen, RuleArgAssignment]
    ):
        return False

    if not self._text.startswith(UNIT_END_SYM, self._next_index):
        self.error_msg = \
            "Invalid token. Expected the unit reference to end here (" + \
            "using character '" + UNIT_END_SYM + "')."
        return False
    # TODO maybe making a function for this would be useful
    # The type of the closing token mirrors the opening token that
    # `RuleUnitStart` produced (always at index 0 of `self._tokens`).
    if self._tokens[0].type == TerminalType.alias_ref_start:
        unit_end_type = TerminalType.alias_ref_end
    elif self._tokens[0].type == TerminalType.slot_ref_start:
        unit_end_type = TerminalType.slot_ref_end
    elif self._tokens[0].type == TerminalType.intent_ref_start:
        unit_end_type = TerminalType.intent_ref_end
    else:
        # Should never happen
        raise ValueError(
            "An unexpected error happened during parsing: tried to " + \
            "parse the end of a unit but couldn't find its start in " + \
            "the previously parsed data.\nData was: " + str(self._tokens)
        )
    self._next_index += 1
    self._update_furthest_matched_index()
    self._tokens.append(LexicalToken(unit_end_type, UNIT_END_SYM))
    return True
def _apply_strategy(self, **kwargs):
    """
    Tries to match a comment running to the end of the line (current or
    deprecated comment symbol), preceded by optional whitespace.
    A line containing only whitespace also matches (as an empty match).
    """
    content = self._text
    # Whitespace before a comment carries no meaning: advance past it but
    # discard any tokens the whitespace rule produced.
    ws_rule = RuleWhitespaces(self._text, self._next_index)
    if ws_rule.matches():
        self._next_index = ws_rule.get_next_index_to_match()
        self._update_furthest_matched_index()
    # Nothing left on the line: count it as a match without a token.
    if self._next_index >= len(content):
        return True

    uses_old_sym = content.startswith(OLD_COMMENT_SYM, self._next_index)
    if not (
        content.startswith(COMMENT_SYM, self._next_index) or uses_old_sym
    ):
        # No comment found
        self.error_msg = (
            "Invalid token. Expected a comment there (starting with '"
            + COMMENT_SYM + "' or '" + OLD_COMMENT_SYM + "')."
        )
        return False

    if uses_old_sym:
        # The old comment symbol still works but is deprecated.
        Deprecations.get_or_create().warn_old_comment(
            *(InputFileManager \
                .get_or_create() \
                .get_current_line_information())
        )
    remainder = content[self._next_index:]
    self._tokens.append(LexicalToken(TerminalType.comment, remainder))
    self._next_index = len(content)
    self._update_furthest_matched_index()
    return True
def _apply_strategy(self, **kwargs):
    """
    Matches one or more consecutive whitespace characters.
    `kwargs` can contain a boolean at key `parsing_indentation` that is
    `True` iff the whitespaces currently parsed correspond to an
    indentation, and `False` if they correspond to simple whitespaces.
    If `kwargs` doesn't contain this boolean, defaults to `False`.
    """
    token_type = (
        TerminalType.indentation
        if kwargs.get("parsing_indentation", False)
        else TerminalType.whitespace
    )

    content = self._text
    length = len(content)
    # Consume every whitespace character from the current position on.
    while self._next_index < length and content[self._next_index].isspace():
        self._next_index += 1
        self._update_furthest_matched_index()

    if self._next_index == self._start_index:
        # Not a single whitespace character was found.
        self.error_msg = "Invalid token. Expected at least one whitespace there."
        return False

    self._tokens.append(
        LexicalToken(token_type, content[self._start_index:self._next_index])
    )
    return True
def _apply_strategy(self, **kwargs):
    """
    Tries to match a random generation modifier: its marker, an optional
    'opposite' marker, an optional name and an optional percentage
    (introduced by the percentage marker).
    Returns `True` and appends the matched tokens on success; sets
    `self.error_msg` and returns `False` otherwise.
    """
    if not self._text.startswith(RAND_GEN_SYM, self._next_index):
        self.error_msg = \
            "Invalid token. Expected a random generation modifier to " + \
            "begin there (starting with '" + RAND_GEN_SYM + "')."
        return False
    self._next_index += 1
    self._update_furthest_matched_index()
    self._tokens.append(
        LexicalToken(TerminalType.randgen_marker, RAND_GEN_SYM))

    # Optional marker inverting the random generation.
    if self._text.startswith(RAND_GEN_OPPOSITE_SYM, self._next_index):
        self._next_index += 1
        self._update_furthest_matched_index()
        self._tokens.append(
            LexicalToken(TerminalType.opposite_randgen_marker,
                RAND_GEN_OPPOSITE_SYM))

    # TODO not sure `extract_identifier` is the best thing to use here
    randgen_name = extract_identifier(self._text, self._next_index)
    if randgen_name is None:
        # presumably `extract_identifier` returns `None` only at the end
        # of the line — TODO confirm against its definition
        self.error_msg = \
            "Didn't expect the line to end there. Expected a name for " + \
            "the random generation modifier, a percentage for it or " + \
            "the end of the unit or choice."
        return False
    if len(randgen_name) > 0:
        # The name is optional: an empty extraction simply means no name.
        self._next_index += len(randgen_name)
        self._update_furthest_matched_index()
        self._tokens.append(
            LexicalToken(TerminalType.randgen_name, randgen_name))

    # Optional percentage, introduced by its marker character.
    if self._text.startswith(RAND_GEN_PERCENT_SYM, self._next_index):
        self._next_index += 1
        self._update_furthest_matched_index()
        self._tokens.append(
            LexicalToken(TerminalType.percentgen_marker,
                RAND_GEN_PERCENT_SYM))
        if not self._try_to_match_rule(RulePercentGen):
            # `_try_to_match_rule` is assumed to have set `error_msg`
            # (this concatenates onto it) — TODO confirm
            self.error_msg += \
                " Percentage for the random generation is required after " + \
                "its marker character ('" + RAND_GEN_PERCENT_SYM + "')."
            return False
    return True
def _apply_strategy(self, **kwargs):
    """
    Tries to match a unit declaration: the unit start symbols, an optional
    case generation marker, an optional identifier, optional modifiers
    (argument declaration, variation — in any order) and the unit end
    symbol.
    Returns `True` and appends the matched tokens on success; sets
    `self.error_msg` and returns `False` otherwise.
    """
    # Match the opening of the unit (declaration mode by default).
    if not self._try_to_match_rule(RuleUnitStart):
        return False

    # Optional case generation marker.
    if self._text.startswith(CASE_GEN_SYM, self._next_index):
        self._next_index += 1
        self._update_furthest_matched_index()
        self._tokens.append(
            LexicalToken(TerminalType.casegen_marker, CASE_GEN_SYM))

    # Optional identifier of the declared unit.
    identifier = extract_identifier(self._text, self._next_index)
    if identifier is not None:
        self._next_index += len(identifier)
        self._update_furthest_matched_index()
        self._tokens.append(
            LexicalToken(TerminalType.unit_identifier, identifier))

    # Modifiers may appear in any order (`None` allows no modifier at all).
    if not self._match_any_order([None, RuleArgDecl, RuleVariation]):
        return False

    if not self._text.startswith(UNIT_END_SYM, self._next_index):
        self.error_msg = \
            "Invalid token. Expected the end of the unit declaration " + \
            "there (using symbol '" + UNIT_END_SYM + "')."
        return False
    # TODO maybe making a function for this would be useful
    # The type of the closing token mirrors the opening token that
    # `RuleUnitStart` produced (always at index 0 of `self._tokens`).
    if self._tokens[0].type == TerminalType.alias_decl_start:
        unit_end_type = TerminalType.alias_decl_end
    elif self._tokens[0].type == TerminalType.slot_decl_start:
        unit_end_type = TerminalType.slot_decl_end
    elif self._tokens[0].type == TerminalType.intent_decl_start:
        unit_end_type = TerminalType.intent_decl_end
    else:
        # Should never happen
        raise ValueError(
            "An unexpected error happened during parsing: tried to " + \
            "parse the end of a unit but couldn't find its start in " + \
            "the previously parsed data.\nData was: " + str(self._tokens)
        )
    self._next_index += 1
    self._update_furthest_matched_index()
    self._tokens.append(LexicalToken(unit_end_type, UNIT_END_SYM))
    return True
def _apply_strategy(self, **kwargs):
    """
    Tries to match a slot value: the slot value marker (preceded and
    followed by optional whitespace) and the value itself, which runs to
    the end of the line minus any trailing comment and whitespace.
    `kwargs` can contain a boolean with key `parsing_slot_def` that is
    `True` if the current text is part of a slot definition.
    If this boolean is not in `kwargs`, defaults to `False`.
    Raises a `ValueError` when called outside of a slot definition.
    """
    parsing_slot_def = kwargs.get("parsing_slot_def", False)
    if parsing_slot_def:
        # Fix: both whitespace-skipping loops are bounded so that a line
        # ending in whitespace makes the rule fail cleanly instead of
        # raising an `IndexError` (same guard `RuleWhitespaces` uses).
        while (
            self._next_index < len(self._text)
            and self._text[self._next_index].isspace()
        ):
            self._next_index += 1
            self._update_furthest_matched_index()
        if self._text.startswith(SLOT_VAL_SYM, self._next_index):
            self._tokens.append(
                LexicalToken(TerminalType.slot_val_marker, SLOT_VAL_SYM))
            self._next_index += 1
            self._update_furthest_matched_index()
            # Skip the whitespace between the marker and the value.
            while (
                self._next_index < len(self._text)
                and self._text[self._next_index].isspace()
            ):
                self._next_index += 1
                self._update_furthest_matched_index()
            # The value stops at a trailing comment (if any), with
            # trailing whitespace stripped.
            comment_sym = find_next_comment(self._text, self._next_index)
            if comment_sym is not None:
                slot_value = \
                    self._text[self._next_index:comment_sym].rstrip()
            else:
                slot_value = self._text[self._next_index:].rstrip()
            self._tokens.append(
                LexicalToken(TerminalType.slot_val, slot_value))
            self._next_index += len(slot_value)
            self._update_furthest_matched_index()
            return True
        return False
    else:
        raise ValueError(
            "Tried to extract a slot value within a rule that is not " + \
            "part of a slot definition."
        )
def _apply_strategy(self, **kwargs):
    """
    Tries to match the start of a unit: one of the unit type symbols
    (alias, slot or intent) directly followed by the unit start symbol.
    `kwargs` can contain a value with key `extracting_decl`.
    This is a boolean that should be `True` iff the rule should consider
    it is parsing a unit declaration and `False` if it is parsing a unit
    reference.
    If `kwargs` doesn't contain `extracting_decl`, defaults to `True`.
    """
    extracting_decl = kwargs.get("extracting_decl", True)
    # Decide the token type from the unit type symbol and whether a
    # declaration or a reference is being parsed.
    if self._text.startswith(ALIAS_SYM, self._next_index):
        if extracting_decl:
            terminal_type = TerminalType.alias_decl_start
        else:
            terminal_type = TerminalType.alias_ref_start
        text_start = ALIAS_SYM
    elif self._text.startswith(SLOT_SYM, self._next_index):
        if extracting_decl:
            terminal_type = TerminalType.slot_decl_start
        else:
            terminal_type = TerminalType.slot_ref_start
        text_start = SLOT_SYM
    elif self._text.startswith(INTENT_SYM, self._next_index):
        if extracting_decl:
            terminal_type = TerminalType.intent_decl_start
        else:
            terminal_type = TerminalType.intent_ref_start
        text_start = INTENT_SYM
    else:
        # Fix: the message previously never closed its parenthesis.
        self.error_msg = \
            "Invalid token. Expected a unit start here (starting with " + \
            "either '" + ALIAS_SYM + "', '" + SLOT_SYM + "' or '" + \
            INTENT_SYM + "')."
        return False
    self._next_index += 1
    self._update_furthest_matched_index()

    if self._text.startswith(UNIT_START_SYM, self._next_index):
        self._tokens.append(
            LexicalToken(terminal_type, text_start + UNIT_START_SYM))
        self._next_index += 1
        self._update_furthest_matched_index()
        return True

    # Fix: the message previously never closed the quote around the
    # unit type symbol.
    self.error_msg = \
        "Invalid token. Expected a start of unit here (starting with '" + \
        UNIT_START_SYM + "'). Did you mean to escape the previous '" + \
        text_start + "'?"
    return False
def _apply_strategy(self, **kwargs):
    """
    Tries to match a percentage for a random generation modifier: an
    integral part, an optional fractional part (a dot followed by at
    least one digit) and an optional '%' sign, possibly preceded by
    whitespace.
    Appends a `percentgen` token containing the matched number on
    success; sets `self.error_msg` and returns `False` otherwise.
    """
    text_length = len(self._text)
    # Fix: every index access below is bounds-checked so a percentage
    # that ends the line no longer raises an `IndexError` (this mirrors
    # the guard `RuleWhitespaces` already uses in its loop).
    while (
        self._next_index < text_length
        and self._text[self._next_index].isdigit()
    ):
        self._next_index += 1
        self._update_furthest_matched_index()
    percentage = self._text[self._start_index:self._next_index]

    if (
        self._next_index >= text_length
        or self._text[self._next_index] != '.'
    ):
        # No fractional part: the integral part must then be non-empty.
        if len(percentage) == 0:
            self.error_msg = \
                "Invalid token. Expected a percentage for the random " + \
                "generation modifier."
            return False
    else:
        # Consume the dot and require a non-empty fractional part.
        percentage += '.'
        self._next_index += 1
        self._update_furthest_matched_index()
        start_index_non_int_part = self._next_index
        while (
            self._next_index < text_length
            and self._text[self._next_index].isdigit()
        ):
            self._next_index += 1
            self._update_furthest_matched_index()
        if self._next_index == start_index_non_int_part:
            self.error_msg = \
                "Invalid token. Cannot have a percentage with an empty " + \
                "non-integral part."
            return False
        percentage += self._text[start_index_non_int_part:self._next_index]

    # Whitespace between the number and the '%' sign is not meaningful.
    if not self._try_to_match_rule(RuleWhitespaces):
        self.error_msg = None
        # Ignore tokens as this whitespace is not meaningful

    # The '%' sign itself is optional.
    if (
        self._next_index < text_length
        and self._text[self._next_index] == '%'
    ):
        self._next_index += 1
        self._update_furthest_matched_index()

    self._tokens.append(LexicalToken(TerminalType.percentgen, percentage))
    return True
def _apply_strategy(self, **kwargs):
    """
    Tries to match a single word: the longest run of characters that stops
    at a whitespace, a comment or an unescaped special character.
    `kwargs` can contain a boolean with key `inside_choice` that is
    `True` when the current word is inside a choice and `False` otherwise.
    If this boolean is not in `kwargs`, defaults to `False`.
    `kwargs` can also contain a boolean with key `parsing_slot_def` which
    is `True` iff the current rule is inside a slot definition.
    If this boolean is not in `kwargs`, defaults to `False`.
    """
    inside_choice = kwargs.get("inside_choice", False)
    parsing_slot_def = kwargs.get("parsing_slot_def", False)
    # TODO this might be better using regexes
    if self._text[self._start_index].isspace():
        self.error_msg = \
            "Invalid token. Expected a word instead of a whitespace there."
        return False

    # Find whitespace after the word
    next_word_index = self._start_index + 1 # NOTE exclusive
    while True:
        if (
            next_word_index == len(self._text)
            or self._text[next_word_index].isspace()
        ):
            break
        next_word_index += 1
    # The word cannot run into a comment.
    next_word_index = \
        min_if_exist(
            next_word_index,
            find_next_comment(self._text, self._start_index)
        )
    if next_word_index == self._start_index:
        self.error_msg = "Invalid token. Expected a word to start here."
        return False

    # The word also stops at the first unescaped special character.
    for current_char in RuleWord._should_be_escaped_chars:
        if next_word_index == self._start_index:
            break
        next_word_index = \
            min_if_exist(
                next_word_index,
                find_unescaped(self._text, current_char, self._start_index)
            )
    # Inside a choice, additional characters act as word delimiters.
    if inside_choice and next_word_index > self._start_index:
        for char_to_escape in RuleWord._should_be_escaped_in_choices_chars:
            next_word_index = \
                min_if_exist(
                    next_word_index,
                    find_unescaped(
                        self._text, char_to_escape, self._start_index
                    )
                )
    # Inside a slot definition, yet more characters act as delimiters.
    if parsing_slot_def and next_word_index > self._start_index:
        for char_to_escape in RuleWord._should_be_escaped_in_slot_def_chars:
            next_word_index = \
                min_if_exist(
                    next_word_index,
                    find_unescaped(
                        self._text, char_to_escape, self._start_index
                    )
                )
    if next_word_index == self._start_index:
        self.error_msg = "Invalid token. Expected a word to start here."
        return False

    word = self._text[self._start_index:next_word_index]
    self._next_index = next_word_index
    self._update_furthest_matched_index()
    self._tokens.append(LexicalToken(TerminalType.word, word))
    return True
def _apply_strategy(self, **kwargs):
    """
    Tries to match a key or a value of an annotation, which can optionally
    be enclosed between encloser (quotation) characters.
    `kwargs` can contain a value with key `extracting_key`.
    `extracting_key` is a boolean that is `True` if this rule should
    extract a key and `False` if this rule should extract a value.
    If `kwargs` doesn't contain `extracting_key`, defaults to `True`.
    """
    extracting_key = kwargs.get("extracting_key", True)
    if extracting_key:
        terminal_type = TerminalType.key
    else:
        terminal_type = TerminalType.value

    # Detect which encloser character (if any) wraps the key/value.
    encloser = None
    for current_encloser in KEY_VAL_ENCLOSERS:
        if self._text.startswith(current_encloser, self._next_index):
            self._next_index += 1
            self._update_furthest_matched_index()
            encloser = current_encloser
            break

    if encloser is not None:
        # Enclosed key/value
        next_encloser_index = \
            find_unescaped(self._text, encloser, self._next_index)
        if next_encloser_index is None:
            self.error_msg = \
                "Missing key-value encloser. Expected symbol " + encloser + \
                " instead of end of line."
            return False

        # NOTE(review): assumes the rule started matching exactly at the
        # opening encloser (`_start_index + 1` skips it) — confirm against
        # how callers instantiate this rule.
        extracted_text = self._text[self._start_index+1:next_encloser_index]
        self._next_index = next_encloser_index + 1
        self._update_furthest_matched_index()
        self._tokens.append(LexicalToken(terminal_type, extracted_text))
        return True
    else:
        # Key/value not enclosed: it runs until the next connector (for
        # keys) or separator (for values), or the end of the annotation,
        # whichever comes first.
        end_annotation_index = \
            find_unescaped(self._text, ANNOTATION_END, self._next_index)
        if extracting_key:
            next_connector_index = \
                find_unescaped(
                    self._text, KEY_VAL_CONNECTOR, self._next_index
                )
            end_key_value_index = \
                min_if_exist(next_connector_index, end_annotation_index)
        else: # Extracting value
            next_key_val_pair_index = \
                find_unescaped(
                    self._text, ANNOTATION_SEP, self._next_index
                )
            end_key_value_index = \
                min_if_exist(next_key_val_pair_index, end_annotation_index)
        if end_key_value_index is None:
            self.error_msg = \
                "Couldn't find the end of key/value. " + \
                "Didn't expect the end of the line there."
            return False

        extracted_text = \
            self._text[self._start_index:end_key_value_index].rstrip()
        self._next_index += len(extracted_text)
        self._update_furthest_matched_index()
        self._tokens.append(LexicalToken(terminal_type, extracted_text))
        return True
def _apply_strategy(self, **kwargs):
    """
    Tries to match a choice: an opening character (current or deprecated
    syntax), an optional case generation marker, contents separated by
    the matching separator character, an optional random generation
    modifier and the matching closing character.
    Returns `True` and appends the matched tokens on success; sets
    `self.error_msg` and returns `False` otherwise.
    """
    # Detect which syntax (deprecated or current) the choice uses; the
    # separator and closing characters must match the opening one.
    start_char = None
    if self._text.startswith(OLD_CHOICE_START, self._next_index):
        start_char = OLD_CHOICE_START
        sep_char = OLD_CHOICE_SEP
        end_char = OLD_CHOICE_END
        Deprecations.get_or_create().warn_old_choice(
            *(InputFileManager \
                .get_or_create() \
                .get_current_line_information())
        )
    elif self._text.startswith(CHOICE_START, self._next_index):
        start_char = CHOICE_START
        sep_char = CHOICE_SEP
        end_char = CHOICE_END
    if start_char is None:
        self.error_msg = \
            "Invalid token. Expected a choice to start there (starting " + \
            "with '" + CHOICE_START + "' or '" + OLD_CHOICE_START + "')."
        return False
    self._next_index += 1
    self._update_furthest_matched_index()
    self._tokens.append(LexicalToken(TerminalType.choice_start, start_char))

    # Optional case generation marker.
    if self._text.startswith(CASE_GEN_SYM, self._next_index):
        self._next_index += 1
        self._update_furthest_matched_index()
        self._tokens.append(
            LexicalToken(TerminalType.casegen_marker, CASE_GEN_SYM))

    # Whitespace after the opening character is not meaningful.
    if not self._try_to_match_rule(RuleWhitespaces):
        self.error_msg = None

    # Match the contents of the choice, one item per iteration; the loop
    # exits at the first point where no further content matches.
    while True:
        if self._text.startswith(sep_char, self._next_index):
            self._next_index += 1
            self._update_furthest_matched_index()
            self._tokens.append(
                LexicalToken(TerminalType.choice_sep, sep_char))
        if not self._try_to_match_rule(RuleWhitespaces):
            self.error_msg = None
        rule_content_rule = RuleContentRule(self._text, self._next_index)
        if not rule_content_rule.matches(inside_choice=True):
            self.error_msg = None
            self._update_furthest_matched_index(rule_content_rule)
            break
        self._next_index = rule_content_rule.get_next_index_to_match()
        self._update_furthest_matched_index(rule_content_rule)
        self._tokens.extend(rule_content_rule.get_lexical_tokens())

    # Optional random generation modifier before the closing character.
    if not self._try_to_match_rule(RuleRandGen):
        self.error_msg = None

    if not self._text.startswith(end_char, self._next_index):
        self.error_msg = \
            "Invalid token. Unmatched choice opening character. " + \
            "Expected the choice to end here (using " + \
            "character '" + end_char + "')."
        return False
    self._next_index += 1
    self._update_furthest_matched_index()
    self._tokens.append(LexicalToken(TerminalType.choice_end, end_char))
    return True
def _apply_strategy(self, **kwargs):
    """
    Tries to match an annotation: the annotation opening character, then
    either nothing (empty annotation), a single value, or one or more
    key-value pairs separated by the annotation separator, and finally
    the annotation closing character. Whitespace between elements is
    consumed but produces no tokens.
    Returns `True` and appends the matched tokens on success; sets
    `self.error_msg` and returns `False` otherwise.
    """
    if self._text.startswith(ANNOTATION_START, self._next_index):
        self._next_index += 1
        self._update_furthest_matched_index()
        self._tokens.append(
            LexicalToken(TerminalType.annotation_start, ANNOTATION_START))
    else:
        self.error_msg = \
            "Invalid token. Expected an annotation there (starting with '" + \
            ANNOTATION_START + "')."
        return False

    whitespaces_rule = RuleWhitespaces(self._text, self._next_index)
    if whitespaces_rule.matches():
        self._next_index = whitespaces_rule.get_next_index_to_match()
        self._update_furthest_matched_index(whitespaces_rule)
        # Ignoring the tokens because whitespaces here are not meaningful

    # Empty annotation
    if self._text.startswith(ANNOTATION_END, self._next_index):
        self._next_index += 1
        self._update_furthest_matched_index()
        self._tokens.append(
            LexicalToken(TerminalType.annotation_end, ANNOTATION_END))
        return True

    # First element: parsed as a key; re-typed to a value later if no
    # connector follows it.
    first_key_val_rule = RuleKeyValue(self._text, self._next_index)
    if not first_key_val_rule.matches():
        self.error_msg = first_key_val_rule.error_msg
        self._update_furthest_matched_index(first_key_val_rule)
        return False
    self._tokens.extend(first_key_val_rule.get_lexical_tokens())
    self._update_furthest_matched_index(first_key_val_rule)
    self._next_index = first_key_val_rule.get_next_index_to_match()

    whitespaces_rule = RuleWhitespaces(self._text, self._next_index)
    if whitespaces_rule.matches():
        self._next_index = whitespaces_rule.get_next_index_to_match()
        self._update_furthest_matched_index()
        # Ignoring the tokens because whitespaces here are not meaningful

    if not self._text.startswith(KEY_VAL_CONNECTOR, self._next_index):
        # Single value
        self._tokens[-1].type = TerminalType.value
    else:
        # Multiple key/value pairs
        self._next_index += 1
        self._update_furthest_matched_index()
        self._tokens.append(
            LexicalToken(TerminalType.key_value_connector, KEY_VAL_CONNECTOR))

        whitespaces_rule = RuleWhitespaces(self._text, self._next_index)
        if whitespaces_rule.matches():
            self._next_index = whitespaces_rule.get_next_index_to_match()
            self._update_furthest_matched_index()
            # Ignoring the tokens because whitespaces here are not meaningful

        # Value associated to the first key.
        first_val_rule = RuleKeyValue(self._text, self._next_index)
        if not first_val_rule.matches(extracting_key=False):
            self.error_msg = first_val_rule.error_msg
            self._update_furthest_matched_index(first_val_rule)
            return False
        self._next_index = first_val_rule.get_next_index_to_match()
        self._update_furthest_matched_index(first_val_rule)
        self._tokens.extend(first_val_rule.get_lexical_tokens())

        whitespaces_rule = RuleWhitespaces(self._text, self._next_index)
        if whitespaces_rule.matches():
            self._next_index = whitespaces_rule.get_next_index_to_match()
            self._update_furthest_matched_index()
            # Ignoring the tokens because whitespaces here are not meaningful

    # Any further element must be a full key-value pair; mixing pairs and
    # single values is rejected below.
    while self._text.startswith(ANNOTATION_SEP, self._next_index):
        self._next_index += 1
        self._update_furthest_matched_index()
        self._tokens.append(
            LexicalToken(TerminalType.separator, ANNOTATION_SEP))

        whitespaces_rule = RuleWhitespaces(self._text, self._next_index)
        if whitespaces_rule.matches():
            self._next_index = whitespaces_rule.get_next_index_to_match()
            self._update_furthest_matched_index()
            # Ignoring the tokens because whitespaces here are not meaningful

        key_rule = RuleKeyValue(self._text, self._next_index)
        if not key_rule.matches(extracting_key=True):
            self.error_msg = key_rule.error_msg
            self._update_furthest_matched_index(key_rule)
            return False
        self._next_index = key_rule.get_next_index_to_match()
        self._update_furthest_matched_index(key_rule)
        self._tokens.extend(key_rule.get_lexical_tokens())

        whitespaces_rule = RuleWhitespaces(self._text, self._next_index)
        if whitespaces_rule.matches():
            self._next_index = whitespaces_rule.get_next_index_to_match()
            self._update_furthest_matched_index()
            # Ignoring the tokens because whitespaces here are not meaningful

        if not self._text.startswith(KEY_VAL_CONNECTOR, self._next_index):
            self.error_msg = \
                "Cannot mix key-value pairs and single values " + \
                "in annotations. Expected a key-value connector " + \
                "(using symbol '" + KEY_VAL_CONNECTOR + "')."
            return False
        self._next_index += 1
        self._update_furthest_matched_index()
        self._tokens.append(
            LexicalToken(TerminalType.key_value_connector, KEY_VAL_CONNECTOR))

        whitespaces_rule = RuleWhitespaces(self._text, self._next_index)
        if whitespaces_rule.matches():
            self._next_index = whitespaces_rule.get_next_index_to_match()
            self._update_furthest_matched_index()
            # Ignoring the tokens because whitespaces here are not meaningful

        value_rule = RuleKeyValue(self._text, self._next_index)
        if not value_rule.matches(extracting_key=False):
            self.error_msg = value_rule.error_msg
            self._update_furthest_matched_index(value_rule)
            return False
        self._next_index = value_rule.get_next_index_to_match()
        self._update_furthest_matched_index(value_rule)
        self._tokens.extend(value_rule.get_lexical_tokens())

        whitespaces_rule = RuleWhitespaces(self._text, self._next_index)
        if whitespaces_rule.matches():
            self._next_index = whitespaces_rule.get_next_index_to_match()
            self._update_furthest_matched_index()
            # Ignoring the tokens because whitespaces here are not meaningful

    if not self._text.startswith(ANNOTATION_END, self._next_index):
        self.error_msg = \
            "Invalid token. Expected the annotation to end there (using " + \
            "character ')')."
        return False
    self._next_index += 1
    self._update_furthest_matched_index()
    self._tokens.append(
        LexicalToken(TerminalType.annotation_end, ANNOTATION_END))
    return True