def warn_old_choice(self, filename=None, line_nb=None, line=None):
    """
    Warns the user that one of their files uses the deprecated choice
    syntax (delimited by `OLD_CHOICE_START` and `OLD_CHOICE_END`) and
    advises them to use `CHOICE_START` and `CHOICE_END` instead.

    The warning is issued at most once per instance (this is tracked with
    `self._old_choice_warned`): it is passed to `warn` as a
    `DeprecationWarning` and then to `print_warn`.

    If `filename` is provided, it is mentioned in the message, along with
    the line number and line contents when both `line_nb` and `line` are
    provided. Without `filename`, the line information alone is mentioned
    (again only if both `line_nb` and `line` are provided).
    """
    if self._old_choice_warned:
        return  # The user has already been told once
    self._old_choice_warned = True

    has_line_info = (line_nb is not None and line is not None)

    notice = (
        "Choices starting with '" + OLD_CHOICE_START
        + "' and ending with '" + OLD_CHOICE_END
        + "' are now deprecated. Please use the new syntax that "
        + "starts with '" + CHOICE_START + "' and ends with '"
        + CHOICE_END + "' instead."
    )

    if filename is not None:
        notice += (
            "\nThis syntax was found in file '"
            + cast_to_unicode(filename) + "'"
        )
        if has_line_info:
            notice += (
                " at line " + str(line_nb) + ": '"
                + str(line).strip() + "'"
            )
        notice += '.'
    elif has_line_info:
        notice += (
            "\nThis syntax was found at line " + str(line_nb)
            + ": '" + str(line).strip() + "'."
        )

    warn(notice, DeprecationWarning)
    print_warn(notice)
def warn_old_comment(self, filename=None, line_nb=None, line=None):
    """
    Warns the user that one of their files contains comments starting
    with `OLD_COMMENT_SYM` (a deprecated way of making comments) and
    advises them to use `COMMENT_SYM` instead.

    The warning is issued at most once per instance (this is tracked with
    `self._old_comment_warned`): it is passed to `warn` as a
    `DeprecationWarning` and then to `print_warn`.

    If `filename` is provided, it is mentioned in the message, along with
    the line number and line contents when both `line_nb` and `line` are
    provided. Without `filename`, the line information alone is mentioned
    (again only if both `line_nb` and `line` are provided).
    """
    if self._old_comment_warned:
        return  # The user has already been told once
    self._old_comment_warned = True

    has_line_info = (line_nb is not None and line is not None)

    notice = (
        "Comments starting with a semi-colon '"
        + OLD_COMMENT_SYM + "' are now deprecated. "
        + "Please use the new double slash '" + COMMENT_SYM
        + "' syntax instead."
    )

    if filename is not None:
        notice += (
            "\nThis syntax was found in file '"
            + cast_to_unicode(filename) + "'"
        )
        if has_line_info:
            notice += (
                " at line " + str(line_nb) + ": '"
                + str(line).strip() + "'"
            )
        notice += '.'
    elif has_line_info:
        notice += (
            "\nThis syntax was found at line " + str(line_nb)
            + ": '" + str(line).strip() + "'."
        )

    warn(notice, DeprecationWarning)
    print_warn(notice)
def _ask_confirmation(self):
    """
    Tells the user that the folder `self.output_dir_path` already exists
    and asks on the prompt whether it should be overwritten.

    Returns `True` if the (case-insensitive) answer is empty or starts
    with 'y', `False` otherwise.
    """
    print_warn("Folder '" + self.output_dir_path + "' already exists.")
    reply = input("Overwrite the whole folder? [y/n] ").lower()
    print('')  # Blank line after the prompt
    # An empty answer counts as a "yes"
    return reply == "" or reply.startswith('y')
def set_caching_level(self, new_level):
    """
    Sets `self.caching_level` to `new_level` and tells the user about it
    through `print_warn`.

    `new_level` must lie between 0 and 100 (both included).

    @raises: `ValueError` if `new_level` is outside of this range. In that
             case nothing is printed and the caching level is left
             untouched.
    """
    # Validate first: the message below must not announce a change that
    # is then rejected.
    if new_level < 0 or new_level > 100:
        raise ValueError(
            "Tried to set the caching level to an invalid level (" + \
            str(new_level) + ")."
        )
    print_warn(
        "Setting caching level to " + str(new_level) + \
        " for performance reasons."
    )
    self.caching_level = new_level
def open_new_file(self, filepath):
    """
    Opens the new (master) file at `filepath` through
    `self.input_file_manager`, making the parser ready to parse it.

    If the file manager raises `FileAlreadyOpened`, the error is not
    propagated: its message is printed as a warning instead, followed by
    the name of the file whose parsing continues (when the file manager
    reports one).

    @raises: `IOError` (with a message mentioning `filepath`) if the file
             manager raised an `IOError` while opening the file.
    """
    try:
        self.input_file_manager.open_file(filepath)
    except IOError as io_error:
        # Re-raise with the path of the offending file in the message
        description = (
            "There was an error while opening file '"
            + str(cast_to_unicode(filepath)) + "': " + str(io_error) + "."
        )
        raise IOError(description)
    except FileAlreadyOpened as already_opened:
        warning = str(already_opened)
        resumed_file_name = \
            self.input_file_manager.get_current_file_name()
        if resumed_file_name is not None:
            warning = (
                warning + "\nContinuing the parsing of '"
                + str(resumed_file_name) + "'."
            )
        print_warn(warning)
def warn_semicolon_comments():
    """
    Warns the user that one of their files contains semi-colon comments
    (which are a deprecated way of making comments) and advises them to
    use '//' comments instead of ';' comments.

    The message is given to `print_warn` and then to `warn` (as a
    `DeprecationWarning`). This is done only as long as the module-level
    flag `_SEMICOLON_COMMENTS_DEPRECATION_WARNED` is falsy; the flag is
    set to `True` once both calls have returned.
    """
    global _SEMICOLON_COMMENTS_DEPRECATION_WARNED
    if _SEMICOLON_COMMENTS_DEPRECATION_WARNED:
        return  # Already warned

    notice = (
        "Comments starting with a semi-colon ';' are now deprecated. "
        "Rather use the new double slash '//' syntax. "
        "This syntax allows to have a syntax closer to Chatito v2.1.x."
    )
    print_warn(notice)
    warn(notice, DeprecationWarning)
    _SEMICOLON_COMMENTS_DEPRECATION_WARNED = True
def _parse_intent_annotation(self, annotation): """ Given a dict representing the annotation corresponding to an intent declaration, returns the number of examples asked in the training and testing sets (as a 2-tuple). Returns `None` instead of a number if a number was not provided. @raises - `SyntaxError` if the number of examples provided are actually not integral numbers. - `SyntaxError` if the annotation contains the same information at least twice. Prints a warning if the annotation contains unrecognized keys. """ nb_training_ex = None nb_testing_ex = None for key in annotation: if key is None or key.lower() in ("training", "train"): if nb_training_ex is not None: self.input_file_manager.syntax_error( "Detected a number of examples for training set " + \ "several times." ) nb_training_ex = \ self._str_to_int( annotation[key], "Couldn't parse the annotation of the intent." ) elif key.lower() in ("testing", "test"): if nb_testing_ex is not None: self.input_file_manager.syntax_error( "Detected a number of examples for testing set " + \ "several times." ) nb_testing_ex = \ self._str_to_int( annotation[key], "Couldn't parse the annotation of the intent." ) else: print_warn("Unsupported key in the annotation: '" + key + "'.") return (nb_training_ex, nb_testing_ex)
def _parse_unit_declaration(self, lexical_tokens):
    """
    Parses the tokens `lexical_tokens` that contain a unit declaration.

    The type of the first token selects the builder to use (alias, slot
    or intent). The following tokens, up to the token that ends the
    declaration, fill in the builder (identifier, case generation,
    random generation and its name, variation name, argument name).
    If an annotation starts right after the declaration, it is parsed for
    intents (numbers of training and testing examples) and ignored with
    a warning for aliases and slots.

    Returns the 2-tuple `(concrete_unit, variation_name)` where
    `concrete_unit` is the result of `builder.create_concrete()` and
    `variation_name` is `builder.variation`.

    @raises: `ValueError` if the first token does not start a unit
             declaration or if an unexpected token type is found inside
             the declaration (neither should ever happen).
    """
    declaration_start = lexical_tokens[0].type
    if declaration_start == TerminalType.alias_decl_start:
        builder = AliasDefBuilder()
    elif declaration_start == TerminalType.slot_decl_start:
        builder = SlotDefBuilder()
    elif declaration_start == TerminalType.intent_decl_start:
        builder = IntentDefBuilder()
    else:  # Should never happen
        raise ValueError(
            "Tried to parse a line as if it was a unit declaration " + \
            "while it wasn't."
        )

    declaration_end_types = (
        TerminalType.alias_decl_end,
        TerminalType.slot_decl_end,
        TerminalType.intent_decl_end,
    )

    # Kept locally as well, so that it can be named in the warning below
    # without relying on what the builder exposes.
    identifier = None

    i = 1
    while i < len(lexical_tokens):
        token = lexical_tokens[i]
        if token.type == TerminalType.unit_identifier:
            identifier = token.text
            builder.identifier = identifier
        elif token.type == TerminalType.casegen_marker:
            builder.casegen = True
        elif token.type == TerminalType.randgen_marker:
            builder.randgen = True
        elif token.type == TerminalType.randgen_name:
            builder.randgen_name = token.text
        elif token.type == TerminalType.variation_marker:
            pass  # Only announces the variation name
        elif token.type == TerminalType.variation_name:
            builder.variation = token.text
        elif token.type == TerminalType.arg_marker:
            pass  # Only announces the argument name
        elif token.type == TerminalType.arg_name:
            builder.arg_name = token.text
        elif token.type in declaration_end_types:
            # Make `i` point to the token that follows the declaration
            i += 1
            break
        else:
            raise ValueError(  # Should never happen
                "Detected invalid token type in unit definition: " + \
                token.type.name
            )
        i += 1

    if (
        i < len(lexical_tokens)
        and lexical_tokens[i].type == TerminalType.annotation_start
    ):
        if not isinstance(builder, IntentDefBuilder):
            if isinstance(builder, AliasDefBuilder):
                unit_type = "alias"
            else:
                unit_type = "slot"
            # `str()` guards against a declaration without identifier
            print_warn(
                "Found an annotation when parsing " + unit_type + " '" + \
                str(identifier) + "'\n" + \
                "Annotations are currently only supported for intent " + \
                "definitions. Any other annotation is ignored."
            )
        else:
            annotation_tokens = lexical_tokens[i:]
            annotation = self._annotation_tokens_to_dict(annotation_tokens)
            (nb_training_ex, nb_testing_ex) = \
                self._parse_intent_annotation(annotation)
            builder.nb_training_ex = nb_training_ex
            builder.nb_testing_ex = nb_testing_ex

    return (builder.create_concrete(), builder.variation)
def _tokenize(self, text):
    """
    Splits the string `text` into a list of tokens (as strings): words,
    single spaces (`' '`), raw units and raw choices.

    An escapement symbol and the character that follows it are both kept
    verbatim in the current token. Unit or choice closing symbols that do
    not close anything produce a warning and are then considered as
    normal characters.
    """
    tokens = []
    current = ""  # Token that is currently being built

    escaped = False
    inside_choice = False
    for c in text:
        if escaped:
            # Character following the escapement symbol: taken verbatim
            current += c
            escaped = False
            continue
        elif inside_choice:
            # Everything up to the closing symbol belongs to the choice
            if c == pu.CHOICE_CLOSE_SYM:
                tokens.append(current + c)
                current = ""
                inside_choice = False
            else:
                current += c
        elif c == pu.ESCAPE_SYM:
            escaped = True
            current += c
        elif c.isspace():
            if not pu.is_unit_start(current) and \
               not pu.is_choice(current):
                # End of word
                if current != "":
                    tokens.append(current)
                tokens.append(' ')
                current = ""
            elif current == "" and \
                 len(tokens) > 0 and tokens[-1] == ' ':
                # NOTE(review): only reachable if `pu.is_unit_start("")`
                # or `pu.is_choice("")` is true -- confirm.
                continue  # Double space in-between words
            else:
                # Whitespace inside of a unit: part of the token
                current += c
        elif c == pu.UNIT_CLOSE_SYM:
            if pu.is_unit_start(current):
                tokens.append(current + c)
                current = ""
            else:
                print_warn(
                    "Inconsistent use of the unit close symbol (" +
                    pu.UNIT_CLOSE_SYM + ") at line " + str(self.line_nb) +
                    " of file '" + self.in_file.name +
                    "'. Consider escaping them if they are " +
                    "not supposed to close a unit.\nThe generation will " +
                    "however continue, considering it as a normal character."
                )
                current += c
        elif c == pu.CHOICE_CLOSE_SYM:
            # Not inside a choice (handled above): stray closing symbol
            print_warn(
                "Inconsistent use of the choice close symbol (" +
                pu.CHOICE_CLOSE_SYM + ") at line " + str(self.line_nb) +
                " of file '" + self.in_file.name +
                "'. Consider escaping them if they are " +
                "not supposed to close a choice.\nThe generation will " +
                "however continue, considering it as a normal character."
            )
            current += c
        elif c == pu.CHOICE_OPEN_SYM:
            if current != "":
                tokens.append(current)
            inside_choice = True
            current = c
        elif pu.is_start_unit_sym(c) and current != pu.ALIAS_SYM and \
             current != pu.SLOT_SYM and current != pu.INTENT_SYM:
            # Beginning of a unit: the token being built ends here
            if current != "":
                tokens.append(current)
            current = c
        else:  # Any other character
            current += c

    if current != "":
        tokens.append(current)
    return tokens