def guess_format(filename: Text) -> Text:
    """Guess the training data format of a file by applying heuristics.

    The file content is first parsed as JSON. If that succeeds, the JSON
    heuristics registered in `_json_format_heuristics` are tried in order and
    the first one that matches decides the format. If parsing raises a
    `ValueError`, the file is checked against the Markdown NLU, Markdown NLG
    and Rasa YAML detectors, in that order.

    Args:
        filename: Path of the file whose format should be guessed.

    Returns:
        The guessed file format, or `UNK` if `filename` is not an existing
        file or none of the heuristics matched.
    """
    from rasa.shared.nlu.training_data.formats import RasaYAMLReader

    # Nothing to inspect: bail out early (no debug message is logged here).
    if not os.path.isfile(filename):
        return UNK

    detected = UNK

    try:
        raw_text = rasa.shared.utils.io.read_file(filename)
        parsed = json.loads(raw_text)
    except ValueError:
        # Not JSON: try the text-based detectors; the first hit wins.
        text_format_checks = (
            (MarkdownReader.is_markdown_nlu_file, MARKDOWN),
            (NLGMarkdownReader.is_markdown_nlg_file, MARKDOWN_NLG),
            (RasaYAMLReader.is_yaml_nlu_file, RASA_YAML),
        )
        for is_match, candidate in text_format_checks:
            if is_match(filename):
                detected = candidate
                break
    else:
        # Valid JSON: let the registered JSON heuristics decide.
        for candidate, is_match in _json_format_heuristics.items():
            if is_match(parsed, filename):
                detected = candidate
                break

    logger.debug(f"Training data format of '{filename}' is '{detected}'.")

    return detected
def filter(cls, source_path: Path) -> bool:
    """Tell whether a training data file holds NLU data in `Markdown` format.

    Such files are the ones that can be converted to `YAML`. The decision is
    delegated to `MarkdownReader.is_markdown_nlu_file`.

    Args:
        source_path: Path to the training data file.

    Returns:
        `True` if the given file can be converted, `False` otherwise.
    """
    is_convertible = MarkdownReader.is_markdown_nlu_file(source_path)
    return is_convertible