Exemplo n.º 1
0
def test_list_files_ignores_hidden_files(tmpdir):
    """Check that ``utils.list_files`` leaves dot-files out of its result."""
    base_dir = tmpdir.strpath
    hidden_path = os.path.join(base_dir, ".hidden")
    normal_path = os.path.join(base_dir, "normal_file")

    # Opening in append mode creates each file without writing anything;
    # the hidden file is created first, then the regular one.
    for path in (hidden_path, normal_path):
        with open(path, 'a'):
            pass

    listed = utils.list_files(base_dir)
    assert listed == [normal_path]
Exemplo n.º 2
0
    async def read_from_folder(resource_name, domain,
                               interpreter=RegexInterpreter(),
                               template_variables=None, use_e2e=False,
                               exclusion_percentage=None):
        """Read every story file found under ``resource_name``.

        Args:
            resource_name: Path to a story file or to a folder of story files.
            domain: Forwarded to ``StoryFileReader.read_from_file``.
            interpreter: Forwarded to ``StoryFileReader.read_from_file``.
            template_variables: Forwarded to
                ``StoryFileReader.read_from_file``.
            use_e2e: Forwarded to ``StoryFileReader.read_from_file``.
            exclusion_percentage: When truthy and not equal to 100, the
                collected steps are shuffled and roughly this percentage of
                them is dropped from the end of the shuffled list.

        Returns:
            A list with the story steps of all files, in listing order unless
            an exclusion percentage caused them to be shuffled.

        Raises:
            ValueError: If ``resource_name`` does not exist on disk.
        """
        import rasa_nlu.utils as nlu_utils

        if not os.path.exists(resource_name):
            raise ValueError("Story file or folder could not be found. Make "
                             "sure '{}' exists and points to a story folder "
                             "or file.".format(os.path.abspath(resource_name)))

        story_steps = []
        for f in nlu_utils.list_files(resource_name):
            steps = await StoryFileReader.read_from_file(f, domain,
                                                         interpreter,
                                                         template_variables,
                                                         use_e2e)
            story_steps.extend(steps)

        # Compare by value: ``is not 100`` only happened to work through
        # CPython's small-int cache and never matched a float such as 100.0.
        if exclusion_percentage and exclusion_percentage != 100:
            import random
            idx = int(round(exclusion_percentage / 100.0 * len(story_steps)))
            random.shuffle(story_steps)
            # ``steps[:-0]`` is ``steps[:0]`` and would throw away everything,
            # so only slice when at least one step actually has to be dropped.
            if idx:
                story_steps = story_steps[:-idx]

        return story_steps
Exemplo n.º 3
0
def test_list_files_ignores_hidden_files(tmpdir):
    """Verify that only the non-hidden file is listed for a directory."""
    directory = tmpdir.strpath

    # Append mode creates an empty file; the dot-file is created first.
    hidden_file = os.path.join(directory, ".hidden")
    with open(hidden_file, 'a'):
        pass

    normal_file = os.path.join(directory, "normal_file")
    with open(normal_file, 'a'):
        pass

    expected = [normal_file]
    assert utils.list_files(directory) == expected
Exemplo n.º 4
0
    def get_data(self, language):
        """Collect lookup tables and composite entities from the train files.

        The files are the ones listed for the ``data`` command line argument.
        Dialogflow entity files contribute at most one lookup table and one
        composite entity each; Rasa NLU files contribute whatever is stored
        under their ``composite_entities`` and ``lookup_tables`` keys.

        Args:
            language: Forwarded to ``DialogflowReader._read_examples_js``.

        Returns:
            A ``(lookup_tables, composite_entities)`` tuple of lists.
        """
        lookup_tables = []
        composite_entities = []

        cmdline_args = create_argument_parser().parse_args()
        files = utils.list_files(cmdline_args.data)

        for path in files:
            fformat = _guess_format(path)
            file_content = utils.read_json_file(path)
            if fformat == DIALOGFLOW_ENTITIES:
                entity = file_content['name']
                dialogflow_reader = DialogflowReader()
                examples_js = dialogflow_reader._read_examples_js(
                    fn=path, language=language, fformat=fformat)
                lookup_table = self._extract_lookup_tables(entity, examples_js)
                if lookup_table:
                    lookup_tables.append(lookup_table)
                composite_entity = self._extract_composite_entities(
                    entity, examples_js)
                if composite_entity:
                    composite_entities.append(composite_entity)
            elif fformat == RASA_NLU:
                rasa_nlu_data = file_content['rasa_nlu_data']
                # Merge instead of rebinding: assigning here used to discard
                # everything gathered from files processed earlier. Both keys
                # are treated as optional so a file without them is skipped
                # rather than raising KeyError.
                composite_entities.extend(
                    rasa_nlu_data.get('composite_entities') or [])
                lookup_tables.extend(
                    rasa_nlu_data.get('lookup_tables') or [])

        return lookup_tables, composite_entities
Exemplo n.º 5
0
    def read_from_folder(resource_name, domain, interpreter=RegexInterpreter(),
                         template_variables=None):
        """Collect the story steps of every file listed for ``resource_name``.

        Each path returned by ``nlu_utils.list_files`` is passed to
        ``StoryFileReader.read_from_file`` together with ``domain``,
        ``interpreter`` and ``template_variables``; the per-file results are
        concatenated in listing order.
        """
        return [
            step
            for story_file in nlu_utils.list_files(resource_name)
            for step in StoryFileReader.read_from_file(
                story_file, domain, interpreter, template_variables)
        ]
Exemplo n.º 6
0
    def read_from_folder(resource_name, domain, interpreter=RegexInterpreter(),
                         template_variables=None):
        """Read all story files found for ``resource_name`` into one list.

        The files come from ``nlu_utils.list_files``; every one of them is
        parsed with ``StoryFileReader.read_from_file`` and the resulting
        steps are appended in the order the files were listed.
        """
        collected = []
        story_files = nlu_utils.list_files(resource_name)
        for story_file in story_files:
            collected += StoryFileReader.read_from_file(
                story_file, domain, interpreter, template_variables)
        return collected
Exemplo n.º 7
0
def load_data(resource_name, language='en'):
    # type: (Text, Optional[Text]) -> TrainingData
    """Load the training data found for ``resource_name``.

    Every file reported by ``utils.list_files`` is loaded with ``_load``;
    falsy results are discarded. No remaining data set yields an empty
    ``TrainingData``, a single one is returned as is, and several are merged
    into the first one.
    """
    loaded = [_load(path, language)
              for path in utils.list_files(resource_name)]
    usable = [candidate for candidate in loaded if candidate]

    if not usable:
        return TrainingData()

    first, others = usable[0], usable[1:]
    if not others:
        return first
    return first.merge(*others)
Exemplo n.º 8
0
 def _get_train_file_cmd():
     """Return the single Rasa NLU train file named on the command line.

     The files are listed from the ``data`` command line argument and
     filtered by guessed format.

     Returns:
         The path of the one file whose guessed format is ``RASA_NLU``.

     Raises:
         AssertionError: If zero or more than one such file is found.
     """
     cmdline_args = create_argument_parser().parse_args()
     files = utils.list_files(cmdline_args.data)
     rasa_files = [file for file in files if _guess_format(file) == RASA_NLU]
     # TODO: Support multiple training files
     # Raised explicitly rather than via ``assert`` so the check survives
     # ``python -O``; the exception type stays the same for existing callers.
     if len(rasa_files) != 1:
         raise AssertionError('Composite entities currently '
                              'only work with exactly one train file.')
     return rasa_files[0]
Exemplo n.º 9
0
def load_data(resource_name, language='en'):
    # type: (Text, Optional[Text]) -> TrainingData
    """Read training data from disk, merging it when several files exist.

    Files are discovered through ``utils.list_files`` and read with
    ``_load``; results that evaluate as false are dropped before merging.
    """
    paths = utils.list_files(resource_name)
    everything = [_load(path, language) for path in paths]
    non_empty = [item for item in everything if item]

    count = len(non_empty)
    if count > 1:
        # The first data set absorbs all the remaining ones.
        return non_empty[0].merge(*non_empty[1:])
    if count == 1:
        return non_empty[0]
    return TrainingData()
Exemplo n.º 10
0
def load_data(resource_name: Text,
              language: Optional[Text] = 'en') -> 'TrainingData':
    """Load training data from disk and combine it into one object.

    All files listed for ``resource_name`` are loaded; falsy results are
    ignored. Without any data an empty ``TrainingData`` is returned, a single
    data set is returned unchanged, and multiple data sets are merged into
    the first one.
    """
    from rasa_nlu.training_data import TrainingData

    loaded = [_load(path, language)
              for path in utils.list_files(resource_name)]
    usable = [candidate for candidate in loaded if candidate]

    if not usable:
        return TrainingData()

    head, *tail = usable
    return head.merge(*tail) if tail else head
Exemplo n.º 11
0
def load_data(resource_name: Text,
              language: Optional[Text] = 'en') -> 'TrainingData':
    """Load, merge and validate the training data found on disk.

    All files listed for ``resource_name`` are loaded and falsy results are
    ignored. The remaining data sets are reduced to one object (an empty
    ``TrainingData`` when there are none), ``validate()`` is called on it
    and it is returned.
    """
    from rasa_nlu.training_data import TrainingData

    loaded = [_load(path, language)
              for path in utils.list_files(resource_name)]
    usable = [candidate for candidate in loaded if candidate]

    if usable:
        head, *tail = usable
        # A lone data set is used directly; otherwise the rest is merged in.
        training_data = head.merge(*tail) if tail else head
    else:
        training_data = TrainingData()

    training_data.validate()
    return training_data
Exemplo n.º 12
0
    def read_from_folder(resource_name,
                         domain,
                         interpreter=RegexInterpreter(),
                         template_variables=None,
                         use_e2e=False,
                         exclusion_percentage=None):
        """Read every story file found under ``resource_name``.

        Args:
            resource_name: Path handed to ``nlu_utils.list_files``.
            domain: Forwarded to ``StoryFileReader.read_from_file``.
            interpreter: Forwarded to ``StoryFileReader.read_from_file``.
            template_variables: Forwarded to
                ``StoryFileReader.read_from_file``.
            use_e2e: Forwarded to ``StoryFileReader.read_from_file``.
            exclusion_percentage: When truthy and not equal to 100, the
                collected steps are shuffled and roughly this percentage of
                them is dropped from the end of the shuffled list.

        Returns:
            A list with the story steps of all files, in listing order unless
            an exclusion percentage caused them to be shuffled.
        """
        story_steps = []
        for f in nlu_utils.list_files(resource_name):
            steps = StoryFileReader.read_from_file(f, domain, interpreter,
                                                   template_variables, use_e2e)
            story_steps.extend(steps)

        # The exclusion is applied once to the complete list. Running it
        # inside the loop truncated the steps of earlier files again for
        # every additional file that was read.
        # Compare by value: ``is not 100`` only happened to work through
        # CPython's small-int cache and never matched a float such as 100.0.
        if exclusion_percentage and exclusion_percentage != 100:
            import random
            idx = int(
                round(exclusion_percentage / 100.0 * len(story_steps)))
            random.shuffle(story_steps)
            # ``steps[:-0]`` is ``steps[:0]`` and would throw away everything,
            # so only slice when at least one step actually has to be dropped.
            if idx:
                story_steps = story_steps[:-idx]

        return story_steps
Exemplo n.º 13
0
def test_list_files_non_existing_dir():
    """``utils.list_files`` must raise ValueError for a missing path."""
    missing_path = "my/made_up/path"
    with pytest.raises(ValueError) as raised:
        utils.list_files(missing_path)
    message = str(raised.value)
    assert "Could not locate the resource" in message
Exemplo n.º 14
0
def test_list_files_invalid_resource():
    """``utils.list_files`` must raise ValueError when given ``None``."""
    invalid_resource = None
    with pytest.raises(ValueError) as raised:
        utils.list_files(invalid_resource)
    message = str(raised.value)
    assert "must be a string type" in message
Exemplo n.º 15
0
def test_list_files_non_existing_dir():
    """A made-up directory must be rejected with a descriptive ValueError."""
    expected_fragment = "Could not locate the resource"
    with pytest.raises(ValueError) as error_info:
        utils.list_files("my/made_up/path")
    assert expected_fragment in str(error_info.value)
Exemplo n.º 16
0
def test_list_files_invalid_resource():
    """Passing ``None`` must be rejected with a descriptive ValueError."""
    expected_fragment = "must be a string type"
    with pytest.raises(ValueError) as error_info:
        utils.list_files(None)
    assert expected_fragment in str(error_info.value)