def test_list_files_ignores_hidden_files(tmpdir):
    """Dot-prefixed (hidden) files must be excluded from list_files output."""
    hidden = os.path.join(tmpdir.strpath, ".hidden")
    visible = os.path.join(tmpdir.strpath, "normal_file")
    # Create both an empty hidden file and an empty normal file.
    for path in (hidden, visible):
        open(path, 'a').close()
    # Only the non-hidden file should be listed.
    assert utils.list_files(tmpdir.strpath) == [visible]
async def read_from_folder(resource_name, domain,
                           interpreter=RegexInterpreter(),
                           template_variables=None, use_e2e=False,
                           exclusion_percentage=None):
    """Given a path reads all contained story files.

    Args:
        resource_name: path to a story file or a folder containing story
            files.
        domain: the domain the stories belong to.
        interpreter: message interpreter used while reading the stories.
        template_variables: variables substituted while reading the stories.
        use_e2e: whether to read the stories in end-to-end format.
        exclusion_percentage: percentage of collected story steps to drop
            at random; ``None`` (and ``100``, per the guard below) keeps
            every step.

    Returns:
        List of story steps aggregated over all story files.

    Raises:
        ValueError: if ``resource_name`` does not exist on disk.
    """
    import rasa_nlu.utils as nlu_utils

    if not os.path.exists(resource_name):
        raise ValueError("Story file or folder could not be found. Make "
                         "sure '{}' exists and points to a story folder "
                         "or file.".format(os.path.abspath(resource_name)))

    story_steps = []
    for f in nlu_utils.list_files(resource_name):
        steps = await StoryFileReader.read_from_file(f, domain, interpreter,
                                                     template_variables,
                                                     use_e2e)
        story_steps.extend(steps)

    # if exclusion percentage is not 100
    # FIX: the original used `is not 100`, an identity comparison that only
    # "works" because CPython caches small ints; `!=` is the value comparison
    # that was intended.
    if exclusion_percentage and exclusion_percentage != 100:
        import random
        idx = int(round(exclusion_percentage / 100.0 * len(story_steps)))
        random.shuffle(story_steps)
        story_steps = story_steps[:-idx]

    return story_steps
def get_data(self, language):
    """Collect lookup tables and composite entities from the train files.

    Scans the data files named on the command line; Dialogflow entity files
    are parsed individually, while a RASA-format file replaces the collected
    results wholesale with its own ``lookup_tables``/``composite_entities``.
    """
    lookup_tables = []
    composite_entities = []
    cmdline_args = create_argument_parser().parse_args()
    for train_file in utils.list_files(cmdline_args.data):
        fformat = _guess_format(train_file)
        file_content = utils.read_json_file(train_file)
        if fformat == DIALOGFLOW_ENTITIES:
            entity = file_content['name']
            reader = DialogflowReader()
            examples_js = reader._read_examples_js(fn=train_file,
                                                   language=language,
                                                   fformat=fformat)
            lookup_table = self._extract_lookup_tables(entity, examples_js)
            if lookup_table:
                lookup_tables.append(lookup_table)
            composite_entity = self._extract_composite_entities(
                entity, examples_js)
            if composite_entity:
                composite_entities.append(composite_entity)
        if fformat == RASA_NLU:
            # RASA-format data carries both lists directly.
            rasa_nlu_data = file_content['rasa_nlu_data']
            composite_entities = rasa_nlu_data['composite_entities']
            lookup_tables = rasa_nlu_data['lookup_tables']
    return lookup_tables, composite_entities
def read_from_folder(resource_name, domain, interpreter=RegexInterpreter(),
                     template_variables=None):
    """Given a path reads all contained story files."""
    story_steps = []
    # Aggregate the steps parsed from every story file under the resource.
    for story_file in nlu_utils.list_files(resource_name):
        story_steps.extend(
            StoryFileReader.read_from_file(story_file, domain,
                                           interpreter, template_variables))
    return story_steps
def load_data(resource_name, language='en'):
    # type: (Text, Optional[Text]) -> TrainingData
    """Load training data from disk.

    Merges them if multiple files are found."""
    loaded = [_load(f, language) for f in utils.list_files(resource_name)]
    # Drop files that produced no data set.
    data_sets = [ds for ds in loaded if ds]

    if not data_sets:
        return TrainingData()
    elif len(data_sets) == 1:
        return data_sets[0]
    else:
        return data_sets[0].merge(*data_sets[1:])
def _get_train_file_cmd():
    """Get the raw train data by fetching the train file given in the
    command line arguments to the train script.

    Returns:
        The path of the single RASA-format training file.

    Raises:
        AssertionError: if the command line arguments do not name exactly
            one RASA-format training file.
    """
    cmdline_args = create_argument_parser().parse_args()
    files = utils.list_files(cmdline_args.data)
    # FIX: don't shadow the builtin `file`, and actually use the computed
    # count instead of re-summing the list in the assertion.
    is_rasa_format = [_guess_format(f) == RASA_NLU for f in files]
    n_rasa_format = sum(is_rasa_format)
    # TODO: Support multiple training files
    assert n_rasa_format == 1, ('Composite entities currently '
                                'only work with exactly one train file.')
    # Index of the first (and only) RASA-format file.
    return files[is_rasa_format.index(True)]
def load_data(resource_name: Text,
              language: Optional[Text] = 'en') -> 'TrainingData':
    """Load training data from disk.

    Merges them if loaded from disk and multiple files are found."""
    from rasa_nlu.training_data import TrainingData

    loaded = (_load(f, language) for f in utils.list_files(resource_name))
    # Keep only the files that actually yielded a data set.
    data_sets = [ds for ds in loaded if ds]

    if not data_sets:
        return TrainingData()
    if len(data_sets) == 1:
        return data_sets[0]
    return data_sets[0].merge(*data_sets[1:])
def load_data(resource_name: Text,
              language: Optional[Text] = 'en') -> 'TrainingData':
    """Load training data from disk.

    Merges them if loaded from disk and multiple files are found."""
    from rasa_nlu.training_data import TrainingData

    loaded = [_load(f, language) for f in utils.list_files(resource_name)]
    # Keep only the files that actually yielded a data set.
    data_sets = [ds for ds in loaded if ds]

    if not data_sets:
        merged = TrainingData()
    elif len(data_sets) == 1:
        merged = data_sets[0]
    else:
        merged = data_sets[0].merge(*data_sets[1:])

    # Validate the merged result before handing it to the caller.
    merged.validate()
    return merged
def read_from_folder(resource_name, domain,
                     interpreter=RegexInterpreter(),
                     template_variables=None, use_e2e=False,
                     exclusion_percentage=None):
    """Given a path reads all contained story files.

    Args:
        resource_name: path to a story file or a folder containing story
            files.
        domain: the domain the stories belong to.
        interpreter: message interpreter used while reading the stories.
        template_variables: variables substituted while reading the stories.
        use_e2e: whether to read the stories in end-to-end format.
        exclusion_percentage: percentage of collected story steps to drop
            at random; ``None`` (and ``100``, per the guard below) keeps
            every step.

    Returns:
        List of story steps aggregated over all story files.
    """
    story_steps = []
    for f in nlu_utils.list_files(resource_name):
        steps = StoryFileReader.read_from_file(f, domain, interpreter,
                                               template_variables, use_e2e)
        story_steps.extend(steps)

    # if exclusion percentage is not 100
    # FIX: the original used `is not 100`, an identity comparison that only
    # "works" because CPython caches small ints; `!=` is the value comparison
    # that was intended.
    if exclusion_percentage and exclusion_percentage != 100:
        import random
        idx = int(round(exclusion_percentage / 100.0 * len(story_steps)))
        random.shuffle(story_steps)
        story_steps = story_steps[:-idx]

    return story_steps
def test_list_files_non_existing_dir():
    """list_files must raise ValueError for a path that does not exist."""
    with pytest.raises(ValueError) as excinfo:
        utils.list_files("my/made_up/path")
    # The error message should point at the missing resource.
    assert "Could not locate the resource" in str(excinfo.value)
def test_list_files_invalid_resource():
    """list_files must raise ValueError when given a non-string resource."""
    with pytest.raises(ValueError) as excinfo:
        utils.list_files(None)
    # The error message should complain about the argument type.
    assert "must be a string type" in str(excinfo.value)