def test_write_filled_tsv_taxonomy(self):
    """
    Round-trip taxonomies through io.write and io.read using a '.tsv' file.

    Taxonomy objects are built from taxonomy_dict, written to a temp file,
    read back, and every object that was read is compared (by its
    attribute dict) with the fixture entry stored under the same key.
    """
    tsv_path = declare_temp_file('.tsv')

    # io.write is handed objects rather than raw dicts, so build them first
    taxonomies = {
        name: Taxonomy(**taxonomy_dict[name])
        for name in taxonomy_dict
    }

    io.write(taxonomies, tsv_path)
    loaded = io.read(tsv_path)

    for name in loaded:
        actual = vars(loaded[name])
        expected = taxonomy_dict[name]
        self.assertDictEqual(actual, expected)
def _read_table(self, path):
    """
    Build a CodeTable from the file at the given path.

    Steps:
    - parse the file with io.read
    - instantiate an empty CodeTable
    - hand the parsed taxonomies to CodeTable.read

    params:
        path (str)
    returns:
        CodeTable: the newly created table
    """
    taxonomies = io.read(path)
    code_table = CodeTable()
    code_table.read(taxonomies)
    return code_table
def parse_config(self, file_path):
    """
    Read the config file and process every known option.

    The result of io.read(file_path) is stored on self.parser. Each entry
    of self.options is then passed to self._parse_required when its
    `required` attribute is truthy, and to self._parse_optional otherwise.

    NOTE(review): this method has no return statement; if callers expect
    the parsed config as a dict, confirm where it is exposed.

    params:
        file_path (str): path to config file
    """
    self.parser = io.read(file_path)

    for option in self.options:
        handler = (
            self._parse_required if option.required
            else self._parse_optional
        )
        handler(option)
def train_vector_space(self, train_params, output_path):
    """
    Train the vector space model, or load it if it was already written.

    When io.exists(output_path) is true, the model is loaded from that
    location with io.read and no training takes place. Otherwise phrases
    are collected, converted to tokens, passed to word2vec together with
    train_params, vectors are assigned to the phrases, and the resulting
    self.vector_space_model is written to output_path.

    params:
        train_params (dict): forwarded to self._start_word2vec
        output_path: location of the stored model (presumably a str
            path - confirm against io.exists / io.read / io.write)
    """
    if io.exists(output_path):
        # Pass the path as a logging argument so the message is only
        # formatted when the record is actually emitted.
        logger.debug('Vector space already exists, loading %s', output_path)
        self.vector_space_model = io.read(output_path)
    else:
        logger.info('Training vector space using word2vec')
        self._collect_phrases()
        phrases_as_tokens = self._phrases_to_tokens()
        self._start_word2vec(phrases_as_tokens, train_params)
        self._assign_vectors_to_phrases()
        logger.debug('Writing vector space to %s', output_path)
        io.write(self.vector_space_model, output_path)
def test_read_empty_tsv_taxonomy(self):
    """
    Read the code_table_tsv fixture from a '.tsv' temp file with io.read
    and compare every returned object (by its attribute dict) with the
    code_table_dict entry stored under the same key.

    NOTE(review): the test name mentions a taxonomy while the fixtures are
    the code_table_* ones - confirm this pairing is intended.
    NOTE(review): if io.read returns nothing to iterate over, the loop
    body never runs and the test passes without asserting anything.
    """
    tsv_path = create_temp_file(code_table_tsv, '.tsv')
    loaded = io.read(tsv_path)

    for name in loaded:
        actual = vars(loaded[name])
        expected = code_table_dict[name]
        self.assertDictEqual(actual, expected)
def test_read_filled_xml_taxonomy(self):
    """
    Read the taxonomy_xml fixture from a '.xml' temp file with io.read and
    compare every returned object (by its attribute dict) with the
    taxonomy_dict entry stored under the same key.

    NOTE(review): if io.read returns nothing to iterate over, the loop
    body never runs and the test passes without asserting anything.
    """
    xml_path = create_temp_file(taxonomy_xml, '.xml')
    loaded = io.read(xml_path)

    for name in loaded:
        actual = vars(loaded[name])
        expected = taxonomy_dict[name]
        self.assertDictEqual(actual, expected)
def test_read_wrong_extension(self):
    """
    Call io.read on a temp file created from the taxonomy_tsv fixture but
    given a '.json' suffix.

    NOTE(review): nothing is asserted here; presumably io.read is expected
    to raise and an expected-exception decorator sits outside this view -
    confirm, otherwise the test only checks that the call does not fail.
    """
    io.read(create_temp_file(taxonomy_tsv, '.json'))