示例#1
0
    def test_write_filled_tsv_taxonomy(self):
        """Write taxonomies to a .tsv file, read them back and compare."""
        tsv_path = declare_temp_file('.tsv')

        # io.write is given Taxonomy objects rather than the raw fixture
        # dicts, so build one object per fixture entry first.
        taxonomies = {
            key: Taxonomy(**fields)
            for key, fields in taxonomy_dict.items()
        }

        io.write(taxonomies, tsv_path)
        round_tripped = io.read(tsv_path)

        # Each entry read back must expose the attributes it was built from.
        for key in round_tripped:
            self.assertDictEqual(vars(round_tripped[key]), taxonomy_dict[key])
示例#2
0
    def _read_table(self, path):
        """
        Build a CodeTable from a single file.

        The file is parsed with io.read and the parsed result is handed to
        the read() method of a freshly created CodeTable.

        params:
            path (str): location of the table file

        returns:
            CodeTable: the newly created table
        """
        taxonomies = io.read(path)
        code_table = CodeTable()
        code_table.read(taxonomies)
        return code_table
示例#3
0
    def parse_config(self, file_path):
        """
        Load the config file and process every entry of self.options.

        The result of io.read is stored on self.parser. Each option is then
        passed to _parse_required or _parse_optional, chosen by the option's
        `required` attribute.

        params:
            file_path (str): path to config file

        returns:
            None: nothing is returned by this method; the outcome is
            presumably kept on the instance by the _parse_* helpers
            (TODO confirm against those helpers)
        """
        self.parser = io.read(file_path)

        for option in self.options:
            handle = (self._parse_required if option.required
                      else self._parse_optional)
            handle(option)
示例#4
0
    def train_vector_space(self, train_params, output_path):
        """
        Train the vector space model, or load it when one already exists.

        If io.exists reports output_path as present, the model is read from
        there and no training takes place. Otherwise the model is trained
        and then written to output_path.

        params:
            train_params (dict): passed through to _start_word2vec
            output_path: location the model is loaded from or written to
                (presumably a path string)
        """
        if io.exists(output_path):
            # The path is passed as a logging argument so that interpolation
            # is left to the logging framework instead of being done eagerly.
            logger.debug('Vector space already exists, loading %s',
                         output_path)
            self.vector_space_model = io.read(output_path)
        else:
            logger.info('Training vector space using word2vec')
            self._collect_phrases()
            phrases_as_tokens = self._phrases_to_tokens()
            self._start_word2vec(phrases_as_tokens, train_params)
            self._assign_vectors_to_phrases()
            logger.debug('Writing vector space to %s', output_path)

            io.write(self.vector_space_model, output_path)
示例#5
0
    def test_read_empty_tsv_taxonomy(self):
        """Read the code-table .tsv fixture and compare with code_table_dict."""
        tsv_path = create_temp_file(code_table_tsv, '.tsv')
        parsed = io.read(tsv_path)

        # Compare the attributes of every parsed entry with the fixture.
        for key in parsed:
            self.assertDictEqual(vars(parsed[key]), code_table_dict[key])
示例#6
0
    def test_read_filled_xml_taxonomy(self):
        """Read the taxonomy .xml fixture and compare with taxonomy_dict."""
        xml_path = create_temp_file(taxonomy_xml, '.xml')
        parsed = io.read(xml_path)

        # Compare the attributes of every parsed entry with the fixture.
        for key in parsed:
            self.assertDictEqual(vars(parsed[key]), taxonomy_dict[key])
示例#7
0
 def test_read_wrong_extension(self):
     """Call io.read on the taxonomy TSV fixture saved with a '.json' suffix.

     NOTE(review): no assertion is made here; presumably the read is
     expected to fail and that expectation is declared outside this
     method (e.g. by a decorator) - confirm.
     """
     io.read(create_temp_file(taxonomy_tsv, '.json'))