示例#1
0
    def data_inference(self, unlabeled_document_name, output_name, w):
        """Parse every sentence of an unlabeled file and write the result.

        The input is read line by line. Each non-blank line is split on
        tabs: column 0 is the integer token index, column 1 the token and
        column 3 its POS tag. A blank (or whitespace-only) line ends the
        current sentence. For each sentence a full graph over its tokens
        plus an artificial ``('root', 'root', 0)`` node is built, weighted
        with ``w`` and handed to ``edmonds.mst``; the resulting arches are
        written to the output file through ``self.write_lines``.

        A final sentence that is not followed by a blank line is processed
        as well, so the last sentence of the file is never lost.

        Args:
            unlabeled_document_name: path of the tab-separated input file.
            output_name: path of the output file (created or truncated).
            w: weights forwarded to ``AuxFunctions.get_weighted_graph``
                (presumably the learned weight vector -- confirm against
                the training code).
        """
        root = ('root', 'root', 0)

        def infer_and_write(tokens, out_file):
            # Nodes are (token, pos, index) triples; the root node is always
            # part of the graph and always owns index 0.
            nodes = {(tokens[idx][0], tokens[idx][1], idx) for idx in tokens}
            nodes.add(root)
            words_tags = {idx: tokens[idx][1] for idx in tokens}
            words_tags[0] = 'root'

            full_unweighted_g = AuxFunctions.make_full_graph(list(nodes))
            g_features = AuxFunctions.get_features_for_graph(
                self.features, full_unweighted_g, words_tags,
                self.is_improved)
            full_weighted_g = AuxFunctions.get_weighted_graph(
                full_unweighted_g, g_features, w)

            # NOTE(review): presumably a maximum spanning arborescence
            # rooted at the root node -- confirm against the edmonds module.
            g_inference = edmonds.mst(root, full_weighted_g)
            self.write_lines(self.get_arches_in_order(g_inference), out_file)

        sentence_num = 0
        # index -> (token, pos) for the sentence currently being read
        sentence_words_pos = dict()
        # The context managers close both files; no explicit close() needed.
        with open(unlabeled_document_name, 'r') as f1, \
                open(output_name, 'w') as f2:
            for line in f1:
                if line.strip():
                    split_line = line.split('\t')
                    sentence_words_pos[int(
                        split_line[0])] = (split_line[1], split_line[3])
                    continue

                # Blank separator line: the sentence is complete.
                infer_and_write(sentence_words_pos, f2)
                sentence_num += 1
                print('Done sentence number ' + str(sentence_num))
                sentence_words_pos = dict()

            # Flush a trailing sentence that has no terminating blank line.
            if sentence_words_pos:
                infer_and_write(sentence_words_pos, f2)
                sentence_num += 1
                print('Done sentence number ' + str(sentence_num))
示例#2
0
    def make_graphs(self):
        """Build one graph pair per entry of ``self.arches_data_list``.

        Every entry is indexed as ``(arches tuples, full data)``. Each arch
        tuple is read as ``((a, b, c, d), i, j)`` and contributes an edge
        from node ``(a, b, i)`` to node ``(c, d, j)`` with weight ``0`` in an
        adjacency dict of the form ``{source: {target: 0}}``.
        NOTE(review): the node layout matches ``(token, pos, index)``, so the
        first node is presumably the head and the second its child --
        confirm against the code that fills ``arches_data_list``.

        The pair ``(adjacency dict, AuxFunctions.make_full_graph(full data))``
        is appended to ``self.graphs`` for every entry, in order.
        """
        for arches_data in self.arches_data_list:
            g = dict()

            for arch_tuple in arches_data[0]:
                labels = arch_tuple[0]
                source = (labels[0], labels[1], arch_tuple[1])
                target = (labels[2], labels[3], arch_tuple[2])
                # setdefault creates the inner dict on first sight of source.
                g.setdefault(source, {})[target] = 0

            self.graphs.append(
                (g, AuxFunctions.make_full_graph(arches_data[1])))