Exemplo n.º 1
0
 def perceptron(self):
     """Run one pass of perceptron weight updates over ``self.all_data``.

     For each sentence, the successor map of the spanning tree returned by
     ``Digraph.mst()`` (scored with the current ``self.w``) is compared
     with the graph produced by ``build_real_graph``. When the real graph
     contains edges the spanning tree lacks, the features of those edges
     are added to ``self.w`` and the features of the edges found only in
     the spanning tree are subtracted.

     Returns:
         True if no sentence triggered a weight update during this pass,
         False otherwise.
     """
     no_updates = True
     if not self.iter_set:
         self.iter += 1
     for idx, sentence in enumerate(self.all_data, start=1):
         candidates = build_full_graph(len(sentence))
         scorer = GetScore(idx, self.w, sentence, self.feature_extractor)
         digraph = Digraph(candidates, scorer)
         reference = build_real_graph(sentence)
         predicted = digraph.mst().successors
         # Edges in the reference graph that the spanning tree is missing.
         missing = {
             node: [succ for succ in succs if succ not in predicted[node]]
             for node, succs in reference.items()
         }
         # Edges in the spanning tree that the reference graph lacks.
         spurious = {
             node: [succ for succ in predicted[node]
                    if succ not in reference[node]]
             for node in reference
         }
         if any(missing.values()):
             no_updates = False
             # Add the missing-edge features first, then subtract the
             # spurious-edge features (same call order as a two-step update).
             self.w = minus(
                 plus(self.w,
                      self.graph_feature_extractor(idx, missing, sentence)),
                 self.graph_feature_extractor(idx, spurious, sentence))
         progress_bar(idx, len(self.all_data), "sentences")
     return no_updates
Exemplo n.º 2
0
 def test_create_full_graph(self):
     # Asserts that GraphUtil.create_full_graph raises when given the
     # successor map of a small graph's spanning tree wrapped as {0: ...}.
     a = {0: [1, 2, 3], 1: [2, 3], 2: [1, 3], 3: [1, 2]}
     graph = Digraph(a)
     new_graph = graph.mst()
     suc = new_graph.successors
     # NOTE(review): the return value is discarded here; presumably
     # make_vaild works in place - confirm against its definition.
     self.make_vaild(suc)
     suc = {0: suc}
     m = self.Model(suc)
     # The instance is not used below; construction is kept in case it has
     # side effects the call under test relies on (TODO confirm).
     StructPerceptron(m, directory=directory)
     # Pass the callable and its argument separately. Calling
     # create_full_graph(suc) inline runs it *before* assertRaises can
     # observe the exception, so the assertion never verified anything.
     self.assertRaises(Exception, GraphUtil.create_full_graph, suc)
Exemplo n.º 3
0
 def test_perceptron(self):
     # Runs perc.perceptron(n) on a model built from the spanning tree of a
     # small four-node graph and prints the ``.A`` attribute of the result.
     n = 10
     a = {0: [1, 2, 3], 1: [2, 3], 2: [1, 3], 3: [1, 2]}
     graph = Digraph(a)
     new_graph = graph.mst()
     suc = new_graph.successors
     suc = self.make_vaild(suc)
     # Wrap the successor map under key 0 before handing it to the model.
     suc = {0: suc}
     m = self.Model(suc)
     perc = StructPerceptron(m, directory=directory)
     # NOTE(review): assertRaises receives the *result* of perc.perceptron(n)
     # rather than a callable plus arguments, so perceptron runs before
     # assertRaises can observe anything. The print below needs a successful
     # return, so the intended assertion is unclear - confirm and rewrite.
     self.assertRaises(Exception, perc.perceptron(n))
     # This is a second, separate call to perceptron(n).
     print(perc.perceptron(n).A)
Exemplo n.º 4
0
 def test_identical_dependency_tree(self):
     # Checks that GraphUtil.identical_dependency_tree returns a value equal
     # to True for the perceptron's first full graph and the adjacency map
     # the spanning tree was built from.
     adjacency = {0: [1, 2, 3], 1: [2, 3], 2: [1, 3], 3: [1, 2]}
     successors = Digraph(adjacency).mst().successors
     self.make_vaild(successors)
     # The same wrapped dict object goes to both the model and GraphUtil.
     wrapped = {0: successors}
     model = self.Model(wrapped)
     perc = StructPerceptron(model, directory=directory)
     GraphUtil.create_full_graph(wrapped)
     first_full_graph = perc.full_graph[0]
     is_identical = GraphUtil.identical_dependency_tree(
         first_full_graph, adjacency)
     self.assertEqual(True, is_identical)
Exemplo n.º 5
0
 def infer(self, sentence):
     """Annotate every word of ``sentence`` with an entry from the parse.

     A spanning tree is computed over the full graph for the sentence
     length, scored with the current weights ``self.w``. Its successor map
     is passed through ``inverse_graph`` and each word's ``head`` is the
     entry of that inverted map stored under the word's ``counter``.

     Args:
         sentence: sequence of word objects exposing ``counter``,
             ``token`` and ``pos`` attributes.

     Returns:
         A list of ``Word`` namedtuples (``counter``, ``token``, ``pos``,
         ``head``), one per word, in sentence order.
     """
     candidates = build_full_graph(len(sentence))
     scorer = GetScore(-1, self.w, sentence, self.feature_extractor)
     successors = Digraph(candidates, scorer).mst().successors
     head_of = inverse_graph(successors)
     Word = namedtuple('Word', 'counter token pos head')
     annotated = []
     for word in sentence:
         record = Word(
             counter=word.counter,
             token=word.token,
             pos=word.pos,
             head=head_of[word.counter],
         )
         annotated.append(record)
     return annotated
Exemplo n.º 6
0
def get_score(h, m):
    """Return the weight stored for edge ``(h, m)``.

    The lookup reads the module-level ``curr_G_wieghts`` mapping at call
    time; an edge with no entry scores 0.
    """
    edge = (h, m)
    return curr_G_wieghts.get(edge, 0)

t_init = time.time()
######### run perceptron #########
for n in range(NUM_OF_PERCEPTRON_STEPS):
    print ('Starting Perceptron Step - ' + str(n + 1))
    for i in range(len(sent_word_list)):
        curr_G = sent_graph_list[i]
        # create a weight for each edge
        curr_G_wieghts = turn_edge_feats_to_wights(
                edge_dict=sent_graph_edges_feats[i],
                wights_vec=basic_feature_weights_vec)
        graph = Digraph(curr_G, get_score)
        if True == TRAIN_WITH_MST:
            mst = graph.mst()
        else:
            mst = graph.greedy()
        pred_heads = convert_chi_lui_output_to_list_of_heads(
                sent_len=len(sent_word_list[i]),
                mst=mst)
        if len(pred_heads) != len(sent_head_list[i]):
            print ('Perceptron prediction problem at sent ' + str(i) + ' pred len ' + str(pred_heads) + ' real heads len' + str(sent_head_list[i]))
        if tuple(pred_heads) != tuple(sent_head_list[i]):
            # perceptron correction step
            pred_feat_idxs = get_all_feature_idxes_for_sent_and_head(
                featurs_obj=featurs_basic_obj,
                sent_words=sent_word_list[i],
                sent_pos=sent_pos_list[i],
                sent_heads=pred_heads)
            # indexs of real X + Y features