def train(this):
    """Fit the relation classifier and print its accuracy on the training set.

    One feature vector is built for every relation listed in the
    relationship graph of each well-formed training sample.  The strokes of
    the parent and child groups are preprocessed together, then the vector
    is assembled from the ``msscf`` descriptor of the parent strokes, the
    ``msscf`` descriptor of the child strokes (both computed around the
    midpoint of the two group centers) and the output of
    ``stroke_symbol_pair_features``.  The target is
    ``parser.class_num_map[rel.type]``.

    Relations whose strokes cannot be preprocessed (``preprocess_strokes``
    returns ``None``) are skipped.  After fitting ``this.model`` the
    function predicts every training sample again and prints the fraction
    of correct predictions; ``nan`` is printed when no sample was built.
    """
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print("TRAINING PARSER")
    X = []
    y = []
    for sample_name in this.train_names:
        f_handler = this.dataset[sample_name]
        if f_handler.is_malformed():
            continue
        trace_map = {g.truth: g.traces for g in f_handler.groups}
        for rel in f_handler.relationship_graph.relations:
            parent_raw = trace_map[rel.parent]
            child_raw = trace_map[rel.child]
            # Preprocess both groups in one call, then split them again.
            proc_traces = preprocess_strokes(
                list(parent_raw) + list(child_raw))
            # Identity test: `== None` is ambiguous if an ndarray comes back.
            if proc_traces is None:
                continue
            n_parent = len(parent_raw)
            parent_traces = proc_traces[:n_parent]
            child_traces = proc_traces[n_parent:]

            # Mean bounding box of each group.
            # NOTE(review): the center computation below assumes
            # calculate_bounding_box returns (min_x, max_x, min_y, max_y)
            # -- confirm against its definition.
            bbox1 = sum([np.array(calculate_bounding_box(t))
                         for t in parent_traces]) / len(parent_traces)
            bbox2 = sum([np.array(calculate_bounding_box(t))
                         for t in child_traces]) / len(child_traces)
            center1 = (bbox1[0] + bbox1[1]) / 2., (bbox1[2] + bbox1[3]) / 2.
            center2 = (bbox2[0] + bbox2[1]) / 2., (bbox2[2] + bbox2[3]) / 2.
            a_center = ((center1[0] + center2[0]) / 2,
                        (center1[1] + center2[1]) / 2)

            # Flatten each group's strokes into a single point sequence.
            combined1 = []
            for stroke in parent_traces:
                combined1.extend(stroke)
            combined2 = []
            for stroke in child_traces:
                combined2.extend(stroke)

            parent_shape_context = msscf(combined1, [], center=a_center)
            child_shape_context = msscf(combined2, [], center=a_center)
            geometric_features = stroke_symbol_pair_features(
                combined1, combined2)

            sample = []
            sample.extend(parent_shape_context)
            sample.extend(child_shape_context)
            sample.extend(geometric_features)
            X.append(sample)
            y.append(parser.class_num_map[rel.type])

    this.model.fit(X, y)

    # Re-predict the training samples one by one to report accuracy.
    right = 0
    for sample, target in zip(X, y):
        if this.model.predict(sample) == target:
            right += 1
    total = len(X)
    # Avoid ZeroDivisionError when no relation produced a sample.
    accuracy = float(right) / total if total else float('nan')
    print("{0} Accuracy on Training Data".format(accuracy))
Пример #2
0
    def evaluate_model(this, out_name):
        """Segment every test sample and write prediction and truth files.

        Two directories are created if missing: ``out_name`` and
        ``ground_truth/<out_name>/`` (parents included).  A warning is
        printed when a directory already exists; any other ``OSError``
        (e.g. permission denied) is re-raised instead of being reported as
        "already exists".

        For each well-formed sample in ``this.test_names`` the traces are
        sorted by ``id`` and walked pairwise; ``this.model.predict`` decides
        whether the second stroke of each pair joins the current group or
        starts a new one.  Each predicted group is classified with
        ``this.classifier.predict`` and written as an ``s_object`` line to
        ``<out_name>/<sample>.lg``; the sample's own groups are written to
        ``ground_truth/<out_name>/<sample>.lg``.

        Raises ``IndexError`` if a well-formed sample has no traces.
        """
        # (directory to create, message printed when it already exists)
        targets = (
            (out_name, "[WARNING]: Directory {0} already exists"),
            ("ground_truth/{0}/".format(out_name),
             "[WARNING]: Directory ground_truth/{0} already exists"),
        )
        for directory, warning in targets:
            try:
                os.makedirs(directory)
            except OSError:
                # Only "already exists" is benign; surface everything else.
                if not os.path.isdir(directory):
                    raise
                print(warning.format(out_name))

        for path in this.test_names:
            f_handler = this.dataset[path]
            if f_handler.is_malformed():
                continue
            strokes = sorted(f_handler.traces.values(), key=lambda x: x.id)
            output = [[strokes[0]]]
            for s1, s2 in zip(strokes, strokes[1:]):
                ## GET FEATURES
                s1f, s2f = preprocess_strokes([s1, s2])
                shape_context = msscf(s1f, s2f)
                geometric_features = stroke_symbol_pair_features(s1f, s2f)
                # NOTE(review): the "join" and "sep" probabilities come from
                # identical inputs, so both vectors are the same.  They are
                # kept as-is so the feature layout stays unchanged for
                # this.model.
                features = extract_features_from_sample([s1f, s2f])
                class_probs_join = this.classifier.model.predict_proba(
                    features)
                features = extract_features_from_sample([s1f, s2f])
                class_probs_sep = this.classifier.model.predict_proba(
                    features)
                sample = []
                sample.extend(shape_context)
                sample.extend(geometric_features)
                sample.extend(class_probs_join[0].T)
                sample.extend(class_probs_sep[0].T)
                to_join = this.model.predict(sample)
                if to_join:
                    output[-1].append(s2)
                else:
                    output.append([s2])

            ## Get predicted output
            store_name, _ = os.path.splitext(os.path.basename(path))
            with open("{0}/{1}.lg".format(out_name, store_name), 'w') as f:
                for stroke_group in output:
                    data_array = [i.data for i in stroke_group]
                    ids = ", ".join([str(i.id) for i in stroke_group])
                    features = extract_features_from_sample(data_array)
                    prediction = this.classifier.predict(features)
                    f.write(str(s_object(prediction, ids)))
                # NOTE(review): reset() runs after the predictions only, not
                # after the ground-truth pass below -- confirm intended.
                s_object.reset()

            with open(
                    "ground_truth/{0}/{1}.lg".format(out_name, store_name),
                    'w') as f:
                for truth_group in f_handler.groups:
                    ids = ', '.join(str(i) for i in truth_group.traces_id)
                    f.write(str(s_object(truth_group.type, ids)))
Пример #3
0
 def evaluate_single(fname):
     """Segment and classify the strokes of a single InkML file.

     The file is read with ``read_inkml`` and wrapped in an ``ff_handler``.
     Its traces are sorted by ``id`` and walked pairwise; ``this.model``
     decides whether each stroke joins the current group or starts a new
     one.  Every resulting stroke group is classified with
     ``this.classifier`` and wrapped in a ``group`` object.

     Returns the list of ``group`` objects, or ``None`` when the file is
     malformed.  Raises ``IndexError`` if a well-formed file has no traces.

     NOTE(review): ``this`` is not a parameter of this function; it has to
     be resolvable from an enclosing or global scope -- confirm.
     """
     t, g, r = read_inkml(fname)
     f_handler = ff_handler(t, g, r)
     if f_handler.is_malformed():
         return None

     strokes = sorted([v for k, v in f_handler.traces.items()],
                      key=lambda x: x.id)
     output = [[strokes[0]]]
     for s1, s2 in zip(strokes, strokes[1:]):
         ## GET FEATURES
         s1f, s2f = preprocess_strokes([s1, s2])
         shape_context = msscf(s1f, s2f)
         geometric_features = stroke_symbol_pair_features(s1f, s2f)
         features = extract_features_from_sample([s1f, s2f])
         class_probs_join = this.classifier.model.predict_proba(features)
         features = extract_features_from_sample([s1f, s2f])
         class_probs_sep = this.classifier.model.predict_proba(features)
         sample = []
         sample.extend(shape_context)
         sample.extend(geometric_features)
         sample.extend(class_probs_join[0].T)
         sample.extend(class_probs_sep[0].T)
         to_join = this.model.predict(sample)
         if to_join:
             output[-1].append(s2)
         else:
             output.append([s2])

     groups = []
     # The loop variable must not be called `group`: that would shadow the
     # `group` class instantiated below and make the call fail with
     # "TypeError: 'list' object is not callable".
     for stroke_group in output:
         data_array = [i.data for i in stroke_group]
         ids = ", ".join([str(i.id) for i in stroke_group])
         features = extract_features_from_sample(data_array)
         prediction = this.classifier.predict(features)
         s_o = s_object(prediction, ids)
         g_o = group(None, None, None, override=True)
         g_o.id = s_o.get_truth()
         g_o.type = s_o.type
         g_o.truth = s_o.get_truth()
         g_o.traces = stroke_group
         groups.append(g_o)
     return groups
Пример #4
0
 def train(this):
     """Fit the stroke-segmentation model on consecutive stroke pairs.

     For every well-formed training sample the traces are ordered by ``id``
     and each pair of neighbouring strokes becomes one training example.
     The example is labelled 1 when the ground-truth groups place the
     second stroke directly after the first one inside the same group
     (according to the sorted trace ids of that group), otherwise 0.

     The feature vector is the concatenation of the ``msscf`` output, the
     ``stroke_symbol_pair_features`` output and two class-probability
     vectors obtained from ``this.classifier.model.predict_proba``.
     Finally ``this.model.fit`` is called with all examples.
     """
     samples = []
     targets = []
     for sample_name in this.train_names:
         f_handler = this.dataset[sample_name]
         if f_handler.is_malformed():
             continue

         strokes = [trace for _, trace in f_handler.traces.items()]
         strokes.sort(key=lambda trace: trace.id)

         # Map each trace id to the id that follows it inside its group.
         successor = {}
         for truth_group in f_handler.groups:
             ordered_ids = sorted(truth_group.traces_id)
             for current_id, next_id in zip(ordered_ids, ordered_ids[1:]):
                 successor[current_id] = next_id

         for s1, s2 in zip(strokes, strokes[1:]):
             to_join = s1.id in successor and successor[s1.id] == s2.id

             ## GET FEATURES
             s1f, s2f = preprocess_strokes([s1, s2])
             shape_context = msscf(s1f, s2f)
             geometric_features = stroke_symbol_pair_features(s1f, s2f)
             probs_join = this.classifier.model.predict_proba(
                 extract_features_from_sample([s1f, s2f]))
             probs_sep = this.classifier.model.predict_proba(
                 extract_features_from_sample([s1f, s2f]))

             row = list(shape_context)
             row += list(geometric_features)
             row += list(probs_join[0].T)
             row += list(probs_sep[0].T)
             samples.append(row)
             if to_join:
                 targets.append(1)
             else:
                 targets.append(0)
     this.model.fit(samples, targets)
 def create_relation_graph(this, groups):
     """Build a relation graph over symbol groups and reduce it with Edmonds.

     A directed graph is created with a ``'base'`` node plus one node per
     group (keyed by ``group.truth``); ``'base'`` is linked to every group
     with weight 0.  All group traces are smoothed, repositioned and
     rendered into one image.  For every ordered pair of distinct groups,
     an edge is added when ``has_los`` reports a line of sight between any
     point of the first group and any point of the second.  The edge weight
     is the highest probability from ``this.model.predict_proba`` and the
     edge label is the matching entry of ``parser.names_map``.

     Pairs whose strokes cannot be preprocessed (``preprocess_strokes``
     returns ``None``) get no edge.

     Returns the graph produced by
     ``nx.algorithms.tree.Edmonds(G).find_optimum(kind='max')`` with the
     ``'label'`` attribute copied over from the full graph.
     """
     G = nx.DiGraph()
     G.add_node('base')
     for g in groups:
         G.add_node(g.truth)
         G.add_edge('base', g.truth, weight=0)

     # Normalise all traces together so every group shares one image.
     group_trace_lens = [len(g.traces) for g in groups]
     combined_strokes = []
     for member in groups:
         traces = [i.data for i in member.traces]
         smooth = smooth_xy_points({'id': traces})
         reposi = reposition_xy_points(smooth)['id']
         combined_strokes.extend(reposi)
     img = create_image_from_points(combined_strokes)

     # Split the flat stroke list back into per-group slices.
     groups_data = []
     start = 0
     for n_traces in group_trace_lens:
         groups_data.append(combined_strokes[start:start + n_traces])
         start += n_traces

     def _has_line_of_sight(strokes_a, strokes_b):
         # True as soon as one point pair of the two groups is visible.
         for stroke_a in strokes_a:
             for stroke_b in strokes_b:
                 for p_a in stroke_a:
                     for p_b in stroke_b:
                         if has_los(p_a, p_b, img):
                             return True
         return False

     for idx1, group1 in enumerate(groups_data):
         for idx2, group2 in enumerate(groups_data):
             if idx1 == idx2:
                 continue
             if not _has_line_of_sight(group1, group2):
                 continue

             # Preprocess once per pair.  The result does not depend on
             # which point pair was visible, so a failure skips the pair.
             both_traces = []
             both_traces.extend(group1)
             both_traces.extend(group2)
             proc_traces = preprocess_strokes(both_traces, raw=True)
             # Identity test: `== None` is ambiguous if an ndarray comes back.
             if proc_traces is None:
                 continue
             n_parent = len(group1)
             parent_traces = proc_traces[:n_parent]
             child_traces = proc_traces[n_parent:]

             # NOTE(review): the center computation assumes
             # calculate_bounding_box returns (min_x, max_x, min_y, max_y)
             # -- confirm against its definition.
             bbox1 = sum([np.array(calculate_bounding_box(t))
                          for t in parent_traces]) / len(parent_traces)
             bbox2 = sum([np.array(calculate_bounding_box(t))
                          for t in child_traces]) / len(child_traces)
             center1 = (bbox1[0] + bbox1[1]) / 2., (bbox1[2] + bbox1[3]) / 2.
             center2 = (bbox2[0] + bbox2[1]) / 2., (bbox2[2] + bbox2[3]) / 2.
             a_center = ((center1[0] + center2[0]) / 2,
                         (center1[1] + center2[1]) / 2)

             combined1 = []
             for stroke in parent_traces:
                 combined1.extend(stroke)
             combined2 = []
             for stroke in child_traces:
                 combined2.extend(stroke)

             parent_shape_context = msscf(combined1, [], center=a_center)
             child_shape_context = msscf(combined2, [], center=a_center)
             geometric_features = stroke_symbol_pair_features(
                 combined1, combined2)

             sample = []
             sample.extend(parent_shape_context)
             sample.extend(child_shape_context)
             sample.extend(geometric_features)
             ws = this.model.predict_proba(sample)[0]
             # Most probable relation; ties fall back to label order.
             weight, label = max(zip(ws, parser.names_map))
             G.add_edge(groups[idx1].truth, groups[idx2].truth,
                        weight=weight, label=label)

     edmonds = nx.algorithms.tree.Edmonds(G)
     g = edmonds.find_optimum(kind='max')
     # Copy the relation labels from the full graph onto the result.
     for u, v in g.edges():
         g[u][v]['label'] = G[u][v]['label']
     return g