def train(this):
    """Fit the relation model on every labelled relation of the training set.

    For each training file that is not malformed, every relation in
    ``f_handler.relationship_graph.relations`` produces one sample made of:

    * the ``msscf`` features of the parent group's points,
    * the ``msscf`` features of the child group's points (both computed
      around the midpoint of the two groups' centres), and
    * the ``stroke_symbol_pair_features`` of the two point lists.

    The target is ``parser.class_num_map[rel.type]``.  Relations whose
    strokes ``preprocess_strokes`` rejects (returns ``None``) are skipped.
    After fitting ``this.model`` the accuracy on the training samples is
    printed.
    """

    def _mean_bbox_center(traces):
        # Average the per-stroke bounding boxes, then take the centre of the
        # averaged box (index pairs 0/1 and 2/3, exactly as before).
        bbox = sum([np.array(calculate_bounding_box(t))
                    for t in traces]) / len(traces)
        return (bbox[0] + bbox[1]) / 2., (bbox[2] + bbox[3]) / 2.

    def _flatten(traces):
        # Concatenate the points of all strokes into a single list.
        points = []
        for stroke in traces:
            points.extend(stroke)
        return points

    print("TRAINING PARSER")
    X = []
    y = []
    for sample_name in this.train_names:
        f_handler = this.dataset[sample_name]
        if f_handler.is_malformed():
            continue
        trace_map = {g.truth: g.traces for g in f_handler.groups}
        for rel in f_handler.relationship_graph.relations:
            parent_traces = trace_map[rel.parent]
            child_traces = trace_map[rel.child]
            n_parent = len(parent_traces)

            # Both groups are preprocessed together and split apart again
            # afterwards at the original parent/child boundary.
            proc_traces = preprocess_strokes(
                list(parent_traces) + list(child_traces))
            # Identity test: ``== None`` is ambiguous (and can raise) when
            # the helper hands back an array-like object.
            if proc_traces is None:
                continue
            parent_traces = proc_traces[:n_parent]
            child_traces = proc_traces[n_parent:]

            center1 = _mean_bbox_center(parent_traces)
            center2 = _mean_bbox_center(child_traces)
            a_center = ((center1[0] + center2[0]) / 2,
                        (center1[1] + center2[1]) / 2)

            parent_points = _flatten(parent_traces)
            child_points = _flatten(child_traces)

            sample = []
            sample.extend(msscf(parent_points, [], center=a_center))
            sample.extend(msscf(child_points, [], center=a_center))
            sample.extend(
                stroke_symbol_pair_features(parent_points, child_points))
            X.append(sample)
            y.append(parser.class_num_map[rel.type])

    this.model.fit(X, y)

    if not X:
        # Nothing to score; the old code divided by zero here.
        print("[WARNING]: No training samples were produced; "
              "accuracy not computed")
        return

    right = 0
    for sample, target in zip(X, y):
        if this.model.predict(sample) == target:
            right += 1
    print("{0} Accuracy on Training Data".format(
        float(right) / float(len(X))))
def evaluate_model(this, out_name):
    """Segment every test file and write predicted and ground-truth ``.lg`` files.

    Predictions go to ``<out_name>/<sample>.lg`` and the reference grouping to
    ``ground_truth/<out_name>/<sample>.lg``.  Both directories are created if
    needed; an already existing directory only produces a warning.

    For each test file that is not malformed the strokes are ordered by
    ``id`` and walked pairwise: ``this.model.predict`` decides whether the
    second stroke of each consecutive pair joins the current group or starts
    a new one.  Each resulting group is then labelled with
    ``this.classifier.predict``.

    Raises:
        OSError: if an output directory cannot be created for a reason other
            than it already existing.
    """
    # Local import: the file's import block is not visible from this chunk.
    import errno

    def _ensure_dir(path, shown_as):
        # makedirs also creates a missing parent (e.g. "ground_truth").
        try:
            os.makedirs(path)
        except OSError as e:
            # Only "already exists" is benign; anything else (permissions,
            # read-only file system, ...) must not be reported as such.
            if e.errno != errno.EEXIST:
                raise
            print("[WARNING]: Directory {0} already exists".format(shown_as))

    _ensure_dir(out_name, out_name)
    _ensure_dir("ground_truth/{0}/".format(out_name),
                "ground_truth/{0}".format(out_name))

    for path in this.test_names:
        f_handler = this.dataset[path]
        if f_handler.is_malformed():
            continue

        strokes = sorted(f_handler.traces.values(), key=lambda s: s.id)
        # The first stroke always opens the first group; a file without
        # strokes yields no groups instead of an IndexError.
        output = [[strokes[0]]] if strokes else []

        for s1, s2 in zip(strokes, strokes[1:]):
            ## GET FEATURES
            processed = preprocess_strokes([s1, s2])
            if processed is None:
                # Other callers in this file treat None as "could not
                # preprocess"; without features the pair is kept separate.
                output.append([s2])
                continue
            s1f, s2f = processed
            shape_context = msscf(s1f, s2f)
            geometric_features = stroke_symbol_pair_features(s1f, s2f)

            # NOTE(review): the "join" and "sep" probabilities are computed
            # from identical inputs. Both calls are kept so the sample layout
            # stays the same as the one used when the model was trained.
            features = extract_features_from_sample([s1f, s2f])
            class_probs_join = this.classifier.model.predict_proba(features)
            features = extract_features_from_sample([s1f, s2f])
            class_probs_sep = this.classifier.model.predict_proba(features)

            sample = []
            sample.extend(shape_context)
            sample.extend(geometric_features)
            sample.extend(class_probs_join[0].T)
            sample.extend(class_probs_sep[0].T)

            if this.model.predict(sample):
                output[-1].append(s2)
            else:
                output.append([s2])

        ## Get predicted output
        store_name, _ = os.path.splitext(os.path.basename(path))
        with open("{0}/{1}.lg".format(out_name, store_name), 'w') as f:
            for stroke_group in output:
                data_array = [s.data for s in stroke_group]
                ids = ", ".join([str(s.id) for s in stroke_group])
                features = extract_features_from_sample(data_array)
                prediction = this.classifier.predict(features)
                f.write(str(s_object(prediction, ids)))

        # NOTE(review): reset() runs only between the predicted and the
        # ground-truth file. Confirm whether it should also run after the
        # ground-truth file so no state carries over to the next sample.
        s_object.reset()

        with open("ground_truth/{0}/{1}.lg".format(out_name, store_name),
                  'w') as f:
            for true_group in f_handler.groups:
                ids = ', '.join(str(i) for i in true_group.traces_id)
                f.write(str(s_object(true_group.type, ids)))
def evaluate_single(fname, segmenter=None):
    """Segment and classify the strokes of a single InkML file.

    The file is read with ``read_inkml`` and wrapped in ``ff_handler``.  Its
    strokes are ordered by ``id`` and walked pairwise; the segmentation model
    decides whether the second stroke of each consecutive pair joins the
    current group.  Every resulting group is classified and returned as a
    ``group`` object whose ``id``/``truth``, ``type`` and ``traces`` are
    filled in from the prediction.

    Args:
        fname: path handed to ``read_inkml``.
        segmenter: object providing ``model`` and ``classifier``.  Optional
            for backward compatibility; when omitted the module-level name
            ``this`` is used, as the original code did.

    Returns:
        A list of ``group`` objects, or ``None`` when the file is malformed.
    """
    if segmenter is None:
        # NOTE(review): the original body referenced ``this`` although it is
        # not a parameter; this only works if a global ``this`` exists.
        # Callers should pass ``segmenter`` explicitly.
        segmenter = this

    t, g, r = read_inkml(fname)
    f_handler = ff_handler(t, g, r)
    if f_handler.is_malformed():
        return None

    strokes = sorted(f_handler.traces.values(), key=lambda s: s.id)
    # The first stroke always opens the first group; no strokes, no groups.
    output = [[strokes[0]]] if strokes else []

    for s1, s2 in zip(strokes, strokes[1:]):
        ## GET FEATURES
        processed = preprocess_strokes([s1, s2])
        if processed is None:
            # Other callers in this file treat None as "could not
            # preprocess"; without features the pair is kept separate.
            output.append([s2])
            continue
        s1f, s2f = processed
        shape_context = msscf(s1f, s2f)
        geometric_features = stroke_symbol_pair_features(s1f, s2f)

        # NOTE(review): "join" and "sep" probabilities come from identical
        # inputs; both calls are kept so the sample layout matches training.
        features = extract_features_from_sample([s1f, s2f])
        class_probs_join = segmenter.classifier.model.predict_proba(features)
        features = extract_features_from_sample([s1f, s2f])
        class_probs_sep = segmenter.classifier.model.predict_proba(features)

        sample = []
        sample.extend(shape_context)
        sample.extend(geometric_features)
        sample.extend(class_probs_join[0].T)
        sample.extend(class_probs_sep[0].T)

        if segmenter.model.predict(sample):
            output[-1].append(s2)
        else:
            output.append([s2])

    groups = []
    # The loop variable must not be called ``group``: that would shadow the
    # ``group`` class for the whole function and make the constructor call
    # below try to call a list.
    for stroke_group in output:
        data_array = [s.data for s in stroke_group]
        ids = ", ".join([str(s.id) for s in stroke_group])
        features = extract_features_from_sample(data_array)
        prediction = segmenter.classifier.predict(features)
        s_o = s_object(prediction, ids)

        g_o = group(None, None, None, override=True)
        g_o.id = s_o.get_truth()
        g_o.type = s_o.type
        g_o.truth = s_o.get_truth()
        g_o.traces = stroke_group
        groups.append(g_o)
    return groups
def train(this):
    """Fit the stroke-merging model on consecutive stroke pairs.

    For every training file that is not malformed the strokes are ordered by
    ``id``.  Each consecutive pair ``(s1, s2)`` becomes one sample built from
    the ``msscf`` features, the ``stroke_symbol_pair_features`` and the
    classifier's ``predict_proba`` output for the pair.  The target is 1 when
    ``s2`` directly follows ``s1`` in the sorted trace ids of one ground-truth
    group, and 0 otherwise.  Pairs that ``preprocess_strokes`` rejects
    (returns ``None``) are skipped.  Finally ``this.model`` is fitted.
    """
    X = []
    y = []
    for sample_name in this.train_names:
        f_handler = this.dataset[sample_name]
        if f_handler.is_malformed():
            continue

        strokes = sorted(f_handler.traces.values(), key=lambda s: s.id)

        # Map each trace id to the id that follows it inside its own group.
        join_map = {}
        for true_group in f_handler.groups:
            sorted_ids = sorted(true_group.traces_id)
            join_map.update(zip(sorted_ids, sorted_ids[1:]))

        for s1, s2 in zip(strokes, strokes[1:]):
            to_join = s1.id in join_map and join_map[s1.id] == s2.id

            ## GET FEATURES
            processed = preprocess_strokes([s1, s2])
            if processed is None:
                # Same policy as the other training code in this file:
                # samples that cannot be preprocessed are left out.
                continue
            s1f, s2f = processed
            shape_context = msscf(s1f, s2f)
            geometric_features = stroke_symbol_pair_features(s1f, s2f)

            # NOTE(review): "join" and "sep" probabilities are computed from
            # identical inputs. Both calls are kept so the sample layout stays
            # in sync with the evaluation code that feeds the same model.
            features = extract_features_from_sample([s1f, s2f])
            class_probs_join = this.classifier.model.predict_proba(features)
            features = extract_features_from_sample([s1f, s2f])
            class_probs_sep = this.classifier.model.predict_proba(features)

            sample = []
            sample.extend(shape_context)
            sample.extend(geometric_features)
            sample.extend(class_probs_join[0].T)
            sample.extend(class_probs_sep[0].T)
            X.append(sample)
            y.append(1 if to_join else 0)

    this.model.fit(X, y)
def create_relation_graph(this, groups):
    """Build a weighted relation graph over ``groups`` and reduce it with Edmonds.

    A directed graph is created with one node per ``group.truth`` plus a
    ``'base'`` node that has a weight-0 edge to every group.  The strokes of
    every group are smoothed and repositioned, and an image is rendered from
    all of them.  For every ordered pair of distinct groups, an edge is added
    when ``has_los`` reports at least one visible point pair between them
    (presumably a line-of-sight test on the rendered image) and the pair can
    be preprocessed.  The edge carries the highest class probability from
    ``this.model.predict_proba`` as ``weight`` and the matching entry of
    ``parser.names_map`` as ``label``.

    Args:
        groups: objects exposing ``truth`` and ``traces`` (each trace
            exposing ``data``).

    Returns:
        The graph returned by ``Edmonds(G).find_optimum(kind='max')``, with
        the ``label`` attribute copied over from the full graph.
    """

    def _mean_bbox_center(traces):
        # Average the per-stroke bounding boxes, then take the centre of the
        # averaged box (index pairs 0/1 and 2/3, exactly as before).
        bbox = sum([np.array(calculate_bounding_box(t))
                    for t in traces]) / len(traces)
        return (bbox[0] + bbox[1]) / 2., (bbox[2] + bbox[3]) / 2.

    def _flatten(traces):
        # Concatenate the points of all strokes into a single list.
        points = []
        for stroke in traces:
            points.extend(stroke)
        return points

    G = nx.DiGraph()
    G.add_node('base')
    for grp in groups:
        G.add_node(grp.truth)
        G.add_edge('base', grp.truth, weight=0)

    # Keep each group's processed strokes as they are produced, instead of
    # re-slicing one long list by the original trace counts afterwards.
    combined_strokes = []
    groups_data = []
    for grp in groups:
        traces = [t.data for t in grp.traces]
        smooth = smooth_xy_points({'id': traces})
        reposi = reposition_xy_points(smooth)['id']
        combined_strokes.extend(reposi)
        groups_data.append(list(reposi))
    img = create_image_from_points(combined_strokes)

    for idx1, group1 in enumerate(groups_data):
        for idx2, group2 in enumerate(groups_data):
            if idx1 == idx2:
                continue

            # Same scan order as the original nested loops; stops at the
            # first visible point pair.
            visible = any(
                has_los(p1, p2, img)
                for stroke1 in group1
                for stroke2 in group2
                for p1 in stroke1
                for p2 in stroke2)
            if not visible:
                continue

            # The features do not depend on which point pair was visible, so
            # preprocessing runs once per group pair. Previously a failed
            # preprocessing was retried for every further visible point pair.
            proc_traces = preprocess_strokes(
                list(group1) + list(group2), raw=True)
            if proc_traces is None:
                continue
            n_first = len(group1)
            group1_traces = proc_traces[:n_first]
            group2_traces = proc_traces[n_first:]

            center1 = _mean_bbox_center(group1_traces)
            center2 = _mean_bbox_center(group2_traces)
            a_center = ((center1[0] + center2[0]) / 2,
                        (center1[1] + center2[1]) / 2)

            combined1 = _flatten(group1_traces)
            combined2 = _flatten(group2_traces)

            sample = []
            sample.extend(msscf(combined1, [], center=a_center))
            sample.extend(msscf(combined2, [], center=a_center))
            sample.extend(stroke_symbol_pair_features(combined1, combined2))

            ws = this.model.predict_proba(sample)[0]
            # Highest probability wins; ties fall back to comparing labels,
            # as the tuple comparison did before.
            weight, label = max(zip(ws, parser.names_map))
            G.add_edge(groups[idx1].truth, groups[idx2].truth,
                       weight=weight, label=label)

    edmonds = nx.algorithms.tree.Edmonds(G)
    optimum = edmonds.find_optimum(kind='max')
    for u, v in optimum.edges():
        # The 'base' edges above are created without a label, so copy the
        # attribute only where the full graph actually has one.
        if 'label' in G[u][v]:
            optimum[u][v]['label'] = G[u][v]['label']
    return optimum