def test_opt_model():
    """Precision/recall sweep for the optimization model on the 'test' split.

    Trains the tag and semantic models on half of the data, evaluates the
    optimization model over thresholds in [-2, 2], and plots the resulting
    precision/recall curves.
    """
    query = 'test'
    questions = geoserver_interface.download_questions(query)
    syntax_parses = questions_to_syntax_parses(questions)
    annotations = geoserver_interface.download_semantics(query)
    labels = geoserver_interface.download_labels(query)
    # 50/50 split over parses, annotations and questions.
    (tr_s, tr_a, tr_q), (te_s, te_a, te_q) = split(
        [syntax_parses, annotations, questions], 0.5)
    tag_model = train_tag_model(syntax_parses, annotations)
    semantic_model = train_semantic_model(tag_model, tr_s, tr_a)
    # te_m = questions_to_match_parses(te_q, labels)
    pr_by_threshold = evaluate_opt_model(
        semantic_model, te_s, te_a, questions, np.linspace(-2, 2, 21))
    precisions, recalls = zip(*pr_by_threshold.values())
    plt.plot(pr_by_threshold.keys(), precisions, 'o', label='precision')
    plt.plot(pr_by_threshold.keys(), recalls, 'o', label='recall')
    plt.legend(bbox_to_anchor=(0., 1.02, 1., .102), loc=3, ncol=2,
               mode="expand", borderaxespad=0.)
    plt.show()
def test_solving():
    """End-to-end solving demo for question 973.

    Parses the diagram, grounds hand-written midpoint/length premises
    against the matched labels, and numerically evaluates the length of BC.
    """
    pk = 973
    question = geoserver_interface.download_questions(pk).values()[0]
    label_data = geoserver_interface.download_labels(pk)[pk]
    graph_parse = diagram_to_graph_parse(open_image(question.diagram_path))
    match_parse = parse_match_from_known_labels(graph_parse, label_data)
    # Declare the variables appearing in the premises.
    AB, AC, BC = v('AB', 'line'), v('AC', 'line'), v('BC', 'line')
    ED, AE = v('ED', 'line'), v('AE', 'line')
    E, D = v('E', 'point'), v('D', 'point')
    x = v('x', 'number')
    premises = [
        f('LengthOf', AB) == f('LengthOf', AC),
        f('IsMidpointOf', E, AB),
        f('IsMidpointOf', D, AC),
        f('LengthOf', AE) == x,
        f('LengthOf', ED) == 4,
    ]
    qn = f('LengthOf', BC)
    # Combine atoms the diagram parser is confident about with the grounded
    # textual premises, then evaluate the grounded query.
    atoms = parse_confident_formulas(graph_parse) + \
        ground_formula_nodes(match_parse, premises)
    grounded_qn = ground_formula_nodes(match_parse, [qn])[0]
    solver = NumericSolver(atoms)
    print(solver.evaluate(grounded_qn))
def test_rule_model():
    """Plot precision/recall curves per rule category for the rule model.

    Trains on half the 'test' split and evaluates over 101 thresholds in
    [0, 1]; one scatter plot is shown per category.
    """
    query = 'test'
    questions = geoserver_interface.download_questions(query)
    syntax_parses = questions_to_syntax_parses(questions)
    annotations = geoserver_interface.download_semantics(query)
    labels = geoserver_interface.download_labels(query)
    (tr_s, tr_a), (te_s, te_a) = split((syntax_parses, annotations), 0.5)
    tag_model = train_tag_model(syntax_parses, annotations)
    semantic_model = train_semantic_model(tag_model, tr_s, tr_a)
    unary_prs, core_prs, is_prs, cc_prs, core_tree_prs = evaluate_rule_model(
        semantic_model, te_s, te_a, np.linspace(0, 1, 101))
    # Same display order as before: core-tree first, then the rest.
    for prs in (core_tree_prs, unary_prs, core_prs, is_prs, cc_prs):
        plt.plot(prs.keys(), prs.values(), 'o')
        plt.show()
def test_ground_atoms():
    """Ground hand-written formulas for question 973 and print each atom.

    Uses the known-label match parse of the diagram to ground five premises
    plus the LengthOf(BC) query, then displays the parsed points.
    """
    pk = 973
    question = geoserver_interface.download_questions(pk).values()[0]
    label_data = geoserver_interface.download_labels(pk)[pk]
    graph_parse = diagram_to_graph_parse(open_image(question.diagram_path))
    match_parse = parse_match_from_known_labels(graph_parse, label_data)
    AB, AC, BC = v('AB', 'line'), v('AC', 'line'), v('BC', 'line')
    ED, AE = v('ED', 'line'), v('AE', 'line')
    E, D = v('E', 'point'), v('D', 'point')
    x = v('x', 'number')
    formulas = [
        f('LengthOf', AB) == f('LengthOf', AC),
        f('IsMidpointOf', E, AB),
        f('IsMidpointOf', D, AC),
        f('LengthOf', AE) == x,
        f('LengthOf', ED) == 4,
        f('LengthOf', BC),  # the query, grounded together with the premises
    ]
    for grounded_atom in ground_formula_nodes(match_parse, formulas):
        print(grounded_atom)
    graph_parse.core_parse.display_points()
def save_parse_primitives(file_path="/Users/minjoon/Desktop/primitives.png"):
    """Render the primitive parse of question 1037's diagram to an image file.

    Generalized: the output location is now a parameter instead of a
    hard-coded constant; the default preserves the original behavior.

    :param file_path: destination path for the rendered PNG.
    """
    question = geoserver_interface.download_questions(1037).values()[0]
    image_segment_parse = parse_image_segments(open_image(question.diagram_path))
    primitive_parse = parse_primitives(image_segment_parse)
    image = primitive_parse.get_image_primitives()
    cv2.imwrite(file_path, image)
def test_parse_graph():
    """Visual smoke test of the full diagram-parsing pipeline on query 973.

    For every question: segments the image, parses/selects primitives,
    builds the core and graph parses, prints the confident atoms, then
    displays every line, circle, arc and angle instance in turn.
    """
    for question in geoserver_interface.download_questions(973).values():
        image_segment_parse = parse_image_segments(open_image(question.diagram_path))
        primitive_parse = parse_primitives(image_segment_parse)
        selected_primitive_parse = select_primitives(primitive_parse)
        core_parse = parse_core(selected_primitive_parse)
        graph_parse = parse_graph(core_parse)
        print("Confident information in the diagram:")
        for atom in parse_confident_atoms(graph_parse):
            print(atom)
        core_parse.display_points()
        # Fetch all instance dicts up front (same call order as before),
        # then display each category.
        instance_dicts = [(kind, get_all_instances(graph_parse, kind))
                          for kind in ('line', 'circle', 'arc', 'angle')]
        for kind, instances in instance_dicts:
            print("Displaying %ss..." % kind)
            for _, instance in instances.iteritems():
                graph_parse.display_instances([instance])
def data_stat(query):
    """Print corpus statistics for *query*.

    Reports sentence/word counts, the number of annotated semantic trees
    (literals) and unary/binary rules, and dumps the learned lexicon of the
    tag model.
    """
    questions = geoserver_interface.download_questions(query)
    syntax_parses = questions_to_syntax_parses(questions, parser=False)
    annotations = geoserver_interface.download_semantics(query)
    unary_rules, binary_rules, semantic_trees = [], [], []
    for pk, local_syntax_parses in syntax_parses.iteritems():
        print(pk)
        for number, syntax_parse in local_syntax_parses.iteritems():
            local_trees = [annotation_to_semantic_tree(syntax_parse, annotation)
                           for annotation in annotations[pk][number].values()]
            semantic_trees.extend(local_trees)
            print(local_trees)
            for tree in local_trees:
                unary_rules.extend(tree.get_unary_rules())
                binary_rules.extend(tree.get_binary_rules())
    tag_model = train_tag_model(syntax_parses, annotations)
    sentence_count = sum(len(question.sentence_words)
                         for _, question in questions.iteritems())
    word_count = sum(len(words) for _, question in questions.iteritems()
                     for _, words in question.sentence_words.iteritems())
    print("sentences: %d" % sentence_count)
    print("words: %d" % word_count)
    print("literals: %d" % len(semantic_trees))
    print("unary rules: %d" % len(unary_rules))
    print("binary rules: %d" % len(binary_rules))
    print("")
    print("LEXICON")
    for key, s in tag_model.lexicon.iteritems():
        print("%s: %s" % ("_".join(key), ", ".join(" ".join(ss) for ss in s)))
def data_stat(query):
    """Print corpus statistics for *query*: sentence/word counts, semantic
    tree (literal) counts, unary/binary rule counts, and the tag model's
    learned lexicon.

    NOTE(review): duplicate of the other data_stat definition in this file;
    in Python the later definition wins at import time.
    """
    questions = geoserver_interface.download_questions(query)
    # parser=False presumably skips re-running the syntactic parser -- TODO confirm.
    syntax_parses = questions_to_syntax_parses(questions, parser=False)
    annotations = geoserver_interface.download_semantics(query)
    unary_rules = []
    binary_rules = []
    semantic_trees = []
    for pk, local_syntax_parses in syntax_parses.iteritems():
        print pk
        for number, syntax_parse in local_syntax_parses.iteritems():
            # One semantic tree per annotation of this sentence.
            local_semantic_trees = [annotation_to_semantic_tree(syntax_parse, annotation)
                                    for annotation in annotations[pk][number].values()]
            semantic_trees.extend(local_semantic_trees)
            print local_semantic_trees
            for semantic_tree in local_semantic_trees:
                unary_rules.extend(semantic_tree.get_unary_rules())
                binary_rules.extend(semantic_tree.get_binary_rules())
    tag_model = train_tag_model(syntax_parses, annotations)
    print "sentences: %d" % sum(len(question.sentence_words) for _, question in questions.iteritems())
    print "words: %d" % (sum(len(words) for _, question in questions.iteritems() for _, words in question.sentence_words.iteritems()))
    print "literals: %d" % len(semantic_trees)
    print "unary rules: %d" % len(unary_rules)
    print "binary rules: %d" % len(binary_rules)
    print ""
    print "LEXICON"
    for key, s in tag_model.lexicon.iteritems():
        print "%s: %s" % ("_".join(key), ", ".join(" ".join(ss) for ss in s))
def save_parse_image_segments(file_path="/Users/minjoon/Desktop/diagram.png"):
    """Save the segmented diagram image of question 1037 to *file_path*.

    Generalized: the output location is now a parameter instead of a
    hard-coded constant; the default preserves the original behavior.

    :param file_path: destination path for the segmented-diagram PNG.
    """
    question = geoserver_interface.download_questions(1037).values()[0]
    image_segment_parse = parse_image_segments(open_image(question.diagram_path))
    image = image_segment_parse.diagram_image_segment.segmented_image
    cv2.imwrite(file_path, image)
def test_parse_graph():
    """Visual smoke test of the diagram pipeline on query 973: parse each
    question's diagram, print confident atoms, then display every line,
    circle, arc and angle instance.

    NOTE(review): duplicate of the other test_parse_graph in this file.
    """
    questions = geoserver_interface.download_questions(973).values()
    for question in questions:
        # Pipeline: image segments -> primitives -> selected primitives ->
        # core parse -> graph parse.
        image_segment_parse = parse_image_segments(
            open_image(question.diagram_path))
        primitive_parse = parse_primitives(image_segment_parse)
        selected_primitive_parse = select_primitives(primitive_parse)
        core_parse = parse_core(selected_primitive_parse)
        graph_parse = parse_graph(core_parse)
        print("Confident information in the diagram:")
        for variable_node in parse_confident_atoms(graph_parse):
            print variable_node
        core_parse.display_points()
        lines = get_all_instances(graph_parse, 'line')
        circles = get_all_instances(graph_parse, 'circle')
        arcs = get_all_instances(graph_parse, 'arc')
        angles = get_all_instances(graph_parse, 'angle')
        print("Displaying lines...")
        for key, line in lines.iteritems():
            graph_parse.display_instances([line])
        print("Displaying circles...")
        for key, circle in circles.iteritems():
            graph_parse.display_instances([circle])
        print("Displaying arcs...")
        for key, arc in arcs.iteritems():
            graph_parse.display_instances([arc])
        print("Displaying angles...")
        for key, angle in angles.iteritems():
            graph_parse.display_instances([angle])
def save_parse_primitives():
    """Render the primitive parse of question 1037's diagram and save it as
    a PNG.

    NOTE(review): output path is hard-coded to a developer machine — adjust
    before running elsewhere. Duplicate of the other save_parse_primitives.
    """
    question = geoserver_interface.download_questions(1037).values()[0]
    image_segment_parse = parse_image_segments(
        open_image(question.diagram_path))
    primitive_parse = parse_primitives(image_segment_parse)
    image = primitive_parse.get_image_primitives()
    file_path = "/Users/minjoon/Desktop/primitives.png"
    cv2.imwrite(file_path, image)
def test_select_primitives():
    """Display the selected primitives for each 'test' question, visiting
    questions in sorted key order."""
    question_dict = geoserver_interface.download_questions('test')
    for key in sorted(question_dict):
        question = question_dict[key]
        print(key)
        segment_parse = parse_image_segments(open_image(question.diagram_path))
        selected = select_primitives(parse_primitives(segment_parse))
        selected.display_primitives()
def test_parse_image_segments():
    """Segment the image of question 1037 and display the binarized diagram
    segment followed by every label segment."""
    question = geoserver_interface.download_questions(1037).values()[0]
    segment_parse = parse_image_segments(open_image(question.diagram_path))
    segment_parse.diagram_image_segment.display_binarized_segmented_image()
    for _, label_segment in segment_parse.label_image_segments.iteritems():
        label_segment.display_segmented_image()
def _annotated_unit_test(query):
    """Run one annotated end-to-end unit test: parse the first question of
    *query*, combine diagram-derived and text-derived formulas, solve, and
    return a SimpleResult with attempted/correct flags.

    NOTE(review): assumes gold semantic annotations and labels exist for
    this question on the server -- TODO confirm.
    """
    questions = geoserver_interface.download_questions(query)
    all_annotations = geoserver_interface.download_semantics(query)
    # Only the first downloaded question is tested.
    pk, question = questions.items()[0]
    choice_formulas = get_choice_formulas(question)
    label_data = geoserver_interface.download_labels(pk)[pk]
    diagram = open_image(question.diagram_path)
    graph_parse = diagram_to_graph_parse(diagram)
    core_parse = graph_parse.core_parse
    # core_parse.display_points()
    # core_parse.primitive_parse.display_primitives()
    match_parse = parse_match_from_known_labels(graph_parse, label_data)
    match_formulas = parse_match_formulas(match_parse)
    diagram_formulas = parse_confident_formulas(graph_parse)
    # Diagram-side formulas; text-side formulas are appended per sentence below.
    all_formulas = match_formulas + diagram_formulas
    for number, sentence_words in question.sentence_words.iteritems():
        syntax_parse = stanford_parser.get_best_syntax_parse(sentence_words)
        # Gold semantic trees for this sentence.
        annotation_nodes = [annotation_to_semantic_tree(syntax_parse, annotation)
                            for annotation in all_annotations[pk][number].values()]
        # Algebraic expressions attached to the sentence, parsed from prefix form.
        expr_formulas = {key: prefix_to_formula(expression_parser.parse_prefix(expression))
                         for key, expression in question.sentence_expressions[number].iteritems()}
        truth_expr_formulas, value_expr_formulas = _separate_expr_formulas(expr_formulas)
        text_formula_parse = semantic_trees_to_text_formula_parse(annotation_nodes)
        completed_formulas = complete_formulas(text_formula_parse)
        # Ground the completed text formulas (plus truth expressions) against
        # the diagram match; value expressions feed substitution during grounding.
        grounded_formulas = [ground_formula(match_parse, formula, value_expr_formulas)
                             for formula in completed_formulas+truth_expr_formulas]
        text_formulas = filter_formulas(flatten_formulas(grounded_formulas))
        all_formulas.extend(text_formulas)
    reduced_formulas = reduce_formulas(all_formulas)
    for reduced_formula in reduced_formulas:
        # Per-formula score plus per-child scores, for debugging.
        score = evaluate(reduced_formula, core_parse.variable_assignment)
        scores = [evaluate(child, core_parse.variable_assignment)
                  for child in reduced_formula.children]
        print reduced_formula, score, scores
    # core_parse.display_points()
    ans = solve(reduced_formulas, choice_formulas,
                assignment=core_parse.variable_assignment)
    print "ans:", ans
    if choice_formulas is None:
        # Numeric-answer question: compare against the gold answer with a
        # small absolute tolerance.
        attempted = True
        if abs(ans - float(question.answer)) < 0.01:
            correct = True
        else:
            correct = False
    else:
        # Multiple-choice question: pick the choice with the highest
        # confidence and compare its index to the gold answer.
        attempted = True
        c = max(ans.iteritems(), key=lambda pair: pair[1].conf)[0]
        if c == int(question.answer):
            correct = True
        else:
            correct = False
    result = SimpleResult(query, False, attempted, correct)
    return result
def test_parse_match_from_known_labels(): questions = geoserver_interface.download_questions(977) for pk, question in questions.iteritems(): label_data = geoserver_interface.download_labels(pk)[pk] diagram = open_image(question.diagram_path) graph_parse = diagram_to_graph_parse(diagram) match_parse = parse_match_from_known_labels(graph_parse, label_data) for key, value in match_parse.match_dict.iteritems(): print key, value graph_parse.core_parse.display_points()
def test_select_primitives():
    """Display the selected primitives for each 'test' question in sorted
    key order.

    NOTE(review): duplicate of the other test_select_primitives in this file.
    """
    question_dict = geoserver_interface.download_questions('test')
    for key in sorted(question_dict.keys()):
        question = question_dict[key]
        print(key)
        image_segment_parse = parse_image_segments(
            open_image(question.diagram_path))
        primitive_parse = parse_primitives(image_segment_parse)
        selected = select_primitives(primitive_parse)
        selected.display_primitives()
def test_parse_match_atoms():
    """Print the match formulas derived from known labels for every question
    of query 977, then display the parsed points."""
    for pk, question in geoserver_interface.download_questions(977).iteritems():
        label_data = geoserver_interface.download_labels(pk)[pk]
        graph_parse = diagram_to_graph_parse(open_image(question.diagram_path))
        match_parse = parse_match_from_known_labels(graph_parse, label_data)
        for match_atom in parse_match_formulas(match_parse):
            print(match_atom)
        graph_parse.core_parse.display_points()
def save_select_primitives(folder_path="/Users/minjoon/Desktop/selected/"):
    """Save a selected-primitives image per 'test' question into *folder_path*.

    Generalized: the output folder is now a parameter instead of a
    hard-coded constant; the default preserves the original behavior.

    :param folder_path: directory where "<question.key>.png" files are written.
    """
    question_dict = geoserver_interface.download_questions('test')
    for key in sorted(question_dict.keys()):
        question = question_dict[key]
        print(key)
        image_segment_parse = parse_image_segments(open_image(question.diagram_path))
        primitive_parse = parse_primitives(image_segment_parse)
        selected = select_primitives(primitive_parse)
        image = selected.get_image_primitives()
        cv2.imwrite(os.path.join(folder_path, "%s.png" % str(question.key)), image)
def save_select_primitives():
    """Save a selected-primitives image per 'test' question.

    NOTE(review): output folder is hard-coded to a developer machine.
    Duplicate of the other save_select_primitives in this file.
    """
    question_dict = geoserver_interface.download_questions('test')
    folder_path = "/Users/minjoon/Desktop/selected/"
    for key in sorted(question_dict.keys()):
        question = question_dict[key]
        print(key)
        image_segment_parse = parse_image_segments(
            open_image(question.diagram_path))
        primitive_parse = parse_primitives(image_segment_parse)
        selected = select_primitives(primitive_parse)
        image = selected.get_image_primitives()
        cv2.imwrite(os.path.join(folder_path, "%s.png" % str(question.key)), image)
def save_parse_core(folder_path="/Users/minjoon/Desktop/core/"):
    """Save a core-parse points image per 'test' question into *folder_path*.

    Skips questions whose image already exists (acts as a resumable cache).
    Generalized: the output folder is now a parameter instead of a
    hard-coded constant; the default preserves the original behavior.

    :param folder_path: directory where "<question.key>.png" files are written.
    """
    question_dict = geoserver_interface.download_questions('test')
    for key in sorted(question_dict.keys()):
        print(key)
        question = question_dict[key]
        file_path = os.path.join(folder_path, str(question.key) + ".png")
        if os.path.isfile(file_path):
            continue  # already rendered on a previous run
        image_segment_parse = parse_image_segments(open_image(question.diagram_path))
        primitive_parse = parse_primitives(image_segment_parse)
        selected = select_primitives(primitive_parse)
        core_parse = parse_core(selected)
        image = core_parse.get_image_points()
        cv2.imwrite(file_path, image)
def save_parse_core():
    """Save a core-parse points image per 'test' question, skipping ones
    already on disk.

    NOTE(review): output folder is hard-coded to a developer machine.
    Duplicate of the other save_parse_core in this file.
    """
    question_dict = geoserver_interface.download_questions('test')
    folder_path = "/Users/minjoon/Desktop/core/"
    for key in sorted(question_dict.keys()):
        print(key)
        question = question_dict[key]
        file_path = os.path.join(folder_path, str(question.key) + ".png")
        # Resumable: skip questions already rendered.
        if os.path.isfile(file_path):
            continue
        image_segment_parse = parse_image_segments(
            open_image(question.diagram_path))
        primitive_parse = parse_primitives(image_segment_parse)
        selected = select_primitives(primitive_parse)
        core_parse = parse_core(selected)
        image = core_parse.get_image_points()
        cv2.imwrite(file_path, image)
def save_questions(query): questions = geoserver_interface.download_questions(query) base_path = os.path.join("../../temp/data/", query) if not os.path.exists(base_path): os.mkdir(base_path) for index, (key, question) in enumerate(questions.iteritems()): print key folder_name = get_number_string(index, 3) json_path = os.path.join(base_path, folder_name + ".json") diagram_path = os.path.join(base_path, folder_name + ".png") d = {} d['key'] = question.key d['text'] = question.text d['choices'] = question.choices d['answer'] = str(int(question.answer)) json.dump(d, open(json_path, 'wb')) shutil.copyfile(question.diagram_path, diagram_path)
def test_opt_model():
    """Precision/recall sweep for the optimization model on the 'test'
    split, plotted over thresholds in [-2, 2].

    NOTE(review): duplicate of the other test_opt_model in this file.
    """
    query = 'test'
    all_questions = geoserver_interface.download_questions(query)
    all_syntax_parses = questions_to_syntax_parses(all_questions)
    all_annotations = geoserver_interface.download_semantics(query)
    all_labels = geoserver_interface.download_labels(query)
    # 50/50 split over parses, annotations and questions.
    (tr_s, tr_a, tr_q), (te_s, te_a, te_q) = split([all_syntax_parses, all_annotations, all_questions], 0.5)
    tm = train_tag_model(all_syntax_parses, all_annotations)
    cm = train_semantic_model(tm, tr_s, tr_a)
    # te_m = questions_to_match_parses(te_q, all_labels)
    prs = evaluate_opt_model(cm, te_s, te_a, all_questions, np.linspace(-2,2,21))
    ps, rs = zip(*prs.values())
    plt.plot(prs.keys(), ps, 'o', label='precision')
    plt.plot(prs.keys(), rs, 'o', label='recall')
    plt.legend(bbox_to_anchor=(0., 1.02, 1., .102), loc=3, ncol=2, mode="expand", borderaxespad=0.)
    plt.show()
def test_rule_model():
    """Plot precision/recall curves per rule category (core-tree, unary,
    core, is, cc) over 101 thresholds in [0, 1].

    NOTE(review): duplicate of the other test_rule_model in this file.
    """
    query = 'test'
    all_questions = geoserver_interface.download_questions(query)
    all_syntax_parses = questions_to_syntax_parses(all_questions)
    all_annotations = geoserver_interface.download_semantics(query)
    all_labels = geoserver_interface.download_labels(query)
    (tr_s, tr_a), (te_s, te_a) = split((all_syntax_parses, all_annotations), 0.5)
    tm = train_tag_model(all_syntax_parses, all_annotations)
    cm = train_semantic_model(tm, tr_s, tr_a)
    unary_prs, core_prs, is_prs, cc_prs, core_tree_prs = evaluate_rule_model(cm, te_s, te_a, np.linspace(0,1,101))
    plt.plot(core_tree_prs.keys(), core_tree_prs.values(), 'o')
    plt.show()
    plt.plot(unary_prs.keys(), unary_prs.values(), 'o')
    plt.show()
    plt.plot(core_prs.keys(), core_prs.values(), 'o')
    plt.show()
    plt.plot(is_prs.keys(), is_prs.values(), 'o')
    plt.show()
    plt.plot(cc_prs.keys(), cc_prs.values(), 'o')
    plt.show()
def test_zip_diagrams(zip_path='/Users/minjoon/Desktop/development.zip'):
    """Zip the diagrams of the 'development' questions into *zip_path*.

    Generalized: the archive location is now a parameter instead of a
    hard-coded constant; the default preserves the original behavior.

    :param zip_path: destination path of the generated zip archive.
    """
    questions = geoserver_interface.download_questions(['development'])
    zip_diagrams(questions, zip_path)
def full_test():
    """Run the full end-to-end evaluation over a fixed test-question set.

    Trains (or loads cached) tag/semantic models, runs full_unit_test per
    question, prints running correct/penalized/error tallies, and writes the
    tested question ids to <demo_path>/dirs.json.

    NOTE(review): duplicate of the other full_test in this file; this copy
    uses load=True (reads syntax_parses.p / cm.p pickles from the cwd) and a
    hand-picked te_keys list.
    """
    start = time.time()
    # Hand-curated question-id buckets; trailing comments record excluded ids.
    ids1 = [963, 968, 969, 971, 973, 974, 977, 985, 990, 993, 995, 1000, 1003, 1004, 1006, 1014, 1017, 1018, 1020,] #1011
    ids2 = [1025, 1030, 1031, 1032, 1035, 1038, 1039, 1040, 1042, 1043, 1045, 1047, 1050, 1051, 1052, 1054, 1056, 1058,] #1027, 1037
    ids3 = [1063, 1065, 1067, 1076, 1089, 1095, 1096, 1097, 1099, 1102, 1105, 1106, 1107, 1108, 1110, 1111, 1119, 1120, 1121] # 1103
    ids4 = [1122, 1123, 1124, 1127, 1141, 1142, 1143, 1145, 1146, 1147, 1149, 1150, 1151, 1152, 1070, 1083, 1090, 1092, 1144, 1148]
    ids5 = [975, 979, 981, 988, 989, 997, 1005, 1019, 1029, 1044, 1046, 1057, 1059, 1064, 1087, 1104, 1113, 1114, 1129, 1071]
    ids6 = [1100, 1101, 1109, 1140, 1053]
    # These assignments are overwritten below by the query-based splits.
    tr_ids = ids4+ids5+ids6
    te_ids = ids1+ids2+ids3
    te_ids = ids4+ids6
    # load=True: reuse cached pickles instead of re-parsing/re-training.
    load = True
    tr_questions = geoserver_interface.download_questions('aaai')
    te_questions = geoserver_interface.download_questions('emnlp')
    # Only these four ids are actually evaluated in this copy.
    te_keys = [968, 971, 973, 1018]
    all_questions = dict(tr_questions.items() + te_questions.items())
    tr_ids = tr_questions.keys()
    te_ids = te_questions.keys()
    if not load:
        all_syntax_parses = questions_to_syntax_parses(all_questions)
        pickle.dump(all_syntax_parses, open('syntax_parses.p', 'wb'))
    else:
        all_syntax_parses = pickle.load(open('syntax_parses.p', 'rb'))
    all_annotations = geoserver_interface.download_semantics()
    all_labels = geoserver_interface.download_labels()
    correct = 0
    penalized = 0
    error = 0
    total = len(te_keys)
    #(te_s, te_a, te_l), (tr_s, tr_a, trl_l) = split([all_syntax_parses, all_annotations, all_labels], 0.7)
    tr_s = {id_: all_syntax_parses[id_] for id_ in tr_ids}
    tr_a = {id_: all_annotations[id_] for id_ in tr_ids}
    te_s = {id_: all_syntax_parses[id_] for id_ in te_ids}
    if not load:
        tm = train_tag_model(all_syntax_parses, all_annotations)
        cm = train_semantic_model(tm, tr_s, tr_a)
        pickle.dump(cm, open('cm.p', 'wb'))
    else:
        cm = pickle.load(open('cm.p', 'rb'))
    print "test ids: %s" % ", ".join(str(k) for k in te_s.keys())
    for idx, id_ in enumerate(te_keys):
        question = all_questions[id_]
        label = all_labels[id_]
        id_ = str(id_)
        print "-"*80
        print "id: %s" % id_
        result = full_unit_test(cm, question, label)
        print result.message
        print result
        if result.error: error += 1
        if result.penalized: penalized += 1
        if result.correct: correct += 1
        print "-"*80
        print "%d/%d complete, %d correct, %d penalized, %d error" % (idx+1, len(te_keys), correct, penalized, error)
    end = time.time()
    print "-"*80
    print "duration:\t%.1f" % (end - start)
    out = "total:\t\t%d\npenalized:\t%d\ncorrect:\t%d\nerror:\t\t%d" % (total, penalized, correct, error)
    print out
    # Record which question ids were evaluated, for the demo viewer.
    dirs_path = os.path.join(demo_path, 'dirs.json')
    json.dump([str(x) for x in te_keys], open(dirs_path, 'wb'))
def test_parse_primitives():
    """Parse and display the primitives of question 1037's diagram."""
    question = geoserver_interface.download_questions(1037).values()[0]
    segment_parse = parse_image_segments(open_image(question.diagram_path))
    parse_primitives(segment_parse).display_primitives()
def test_geoserver_interface():
    """Smoke test of the geoserver API: download the 'annotated' questions
    and all semantics, then print both (semantics first)."""
    questions = geoserver_interface.download_questions(["annotated"])
    semantics = geoserver_interface.download_semantics()
    print(semantics)
    print(questions)
def test_parse_image_segments():
    """Segment question 1037's image and display the binarized diagram
    segment plus each label segment.

    NOTE(review): duplicate of the other test_parse_image_segments.
    """
    question = geoserver_interface.download_questions(1037).values()[0]
    image_segment_parse = parse_image_segments(open_image(question.diagram_path))
    image_segment_parse.diagram_image_segment.display_binarized_segmented_image()
    for idx, label_image_segment in image_segment_parse.label_image_segments.iteritems():
        label_image_segment.display_segmented_image()
def save_parse_image_segments():
    """Save the segmented diagram image of question 1037 as a PNG.

    NOTE(review): output path is hard-coded to a developer machine.
    Duplicate of the other save_parse_image_segments in this file.
    """
    question = geoserver_interface.download_questions(1037).values()[0]
    image_segment_parse = parse_image_segments(open_image(question.diagram_path))
    image = image_segment_parse.diagram_image_segment.segmented_image
    file_path = "/Users/minjoon/Desktop/diagram.png"
    cv2.imwrite(file_path, image)
def full_test():
    """Run the full end-to-end evaluation over the 'official' test set.

    Re-parses and re-trains everything (load=False), caches the results as
    pickles, runs full_unit_test per question, prints running tallies, and
    writes the tested ids to <demo_path>/dirs.json.

    NOTE(review): duplicate of the other full_test in this file; this copy
    uses load=False and evaluates every 'official' question.
    """
    start = time.time()
    # Hand-curated question-id buckets; trailing comments record excluded ids.
    ids1 = [963, 968, 969, 971, 973, 974, 977, 985, 990, 993, 995, 1000, 1003, 1004, 1006, 1014, 1017, 1018, 1020,] #1011
    ids2 = [1025, 1030, 1031, 1032, 1035, 1038, 1039, 1040, 1042, 1043, 1045, 1047, 1050, 1051, 1052, 1054, 1056, 1058,] #1027, 1037
    ids3 = [1063, 1065, 1067, 1076, 1089, 1095, 1096, 1097, 1099, 1102, 1105, 1106, 1107, 1108, 1110, 1111, 1119, 1120, 1121] # 1103
    ids4 = [1122, 1123, 1124, 1127, 1141, 1142, 1143, 1145, 1146, 1147, 1149, 1150, 1151, 1152, 1070, 1083, 1090, 1092, 1144, 1148]
    ids5 = [975, 979, 981, 988, 989, 997, 1005, 1019, 1029, 1044, 1046, 1057, 1059, 1064, 1087, 1104, 1113, 1114, 1129, 1071]
    ids6 = [1100, 1101, 1109, 1140, 1053]
    # These assignments are overwritten below by the query-based splits.
    tr_ids = ids4 + ids5 + ids6
    te_ids = ids1 + ids2 + ids3
    te_ids = ids4 + ids6
    # load=False: recompute parses and models, then cache them as pickles.
    load = False
    tr_questions = geoserver_interface.download_questions('aaai')
    te_questions = geoserver_interface.download_questions('official')
    te_keys = te_questions.keys()  # [968, 971, 973, 1018]
    all_questions = dict(tr_questions.items() + te_questions.items())
    tr_ids = tr_questions.keys()
    te_ids = te_questions.keys()
    if not load:
        all_syntax_parses = questions_to_syntax_parses(all_questions)
        pickle.dump(all_syntax_parses, open('syntax_parses.p', 'wb'))
    else:
        all_syntax_parses = pickle.load(open('syntax_parses.p', 'rb'))
    all_annotations = geoserver_interface.download_semantics()
    all_labels = geoserver_interface.download_labels()
    correct = 0
    penalized = 0
    error = 0
    total = len(te_keys)
    #(te_s, te_a, te_l), (tr_s, tr_a, trl_l) = split([all_syntax_parses, all_annotations, all_labels], 0.7)
    tr_s = {id_: all_syntax_parses[id_] for id_ in tr_ids}
    tr_a = {id_: all_annotations[id_] for id_ in tr_ids}
    te_s = {id_: all_syntax_parses[id_] for id_ in te_ids}
    if not load:
        tm = train_tag_model(all_syntax_parses, all_annotations)
        cm = train_semantic_model(tm, tr_s, tr_a)
        pickle.dump(cm, open('cm.p', 'wb'))
    else:
        cm = pickle.load(open('cm.p', 'rb'))
    print "test ids: %s" % ", ".join(str(k) for k in te_s.keys())
    for idx, id_ in enumerate(te_keys):
        question = all_questions[id_]
        label = all_labels[id_]
        id_ = str(id_)
        print "-" * 80
        print "id: %s" % id_
        result = full_unit_test(cm, question, label)
        print result.message
        print result
        if result.error: error += 1
        if result.penalized: penalized += 1
        if result.correct: correct += 1
        print "-" * 80
        print "%d/%d complete, %d correct, %d penalized, %d error" % (idx + 1, len(te_keys), correct, penalized, error)
    end = time.time()
    print "-" * 80
    print "duration:\t%.1f" % (end - start)
    out = "total:\t\t%d\npenalized:\t%d\ncorrect:\t%d\nerror:\t\t%d" % (total, penalized, correct, error)
    print out
    # Record which question ids were evaluated, for the demo viewer.
    dirs_path = os.path.join(demo_path, 'dirs.json')
    json.dump([str(x) for x in te_keys], open(dirs_path, 'wb'))
def test_parse_primitives():
    """Parse and display the primitives of question 1037's diagram.

    NOTE(review): duplicate of the other test_parse_primitives in this file.
    """
    question = geoserver_interface.download_questions(1037).values()[0]
    image_segment_parse = parse_image_segments(open_image(question.diagram_path))
    primitive_parse = parse_primitives(image_segment_parse)
    primitive_parse.display_primitives()