def _annotated_unit_test(query): questions = geoserver_interface.download_questions(query) all_annotations = geoserver_interface.download_semantics(query) pk, question = questions.items()[0] choice_formulas = get_choice_formulas(question) label_data = geoserver_interface.download_labels(pk)[pk] diagram = open_image(question.diagram_path) graph_parse = diagram_to_graph_parse(diagram) core_parse = graph_parse.core_parse # core_parse.display_points() # core_parse.primitive_parse.display_primitives() match_parse = parse_match_from_known_labels(graph_parse, label_data) match_formulas = parse_match_formulas(match_parse) diagram_formulas = parse_confident_formulas(graph_parse) all_formulas = match_formulas + diagram_formulas for number, sentence_words in question.sentence_words.iteritems(): syntax_parse = stanford_parser.get_best_syntax_parse(sentence_words) annotation_nodes = [annotation_to_semantic_tree(syntax_parse, annotation) for annotation in all_annotations[pk][number].values()] expr_formulas = {key: prefix_to_formula(expression_parser.parse_prefix(expression)) for key, expression in question.sentence_expressions[number].iteritems()} truth_expr_formulas, value_expr_formulas = _separate_expr_formulas(expr_formulas) text_formula_parse = semantic_trees_to_text_formula_parse(annotation_nodes) completed_formulas = complete_formulas(text_formula_parse) grounded_formulas = [ground_formula(match_parse, formula, value_expr_formulas) for formula in completed_formulas+truth_expr_formulas] text_formulas = filter_formulas(flatten_formulas(grounded_formulas)) all_formulas.extend(text_formulas) reduced_formulas = reduce_formulas(all_formulas) for reduced_formula in reduced_formulas: score = evaluate(reduced_formula, core_parse.variable_assignment) scores = [evaluate(child, core_parse.variable_assignment) for child in reduced_formula.children] print reduced_formula, score, scores # core_parse.display_points() ans = solve(reduced_formulas, choice_formulas, 
assignment=core_parse.variable_assignment) print "ans:", ans if choice_formulas is None: attempted = True if abs(ans - float(question.answer)) < 0.01: correct = True else: correct = False else: attempted = True c = max(ans.iteritems(), key=lambda pair: pair[1].conf)[0] if c == int(question.answer): correct = True else: correct = False result = SimpleResult(query, False, attempted, correct) return result
def query(variable_handler, prior_atoms, query_atom, max_num_resets=10, tol=10**-3, verbose=False):
    """
    Find an assignment for `prior_atoms` and report whether `query_atom`
    already holds under it.

    :param variable_handler: VariableHandler passed to `find_assignment`.
    :param prior_atoms: list of atoms solved on their own first.
    :param query_atom: FunctionNode tested against the prior assignment and
        then appended to the prior atoms for a second solve.
    :param max_num_resets: forwarded to `find_assignment`.
    :param tol: forwarded to `find_assignment`; also the bound on the norm of
        the evaluated query atom.
    :param verbose: forwarded to `find_assignment`.
    :return: (assignment, sat, unique). When `unique` is truthy the prior
        assignment is returned and `sat` is forced to True; otherwise the
        assignment and flag of the second solve are returned.
    """
    assert isinstance(variable_handler, VariableHandler)
    assert isinstance(query_atom, FunctionNode)

    prior_assignment, prior_sat = find_assignment(
        variable_handler, prior_atoms, max_num_resets, tol, verbose)

    unique = prior_sat
    if prior_sat:
        unique = evaluate(query_atom, prior_assignment).norm < tol

    # The joint solve always runs, even when its result ends up unused.
    all_assignment, sat = find_assignment(
        variable_handler, prior_atoms + [query_atom], max_num_resets, tol, verbose)

    if not unique:
        return all_assignment, sat, unique

    # If unique answer exists, then enforce satisfiability. Just in case of numerical errors.
    return prior_assignment, True, unique
def evaluate(self, variable_node):
    """
    Evaluate `variable_node` under this object's assignment, computing the
    assignment with `find_assignment` on first use.

    :param variable_node: node registered with `self.variable_handler`; the
        value returned by `add` is the one evaluated.
    :return: result of the two-argument `evaluate` call.
    """
    registered_node = self.variable_handler.add(variable_node)
    if not self.assigned:
        self.assignment = find_assignment(
            self.variable_handler, self.atoms, self.max_num_resets, self.tol)
        self.assigned = True
    # NOTE(review): the source was whitespace-collapsed; this assert is assumed
    # to sit after the `if`, not inside it -- confirm against the original.
    assert self.assignment is not None
    # The two-argument `evaluate` is presumably the module-level function.
    return evaluate(registered_node, self.assignment)
def query_invar(self, query_atom):
    """
    Tell whether `query_atom` evaluates with a norm below `self.tol` under
    this object's assignment, computing the assignment on first use.

    :param query_atom: atom registered with `self.variable_handler` and then
        evaluated.
    :return: False when the stored assignment is falsy, otherwise the result
        of the norm comparison.
    """
    registered_atom = self.variable_handler.add(query_atom)
    if not self.assigned:
        self.assignment = find_assignment(
            self.variable_handler, self.atoms, self.max_num_resets, self.tol)
        self.assigned = True

    if self.assignment:
        return evaluate(registered_atom, self.assignment).norm < self.tol
    return False
def query_invar(self, query_atom, th=None):
    """
    Tell whether `query_atom` evaluates with a norm below `self.tol` under
    `self.assignment`, provided `self.is_sat(th)` holds.

    :param query_atom: atom registered with `self.variable_handler` (this
        happens before the `is_sat` check) and then evaluated.
    :param th: forwarded unchanged to `self.is_sat`.
    :return: False when `is_sat` is falsy, otherwise the result of the norm
        comparison.
    """
    registered_atom = self.variable_handler.add(query_atom)
    if not self.is_sat(th):
        return False
    return evaluate(registered_atom, self.assignment).norm < self.tol
def func(vector): print "dim:", np.shape(vector), vector d = variable_handler.vector_to_dict(vector) return sum(evaluate(atom, d).norm for atom in atoms)
def _ground_variable(match_parse, variable, references=None):
    """
    Ground a single leaf `variable` of a text formula against the diagram.

    Depending on the variable's return type and the form of its name, the
    result is the variable itself, a formula built from matched diagram
    entities, or a SetNode holding every diagram instance of that type.

    :param match_parse: MatchParse giving access to the graph parse, the core
        parse and the label dictionaries (`match_dict`, `point_key_dict`).
    :param variable: FormulaNode leaf to ground.
    :param references: mapping from reference names (e.g. "@v_1") to the
        formulas they stand for; treated as empty when omitted.
    :return: the grounded formula, a SetNode, or `variable` unchanged.
    :raises Exception: when no grounding rule applies to the variable. Label
        lookups are not guarded, so an unknown label presumably surfaces as a
        KeyError.
    """
    assert isinstance(variable, FormulaNode)
    assert isinstance(match_parse, MatchParse)
    if references is None:
        # A fresh dict per call instead of a shared mutable default argument.
        references = {}
    return_type = variable.return_type
    graph_parse = match_parse.graph_parse
    core_parse = graph_parse.core_parse
    variable_signature = variable.signature

    if variable_signature.id in signatures:
        # pass What, Which, etc.
        return variable
    elif variable_signature.id in match_parse.graph_parse.core_parse.variable_assignment.keys():
        # pass point_0, point_1, etc.
        return variable
    elif isinstance(variable_signature, VariableSignature) and variable_signature.is_ref():
        # @v_1, etc.
        return references[variable_signature.name]
    elif return_type == 'number':
        if is_number(variable_signature.name):
            return variable
        elif len(variable_signature.name) == 1:
            # x, y, z, etc. Need to redefine id (id shouldn't be tuple).
            return FormulaNode(
                VariableSignature(variable_signature.name, return_type), [])
        elif len(variable_signature.name) == 2 and variable_signature.name.isupper():
            # A two-letter upper-case number is read as the length of that line.
            new_leaf = FormulaNode(
                VariableSignature(variable.signature.id, "line",
                                  name=variable.signature.name), [])
            return FormulaNode(signatures['LengthOf'],
                               [_ground_variable(match_parse, new_leaf)])
    elif return_type == 'point':
        if len(variable_signature.name) == 1:
            return match_parse.match_dict[variable_signature.name][0]
        else:
            points = get_all_instances(graph_parse, 'point', True)
            return SetNode(points.values())
    elif return_type == 'line':
        if len(variable_signature.name) == 1 and variable_signature.name in match_parse.match_dict:
            line = match_parse.match_dict[variable_signature.name][0]
            return line
        elif len(variable_signature.name) == 2 and variable_signature.name.isupper():
            label_a, label_b = variable_signature.name
            point_a = match_parse.match_dict[label_a][0]
            point_b = match_parse.match_dict[label_b][0]
            return FormulaNode(signatures['Line'], [point_a, point_b])
            # Disabled in the original (it was parked inside a string literal):
            # elif variable_signature.name == 'hypotenuse':
            #     def func(x):
            #         l, t = x
            #         formula = FormulaNode(signatures['IsHypotenuseOf'], (l,t))
            #         tv = core_parse.evaluate(formula)
            #         return tv.norm
            #     lines = get_all_instances(graph_parse, 'line', True).values()
            #     triangles = get_all_instances(graph_parse, 'triangle', True).values()
            #     line, triangle = min(itertools.product(lines, triangles), key=func)
            #     return line
        else:
            lines = get_all_instances(graph_parse, 'line', True)
            return SetNode(lines.values())
    elif return_type == 'circle':
        if len(variable_signature.name) == 1:
            center_label = variable_signature.name
            center = match_parse.match_dict[center_label][0]
            # The index is parsed from the part of the signature name after "_".
            center_idx = int(center.signature.name.split("_")[1])
            return graph_parse.circle_dict[center_idx][0]['variable']
            # radius = match_parse.graph_parse.core_parse.radius_variables[center_idx][0]
        elif variable_signature.name == 'circle':
            circles = get_all_instances(graph_parse, 'circle', True)
            return SetNode(circles.values())
        else:
            raise Exception(
                "failed to ground circle variable: %r" % (variable,))
    elif return_type == 'angle':
        # TODO :
        if len(variable_signature.name) == 3 and variable_signature.name.isupper():
            label_a, label_b, label_c = variable_signature.name
            point_a = match_parse.match_dict[label_a][0]
            point_b = match_parse.match_dict[label_b][0]
            point_c = match_parse.match_dict[label_c][0]
            out = FormulaNode(signatures['Angle'], [point_a, point_b, point_c])
            measure = evaluate(FormulaNode(signatures['MeasureOf'], [out]),
                               core_parse.variable_assignment)
            if measure > np.pi:
                # Use the reversed point order when the measure exceeds pi.
                out = FormulaNode(signatures['Angle'], [point_c, point_b, point_a])
            return out
        elif len(variable_signature.name) == 1 and variable_signature.name.isupper():
            angles = get_all_instances(graph_parse, 'angle', True)
            p = match_parse.match_dict[variable_signature.name][0]
            # First angle whose middle child is `p` and whose measure is <= pi.
            for formula in angles.values():
                if formula.children[1].signature == p.signature:
                    measure = evaluate(
                        FormulaNode(signatures['MeasureOf'], [formula]),
                        core_parse.variable_assignment)
                    if measure > np.pi:
                        continue
                    return formula
        elif len(variable_signature.name) == 1 and variable_signature.name.islower():
            return match_parse.match_dict[variable_signature.name][0]
    elif return_type == 'arc':
        if len(variable_signature.name) == 2 and variable_signature.name.isupper():
            point_keys = [match_parse.point_key_dict[label]
                          for label in variable_signature.name]
            test_arc = get_instances(graph_parse, 'arc', False, *point_keys).values()[0]
            if MeasureOf(test_arc) > np.pi:
                point_keys = [point_keys[1], point_keys[0]]
            arc = get_instances(graph_parse, 'arc', True, *point_keys).values()[0]
            return arc
        else:
            arcs = get_all_instances(graph_parse, 'arc', True)
            return SetNode(arcs.values())
    elif return_type == 'triangle':
        if variable_signature.name.isupper() and len(variable_signature.name) == 3:
            point_keys = [match_parse.point_key_dict[label]
                          for label in variable_signature.name]
            triangles = get_instances(graph_parse, 'triangle', True, *point_keys)
            return triangles.values()[0]
        else:
            triangles = get_all_instances(graph_parse, 'triangle', True)
            return SetNode(triangles.values())
    elif return_type == 'quad':
        if variable_signature.name.isupper() and len(variable_signature.name) == 4:
            point_keys = [match_parse.point_key_dict[label]
                          for label in variable_signature.name]
            quads = get_instances(graph_parse, 'quad', True, *point_keys)
            return quads.values()[0]
        else:
            quads = get_all_instances(graph_parse, 'quad', True)
            return SetNode(quads.values())
    elif return_type == 'hexagon':
        if variable_signature.name.isupper() and len(variable_signature.name) == 6:
            point_keys = [match_parse.point_key_dict[label]
                          for label in variable_signature.name]
            hexagons = get_instances(graph_parse, 'hexagon', True, *point_keys)
            return hexagons.values()[0]
        else:
            hexagons = get_all_instances(graph_parse, 'hexagon', True)
            return SetNode(hexagons.values())
    elif return_type == 'polygon':
        if variable_signature.name.isupper():
            point_keys = [match_parse.point_key_dict[label]
                          for label in variable_signature.name]
            polygons = get_instances(graph_parse, 'polygon', True, *point_keys)
            return polygons.values()[0]
        else:
            polygons = get_all_instances(graph_parse, 'polygon', True)
            return SetNode(polygons.values())
    elif return_type == 'twod':
        circles = get_all_instances(graph_parse, 'circle', True)
        polygons = get_all_instances(graph_parse, 'polygon', True)
        return SetNode(polygons.values() + circles.values())
    elif return_type == 'oned':
        lines = get_all_instances(graph_parse, 'line', True)
        arcs = get_all_instances(graph_parse, 'arc', True)
        return SetNode(lines.values() + arcs.values())

    # Reached by unknown return types and by known types whose name matched no
    # rule above (e.g. a three-letter 'number').
    #logging.warning("failed to ground variable: %r" % variable)
    raise Exception("failed to ground variable: %r" % (variable,))
def _full_unit_test(combined_model, question, label_data): assert isinstance(combined_model, CombinedModel) base_path = os.path.join(demo_path, str(question.key)) if not os.path.exists(base_path): os.mkdir(base_path) question_path = os.path.join(base_path, 'question.json') text_parse_path = os.path.join(base_path, 'text_parse.json') diagram_parse_path = os.path.join(base_path, 'diagram_parse.json') optimized_path = os.path.join(base_path, 'optimized.json') entity_list_path = os.path.join(base_path, 'entity_map.json') diagram_path = os.path.join(base_path, 'diagram.png') solution_path = os.path.join(base_path, 'solution.json') shutil.copy(question.diagram_path, diagram_path) text_parse_list = [] diagram_parse_list = [] optimized_list = [] entity_list = [] solution = "" json.dump(question._asdict(), open(question_path, 'wb')) choice_formulas = get_choice_formulas(question) match_parse = question_to_match_parse(question, label_data) match_formulas = parse_match_formulas(match_parse) graph_parse = match_parse.graph_parse core_parse = graph_parse.core_parse # core_parse.display_points() # core_parse.primitive_parse.display_primitives() # opt_model = TextGreedyOptModel(combined_model) diagram_formulas = parse_confident_formulas(match_parse.graph_parse) all_formulas = set(match_formulas + diagram_formulas) opt_model = FullGreedyOptModel(combined_model, match_parse) for number, sentence_words in question.sentence_words.iteritems(): syntax_parse = stanford_parser.get_best_syntax_parse(sentence_words) expr_formulas = { key: prefix_to_formula(expression_parser.parse_prefix(expression)) for key, expression in question.sentence_expressions[number].iteritems() } truth_expr_formulas, value_expr_formulas = _separate_expr_formulas( expr_formulas) semantic_forest = opt_model.combined_model.get_semantic_forest( syntax_parse) truth_semantic_trees = semantic_forest.get_semantic_trees_by_type( "truth") is_semantic_trees = semantic_forest.get_semantic_trees_by_type("is") cc_trees = set( 
t for t in semantic_forest.get_semantic_trees_by_type('cc') if opt_model.combined_model.get_tree_score(t) > 0.01) for cc_tree in cc_trees: print "cc tree:", cc_tree, opt_model.combined_model.get_tree_score( cc_tree) bool_semantic_trees = opt_model.optimize( truth_semantic_trees.union(is_semantic_trees), 0, cc_trees) # semantic_trees = bool_semantic_trees.union(cc_trees) for t in truth_semantic_trees.union(is_semantic_trees).union(cc_trees): text_parse_list.append({ 'simple': t.simple_repr(), 'tree': t.serialized(), 'sentence_number': number, 'score': opt_model.combined_model.get_tree_score(t) }) diagram_score = opt_model.get_diagram_score( t.to_formula(), cc_trees) if diagram_score is not None: diagram_parse_list.append({ 'simple': t.simple_repr(), 'tree': t.serialized(), 'sentence_number': number, 'score': diagram_score }) local_entities = semantic_tree_to_serialized_entities( match_parse, t, number, value_expr_formulas) entity_list.extend(local_entities) for t in bool_semantic_trees: optimized_list.append({ 'simple': t.simple_repr(), 'tree': t.serialized(), 'sentence_number': number, 'score': opt_model.get_magic_score(t, cc_trees) }) for key, f in expr_formulas.iteritems(): if key.startswith("v"): pass index = (i for i, word in sentence_words.iteritems() if word == key).next() tree = formula_to_semantic_tree(f, syntax_parse, (index, index + 1)) print "f and t:", f, tree text_parse_list.append({ 'simple': f.simple_repr(), 'tree': tree.serialized(), 'sentence_number': number, 'score': 1.0 }) optimized_list.append({ 'simple': f.simple_repr(), 'tree': tree.serialized(), 'sentence_number': number, 'score': 1.0 }) local_entities = formula_to_serialized_entities( match_parse, f, tree, number) print "local entities:", local_entities entity_list.extend(local_entities) core_formulas = set(t.to_formula() for t in bool_semantic_trees) cc_formulas = set(t.to_formula() for t in cc_trees) augmented_formulas = augment_formulas(core_formulas) completed_formulas = 
complete_formulas(augmented_formulas, cc_formulas) print "completed formulas:" for f in completed_formulas: print f print "" grounded_formulas = ground_formulas( match_parse, completed_formulas + truth_expr_formulas, value_expr_formulas) text_formulas = filter_formulas(flatten_formulas(grounded_formulas)) all_formulas = all_formulas.union(text_formulas) reduced_formulas = all_formulas # reduce_formulas(all_formulas) for reduced_formula in reduced_formulas: if reduced_formula.is_grounded(core_parse.variable_assignment.keys()): score = evaluate(reduced_formula, core_parse.variable_assignment) scores = [ evaluate(child, core_parse.variable_assignment) for child in reduced_formula.children ] else: score = None scores = None solution += repr(reduced_formula) + '\n' print reduced_formula, score, scores solution = solution.rstrip() # core_parse.display_points() json.dump(diagram_parse_list, open(diagram_parse_path, 'wb')) json.dump(optimized_list, open(optimized_path, 'wb')) json.dump(text_parse_list, open(text_parse_path, 'wb')) json.dump(entity_list, open(entity_list_path, 'wb')) json.dump(solution, open(solution_path, 'wb')) # return SimpleResult(question.key, False, False, True) # Early termination print "Solving..." ans = solve(reduced_formulas, choice_formulas, assignment=None) #core_parse.variable_assignment) print "ans:", ans if choice_formulas is None: penalized = False if Equals(ans, float(question.answer)).conf > 0.98: correct = True else: correct = False else: idx, tv = max(ans.iteritems(), key=lambda pair: pair[1].conf) if tv.conf > 0.98: if idx == int(float(question.answer)): correct = True penalized = False else: correct = False penalized = True else: penalized = False correct = False result = SimpleResult(question.key, False, penalized, correct) return result
def _full_unit_test(combined_model, question, label_data): assert isinstance(combined_model, CombinedModel) choice_formulas = get_choice_formulas(question) match_parse = question_to_match_parse(question, label_data) match_formulas = parse_match_formulas(match_parse) graph_parse = match_parse.graph_parse core_parse = graph_parse.core_parse # core_parse.display_points() # core_parse.primitive_parse.display_primitives() # opt_model = TextGreedyOptModel(combined_model) diagram_formulas = parse_confident_formulas(match_parse.graph_parse) all_formulas = match_formulas + diagram_formulas opt_model = FullGreedyOptModel(combined_model, match_parse) for number, sentence_words in question.sentence_words.iteritems(): syntax_parse = stanford_parser.get_best_syntax_parse(sentence_words) expr_formulas = {key: prefix_to_formula(expression_parser.parse_prefix(expression)) for key, expression in question.sentence_expressions[number].iteritems()} truth_expr_formulas, value_expr_formulas = _separate_expr_formulas(expr_formulas) semantic_forest = opt_model.combined_model.get_semantic_forest(syntax_parse) truth_semantic_trees = semantic_forest.get_semantic_trees_by_type("truth") is_semantic_trees = semantic_forest.get_semantic_trees_by_type("is") cc_trees = set(t for t in semantic_forest.get_semantic_trees_by_type('cc') if opt_model.combined_model.get_tree_score(t) > 0.01) for cc_tree in cc_trees: print "cc tree:", cc_tree, opt_model.combined_model.get_tree_score(cc_tree) bool_semantic_trees = opt_model.optimize(truth_semantic_trees.union(is_semantic_trees), 0) # semantic_trees = bool_semantic_trees.union(cc_trees) core_formulas = set(t.to_formula() for t in bool_semantic_trees) cc_formulas = set(t.to_formula() for t in cc_trees) augmented_formulas = augment_formulas(core_formulas) completed_formulas = complete_formulas(augmented_formulas, cc_formulas) print "completed formulas:" for f in completed_formulas: print f print "" grounded_formulas = ground_formulas(match_parse, 
completed_formulas+truth_expr_formulas, value_expr_formulas) text_formulas = filter_formulas(flatten_formulas(grounded_formulas)) all_formulas.extend(text_formulas) reduced_formulas = all_formulas # reduce_formulas(all_formulas) for reduced_formula in reduced_formulas: if reduced_formula.is_grounded(core_parse.variable_assignment.keys()): score = evaluate(reduced_formula, core_parse.variable_assignment) scores = [evaluate(child, core_parse.variable_assignment) for child in reduced_formula.children] else: score = None scores = None print reduced_formula, score, scores # core_parse.display_points() ans = solve(reduced_formulas, choice_formulas, assignment=None)#core_parse.variable_assignment) print "ans:", ans if choice_formulas is None: penalized = False if Equals(ans, float(question.answer)).conf > 0.98: correct = True else: correct = False else: idx, tv = max(ans.iteritems(), key=lambda pair: pair[1].conf) if tv.conf > 0.98: if idx == int(question.answer): correct = True penalized = False else: correct = False penalized = True else: penalized = False correct = False result = SimpleResult(question.key, False, penalized, correct) return result
def evaluate(self, variable_node, th=None):
    """
    Evaluate `variable_node` under `self.assignment`, calling `self.solve()`
    first when nothing has been assigned yet.

    :param variable_node: node registered with `self.variable_handler`; the
        value returned by `add` is the one evaluated.
    :param th: accepted but not used by this method.
    :return: result of the two-argument `evaluate` call.
    """
    registered_node = self.variable_handler.add(variable_node)
    if self.assigned:
        return evaluate(registered_node, self.assignment)
    self.solve()
    return evaluate(registered_node, self.assignment)
def func(vector):
    """
    Sum the norms of all `atoms` evaluated at `vector`.

    `variable_handler`, `atoms` and `evaluate` come from the enclosing scope.
    The vector is converted to an assignment once per call rather than once
    per atom, since the conversion does not depend on the atom.
    """
    assignment = variable_handler.vector_to_dict(vector)
    return sum(evaluate(atom, assignment).norm for atom in atoms)
def evaluate(self, formula):
    """
    Evaluate `formula` under this object's `variable_assignment`.

    :param formula: passed as the first argument of the two-argument
        `evaluate` call.
    :return: whatever that call returns.
    """
    assignment = self.variable_assignment
    return evaluate(formula, assignment)
def _ground_variable(match_parse, variable, references=None):
    """
    Ground a single leaf `variable` of a text formula against the diagram.

    Depending on the variable's return type and the form of its name, the
    result is the variable itself, a formula built from matched diagram
    entities, or a SetNode holding every diagram instance of that type. When
    no rule applies, an error is logged and the variable is returned as is.

    :param match_parse: MatchParse giving access to the graph parse, the core
        parse and the label dictionaries (`match_dict`, `point_key_dict`).
    :param variable: FormulaNode leaf to ground.
    :param references: mapping from reference names (e.g. "@v_1") to the
        formulas they stand for; treated as empty when omitted.
    :return: the grounded formula, a SetNode, or `variable` unchanged.
    :raises Exception: for a 'circle' variable whose name is neither a single
        character nor "circle". Label lookups are mostly unguarded, so an
        unknown label presumably surfaces as a KeyError.
    """
    assert isinstance(variable, FormulaNode)
    assert isinstance(match_parse, MatchParse)
    if references is None:
        # A fresh dict per call instead of a shared mutable default argument.
        references = {}
    return_type = variable.return_type
    graph_parse = match_parse.graph_parse
    core_parse = graph_parse.core_parse
    variable_signature = variable.signature

    if variable_signature.id in signatures:
        # pass What, Which, etc.
        return variable
    elif variable_signature.id in match_parse.graph_parse.core_parse.variable_assignment.keys():
        # pass point_0, point_1, etc.
        return variable
    elif isinstance(variable_signature, VariableSignature) and variable_signature.is_ref():
        # @v_1, etc.
        return references[variable_signature.name]
    elif return_type == "number":
        if is_number(variable_signature.name):
            return variable
        elif len(variable_signature.name) == 1:
            # x, y, z, etc. Need to redefine id (id shouldn't be tuple).
            return FormulaNode(VariableSignature(variable_signature.name, return_type), [])
        elif len(variable_signature.name) == 2 and variable_signature.name.isupper():
            # A two-letter upper-case number is read as the length of that line.
            new_leaf = FormulaNode(
                VariableSignature(variable.signature.id, "line",
                                  name=variable.signature.name), [])
            return FormulaNode(signatures["LengthOf"],
                               [_ground_variable(match_parse, new_leaf)])
        else:
            # ABC: number -> just variable
            return variable
    elif return_type == "point":
        if len(variable_signature.name) == 1:
            return match_parse.match_dict[variable_signature.name][0]
        else:
            points = get_all_instances(graph_parse, "point", True)
            return SetNode(points.values())
    elif return_type == "line":
        if len(variable_signature.name) == 1 and variable_signature.name in match_parse.match_dict:
            line = match_parse.match_dict[variable_signature.name][0]
            return line
        elif len(variable_signature.name) == 2 and variable_signature.name.isupper():
            label_a, label_b = variable_signature.name
            point_a = match_parse.match_dict[label_a][0]
            point_b = match_parse.match_dict[label_b][0]
            return FormulaNode(signatures["Line"], [point_a, point_b])
        else:
            lines = get_all_instances(graph_parse, "line", True)
            return SetNode(lines.values())
    elif return_type == "circle":
        if len(variable_signature.name) == 1:
            center_label = variable_signature.name
            center = match_parse.match_dict[center_label][0]
            # The index is parsed from the part of the signature name after "_".
            center_idx = int(center.signature.name.split("_")[1])
            return graph_parse.circle_dict[center_idx][0]["variable"]
            # radius = match_parse.graph_parse.core_parse.radius_variables[center_idx][0]
        elif variable_signature.name == "circle":
            circles = get_all_instances(graph_parse, "circle", True)
            return SetNode(circles.values())
        else:
            raise Exception(
                "failed to ground circle variable: %r" % (variable,))
    elif return_type == "angle":
        # TODO :
        if len(variable_signature.name) == 3 and variable_signature.name.isupper():
            label_a, label_b, label_c = variable_signature.name
            point_a = match_parse.match_dict[label_a][0]
            point_b = match_parse.match_dict[label_b][0]
            point_c = match_parse.match_dict[label_c][0]
            out = FormulaNode(signatures["Angle"], [point_a, point_b, point_c])
            measure = evaluate(FormulaNode(signatures["MeasureOf"], [out]),
                               core_parse.variable_assignment)
            if measure > np.pi:
                # Use the reversed point order when the measure exceeds pi.
                out = FormulaNode(signatures["Angle"], [point_c, point_b, point_a])
            return out
        elif len(variable_signature.name) == 1 and variable_signature.name.isupper():
            angles = get_all_instances(graph_parse, "angle", True)
            p = match_parse.match_dict[variable_signature.name][0]
            # First angle whose middle child is `p` and whose measure is <= pi.
            for formula in angles.values():
                if formula.children[1].signature == p.signature:
                    measure = evaluate(
                        FormulaNode(signatures["MeasureOf"], [formula]),
                        core_parse.variable_assignment)
                    if measure > np.pi:
                        continue
                    return formula
        elif (
            len(variable_signature.name) == 1
            and variable_signature.name.islower()
            and variable_signature.name in match_parse.match_dict
        ):
            return match_parse.match_dict[variable_signature.name][0]
        else:
            angles = get_all_instances(graph_parse, "angle", True)
            return SetNode(angles.values())
    elif return_type == "arc":
        if len(variable_signature.name) == 2 and variable_signature.name.isupper():
            point_keys = [match_parse.point_key_dict[label]
                          for label in variable_signature.name]
            test_arc = get_instances(graph_parse, "arc", False, *point_keys).values()[0]
            if MeasureOf(test_arc) > np.pi:
                point_keys = [point_keys[1], point_keys[0]]
            arc = get_instances(graph_parse, "arc", True, *point_keys).values()[0]
            return arc
        else:
            arcs = get_all_instances(graph_parse, "arc", True)
            return SetNode(arcs.values())
    elif return_type == "triangle":
        if variable_signature.name.isupper() and len(variable_signature.name) == 3:
            point_keys = [match_parse.point_key_dict[label]
                          for label in variable_signature.name]
            triangles = get_instances(graph_parse, "triangle", True, *point_keys)
            return triangles.values()[0]
        else:
            triangles = get_all_instances(graph_parse, "triangle", True)
            return SetNode(triangles.values())
    elif return_type == "quad":
        if variable_signature.name.isupper() and len(variable_signature.name) == 4:
            point_keys = [match_parse.point_key_dict[label]
                          for label in variable_signature.name]
            quads = get_instances(graph_parse, "quad", True, *point_keys)
            return quads.values()[0]
        else:
            quads = get_all_instances(graph_parse, "quad", True)
            return SetNode(quads.values())
    elif return_type == "hexagon":
        if variable_signature.name.isupper() and len(variable_signature.name) == 6:
            point_keys = [match_parse.point_key_dict[label]
                          for label in variable_signature.name]
            hexagons = get_instances(graph_parse, "hexagon", True, *point_keys)
            return hexagons.values()[0]
        else:
            hexagons = get_all_instances(graph_parse, "hexagon", True)
            return SetNode(hexagons.values())
    elif return_type == "polygon":
        if variable_signature.name.isupper():
            point_keys = [match_parse.point_key_dict[label]
                          for label in variable_signature.name]
            polygons = get_instances(graph_parse, "polygon", True, *point_keys)
            return polygons.values()[0]
        else:
            polygons = get_all_instances(graph_parse, "polygon", True)
            return SetNode(polygons.values())
    elif return_type == "twod":
        circles = get_all_instances(graph_parse, "circle", True)
        polygons = get_all_instances(graph_parse, "polygon", True)
        return SetNode(polygons.values() + circles.values())
    elif return_type == "oned":
        lines = get_all_instances(graph_parse, "line", True)
        arcs = get_all_instances(graph_parse, "arc", True)
        return SetNode(lines.values() + arcs.values())

    # Reached by unknown return types and by the single-upper-case 'angle' case
    # when no angle matched. The variable goes to logging as an argument so it
    # is formatted only if the record is emitted.
    logging.error("failed to ground variable: %r", variable)
    return variable
def _full_unit_test(combined_model, question, label_data): assert isinstance(combined_model, CombinedModel) base_path = os.path.join(demo_path, str(question.key)) if not os.path.exists(base_path): os.mkdir(base_path) question_path = os.path.join(base_path, 'question.json') text_parse_path = os.path.join(base_path, 'text_parse.json') diagram_parse_path = os.path.join(base_path, 'diagram_parse.json') optimized_path = os.path.join(base_path, 'optimized.json') entity_list_path = os.path.join(base_path, 'entity_map.json') diagram_path = os.path.join(base_path, 'diagram.png') solution_path = os.path.join(base_path, 'solution.json') shutil.copy(question.diagram_path, diagram_path) text_parse_list = [] diagram_parse_list = [] optimized_list = [] entity_list = [] solution = "" json.dump(question._asdict(), open(question_path, 'wb')) choice_formulas = get_choice_formulas(question) match_parse = question_to_match_parse(question, label_data) match_formulas = parse_match_formulas(match_parse) graph_parse = match_parse.graph_parse core_parse = graph_parse.core_parse # core_parse.display_points() # core_parse.primitive_parse.display_primitives() # opt_model = TextGreedyOptModel(combined_model) diagram_formulas = parse_confident_formulas(match_parse.graph_parse) all_formulas = set(match_formulas + diagram_formulas) opt_model = FullGreedyOptModel(combined_model, match_parse) for number, sentence_words in question.sentence_words.iteritems(): syntax_parse = stanford_parser.get_best_syntax_parse(sentence_words) expr_formulas = {key: prefix_to_formula(expression_parser.parse_prefix(expression)) for key, expression in question.sentence_expressions[number].iteritems()} truth_expr_formulas, value_expr_formulas = _separate_expr_formulas(expr_formulas) semantic_forest = opt_model.combined_model.get_semantic_forest(syntax_parse) truth_semantic_trees = semantic_forest.get_semantic_trees_by_type("truth") is_semantic_trees = semantic_forest.get_semantic_trees_by_type("is") cc_trees = set(t for 
t in semantic_forest.get_semantic_trees_by_type('cc') if opt_model.combined_model.get_tree_score(t) > 0.01) for cc_tree in cc_trees: print "cc tree:", cc_tree, opt_model.combined_model.get_tree_score(cc_tree) bool_semantic_trees = opt_model.optimize(truth_semantic_trees.union(is_semantic_trees), 0, cc_trees) # semantic_trees = bool_semantic_trees.union(cc_trees) for t in truth_semantic_trees.union(is_semantic_trees).union(cc_trees): text_parse_list.append({'simple': t.simple_repr(), 'tree': t.serialized(), 'sentence_number': number, 'score': opt_model.combined_model.get_tree_score(t)}) diagram_score = opt_model.get_diagram_score(t.to_formula(), cc_trees) if diagram_score is not None: diagram_parse_list.append({'simple': t.simple_repr(), 'tree': t.serialized(), 'sentence_number': number, 'score': diagram_score}) local_entities = semantic_tree_to_serialized_entities(match_parse, t, number, value_expr_formulas) entity_list.extend(local_entities) for t in bool_semantic_trees: optimized_list.append({'simple': t.simple_repr(), 'tree': t.serialized(), 'sentence_number': number, 'score': opt_model.get_magic_score(t, cc_trees)}) for key, f in expr_formulas.iteritems(): if key.startswith("v"): pass index = (i for i, word in sentence_words.iteritems() if word == key).next() tree = formula_to_semantic_tree(f, syntax_parse, (index, index+1)) print "f and t:", f, tree text_parse_list.append({'simple': f.simple_repr(), 'tree': tree.serialized(), 'sentence_number': number, 'score': 1.0}) optimized_list.append({'simple': f.simple_repr(), 'tree': tree.serialized(), 'sentence_number': number, 'score': 1.0}) local_entities = formula_to_serialized_entities(match_parse, f, tree, number) print "local entities:", local_entities entity_list.extend(local_entities) core_formulas = set(t.to_formula() for t in bool_semantic_trees) cc_formulas = set(t.to_formula() for t in cc_trees) augmented_formulas = augment_formulas(core_formulas) completed_formulas = complete_formulas(augmented_formulas, 
cc_formulas) print "completed formulas:" for f in completed_formulas: print f print "" grounded_formulas = ground_formulas(match_parse, completed_formulas+truth_expr_formulas, value_expr_formulas) text_formulas = filter_formulas(flatten_formulas(grounded_formulas)) all_formulas = all_formulas.union(text_formulas) reduced_formulas = all_formulas # reduce_formulas(all_formulas) for reduced_formula in reduced_formulas: if reduced_formula.is_grounded(core_parse.variable_assignment.keys()): score = evaluate(reduced_formula, core_parse.variable_assignment) scores = [evaluate(child, core_parse.variable_assignment) for child in reduced_formula.children] else: score = None scores = None solution += repr(reduced_formula) + '\n' print reduced_formula, score, scores solution = solution.rstrip() # core_parse.display_points() json.dump(diagram_parse_list, open(diagram_parse_path, 'wb')) json.dump(optimized_list, open(optimized_path, 'wb')) json.dump(text_parse_list, open(text_parse_path, 'wb')) json.dump(entity_list, open(entity_list_path, 'wb')) json.dump(solution, open(solution_path, 'wb')) return SimpleResult(question.key, False, False, True) # Early termination print "Solving..." ans = solve(reduced_formulas, choice_formulas, assignment=None)#core_parse.variable_assignment) print "ans:", ans if choice_formulas is None: penalized = False if Equals(ans, float(question.answer)).conf > 0.98: correct = True else: correct = False else: idx, tv = max(ans.iteritems(), key=lambda pair: pair[1].conf) if tv.conf > 0.98: if idx == int(question.answer): correct = True penalized = False else: correct = False penalized = True else: penalized = False correct = False result = SimpleResult(question.key, False, penalized, correct) return result
def func(vector):
    """
    Sum the norms of all `atoms` evaluated at `vector`.

    `variable_handler`, `atoms` and `evaluate` come from the enclosing scope.
    The vector is converted to an assignment once per call rather than once
    per atom, since the conversion does not depend on the atom.
    """
    assignment = variable_handler.vector_to_dict(vector)
    return sum(evaluate(atom, assignment).norm for atom in atoms)