def formula_to_serialized_entities(match_parse, formula, tree, sentence_number):
    """
    Collect serialized records for the entity nodes of a grounded formula.

    The formula is grounded with ``ground_formulas`` and walked in lockstep
    with ``tree``. Every entity-typed formula node that the core parse can
    evaluate yields one dict with the keys "content", "coords" and
    "sentence_number".

    :param match_parse: object exposing ``graph_parse.core_parse``
    :param formula: formula to ground and walk
    :param tree: tree zipped against the grounded formula
    :param sentence_number: value copied verbatim into every record
    :return: list of entity dicts
    """
    core_parse = match_parse.graph_parse.core_parse
    offset = core_parse.image_segment_parse.diagram_image_segment.offset
    grounded_formula = ground_formulas(match_parse, [formula])[0]

    entities = []
    for zipped_node in grounded_formula.zip(tree):
        formula_node, tree_node = zipped_node.nodes
        if not isinstance(formula_node, FormulaNode):
            continue

        # A non-entity node with a single child is replaced by that child
        # before the entity check.
        wraps_single_child = (len(formula_node.children) == 1 and
                              not issubtype(formula_node.return_type, 'entity'))
        if wraps_single_child:
            formula_node = formula_node.children[0]

        if not issubtype(formula_node.return_type, 'entity'):
            continue
        coords = core_parse.evaluate(formula_node)
        if coords is None:
            continue

        shifted = offset_coords(coords, formula_node.return_type, offset)
        content = tree_node.content.serialized()
        # The serialized signature reports the type of the node actually evaluated.
        content['signature']['return_type'] = formula_node.return_type
        entities.append({
            "content": content,
            "coords": serialize_entity(shifted),
            "sentence_number": sentence_number,
        })
    return entities
def val_func(parent_tag_rule, a_tag_rule, b_tag_rule):
    """
    Check whether two child tag rules fit a binary parent.

    :param parent_tag_rule: tag rule whose signature supplies valence and arg_types
    :param a_tag_rule: candidate for the first argument
    :param b_tag_rule: candidate for the second argument
    :return: False unless the parent's valence is 2; otherwise the conjunction
        of the two ``issubtype`` checks of each child's return type against
        the matching parent argument type
    """
    parent_sig = parent_tag_rule.signature
    if parent_sig.valence != 2:
        return False
    first_fits = issubtype(a_tag_rule.signature.return_type,
                           parent_sig.arg_types[0])
    second_fits = issubtype(b_tag_rule.signature.return_type,
                            parent_sig.arg_types[1])
    return first_fits and second_fits
def val_func(p, a, b):
    """
    Validate a binary combination whose parent is the "Is" signature.

    :param p: parent tag rule; rejected unless its signature id is "Is"
    :param a: first child tag rule
    :param b: second child tag rule
    :return: False when the parent is not "Is", when neither child return type
        is a subtype of the other, or when ``a``'s return type is neither a
        'number' nor an 'entity'; otherwise the result of
        ``BinaryRule.val_func(p, a, b)``
    """
    if p.signature.id != "Is":
        return False

    a_type = a.signature.return_type
    b_type = b.signature.return_type

    # The two sides must be comparable in at least one direction.
    if not (issubtype(a_type, b_type) or issubtype(b_type, a_type)):
        return False
    if not (issubtype(a_type, 'number') or issubtype(a_type, 'entity')):
        return False
    return BinaryRule.val_func(p, a, b)
def val_func(parent_tag_rule, a_tag_rule, b_tag_rule):
    """
    Decide whether a pair of children can fill a two-argument parent.

    :param parent_tag_rule: tag rule providing ``signature.valence`` and
        ``signature.arg_types``
    :param a_tag_rule: tag rule proposed as the first argument
    :param b_tag_rule: tag rule proposed as the second argument
    :return: False for any parent valence other than 2; otherwise whether both
        children's return types pass ``issubtype`` against the corresponding
        parent argument types
    """
    signature = parent_tag_rule.signature
    if signature.valence != 2:
        return False
    a_matches = issubtype(a_tag_rule.signature.return_type, signature.arg_types[0])
    b_matches = issubtype(b_tag_rule.signature.return_type, signature.arg_types[1])
    return a_matches and b_matches
def val_func(parent_tag_rule, child_tag_rule):
    """
    Check whether a single child tag rule may attach to a parent.

    :param parent_tag_rule: tag rule providing ``signature.valence`` and
        ``signature.arg_types``
    :param child_tag_rule: tag rule proposed as the first argument
    :return: False for valence 0. For valence 1, whether the child's return
        type is a subtype of the parent's first argument type. For valence 2,
        the same check and additionally that the parent's second argument type
        is a subtype of 'entity'.
    :raises Exception: for any other valence
    """
    parent_sig = parent_tag_rule.signature
    valence = parent_sig.valence
    if valence == 0:
        return False
    if valence not in (1, 2):
        raise Exception()

    child_fits = issubtype(child_tag_rule.signature.return_type,
                           parent_sig.arg_types[0])
    if valence == 1:
        return child_fits
    second_is_entity = issubtype(parent_sig.arg_types[1], 'entity')
    return child_fits and second_is_entity
def val_func(parent_tag_rule, child_tag_rule):
    """
    Validate attaching one child to a parent of valence 1 or 2.

    :param parent_tag_rule: tag rule whose signature supplies valence and arg_types
    :param child_tag_rule: tag rule whose return type is tested against the
        parent's first argument type
    :return: False when the parent takes no arguments; for valence 1 the
        ``issubtype`` result for the child; for valence 2 that result combined
        with whether the parent's second argument type is an 'entity' subtype
    :raises Exception: when the valence is not 0, 1 or 2
    """
    signature = parent_tag_rule.signature
    valence = signature.valence
    if valence == 0:
        return False
    if valence not in (1, 2):
        raise Exception()

    first_ok = issubtype(child_tag_rule.signature.return_type, signature.arg_types[0])
    if valence == 1:
        return first_ok
    remaining_ok = issubtype(signature.arg_types[1], 'entity')
    return first_ok and remaining_ok
def AreaOf(twod):
    """
    Return the area of a two-dimensional shape.

    The shape kind is taken from the class name of ``twod``.

    :param twod: a shape whose class name is a subtype of 'twod'; a circle is
        unpacked as ``(center, radius)``
    :return: ``pi * radius ** 2`` for a circle, ``area_of_polygon(twod)`` for
        any polygon subtype
    :raises NotImplementedError: for an 'arc'. The previous code fell through
        to ``return area`` without ever assigning it, which surfaced as an
        UnboundLocalError.
    :raises Exception: for any other shape kind
    """
    name = twod.__class__.__name__
    assert issubtype(name, 'twod')
    if name == "circle":
        _, radius = twod
        return np.pi * radius ** 2
    if issubtype(name, 'polygon'):
        # http://mathworld.wolfram.com/PolygonArea.html
        return area_of_polygon(twod)
    if name == 'arc':
        # No area formula was ever implemented for arcs; fail with a clear
        # message instead of an unbound local.
        raise NotImplementedError("AreaOf is not implemented for arc")
    raise Exception()
def Is(a, b):
    """
    Test whether ``a`` and ``b`` denote the same thing.

    :param a: first operand
    :param b: second operand
    :return: ``Equals(a, b)`` when either operand is a number. Otherwise
        ``TruthValue(0)`` when the operands match and ``TruthValue(np.inf)``
        when they do not. Polygons and lines (judged by the class name of
        ``a``) are compared as sets, so element order is ignored; everything
        else uses ``==``.
    """
    if is_number(a) or is_number(b):
        return Equals(a, b)

    a_name = a.__class__.__name__
    if issubtype(a_name, 'polygon') or issubtype(a_name, 'line'):
        same = set(a) == set(b)
    else:
        same = a == b
    return TruthValue(0) if same else TruthValue(np.inf)
def semantic_tree_to_serialized_entities(match_parse, semantic_tree, sentence_number, value_expr_formulas):
    """
    Collect serialized records for the entity nodes of a semantic tree.

    The tree is converted to a formula, grounded with ``ground_formulas`` and
    walked in lockstep with the tree itself. Every entity-typed formula node
    that the core parse can evaluate yields one dict with the keys "content",
    "coords" and "sentence_number".

    :param match_parse: object exposing ``graph_parse.core_parse``
    :param semantic_tree: tree providing ``to_formula()``
    :param sentence_number: value copied verbatim into every record
    :param value_expr_formulas: forwarded to ``ground_formulas``
    :return: list of entity dicts
    """
    core_parse = match_parse.graph_parse.core_parse
    offset = core_parse.image_segment_parse.diagram_image_segment.offset
    formula = semantic_tree.to_formula()
    grounded_formula = ground_formulas(match_parse, [formula], value_expr_formulas)[0]

    entities = []
    for zipped_node in grounded_formula.zip(semantic_tree):
        formula_node, tree_node = zipped_node.nodes
        if not isinstance(formula_node, FormulaNode):
            continue
        if not issubtype(formula_node.return_type, 'entity'):
            continue
        coords = core_parse.evaluate(formula_node)
        if coords is None:
            continue

        # The offset is applied according to the tree node's declared type.
        shifted = offset_coords(coords, tree_node.content.signature.return_type, offset)
        entities.append({
            "content": tree_node.content.serialized(),
            "coords": serialize_entity(shifted),
            "sentence_number": sentence_number,
        })
    return entities
def parse_match_formulas(match_parse):
    """
    Turn the label-to-formula matches of a MatchParse into atoms.

    :param match_parse: MatchParse whose ``match_dict`` maps each label to a
        list of FormulaNode terms
    :return: list of FormulaNode atoms. An entity term contributes only
        ``Ge(Pi, MeasureOf(term))``, and only when it is an "Angle". A
        non-entity term contributes ``Equals(parsed_label, term)``, plus
        ``Ge(180, parsed_label)`` when the term's signature id is "Div".
    """
    assert isinstance(match_parse, MatchParse)
    match_atoms = []
    for label, terms in match_parse.match_dict.iteritems():
        for term in terms:
            assert isinstance(term, FormulaNode)

            if issubtype(term.return_type, 'entity'):
                if term.signature.id == "Angle":
                    bound = FormulaNode(signatures['Ge'],
                                        [FormulaNode(signatures['Pi'], []),
                                         FormulaNode(signatures['MeasureOf'], [term])])
                    match_atoms.append(bound)
                # Entities are never equated with the label expression.
                continue

            # FIXME : to be obtained by tag model
            left_term = prefix_to_formula(expression_parser.parse_prefix(label))
            match_atoms.append(FormulaNode(signatures['Equals'], [left_term, term]))

            if term.signature.id == "Div":
                # TODO : this should be only constrained if the observed angle is < 180
                # TODO : In fact, the labeling should be reorganized. (x --> x*\degree)
                match_atoms.append(FormulaNode(signatures['Ge'], [180, left_term]))
    return match_atoms
def offset_coords(coords, type_, offset):
    """
    Apply ``offset_point`` to every point contained in an entity's coordinates.

    :param coords: coordinates of the entity; copied into a list first so the
        caller's top-level object is not modified
    :param type_: entity type name, dispatched on with ``issubtype``
    :param offset: offset forwarded to ``offset_point``
    :return: for a 'point', the result of ``offset_point`` on the whole
        coordinate list; otherwise a list with each contained point offset.
        For a 'line' the two elements are points; for a 'circle' only the
        first element (the center) is offset; for an 'arc' or 'sector' the
        first element is a circle followed by two points; any other type is
        treated as a sequence of points.
    """
    coords = list(coords)
    if issubtype(type_, 'point'):
        return offset_point(coords, offset)

    if issubtype(type_, "line"):
        coords[0] = offset_point(coords[0], offset)
        coords[1] = offset_point(coords[1], offset)
    elif issubtype(type_, 'circle'):
        coords[0] = offset_point(coords[0], offset)
    elif issubtype(type_, 'arc') or issubtype(type_, 'sector'):
        # Copy the nested circle before changing its center. Assigning into
        # coords[0][0] directly fails when the circle is an immutable tuple
        # and otherwise silently mutates the caller's object.
        circle = list(coords[0])
        circle[0] = offset_point(circle[0], offset)
        coords[0] = circle
        coords[1] = offset_point(coords[1], offset)
        coords[2] = offset_point(coords[2], offset)
    else:
        coords = [offset_point(point, offset) for point in coords]
    return coords
def offset_coords(coords, type_, offset):
    """
    Shift every point inside an entity's coordinates with ``offset_point``.

    :param coords: entity coordinates; a list copy is made so the caller's
        top-level object stays untouched
    :param type_: entity type name, tested with ``issubtype``
    :param offset: offset handed to ``offset_point``
    :return: the offset point itself for a 'point'; otherwise a list in which
        the points of the entity are offset (both ends of a 'line', the center
        of a 'circle', the circle center and both end points of an 'arc' or
        'sector', and every element for any other type)
    """
    coords = list(coords)
    if issubtype(type_, 'point'):
        return offset_point(coords, offset)

    if issubtype(type_, "line"):
        coords[0] = offset_point(coords[0], offset)
        coords[1] = offset_point(coords[1], offset)
    elif issubtype(type_, 'circle'):
        coords[0] = offset_point(coords[0], offset)
    elif issubtype(type_, 'arc') or issubtype(type_, 'sector'):
        # The nested circle is rebuilt rather than assigned into: in-place
        # item assignment raises on an immutable tuple and would otherwise
        # leak the change back to the caller.
        circle = list(coords[0])
        circle[0] = offset_point(circle[0], offset)
        coords[0] = circle
        coords[1] = offset_point(coords[1], offset)
        coords[2] = offset_point(coords[2], offset)
    else:
        coords = [offset_point(point, offset) for point in coords]
    return coords
def is_valid_relation(parent_signature, child_signature, index):
    """
    Check whether a child signature may fill one argument slot of a parent.

    :param parent_signature: signature whose ``arg_types[index]`` is the slot type
    :param child_signature: signature whose ``return_type`` is offered
    :param index: position of the argument slot in the parent
    :return: False when the slot type is not "*"-prefixed and the child type
        is exactly "*"; otherwise ``issubtype`` of the two types after a
        leading "*" has been stripped from each
    """
    expected = parent_signature.arg_types[index]
    actual = child_signature.return_type

    slot_is_starred = expected[0] == "*"
    # NOTE(review): this compares the whole child type with "*", not just its
    # first character -- confirm that is intended.
    if not slot_is_starred and actual == "*":
        return False

    if slot_is_starred:
        expected = expected[1:]
    if actual[0] == "*":
        actual = actual[1:]
    return issubtype(actual, expected)
def Tangent(line, twod):
    """
    Evaluate whether ``line`` is tangent to a two-dimensional shape.

    :param line: the candidate tangent line
    :param twod: a circle (with ``center`` and ``radius``) or a polygon
        (iterable of points); the kind is taken from its class name
    :return: for a circle, ``Equals`` between the perpendicular distance from
        the line to the center and the radius; for a polygon, the ``|``
        combination (starting from False) of ``PointLiesOnLine`` over its points
    :raises Exception: for any other shape kind
    """
    name = twod.__class__.__name__
    if name == "circle":
        gap = perpendicular_distance_between_line_and_point(line, twod.center)
        return Equals(gap, twod.radius)
    if not issubtype(name, 'polygon'):
        raise Exception()

    touches = False
    for vertex in twod:
        touches = operator.__or__(touches, PointLiesOnLine(vertex, line))
    return touches
def is_valid_relation(parent_signature, child_signature, index):
    """
    Decide whether a child's return type is acceptable for a parent argument.

    :param parent_signature: signature providing ``arg_types``
    :param child_signature: signature providing ``return_type``
    :param index: which parent argument the child would occupy
    :return: False if the parent argument type does not start with "*" while
        the child type equals "*"; otherwise whether the child type is a
        subtype of the parent argument type, ignoring one leading "*" on each
    """
    slot_type = parent_signature.arg_types[index]
    offered_type = child_signature.return_type

    starred_slot = slot_type[0] == "*"
    # NOTE(review): the child type is compared in full against "*" here, while
    # the stripping below looks only at its first character -- confirm.
    if offered_type == "*" and not starred_slot:
        return False

    if starred_slot:
        slot_type = slot_type[1:]
    if offered_type[0] == "*":
        offered_type = offered_type[1:]
    return issubtype(offered_type, slot_type)
def formula_to_serialized_entities(match_parse, formula, tree, sentence_number):
    """
    Build serialized entity records from a formula and its companion tree.

    ``formula`` is grounded via ``ground_formulas`` and zipped with ``tree``.
    Each formula node that is (or wraps, as a single child) an entity is
    evaluated by the core parse. Nodes that evaluate to None are skipped.

    :param match_parse: object exposing ``graph_parse.core_parse``
    :param formula: formula to ground
    :param tree: tree walked alongside the grounded formula
    :param sentence_number: stored unchanged in each record
    :return: list of dicts with keys "content", "coords", "sentence_number"
    """
    core_parse = match_parse.graph_parse.core_parse
    offset = core_parse.image_segment_parse.diagram_image_segment.offset
    grounded_formula = ground_formulas(match_parse, [formula])[0]

    entities = []
    for zipped_node in grounded_formula.zip(tree):
        formula_node, tree_node = zipped_node.nodes
        if not isinstance(formula_node, FormulaNode):
            continue

        # Step into the only child of a non-entity node before type-checking.
        if len(formula_node.children) == 1:
            if not issubtype(formula_node.return_type, 'entity'):
                formula_node = formula_node.children[0]

        if not issubtype(formula_node.return_type, 'entity'):
            continue
        coords = core_parse.evaluate(formula_node)
        if coords is None:
            continue

        moved = offset_coords(coords, formula_node.return_type, offset)
        content = tree_node.content.serialized()
        content['signature']['return_type'] = formula_node.return_type
        record = {
            "content": content,
            "coords": serialize_entity(moved),
            "sentence_number": sentence_number,
        }
        entities.append(record)
    return entities
def get_semantic_trees_by_type(self, return_type, terminator=None):
    """
    Gather the semantic trees rooted at nodes of a given return type.

    :param return_type: type that a root's signature return type must be a
        subtype of; roots whose return type is exactly 'ground' are excluded
    :param terminator: forwarded to ``get_semantic_trees_by_node``
    :return: set holding the trees produced for every qualifying root
    """
    roots = []
    for node in self.node_dict.values():
        node_type = node.tag_rule.signature.return_type
        if issubtype(node_type, return_type) and node_type != 'ground':
            roots.append(node)

    # Build all per-root collections first, then merge them into one set.
    per_root = [self.get_semantic_trees_by_node(root, terminator) for root in roots]
    semantic_trees = set()
    for trees in per_root:
        semantic_trees.update(trees)
    return semantic_trees
def IsCenterOf(point, twod):
    """
    Evaluate whether ``point`` is the center of a two-dimensional shape.

    :param point: candidate center, indexable as ``point[0]``, ``point[1]``
    :param twod: a circle (with ``center``) or a polygon (iterable of points);
        the kind is taken from its class name
    :return: for a circle, ``Equals`` on both coordinates combined with ``&``;
        for a polygon, ``IsRegular(twod)`` combined with ``&`` with the
        requirement that each vertex distance equals the previous one (the
        first is compared with the last)
    :raises Exception: for any other shape kind
    """
    name = twod.__class__.__name__
    if name == 'circle':
        same_x = Equals(point[0], twod.center[0])
        same_y = Equals(point[1], twod.center[1])
        return same_x & same_y
    if not issubtype(name, 'polygon'):
        raise Exception()

    distances = [distance_between_points(point, vertex) for vertex in twod]
    regular = IsRegular(twod)
    equidistant = True
    for index, distance in enumerate(distances):
        # index - 1 is -1 for the first vertex, i.e. it wraps to the last one.
        equidistant = operator.__and__(equidistant,
                                       Equals(distances[index - 1], distance))
    return regular & equidistant
def formula_to_semantic_tree(formula, syntax_parse, span):
    """
    Build a dummy semantic tree mirroring ``formula``.

    Every tag rule in the tree shares the given syntax parse and span.

    :param formula: FormulaNode to convert
    :param syntax_parse: syntax parse stored in every tag rule
    :param span: span stored in every tag rule
    :return: SemanticTreeNode. An entity-typed formula becomes a childless
        node carrying a VariableSignature named 'temp'; any other formula
        keeps its signature and gets its children converted recursively.
    """
    assert isinstance(formula, FormulaNode)
    signature = formula.signature

    if issubtype(signature.return_type, 'entity'):
        leaf_signature = VariableSignature(signature.id, signature.return_type, name='temp')
        return SemanticTreeNode(TagRule(syntax_parse, span, leaf_signature), [])

    tag_rule = TagRule(syntax_parse, span, signature)
    children = []
    for child in formula.children:
        children.append(formula_to_semantic_tree(child, syntax_parse, span))
    return SemanticTreeNode(tag_rule, children)
def semantic_tree_to_serialized_entities(match_parse, semantic_tree, sentence_number, value_expr_formulas):
    """
    Build serialized entity records from a semantic tree.

    The tree's formula is grounded via ``ground_formulas`` and zipped with the
    tree. Entity-typed formula nodes are evaluated by the core parse; nodes
    that evaluate to None are skipped.

    :param match_parse: object exposing ``graph_parse.core_parse``
    :param semantic_tree: tree providing ``to_formula()``
    :param sentence_number: stored unchanged in each record
    :param value_expr_formulas: passed through to ``ground_formulas``
    :return: list of dicts with keys "content", "coords", "sentence_number"
    """
    core_parse = match_parse.graph_parse.core_parse
    offset = core_parse.image_segment_parse.diagram_image_segment.offset
    formula = semantic_tree.to_formula()
    grounded_formula = ground_formulas(match_parse, [formula], value_expr_formulas)[0]

    entities = []
    for zipped_node in grounded_formula.zip(semantic_tree):
        formula_node, tree_node = zipped_node.nodes
        is_entity = (isinstance(formula_node, FormulaNode) and
                     issubtype(formula_node.return_type, 'entity'))
        if not is_entity:
            continue
        coords = core_parse.evaluate(formula_node)
        if coords is None:
            continue

        moved = offset_coords(coords, tree_node.content.signature.return_type, offset)
        record = {
            "content": tree_node.content.serialized(),
            "coords": serialize_entity(moved),
            "sentence_number": sentence_number,
        }
        entities.append(record)
    return entities
def map(self, tr):
    """
    Encode a tag rule as a tuple of 0/1 features.

    The features are, in order: one per entry of ``self.return_type_set``
    (is the rule's return type a subtype of it), one per entry of
    ``self.pos_set`` (does it equal the POS of the rule's span), and one per
    ``(key_rel, key_nbr)`` combination (is that (relation, word) pair among
    the span's neighbors).

    :param tr: TagRule to encode
    :return: tuple of ints
    """
    assert isinstance(tr, TagRule)
    sp = tr.syntax_parse

    features = []
    features.extend(int(issubtype(tr.signature.return_type, ref_rt))
                    for ref_rt in self.return_type_set)
    features.extend(int(ref_pos == sp.get_pos_by_span(tr.span))
                    for ref_pos in self.pos_set)

    observed = set()
    for key, rel in sp.get_neighbors(tr.span).iteritems():
        observed.add((rel, sp.get_word(key)))

    for key_rel in self.key_rels:
        for nbr in self.key_nbrs:
            features.append(int((key_rel, nbr) in observed))
    return tuple(features)
def map(self, tr):
    """
    Map a tag rule to a binary feature tuple.

    Three groups of indicators are concatenated: return-type membership (one
    per ``self.return_type_set`` entry, via ``issubtype``), POS equality (one
    per ``self.pos_set`` entry), and neighbor presence (one per pair drawn
    from ``self.key_rels`` x ``self.key_nbrs``).

    :param tr: TagRule to encode
    :return: tuple of 0/1 ints
    """
    assert isinstance(tr, TagRule)
    sp = tr.syntax_parse

    type_bits = [int(issubtype(tr.signature.return_type, ref_rt))
                 for ref_rt in self.return_type_set]
    pos_bits = [int(ref_pos == sp.get_pos_by_span(tr.span))
                for ref_pos in self.pos_set]

    nbrs = sp.get_neighbors(tr.span)
    # (relation, neighbor word) pairs actually present around the span.
    present = set()
    for key, rel in nbrs.iteritems():
        present.add((rel, sp.get_word(key)))

    nbr_bits = []
    for key_rel in self.key_rels:
        for nbr in self.key_nbrs:
            nbr_bits.append(int((key_rel, nbr) in present))
    return tuple(type_bits + pos_bits + nbr_bits)
def formula_to_semantic_tree(formula, syntax_parse, span):
    """
    Create a dummy semantic tree in which every tag shares one parse and span.

    :param formula: FormulaNode to mirror
    :param syntax_parse: syntax parse given to every tag rule
    :param span: span given to every tag rule
    :return: SemanticTreeNode; entity-typed formulas collapse to a leaf whose
        signature is a VariableSignature named 'temp', other formulas are
        converted recursively child by child
    """
    assert isinstance(formula, FormulaNode)
    original_sig = formula.signature

    if issubtype(original_sig.return_type, 'entity'):
        placeholder = VariableSignature(original_sig.id,
                                        original_sig.return_type,
                                        name='temp')
        leaf_rule = TagRule(syntax_parse, span, placeholder)
        return SemanticTreeNode(leaf_rule, [])

    tag_rule = TagRule(syntax_parse, span, original_sig)
    subtrees = []
    for child in formula.children:
        subtrees.append(formula_to_semantic_tree(child, syntax_parse, span))
    return SemanticTreeNode(tag_rule, subtrees)
def evaluate(formula, assignment):
    """
    Evaluate a formula under a variable assignment.

    :param formula: a Node, or any other value (returned unchanged)
    :param assignment: dict mapping variable signature ids to values
    :return:
        - ``formula`` itself when it is not a Node;
        - None when the formula is not grounded by the assignment's keys;
        - for a SetNode with a boolean head, the ``&`` combination of its
          evaluated children (starting from True); any other SetNode is
          returned as is;
        - the assigned value for a variable, a float for a numeric id;
        - otherwise the result of calling the function named by the signature
          id (looked up on ``this``) with the evaluated arguments, or
          ``TruthValue(np.inf)`` if that call raises.
    """
    if not isinstance(formula, Node):
        return formula
    if not formula.is_grounded(assignment.keys()):
        return None

    if isinstance(formula, SetNode):
        if issubtype(formula.head.return_type, 'boolean'):
            return reduce(operator.__and__,
                          (evaluate(child, assignment) for child in formula.children),
                          True)
        return formula

    if isinstance(formula.signature, VariableSignature):
        return assignment[formula.signature.id]
    if is_number(formula.signature.id):
        return float(formula.signature.id)

    evaluated_args = []
    for arg in formula.children:
        if isinstance(arg, FormulaNode):
            evaluated_args.append(evaluate(arg, assignment))
        elif isinstance(arg, SetNode):
            evaluated_args.append(
                SetNode([evaluate(arg_arg, assignment) for arg_arg in arg.children]))
        else:
            evaluated_args.append(arg)

    # FIXME : rather than try/catch, check type matching
    try:
        return getattr(this, formula.signature.id)(*evaluated_args)
    except Exception:
        # Best-effort fallback for failed calls. Catching Exception rather
        # than everything lets KeyboardInterrupt and SystemExit propagate.
        return TruthValue(np.inf)
def parse_match_formulas(match_parse):
    """
    Derive atoms from the label matches stored in a MatchParse.

    :param match_parse: MatchParse; ``match_dict`` maps a label to a list of
        FormulaNode terms
    :return: list of FormulaNode atoms. For an entity term the only atom
        produced is ``Ge(Pi, MeasureOf(term))`` and only for "Angle" terms.
        For other terms the label is parsed into a formula and equated with
        the term; "Div" terms additionally get ``Ge(180, parsed_label)``.
    """
    assert isinstance(match_parse, MatchParse)
    match_atoms = []
    for label, terms in match_parse.match_dict.iteritems():
        for term in terms:
            assert isinstance(term, FormulaNode)
            term_id = term.signature.id

            if issubtype(term.return_type, 'entity'):
                if term_id == "Angle":
                    pi_and_measure = [FormulaNode(signatures['Pi'], []),
                                      FormulaNode(signatures['MeasureOf'], [term])]
                    match_atoms.append(FormulaNode(signatures['Ge'], pi_and_measure))
                # No equality atom is generated for an entity.
                continue

            # FIXME : to be obtained by tag model
            left_term = prefix_to_formula(expression_parser.parse_prefix(label))
            atom = FormulaNode(signatures['Equals'], [left_term, term])
            match_atoms.append(atom)

            if term_id == "Div":
                # TODO : this should be only constrained if the observed angle is < 180
                # TODO : In fact, the labeling should be reorganized. (x --> x*\degree)
                upper_bound = FormulaNode(signatures['Ge'], [180, left_term])
                match_atoms.append(upper_bound)
    return match_atoms
def parse_match_from_known_labels(graph_parse, known_labels):
    """
    Match each known diagram label to the closest instance in the graph parse.

    :param graph_parse: GraphParse supplying the instances, point variables
        and radius variables
    :param known_labels: iterable of dicts with the keys 'label', 'x', 'y'
        and 'type'. 'type' is either a bare instance type or
        "<prefix> <instance type>" (e.g. a 'length' prefix on a line).
    :return: MatchParse built from ``graph_parse``, a dict mapping each label
        to its list of formulas, and a dict mapping point labels to point keys
    :raises Exception: when a label already has a match and the new formula is
        an entity

    Labels whose type has no instances, or whose type is not one of
    'line', 'point', 'arc' or 'angle', are logged and skipped. Previously an
    unsupported type left ``distances`` and ``formula`` unbound on the first
    label, or silently reused the values computed for the previous label.
    """
    assert isinstance(graph_parse, GraphParse)
    match_dict = {}
    point_key_dict = {}
    offset = graph_parse.image_segment_parse.diagram_image_segment.offset
    for d in known_labels:
        label = d['label']
        x = d['x'] - offset[0]
        y = d['y'] - offset[1]
        label_point = instantiators['point'](x, y)
        type_ = d['type']
        arr = type_.split(' ')
        if len(arr) > 1:
            type_ = arr[-1]

        # Find closest type_ instance's key in graph_parse
        instances = get_all_instances(graph_parse, type_)
        if len(instances) == 0:
            logging.error("no instance found of type %s" % type_)
            continue

        if type_ == 'line':
            # A 'length' prefix switches the line distance into its length mode.
            is_length = len(arr) > 1 and arr[0] == 'length'
            distances = [(key, label_distance_to_line(label_point, instance, is_length))
                         for key, instance in instances.iteritems()]
        elif type_ == 'point':
            distances = [(key, label_distance_to_point(label_point, instance))
                         for key, instance in instances.iteritems()]
        elif type_ == 'arc':
            distances = [(key, label_distance_to_arc(label_point, instance))
                         for key, instance in instances.iteritems()]
        elif type_ == 'angle':
            # filter subangles
            # instances = {key: value for key, value in instances.iteritems() if all(x == value or not is_subangle(x, value) for x in instances.values())}
            distances = [(key, label_distance_to_angle(label_point, instance))
                         for key, instance in instances.iteritems()]
        else:
            logging.error("unsupported label type %s" % type_)
            continue

        # Then use the key to get the corresponding variable in the general graph.
        # Wrap the general instance in a function node. If there are extra
        # prefixes, add these to the formula as well.
        argmin_key = min(distances, key=lambda pair: pair[1])[0]
        if type_ == 'line':
            a_key, b_key = argmin_key
            a_point = graph_parse.point_variables[a_key]
            b_point = graph_parse.point_variables[b_key]
            formula = FormulaNode(signatures['Line'], [a_point, b_point])
            if len(arr) > 1 and arr[0] == 'length':
                formula = FormulaNode(signatures['LengthOf'], [formula])
        elif type_ == 'point':
            formula = graph_parse.point_variables[argmin_key]
            point_key_dict[label] = argmin_key
        elif type_ == 'angle':
            a_key, b_key, c_key = argmin_key
            a_point = graph_parse.point_variables[a_key]
            b_point = graph_parse.point_variables[b_key]
            c_point = graph_parse.point_variables[c_key]
            formula = FormulaNode(signatures['Angle'], [a_point, b_point, c_point])
            if len(arr) > 1 and arr[0] == 'angle':
                formula = FormulaNode(signatures['MeasureOf'], [formula])
                formula = FormulaNode(signatures['Div'],
                                      [formula, FormulaNode(signatures['Degree'], [])])
        elif type_ == 'arc':
            (center_key, radius_key), a_key, b_key = argmin_key
            center_point = graph_parse.point_variables[center_key]
            radius = graph_parse.radius_variables[center_key][radius_key]
            circle = FormulaNode(signatures['Circle'], [center_point, radius])
            a_point = graph_parse.point_variables[a_key]
            b_point = graph_parse.point_variables[b_key]
            formula = FormulaNode(signatures['Arc'], [circle, a_point, b_point])

        if label not in match_dict:
            match_dict[label] = []
        elif issubtype(formula.return_type, 'entity'):
            # A label may not be matched to an entity once it already has a match.
            raise Exception()
        match_dict[label].append(formula)

    match_parse = MatchParse(graph_parse, match_dict, point_key_dict)
    return match_parse
def get_semantic_trees_by_type(self, return_type, terminator=None):
    """
    Return every semantic tree whose root has a compatible return type.

    :param return_type: roots qualify when their signature return type is a
        subtype of this and is not exactly 'ground'
    :param terminator: passed through to ``get_semantic_trees_by_node``
    :return: set of the trees obtained from all qualifying roots
    """
    def is_root(node):
        node_type = node.tag_rule.signature.return_type
        return issubtype(node_type, return_type) and node_type != 'ground'

    roots = [node for node in self.node_dict.values() if is_root(node)]
    tree_groups = [self.get_semantic_trees_by_node(root, terminator) for root in roots]

    semantic_trees = set()
    for group in tree_groups:
        semantic_trees.update(group)
    return semantic_trees
def parse_match_from_known_labels(graph_parse, known_labels):
    """
    Match each known diagram label to the closest instance in the graph parse.

    :param graph_parse: GraphParse supplying the instances, point variables
        and radius variables
    :param known_labels: iterable of dicts with the keys 'label', 'x', 'y'
        and 'type'. 'type' is either a bare instance type or
        "<prefix> <instance type>" (e.g. a 'length' prefix on a line).
    :return: MatchParse built from ``graph_parse``, a dict mapping each label
        to its list of formulas, and a dict mapping point labels to point keys
    :raises Exception: when a label already has a match and the new formula is
        an entity

    Labels whose type has no instances, or whose type is not one of
    'line', 'point', 'arc' or 'angle', are logged and skipped. Previously an
    unsupported type left ``distances`` and ``formula`` unbound on the first
    label, or silently reused the values computed for the previous label.
    """
    assert isinstance(graph_parse, GraphParse)
    match_dict = {}
    point_key_dict = {}
    offset = graph_parse.image_segment_parse.diagram_image_segment.offset
    for d in known_labels:
        label = d['label']
        x = d['x'] - offset[0]
        y = d['y'] - offset[1]
        label_point = instantiators['point'](x, y)
        type_ = d['type']
        arr = type_.split(' ')
        if len(arr) > 1:
            type_ = arr[-1]

        # Find closest type_ instance's key in graph_parse
        instances = get_all_instances(graph_parse, type_)
        if len(instances) == 0:
            logging.error("no instance found of type %s" % type_)
            continue

        if type_ == 'line':
            # A 'length' prefix switches the line distance into its length mode.
            is_length = len(arr) > 1 and arr[0] == 'length'
            distances = [(key, label_distance_to_line(label_point, instance, is_length))
                         for key, instance in instances.iteritems()]
        elif type_ == 'point':
            distances = [(key, label_distance_to_point(label_point, instance))
                         for key, instance in instances.iteritems()]
        elif type_ == 'arc':
            distances = [(key, label_distance_to_arc(label_point, instance))
                         for key, instance in instances.iteritems()]
        elif type_ == 'angle':
            # filter subangles
            # instances = {key: value for key, value in instances.iteritems() if all(x == value or not is_subangle(x, value) for x in instances.values())}
            distances = [(key, label_distance_to_angle(label_point, instance))
                         for key, instance in instances.iteritems()]
        else:
            logging.error("unsupported label type %s" % type_)
            continue

        # Then use the key to get the corresponding variable in the general graph.
        # Wrap the general instance in a function node. If there are extra
        # prefixes, add these to the formula as well.
        argmin_key = min(distances, key=lambda pair: pair[1])[0]
        if type_ == 'line':
            a_key, b_key = argmin_key
            a_point = graph_parse.point_variables[a_key]
            b_point = graph_parse.point_variables[b_key]
            formula = FormulaNode(signatures['Line'], [a_point, b_point])
            if len(arr) > 1 and arr[0] == 'length':
                formula = FormulaNode(signatures['LengthOf'], [formula])
        elif type_ == 'point':
            formula = graph_parse.point_variables[argmin_key]
            point_key_dict[label] = argmin_key
        elif type_ == 'angle':
            a_key, b_key, c_key = argmin_key
            a_point = graph_parse.point_variables[a_key]
            b_point = graph_parse.point_variables[b_key]
            c_point = graph_parse.point_variables[c_key]
            formula = FormulaNode(signatures['Angle'], [a_point, b_point, c_point])
        elif type_ == 'arc':
            (center_key, radius_key), a_key, b_key = argmin_key
            center_point = graph_parse.point_variables[center_key]
            radius = graph_parse.radius_variables[center_key][radius_key]
            circle = FormulaNode(signatures['Circle'], [center_point, radius])
            a_point = graph_parse.point_variables[a_key]
            b_point = graph_parse.point_variables[b_key]
            formula = FormulaNode(signatures['Arc'], [circle, a_point, b_point])

        # NOTE(review): kept at loop level so the measure wrapping applies to
        # both angles and arcs -- confirm against the intended nesting.
        if len(arr) > 0 and arr[0] == 'angle':
            formula = FormulaNode(signatures['MeasureOf'], [formula])
            formula = FormulaNode(signatures['Div'],
                                  [formula, FormulaNode(signatures['Degree'], [])])

        if label not in match_dict:
            match_dict[label] = []
        elif issubtype(formula.return_type, 'entity'):
            # A label may not be matched to an entity once it already has a match.
            raise Exception()
        match_dict[label].append(formula)

    match_parse = MatchParse(graph_parse, match_dict, point_key_dict)
    return match_parse