def test_lisp_to_nested_expression(self):
    """Check ``util.lisp_to_nested_expression`` against two sample logical forms.

    Each case pairs a lisp string with the nested list the parser must return. In this
    variant the expected lists have one more level of nesting than the parentheses in
    the input string.
    """
    cases = [
        (
            u"((reverse fb:row.row.year) (fb:row.row.league fb:cell.usl_a_league))",
            [[[u'reverse', u'fb:row.row.year'], [u'fb:row.row.league', u'fb:cell.usl_a_league']]],
        ),
        (
            u"(count (and (division 1) (tier (!= null))))",
            [[u'count', [u'and', [u'division', u'1'], [u'tier', [u'!=', u'null']]]]],
        ),
    ]
    for logical_form, expected in cases:
        assert util.lisp_to_nested_expression(logical_form) == expected
def test_lisp_to_nested_expression(self):
    """Verify the nested lists produced by ``util.lisp_to_nested_expression``."""
    # A form whose outermost expression contains two sub-expressions.
    parsed = util.lisp_to_nested_expression(
        "((reverse fb:row.row.year) (fb:row.row.league fb:cell.usl_a_league))"
    )
    expected = [['reverse', 'fb:row.row.year'], ['fb:row.row.league', 'fb:cell.usl_a_league']]
    assert parsed == expected

    # A form with several levels of nesting inside a single function application.
    parsed = util.lisp_to_nested_expression("(count (and (division 1) (tier (!= null))))")
    expected = ['count', ['and', ['division', '1'], ['tier', ['!=', 'null']]]]
    assert parsed == expected
def test_lisp_to_nested_expression(self):
    """Parse two logical forms and compare the results with hand-written nested lists."""
    two_part_form = "((reverse fb:row.row.year) (fb:row.row.league fb:cell.usl_a_league))"
    two_part_tree = util.lisp_to_nested_expression(two_part_form)
    assert two_part_tree == [
        ["reverse", "fb:row.row.year"],
        ["fb:row.row.league", "fb:cell.usl_a_league"],
    ]

    count_form = "(count (and (division 1) (tier (!= null))))"
    count_tree = util.lisp_to_nested_expression(count_form)
    assert count_tree == [
        "count",
        ["and", ["division", "1"], ["tier", ["!=", "null"]]],
    ]
def logical_form_to_action_sequence(self, logical_form: str) -> List[str]:
    """
    Linearizes ``logical_form`` into the production rules of its abstract syntax tree, in
    top-down, depth-first order.

    Every rule is written as "LHS -> RHS".  "LHS" is always a single non-terminal type; RHS is
    either a terminal or a list of non-terminals (a general context-free grammar allows other
    RHS shapes, but our grammar induction logic never produces them).

    Non-terminals are `types` in the grammar.  They are either basic types (``int``, ``str``,
    or a class you define) or functional types.  A functional type is written in angle
    brackets, with the argument types (comma-separated when there are several) before a colon
    and the return type after it: ``<int:int>`` takes an integer and returns an integer, and
    ``<int,int:int>`` takes two integers and returns an integer.

    For example, the logical form ``(add 2 3)`` is translated to the complete action sequence
    ``['@start@ -> int', 'int -> [<int,int:int>, int, int]', '<int,int:int> -> add',
    'int -> 2', 'int -> 3']``.

    A ``ParsingError`` raised while computing the transitions, or because the expression's
    start type is not one of the allowed start types, is logged and then re-raised.
    """
    nested = util.lisp_to_nested_expression(logical_form)
    try:
        actions, root_type = self._get_transitions(nested, expected_type=None)
        # An empty / unset collection of start types means any start type is accepted.
        if self._start_types:
            if root_type not in self._start_types:
                raise ParsingError(f"Expression had unallowed start type of {root_type}: {nested}")
    except ParsingError:
        logger.error(f'Error parsing logical form: {logical_form}')
        raise
    # The start rule always comes first in the sequence.
    actions.insert(0, f'@start@ -> {root_type}')
    return actions
def execute(self, logical_form: str):
    """Runs ``logical_form`` with the predicates defined on this language and returns the result.

    Commas in the logical form are replaced by spaces before it is parsed into a nested
    expression.  Raises ``RuntimeError`` when the instance has no ``_functions`` attribute.
    """
    if hasattr(self, '_functions'):
        nested = util.lisp_to_nested_expression(logical_form.replace(",", " "))
        return self._execute_expression(nested)
    raise RuntimeError("You must call super().__init__() in your Language constructor")
def parse_logical_form(self,
                       logical_form: str,
                       remove_var_function: bool = True) -> Expression:
    """
    Parses a logical form string into an expression, mapping its tokens using the mapping
    along the way.

    Parameters
    ----------
    logical_form : ``str``
        The logical form to parse.  It is wrapped in parentheses if it does not already start
        with one.
    remove_var_function : ``bool`` (optional)
        Some languages use a special ``var`` function inside lambda functions to mark the use
        of a variable.  Set this flag if your language does so and you want ``var`` left out
        of the parsed expression.  That is useful when an action sequence is generated from
        the parsed expression: given the way constrained decoding is currently implemented,
        it is easier if the decoder never has to produce this function.
    """
    wrapped = logical_form if logical_form.startswith("(") else f"({logical_form})"
    if remove_var_function:
        # Applied in this order: first "(x)" becomes "x", then "(var x)" becomes "(x)".
        rewrites = (
            (r'\(([x-z])\)', r'\1'),
            (r'\(var ([x-z])\)', r'(\1)'),
        )
        for pattern, replacement in rewrites:
            wrapped = re.sub(pattern, replacement, wrapped)
    nested = semparse_util.lisp_to_nested_expression(wrapped)
    translated = self._process_nested_expression(nested)
    # Global signatures are applied on top of a copy of the local ones.
    signature = self.local_type_signatures.copy()
    signature.update(self.global_type_signatures)
    return self._logic_parser.parse(translated, signature=signature)
def parse_logical_form(self,
                       logical_form: str,
                       remove_var_function: bool = True) -> Expression:
    """
    Takes a logical form string, maps its tokens using the mapping, and returns the parsed
    expression.

    Parameters
    ----------
    logical_form : ``str``
        Logical form to parse.  Parentheses are added around it when it does not start with
        an opening parenthesis.
    remove_var_function : ``bool`` (optional)
        ``var`` is a special function that some languages use within lambda functions to
        indicate the usage of a variable.  Set this flag when your language uses it and you
        do not want it to appear in the parsed expression, for instance when you generate an
        action sequence from the result: because of the way constrained decoding is currently
        implemented, it is easier to let the decoder not produce this function.
    """
    text = logical_form
    if not text.startswith("("):
        text = f"({text})"

    if remove_var_function:
        bare_variable = r'\(([x-z])\)'        # "(x)"     -> "x"
        var_application = r'\(var ([x-z])\)'  # "(var x)" -> "(x)"
        text = re.sub(bare_variable, r'\1', text)
        text = re.sub(var_application, r'(\1)', text)

    translated_string = self._process_nested_expression(
        semparse_util.lisp_to_nested_expression(text)
    )
    # Start from the local signatures and let the global ones override them.
    merged_signatures = self.local_type_signatures.copy()
    merged_signatures.update(self.global_type_signatures)
    return self._logic_parser.parse(translated_string, signature=merged_signatures)
def execute(self, logical_form: str) -> Any:
    """Parses ``logical_form`` and returns whatever ``_handle_expression`` computes for it.

    The form is wrapped in parentheses if it does not start with one, and commas are
    replaced by spaces before parsing.
    """
    wrapped = logical_form if logical_form.startswith("(") else f"({logical_form})"
    nested = semparse_util.lisp_to_nested_expression(wrapped.replace(",", " "))
    return self._handle_expression(nested)
def execute(self, logical_form: str) -> bool:
    """
    Execute the logical form and return its truth value.

    The top-level function is always an assertion function (described below), so the string
    is simply parsed into a list and the whole list is handed to ``_execute_assertion``.
    This works because the dataset contains sentences rather than questions, and a sentence
    evaluates to either true or false.

    The language defined here has six types of functions; five return sets and one returns
    booleans.

    1) Assertion Functions : They occur only at the root of the logical form tree.  They take
    a set of entities (boxes or objects), compare an attribute of those entities with a given
    value, and return true or false.  For objects, the compared attribute may be the color or
    the shape; for boxes, only counts are compared.  The comparison operator can be equals,
    not equals, greater than, etc.  The function itself therefore determines the kind of
    entity, the attribute and the operator: "object_count_not_equals" takes a set of objects,
    compares their count with the given value and returns true iff they are not equal.  These
    functions have names like "object_*" or "box_*".

    2) Object Attribute Functions : They take sets of objects and return sets of attributes.
    `color` and `shape` are the attribute functions.

    3) Box Membership Function : It takes a box and returns the objects in it.  It is a
    special kind of attribute function for two reasons: it returns a set of objects rather
    than attributes, and it occurs only within the second argument of a box filtering
    function (see below).  It makes it possible to query boxes by the attributes of the
    objects they contain.  The function is called ``object_in_box`` and is executed within
    ``_execute_box_filter``.

    4) Box Filtering Functions : They have the form
    `filter(set_of_boxes, attribute_function, target_attribute)`.  The attribute function is
    executed on `each` of the given boxes, and only the boxes whose attribute value satisfies
    the filtering criterion with respect to the target attribute (equal to the target, less
    than, greater than, etc.) are returned.  The filtering function defines the comparison
    operator.  All functions in this class named ``filter_*`` belong to this category.

    5) Object Filtering Functions : They have the form ``filter(set_of_objects)``.  They
    resemble box filtering functions but operate on objects, and take one argument instead
    of three.  The reason is that box filtering functions typically query complex attributes,
    whereas object filtering functions query properties of the objects alone, which are
    simple and finite in number.  The filtering function therefore fixes the attribute
    function, the target attribute and the comparison operator all at once: `black` takes a
    set of objects and returns those whose "color" (attribute function) "equals" (comparison
    operator) "black" (target attribute), and "square" returns the objects that are squares.

    6) Negate Object Filter : Takes an object filter and a set of objects, and applies the
    negation of the object filter to the set.
    """
    text = logical_form
    if not text.startswith("("):
        text = "(%s)" % text
    nested = semparse_util.lisp_to_nested_expression(text.replace(",", " "))
    # The whole expression has to be an assertion expression because it has to return a boolean.
    # TODO(pradeep): May want to make this more general and let the executor deal with questions.
    return self._execute_assertion(nested)
def same_logical_form(form1: str, form2: str) -> bool:
    """Return whether two logical forms are equivalent up to graph isomorphism.

    Each form is parsed with ``lisp_to_nested_expression`` and converted with
    ``logical_form_to_graph``; the two graphs are then compared with ``nx.is_isomorphic``.
    Two nodes match when their ``id``, ``type``, ``function`` and ``tc`` attributes agree
    (a missing ``function`` / ``tc`` counts as ``'none'``).  Two groups of parallel edges
    match when they carry the same multiset of ``relation`` values.

    Parameters
    ----------
    form1, form2 : ``str``
        The logical forms to compare.

    Returns
    -------
    ``bool``
        ``False`` if either form contains ``@@UNKNOWN@@`` or cannot be converted into a
        graph; otherwise the result of the isomorphism check.
    """
    if "@@UNKNOWN@@" in form1 or "@@UNKNOWN@@" in form2:
        return False

    # Best effort: a form that cannot be parsed or converted is never "the same" as another.
    try:
        graph1 = logical_form_to_graph(lisp_to_nested_expression(form1))
        graph2 = logical_form_to_graph(lisp_to_nested_expression(form2))
    except Exception:
        return False

    def node_match(n1, n2):
        # The matcher may compare the same node several times while searching for a mapping,
        # so the attribute dicts must only be read (``get``), never modified (``pop``).
        return (n1['id'] == n2['id']
                and n1['type'] == n2['type']
                and n1.get('function', 'none') == n2.get('function', 'none')
                and n1.get('tc', 'none') == n2.get('tc', 'none'))

    def multi_edge_match(e1, e2):
        # e1 / e2 hold the attribute dicts of all parallel edges between a pair of nodes.
        if len(e1) != len(e2):
            return False
        relations1 = sorted(edge['relation'] for edge in e1.values())
        relations2 = sorted(edge['relation'] for edge in e2.values())
        return relations1 == relations2

    return nx.is_isomorphic(graph1, graph2, node_match=node_match, edge_match=multi_edge_match)
def execute(self, logical_form: str) -> Any:
    """Evaluates ``logical_form`` by parsing it and delegating to ``_handle_expression``.

    A form that does not start with "(" is parenthesized first; commas are turned into
    spaces so that they act as token separators for the parser.
    """
    needs_parentheses = not logical_form.startswith("(")
    if needs_parentheses:
        logical_form = f"({logical_form})"
    space_separated = logical_form.replace(",", " ")
    return self._handle_expression(
        semparse_util.lisp_to_nested_expression(space_separated)
    )
def execute(self, logical_form: str) -> bool:
    """Parses ``logical_form`` and returns the result of ``_handle_expression`` on it.

    The form is parenthesized if needed and its commas are replaced by spaces before parsing.
    """
    form = logical_form if logical_form.startswith("(") else f"({logical_form})"
    form = form.replace(",", " ")
    parsed = semparse_util.lisp_to_nested_expression(form)
    # NOTE(review): the original comment here said the expression list has an additional
    # level of nesting at the top, yet the list is passed on unchanged -- confirm which
    # shape ``_handle_expression`` expects.
    return self._handle_expression(parsed)
def get_explanation(logical_form: str,
                    world_extractions: JsonDict,
                    answer_index: int,
                    world: QuarelWorld) -> List[JsonDict]:
    """
    Build the explanation for an answer as a list of header/content entries.

    Returns ``None`` when the root function of the parsed logical form is not ``infer``.
    """
    output = []

    # Describe the two worlds in natural language when they were extracted; otherwise fall
    # back to their plain names.
    if world_extractions['world1'] == "N/A" or world_extractions['world1'] == ["N/A"]:
        nl_world = {'world1': 'world1', 'world2': 'world2'}
    else:
        nl_world = {
            'world1': nl_world_string(world_extractions['world1']),
            'world2': nl_world_string(world_extractions['world2']),
        }
        output.append({
            "header": "Identified two worlds",
            "content": [f'''world1 = {nl_world['world1']}''',
                        f'''world2 = {nl_world['world2']}''']
        })

    root = semparse_util.lisp_to_nested_expression(logical_form)[0]
    if root[0] != "infer":
        return None

    # root is [infer, setup, answer option A, answer option B, ...]
    setup = root[1]
    output.append({
        "header": "The question is stating",
        "content": nl_arg(setup, nl_world)
    })

    answers = root[2:]
    option_a = "A: " + " and ".join(nl_arg(answers[0], nl_world))
    option_b = "B: " + " and ".join(nl_arg(answers[1], nl_world))
    output.append({
        "header": "The answer options are stating",
        "content": [option_a, option_b]
    })

    # For a conjunctive setup only its first conjunct is used.
    setup_core = setup[1] if setup[0] == 'and' else setup
    setup_attr = setup_core[0]
    setup_dir = world.qr_size[setup_core[1]]
    setup_world = nl_world[setup_core[2]]

    chosen = answers[answer_index]
    answer_attr = chosen[0]
    coeff = world._get_qr_coeff(strip_entity_type(setup_attr),  # pylint: disable=protected-access
                                strip_entity_type(answer_attr))
    answer_dir = setup_dir * coeff
    answer_world = nl_world[chosen[2]]

    theory = [f'When {nl_attr(setup_attr)} is {nl_dir(setup_dir)} ' +
              f'then {nl_attr(answer_attr)} is {nl_dir(answer_dir)} (for {setup_world})']
    if answer_world != setup_world:
        # The answer talks about the other world, where the direction is reversed.
        theory.append(f'''Therefore {nl_attr(answer_attr)} is {nl_dir(-answer_dir)} for {answer_world}''')
    theory.append(f"Therefore {chr(65+answer_index)} is the correct answer")

    output.append({
        "header": "Theory used",
        "content": theory
    })
    return output
def execute(self, logical_form: str) -> Any:
    """Parses ``logical_form`` and evaluates its single top-level expression.

    The form is parenthesized if it does not start with "(", and commas are replaced by
    spaces before parsing.
    """
    if logical_form.startswith("("):
        prepared = logical_form
    else:
        prepared = f"({logical_form})"
    prepared = prepared.replace(",", " ")
    nested = semparse_util.lisp_to_nested_expression(prepared)
    # The parsed list carries one extra level of nesting at the top: for the logical form
    # "(select all_rows fb:row.row.league)" it is
    # [['select', 'all_rows', 'fb:row.row.league']].  Only the inner list is evaluated.
    top_level_expression = nested[0]
    return self._handle_expression(top_level_expression)
def execute(self, lf_raw: str) -> int:
    """
    Very basic model for executing friction logical forms.

    Parameters
    ----------
    lf_raw : ``str``
        The raw logical form; "a:" prefixes directly after an opening parenthesis are
        stripped before parsing.

    Returns
    -------
    ``int``
        For an ``infer`` form whose arguments all evaluate, whatever ``_exec_infer`` returns
        (the answer index).  ``-1`` if no answer can be concluded: the parse is empty or too
        short, the root function is not ``infer``, or one of the arguments evaluates to
        ``None``.
    """
    # Remove "a:" prefixes from attributes (hack)
    logical_form = re.sub(r"\(a:", r"(", lf_raw)
    parse = semparse_util.lisp_to_nested_expression(logical_form)
    # Either condition alone makes the form unusable, so they are combined with ``or``;
    # with ``and`` an empty parse raised IndexError and the guard never fired otherwise.
    if len(parse) < 1 or len(parse[0]) < 2:
        return -1
    if parse[0][0] == 'infer':
        args = [self._exec_and(arg) for arg in parse[0][1:]]
        if None in args:
            return -1
        return self._exec_infer(*args)
    return -1
def lisp_to_sparql(lisp_program: str) -> str:
    """Translate a lisp-style program into the text of a SPARQL query.

    The program is parsed with ``util.lisp_to_nested_expression`` and flattened with
    ``_linearize_lisp_expression`` into a list of sub-programs.  Sub-program ``k`` is bound
    to the SPARQL variable ``?x<k>``; an argument of the form ``#<k>`` refers to that
    variable.  The last sub-program is the question node and its variable is finally renamed
    to ``?x``.

    Handled operators (first element of a sub-program):

    * ``JOIN`` -- one triple pattern; the second argument may be an entity (``m.`` / ``g.``
      prefix), a variable (``#k``) or a literal, and the relation may be reversed
      (``[R, relation]``).
    * ``AND`` -- identifies variables with each other, or adds a ``type.object.type``
      constraint when the first argument is a class.
    * ``le`` / ``lt`` / ``ge`` / ``gt`` -- a triple pattern plus a ``FILTER`` comparison.
    * ``TC`` -- ``FILTER`` clauses on the given relation and its "to" / "to_date"
      counterpart, comparing against a year (or a fixed date for ``NOW``).
    * ``ARGMIN`` / ``ARGMAX`` -- binds ``?sk0`` and adds ``ORDER BY`` ... ``LIMIT 1``; the
      final query then wraps a ``SELECT ?sk0`` sub-query.
    * ``COUNT`` -- switches the select clause to ``SELECT COUNT DISTINCT ?x``.

    Literals written as ``value^^type`` are rewritten to ``"value"^^<type>``; when the part
    of the type after ``#`` is not ``integer``, ``float`` or ``dateTime``, ``-08:00`` is
    appended to the value (presumably a timezone offset -- TODO confirm).

    Note that the elements of the linearized sub-programs are modified in place while
    literals are rewritten.

    Parameters
    ----------
    lisp_program : ``str``
        The program in lisp (s-expression) syntax.

    Returns
    -------
    ``str``
        The query clauses joined by newlines, starting with the ``ns:`` prefix declaration.
    """
    clauses = []
    order_clauses = []
    entities = set()  # collect entities for filtering
    # Maps a variable index to a smaller index it is identical to.
    identical_variables_r = {}  # key should be larger than value
    expression = util.lisp_to_nested_expression(lisp_program)
    superlative = False
    if expression[0] in ['ARGMAX', 'ARGMIN']:
        superlative = True
        # remove all joins in relation chain of an arg function. In other words, we will not use arg function as
        # binary function here, instead, the arity depends on the number of relations in the second argument in the
        # original function
        if isinstance(expression[2], list):

            # Flatten a (possibly nested) JOIN chain into the list of its relations; reversed
            # relations ([R, relation]) are kept as lists.
            def retrieve_relations(exp: list):
                rtn = []
                for element in exp:
                    if element == 'JOIN':
                        continue
                    elif isinstance(element, str):
                        rtn.append(element)
                    elif isinstance(element, list) and element[0] == 'R':
                        rtn.append(element)
                    elif isinstance(element, list) and element[0] == 'JOIN':
                        rtn.extend(retrieve_relations(element))
                return rtn

            relations = retrieve_relations(expression[2])
            expression = expression[:2]
            expression.extend(relations)

    sub_programs = _linearize_lisp_expression(expression, [0])
    # The last sub-program is the one whose variable answers the question.
    question_var = len(sub_programs) - 1
    count = False

    # Follow the chain of identical variables down to the index that has no smaller equivalent.
    def get_root(var: int):
        while var in identical_variables_r:
            var = identical_variables_r[var]
        return var

    for i, subp in enumerate(sub_programs):
        # From here on ``i`` is a string so it can be concatenated into variable names.
        i = str(i)
        if subp[0] == 'JOIN':
            if isinstance(subp[1], list):  # R relation
                # Reversed relation: the second argument is the subject, ?x<i> the object.
                if subp[2][:2] in ["m.", "g."]:  # entity
                    clauses.append("ns:" + subp[2] + " ns:" + subp[1][1] + " ?x" + i + " .")
                    entities.add(subp[2])
                elif subp[2][0] == '#':  # variable
                    clauses.append("?x" + subp[2][1:] + " ns:" + subp[1][1] + " ?x" + i + " .")
                else:  # literal (actually I think literal can only be object)
                    if subp[2].__contains__('^^'):
                        data_type = subp[2].split("^^")[1].split("#")[1]
                        if data_type not in ['integer', 'float', 'dateTime']:
                            subp[2] = f'"{subp[2].split("^^")[0] + "-08:00"}"^^<{subp[2].split("^^")[1]}>'
                        else:
                            subp[2] = f'"{subp[2].split("^^")[0]}"^^<{subp[2].split("^^")[1]}>'
                    clauses.append(subp[2] + " ns:" + subp[1][1] + " ?x" + i + " .")
            else:
                # Forward relation: ?x<i> is the subject, the second argument the object.
                if subp[2][:2] in ["m.", "g."]:  # entity
                    clauses.append("?x" + i + " ns:" + subp[1] + " ns:" + subp[2] + " .")
                    entities.add(subp[2])
                elif subp[2][0] == '#':  # variable
                    clauses.append("?x" + i + " ns:" + subp[1] + " ?x" + subp[2][1:] + " .")
                else:  # literal
                    if subp[2].__contains__('^^'):
                        data_type = subp[2].split("^^")[1].split("#")[1]
                        if data_type not in ['integer', 'float', 'dateTime']:
                            subp[2] = f'"{subp[2].split("^^")[0] + "-08:00"}"^^<{subp[2].split("^^")[1]}>'
                        else:
                            subp[2] = f'"{subp[2].split("^^")[0]}"^^<{subp[2].split("^^")[1]}>'
                    clauses.append("?x" + i + " ns:" + subp[1] + " " + subp[2] + " .")
        elif subp[0] == 'AND':
            # The result of AND is the same variable as its second argument ...
            var1 = int(subp[2][1:])
            rooti = get_root(int(i))
            root1 = get_root(var1)
            if rooti > root1:
                identical_variables_r[rooti] = root1
            else:
                identical_variables_r[root1] = rooti
                root1 = rooti
            if subp[1][0] == "#":
                # ... and as its first argument too, when that is a variable.
                var2 = int(subp[1][1:])
                root2 = get_root(var2)
                if root1 > root2:
                    identical_variables_r[root1] = root2
                else:
                    identical_variables_r[root2] = root1
            else:  # 2nd argument is a class
                clauses.append("?x" + i + " ns:type.object.type ns:" + subp[1] + " .")
        elif subp[0] in ['le', 'lt', 'ge', 'gt']:  # the 2nd can only be numerical value
            # Bind the compared value to ?y<i>, then filter on it.
            clauses.append("?x" + i + " ns:" + subp[1] + " ?y" + i + " .")
            if subp[0] == 'le':
                op = "<="
            elif subp[0] == 'lt':
                op = "<"
            elif subp[0] == 'ge':
                op = ">="
            else:
                op = ">"
            if subp[2].__contains__('^^'):
                data_type = subp[2].split("^^")[1].split("#")[1]
                if data_type not in ['integer', 'float', 'dateTime']:
                    subp[2] = f'"{subp[2].split("^^")[0] + "-08:00"}"^^<{subp[2].split("^^")[1]}>'
                else:
                    subp[2] = f'"{subp[2].split("^^")[0]}"^^<{subp[2].split("^^")[1]}>'
            clauses.append(f"FILTER (?y{i} {op} {subp[2]})")
        elif subp[0] == 'TC':
            # The result of TC is the same variable as its first argument.
            var = int(subp[1][1:])
            rooti = get_root(int(i))
            root_var = get_root(var)
            if rooti > root_var:
                identical_variables_r[rooti] = root_var
            else:
                identical_variables_r[root_var] = rooti

            year = subp[3]
            if year == 'NOW':
                # 'NOW' is pinned to a fixed date.
                from_para = '"2015-08-10"^^xsd:dateTime'
                to_para = '"2015-08-10"^^xsd:dateTime'
            else:
                from_para = f'"{year}-12-31"^^xsd:dateTime'
                to_para = f'"{year}-01-01"^^xsd:dateTime'

            # Each FILTER below is split over several clauses; it accepts a node when the
            # relation is absent or when its value satisfies the date bound.
            clauses.append(
                f'FILTER(NOT EXISTS {{?x{i} ns:{subp[2]} ?sk0}} || ')
            clauses.append(f'EXISTS {{?x{i} ns:{subp[2]} ?sk1 . ')
            clauses.append(f'FILTER(xsd:datetime(?sk1) <= {from_para}) }})')
            if subp[2][-4:] == "from":
                # Relation ends in "from": its counterpart ends in "to".
                clauses.append(
                    f'FILTER(NOT EXISTS {{?x{i} ns:{subp[2][:-4] + "to"} ?sk2}} || '
                )
                clauses.append(
                    f'EXISTS {{?x{i} ns:{subp[2][:-4] + "to"} ?sk3 . ')
            else:  # from_date -> to_date
                clauses.append(
                    f'FILTER(NOT EXISTS {{?x{i} ns:{subp[2][:-9] + "to_date"} ?sk2}} || '
                )
                clauses.append(
                    f'EXISTS {{?x{i} ns:{subp[2][:-9] + "to_date"} ?sk3 . ')
            clauses.append(f'FILTER(xsd:datetime(?sk3) >= {to_para}) }})')
        elif subp[0] in ["ARGMIN", "ARGMAX"]:
            superlative = True
            if subp[1][0] == '#':
                var = int(subp[1][1:])
                rooti = get_root(int(i))
                root_var = get_root(var)
                if rooti > root_var:
                    identical_variables_r[rooti] = root_var
                else:
                    identical_variables_r[root_var] = rooti
            else:  # arg1 is class
                clauses.append(f'?x{i} ns:type.object.type ns:{subp[1]} .')

            if len(subp) == 3:
                # A single relation leads directly to the value being ranked (?sk0).
                clauses.append(f'?x{i} ns:{subp[2]} ?sk0 .')
            elif len(subp) > 3:
                # A chain of relations: walk it through intermediate variables ?c0, ?c1, ...
                for j, relation in enumerate(subp[2:-1]):
                    if j == 0:
                        var0 = f'x{i}'
                    else:
                        var0 = f'c{j - 1}'
                    var1 = f'c{j}'
                    if isinstance(relation, list) and relation[0] == 'R':
                        clauses.append(f'?{var1} ns:{relation[1]} ?{var0} .')
                    else:
                        clauses.append(f'?{var0} ns:{relation} ?{var1} .')
                # ``j`` is the index of the last intermediate variable of the chain.
                clauses.append(f'?c{j} ns:{subp[-1]} ?sk0 .')

            if subp[0] == 'ARGMIN':
                order_clauses.append("ORDER BY ?sk0")
            elif subp[0] == 'ARGMAX':
                order_clauses.append("ORDER BY DESC(?sk0)")
            order_clauses.append("LIMIT 1")
        elif subp[0] == 'COUNT':  # this is easy, since it can only be applied to the question node
            var = int(subp[1][1:])
            root_var = get_root(var)
            identical_variables_r[int(i)] = root_var  # COUNT can only be the outermost
            count = True

    # Merge identical variables
    for i in range(len(clauses)):
        for k in identical_variables_r:
            # The trailing space keeps ?x1 from also matching ?x10, ?x11, ...
            clauses[i] = clauses[i].replace(f'?x{k} ', f'?x{get_root(k)} ')

    question_var = get_root(question_var)

    # The question variable is simply called ?x in the final query.
    for i in range(len(clauses)):
        clauses[i] = clauses[i].replace(f'?x{question_var} ', f'?x ')

    if superlative:
        # Keep a copy of the plain patterns; they are repeated in the outer query below.
        arg_clauses = clauses[:]

    # The answer must not be one of the entities mentioned in the program.
    for entity in entities:
        clauses.append(f'FILTER (?x != ns:{entity})')
    clauses.insert(
        0,
        f"FILTER (!isLiteral(?x) OR lang(?x) = '' OR langMatches(lang(?x), 'en'))"
    )
    clauses.insert(0, "WHERE {")
    if count:
        clauses.insert(0, f"SELECT COUNT DISTINCT ?x")
    elif superlative:
        # Inner query selects the extreme ?sk0; the outer query repeats the patterns and
        # selects ?x.
        clauses.insert(0, "{SELECT ?sk0")
        clauses = arg_clauses + clauses
        clauses.insert(0, "WHERE {")
        clauses.insert(0, f"SELECT DISTINCT ?x")
    else:
        clauses.insert(0, f"SELECT DISTINCT ?x")
    clauses.insert(0, "PREFIX ns: <http://rdf.freebase.com/ns/>")

    clauses.append('}')
    clauses.extend(order_clauses)
    if superlative:
        # Close the sub-query and the outer WHERE block.
        clauses.append('}')
        clauses.append('}')

    return '\n'.join(clauses)