def _turn_clause_to_interim_repr(clause: Clause, suffix: str = "_x"):
    """
    Converts a clause into a list of tuples, one tuple per literal.

    Every tuple starts with the literal's predicate and continues with the
    literal's terms. A term that is a `Variable` occurring in the head is
    replaced by its position in the head's variable list; any other term is
    copied unchanged.

    `suffix` is accepted but not used by this function.
    """
    # position of every head variable
    head_positions = {}
    for position, head_var in enumerate(clause.get_head().get_variables()):
        head_positions[head_var] = position

    def _encode_term(term):
        if isinstance(term, Variable) and term in head_positions:
            return head_positions[term]
        return term

    interim = []
    for literal in clause.get_literals():
        row = [literal.get_predicate()]
        row.extend(_encode_term(term) for term in literal.get_terms())
        interim.append(tuple(row))
    return interim
def skolemize(clause: Clause) -> "tuple[Clause, dict]":
    """
    Replaces every variable of `clause` by a skolem constant.

    The i-th variable returned by `clause.get_variables()` is mapped to the
    constant named `sk<i>` of type "thing" (X,Y,Z,... -> sk0,sk1,sk2,...).

    :param clause: the clause to skolemize
    :return: a pair `(skolemized_clause, substitution)`; `substitution` maps
             each variable of `clause` to the constant that replaced it
    """
    # Map every variable of the clause to its own constant
    subst = {
        variable: Constant(f"sk{i}", c_type("thing"))
        for i, variable in enumerate(clause.get_variables())
    }

    # Apply this substitution to create new clause without quantifiers
    return clause.substitute(subst), subst
def _instantiate_var_clause(clause: Clause, constant: Constant):
    """
    Returns all clauses generated by substituting a `Variable` in `clause`
    with `constant`.

    For a `Body` every variable is a candidate; for anything else only the
    body variables are. Exactly one variable is replaced per generated
    clause, and duplicates are removed (so the order of the result is the
    iteration order of a set).
    """
    replaceable = (
        clause.get_variables()
        if isinstance(clause, Body)
        else clause.get_body_variables()
    )
    instantiated = [
        clause.substitute({variable: constant}) for variable in replaceable
    ]
    return list(set(instantiated))
def get_predecessor_of(
    self, node: typing.Union[Clause, Recursion, Body]
) -> typing.Union[Clause, Recursion, Body, typing.Sequence[Clause]]:
    """
    Returns the predecessor of the node = the last position of the pointer
    before reaching the node

    For a `Body` this is the stored "last_visited_from" value. For a clause
    (or the single recursive case of a recursion) the predecessor body is
    looked up the same way; if the clause's head is registered on that body,
    the clause `head :- predecessor` is returned, otherwise every clause
    retrievable from the predecessor body is returned.

    :param node: body, clause or recursion to look up
    :return: the predecessor (see above)
    :raises Exception: if a recursion has more than one recursive case
    """
    # TODO: make it possible to get all predecessors, not just the last visited from
    graph_nodes = self._hypothesis_space.nodes

    if isinstance(node, Body):
        return graph_nodes[node]["last_visited_from"]

    if isinstance(node, Clause):
        reference_clause = node
    else:
        recursive_cases = node.get_recursive_case()
        if len(recursive_cases) > 1:
            raise Exception(
                "do not support recursions with more than 1 recursive case"
            )
        reference_clause = recursive_cases[0]

    head = reference_clause.get_head()
    body = reference_clause.get_body()

    previous_body = graph_nodes[body]["last_visited_from"]
    if head in graph_nodes[previous_body]["heads"]:
        return Clause(head, previous_body)
    return self.retrieve_clauses_from_body(previous_body)
def _execute_program(self, examples: Task, clause: Clause) -> typing.Sequence[Atom]:
    """
    Evaluates a clause using the Prolog engine and background knowledge

    The clause is temporarily asserted into the solver, every positive and
    negative example is tested with `has_solution`, and the clause is
    retracted again.

    :param examples: task providing `(positive, negative)` example sets
    :param clause: clause to evaluate
    :return: list of the examples (positive or negative) the clause covers;
             an empty list if the clause has no body literals
    """
    if len(clause.get_body().get_literals()) == 0:
        return []

    self._solver.asserta(clause)
    try:
        pos, neg = examples.get_examples()
        return [
            example
            for example in pos.union(neg)
            if self._solver.has_solution(example)
        ]
    finally:
        # always undo the asserta, even when a query fails, so the clause
        # does not leak into later evaluations
        self._solver.retract(clause)
def remove(self,
           node: typing.Union[Clause, Procedure],
           remove_entire_body: bool = False,
           not_if_other_parents: bool = True) -> None:
    """
    Removes the node from the hypothesis space (and all of its descendents)

    :param node: clause to remove; for a procedure, its first recursive
                 clause is used
    :param remove_entire_body: if True, the body node is removed regardless
                 of the heads attached to it; otherwise only `head` is
                 removed from the body (and the body node itself only once
                 no heads are left)
    :param not_if_other_parents: if True, children of the body that have
                 more than one predecessor are left untouched
    """
    clause = (node if isinstance(node, Clause) else
              [x for x in node.get_clauses() if x.is_recursive()][0])
    head = clause.get_head()
    body = clause.get_body()

    # Snapshot the children BEFORE the body is removed; successors() and
    # predecessors() may return lazy iterators, so materialise them before
    # taking a length or mutating the graph.
    children = list(self._hypothesis_space.successors(body))
    if not_if_other_parents:
        # do not remove children that have other parents
        children = [
            child for child in children
            if len(list(self._hypothesis_space.predecessors(child))) <= 1
        ]

    if remove_entire_body:
        # remove entire body, then descend into each child (the original
        # code recursed on the body that had just been removed)
        self._hypothesis_space.remove_node(body)
        for child in children:
            self.remove(Clause(head, child),
                        remove_entire_body=remove_entire_body,
                        not_if_other_parents=not_if_other_parents)
        return

    # remove just the head
    heads = self._hypothesis_space.nodes[body]["heads"]
    if head in heads:
        del heads[head]

    if len(heads) == 0:
        # if no heads left, remove the entire node and its descendants
        self._hypothesis_space.remove_node(body)
        for child in children:
            self.remove(Clause(head, child), remove_entire_body=True)
    else:
        # remove the same head from children
        for child in children:
            self.remove(Clause(head, child))
def compute_bottom_clause(theory: Sequence[Clause], c: Clause) -> Clause:
    """
    Computes the bottom clause given a theory and a clause.
    Algorithm from (De Raedt,2008)

    :param theory: background clauses; concatenated with a list via `+`
    :param c: the clause to compute the bottom clause for
    :return: a clause with the head of `c` and, as body, the heads of the
             Herbrand model with skolem constants mapped back to variables
    """
    # 1. Find a skolemization substitution θ for c (w.r.t. B and c)
    _, theta = skolemize(c)

    # 2. Compute the least Herbrand model M of theory ¬body(c)θ
    skolemized_body = []
    for literal in c.get_body().get_literals():
        skolemized_body.append(Clause(literal.substitute(theta), []))
    model = herbrand_model(theory + skolemized_body)

    # 3. Deskolemize the clause head(cθ) <= M and return the result.
    deskolemize = {}
    for variable, skolem_constant in theta.items():
        deskolemize[skolem_constant] = variable

    bottom_body = [
        fact.get_head().substitute(deskolemize) for fact in model
    ]
    return Clause(c.get_head(), bottom_body)
def retrieve_clauses_from_body(
        self,
        body: Body) -> typing.Sequence[typing.Union[Clause, Procedure]]:
    """
    Returns all possible clauses given the body

    One clause is built for every head stored on the body's node whose
    "ignored" entry is falsy.
    """
    head_info = self._hypothesis_space.nodes[body]["heads"]
    return [
        Clause(candidate, body)
        for candidate in head_info
        if not head_info[candidate]["ignored"]
    ]
def _execute_program(self, clause: Clause) -> typing.Sequence[Atom]:
    """
    Evaluates a clause using the Prolog engine and background knowledge

    The body literals are sent to the solver as one query; each solution is
    turned into a head atom by looking up the head variables in it.

    Returns a list of atoms that the clause covers (empty for a clause
    without body literals)
    """
    if len(clause.get_body().get_literals()) == 0:
        return []

    head_predicate = clause.get_head().get_predicate()
    head_variables = clause.get_head_variables()

    covered = []
    for solution in self._solver.query(*clause.get_body().get_literals()):
        head_values = [solution[variable] for variable in head_variables]
        covered.append(head_predicate(*head_values))
    return covered
def evaluate(self, clause: Clause, examples: Task, covered: Sequence[Atom]):
    """
    Scores a clause as
    `covered positives - covered negatives - number of literals + 1`.

    Also increments the `_clauses_evaluated` counter.

    :param clause: the clause being scored (only its literal count is used)
    :param examples: task providing `(positive, negative)` example sets
    :param covered: atoms covered by the clause
    :return: the score, or `(score, covered positives)` when
             `_return_upperbound` is set
    """
    self._clauses_evaluated += 1

    positives, negatives = examples.get_examples()
    n_pos = len(positives.intersection(covered))
    n_neg = len(negatives.intersection(covered))

    score = n_pos - n_neg - len(clause.get_literals()) + 1
    if self._return_upperbound:
        return score, n_pos
    return score
def encode2(self, clause: Clause):
    """
    Encodes a clause as the concatenation (`+`) of three vectors: problems,
    primitives and variables, each starting from a copy of the matching
    `_encoding*` template.

    * The slot of the head predicate is set to 1 in the problems vector
      (the predicate is first registered through `addproblem(pred, 10)` if
      it is unknown).
    * The slot of the first head variable is set to 1 in the variables
      vector.
    * Body literals are numbered 3, 5, 7, ...: literal number `k` writes `k`
      into the first free (== 0) primitives slot at or after its predicate's
      start index, `k` into the first free variables slot of its first
      variable and, if it has exactly two variables, `k + 1` into the first
      free variables slot of its second variable.

    Raises whatever the underlying container raises (e.g. `IndexError`) when
    no free slot is left at or after a start index.
    """

    def _claim_first_free(slots, start, value):
        # linear probe from `start` to the first slot still holding 0
        position = start
        while slots[position] != 0:
            position += 1
        slots[position] = value

    head = clause.get_head()
    head_predicate = head.get_predicate()
    if head_predicate not in self._problemindices:
        self.addproblem(head_predicate, 10)

    problems = self._encodingProblems.copy()
    primitives = self._encodingprimitives.copy()
    variables = self._encodingvariables.copy()

    problems[self._problemindices[head_predicate]] = 1
    variables[self._variablesindices[head.get_variables()[0]]] = 1

    marker = 3
    for literal in clause.get_literals():
        _claim_first_free(
            primitives, self._primitivesindices[literal.get_predicate()], marker)

        literal_variables = literal.get_variables()
        _claim_first_free(
            variables, self._variablesindices[literal_variables[0]], marker)
        if len(literal_variables) == 2:
            _claim_first_free(
                variables, self._variablesindices[literal_variables[1]],
                marker + 1)

        # next literal gets the next odd number
        marker += 2

    return problems + primitives + variables
def _execute_program(self, clause: Clause, count_as_query: bool = True) -> typing.Sequence[Atom]:
    """
    Evaluates a clause using the Prolog engine and background knowledge

    :param clause: the clause to evaluate
    :param count_as_query: whether this evaluation increments the
                           `_prolog_queries` counter
    :return: a list of atoms that the clause covers, or `None` for a clause
             without body literals (trivial hypothesis, covers all possible
             examples)
    """
    if len(clause.get_body().get_literals()) == 0:
        # Covers all possible examples because trivial hypothesis
        return None

    head_predicate = clause.get_head().get_predicate()
    head_args = clause.get_head_arguments()

    solutions = self._solver.query(*clause.get_body().get_literals())
    if count_as_query:
        self._prolog_queries += 1

    covered = []
    for solution in solutions:
        # Build a solution by substituting Variables with their found value
        # and copying constants without change
        grounded_args = []
        for arg in head_args:
            if isinstance(arg, Variable):
                grounded_args.append(solution[arg])
            else:
                grounded_args.append(arg)
        covered.append(head_predicate(*grounded_args))
    return covered
def as_clauses(self):
    """
    Flattens everything returned by `get_all()` into one list of clauses.

    Clauses are kept as they are, atoms become clauses with an empty body,
    and procedures and programs contribute each of their clauses.

    :raises AssertionError: if an item is of none of these kinds
    """
    clauses = []
    for item in self.get_all():
        if isinstance(item, Clause):
            clauses.append(item)
        elif isinstance(item, Atom):
            clauses.append(Clause(item, []))
        elif isinstance(item, (Procedure, Program)):
            clauses.extend(item.get_clauses())
        else:
            raise AssertionError(
                "Knowledge can only contain clauses, atoms, procedures or programs!"
            )
    return clauses
def _valid_positions(cl: Clause, allowed_positions_dict, allowed_reflexivity=()):
    """
    Returns True iff the clause `cl` respects the allowed positions for
    constants as given by `allowed_positions_dict`, and is not reflexive
    (e.g. next(X,X)) when disallowed

    :param cl: clause whose literals are checked
    :param allowed_positions_dict: indexed as
           `allowed_positions_dict[constant][predicate]`, giving the argument
           positions at which `constant` may occur in `predicate`
    :param allowed_reflexivity: predicates that may have all arguments equal
           (immutable default, so no list is shared between calls)
    """
    for atom in cl.get_literals():
        pred = atom.get_predicate()
        args = atom.get_arguments()

        # Constants must appear at the right places
        for position, arg in enumerate(args):
            if (isinstance(arg, Constant)
                    and position not in allowed_positions_dict[arg][pred]):
                return False

        # If all arguments are equal, this must be explicitly allowed
        if (len(args) > 0
                and all(arg == args[0] for arg in args)
                and pred not in allowed_reflexivity):
            return False

    return True
def _get_recursions(self, node: Body) -> typing.Sequence[Recursion]:
    """
    Prepares the valid recursions

    For every head stored on `node`, the hypothesis space is walked
    breadth-first starting from the current value of the node's partner
    pointer (nodes that themselves have a "partner" entry are not expanded
    into). Every visited node's head whose predicate argument types equal
    those of the recursive head yields one `Recursion` made of a base clause
    on the visited node and the recursive clause on `node`.

    The pointer is reset to its initial value after each head and, at the
    end, to the last node visited (`None` if nothing was visited).
    """
    graph = self._hypothesis_space
    pointer_name = graph.nodes[node]["partner"]
    init_pointer_value = self._pointers[pointer_name]
    last_pointer_value = None
    recursions = []

    # for each valid head
    for recursive_head in list(graph.nodes[node]["heads"].keys()):
        recursive_clause = Clause(recursive_head, node)
        frontier = [self._pointers[pointer_name]]

        while frontier:
            focus_node = frontier.pop(0)

            # find matching heads
            matching_heads = [
                candidate
                for candidate in list(graph.nodes[focus_node]["heads"].keys())
                if candidate.get_predicate().get_arg_types()
                == recursive_head.get_predicate().get_arg_types()
            ]

            # prepare recursion
            for base_head in matching_heads:
                if isinstance(self._head_constructor, Predicate):
                    base_clause = Clause(base_head, focus_node)
                else:
                    # if the filler predicate is used to construct heads,
                    # make sure the same head predicate is used
                    base_args = base_head.get_arguments()
                    base_clause = Clause(
                        Atom(recursive_head.get_predicate(), base_args),
                        focus_node,
                    )
                recursions.append(Recursion([base_clause, recursive_clause]))

            # extend the frontier - exclude recursive nodes
            frontier.extend(
                child
                for child in graph.successors(focus_node)
                if "partner" not in graph.nodes[child]
            )

            last_pointer_value = focus_node

        # reset the pointer value for next valid head
        self.reset_pointer(pointer_name, init_pointer_value)

    # set the pointer to the last explored clause
    self.reset_pointer(pointer_name, last_pointer_value)

    return recursions
def learn(
    self,
    examples: Task,
    knowledge: Knowledge,
    hypothesis_space: HypothesisSpace,
    initial_clause: typing.Union[Body,Clause] = None,
    minimum_freq: int = 0
):
    """
    To find a hypothesis, Aleph uses the following set covering approach:
    1.  Select a positive example to be generalised. If none exists, stop;
        otherwise proceed to the next step.
    2.  Construct the most specific clause (the bottom clause)
        (Muggleton, 1995) that entails the selected example and that is
        consistent with the mode declarations.
    3.  Search for a clause more general than the bottom clause and that has
        the best score.
    4.  Add the clause to the current hypothesis and remove all the examples
        made redundant by it. Return to step 1.
    (Description from Cropper and Dumancic )

    :param examples: task with the positive and negative examples
    :param knowledge: background knowledge, asserted into the solver
    :param hypothesis_space: not used by this method; a fresh
           `TopDownHypothesisSpace` is built internally
    :param initial_clause: forwarded to the internal hypothesis space
    :param minimum_freq: if > 0, only constants returned by
           `find_frequent_constants(knowledge, minimum_freq)` may be used
    :return: the `LearnResult` (also stored in `self._learnresult`)
    """
    # Variables for learning statics
    start_time = datetime.datetime.now()
    i = 0
    self._learnresult = LearnResult()

    # Reset in case the learner is reused
    self._prolog_queries = 0
    self._intermediate_coverage = []
    self._eval_fn._clauses_evaluated = 0

    # Assert all BK into engines
    self._solver.retract_all()
    self._assert_knowledge(knowledge)

    # Start with all examples
    examples_to_use = examples
    pos, _ = examples_to_use.get_examples()

    # List of clauses we're learning
    prog = []

    # parameters for aleph_extension()
    allowed_positions = find_allowed_positions(knowledge)
    allowed_reflexivity = find_allowed_reflexivity(knowledge)
    if minimum_freq > 0:
        allowed_constants = find_frequent_constants(knowledge, minimum_freq)
    else:
        allowed_constants = None

    def _is_usable_constant(term):
        # Constants can only be picked from the literals in the bottom
        # clause, and from constants that are frequent enough in bk
        # (if applicable)
        if isinstance(term, int):
            return True
        if not isinstance(term, Constant):
            return False
        return allowed_constants is None or term in allowed_constants

    hs = None
    bk = None
    if len(pos) > 0:
        # Only built when there is something to learn from: the head
        # predicate is taken from a positive example, so with no positive
        # examples there is nothing to index into (step 1: "If none exists,
        # stop").
        # Create HypothesisSpace: primitives will be different in each
        # iteration (based on the chosen positive example)
        hs = TopDownHypothesisSpace(
            primitives=[],
            head_constructor=list(pos)[0].get_predicate(),
            expansion_hooks_reject=[
                lambda x, y: has_duplicated_literal(x, y),
            ],
            initial_clause=initial_clause
        )
        # The background clauses do not change between iterations
        bk = knowledge.as_clauses()

    # NOTE(review): the loop only ends once every positive example is
    # covered; if a learned clause never covers the selected example this
    # does not terminate on its own.
    while len(pos) > 0:
        i += 1

        # Pick example from pos
        pos_ex = Clause(list(pos)[0], [])
        bottom = compute_bottom_clause(bk, pos_ex)
        if self._print:
            print("Next iteration: generalizing example {}".format(str(pos_ex)))

        # Predicates can only be picked from the body of the bottom clause
        body_predicates = list(
            set(lit.get_predicate() for lit in bottom.get_body().get_literals())
        )

        constants = list(
            set(filter(_is_usable_constant, bottom.get_body().get_arguments()))
        )
        if self._print:
            print("Constants in bottom clause: {}".format(constants))
            print("Predicates in bottom clause: {}".format(body_predicates))

        # IMPORTANT: use VALUES of pred and constants, not the variables
        # (default arguments bind the current values; a plain closure would
        # see only the last `pred`)
        extensions = [
            lambda x, a=pred, b=allowed_positions, c=constants, d=allowed_reflexivity:
                aleph_extension(x, a, b, c, d)
            for pred in body_predicates
        ]

        # Update hypothesis space for this iteration
        hs._primitives = extensions
        hs.remove_all_edges()

        # Learn 1 clause and add to program
        cl = self._learn_one_clause(examples_to_use, hs)
        prog.append(cl)
        if self._print:
            print("- New clause: " + str(cl))

        # update covered positive examples
        covered = self._execute_program(cl)
        if self._print:
            print(
                "Clause covers {} pos examples: {}".format(
                    len(pos.intersection(covered)), pos.intersection(covered)
                )
            )

        # Find intermediate quality of program at this point, add to
        # learnresult (don't count these as Prolog queries)
        program_coverage = set()
        for learned in prog:
            program_coverage = program_coverage.union(
                self._execute_program(learned, count_as_query=False))
        pos_covered = len(program_coverage.intersection(examples._positive_examples))
        neg_covered = len(program_coverage.intersection(examples._negative_examples))
        self._intermediate_coverage.append((pos_covered, neg_covered))

        # Remove covered examples and start next iteration
        pos, neg = examples_to_use.get_examples()
        pos = pos.difference(covered)
        examples_to_use = Task(pos, neg)

        if self._print:
            print("Finished iteration {}".format(i))

    # Wrap results into learnresult and return
    self._learnresult['learner'] = "Aleph"
    self._learnresult["total_time"] = (datetime.datetime.now() - start_time).total_seconds()
    self._learnresult["final_program"] = prog
    self._learnresult["num_iterations"] = i
    self._learnresult["evalfn_evaluations"] = self._eval_fn._clauses_evaluated
    self._learnresult["prolog_queries"] = self._prolog_queries
    self._learnresult["intermediate_coverage"] = self._intermediate_coverage

    return self._learnresult
def herbrand_model(clauses: Sequence[Clause]) -> Sequence[Clause]:
    """
    Computes a minimal Herbrand model of a theory 'clauses'.
    Algorithm from Logical and Relational learning (De Raedt, 2008)

    Starting from the facts of the theory, every rule is instantiated with
    all mappings of its body variables to the constants of the current
    model; the head is added as a new fact whenever all instantiated body
    literals are heads of facts in the current model. This repeats until a
    round adds nothing new.

    :param clauses: the theory (facts are clauses without body literals)
    :return: `clauses` itself if it contains only facts, otherwise a list of
             fact clauses forming the model
    :raises AssertionError: if the theory contains no facts
    """
    # Find the facts in the theory (i.e. no body literals)
    facts = [c for c in clauses if len(c.get_body().get_literals()) == 0]
    if len(facts) == 0:
        raise AssertionError(
            "Theory does not contain ground facts, which necessary to compute a minimal Herbrand model!"
        )

    # If all clauses are just facts, there is nothing to be done.
    if len(facts) == len(clauses):
        return clauses

    # BUG (kept from the original note): doesn't work properly after pylo update...

    # The rules never change between rounds, so select them once
    rules = [c for c in clauses if len(c.get_body().get_literals()) > 0]

    previous_model = []
    model = list(facts)
    while Counter(model) != Counter(previous_model):
        model_constants = _flatten(
            [fact.get_head().get_arguments() for fact in model])
        # Heads of the current model; fixed for the whole round, so compute
        # them once instead of once per body literal
        known_heads = [fact.get_head() for fact in model]

        derived = []
        for rule in rules:
            body = rule.get_body()
            body_literals = body.get_literals()

            # Build all substitutions body_vars -> model_constants
            for theta in _all_maps(body.get_variables(), model_constants):
                # the rule fires only if every instantiated body literal is
                # true in the current model
                if not all(literal.substitute(theta) in known_heads
                           for literal in body_literals):
                    continue

                new_fact = Clause(rule.get_head().substitute(theta), [])
                if new_fact not in derived and new_fact not in model:
                    derived.append(new_fact)

        previous_model, model = model, list(set(derived + model))

    return model
is_uppercase = c_pred("is_uppercase", 1) # define the Variables H = c_var("H") Ta = c_var("Ta") Tb = c_var("Tb") A = c_var("A") B = c_var("B") H1 = c_var("H1") H2 = c_var("H2") Z = c_var("Z") O = c_var("O") N = c_var("N") # create clauses head = Atom(not_space, [A]) body = Atom(is_space, [A]) clause1 = Clause(head, Body(Not(body))) head = Atom(is_uppercase, [Structure(s, [Pair(H, Z), O])]) body = Atom(is_uppercase_aux, [H]) clause2 = Clause(head, Body(body)) head = Atom(not_uppercase, [A]) body = Atom(is_uppercase, [A]) clause3 = Clause(head, Body(Not(body))) head = Atom(is_lowercase, [Structure(s, [Pair(H, Z), O])]) body = Atom(is_lowercase_aux, [H]) clause4 = Clause(head, Body(body)) head = Atom(not_lowercase, [A]) body = Atom(is_lowercase, [A]) clause5 = Clause(head, Body(Not(body))) head = Atom(is_letter, [Structure(s, [Pair(H, Z), O])]) body1 = Atom(is_lowercase_aux, [H]) body2 = Atom(is_uppercase, [H])
def encode(self, clause: Clause):
    """
    Encodes a clause as a vector of length 1850.

    The head and the body literals are grouped by the concatenated names of
    their variables (literals over the same variable sequence share a
    group). Groups are ordered by a numeric value derived from the first up
    to three characters of that key, the literals are laid out group by
    group, and every pair of neighbouring literals is passed through
    `self.variableSubstition` and looked up in `self._dictionary`; the entry
    at the resulting index is incremented. Pairs that are not in the
    dictionary are skipped. The last entry (index 1849) holds the number of
    variables of the clause.

    NOTE(review): the ordering value works on single characters of the key,
    so it presumably expects one-letter variable names; a literal without
    variables raises IndexError (as before).

    :param clause: the clause to encode
    :return: the encoding as a numpy array
    """

    def _weighted(first, second, third, tie_break=0):
        return 100000 * first + 3500 * second + 130 * third + tie_break

    def _group_value(key):
        # ordering value of a group; characters are offset by 64 ('A' -> 1)
        if len(key) == 1:
            a = ord(key) - 64
            return _weighted(a, a, a)
        if len(key) == 2:
            a = ord(key[0]) - 64
            b = ord(key[-1]) - 64
            return _weighted(a, a, b, 0 if a <= b else 1)

        a = ord(key[0]) - 64
        b = ord(key[1]) - 64
        c = ord(key[2]) - 64
        if a <= b <= c:
            return _weighted(a, b, c)
        if a <= c <= b:
            return _weighted(a, c, b, 1)
        if b <= a <= c:
            return _weighted(b, a, c, 2)
        # NOTE(review): the original had a second, unreachable
        # `b <= a <= c` branch here (+3); the b <= c <= a ordering therefore
        # falls through to the final case. Values are kept as they were.
        if c <= a <= b:
            return _weighted(c, a, b, 4)
        return _weighted(c, b, a, 5)

    encodingClause = np.zeros(1850)

    # groups[k] = (ordering value, literals sharing one variable key);
    # group_of[key] = k. A new group always gets the next free position, so
    # a lookup hit can no longer corrupt the position of later groups.
    groups = []
    group_of = {}
    for lit in [clause.get_head(), *clause.get_literals()]:
        key = "".join(variable.get_name() for variable in lit.get_variables())
        if key in group_of:
            groups[group_of[key]][1].append(lit)
        else:
            group_of[key] = len(groups)
            groups.append((_group_value(key), [lit]))

    # order by value only; on equal values keep first-seen order instead of
    # comparing the literal lists
    groups.sort(key=lambda group: group[0])
    ordered = [lit for _, literals in groups for lit in literals]

    for i in range(len(ordered) - 1):
        pair_key = tuple(self.variableSubstition(ordered[i:i + 2]))
        slot = self._dictionary.get(pair_key)
        if slot is None:
            # indexing with None would add an axis and increment EVERY entry
            continue
        encodingClause[slot] += 1

    encodingClause[1849] = len(clause.get_variables())
    return encodingClause
# Fragment of a flat script: variable definitions followed by helper clauses
# for string predicates. Names such as H, s, not_space, is_space,
# is_uppercase, is_uppercase_aux, ... are not defined in this fragment.
Ta = c_var("Ta")
Tb = c_var("Tb")
A = c_var("A")
B = c_var("B")
C = c_var("C")
D = c_var("D")
E = c_var("E")
H1 = c_var("H1")
H2 = c_var("H2")
Z = c_var("Z")
O = c_var("O")
N = c_var("N")

# create clauses
# `head` and `body` are scratch names that are rebound for every clause.

# not_space(A) :- not is_space(A)
head = Atom(not_space, [A])
body = Atom(is_space, [A])
clause1 = Clause(head, Body(Not(body)))

# is_uppercase(s(Pair(H, Z), O)) :- is_uppercase_aux(H)
head = Atom(is_uppercase, [Structure(s, [Pair(H, Z), O])])
body = Atom(is_uppercase_aux, [H])
clause2 = Clause(head, Body(body))

# not_uppercase(A) :- not is_uppercase(A)
head = Atom(not_uppercase, [A])
body = Atom(is_uppercase, [A])
clause3 = Clause(head, Body(Not(body)))

# is_lowercase(s(Pair(H, Z), O)) :- is_lowercase_aux(H)
head = Atom(is_lowercase, [Structure(s, [Pair(H, Z), O])])
body = Atom(is_lowercase_aux, [H])
clause4 = Clause(head, Body(body))

# not_lowercase(A) :- not is_lowercase(A)
head = Atom(not_lowercase, [A])
body = Atom(is_lowercase, [A])
clause5 = Clause(head, Body(Not(body)))

# Pieces for an is_letter clause; the clause itself is not built in this fragment.
head = Atom(is_letter, [Structure(s, [Pair(H, Z), O])])
body1 = Atom(is_lowercase_aux, [H])
# NOTE(review): this uses is_uppercase on the bare H, while the clauses above
# apply is_uppercase to a Structure and is_uppercase_aux to H; possibly
# is_uppercase_aux was intended - confirm.
body2 = Atom(is_uppercase, [H])