def match_query_res(self, r):
    """Check whether a database query result row corresponds to this Cluster.

    :param r: result tuple; r[0] is the scope term, r[1] wraps the
        source columns of a stored clustering object.
    :return: True when both the scope and the source columns match.
    """
    # Compare scopes first so a mismatch never touches r[1]
    # (preserves the short-circuit of the original conjunction).
    if term2str(r[0]) != term2str(self.scope):
        return False
    return r[1].functor == self.source_columns
def match_query_res(self, r):
    """Check whether a database query result row corresponds to this Predictor.

    :param r: result tuple; r[0] is the scope term, r[1] wraps the model
        class, r[2] the source columns and r[3] the target columns.
    :return: True when scope, model class, source and target columns all match.
    """
    # Evaluate the comparisons in the original left-to-right order,
    # bailing out early so later tuple slots are not touched on a mismatch.
    if term2str(r[0]) != term2str(self.scope):
        return False
    if r[1].functor != self.modelclass:
        return False
    if r[2].functor != self.source_columns:
        return False
    return r[3].functor == self.target_columns
def scope_to_tables(scope, kwargs):
    """Build TaCLe ``Table`` objects from the table terms present in a scope.

    :param scope: scope term whose ``table``, ``table_cell`` and
        ``table_cell_type`` facts describe the spreadsheet content.
    :param kwargs: engine/database keyword arguments forwarded to
        ``get_terms_from_scope``.
    :return: list of ``Table`` objects, one per ``table`` term, each with
        vertical orientation.
    """
    # Fetch the three kinds of terms in the same order as before.
    term_groups = {
        kind: get_terms_from_scope(scope, kind, **kwargs)
        for kind in ("table", "table_cell", "table_cell_type")
    }

    result = []
    for table_term in term_groups["table"]:
        key = table_term.args[0]
        name = unquote(term2str(key.value))
        # Only the cells/types belonging to this table feed its matrices.
        cell_matrix = table_cells_to_matrix(
            [c for c in term_groups["table_cell"] if c.args[0] == key]
        )
        type_matrix = table_cell_types_to_matrix(
            [c for c in term_groups["table_cell_type"] if c.args[0] == key]
        )
        # Range takes (column, row, width, height); the stored coordinates
        # are 1-based, hence the -1 on the origin.
        area = Range(
            table_term.args[2].value - 1,
            table_term.args[1].value - 1,
            table_term.args[4].value,
            table_term.args[3].value,
        )
        result.append(
            Table(
                cell_matrix,
                type_matrix,
                area,
                name=name,
                orientations=[Orientation.vertical],
            )
        )
    return result
def test_source_predicates(self):
    """Check that one source/2 fact is produced per expected input column."""
    # Columns we expect back; matched entries are removed so duplicates fail.
    remaining = ["column('T1',5)", "column('T1',2)"]
    program = (
        self.module_import
        + self.load_csv
        + """magic_models:X :-{}(magic_tables, [{}], [column('T1', 3)], X). query(magic_models:source(_, _)). """.format(
            self.input_predicate, ",".join(remaining)
        )
    )
    result = get_evaluatable().create_from(PrologString(program)).evaluate()
    self.assertEqual(len(result), len(remaining))
    for term in result:
        self.assertEqual(len(term.args), 2)
        column_str = term2str(term.args[1].args[1])
        self.assertIn(column_str, remaining)
        remaining.remove(column_str)
def query(self, db, term, backend=None, **kwdargs):
    """Evaluate a query term against the given database.

    :param db: clause database containing the program to query.
    :param term: the query Term; may be negated.
    :param backend: optional external backend; for 'swipl' or 'yap' the
        query is delegated to an external Prolog process.
        NOTE(review): the command built below always invokes 'swipl',
        even when backend == 'yap' — confirm whether YAP is intended.
    :param kwdargs: extra keyword arguments forwarded to ``self._ground``.
    :return: for the internal backend, a list of answer terms (or, for a
        negated query, ``[term]`` when the negation holds and ``[]``
        otherwise); for the external backend, a list of argument tuples,
        one per printed solution.
    :raises GroundingError: when the external Prolog process reports errors.
    """
    if backend in ('swipl', 'yap'):
        from .util import mktempfile, subprocess_check_output

        # Dump the database to a temporary .pl file the external
        # interpreter can load.
        tmpfn = mktempfile('.pl')
        with open(tmpfn, 'w') as tmpf:
            print(db.to_prolog(), file=tmpf)
        from problog.logic import term2str
        termstr = term2str(term)
        # Enumerate all solutions: print each binding of the query term,
        # force backtracking with fail, then halt.
        cmd = [
            'swipl', '-l', tmpfn, '-g',
            '%s, writeln(%s), fail; halt' % (termstr, termstr)
        ]
        try:
            output = subprocess_check_output(cmd)
        except CalledProcessError as err:
            # Scrape the process output for error blocks: lines after an
            # 'ERROR:' marker are collected, lines after 'Warning:' are not.
            in_error = True
            error_message = []
            for line in err.output.split('\n'):
                if line.startswith('Warning:'):
                    in_error = False
                elif line.startswith('ERROR:'):
                    in_error = True
                if in_error:
                    error_message.append(line)
            error_message = 'SWI-Prolog returned some errors:\n' + '\n'.join(
                error_message)
            raise GroundingError(error_message)
        # One solution per non-blank output line; return its argument tuple.
        return [
            Term.from_string(line).args for line in output.split('\n')
            if line.strip()
        ]
    else:
        gp = LogicFormula()
        # Ground the positive form and remember whether the query was negated.
        if term.is_negated():
            term = -term
            negative = True
        else:
            negative = False
        gp, result = self._ground(db, term, gp, **kwdargs)
        if negative:
            # Negation as failure: succeed exactly when no solutions exist.
            if not result:
                return [term]
            else:
                return []
        else:
            return [x for x, y in result]
def probfoil_loop(scope, target_predicate, **kwargs):
    """Run ProbFOIL+ once per propositionalized value of the target predicate.

    Facts named ``<target>_<constant>`` are treated as positive examples for
    that constant; facts for the other constants become its negatives. Each
    learned hypothesis is converted back into scope terms.

    :param scope: scope term whose facts form the learning data.
    :param target_predicate: target predicate term; its (unquoted) string
        form is the prefix used to recognize propositionalized facts.
    :param kwargs: must contain 'engine' and 'database'.
    :return: flat list of scope terms and evaluation terms for all
        learned rule sets.
    """
    t = unquote(term2str(target_predicate))
    len_target = len(t)
    engine = kwargs["engine"]
    database = kwargs["database"]
    # All facts of the scope: results of scope:Fact with Fact unbound.
    input_facts = engine.query(database, Term("':'", scope, None), subcall=True)
    num_facts = len(input_facts)
    background_facts = []
    # target_facts maps each target constant to its list of example facts.
    target_facts = {}
    base_list = []      # functors already typed with a base/1 declaration
    base_facts = []
    mode_list = []      # functors already given mode/1 declarations
    mode_facts = []
    for i in range(0, num_facts):
        fact = input_facts[i][1]
        # Quote every argument so it is a valid Prolog constant.
        args = ["'" + term2str(val) + "'" for val in fact.args]
        if (
            len(fact.functor) > len_target + 1
            and fact.functor[: len_target + 1] == t + "_"
        ):
            # Propositionalized target fact: group it under its constant.
            target_constant = fact.functor[len_target + 1 :]
            if target_constant not in target_facts:
                target_facts[target_constant] = []
            target_facts[target_constant].append(
                fact.functor + "(" + ",".join(args) + ")."
            )
        elif fact.functor == t:
            # Raw target facts are skipped entirely (no typing/bias either).
            continue
        else:
            background_facts.append(fact.functor + "(" + ",".join(args) + ").")
        # Typing of predicates: emit one base/1 declaration per functor.
        if fact.functor not in base_list:
            base_list.append(fact.functor)
            if fact.functor == t or len(args) == 1:
                base_facts.append("base(" + fact.functor + "(row_id)).")
            elif len(args) == 2:
                base_facts.append(
                    "base("
                    + fact.functor
                    + "(row_id, "
                    + fact.functor
                    + "_constant))."
                )
        # Declarative bias: emit mode declarations once per functor.
        if fact.functor not in mode_list:
            mode_list.append(fact.functor)
            if len(args) == 1 and fact.functor != t:
                mode_facts.append("mode(" + fact.functor + "(+)).")
            elif len(args) == 2 and fact.functor != t:
                mode_facts.append("mode(" + fact.functor + "(+, +)).")
                mode_facts.append("mode(" + fact.functor + "(-, +)).")
                mode_facts.append("mode(" + fact.functor + "(+, -)).")
    result = []
    # One learning run per target constant, one-vs-rest.
    for target_constant in target_facts.keys():
        pos_examples = target_facts[target_constant]
        neg_examples = []
        for key, value in target_facts.items():
            if key != target_constant:
                neg_examples += value
        # Create the ProbFOIL input program.
        probfoil_input = create_probfoil_inputfile(
            base_facts,
            mode_facts,
            t + "_" + target_constant,
            background_facts,
            pos_examples,
            neg_examples,
        )
        # Run ProbFOIL+ (beam_size / l values are fixed here).
        hypothesis = ProbFOIL2(
            DataFile(PrologString(probfoil_input)), beam_size=10, l=4
        ).learn()
        result += rules2scope(hypothesis) + evaluate_probfoil_rules(hypothesis)
    return result
def probfoil(scope, target_predicate, **kwargs):
    """Run ProbFOIL+ on the facts of a scope for a single target predicate.

    Builds a complete ProbFOIL input program as one string: a learn/1
    directive, the (possibly 0-probability) target examples, the background
    facts, plus base/1 typing and mode/1 bias declarations.

    :param scope: scope term whose facts form the learning data.
    :param target_predicate: target predicate term; its (unquoted) string
        form names the predicate to learn.
    :param kwargs: must contain 'engine' and 'database'.
    :return: list of scope terms and evaluation terms for the learned rules.
    """
    t = unquote(term2str(target_predicate))
    len_target = len(t)
    engine = kwargs["engine"]
    database = kwargs["database"]
    # All facts of the scope: results of scope:Fact with Fact unbound.
    input_facts = engine.query(database, Term("':'", scope, None), subcall=True)
    probfoil_input = "learn(" + t + "/1).\n"
    num_facts = len(input_facts)
    base_list = []      # functors already typed with a base/1 declaration
    # base_facts = []
    mode_list = []      # functors already given mode/1 declarations
    # mode_facts = []
    for i in range(0, num_facts):
        fact = input_facts[i][1]
        # Quote every argument so it is a valid Prolog constant.
        args = ["'" + term2str(val) + "'" for val in fact.args]
        # Ignore propositionalized facts of target predicate
        if (
            len(fact.functor) > len_target + 1
            and fact.functor[: len_target + 1] == t + "_"
        ):
            # Propositionalized target fact: '<t>_yes' becomes a positive
            # example, anything else a 0-probability (negative) example.
            if fact.functor.endswith("yes"):
                probfoil_input += t + "(" + args[0] + ").\n"
            else:
                probfoil_input += "0::" + t + "(" + args[0] + ").\n"
        else:
            probfoil_input += fact.functor + "(" + ",".join(args) + ").\n"
        # Typing of Predicates (one base/1 declaration per functor)
        if fact.functor not in base_list:
            base_list.append(fact.functor)
            if fact.functor.startswith(t + "_"):
                # Target examples are typed under the bare target name.
                probfoil_input += "base(" + t + "(row_id)).\n"
            elif len(args) == 1:
                probfoil_input += "base(" + fact.functor + "(row_id)).\n"
            elif len(args) == 2:
                probfoil_input += (
                    "base("
                    + fact.functor
                    + "(row_id, "
                    + fact.functor
                    + "_constant)).\n"
                )
        # Declarative Bias (mode declarations once per non-target functor)
        if fact.functor not in mode_list:
            mode_list.append(fact.functor)
            if len(args) == 1 and not fact.functor.startswith(t + "_"):
                probfoil_input += "mode(" + fact.functor + "(+)).\n"
            elif len(args) == 2 and not fact.functor.startswith(t + "_"):
                probfoil_input += "mode(" + fact.functor + "(+, -)).\n"
                probfoil_input += "mode(" + fact.functor + "(-, +)).\n"
                probfoil_input += "mode(" + fact.functor + "(+, +)).\n"
    # NOTE(review): leftover from an earlier version where typing/bias were
    # collected in separate lists — kept for reference.
    # # Typing of Predicates
    # for fact in base_facts:
    #     probfoil_input += fact + "\n"
    #
    # # Declarative Bias
    # for fact in mode_facts:
    #     probfoil_input += fact + "\n"
    # Run ProbFOIL+ (beam_size / l values are fixed here).
    hypothesis = ProbFOIL2(
        DataFile(PrologString(probfoil_input)), beam_size=10, l=4
    ).learn()
    result = rules2scope(hypothesis) + evaluate_probfoil_rules(hypothesis)
    return result
def scikit_learn_transformer(scope, source_columns, problog_obj, **kwargs):
    """
    Fit scikit learn transformer on scope. It uses source_columns to learn
    the transformation
    :param scope: A scope, containing table_cell predicates describing a
    table content.
    :param source_columns: A list of columns, where column is:
    column(<table_name>, <col_number>). <table_name> is a table name present
    in table_cell. These columns will be used as input columns for the
    predictor.
    :param problog_obj: A problog Object wrapping the scikit-learn
    transformer to fit (its .functor holds the transformer instance).
    :param kwargs: must contain 'engine' and 'database'.
    :return: A tuple list of Terms, problog_object.
    List of Terms is transformer(<transformer>) is created, with
    <transformer> the scikit-learn transformer object.
    source(<transformer>, <column>) are created for each source column.
    <transformer> is the scikit-learn predictor object and <column> is
    column(<table_name>, <col_number>)
    problog_object is the transformation object, as a problog object
    """
    engine = kwargs["engine"]
    database = kwargs["database"]
    # The actual scikit-learn transformer instance wrapped by the Object.
    transformer = problog_obj.functor
    # We try to retrieve the model trained with the same parameters
    res_predictor_object = [
        t for t in engine.query(
            database, Term("transformer_object", None, None, None),
            subcall=True
        )
    ]
    # TODO: Handle probabilistic terms in transformers!
    # If we succeed, we retrieve the previously trained object.
    # If not, we train a new one
    for r in res_predictor_object:
        # r = (scope, Object(source_columns), fitted problog object).
        # NOTE(review): r[1].functor compares an Object's payload against
        # the source_columns list — confirm Object.functor returns the
        # wrapped value.
        if term2str(scope) == r[0].functor and r[1].functor == source_columns:
            problog_obj = r[2]
            source_columns = r[1].functor
            transformer_term = Term("transformer", problog_obj)
            source_terms = [Term("source", problog_obj, s)
                            for s in source_columns]
            return [transformer_term] + source_terms, problog_obj
    # No cached fit: gather the scope's table_cell facts.
    table_cell_term_list = [
        t[1]
        for t in engine.query(database, Term("':'", scope, None), subcall=True)
        if t[1].functor == "table_cell"
    ]
    # Keep only the cells of the table the first source column refers to.
    relevant_table = [
        t for t in table_cell_term_list
        if t.args[0] == source_columns[0].args[0]
    ]
    matrix = cells_to_matrix(relevant_table)
    # Column indices (0-based values stored in the column terms).
    src_cols = [s.args[1].value for s in source_columns]
    transformer.fit(matrix[:, src_cols])
    # We add the new predictor in the database to be able to retrieve it in
    # future calls
    database.add_fact(
        Term("transformer_object", scope, Object(source_columns), problog_obj)
    )
    transformer_term = Term("transformer", problog_obj)
    source_terms = [Term("source", problog_obj, s) for s in source_columns]
    return [transformer_term] + source_terms, problog_obj
def mercs(scope, source_columns, **kwargs):
    """Train (or retrieve) a MERCS model on the given scope's columns.

    Returns predictor/mercs/source/target terms for the whole model plus,
    on a fresh fit, predictor/decision_tree/source/target terms for every
    component decision tree (the "whitebox" view).

    :param scope: scope term containing table_cell facts.
    :param source_columns: list of column(<table>, <col>) terms used as
        both sources and targets of the MERCS model.
    :param kwargs: must contain 'engine' and 'database'.
    :return: list of problog Terms describing the model (and its trees).
    """
    # Preliminaries
    engine = kwargs["engine"]
    database = kwargs["database"]

    # Give MERCS instances a short, id-based repr for readable terms.
    def short_str(_self):
        return "MERCS({})".format(id(_self))

    MERCS.__repr__ = short_str
    MERCS.__str__ = short_str
    # Verify whether or not a MERCS model already exists with these exact
    # same parameters
    res_predictor_object = [
        t for t in engine.query(
            database, Term("predictor_object", None, None, None),
            subcall=True)
    ]
    # If found, return the existing object. If not, create a predictor.
    for r in res_predictor_object:
        # r = (scope, Object(source_columns), trained problog object).
        if term2str(scope) == r[0].functor and r[1].functor == source_columns:
            mercs_problog_object = r[2]
            source_columns = r[1].functor
            predictor_term = Term("predictor", mercs_problog_object)
            mercs_term = Term("mercs", mercs_problog_object)
            # Every source column is also a target for MERCS.
            target_terms = [
                Term("target", mercs_problog_object, t)
                for t in source_columns
            ]
            source_terms = [
                Term("source", mercs_problog_object, s)
                for s in source_columns
            ]
            return [predictor_term] + [mercs_term
                                       ] + source_terms + target_terms
    # Getting input data: all table_cell facts of the scope.
    table_cell_term_list = [
        t[1]
        for t in engine.query(database, Term("':'", scope, None), subcall=True)
        if t[1].functor == "table_cell"
    ]
    # Keep only the cells of the table the first source column refers to.
    relevant_table = [
        t for t in table_cell_term_list
        if t.args[0] == source_columns[0].args[0]
    ]
    # Filter data down to the requested columns.
    matrix = cells_to_matrix(relevant_table)
    src_cols = [s.args[1].value for s in source_columns]
    matrix = matrix[:, src_cols]
    # Train a MERCS model
    clf = MERCS()
    data = pd.DataFrame(matrix)  # MERCS requires a DataFrame input
    clf.fit(data)
    mercs_problog_object = Object(clf)
    # We add the new predictor in the database to be able to retrieve it in
    # future calls
    database.add_fact(
        Term("predictor_object", scope, Object(source_columns),
             mercs_problog_object))
    predictor_term = Term("predictor", mercs_problog_object)
    mercs_term = Term("mercs", mercs_problog_object)
    target_terms = [
        Term("target", mercs_problog_object, t) for t in source_columns
    ]
    source_terms = [
        Term("source", mercs_problog_object, s) for s in source_columns
    ]
    # Whitebox: expose every component decision tree as its own predictor.
    dt_terms = []
    for dt, dt_code in zip(clf.m_list, clf.m_codes):
        # Short id-based repr for the tree classes as well.
        def short_str(_self):
            return "DT({})".format(id(_self))

        DecisionTreeRegressor.__str__ = short_str
        DecisionTreeRegressor.__repr__ = short_str
        DecisionTreeClassifier.__str__ = short_str
        DecisionTreeClassifier.__repr__ = short_str
        # dt.__str__ = short_str
        # dt.__repr__ = short_str
        dt_problog_object = Object(dt)
        dt_predictor_term = Term("predictor", dt_problog_object)
        decision_tree_term = Term("decision_tree", dt_problog_object)
        # dt_code marks each column's role for this tree: 0 = input,
        # 1 = output.
        dt_source_columns = [
            x for i, x in enumerate(source_columns) if dt_code[i] == 0
        ]
        dt_target_columns = [
            x for i, x in enumerate(source_columns) if dt_code[i] == 1
        ]
        dt_target_terms = [
            Term("target", dt_problog_object, t) for t in dt_target_columns
        ]
        dt_source_terms = [
            Term("source", dt_problog_object, s) for s in dt_source_columns
        ]
        dt_terms.append(dt_predictor_term)
        dt_terms.append(decision_tree_term)
        dt_terms.extend(dt_target_terms)
        dt_terms.extend(dt_source_terms)
        # NOTE(review): this fact has arity 5, while the cache lookup above
        # queries predictor_object/4 — these per-tree facts will never be
        # found by that query. Confirm whether that is intended.
        database.add_fact(
            Term(
                "predictor_object",
                scope,
                Object(dt_source_terms),
                Object(dt_target_terms),
                dt_problog_object,
            ))
    return [predictor_term] + source_terms + target_terms + [mercs_term
                                                             ] + dt_terms