Пример #1
0
 def match_query_res(self, r):
     """
     Check whether a database query result describes the current Cluster object.

     :param r: indexable query result; ``r[0]`` is the scope term and
         ``r[1]`` is an object whose ``functor`` is compared with
         ``self.source_columns``.
     :return: ``False`` when the scopes differ (compared through their
         ``term2str`` rendering); otherwise the outcome of comparing
         ``r[1].functor`` with ``self.source_columns``.
     """
     # Cheap textual comparison of the scopes first; bail out on mismatch.
     if term2str(r[0]) != term2str(self.scope):
         return False
     return r[1].functor == self.source_columns
Пример #2
0
 def match_query_res(self, r):
     """
     Check whether a database query result describes the current Predictor object.

     :param r: indexable query result; ``r[0]`` is the scope term, and the
         ``functor`` of ``r[1]``, ``r[2]`` and ``r[3]`` is compared with
         ``self.modelclass``, ``self.source_columns`` and
         ``self.target_columns`` respectively.
     :return: ``False`` when the scopes differ (compared through their
         ``term2str`` rendering); otherwise the short-circuit conjunction
         of the three ``functor`` comparisons.
     """
     # The scope is compared through its string form; stop early on mismatch.
     if term2str(r[0]) != term2str(self.scope):
         return False
     return (r[1].functor == self.modelclass
             and r[2].functor == self.source_columns
             and r[3].functor == self.target_columns)
Пример #3
0
def scope_to_tables(scope, kwargs):
    """
    Build one ``Table`` for every ``table`` term found in ``scope``.

    :param scope: scope handed to ``get_terms_from_scope``.
    :param kwargs: dictionary that is unpacked as keyword arguments into
        every ``get_terms_from_scope`` call (note: a plain positional dict,
        not ``**kwargs``).
    :return: list of ``Table`` objects, one per ``table`` term, in the order
        the terms were returned.
    """
    table_terms = get_terms_from_scope(scope, "table", **kwargs)
    cell_terms = get_terms_from_scope(scope, "table_cell", **kwargs)
    cell_type_terms = get_terms_from_scope(scope, "table_cell_type", **kwargs)

    def belonging_to(terms, table_id):
        # Keep only the terms whose first argument identifies the given table.
        return [term for term in terms if term.args[0] == table_id]

    converted = []
    for table_term in table_terms:
        term_args = table_term.args
        table_id = term_args[0]

        table_name = unquote(term2str(table_id.value))
        data = table_cells_to_matrix(belonging_to(cell_terms, table_id))
        type_data = table_cell_types_to_matrix(
            belonging_to(cell_type_terms, table_id))

        # Arguments 2 and 1 are shifted down by one, arguments 4 and 3 are
        # passed through unchanged, in exactly this order.
        # NOTE(review): presumably a 1-based to 0-based origin conversion
        # followed by the extents -- confirm against Range's signature.
        table_range = Range(
            term_args[2].value - 1,
            term_args[1].value - 1,
            term_args[4].value,
            term_args[3].value,
        )

        converted.append(
            Table(
                data,
                type_data,
                table_range,
                name=table_name,
                orientations=[Orientation.vertical],
            )
        )

    return converted
Пример #4
0
    def test_source_predicates(self):
        """
        Check that ``magic_models:source/2`` yields one answer per source column.

        The program is assembled from ``self.module_import``,
        ``self.load_csv`` and a rule calling ``self.input_predicate`` with
        two source columns; every queried answer must carry one of those
        columns, and each column may be matched only once.
        """
        remaining_columns = ["column('T1',5)", "column('T1',2)"]
        preamble = self.module_import + self.load_csv
        rule_and_query = """magic_models:X :-{}(magic_tables,
                                       [{}],
                                       [column('T1', 3)],
                                       X).

                query(magic_models:source(_, _)).
                """.format(self.input_predicate, ",".join(remaining_columns))
        model = preamble + rule_and_query

        evaluatable = get_evaluatable()
        result = evaluatable.create_from(PrologString(model)).evaluate()
        self.assertEqual(len(result), len(remaining_columns))

        for term, _ in result.items():
            self.assertEqual(len(term.args), 2)
            column_text = term2str(term.args[1].args[1])
            self.assertIn(column_text, remaining_columns)
            # Consume the column so a duplicated answer fails the assertIn above.
            remaining_columns.remove(column_text)
Пример #5
0
    def query(self, db, term, backend=None, **kwdargs):
        """
        Query ``term`` against ``db`` and return the solutions found.

        When ``backend`` is ``'swipl'`` or ``'yap'`` the database is dumped
        (``db.to_prolog()``) into a temporary ``.pl`` file and the query is
        executed through the ``swipl`` executable (both backend names run
        ``swipl``). Any other value grounds the term with ``self._ground``.

        :param db: database to query; must provide ``to_prolog()`` for the
            external backend.
        :param term: term to query.
        :param backend: ``'swipl'`` / ``'yap'`` to use the external
            executable, anything else (default ``None``) for grounding.
        :param kwdargs: extra keyword arguments forwarded to ``self._ground``
            (unused by the external backend).
        :return: external backend: list with the ``args`` of every non-blank
            output line parsed by ``Term.from_string``. Grounding: list of
            the first element of each result pair; for a negated ``term``,
            ``[term]`` (with the negation stripped) when grounding produced
            no result and ``[]`` otherwise.
        :raises GroundingError: when the external call raises
            ``CalledProcessError``.
        """
        if backend not in ('swipl', 'yap'):
            formula = LogicFormula()
            negative = False
            if term.is_negated():
                # Ground the positive form; the negation is resolved below.
                negative = True
                term = -term
            formula, result = self._ground(db, term, formula, **kwdargs)
            if not negative:
                return [x for x, y in result]
            return [] if result else [term]

        from .util import mktempfile, subprocess_check_output

        program_path = mktempfile('.pl')
        with open(program_path, 'w') as program_file:
            print(db.to_prolog(), file=program_file)

        from problog.logic import term2str
        goal = term2str(term)
        # Failure-driven loop: print every solution, then halt.
        command = [
            'swipl', '-l', program_path, '-g',
            '%s, writeln(%s), fail; halt' % (goal, goal)
        ]

        try:
            output = subprocess_check_output(command)
        except CalledProcessError as err:
            # Keep the leading lines and everything from an 'ERROR:' line on,
            # dropping lines from a 'Warning:' line until the next 'ERROR:'.
            keeping = True
            kept_lines = []
            for output_line in err.output.split('\n'):
                if output_line.startswith('Warning:'):
                    keeping = False
                elif output_line.startswith('ERROR:'):
                    keeping = True
                if keeping:
                    kept_lines.append(output_line)
            raise GroundingError(
                'SWI-Prolog returned some errors:\n' + '\n'.join(kept_lines))

        solutions = []
        for output_line in output.split('\n'):
            if output_line.strip():
                solutions.append(Term.from_string(output_line).args)
        return solutions
Пример #6
0
def probfoil_loop(scope, target_predicate, **kwargs):
    """
    Learn ProbFOIL+ rules with one learning run per value of the target.

    All facts of ``scope`` are fetched through ``kwargs["engine"]`` and
    ``kwargs["database"]`` and sorted by functor:

    * ``<target>_<constant>`` (non-empty constant): example for that
      constant;
    * exactly ``<target>``: ignored;
    * anything else: background fact. The first time such a functor is
      seen, a ``base`` typing fact and ``mode`` declarations are generated
      for arity 1 or 2 (other arities get none).

    For every constant a ProbFOIL input is created in which the facts of
    that constant are the positive examples and the facts of all other
    constants the negative ones.

    :param scope: scope whose facts are queried.
    :param target_predicate: term naming the target predicate; it is
        rendered with ``term2str`` and unquoted.
    :param kwargs: must contain ``"engine"`` and ``"database"``.
    :return: list concatenating, for each target constant,
        ``rules2scope(hypothesis) + evaluate_probfoil_rules(hypothesis)``.
    """
    target = unquote(term2str(target_predicate))
    target_prefix = target + "_"

    engine = kwargs["engine"]
    database = kwargs["database"]
    input_facts = engine.query(database, Term("':'", scope, None), subcall=True)

    background_facts = []
    target_facts = {}
    base_facts = []
    mode_facts = []
    # Background functors that already received their base/mode declarations.
    declared_functors = set()

    for answer in input_facts:
        fact = answer[1]
        functor = fact.functor

        # The un-propositionalized target predicate itself is not used.
        if functor == target:
            continue

        args = ["'" + term2str(val) + "'" for val in fact.args]
        rendered_fact = "{}({}).".format(functor, ",".join(args))

        if functor.startswith(target_prefix) and len(functor) > len(target_prefix):
            target_constant = functor[len(target_prefix):]
            target_facts.setdefault(target_constant, []).append(rendered_fact)
            continue

        background_facts.append(rendered_fact)

        if functor in declared_functors:
            continue
        declared_functors.add(functor)

        if len(args) == 1:
            # Typing of Predicates
            base_facts.append("base({}(row_id)).".format(functor))
            # Declarative Bias
            mode_facts.append("mode({}(+)).".format(functor))
        elif len(args) == 2:
            # Typing of Predicates
            base_facts.append(
                "base({0}(row_id, {0}_constant)).".format(functor)
            )
            # Declarative Bias
            mode_facts.append("mode({}(+, +)).".format(functor))
            mode_facts.append("mode({}(-, +)).".format(functor))
            mode_facts.append("mode({}(+, -)).".format(functor))

    result = []

    for target_constant, pos_examples in target_facts.items():
        # Every example of another constant counts as a negative example.
        neg_examples = [
            example
            for other_constant, examples in target_facts.items()
            if other_constant != target_constant
            for example in examples
        ]

        # Create ProbFOIL Input
        probfoil_input = create_probfoil_inputfile(
            base_facts,
            mode_facts,
            target_prefix + target_constant,
            background_facts,
            pos_examples,
            neg_examples,
        )

        # Run ProbFOIL+
        hypothesis = ProbFOIL2(
            DataFile(PrologString(probfoil_input)), beam_size=10, l=4
        ).learn()

        result += rules2scope(hypothesis) + evaluate_probfoil_rules(hypothesis)

    return result
Пример #7
0
def probfoil(scope, target_predicate, **kwargs):
    """
    Learn ProbFOIL+ rules for the target predicate from the facts of a scope.

    All facts of ``scope`` are fetched through ``kwargs["engine"]`` and
    ``kwargs["database"]`` and turned into a ProbFOIL program:

    * a ``learn(<target>/1).`` header;
    * facts whose functor is ``<target>_<suffix>`` (non-empty suffix) become
      ``<target>(<first arg>).`` when the functor ends in ``yes`` and
      ``0::<target>(<first arg>).`` otherwise;
    * every other fact is copied with its arguments single-quoted;
    * the first time a functor is seen, a ``base`` typing fact is added
      (arity 1 or 2, or any functor starting with ``<target>_``), followed
      by ``mode`` declarations for non-target functors of arity 1 or 2.

    :param scope: scope whose facts are queried.
    :param target_predicate: term naming the target predicate; it is
        rendered with ``term2str`` and unquoted.
    :param kwargs: must contain ``"engine"`` and ``"database"``.
    :return: ``rules2scope(hypothesis) + evaluate_probfoil_rules(hypothesis)``
        for the learned hypothesis.
    """
    target = unquote(term2str(target_predicate))
    target_prefix = target + "_"

    engine = kwargs["engine"]
    database = kwargs["database"]
    input_facts = engine.query(database, Term("':'", scope, None), subcall=True)

    # Program lines are collected and joined once instead of growing a string.
    program_lines = ["learn(" + target + "/1)."]
    # Functors that already received their base/mode declarations.
    declared_functors = set()

    for answer in input_facts:
        fact = answer[1]
        functor = fact.functor
        args = ["'" + term2str(val) + "'" for val in fact.args]
        has_target_prefix = functor.startswith(target_prefix)

        # Propositionalized facts of the target predicate are folded back
        # into the target itself.
        if has_target_prefix and len(functor) > len(target_prefix):
            if functor.endswith("yes"):
                program_lines.append(target + "(" + args[0] + ").")
            else:
                program_lines.append("0::" + target + "(" + args[0] + ").")
        else:
            program_lines.append(functor + "(" + ",".join(args) + ").")

        if functor in declared_functors:
            continue
        declared_functors.add(functor)

        if has_target_prefix:
            # Typing of the target; it gets no mode declaration.
            program_lines.append("base(" + target + "(row_id)).")
        elif len(args) == 1:
            # Typing of Predicates, then Declarative Bias
            program_lines.append("base({}(row_id)).".format(functor))
            program_lines.append("mode({}(+)).".format(functor))
        elif len(args) == 2:
            # Typing of Predicates, then Declarative Bias
            program_lines.append(
                "base({0}(row_id, {0}_constant)).".format(functor)
            )
            program_lines.append("mode({}(+, -)).".format(functor))
            program_lines.append("mode({}(-, +)).".format(functor))
            program_lines.append("mode({}(+, +)).".format(functor))

    probfoil_input = "\n".join(program_lines) + "\n"

    # Run ProbFOIL+
    hypothesis = ProbFOIL2(
        DataFile(PrologString(probfoil_input)), beam_size=10, l=4
    ).learn()

    result = rules2scope(hypothesis) + evaluate_probfoil_rules(hypothesis)
    return result
Пример #8
0
def scikit_learn_transformer(scope, source_columns, problog_obj, **kwargs):
    """
    Fit a scikit-learn transformer on the source columns of a table in scope.

    If the database already holds a ``transformer_object`` fact for the same
    scope and source columns, the stored object is returned instead of
    fitting again; otherwise the transformer is fitted and stored.

    :param scope: A scope, containing table_cell predicates describing a table content.
    :param source_columns: A list of columns, where column is: column(<table_name>, <col_number>). <table_name> is a table name present in table_cell. These columns are used as input of the transformer; the table of the first column selects the cells.
    :param problog_obj: The problog object whose ``functor`` is the transformer to fit.
    :param kwargs: Must contain ``"engine"`` and ``"database"``.
    :return: A tuple (list of Terms, problog_object).
    List of Terms is
        transformer(<transformer>), with <transformer> the problog object of the transformer,
        source(<transformer>, <column>) for each source column, with <column> being column(<table_name>, <col_number>).
    problog_object is the transformation object, as a problog object
    """
    engine = kwargs["engine"]
    database = kwargs["database"]

    transformer = problog_obj.functor

    def describe(obj, columns):
        # transformer/1 term followed by one source/2 term per column.
        terms = [Term("transformer", obj)]
        terms.extend(Term("source", obj, column) for column in columns)
        return terms, obj

    # We try to retrieve the transformer trained with the same parameters
    stored_objects = list(
        engine.query(
            database, Term("transformer_object", None, None, None), subcall=True
        )
    )
    # TODO: Handle probabilistic terms in transformers!
    # A matching stored object is returned as is; otherwise we fit below.
    for stored in stored_objects:
        if term2str(scope) == stored[0].functor and stored[1].functor == source_columns:
            return describe(stored[2], stored[1].functor)

    scope_answers = engine.query(database, Term("':'", scope, None), subcall=True)
    cell_terms = [
        answer[1] for answer in scope_answers if answer[1].functor == "table_cell"
    ]

    # Only the cells of the table named by the first source column are used.
    table_cells = []
    for cell in cell_terms:
        if cell.args[0] == source_columns[0].args[0]:
            table_cells.append(cell)

    matrix = cells_to_matrix(table_cells)
    column_indices = [column.args[1].value for column in source_columns]

    transformer.fit(matrix[:, column_indices])

    # We add the new transformer in the database to be able to retrieve it in future calls
    database.add_fact(
        Term("transformer_object", scope, Object(source_columns), problog_obj)
    )

    return describe(problog_obj, source_columns)
Пример #9
0
def mercs(scope, source_columns, **kwargs):
    """
    Train a MERCS model on the source columns of a table in ``scope``.

    If the database already holds a three-argument ``predictor_object`` fact
    for the same scope and source columns, the terms describing the stored
    object are returned (without decision-tree terms). Otherwise a new MERCS
    model is fitted and stored, and every decision tree listed in
    ``clf.m_list`` is exposed as its own predictor and stored as well.

    Side effect: ``__str__``/``__repr__`` of ``MERCS`` (always) and of
    ``DecisionTreeRegressor``/``DecisionTreeClassifier`` (when at least one
    tree is processed) are replaced by short ``id``-based labels.

    :param scope: scope containing the ``table_cell`` terms to learn from.
    :param source_columns: list of column terms; all of them are both
        sources and targets of the MERCS model. The table of the first
        column selects the cells.
    :param kwargs: must contain ``"engine"`` and ``"database"``.
    :return: list of Terms: ``predictor``, ``mercs``, ``source`` and
        ``target`` terms of the model, plus (fresh model only) the
        ``predictor``, ``decision_tree``, ``target`` and ``source`` terms of
        each decision tree.
    """
    # Preliminaries
    engine = kwargs["engine"]
    database = kwargs["database"]

    def mercs_label(_self):
        return "MERCS({})".format(id(_self))

    MERCS.__repr__ = mercs_label
    MERCS.__str__ = mercs_label

    def column_terms(functor, obj, columns):
        # One <functor>(obj, column) term per column.
        return [Term(functor, obj, column) for column in columns]

    # Verify whether or not a MERCS model already exists with these exact same parameters
    stored_predictors = list(
        engine.query(
            database, Term("predictor_object", None, None, None), subcall=True)
    )

    # If found, return the existing object. If not, create a predictor.
    for stored in stored_predictors:
        if term2str(scope) == stored[0].functor and stored[1].functor == source_columns:
            stored_object = stored[2]
            stored_columns = stored[1].functor
            return (
                [Term("predictor", stored_object), Term("mercs", stored_object)]
                + column_terms("source", stored_object, stored_columns)
                + column_terms("target", stored_object, stored_columns)
            )

    # Getting input data
    scope_answers = engine.query(database, Term("':'", scope, None), subcall=True)
    cell_terms = [
        answer[1] for answer in scope_answers
        if answer[1].functor == "table_cell"
    ]

    # Only the cells of the table named by the first source column are used.
    table_cells = []
    for cell in cell_terms:
        if cell.args[0] == source_columns[0].args[0]:
            table_cells.append(cell)

    # Filter data
    column_indices = [column.args[1].value for column in source_columns]
    matrix = cells_to_matrix(table_cells)[:, column_indices]

    # Train a MERCS model
    clf = MERCS()
    clf.fit(pd.DataFrame(matrix))  # MERCS still needs a DataFrame as input

    model_object = Object(clf)

    # We add the new predictor in the database to be able to retrieve it in future calls
    database.add_fact(
        Term("predictor_object", scope, Object(source_columns), model_object))

    model_terms = (
        [Term("predictor", model_object)]
        + column_terms("source", model_object, source_columns)
        + column_terms("target", model_object, source_columns)
        + [Term("mercs", model_object)]
    )

    # Whitebox
    def tree_label(_self):
        return "DT({})".format(id(_self))

    tree_terms = []
    for tree, tree_code in zip(clf.m_list, clf.m_codes):
        # Patched on the classes (not the instance) so every tree prints short.
        for tree_class in (DecisionTreeRegressor, DecisionTreeClassifier):
            tree_class.__str__ = tree_label
            tree_class.__repr__ = tree_label

        tree_object = Object(tree)

        # tree_code[i] == 0 marks column i as a source, 1 as a target.
        tree_sources = []
        tree_targets = []
        for index, column in enumerate(source_columns):
            if tree_code[index] == 0:
                tree_sources.append(column)
        for index, column in enumerate(source_columns):
            if tree_code[index] == 1:
                tree_targets.append(column)

        tree_target_terms = column_terms("target", tree_object, tree_targets)
        tree_source_terms = column_terms("source", tree_object, tree_sources)

        tree_terms.append(Term("predictor", tree_object))
        tree_terms.append(Term("decision_tree", tree_object))
        tree_terms.extend(tree_target_terms)
        tree_terms.extend(tree_source_terms)

        database.add_fact(
            Term(
                "predictor_object",
                scope,
                Object(tree_source_terms),
                Object(tree_target_terms),
                tree_object,
            ))

    return model_terms + tree_terms