Example #1
    def _check_rules_and_predictions(self, dataset, expected_string_rules):
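        """
        Parse the expected rules, fit PRIDE on the dataset and check that every
        learned rule is among the expected ones, then build a DMVLP from the
        learned rules and verify that its predictions reproduce exactly the
        transitions of the dataset.
        """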
        expected_string_rules = [
            s.strip() for s in expected_string_rules.strip().split("\n")
            if len(s) > 0
        ]

        expected_rules = []
        for string_rule in expected_string_rules:
            expected_rules.append(
                Rule.from_string(string_rule, dataset.features,
                                 dataset.targets))

        #eprint(expected_rules)

        output = PRIDE.fit(dataset)

        #eprint(output)

        #for r in expected_rules:
        #    if r not in output:
        #        eprint("Missing rule: ", r)
        #    self.assertTrue(r in output)

        for r in output:
            if r not in expected_rules:
                eprint("Additional rule: ", r)
            self.assertTrue(r in expected_rules)

        model = DMVLP(dataset.features, dataset.targets, output)

        expected = set((tuple(s1), tuple(s2)) for s1, s2 in dataset.data)
        predicted = set()

        for s1 in model.feature_states():
            prediction = model.predict([s1])
            for s2 in prediction[tuple(s1)]:
                predicted.add((tuple(s1), tuple(s2)))

        eprint()
        done = 0
        for s1, s2 in expected:
            done += 1
            eprint("\rChecking transitions ", done, "/", len(expected), end='')
            self.assertTrue((s1, s2) in predicted)

        done = 0
        for s1, s2 in predicted:
            done += 1
            eprint("\rChecking transitions ",
                   done,
                   "/",
                   len(predicted),
                   end='')
            self.assertTrue((s1, s2) in expected)
Example #2
def random_rule(head_var_id, head_val_id, features, targets, size=None):
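    """
    Build a random Rule with the given head variable/value: the body holds at
    most one condition per feature variable, with a random number of
    conditions unless size is given.
    """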
    body = []
    conditions = []
    nb_conditions = random.randint(0, (len(features)))
    if size is not None:
        nb_conditions = size
    while len(body) < nb_conditions:
        var = random.randint(0, len(features) - 1)
        val = random.randint(0, len(features[var][1]) - 1)
        if var not in conditions:
            body.append((var, val))
            conditions.append(var)
    return Rule(head_var_id, head_val_id, len(features), body)
Example #3
def random_constraint(nb_features, nb_targets, nb_values, max_body_size):
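    """
    Build a random constraint: a Rule with no head (head_var = head_val = -1)
    whose body holds at most max_body_size conditions, at most one per
    variable, over the nb_features + nb_targets variables.
    """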
    head_var = -1
    head_val = -1
    body = []
    conditions = []
    nb_conditions = random.randint(0, max_body_size)
    while len(body) < nb_conditions:
        var = random.randint(0, nb_features + nb_targets - 1)
        val = random.randint(0, nb_values - 1)
        if var not in conditions:
            body.append((var, val))
            conditions.append(var)
    r = Rule(head_var, head_val, nb_features + nb_targets, body)

    return r
Example #4
    def _check_rules(self, model, expected_string_rules):
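        """
        Parse the expected rules and check that they match exactly the rules
        of the given model (no missing rule and no additional rule).
        """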
        expected_string_rules = [s.strip() for s in expected_string_rules.strip().split("\n") if len(s.strip()) > 0 ]

        expected_rules = []
        for string_rule in expected_string_rules:
            expected_rules.append(Rule.from_string(string_rule, model.features, model.targets))

        for r in expected_rules:
            if r not in model.rules:
                eprint("Missing rule: ", r.logic_form(model.features, model.targets), " (", r.to_string(),")")
            self.assertTrue(r in model.rules)

        for r in model.rules:
            if r not in expected_rules:
                eprint("Additional rule: ", r.logic_form(model.features, model.targets), " (", r.to_string(),")")
            self.assertTrue(r in expected_rules)
Example #5
    def test_next(self):
        print(">> pylfit.semantics.Synchronous.next(feature_state, targets, rules)")

        # Unit test
        data = [ \
        ([0,0,0],[0,0,1]), \
        ([0,0,0],[1,0,0]), \
        ([1,0,0],[0,0,0]), \
        ([0,1,0],[1,0,1]), \
        ([0,0,1],[0,0,1]), \
        ([1,1,0],[1,0,0]), \
        ([1,0,1],[0,1,0]), \
        ([0,1,1],[1,0,1]), \
        ([1,1,1],[1,1,0])]
        feature_names=["p_t-1","q_t-1","r_t-1"]
        target_names=["p_t","q_t","r_t"]

        dataset = pylfit.preprocessing.transitions_dataset_from_array(data=data, feature_names=feature_names, target_names=target_names)

        model = DMVLP(features=dataset.features, targets=dataset.targets)
        model.compile(algorithm="gula")
        model.fit(dataset=dataset)

        feature_state = Algorithm.encode_state([0,0,0], model.features)
        self.assertEqual(set([tuple(s) for s in Synchronous.next(feature_state, model.targets, model.rules)]), set([(1,0,0), (0,0,0), (0, 0, 1), (1,0,1)]))
        feature_state = Algorithm.encode_state([1,1,1], model.features)
        self.assertEqual(set([tuple(s) for s in Synchronous.next(feature_state, model.targets, model.rules)]), set([(1,1,0)]))
        feature_state = Algorithm.encode_state([0,1,0], model.features)
        self.assertEqual(set([tuple(s) for s in Synchronous.next(feature_state, model.targets, model.rules)]), set([(1,0,1)]))

        # incomplete program, semantics with default
        model = DMVLP(features=dataset.features, targets=dataset.targets)
        rules = [
        "p_t(1) :- q_t-1(1)",
        "q_t(1) :- p_t-1(1), r_t-1(1)",
        "r_t(1) :- p_t-1(0)"]
        model.rules = [Rule.from_string(s, model.features, model.targets) for s in rules]
        default = [("p_t", [0]), ("q_t", [0]), ("r_t", [0])]

        # with default
        feature_state = Algorithm.encode_state([1,1,1], model.features)
        self.assertEqual(set([tuple(s) for s in Synchronous.next(feature_state, model.targets, model.rules, default)]), set([(1,1,0)]))

        # Default to unknown
        feature_state = Algorithm.encode_state([1,1,1], model.features)
        self.assertEqual(set([tuple(s) for s in Synchronous.next(feature_state, model.targets, model.rules, None)]), set([(1,1,-1)]))

        # Random tests
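        # For a random DMVLP and a random feature state, compare
        # Synchronous.next() against the expected target states reconstructed
        # from the conclusions of all rules matching the feature state.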
        for i in range(self._nb_tests):
            model = random_DMVLP( \
            nb_features=random.randint(1,self._nb_features), \
            nb_targets=random.randint(1,self._nb_targets), \
            max_feature_values=self._nb_feature_values, \
            max_target_values=self._nb_target_values, \
            algorithm="pride")

            feature_state = random.choice(model.feature_states())
            feature_state = Algorithm.encode_state(feature_state, model.features)

            output = Synchronous.next(feature_state, model.targets, model.rules, default=None)

            domains = [set() for var in model.targets]

            # extract conclusion of all matching rules
            for r in model.rules:
                if(r.matches(feature_state)):
                    domains[r.head_variable].add(r.head_value)

            # Check variables without next value
            for j, domain in enumerate(domains):
                if len(domain) == 0:
                    domains[j] = [-1]

            # generate all combination of domains
            expected = [list(i) for i in list(itertools.product(*domains))]

            target_states = output.keys()
            expected = [tuple(i) for i in expected]

            for s2 in target_states:
                self.assertTrue(s2 in expected)

            for s2 in expected:
                self.assertTrue(s2 in target_states)

            for state, rules in output.items():
                for r in rules:
                    self.assertTrue(r.matches(feature_state))
                    self.assertEqual(r.head_value,state[r.head_variable])
                    self.assertEqual([],[r for r in model.rules if r not in rules and r.matches(feature_state) and r.head_value == state[r.head_variable]])
Example #6
    def _check_rules_and_predictions(self, dataset, expected_string_rules,
                                     expected_string_constraints):
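        """
        Parse the expected rules and constraints, fit Synchronizer on the
        dataset and check that the learned rules and constraints match the
        expected ones exactly, then build a CDMVLP and verify that its
        predictions reproduce exactly the transitions of the dataset.
        """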
        expected_string_rules = [
            s.strip() for s in expected_string_rules.strip().split("\n")
            if len(s) > 0
        ]
        expected_string_constraints = [
            s.strip() for s in expected_string_constraints.strip().split("\n")
            if len(s) > 0
        ]

        expected_rules = []
        for string_rule in expected_string_rules:
            expected_rules.append(
                Rule.from_string(string_rule, dataset.features,
                                 dataset.targets))

        expected_constraints = []
        for string_constraint in expected_string_constraints:
            expected_constraints.append(
                Rule.from_string(string_constraint, dataset.features,
                                 dataset.targets))

        #eprint(expected_rules)

        rules, constraints = Synchronizer.fit(dataset)

        #eprint(output)

        for r in expected_rules:
            if r not in rules:
                eprint("Missing rule: ", r)
            self.assertTrue(r in rules)

        for r in rules:
            if r not in expected_rules:
                eprint("Additional rule: ", r)
            self.assertTrue(r in expected_rules)

        for r in expected_constraints:
            if r not in constraints:
                eprint("Missing constraint: ", r)
            self.assertTrue(r in constraints)

        for r in constraints:
            if r not in expected_constraints:
                eprint("Additional constraint: ", r)
            self.assertTrue(r in expected_constraints)

        model = CDMVLP(dataset.features, dataset.targets, rules, constraints)

        #model.compile("synchronizer")
        #model.summary()

        expected = set((tuple(s1), tuple(s2)) for s1, s2 in dataset.data)

        predicted = model.predict(model.feature_states())
        predicted = set(
            (tuple(s1), tuple(s2)) for (s1, S2) in predicted for s2 in S2)

        eprint()
        done = 0
        for s1, s2 in expected:
            done += 1
            eprint("\rChecking transitions ", done, "/", len(expected), end='')
            self.assertTrue((s1, s2) in predicted)

        done = 0
        for s1, s2 in predicted:
            done += 1
            eprint("\rChecking transitions ",
                   done,
                   "/",
                   len(predicted),
                   end='')
            self.assertTrue((s1, s2) in expected)
Example #7
def evaluate_explanation_on_bn_benchmark(algorithm,
                                         benchmark,
                                         expected_model,
                                         run_tests,
                                         train_size,
                                         mode,
                                         benchmark_name,
                                         semantics_name,
                                         full_transitions=None):
    """
        Evaluate accuracy of an algorithm
        over a given benchmark with a given number/proporsion
        of training samples.

        Args:
            algorithm: Class
                Class of the algorithm to be tested
            benchmark: DMVLP
                benchmark model to be tested
            expected_model: WDMVLP
                optimal WDMVLP that model the transitions of the benchmark.
            train_size: float in [0,1] or int
                Size of the training set in proportion (float in [0,1])
            mode: string
                "all_from_init_states": training contains all transitions from its initials states
                "random": training contains random transitions, 80%/20% train/test then train is reduced to train_size
            benchmark_name: string
                for csv output.
            benchmark_name: string
                for csv output.
        Returns:
        train_set_size: int
        test_set_size: int
        accuracy: float
            Average accuracy score.
        csv_output: String
            csv string format of all tests run statistiques.
    """
    csv_output = ""

    # 0) Extract logic program
    #-----------------------
    #eprint(benchmark.to_string())

    # 1) Generate transitions
    #-------------------------------------

    # Boolean network benchmarks only have rules for value 1, if none match next value is 0
    #default = [[0] for v in benchmark.targets]
    if full_transitions is None:
        eprint(">>> Generating benchmark transitions...")
        full_transitions = [
            (np.array(feature_state),
             np.array(["0" if x == "?" else "1" for x in target_state]))
            for feature_state in benchmark.feature_states()
            for target_state in benchmark.predict([feature_state], semantics_name)[
                tuple(feature_state)]
        ]
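    # Group successor states by feature state: {s1: {s2, ...}}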
    full_transitions_grouped = {
        tuple(s1): set(
            tuple(s2_) for s1_, s2_ in full_transitions
            if tuple(s1) == tuple(s1_))
        for s1, s2 in full_transitions
    }
    #eprint("Transitions: ", full_transitions)
    #eprint("Grouped: ", full_transitions_grouped)

    #eprint(benchmark.to_string())
    #eprint(semantics.states(P))
    #eprint(full_transitions)

    # 2) Prepare scores containers
    #---------------------------
    results_time = []
    results_score = []

    # 3) Average over several tests
    #-----------------------------
    for run in range(run_tests):

        # 3.1 Split train/test sets on initial states
        #----------------------------------------------
        all_feature_states = list(full_transitions_grouped.keys())
        random.shuffle(all_feature_states)

        # Test set: all transitions from the last 20% of feature states
        test_begin = max(1, int(0.8 * len(all_feature_states)))
        test_feature_states = all_feature_states[test_begin:]

        test = []
        for s1 in test_feature_states:
            test.extend([(list(s1), list(s2))
                         for s2 in full_transitions_grouped[s1]])
        random.shuffle(test)

        # Train set
        # All transitions from the first train_size % of feature states (above 80% this overlaps the test set)
        if mode == "all_from_init_states":
            train_end = max(1, int(train_size * len(all_feature_states)))
            train_feature_states = all_feature_states[:train_end]
            train = []
            for s1 in train_feature_states:
                train.extend([(list(s1), list(s2))
                              for s2 in full_transitions_grouped[s1]])
            random.shuffle(train)
        # Random train_size % of transitions from the feature states not in test set
        elif mode == "random_transitions":
            train_feature_states = all_feature_states[:test_begin]
            train = []
            for s1 in train_feature_states:
                train.extend([(list(s1), list(s2))
                              for s2 in full_transitions_grouped[s1]])
            random.shuffle(train)
            train_end = int(max(1, train_size * len(train)))
            train = train[:train_end]
        else:
            raise ValueError("Wrong mode requested")

        #eprint("train: ", train)
        #eprint("test: ", test)
        #exit()

        # DBG
        if run == 0:
            eprint(">>> Start Training on " + str(len(train)) + "/" +
                   str(len(full_transitions)) + " transitions (" +
                   str(round(100 * len(train) / len(full_transitions), 2)) +
                   "%)")

        eprint(">>>> run: " + str(run + 1) + "/" + str(run_tests), end='')

        train_dataset = StateTransitionsDataset([(np.array(s1), np.array(s2))
                                                 for (s1, s2) in train],
                                                benchmark.features,
                                                benchmark.targets)

        # 3.2) Learn from training set
        #------------------------------------------

        if algorithm == "gula" or algorithm == "pride":
            # possibilities
            start = time.time()
            model = WDMVLP(features=benchmark.features,
                           targets=benchmark.targets)
            model.compile(algorithm=algorithm)
            model.fit(dataset=train_dataset)
            #model = algorithm.fit(train, benchmark.features, benchmark.targets, supported_only=True)
            end = time.time()

            results_time.append(round(end - start, 3))

        # 3.4) Evaluate on accuracy of domain prediction on test set
        #------------------------------------------------------------
        test_dataset = StateTransitionsDataset([(np.array(s1), np.array(s2))
                                                for s1, s2 in test],
                                               benchmark.features,
                                               benchmark.targets)

        # csv format of results
        expected_train_size = train_size
        expected_test_size = 0.2
        real_train_size = round(len(train) / (len(full_transitions)), 2)
        real_test_size = round(len(test) / (len(full_transitions)), 2)

        if mode == "random_transitions":
            expected_train_size = round(train_size * 0.8, 2)

        common_settings = \
        semantics_name + "," +\
        benchmark_name + "," +\
        str(len(benchmark.features)) + "," +\
        str(len(full_transitions)) + "," +\
        mode + "," +\
        str(expected_train_size) + "," +\
        str(expected_test_size) + "," +\
        str(real_train_size) + "," +\
        str(real_test_size) + "," +\
        str(len(train)) + "," +\
        str(len(test))

        if algorithm == "gula" or algorithm == "pride":
            score = explanation_score(model=model,
                                      expected_model=expected_model,
                                      dataset=test_dataset)
            print(algorithm + "," + common_settings + "," + str(score))
            results_score.append(score)
            eprint(" explanation score: " + str(round(score * 100, 2)) + "%")

        if algorithm == "baseline":
            eprint()

            # Perfect prediction random rule
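            # Each baseline below builds predictions of the form
            # {feature_state: {variable: {value: (proba, (weight, rule), (weight, rule))}}}
            # where proba is 1 if the value is observed as a successor value of
            # the feature state in the test set and 0 otherwise; the two
            # (weight, rule) pairs stand in for the explanation rules scored by
            # explanation_score_from_predictions.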
            predictions = {tuple(s1): {variable: {value: (proba, \
            (int(proba*100), random_rule(var_id,val_id,test_dataset.features,test_dataset.targets)),\
            (100 - int(proba*100), random_rule(var_id,val_id,test_dataset.features,test_dataset.targets)) )\
            for val_id, value in enumerate(values) for proba in [int(val_id in set(test_dataset.targets[var_id][1].index(s2[var_id]) for s1_, s2 in test_dataset.data if tuple(s1_)==s1))]}\
            for var_id, (variable, values) in enumerate(test_dataset.targets)}\
            for s1 in test_feature_states}

            score = explanation_score_from_predictions(
                predictions=predictions,
                expected_model=expected_model,
                dataset=test_dataset)
            print("baseline_perfect_predictions_random_rules," +
                  common_settings + "," + str(score))
            eprint(">>>>> explanation score: " + str(round(score * 100, 2)) +
                   "% (baseline_perfect_predictions_random_rules)")

            # Perfect prediction empty_program":
            predictions = {tuple(s1): {variable: {value: (proba, \
            (int(proba*100), None),\
            (100 - int(proba*100), None) )\
            for val_id, value in enumerate(values) for proba in [int(val_id in set(test_dataset.targets[var_id][1].index(s2[var_id]) for s1_, s2 in test_dataset.data if tuple(s1_)==s1))]}\
            for var_id, (variable, values) in enumerate(test_dataset.targets)}\
            for s1 in test_feature_states}

            score = explanation_score_from_predictions(
                predictions=predictions,
                expected_model=expected_model,
                dataset=test_dataset)
            print("baseline_perfect_predictions_no_rules," + common_settings +
                  "," + str(score))
            eprint(">>>>> explanation score: " + str(round(score * 100, 2)) +
                   "% (baseline_perfect_predictions_no_rules)")

            # Perfect prediction most general rule
            predictions = {tuple(s1): {variable: {value: (proba, \
            (int(proba*100), Rule(var_id, val_id, len(test_dataset.features))),\
            (100 - int(proba*100), Rule(var_id, val_id, len(test_dataset.features))) )\
            for val_id, value in enumerate(values) for proba in [int(val_id in set(test_dataset.targets[var_id][1].index(s2[var_id]) for s1_, s2 in test_dataset.data if tuple(s1_)==s1))]}\
            for var_id, (variable, values) in enumerate(test_dataset.targets)}\
            for s1 in test_feature_states}

            score = explanation_score_from_predictions(
                predictions=predictions,
                expected_model=expected_model,
                dataset=test_dataset)
            print("baseline_perfect_predictions_most_general_rules," +
                  common_settings + "," + str(score))
            eprint(">>>>> explanation score: " + str(round(score * 100, 2)) +
                   "% (baseline_perfect_predictions_most_general_rules)")

            # Perfect prediction most specific rule:
            predictions = {tuple(s1): {variable: {value: (proba, \
            (int(proba*100), most_specific_matching_rule),\
            (100 - int(proba*100), most_specific_matching_rule) )\
            for val_id, value in enumerate(values)\
            for proba in [int(val_id in set(test_dataset.targets[var_id][1].index(s2[var_id]) for s1_, s2 in test_dataset.data if tuple(s1_)==s1))] \
            for most_specific_matching_rule in [Rule(var_id,val_id,len(test_dataset.features),[(cond_var,cond_val) for cond_var,cond_val in enumerate(GULA.encode_state(s1,test_dataset.features))])]}\
            for var_id, (variable, values) in enumerate(test_dataset.targets)}\
            for s1 in test_feature_states}

            score = explanation_score_from_predictions(
                predictions=predictions,
                expected_model=expected_model,
                dataset=test_dataset)
            print("baseline_perfect_predictions_most_specific_rules," +
                  common_settings + "," + str(score))
            eprint(">>>>> explanation score: " + str(round(score * 100, 2)) +
                   "% (baseline_perfect_predictions_most_specific_rules)")

            # Random prediction

            # random prediction and rules
            #predictions = {tuple(s1): {variable: {value: (proba, \
            #(int(proba*100), random_rule(var_id,val_id,test_dataset.features,test_dataset.targets)),\
            #(100 - int(proba*100), random_rule(var_id,val_id,test_dataset.features,test_dataset.targets)) )\
            #for val_id, value in enumerate(values) for proba in [round(random.uniform(0.0,1.0),2)]}\
            #for var_id, (variable, values) in enumerate(test_dataset.targets)}\
            #for s1 in test_feature_states}

            #score = explanation_score_from_predictions(predictions=predictions, expected_model=expected_model, dataset=test_dataset)
            #print("baseline_random_predictions_random_rules," + common_settings + "," + str(score))
            #eprint(">>>>> explanation score: " + str(round(score * 100,2)) + "% (baseline_random_predictions_random_rules)")

            # empty_program":
            #predictions = {tuple(s1): {variable: {value: (proba, \
            #(int(proba*100), None),\
            #(100 - int(proba*100), None) )\
            #for val_id, value in enumerate(values) for proba in [round(random.uniform(0.0,1.0),2)]}\
            #for var_id, (variable, values) in enumerate(test_dataset.targets)}\
            #for s1 in test_feature_states}

            #score = explanation_score_from_predictions(predictions=predictions, expected_model=expected_model, dataset=test_dataset)
            #print("baseline_random_predictions_no_rules," + common_settings + "," + str(score))
            #eprint(">>>>> explanation score: " + str(round(score * 100,2)) + "% (baseline_random_predictions_no_rules)")

            # random prediction and most general rule
            #predictions = {tuple(s1): {variable: {value: (proba, \
            #(int(proba*100), Rule(var_id, val_id, len(test_dataset.features))),\
            #(100 - int(proba*100), Rule(var_id, val_id, len(test_dataset.features))) )\
            #for val_id, value in enumerate(values) for proba in [round(random.uniform(0.0,1.0),2)]}\
            #for var_id, (variable, values) in enumerate(test_dataset.targets)}\
            #for s1 in test_feature_states}

            #score = explanation_score_from_predictions(predictions=predictions, expected_model=expected_model, dataset=test_dataset)
            #print("baseline_random_predictions_most_general_rules," + common_settings + "," + str(score))
            #eprint(">>>>> explanation score: " + str(round(score * 100,2)) + "% (baseline_random_predictions_most_general_rules)")

            # random prediction and most specific rule:
            #predictions = {tuple(s1): {variable: {value: (proba, \
            #(int(proba*100), most_specific_matching_rule),\
            #(100 - int(proba*100), most_specific_matching_rule) )\
            #for val_id, value in enumerate(values)\
            #for proba in [round(random.uniform(0.0,1.0),2)] \
            #for most_specific_matching_rule in [Rule(var_id,val_id,len(test_dataset.features),[(cond_var,cond_val) for cond_var,cond_val in enumerate(GULA.encode_state(s1,test_dataset.features))])]}\
            #for var_id, (variable, values) in enumerate(test_dataset.targets)}\
            #for s1 in test_feature_states}

            #score = explanation_score_from_predictions(predictions=predictions, expected_model=expected_model, dataset=test_dataset)
            #print("baseline_random_predictions_most_specific_rules," + common_settings + "," + str(score))
            #eprint(">>>>> explanation score: " + str(round(score * 100,2)) + "% (baseline_random_predictions_most_specific_rules)")

    # 4) Average scores
    #-------------------
    if algorithm in ["gula", "pride"]:
        score = sum(results_score) / run_tests
        #run_time = sum(results_time) / run_tests
        eprint(">>> AVG explanation score: " + str(round(score * 100, 2)) +
               "%")