def _check_rules_and_predictions(self, dataset, expected_string_rules):
    """Fit PRIDE on ``dataset`` and check the learned rules and their predictions.

    Args:
        dataset: dataset passed to ``PRIDE.fit``; its ``features``,
            ``targets`` and ``data`` attributes are read.
        expected_string_rules: String
            one rule per line, parsed with ``Rule.from_string``.
            Blank or whitespace-only lines are ignored.
    """
    # Strip each line BEFORE testing for emptiness, so that whitespace-only
    # lines are dropped instead of being parsed as an empty rule.
    expected_rules = [
        Rule.from_string(line.strip(), dataset.features, dataset.targets)
        for line in expected_string_rules.strip().split("\n")
        if line.strip()
    ]

    output = PRIDE.fit(dataset)

    # Only one direction is checked here: every learned rule must be expected.
    # The converse "missing rule" check was already disabled in this test.
    for r in output:
        if r not in expected_rules:
            eprint("Additional rule: ", r)
        self.assertIn(r, expected_rules)

    model = DMVLP(dataset.features, dataset.targets, output)

    # Observed transitions versus transitions predicted from every feature state.
    expected = set((tuple(s1), tuple(s2)) for s1, s2 in dataset.data)
    predicted = set()
    for s1 in model.feature_states():
        prediction = model.predict([s1])
        for s2 in prediction[tuple(s1)]:
            predicted.add((tuple(s1), tuple(s2)))

    eprint()
    # Every observed transition must be predicted...
    for done, transition in enumerate(expected, start=1):
        eprint("\rChecking transitions ", done, "/", len(expected), end='')
        self.assertIn(transition, predicted)

    # ...and every predicted transition must have been observed.
    for done, transition in enumerate(predicted, start=1):
        eprint("\rChecking transitions ", done, "/", len(predicted), end='')
        self.assertIn(transition, expected)
def random_rule(head_var_id, head_val_id, features, targets, size=None):
    """Build a rule with the given head and a random body over ``features``.

    Each feature variable appears at most once in the body.

    Args:
        head_var_id: int
            head variable id, forwarded to ``Rule``.
        head_val_id: int
            head value id, forwarded to ``Rule``.
        features: sequence whose items expose their value domain at index 1
            (e.g. ``(name, values)`` pairs).
        targets: unused here; kept for interface compatibility.
        size: int or None
            exact number of body conditions; when None it is drawn
            uniformly in ``[0, len(features)]``.

    Returns:
        Rule with ``len(features)`` as its number of body variables.

    Raises:
        ValueError: if ``size`` exceeds the number of features. Without this
            guard the sampling loop below could never terminate.
    """
    if size is not None and size > len(features):
        raise ValueError(
            "size (" + str(size) + ") cannot exceed the number of features ("
            + str(len(features)) + ")")

    # Always drawn, even when size is given, so that the sequence of random
    # numbers consumed is the same as before for a given seed.
    nb_conditions = random.randint(0, len(features))
    if size is not None:
        nb_conditions = size

    body = []
    used_variables = set()
    while len(body) < nb_conditions:
        var = random.randint(0, len(features) - 1)
        val = random.randint(0, len(features[var][1]) - 1)
        # Rejection sampling: skip variables that already have a condition.
        if var not in used_variables:
            body.append((var, val))
            used_variables.add(var)

    return Rule(head_var_id, head_val_id, len(features), body)
def random_constraint(nb_features, nb_targets, nb_values, max_body_size):
    """Build a random constraint: a rule with head (-1, -1) and a random body.

    The body ranges over feature and target variables together
    (ids ``0 .. nb_features + nb_targets - 1``); each variable appears at
    most once.

    Args:
        nb_features: int
            number of feature variables.
        nb_targets: int
            number of target variables.
        nb_values: int
            condition values are drawn in ``[0, nb_values - 1]``.
        max_body_size: int
            upper bound on the number of body conditions.

    Returns:
        Rule with head variable and head value both set to -1.
    """
    nb_variables = nb_features + nb_targets

    # A body cannot hold more conditions than there are distinct variables.
    # Clamp the bound, otherwise the sampling loop below never terminates
    # when max_body_size > nb_variables.
    nb_conditions = random.randint(0, min(max_body_size, nb_variables))

    body = []
    used_variables = set()
    while len(body) < nb_conditions:
        var = random.randint(0, nb_variables - 1)
        val = random.randint(0, nb_values - 1)
        # Rejection sampling: skip variables that already have a condition.
        if var not in used_variables:
            body.append((var, val))
            used_variables.add(var)

    return Rule(-1, -1, nb_variables, body)
def _check_rules(self, model, expected_string_rules):
    """Assert that ``model.rules`` and the expected rules contain each other.

    Args:
        model: object exposing ``features``, ``targets`` and ``rules``.
        expected_string_rules: String
            one rule per line, parsed with ``Rule.from_string``;
            blank lines are ignored.
    """
    stripped_lines = [
        line.strip() for line in expected_string_rules.strip().split("\n")
    ]
    expected_rules = [
        Rule.from_string(text, model.features, model.targets)
        for text in stripped_lines
        if text
    ]

    # Every expected rule must be in the model.
    for rule in expected_rules:
        found = rule in model.rules
        if not found:
            eprint("Missing rule: ",
                   rule.logic_form(model.features, model.targets),
                   " (", rule.to_string(), ")")
        self.assertTrue(found)

    # Every rule of the model must be expected.
    for rule in model.rules:
        found = rule in expected_rules
        if not found:
            eprint("Additional rule: ",
                   rule.logic_form(model.features, model.targets),
                   " (", rule.to_string(), ")")
        self.assertTrue(found)
def test_next(self):
    """Check ``Synchronous.next`` on a hand-written example and on random models."""
    print(">> pylfit.semantics.Synchronous.next(feature_state, targets, rules)")

    def reachable(current_model, raw_state, *extra_args):
        # Encode the state, then collect the keys returned by Synchronous.next as tuples.
        encoded = Algorithm.encode_state(raw_state, current_model.features)
        successors = Synchronous.next(
            encoded, current_model.targets, current_model.rules, *extra_args)
        return set(tuple(s) for s in successors)

    # --- Unit test -------------------------------------------------------
    data = [
        ([0, 0, 0], [0, 0, 1]),
        ([0, 0, 0], [1, 0, 0]),
        ([1, 0, 0], [0, 0, 0]),
        ([0, 1, 0], [1, 0, 1]),
        ([0, 0, 1], [0, 0, 1]),
        ([1, 1, 0], [1, 0, 0]),
        ([1, 0, 1], [0, 1, 0]),
        ([0, 1, 1], [1, 0, 1]),
        ([1, 1, 1], [1, 1, 0]),
    ]
    feature_names = ["p_t-1", "q_t-1", "r_t-1"]
    target_names = ["p_t", "q_t", "r_t"]

    dataset = pylfit.preprocessing.transitions_dataset_from_array(
        data=data, feature_names=feature_names, target_names=target_names)

    model = DMVLP(features=dataset.features, targets=dataset.targets)
    model.compile(algorithm="gula")
    model.fit(dataset=dataset)

    self.assertEqual(reachable(model, [0, 0, 0]),
                     {(1, 0, 0), (0, 0, 0), (0, 0, 1), (1, 0, 1)})
    self.assertEqual(reachable(model, [1, 1, 1]), {(1, 1, 0)})
    self.assertEqual(reachable(model, [0, 1, 0]), {(1, 0, 1)})

    # --- Incomplete program, semantics with default ----------------------
    model = DMVLP(features=dataset.features, targets=dataset.targets)
    string_rules = [
        "p_t(1) :- q_t-1(1)",
        "q_t(1) :- p_t-1(1), r_t-1(1)",
        "r_t(1) :- p_t-1(0)",
    ]
    model.rules = [
        Rule.from_string(text, model.features, model.targets)
        for text in string_rules
    ]
    default = [("p_t", [0]), ("q_t", [0]), ("r_t", [0])]

    # With an explicit default
    self.assertEqual(reachable(model, [1, 1, 1], default), {(1, 1, 0)})

    # Default to unknown (-1)
    self.assertEqual(reachable(model, [1, 1, 1], None), {(1, 1, -1)})

    # --- Random tests ----------------------------------------------------
    for _ in range(self._nb_tests):
        model = random_DMVLP(
            nb_features=random.randint(1, self._nb_features),
            nb_targets=random.randint(1, self._nb_targets),
            max_feature_values=self._nb_feature_values,
            max_target_values=self._nb_target_values,
            algorithm="pride")

        picked_state = random.choice(model.feature_states())
        feature_state = Algorithm.encode_state(picked_state, model.features)

        output = Synchronous.next(
            feature_state, model.targets, model.rules, default=None)

        # Collect the head values of all matching rules, per target variable.
        domains = [set() for _ in model.targets]
        for rule in model.rules:
            if rule.matches(feature_state):
                domains[rule.head_variable].add(rule.head_value)

        # Variables without any next value get the single value -1.
        domains = [domain if len(domain) > 0 else [-1] for domain in domains]

        # All combinations of the per-variable domains.
        expected = [tuple(combo) for combo in itertools.product(*domains)]
        target_states = output.keys()

        for s2 in target_states:
            self.assertTrue(s2 in expected)
        for s2 in expected:
            self.assertTrue(s2 in target_states)

        for state, state_rules in output.items():
            # Each rule attached to a state matches and concludes that state's value...
            for rule in state_rules:
                self.assertTrue(rule.matches(feature_state))
                self.assertEqual(rule.head_value, state[rule.head_variable])
            # ...and no other matching rule with such a conclusion is left out.
            left_out = [
                rule for rule in model.rules
                if rule not in state_rules
                and rule.matches(feature_state)
                and rule.head_value == state[rule.head_variable]
            ]
            self.assertEqual([], left_out)
def _check_rules_and_predictions(self, dataset, expected_string_rules, expected_string_constraints):
    """Fit Synchronizer on ``dataset`` and check rules, constraints and predictions.

    Args:
        dataset: dataset passed to ``Synchronizer.fit``; its ``features``,
            ``targets`` and ``data`` attributes are read.
        expected_string_rules: String
            one rule per line, parsed with ``Rule.from_string``.
        expected_string_constraints: String
            one constraint per line, parsed with ``Rule.from_string``.
            In both strings, blank or whitespace-only lines are ignored.
    """
    def parse(text):
        # Strip each line BEFORE testing for emptiness, so that
        # whitespace-only lines are dropped instead of being parsed.
        return [
            Rule.from_string(line.strip(), dataset.features, dataset.targets)
            for line in text.strip().split("\n")
            if line.strip()
        ]

    expected_rules = parse(expected_string_rules)
    expected_constraints = parse(expected_string_constraints)

    rules, constraints = Synchronizer.fit(dataset)

    for r in expected_rules:
        if r not in rules:
            eprint("Missing rule: ", r)
        self.assertIn(r, rules)

    for r in rules:
        if r not in expected_rules:
            eprint("Additional rule: ", r)
        self.assertIn(r, expected_rules)

    for r in expected_constraints:
        if r not in constraints:
            eprint("Missing constraint: ", r)
        self.assertIn(r, constraints)

    for r in constraints:
        if r not in expected_constraints:
            eprint("Additional constraint: ", r)
        # Must be checked against the EXPECTED constraints: asserting
        # membership in `constraints` itself is always true and would let
        # unexpected constraints pass silently.
        self.assertIn(r, expected_constraints)

    model = CDMVLP(dataset.features, dataset.targets, rules, constraints)

    # Observed transitions versus transitions predicted from every feature state.
    expected = set((tuple(s1), tuple(s2)) for s1, s2 in dataset.data)
    predicted = model.predict(model.feature_states())
    predicted = set(
        (tuple(s1), tuple(s2)) for (s1, S2) in predicted for s2 in S2)

    eprint()
    # Every observed transition must be predicted...
    for done, transition in enumerate(expected, start=1):
        eprint("\rChecking transitions ", done, "/", len(expected), end='')
        self.assertIn(transition, predicted)

    # ...and every predicted transition must have been observed.
    for done, transition in enumerate(predicted, start=1):
        eprint("\rChecking transitions ", done, "/", len(predicted), end='')
        self.assertIn(transition, expected)
def _group_transitions_by_feature_state(transitions):
    """Map each feature state (as a tuple) to the set of its target states (as tuples)."""
    grouped = {}
    for s1, s2 in transitions:
        grouped.setdefault(tuple(s1), set()).add(tuple(s2))
    return grouped


def _transitions_from_states(feature_states, grouped_transitions):
    """List every (s1, s2) transition, as lists, starting from the given feature states."""
    return [(list(s1), list(s2))
            for s1 in feature_states
            for s2 in grouped_transitions[s1]]


def _perfect_prediction_baselines(dataset):
    """Return (csv_label, make_rules) pairs, one per perfect-prediction baseline.

    Each ``make_rules(s1, var_id, val_id)`` returns the pair of rules attached
    to the two weighted entries of a prediction.
    """
    nb_features = len(dataset.features)

    def random_rules(s1, var_id, val_id):
        return (random_rule(var_id, val_id, dataset.features, dataset.targets),
                random_rule(var_id, val_id, dataset.features, dataset.targets))

    def no_rules(s1, var_id, val_id):
        return (None, None)

    def most_general_rules(s1, var_id, val_id):
        return (Rule(var_id, val_id, nb_features),
                Rule(var_id, val_id, nb_features))

    def most_specific_rules(s1, var_id, val_id):
        # One condition per feature variable, taken from the encoded state;
        # the same rule object is used for both entries.
        body = list(enumerate(GULA.encode_state(s1, dataset.features)))
        rule = Rule(var_id, val_id, nb_features, body)
        return (rule, rule)

    return [
        ("baseline_perfect_predictions_random_rules", random_rules),
        ("baseline_perfect_predictions_no_rules", no_rules),
        ("baseline_perfect_predictions_most_general_rules", most_general_rules),
        ("baseline_perfect_predictions_most_specific_rules", most_specific_rules),
    ]


def _perfect_predictions(dataset, feature_states, make_rules):
    """Build predictions whose probability is 1 for observed target values, 0 otherwise.

    Result shape: ``{s1: {variable: {value: (proba, (w, rule), (100 - w, rule))}}}``
    with ``w = int(proba * 100)``.
    """
    predictions = {}
    for s1 in feature_states:
        # Target states observed in the dataset from this feature state.
        observed = [s2 for s1_, s2 in dataset.data if tuple(s1_) == s1]
        per_variable = {}
        for var_id, (variable, values) in enumerate(dataset.targets):
            observed_ids = {values.index(s2[var_id]) for s2 in observed}
            per_value = {}
            for val_id, value in enumerate(values):
                proba = int(val_id in observed_ids)
                first_rule, second_rule = make_rules(s1, var_id, val_id)
                per_value[value] = (
                    proba,
                    (int(proba * 100), first_rule),
                    (100 - int(proba * 100), second_rule),
                )
            per_variable[variable] = per_value
        predictions[tuple(s1)] = per_variable
    return predictions


def evaluate_explanation_on_bn_benchmark(algorithm, benchmark, expected_model, run_tests, train_size, mode, benchmark_name, semantics_name, full_transitions=None):
    """
        Evaluate the explanation score of an algorithm over a given benchmark
        with a given proportion of training samples.

        Args:
            algorithm: String
                "gula" or "pride" to learn a WDMVLP and score it,
                "baseline" to score the perfect-prediction baselines.
                Any other value only builds the train/test sets.
            benchmark: DMVLP
                benchmark model to be tested
            expected_model: WDMVLP
                optimal WDMVLP that model the transitions of the benchmark.
            run_tests: int
                number of train/test runs.
            train_size: float in [0,1]
                Size of the training set in proportion
            mode: string
                "all_from_init_states": training contains all transitions from its initial states
                "random_transitions": training contains random transitions, 80%/20% train/test then train is reduced to train_size
            benchmark_name: string
                for csv output.
            semantics_name: string
                for csv output; also used as the semantics when the
                transitions have to be generated from the benchmark.
            full_transitions: list of (feature_state, target_state) or None
                transitions of the benchmark; generated from ``benchmark``
                when None.

        Returns:
            None. One csv line per scored run is printed on the standard output.

        Raises:
            ValueError: if ``mode`` is not one of the two supported modes.
    """
    # Fail fast, before the (possibly expensive) generation of the transitions.
    if mode not in ("all_from_init_states", "random_transitions"):
        raise ValueError("Wrong mode requested")

    # 1) Generate transitions
    #-------------------------------------
    # Boolean network benchmarks only have rules for value 1, if none match next value is 0
    if full_transitions is None:
        eprint(">>> Generating benchmark transitions...")
        # Uses the `benchmark` and `semantics_name` parameters: the names
        # `program` and `semantics` previously used here are not defined in this function.
        full_transitions = [
            (np.array(feature_state),
             np.array(["0" if x == "?" else "1" for x in target_state]))
            for feature_state in benchmark.feature_states()
            for target_state in benchmark.predict([feature_state], semantics_name)[tuple(feature_state)]
        ]

    # Single pass grouping (keys keep their first-occurrence order).
    full_transitions_grouped = _group_transitions_by_feature_state(full_transitions)

    # 2) Prepare scores containers
    #---------------------------
    results_time = []
    results_score = []

    # 3) Average over several tests
    #-----------------------------
    for run in range(run_tests):

        # 3.1 Split train/test sets on initial states
        #----------------------------------------------
        all_feature_states = list(full_transitions_grouped.keys())
        random.shuffle(all_feature_states)

        # Test set: all transitions from the last 20% feature states
        test_begin = max(1, int(0.8 * len(all_feature_states)))
        test_feature_states = all_feature_states[test_begin:]
        test = _transitions_from_states(test_feature_states, full_transitions_grouped)
        random.shuffle(test)

        # Train set
        if mode == "all_from_init_states":
            # All transitions from the first train_size % feature states
            # (over 80% it includes some part of the test set)
            train_end = max(1, int(train_size * len(all_feature_states)))
            train = _transitions_from_states(all_feature_states[:train_end], full_transitions_grouped)
            random.shuffle(train)
        else:
            # "random_transitions": random train_size % of the transitions
            # from the feature states that are not in the test set
            train = _transitions_from_states(all_feature_states[:test_begin], full_transitions_grouped)
            random.shuffle(train)
            train_end = int(max(1, train_size * len(train)))
            train = train[:train_end]

        if run == 0:
            eprint(">>> Start Training on " + str(len(train)) + "/" + str(len(full_transitions)) + " transitions (" + str(round(100 * len(train) / len(full_transitions), 2)) + "%)")

        eprint(">>>> run: " + str(run + 1) + "/" + str(run_tests), end='')

        train_dataset = StateTransitionsDataset(
            [(np.array(s1), np.array(s2)) for (s1, s2) in train],
            benchmark.features, benchmark.targets)

        # 3.2) Learn from training set
        #------------------------------------------
        if algorithm == "gula" or algorithm == "pride":
            start = time.time()
            model = WDMVLP(features=benchmark.features, targets=benchmark.targets)
            model.compile(algorithm=algorithm)
            model.fit(dataset=train_dataset)
            end = time.time()
            results_time.append(round(end - start, 3))

        # 3.4) Evaluate on the test set
        #------------------------------------------------------------
        test_dataset = StateTransitionsDataset(
            [(np.array(s1), np.array(s2)) for s1, s2 in test],
            benchmark.features, benchmark.targets)

        # csv format of results
        expected_train_size = train_size
        expected_test_size = 0.2
        real_train_size = round(len(train) / (len(full_transitions)), 2)
        real_test_size = round(len(test) / (len(full_transitions)), 2)

        if mode == "random_transitions":
            # train_size applies to the 80% of states left out of the test set
            expected_train_size = round(train_size * 0.8, 2)

        common_settings = ",".join([
            semantics_name,
            benchmark_name,
            str(len(benchmark.features)),
            str(len(full_transitions)),
            mode,
            str(expected_train_size),
            str(expected_test_size),
            str(real_train_size),
            str(real_test_size),
            str(len(train)),
            str(len(test)),
        ])

        if algorithm == "gula" or algorithm == "pride":
            score = explanation_score(model=model, expected_model=expected_model, dataset=test_dataset)
            print(algorithm + "," + common_settings + "," + str(score))
            results_score.append(score)
            eprint(" explanation score: " + str(round(score * 100, 2)) + "%")

        if algorithm == "baseline":
            eprint()
            # Perfect predictions combined with, in turn: random rules,
            # no rules, most general rules and most specific rules.
            # NOTE: the equivalent baselines with random probabilities were
            # already disabled (commented out) and are not run.
            for label, make_rules in _perfect_prediction_baselines(test_dataset):
                predictions = _perfect_predictions(test_dataset, test_feature_states, make_rules)
                score = explanation_score_from_predictions(
                    predictions=predictions,
                    expected_model=expected_model,
                    dataset=test_dataset)
                print(label + "," + common_settings + "," + str(score))
                eprint(">>>>> explanation score: " + str(round(score * 100, 2)) + "% (" + label + ")")

    # 4) Average scores
    #-------------------
    # Guarded on results_score so that run_tests == 0 does not divide by zero.
    if algorithm in ["gula", "pride"] and results_score:
        score = sum(results_score) / len(results_score)
        eprint(">>> AVG explanation score: " + str(round(score * 100, 2)) + "%")