def generate_model(self, structure_specification):
    # 1. Create the defined number of nodes per object
    v, node_cpds, temp_dict = self._create_nodes(structure_specification)

    # 2. Create edges, incl. time between vertices
    e, temp_gap_dict = self._create_edges(v, structure_specification, temp_dict)

    # 3. Add temporal information
    inverted_temp_dict = self._invert_dict(temp_dict)
    self._temporal_information(v, temp_gap_dict, node_cpds, self._parents_dict_from_edges(e),
                               temp_dict, inverted_temp_dict)
    # node_cpds is passed by reference and now contains the temporal information

    # 4. Skeleton
    skel = GraphSkeleton()
    skel.V = v
    skel.E = e
    skel.toporder()

    # 5. Create model
    tbn = TSCBN("", skel, node_cpds, unempty=True, forbid_never=True,
                discrete_only=True)  # discrete case - continuous nodes later

    # 6. Set CPDs of value nodes
    self._set_cpds(tbn, structure_specification)

    return tbn
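# Minimal standalone sketch of steps 4-5 above (the vertex names are illustrative, not
# taken from any model in this file): a skeleton is just vertices plus directed edges,
# topologically ordered before being handed to the TSCBN constructor.
def _example_skeleton_construction(self):
    skel = GraphSkeleton()
    skel.V = ["A_0", "A_1"]    # two temporal nodes of one signal
    skel.E = [["A_0", "A_1"]]  # directed edge A_0 -> A_1
    skel.toporder()            # establish a topological node order
    return skel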
def create_tscbn(states_dict, nodes, edges, ran_gen=True):
    """
    Creates a TSCBN given a set of nodes and edges
    :param states_dict: maps each signal name to its list of states
    :param nodes: list of node names following the "<signal>_<index>" convention
    :param edges: list of directed edges [source, target]
    :param ran_gen: if True, CPDs are generated randomly
    :return: the created TSCBN
    """
    model = TSCBNStructureModel()

    # Define vertices
    v, node_cpds = [], dict()

    # Initialize nodes
    for node in nodes:
        tv = "_".join(node.split("_")[:-1])
        if tv[0] == "_":
            tv = tv[1:]
        v += model._dynamic_node(node, "disc", states_dict[tv], node_cpds)

    # Define temporal information
    dL_mean = 0
    dL_var = 0.1

    # Add default entries for temporal nodes
    skel, node_cpds = model._add_temporal_basics_dump(dL_mean, dL_var, node_cpds, v, edges)

    # Create network
    tscbn = TSCBN("", skel, node_cpds, unempty=True, forbid_never=False, discrete_only=True,
                  default_is_distributed=True,
                  random_gen=ran_gen)  # discrete case - continuous nodes later
    return tscbn
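# Hedged usage sketch (signal names, states, and edges here are illustrative, not from the
# original source): create_tscbn expects node names in the "<signal>_<index>" form that its
# parsing assumes, and a states_dict keyed by the signal part of each node name.
def _example_create_tscbn():
    states_dict = {"Light": ["On", "Off"], "Door": ["Open", "Closed"]}
    nodes = ["Light_0", "Light_1", "Door_0", "Door_1"]
    edges = [["Light_0", "Light_1"], ["Door_0", "Door_1"], ["Light_0", "Door_1"]]
    return create_tscbn(states_dict, nodes, edges, ran_gen=True)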
def run_structure_experiment(target_path, parameter_temp_nodes_experiment=False,
                             parameter_signals_experiment=False,
                             comparison_experiment_temp_nodes=False,
                             comparison_experiment_signals=False,
                             comparison_experiment_scp=False):
    # number of iterations per experiment
    iterations = 25
    # number of sequences per experiment
    sample_size = 5000

    # ----------------------------------------------------------------------------------------
    # Structure Generator Setup
    # ----------------------------------------------------------------------------------------
    sg = StructureGenerator(test_type=TestStructureEnum.SPECIFICATION)
    sg.add_base_structure_models([TSCBNStructureModel])
    sg.reference_model = TSCBNStructureModel

    # TIME SETTINGS (fixed for all experiments)
    sg.set_temporal_range(min_per_object_gap=0.5, max_per_object_gap=1.0)
    sg.set_temporal_variance(0.001)
    sg.set_dbn_tolerance(0.1)

    # PROBABILITY SETTINGS (fixed for all experiments)
    sg.set_state_change_probability(min_probability=0.95, max_probability=0.95)

    # ----------------------------------------------------------------------------------------
    # Experiments with different parameters of the SBTreeDiscoverer
    # ----------------------------------------------------------------------------------------
    if parameter_temp_nodes_experiment or parameter_signals_experiment:
        # Filtering parameters are fixed at 0.1. The parent graph approach performs exact
        # score optimization (but is not exhaustive); the structure optimization does not
        # run in parallel.
        sd = SBTreeDiscoverer(min_out_degree=0.1, k_infrequent=0.1, approach='parent_graph',
                              parallel=False)

        for edges_per_object in [1, 3]:
            print('edges_per_object: ' + str(edges_per_object) + '...')
            L().log.info('edges_per_object: ' + str(edges_per_object) + '...')

            # EDGE SETTINGS
            sg.set_connection_ranges(min_edges_per_object=edges_per_object,
                                     max_edges_per_object=edges_per_object,
                                     min_percent_inter=1.0, max_percent_inter=1.0)

            if parameter_temp_nodes_experiment:
                # 1st experiment: increase the number of temporal variables per signal

                # EVALUATOR SETUP
                ev = StructureEvaluator(True)
                ev.set_output_path(os.path.join(
                    target_path,
                    r"structure_eval_%s.csv" % strftime("%Y_%m_%d-%H_%M_%S", localtime())))
                metrics = ["add-edges", "del-edges", "num-add-edges", "num-del-edges", "shd",
                           "add-edges-skel", "del-edges-skel", "num-add-edges-skel",
                           "num-del-edges-skel", "shd-skel", "kld", "execution-time",
                           "psi-execution-time", "so-execution-time"]
                for metric in metrics:
                    ev.add_metric(metric)

                eval_results = dict()
                discovery_algorithms = set()
                for number_of_signals in [2, 3, 4]:
                    print('number_of_signals: ' + str(number_of_signals) + '...')
                    L().log.info('number_of_signals: ' + str(number_of_signals) + '...')
                    if edges_per_object >= number_of_signals:
                        continue
                    numbers_of_temp_nodes = [1, 2, 3, 4, 5, 6, 7]
                    for number_of_temp_nodes in numbers_of_temp_nodes:
                        print('number_of_temp_nodes: ' + str(number_of_temp_nodes) + '...')
                        L().log.info('number_of_temp_nodes: ' + str(number_of_temp_nodes) + '...')

                        # NODE SETTINGS
                        sg.set_node_range(min_objects=number_of_signals,
                                          max_objects=number_of_signals,
                                          min_temp_nodes=number_of_temp_nodes,
                                          max_temp_nodes=number_of_temp_nodes,
                                          min_states=3, max_states=3)

                        eval_results.update({number_of_temp_nodes: dict()})
                        for iteration in range(iterations):
                            print('iteration: ' + str(iteration) + '...')
                            L().log.info('iteration: ' + str(iteration) + '...')

                            # SAMPLE DATA
                            models, specifications = sg.run_next_testcase()
                            in_seq = models[sg.reference_model.__name__].randomsample(sample_size, {})
                            sequences = sequences_to_intervals(
                                in_seq, models[sg.reference_model.__name__].Vdata, False)[0]

                            # additional information for evaluation
                            additional_infos = dict()
                            additional_infos[sg.reference_model.__name__] = {'execution_time': 0.0,
                                                                             'data': None}

                            for score in ['BIC', 'AIC', 'Bdeu', 'K2']:
                                print('score: ' + str(score) + '...')
                                L().log.info('score: ' + str(score) + '...')
                                for temporal_threshold in np.arange(0.0, 2.5, 0.5):
                                    print('temporal_threshold: ' + str(temporal_threshold) + '...')
                                    L().log.info('temporal_threshold: ' + str(temporal_threshold) + '...')

                                    # STRUCTURE DISCOVERER SETUP
                                    sd.score = score
                                    sd.max_time_difference = temporal_threshold
                                    sd_name = 'SBTreeDiscoverer_' + score + '_TH_' + str(temporal_threshold)
                                    if sd_name not in eval_results.get(number_of_temp_nodes):
                                        # initialise metrics_dict
                                        metrics_dict = dict((metric, []) for metric in metrics)
                                        eval_results.get(number_of_temp_nodes).update({sd_name: metrics_dict})
                                    discovery_algorithms.add(sd_name)
                                    model_name = sd_name + ' (' + str(iteration) + ')'

                                    # RUN ALGORITHM
                                    L().log.info('----------------------------------------------------------')
                                    print('Run approach ' + model_name + '.')
                                    L().log.info('Run approach ' + model_name + '.')
                                    ping = clock()
                                    nodes, edges = sd.discover_structure(sequences)
                                    L().log.info('Nodes: ' + str(nodes))
                                    L().log.info('Edges: ' + str(edges))
                                    execution_time = clock() - ping
                                    additional_infos[model_name] = {
                                        'execution_time': execution_time,
                                        'data': sd.data,
                                        'psi_execution_time': sd.parent_set_identification_time,
                                        'so_execution_time': sd.structure_optimization_time}
                                    L().log.info('Execution time: ' + str(execution_time))
                                    L().log.info('----------------------------------------------------------')

                                    # CREATE TSCBN
                                    skel = GraphSkeleton()
                                    skel.V = nodes
                                    skel.E = edges
                                    skel.toporder()
                                    model = TSCBN("", skel, models[sg.reference_model.__name__].Vdata,
                                                  unempty=True, forbid_never=True, discrete_only=True)

                                    # EVALUATION
                                    eval_result = ev.evaluate(model_dict={model_name: model},
                                                              reference=models[sg.reference_model.__name__],
                                                              additional_infos=additional_infos)
                                    ev.print_eval_results(eval_results=eval_result,
                                                          specs=specifications, to_csv=True)
                                    for metric, value in eval_result[model_name].items():
                                        eval_results[number_of_temp_nodes][sd_name][metric].append(value)

                    experiment_name = 'ParameterTmpNodesExperiment_EPO_' + str(edges_per_object) + \
                                      '_Sig_' + str(number_of_signals)
                    relevant_metrics = ["num-add-edges", "num-del-edges", "shd",
                                        "num-add-edges-skel", "num-del-edges-skel", "shd-skel",
                                        "kld", "execution-time", "psi-execution-time",
                                        "so-execution-time"]
                    write_pgfplots_data(experiment_name, eval_results, relevant_metrics,
                                        discovery_algorithms, numbers_of_temp_nodes,
                                        'number_of_temp_nodes', target_path)

            if parameter_signals_experiment:
                # 2nd experiment: increase the number of signals
                if edges_per_object == 3:
                    continue  # TODO: remove this when choosing a maximal number of signals larger than 5

                # EVALUATOR SETUP
                ev = StructureEvaluator(True)
                ev.set_output_path(os.path.join(
                    target_path,
                    r"structure_eval_%s.csv" % strftime("%Y_%m_%d-%H_%M_%S", localtime())))
                metrics = ["add-edges", "del-edges", "num-add-edges", "num-del-edges", "shd",
                           "add-edges-skel", "del-edges-skel", "num-add-edges-skel",
                           "num-del-edges-skel", "shd-skel", "kld", "execution-time",
                           "psi-execution-time", "so-execution-time"]
                for metric in metrics:
                    ev.add_metric(metric)

                eval_results = dict()
                discovery_algorithms = set()
                for number_of_temp_nodes in [3, 5]:
                    print('number_of_temp_nodes: ' + str(number_of_temp_nodes) + '...')
                    L().log.info('number_of_temp_nodes: ' + str(number_of_temp_nodes) + '...')
                    numbers_of_signals = [2, 3, 4, 5]
                    evaluated_numbers_of_signals = copy.deepcopy(numbers_of_signals)
                    for number_of_signals in numbers_of_signals:
                        print('number_of_signals: ' + str(number_of_signals) + '...')
                        L().log.info('number_of_signals: ' + str(number_of_signals) + '...')
                        if edges_per_object >= number_of_signals:
                            evaluated_numbers_of_signals.remove(number_of_signals)
                            continue

                        # NODE SETTINGS
                        sg.set_node_range(min_objects=number_of_signals,
                                          max_objects=number_of_signals,
                                          min_temp_nodes=number_of_temp_nodes,
                                          max_temp_nodes=number_of_temp_nodes,
                                          min_states=3, max_states=3)

                        eval_results.update({number_of_signals: dict()})
                        for iteration in range(iterations):
                            print('iteration: ' + str(iteration) + '...')
                            L().log.info('iteration: ' + str(iteration) + '...')

                            # SAMPLE DATA
                            models, specifications = sg.run_next_testcase()
                            in_seq = models[sg.reference_model.__name__].randomsample(1000, {})
                            sequences = sequences_to_intervals(
                                in_seq, models[sg.reference_model.__name__].Vdata, False)[0]

                            # additional information for evaluation
                            additional_infos = dict()
                            additional_infos[sg.reference_model.__name__] = {'execution_time': 0.0,
                                                                             'data': None}

                            for score in ['BIC', 'AIC', 'Bdeu', 'K2']:
                                print('score: ' + str(score) + '...')
                                L().log.info('score: ' + str(score) + '...')
                                for temporal_threshold in np.arange(0.0, 2.5, 0.5):
                                    print('temporal_threshold: ' + str(temporal_threshold) + '...')
                                    L().log.info('temporal_threshold: ' + str(temporal_threshold) + '...')

                                    # STRUCTURE DISCOVERER SETUP
                                    sd.score = score
                                    sd.max_time_difference = temporal_threshold
                                    sd_name = 'SBTreeDiscoverer_' + score + '_TH_' + str(temporal_threshold)
                                    if sd_name not in eval_results.get(number_of_signals):
                                        # initialise metrics_dict
                                        metrics_dict = dict((metric, []) for metric in metrics)
                                        eval_results.get(number_of_signals).update({sd_name: metrics_dict})
                                    discovery_algorithms.add(sd_name)
                                    model_name = sd_name + ' (' + str(iteration) + ')'

                                    # RUN ALGORITHM
                                    L().log.info('----------------------------------------------------------')
                                    print('Run approach ' + model_name + '.')
                                    L().log.info('Run approach ' + model_name + '.')
                                    ping = clock()
                                    nodes, edges = sd.discover_structure(sequences)
                                    L().log.info('Nodes: ' + str(nodes))
                                    L().log.info('Edges: ' + str(edges))
                                    execution_time = clock() - ping
                                    additional_infos[model_name] = {
                                        'execution_time': execution_time,
                                        'data': sd.data,
                                        'psi_execution_time': sd.parent_set_identification_time,
                                        'so_execution_time': sd.structure_optimization_time}
                                    L().log.info('Execution time: ' + str(execution_time))
                                    L().log.info('----------------------------------------------------------')

                                    # CREATE TSCBN
                                    skel = GraphSkeleton()
                                    skel.V = nodes
                                    skel.E = edges
                                    skel.toporder()
                                    model = TSCBN("", skel, models[sg.reference_model.__name__].Vdata,
                                                  unempty=True, forbid_never=True, discrete_only=True)

                                    # EVALUATION
                                    eval_result = ev.evaluate(model_dict={model_name: model},
                                                              reference=models[sg.reference_model.__name__],
                                                              additional_infos=additional_infos)
                                    ev.print_eval_results(eval_results=eval_result,
                                                          specs=specifications, to_csv=True)
                                    for metric, value in eval_result[model_name].items():
                                        eval_results[number_of_signals][sd_name][metric].append(value)

                    experiment_name = 'ParameterSignalsExperiment_EPO_' + str(edges_per_object) + \
                                      '_TmpNodes_' + str(number_of_temp_nodes)
                    relevant_metrics = ["num-add-edges", "num-del-edges", "shd",
                                        "num-add-edges-skel", "num-del-edges-skel", "shd-skel",
                                        "kld", "execution-time", "psi-execution-time",
                                        "so-execution-time"]
                    write_pgfplots_data(experiment_name, eval_results, relevant_metrics,
                                        discovery_algorithms, evaluated_numbers_of_signals,
                                        'num_signals', target_path)

    # ----------------------------------------------------------------------------------------
    # Experiments with all algorithms
    # ----------------------------------------------------------------------------------------

    # 1st experiment: increase the number of temporal nodes
    if comparison_experiment_temp_nodes:
        # EDGE SETTINGS
        sg.set_connection_ranges(min_edges_per_object=2, max_edges_per_object=2,
                                 min_percent_inter=1.0, max_percent_inter=1.0)

        # EVALUATOR SETUP
        ev = StructureEvaluator(True)
        ev.set_output_path(os.path.join(
            target_path, r"structure_eval_%s.csv" % strftime("%Y_%m_%d-%H_%M_%S", localtime())))
        metrics = ["add-edges", "del-edges", "num-add-edges", "num-del-edges", "shd",
                   "add-edges-skel", "del-edges-skel", "num-add-edges-skel",
                   "num-del-edges-skel", "shd-skel", "kld", "execution-time"]
        for metric in metrics:
            ev.add_metric(metric)

        eval_results = dict()
        for number_of_signals in [3, 4]:
            print('number_of_signals: ' + str(number_of_signals) + '...')
            L().log.info('number_of_signals: ' + str(number_of_signals) + '...')
            discovery_algorithms = set()
            numbers_of_temp_nodes = [2, 3, 4, 5, 6, 7, 8]
            for number_of_temp_nodes in numbers_of_temp_nodes:
                print('number_of_temp_nodes: ' + str(number_of_temp_nodes) + '...')
                L().log.info('number_of_temp_nodes: ' + str(number_of_temp_nodes) + '...')

                # NODE SETTINGS
                sg.set_node_range(min_objects=number_of_signals, max_objects=number_of_signals,
                                  min_temp_nodes=number_of_temp_nodes,
                                  max_temp_nodes=number_of_temp_nodes,
                                  min_states=3, max_states=3)

                eval_results.update({number_of_temp_nodes: dict()})
                metrics_dict = dict((metric, []) for metric in metrics)

                # ---------------------------------------------------
                # RUN Structure Discovery several times
                # ---------------------------------------------------
                for iteration in range(iterations):
                    print('iteration: ' + str(iteration) + '...')
                    L().log.info('iteration: ' + str(iteration) + '...')

                    # SAMPLE DATA
                    models, specifications = sg.run_next_testcase()
                    in_seq = models[sg.reference_model.__name__].randomsample(sample_size, {})
                    sequences = sequences_to_intervals(
                        in_seq, models[sg.reference_model.__name__].Vdata, False)[0]

                    additional_infos = dict()
                    additional_infos[sg.reference_model.__name__] = {'execution_time': 0.0,
                                                                     'data': None}

                    # ---------------------------------------------------
                    # Discovery Algorithm
                    # ---------------------------------------------------
                    for sd_name, sd in get_structure_discovery_algorithms():
                        # LIMITATIONS DUE TO RUNTIME PROBLEMS
                        # TODO: run all algorithms for all networks on better hardware
                        if sd_name.startswith('Astar') and number_of_signals * number_of_temp_nodes > 16:
                            print('Network too large for the A* algorithm.')
                            continue
                        if sd_name.startswith('PC') and number_of_signals * number_of_temp_nodes > 24:
                            print('Network too large for the PC algorithm.')
                            continue
                        discovery_algorithms.add(sd_name)
                        if sd_name not in eval_results.get(number_of_temp_nodes):
                            eval_results.get(number_of_temp_nodes).update(
                                {sd_name: copy.deepcopy(metrics_dict)})
                        model_name = sd_name + ' (' + str(iteration) + ')'

                        L().log.info('----------------------------------------------------------')
                        print('Run approach ' + model_name + '.')
                        L().log.info('Run approach ' + model_name + '.')
                        ping = clock()
                        nodes, edges = sd.discover_structure(sequences)
                        L().log.info('Nodes: ' + str(nodes))
                        L().log.info('Edges: ' + str(edges))
                        execution_time = clock() - ping
                        additional_infos[model_name] = {'execution_time': execution_time,
                                                        'data': sd.data}
                        L().log.info('Execution time: ' + str(execution_time))
                        L().log.info('----------------------------------------------------------')

                        # create TSCBN
                        skel = GraphSkeleton()
                        skel.V = nodes
                        skel.E = edges
                        skel.toporder()
                        model = TSCBN("", skel, models[sg.reference_model.__name__].Vdata,
                                      unempty=True, forbid_never=True, discrete_only=True)

                        # ---------------------------------------------------
                        # EVALUATION
                        # ---------------------------------------------------
                        eval_result = ev.evaluate(model_dict={model_name: model},
                                                  reference=models[sg.reference_model.__name__],
                                                  additional_infos=additional_infos)
                        ev.print_eval_results(eval_results=eval_result, specs=specifications,
                                              to_csv=True)
                        for metric, value in eval_result[model_name].items():
                            eval_results[number_of_temp_nodes][sd_name][metric].append(value)

            experiment_name = 'TempNodesExperiment_Sig_' + str(number_of_signals)
            relevant_metrics = ["num-add-edges", "num-del-edges", "shd", "num-add-edges-skel",
                                "num-del-edges-skel", "shd-skel", "kld", "execution-time"]
            write_pgfplots_data(experiment_name, eval_results, relevant_metrics,
                                discovery_algorithms, numbers_of_temp_nodes,
                                'number_of_temp_nodes', target_path)

    # 2nd experiment: increase the number of signals
    if comparison_experiment_signals:
        # EDGE SETTINGS
        sg.set_connection_ranges(min_edges_per_object=2, max_edges_per_object=2,
                                 min_percent_inter=1.0, max_percent_inter=1.0)

        # EVALUATOR SETUP
        ev = StructureEvaluator(True)
        ev.set_output_path(os.path.join(
            target_path, r"structure_eval_%s.csv" % strftime("%Y_%m_%d-%H_%M_%S", localtime())))
        metrics = ["add-edges", "del-edges", "num-add-edges", "num-del-edges", "shd",
                   "add-edges-skel", "del-edges-skel", "num-add-edges-skel",
                   "num-del-edges-skel", "shd-skel", "kld", "execution-time",
                   "psi-execution-time", "so-execution-time"]
        for metric in metrics:
            ev.add_metric(metric)

        eval_results = dict()
        for number_of_temp_nodes in [3]:  # TODO: run with larger numbers on better hardware
            print('number_of_temp_nodes: ' + str(number_of_temp_nodes) + '...')
            L().log.info('number_of_temp_nodes: ' + str(number_of_temp_nodes) + '...')
            discovery_algorithms = set()
            numbers_of_signals = [3, 4, 5, 6, 7, 8]
            for number_of_signals in numbers_of_signals:
                print('number_of_signals: ' + str(number_of_signals) + '...')
                L().log.info('number_of_signals: ' + str(number_of_signals) + '...')

                # NODE SETTINGS
                sg.set_node_range(min_objects=number_of_signals, max_objects=number_of_signals,
                                  min_temp_nodes=number_of_temp_nodes,
                                  max_temp_nodes=number_of_temp_nodes,
                                  min_states=3, max_states=3)

                eval_results.update({number_of_signals: dict()})
                metrics_dict = dict((metric, []) for metric in metrics)

                # ---------------------------------------------------
                # RUN Structure Discovery several times
                # ---------------------------------------------------
                for iteration in range(iterations):
                    print('iteration: ' + str(iteration) + '...')
                    L().log.info('iteration: ' + str(iteration) + '...')

                    # SAMPLE DATA
                    models, specifications = sg.run_next_testcase()
                    in_seq = models[sg.reference_model.__name__].randomsample(sample_size, {})
                    sequences = sequences_to_intervals(
                        in_seq, models[sg.reference_model.__name__].Vdata, False)[0]

                    additional_infos = dict()
                    additional_infos[sg.reference_model.__name__] = {'execution_time': 0.0,
                                                                     'data': None,
                                                                     'psi-execution-time': 0.0,
                                                                     'so-execution-time': 0.0}

                    # ---------------------------------------------------
                    # Discovery Algorithm
                    # ---------------------------------------------------
                    for sd_name, sd in get_structure_discovery_algorithms():
                        # LIMITATIONS DUE TO RUNTIME PROBLEMS
                        # TODO: run all algorithms for all networks on better hardware
                        if sd_name.startswith('Astar') and number_of_signals * number_of_temp_nodes > 16:
                            print('Network too large for the A* algorithm.')
                            continue
                        if sd_name.startswith('PC') and number_of_signals * number_of_temp_nodes > 24:
                            print('Network too large for the PC algorithm.')
                            continue
                        if sd_name.startswith('sbPTM') and number_of_signals * number_of_temp_nodes > 30:
                            print('Network too large for the PTM algorithm.')
                            continue
                        if sd_name.startswith('cbPTM') and number_of_signals * number_of_temp_nodes > 30:
                            print('Network too large for the PTM algorithm.')
                            continue
                        discovery_algorithms.add(sd_name)
                        if sd_name not in eval_results.get(number_of_signals):
                            eval_results.get(number_of_signals).update(
                                {sd_name: copy.deepcopy(metrics_dict)})
                        model_name = sd_name + ' (' + str(iteration) + ')'

                        L().log.info('----------------------------------------------------------')
                        print('Run approach ' + model_name + '.')
                        L().log.info('Run approach ' + model_name + '.')
                        ping = clock()
                        nodes, edges = sd.discover_structure(sequences)
                        L().log.info('Nodes: ' + str(nodes))
                        L().log.info('Edges: ' + str(edges))
                        execution_time = clock() - ping
                        additional_infos[model_name] = {'execution_time': execution_time,
                                                        'data': sd.data,
                                                        'psi_execution_time': 0.0,
                                                        'so_execution_time': 0.0}
                        if sd.parent_set_identification_time and sd.structure_optimization_time:
                            additional_infos[model_name].update(
                                {'psi_execution_time': sd.parent_set_identification_time,
                                 'so_execution_time': sd.structure_optimization_time})
                        L().log.info('Execution time: ' + str(execution_time))
                        L().log.info('----------------------------------------------------------')

                        # create TSCBN
                        skel = GraphSkeleton()
                        skel.V = nodes
                        skel.E = edges
                        skel.toporder()
                        model = TSCBN("", skel, models[sg.reference_model.__name__].Vdata,
                                      unempty=True, forbid_never=True, discrete_only=True)

                        # ---------------------------------------------------
                        # EVALUATION
                        # ---------------------------------------------------
                        eval_result = ev.evaluate(model_dict={model_name: model},
                                                  reference=models[sg.reference_model.__name__],
                                                  additional_infos=additional_infos)
                        ev.print_eval_results(eval_results=eval_result, specs=specifications,
                                              to_csv=True)
                        for metric, value in eval_result[model_name].items():
                            eval_results[number_of_signals][sd_name][metric].append(value)

            experiment_name = 'SignalExperiment_TmpNodes_' + str(number_of_temp_nodes)
            relevant_metrics = ["num-add-edges", "num-del-edges", "shd", "num-add-edges-skel",
                                "num-del-edges-skel", "shd-skel", "kld", "execution-time",
                                "psi-execution-time", "so-execution-time"]
            write_pgfplots_data(experiment_name, eval_results, relevant_metrics,
                                discovery_algorithms, numbers_of_signals, 'number_of_signals',
                                target_path)

    # 3rd experiment: different values for the state change probability
    if comparison_experiment_scp:
        # EDGE SETTINGS
        sg.set_connection_ranges(min_edges_per_object=2, max_edges_per_object=2,
                                 min_percent_inter=1.0, max_percent_inter=1.0)

        # EVALUATOR SETUP
        ev = StructureEvaluator(True)
        ev.set_output_path(os.path.join(
            target_path, r"structure_eval_%s.csv" % strftime("%Y_%m_%d-%H_%M_%S", localtime())))
        metrics = ["add-edges", "del-edges", "num-add-edges", "num-del-edges", "shd",
                   "add-edges-skel", "del-edges-skel", "num-add-edges-skel",
                   "num-del-edges-skel", "shd-skel", "kld", "execution-time"]
        for metric in metrics:
            ev.add_metric(metric)

        eval_results = dict()
        for number_of_temp_nodes in [3, 4]:
            print('number_of_temp_nodes: ' + str(number_of_temp_nodes) + '...')
            L().log.info('number_of_temp_nodes: ' + str(number_of_temp_nodes) + '...')

            # NODE SETTINGS
            sg.set_node_range(min_objects=3, max_objects=3,
                              min_temp_nodes=number_of_temp_nodes,
                              max_temp_nodes=number_of_temp_nodes,
                              min_states=2, max_states=4)
            sg.set_connection_ranges(min_edges_per_object=2, max_edges_per_object=3,
                                     min_percent_inter=0.5, max_percent_inter=1.0)

            discovery_algorithms = set()
            state_change_probabilities = [0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
            for state_change_probability in state_change_probabilities:
                print('state_change_probability: ' + str(state_change_probability) + '...')
                L().log.info('state_change_probability: ' + str(state_change_probability) + '...')
                sg.set_state_change_probability(min_probability=state_change_probability,
                                                max_probability=state_change_probability)
                eval_results.update({state_change_probability: dict()})
                metrics_dict = dict((metric, []) for metric in metrics)

                # ---------------------------------------------------
                # RUN Structure Discovery several times
                # ---------------------------------------------------
                for iteration in range(iterations):
                    print('iteration: ' + str(iteration) + '...')
                    L().log.info('iteration: ' + str(iteration) + '...')

                    # SAMPLE DATA
                    models, specifications = sg.run_next_testcase()
                    in_seq = models[sg.reference_model.__name__].randomsample(sample_size, {})
                    sequences = sequences_to_intervals(
                        in_seq, models[sg.reference_model.__name__].Vdata, False)[0]

                    additional_infos = dict()
                    additional_infos[sg.reference_model.__name__] = {'execution_time': 0.0,
                                                                     'data': None}

                    # ---------------------------------------------------
                    # Discovery Algorithm
                    # ---------------------------------------------------
                    for sd_name, sd in get_structure_discovery_algorithms():
                        # LIMITATIONS DUE TO RUNTIME PROBLEMS
                        # TODO: run all algorithms for all networks on better hardware
                        if sd_name.startswith('Astar') and 3 * number_of_temp_nodes > 16:
                            print('Network too large for the A* algorithm.')
                            continue
                        discovery_algorithms.add(sd_name)
                        if sd_name not in eval_results.get(state_change_probability):
                            eval_results.get(state_change_probability).update(
                                {sd_name: copy.deepcopy(metrics_dict)})
                        model_name = sd_name + ' (' + str(iteration) + ')'

                        L().log.info('----------------------------------------------------------')
                        print('Run approach ' + model_name + '.')
                        L().log.info('Run approach ' + model_name + '.')
                        ping = clock()
                        nodes, edges = sd.discover_structure(sequences)
                        L().log.info('Nodes: ' + str(nodes))
                        L().log.info('Edges: ' + str(edges))
                        execution_time = clock() - ping
                        additional_infos[model_name] = {'execution_time': execution_time,
                                                        'data': sd.data}
                        L().log.info('Execution time: ' + str(execution_time))
                        L().log.info('----------------------------------------------------------')

                        # create TSCBN
                        skel = GraphSkeleton()
                        skel.V = nodes
                        skel.E = edges
                        skel.toporder()
                        model = TSCBN("", skel, models[sg.reference_model.__name__].Vdata,
                                      unempty=True, forbid_never=True, discrete_only=True)

                        # ---------------------------------------------------
                        # EVALUATION
                        # ---------------------------------------------------
                        eval_result = ev.evaluate(model_dict={model_name: model},
                                                  reference=models[sg.reference_model.__name__],
                                                  additional_infos=additional_infos)
                        ev.print_eval_results(eval_results=eval_result, specs=specifications,
                                              to_csv=True)
                        for metric, value in eval_result[model_name].items():
                            eval_results[state_change_probability][sd_name][metric].append(value)

            experiment_name = 'SCP_Experiment_Sig_3_TmpNodes_' + str(number_of_temp_nodes)
            relevant_metrics = ["num-add-edges", "num-del-edges", "shd", "num-add-edges-skel",
                                "num-del-edges-skel", "shd-skel", "kld", "execution-time"]
            write_pgfplots_data(experiment_name, eval_results, relevant_metrics,
                                discovery_algorithms, state_change_probabilities,
                                'state_change_probability', target_path)
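# Hedged entry-point sketch (not part of the original module; the target path is
# illustrative): each flag toggles one of the experiment blocks above, and results plus
# pgfplots data files are written below target_path.
def _example_run_experiments():
    run_structure_experiment('./results',
                             parameter_temp_nodes_experiment=True,
                             comparison_experiment_temp_nodes=True)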
def generate_model(self, structure_specification={}):
    '''
    Creates a TSCBN from the structure extracted with the naive SPM approach combined with
    expert knowledge. The data used to learn this TSCBN is also preprocessed manually to
    fit the definition.
    :param structure_specification: unused; the structure is defined manually below
    :return: the created TSCBN
    '''
    # Define vertices
    v, node_cpds = [], dict()
    defaults = {"S-E_0": "s", "S-D_0": "p", "S-C_0": "m", "S-B_0": "j", "S-A_0": "a"}

    # Initial nodes (a Never state is pointless here)
    v += self._dynamic_node("S-A_0", "disc", ["e", "c", "f", "d", "a"], node_cpds)
    v += self._dynamic_node("S-E_0", "disc", ["s", "t", "r"], node_cpds)
    v += self._dynamic_node("S-D_0", "disc", ["p", "q"], node_cpds)
    v += self._dynamic_node("S-C_0", "disc", ["m", "n", "o"], node_cpds)
    v += self._dynamic_node("S-B_0", "disc", ["j", "i", "k", "u", "l", "v"], node_cpds)

    # More nodes
    v += self._dynamic_node("S-A_1", "disc", ["e", "c", "f", "d", "a"], node_cpds)
    v += self._dynamic_node("S-E_1", "disc", ["s", "t", "r"], node_cpds)
    v += self._dynamic_node("S-D_1", "disc", ["p", "q"], node_cpds)
    v += self._dynamic_node("S-C_1", "disc", ["m", "n", "o"], node_cpds)
    v += self._dynamic_node("S-B_1", "disc", ["j", "i", "k", "u", "l", "v"], node_cpds)

    v += self._dynamic_node("S-A_2", "disc", ["e", "c", "f", "d", "a"], node_cpds)
    v += self._dynamic_node("S-E_2", "disc", ["s", "t", "r"], node_cpds)
    v += self._dynamic_node("S-C_2", "disc", ["m", "n", "o"], node_cpds)
    v += self._dynamic_node("S-B_2", "disc", ["j", "i", "k", "u", "l", "v"], node_cpds)

    v += self._dynamic_node("S-C_3", "disc", ["m", "n", "o"], node_cpds)

    # Define edges
    e = []
    e += self._self_dependencies(v, ["S-A", "S-E", "S-D", "S-C", "S-B"])
    e += [["S-A_0", "S-D_1"]]  # FL assistant must be on
    e += [["S-A_0", "S-B_1"]]  # if reason
    e += [["S-A_1", "S-D_1"]]  # if reason
    e += [["S-A_2", "S-B_2"]]  # if reason
    e += [["S-A_2", "S-E_2"]]  # if reason
    e += [["S-D_1", "S-C_2"]]  # if reason
    e += [["S-B_1", "S-C_1"]]  # if reason
    e += [["S-C_1", "S-E_1"]]  # if reason

    # ------------------------------------------------------------------
    # Define Temporal Information - Manually
    # ------------------------------------------------------------------
    dL_mean = 3
    dL_var = 1
    skel, node_cpds = self._add_temporal_basics(dL_mean, dL_var, node_cpds, v, e, defaults)

    # ------------------------------------------------------------------
    # Create Network
    # ------------------------------------------------------------------
    tscbn = TSCBN("", skel, node_cpds, unempty=True, forbid_never=False, discrete_only=True,
                  default_states=defaults,
                  default_is_distributed=True)  # discrete case - continuous nodes later
    return tscbn
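# Hypothetical helper (not in the original source): builds the expert model above and
# samples from it the same way the synthetic experiments do; randomsample(n, evidence)
# is the sampling call used throughout this codebase.
def _example_sample_expert_model(self):
    tscbn = self.generate_model()
    return tscbn.randomsample(50, {})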
def _model_4(self):
    '''
    Larger model
    '''
    # ------------------------------------------------------------------
    # Define Basic Structure - Manually
    # ------------------------------------------------------------------
    # Define vertices
    v, node_cpds = [], dict()
    defaults = {"BedLicht_0": "Aus_bed", "StgLicht_0": "Aus", "StaLicht_0": "Aus"}

    # Initial nodes (a Never state is pointless here)
    v += self._dynamic_node("BedLicht_0", "disc", ["Aus_bed", "An_bed"], node_cpds)
    v += self._dynamic_node("StaLicht_0", "disc", ["Aus", "An"], node_cpds)
    # v += self._dynamic_node("StaLicht_0", "disc", ["Aus", "An", "Never"], node_cpds)
    v += self._dynamic_node("StgLicht_0", "disc", ["Aus", "An"], node_cpds)

    # More nodes
    v += self._dynamic_node("BedLicht_1", "disc", ["Aus_bed", "An_bed"], node_cpds)
    v += self._dynamic_node("StaLicht_1", "disc", ["Aus", "An"], node_cpds)
    v += self._dynamic_node("StgLicht_1", "disc", ["Aus", "An"], node_cpds)

    # Succeeding nodes
    v += self._dynamic_node("BedLicht_2", "disc", ["Aus_bed", "An_bed"], node_cpds)
    v += self._dynamic_node("StaLicht_2", "disc", ["Aus", "An"], node_cpds)
    v += self._dynamic_node("StgLicht_2", "disc", ["Aus", "An"], node_cpds)

    # Define edges
    e = []
    e += self._self_dependencies(v, ["BedLicht", "StaLicht", "StgLicht"])
    e += [["BedLicht_1", "StgLicht_1"]]  # operation always happens before control
    e += [["StgLicht_1", "StaLicht_1"]]  # control always happens before status

    # ------------------------------------------------------------------
    # Define Temporal Information - Manually
    # ------------------------------------------------------------------
    dL_mean = 3
    dL_var = 1
    skel, node_cpds = self._add_temporal_basics(dL_mean, dL_var, node_cpds, v, e, defaults)

    # ------------------------------------------------------------------
    # Create Network
    # ------------------------------------------------------------------
    tscbn = TSCBN("", skel, node_cpds, unempty=True, forbid_never=False, discrete_only=True,
                  default_states=defaults,
                  default_is_distributed=True)  # discrete case - continuous nodes later
    tscbn.draw("ext")

    # Set some probabilities that are known for sure
    tscbn.Vdata["StgLicht_0"]["cprob"] = np.array([0.5, 0.5])
    tscbn.Vdata["BedLicht_0"]["cprob"] = np.array([0.5, 0.5])
    tscbn.Vdata["StaLicht_0"]["cprob"] = np.array([0.5, 0.5])

    # when StgLicht_1 turns on, StaLicht_1 turns on as well
    tscbn.Vdata["StgLicht_1"]["cprob"]["['Aus', 'Aus_bed']"] = np.array([0.8, 0.2])
    tscbn.Vdata["StgLicht_1"]["cprob"]["['Aus', 'An_bed']"] = np.array([0.2, 0.8])  # half wrong
    tscbn.Vdata["StgLicht_1"]["cprob"]["['An', 'Aus_bed']"] = np.array([0.8, 0.2])  # half wrong
    # Must stay on: the event "An" would occur here, but it would not be visible within the
    # interval. BUT it would still influence StaLicht, because it actually happened,
    # i.e. for sampling it has to be modelled this way.
    # A Never state would indicate an error here: BedLicht turned on, but StgLicht never
    # reacted, hence StaLicht never reacted either.
    tscbn.Vdata["StgLicht_1"]["cprob"]["['An', 'An_bed']"] = np.array([0.2, 0.8])

    tscbn.Vdata["StgLicht_2"]["cprob"]["['Aus']"] = np.array([0.8, 0.2])  # still wrong
    tscbn.Vdata["StgLicht_2"]["cprob"]["['An']"] = np.array([0.2, 0.8])  # still wrong

    tscbn.Vdata["BedLicht_1"]["cprob"]["['Aus_bed']"] = np.array([0.1, 0.9])
    # tscbn.Vdata["BedLicht_1"]["cprob"]["['An_bed']"] = np.array([0.9, 0.1])
    tscbn.Vdata["BedLicht_2"]["cprob"]["['Aus_bed']"] = np.array([0.9, 0.1])
    tscbn.Vdata["BedLicht_2"]["cprob"]["['An_bed']"] = np.array([0.1, 0.9])

    tscbn.Vdata["StaLicht_1"]["cprob"]["['Aus', 'Aus']"] = np.array([0.8, 0.2])
    tscbn.Vdata["StaLicht_1"]["cprob"]["['Aus', 'An']"] = np.array([0.2, 0.8])  # still wrong
    tscbn.Vdata["StaLicht_1"]["cprob"]["['An', 'Aus']"] = np.array([0.8, 0.2])  # still wrong
    tscbn.Vdata["StaLicht_1"]["cprob"]["['An', 'An']"] = np.array([0.2, 0.8])

    # Here a Never state would make sense: there is no causal influence that would set this
    # signal's value; instead it stays "Aus" simply because nothing happened, so this node
    # does not occur either. Why keep the node at all? Because it may be Never with 0.9 but
    # "Aus" with 0.1 due to a spontaneous change.
    tscbn.Vdata["StaLicht_2"]["cprob"]["['Aus']"] = np.array([0.8, 0.2])
    tscbn.Vdata["StaLicht_2"]["cprob"]["['An']"] = np.array([0.2, 0.8])

    return tscbn
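# Illustrative sketch (the helper name is hypothetical): conditional probability tables in
# Vdata are indexed by the *string* form of the parent-state list, so lookup keys must be
# built exactly as below.
def _example_cprob_lookup(self):
    tscbn = self._model_4()
    parent_states = ['Aus', 'An_bed']  # order follows Vdata["StgLicht_1"]["parents"]
    key = str(parent_states)           # -> "['Aus', 'An_bed']"
    return tscbn.Vdata["StgLicht_1"]["cprob"][key]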
def _easy_model_1(self):
    # Reference model

    # Define vertices
    v, node_cpds = [], dict()
    defaults = {"V0_0": "o0_0", "V1_0": "o1_0", "V2_0": "o2_1"}

    # Initial nodes (a Never state is pointless here)
    v += self._dynamic_node("V0_0", "disc", ["o0_0", "o0_1", "o0_2"], node_cpds)
    v += self._dynamic_node("V1_0", "disc", ["o1_0", "o1_1", "o1_2"], node_cpds)
    v += self._dynamic_node("V2_0", "disc", ["o2_0", "o2_1", "o2_2"], node_cpds)

    # More nodes
    v += self._dynamic_node("V0_1", "disc", ["o0_0", "o0_1", "o0_2"], node_cpds)
    v += self._dynamic_node("V1_1", "disc", ["o1_0", "o1_1", "o1_2"], node_cpds)
    v += self._dynamic_node("V2_1", "disc", ["o2_0", "o2_1", "o2_2"], node_cpds)

    # Succeeding nodes
    v += self._dynamic_node("V0_2", "disc", ["o0_0", "o0_1", "o0_2"], node_cpds)
    v += self._dynamic_node("V1_2", "disc", ["o1_0", "o1_1", "o1_2"], node_cpds)
    v += self._dynamic_node("V2_2", "disc", ["o2_0", "o2_1", "o2_2"], node_cpds)

    v += self._dynamic_node("V0_3", "disc", ["o0_0", "o0_1", "o0_2"], node_cpds)
    v += self._dynamic_node("V1_3", "disc", ["o1_0", "o1_1", "o1_2"], node_cpds)
    v += self._dynamic_node("V2_3", "disc", ["o2_0", "o2_1", "o2_2"], node_cpds)

    # Define edges
    e = []
    e += self._self_dependencies(v, ["V0", "V1", "V2"])
    e += [["V0_1", "V1_2"]]  # the V0 change always happens before V1 reacts
    e += [["V2_1", "V0_2"]]  # the V2 change always happens before V0 reacts

    # ------------------------------------------------------------------
    # Define Temporal Information - Manually
    # ------------------------------------------------------------------
    dL_mean = 3
    dL_var = 1
    skel, node_cpds = self._add_temporal_basics(dL_mean, dL_var, node_cpds, v, e, defaults)

    # ------------------------------------------------------------------
    # Create Network
    # ------------------------------------------------------------------
    tscbn = TSCBN("", skel, node_cpds, unempty=True, forbid_never=False, discrete_only=True,
                  default_states=defaults,
                  default_is_distributed=True)  # discrete case - continuous nodes later
    # tscbn.draw("ext")

    # Set some probabilities that are known for sure
    tscbn.Vdata["V0_0"]["cprob"] = np.array([0.2, 0.5, 0.3])
    tscbn.Vdata["V1_0"]["cprob"] = np.array([0.5, 0.2, 0.3])
    tscbn.Vdata["V2_0"]["cprob"] = np.array([0.3, 0.2, 0.5])

    tscbn.Vdata["V0_1"]["cprob"]["['o0_0']"] = np.array([0.2, 0.3, 0.5])
    tscbn.Vdata["V0_1"]["cprob"]["['o0_1']"] = np.array([0.6, 0.2, 0.2])
    tscbn.Vdata["V0_1"]["cprob"]["['o0_2']"] = np.array([0.2, 0.6, 0.2])

    tscbn.Vdata["V0_2"]["cprob"]["['o0_0', 'o2_0']"] = np.array([0.2, 0.3, 0.5])
    tscbn.Vdata["V0_2"]["cprob"]["['o0_0', 'o2_1']"] = np.array([0.2, 0.6, 0.2])
    tscbn.Vdata["V0_2"]["cprob"]["['o0_0', 'o2_2']"] = np.array([0.2, 0.6, 0.2])
    tscbn.Vdata["V0_2"]["cprob"]["['o0_1', 'o2_0']"] = np.array([0.1, 0.2, 0.7])
    tscbn.Vdata["V0_2"]["cprob"]["['o0_1', 'o2_1']"] = np.array([0.7, 0.2, 0.1])
    tscbn.Vdata["V0_2"]["cprob"]["['o0_1', 'o2_2']"] = np.array([0.2, 0.2, 0.6])
    tscbn.Vdata["V0_2"]["cprob"]["['o0_2', 'o2_0']"] = np.array([0.5, 0.3, 0.2])
    tscbn.Vdata["V0_2"]["cprob"]["['o0_2', 'o2_1']"] = np.array([0.6, 0.2, 0.2])
    tscbn.Vdata["V0_2"]["cprob"]["['o0_2', 'o2_2']"] = np.array([0.2, 0.6, 0.2])

    tscbn.Vdata["V1_1"]["cprob"]["['o1_0']"] = np.array([0.2, 0.2, 0.6])
    tscbn.Vdata["V1_1"]["cprob"]["['o1_1']"] = np.array([0.6, 0.2, 0.2])
    tscbn.Vdata["V1_1"]["cprob"]["['o1_2']"] = np.array([0.2, 0.6, 0.2])

    tscbn.Vdata["V1_2"]["cprob"]["['o1_0', 'o0_0']"] = np.array([0.2, 0.3, 0.5])
    tscbn.Vdata["V1_2"]["cprob"]["['o1_0', 'o0_1']"] = np.array([0.2, 0.6, 0.2])
    tscbn.Vdata["V1_2"]["cprob"]["['o1_0', 'o0_2']"] = np.array([0.2, 0.6, 0.2])
    tscbn.Vdata["V1_2"]["cprob"]["['o1_1', 'o0_0']"] = np.array([0.1, 0.2, 0.7])
    tscbn.Vdata["V1_2"]["cprob"]["['o1_1', 'o0_1']"] = np.array([0.6, 0.2, 0.2])
    tscbn.Vdata["V1_2"]["cprob"]["['o1_1', 'o0_2']"] = np.array([0.2, 0.2, 0.6])
    tscbn.Vdata["V1_2"]["cprob"]["['o1_2', 'o0_0']"] = np.array([0.5, 0.3, 0.2])
    tscbn.Vdata["V1_2"]["cprob"]["['o1_2', 'o0_1']"] = np.array([0.6, 0.2, 0.2])
    tscbn.Vdata["V1_2"]["cprob"]["['o1_2', 'o0_2']"] = np.array([0.2, 0.6, 0.2])

    tscbn.Vdata["V2_1"]["cprob"]["['o2_0']"] = np.array([0.1, 0.3, 0.6])
    tscbn.Vdata["V2_1"]["cprob"]["['o2_1']"] = np.array([0.7, 0.1, 0.2])
    tscbn.Vdata["V2_1"]["cprob"]["['o2_2']"] = np.array([0.2, 0.7, 0.1])

    tscbn.Vdata["V2_2"]["cprob"]["['o2_0']"] = np.array([0.1, 0.3, 0.6])
    tscbn.Vdata["V2_2"]["cprob"]["['o2_1']"] = np.array([0.7, 0.1, 0.2])
    tscbn.Vdata["V2_2"]["cprob"]["['o2_2']"] = np.array([0.2, 0.7, 0.1])

    tscbn.Vdata["V2_3"]["cprob"]["['o2_0']"] = np.array([0.2, 0.2, 0.6])
    tscbn.Vdata["V2_3"]["cprob"]["['o2_1']"] = np.array([0.6, 0.2, 0.2])
    tscbn.Vdata["V2_3"]["cprob"]["['o2_2']"] = np.array([0.2, 0.6, 0.2])

    return tscbn
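# Hypothetical helper mirroring the experiment code above (assumes sequences_to_intervals
# is importable here): draws event sequences from the reference model and converts them
# to interval data.
def _example_sample_easy_model(self):
    model = self._easy_model_1()
    in_seq = model.randomsample(100, {})
    return sequences_to_intervals(in_seq, model.Vdata, False)[0]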
def experiment_discovery(_run, approach, sample_size, iterations, min_per_object_gap,
                         max_per_object_gap, temporal_variance, dbn_tolerance, sc_probability,
                         edges_per_object, inter_edge_percent, number_of_signals,
                         number_of_temp_nodes, sb_min_out_degree, sb_k_infrequent, sb_score,
                         sb_max_time_difference, pc_min_out_degree, pc_k_infrequent, pc_alpha,
                         pc_max_time_difference, pcd_alpha, pcd_max_reach, astar_score,
                         ghc_score, ghc_tabu_length, novel_filtering, novel_k_infrequent,
                         novel_alpha, novel_draw_it, novel_max_reach, novel_min_out_degree,
                         pc_chi):
    # ----------------------------------------------------------------------------------------
    # Setup
    # ----------------------------------------------------------------------------------------
    if edges_per_object >= number_of_signals:
        return

    # Generator Setup
    sg = initialize_generator(min_per_object_gap, max_per_object_gap, temporal_variance,
                              dbn_tolerance, sc_probability, edges_per_object,
                              inter_edge_percent, number_of_signals, number_of_temp_nodes)

    # SD Approach
    sd = get_sd_approach(approach, sb_min_out_degree, sb_k_infrequent, sb_score,
                         sb_max_time_difference, pc_min_out_degree, pc_k_infrequent, pc_alpha,
                         pc_max_time_difference, pcd_alpha, pcd_max_reach, astar_score,
                         ghc_score, ghc_tabu_length, novel_filtering, novel_k_infrequent,
                         novel_alpha, novel_draw_it, novel_min_out_degree, novel_max_reach,
                         pc_chi)

    # Evaluation Metrics
    ev = initialize_evaluator()

    # ----------------------------------------------------------------------------------------
    # Run Experiment
    # ----------------------------------------------------------------------------------------
    eval_results = dict()
    for iteration in range(iterations):
        print('iteration: ' + str(iteration + 1) + '...')

        # SAMPLE DATA
        models, specifications = sg.run_next_testcase()
        print("NUMBER INTER EDGES: %s" % str(len(
            [e for e in models["TSCBNStructureModel"].E
             if e[0].split("_")[0] != e[1].split("_")[0] and not e[1].startswith("dL_")])))
        in_seq = models[sg.reference_model.__name__].randomsample(sample_size, {})
        sequences = sequences_to_intervals(in_seq, models[sg.reference_model.__name__].Vdata,
                                           False)[0]
        additional_infos = dict()
        additional_infos[sg.reference_model.__name__] = {'execution_time': 0.0, 'data': None}

        # LIMITATIONS DUE TO RUNTIME PROBLEMS
        if hw_limitation_reached(approach, number_of_signals, number_of_temp_nodes):
            continue

        # RUN DISCOVERY
        ping = clock()
        nodes, edges = sd.discover_structure(sequences)
        execution_time = clock() - ping

        # CREATE TSCBN FROM THE DISCOVERED STRUCTURE (CPDs are taken from the reference model)
        skel = GraphSkeleton()
        skel.V = nodes
        skel.E = edges
        skel.toporder()
        model = TSCBN("", skel, models[sg.reference_model.__name__].Vdata, unempty=True,
                      forbid_never=True, discrete_only=True)

        # ----------------------------------------------------------------------------------------
        # Run Evaluation of the current Iteration
        # ----------------------------------------------------------------------------------------
        model_name = str(approach) + ' (' + str(iteration) + ')'
        additional_infos[model_name] = {'execution_time': execution_time, 'data': sd.data}
        eval_result = ev.evaluate(model_dict={model_name: model},
                                  reference=models[sg.reference_model.__name__],
                                  additional_infos=additional_infos)
        # ev.print_eval_results(eval_results=eval_result, specs=specifications, to_csv=True)
        for metric, value in eval_result[model_name].items():
            if metric not in eval_results:
                eval_results[metric] = []
            eval_results[metric].append(value)
            try:
                float(value)  # only log numeric metrics
                _run.log_scalar(metric, value)
            except (TypeError, ValueError):
                pass

    # ----------------------------------------------------------------------------------------
    # Run Evaluation averaged over all Iterations
    # ----------------------------------------------------------------------------------------
    for metric in eval_results:
        try:
            # the stored values are lists, so probe the first entry for being numeric
            float(eval_results[metric][0])
            _run.log_scalar("avg_%s" % metric, np.mean(eval_results[metric]))
        except (TypeError, ValueError, IndexError):
            pass
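# Hedged sketch (assumption, not from the original source): the `_run` handle and the
# `_run.log_scalar` calls above match the Sacred framework's Run API, so the experiment is
# presumably wired up roughly as below; the experiment name and config values are
# illustrative, and the full config would list every parameter of experiment_discovery.
#
#   from sacred import Experiment
#   ex = Experiment('structure_discovery')
#   ex.add_config(approach='sbTree', sample_size=5000, iterations=25, ...)
#
#   @ex.main
#   def main(_run, _config):
#       # Sacred injects the run handle and the full config dict; config keys
#       # must match the parameter names of experiment_discovery.
#       experiment_discovery(_run, **_config)
#
#   ex.run()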