def run_continuous(df, pc=None):
    """
    Run the 'fges' algorithm against a continuous dataframe to return a dot format causal graph.

    :param df: dataframe handed to the tetrad runner as ``dfs``
    :param pc: optional pycausal instance; when supplied, its java vm is neither
        started nor stopped here. When omitted, a new instance is created and
        its java vm is started and stopped within this call.
    :return: dot graph string, or None if the run raised an exception
    """
    single_run = False
    dot_str = None
    try:
        # start java vm and get algo runner
        if pc is None:
            pc = pycausal()
            pc.start_vm()
            single_run = True
        tetrad = s.tetradrunner()
        tetrad.run(algoId='fges',
                   dfs=df,
                   maxDegree=-1,
                   faithfulnessAssumed=True,
                   verbose=AlgorithmConstants.VERBOSE)
        graph = tetrad.getTetradGraph()
        dot_str = pc.tetradGraphToDot(graph)
    except Exception as e:
        _logger.error(str(e))
        print(str(e))
    finally:
        # shutdown java vm in `finally` so a vm started here is not left
        # running when the algorithm run fails
        if single_run:
            try:
                pc.stop_vm()
            except Exception as e:
                # a failing shutdown is logged, never raised to the caller
                _logger.error(str(e))
                print(str(e))
    return dot_str
def run_discrete(df, pc=None):
    """
    Run the 'fges' algorithm (score 'bdeu-score') against a discrete dataframe
    to return a dot format causal graph.

    :param df: dataframe handed to the tetrad runner as ``dfs``
    :param pc: optional pycausal instance; when supplied, its java vm is neither
        started nor stopped here. When omitted, a new instance is created and
        its java vm is started and stopped within this call.
    :return: dot graph string, or None if the run raised an exception
    """
    single_run = False
    dot_str = None
    try:
        # start java vm and get algo runner
        if pc is None:
            pc = pycausal()
            pc.start_vm()
            single_run = True
        tetrad = s.tetradrunner()
        tetrad.run(algoId='fges',
                   dfs=df,
                   scoreId='bdeu-score',
                   dataType=AlgorithmConstants.DISCRETE,
                   maxDegree=3,
                   faithfulnessAssumed=True,
                   symmetricFirstStep=True,
                   verbose=AlgorithmConstants.VERBOSE)
        graph = tetrad.getTetradGraph()
        dot_str = pc.tetradGraphToDot(graph)
    except Exception as e:
        _logger.error(str(e))
        print(str(e))
    finally:
        # shutdown java vm in `finally` so a vm started here is not left
        # running when the algorithm run fails
        if single_run:
            try:
                pc.stop_vm()
            except Exception as e:
                # a failing shutdown is logged, never raised to the caller
                _logger.error(str(e))
                print(str(e))
    return dot_str
def test_multiple_algo_run(self):
    """
    Run five algorithms back to back on one shared pycausal instance and check
    that every call contributed an entry to the result list.
    """
    # (data file, algorithm) pairs, in execution order
    runs = [
        ("sim_discrete_data_20vars_100cases.txt", self.pc_util.algo_bayes_est),
        ("charity.txt", self.pc_util.algo_fci),
        ("charity.txt", self.pc_util.algo_pc),
        ("charity.txt", self.pc_util.algo_fges_continuous),
        ("audiology.txt", self.pc_util.algo_fges_discrete),
    ]
    dot_str_list = []
    pc = pycausal()
    pc.start_vm()
    try:
        for file_name, algo_fn in runs:
            # the file is re-read for every run so each algorithm receives
            # its own freshly loaded dataframe
            df = pd.read_table(os.path.join(self.data_dir, file_name), sep="\t")
            dot_str_list.append(algo_fn(df, pc))
    finally:
        # stop the java vm even when one of the algorithms raises
        pc.stop_vm()
    self.assertEqual(len(dot_str_list), 5)
def run_continuous(df, pc=None):
    """
    Run the 'rfci' algorithm (test 'fisher-z-test') against a continuous
    dataframe to return a dot format causal graph.

    :param df: dataframe handed to the tetrad runner as ``dfs``
    :param pc: optional pycausal instance; when supplied, its java vm is neither
        started nor stopped here. When omitted, a new instance is created and
        its java vm is started and stopped within this call.
    :return: dot graph string, or None if the run raised an exception
    """
    single_run = False
    dot_str = None
    try:
        # start java vm and get algo runner
        if pc is None:
            pc = pycausal()
            pc.start_vm()
            single_run = True
        tetrad = s.tetradrunner()
        tetrad.run(algoId='rfci',
                   dfs=df,
                   testId='fisher-z-test',
                   depth=-1,
                   maxPathLength=-1,
                   completeRuleSetUsed=False,
                   verbose=AlgorithmConstants.VERBOSE)
        graph = tetrad.getTetradGraph()
        dot_str = pc.tetradGraphToDot(graph)
    except Exception as e:
        _logger.error(str(e))
        print(str(e))
    finally:
        # shutdown java vm in `finally` so a vm started here is not left
        # running when the algorithm run fails
        if single_run:
            try:
                pc.stop_vm()
            except Exception as e:
                # a failing shutdown is logged, never raised to the caller
                _logger.error(str(e))
                print(str(e))
    return dot_str
def run_discrete(df, pc=None):
    """
    Run the 'rfci' algorithm (test 'chi-square-test') against a discrete
    dataframe to return a dot format causal graph.

    :param df: dataframe handed to the tetrad runner as ``dfs``
    :param pc: optional pycausal instance; when supplied, its java vm is neither
        started nor stopped here. When omitted, a new instance is created and
        its java vm is started and stopped within this call.
    :return: dot graph string, or None if the run raised an exception
    """
    single_run = False
    dot_str = None
    try:
        # start java vm and get algo runner
        if pc is None:
            pc = pycausal()
            pc.start_vm()
            single_run = True
        tetrad = s.tetradrunner()
        tetrad.run(algoId='rfci',
                   dfs=df,
                   testId='chi-square-test',
                   dataType=AlgorithmConstants.DISCRETE,
                   depth=3,
                   maxPathLength=-1,
                   completeRuleSetUsed=True,
                   verbose=AlgorithmConstants.VERBOSE)
        graph = tetrad.getTetradGraph()
        dot_str = pc.tetradGraphToDot(graph)
    except Exception as e:
        _logger.error(str(e))
        print(str(e))
    finally:
        # shutdown java vm in `finally` so a vm started here is not left
        # running when the algorithm run fails
        if single_run:
            try:
                pc.stop_vm()
            except Exception as e:
                # a failing shutdown is logged, never raised to the caller
                _logger.error(str(e))
                print(str(e))
    return dot_str
def run(df, pc=None):
    """
    Run the 'pc-all' algorithm (test 'fisher-z-test') against the dataframe to
    return a dot format causal graph.

    :param df: dataframe handed to the tetrad runner as ``dfs``
    :param pc: optional pycausal instance; when supplied, its java vm is neither
        started nor stopped here. When omitted, a new instance is created and
        its java vm is started and stopped within this call.
    :return: dot graph string, or None if the run raised an exception
    """
    single_run = False
    dot_str = None
    try:
        # start java vm and get algo runner
        if pc is None:
            pc = pycausal()
            pc.start_vm()
            single_run = True
        tetrad = s.tetradrunner()
        tetrad.run(algoId='pc-all',
                   dfs=df,
                   testId='fisher-z-test',
                   fasRule=2,
                   depth=2,
                   conflictRule=1,
                   concurrentFAS=True,
                   useMaxPOrientationHeuristic=True,
                   verbose=AlgorithmConstants.VERBOSE)
        graph = tetrad.getTetradGraph()
        dot_str = pc.tetradGraphToDot(graph)
    except Exception as e:
        _logger.error(str(e))
        print(str(e))
    finally:
        # shutdown java vm in `finally` so a vm started here is not left
        # running when the algorithm run fails
        if single_run:
            try:
                pc.stop_vm()
            except Exception as e:
                # a failing shutdown is logged, never raised to the caller
                _logger.error(str(e))
                print(str(e))
    return dot_str
def test_run_full_analysis(self):
    """
    Run ``App.run_analysis`` with two feature selection methods and two causal
    algorithms and check that both the results and the summary are returned.
    """
    # setup
    pc = pycausal()
    pc.start_vm()
    try:
        aitia = App()
        data_dir = os.path.join(self.data_dir, "charity.txt")
        df = pd.read_table(data_dir, sep="\t")

        # just need a test graph
        dot_str = aitia.algo_runner.algo_pc(df, pc)

        # feature selection algos
        feature_selection_list = [
            aitia.feature_selection.LINEAR_REGRESSION,
            aitia.feature_selection.PRINCIPAL_FEATURE_ANALYSIS,
        ]

        # causal algo list
        algorithm_list = [
            aitia.algo_runner.PC,
            aitia.algo_runner.FCI,
        ]

        analysis_results, summary_df, _ = aitia.run_analysis(
            df,
            target_graph_str=dot_str,
            n_features=4,
            feature_selection_list=feature_selection_list,
            algorithm_list=algorithm_list,
            pc=pc)
    finally:
        # the vm started for this test is stopped again, also on failure
        pc.stop_vm()

    self.assertIsNotNone(analysis_results)
    self.assertIsNotNone(summary_df)
def test_run_all_algorithms(self):
    """
    Run every entry returned by ``get_all_causal_algorithms`` against the
    charity data on one shared pycausal instance and check that twelve
    results were collected.
    """
    pc = pycausal()
    pc.start_vm()
    try:
        data_dir = os.path.join(self.data_dir, "charity.txt")
        df = pd.read_table(data_dir, sep="\t")
        # each entry is indexable; position 1 holds the callable to run
        dot_str_list = [
            entry[1](df, pc)
            for entry in self.pc_util.get_all_causal_algorithms()
        ]
    finally:
        # stop the java vm even when one of the algorithms raises
        pc.stop_vm()
    self.assertEqual(len(dot_str_list), 12)
def test_run_causal_analysis(self):
    """
    Run ``App._run_causal_algorithms`` for two causal algorithms against a
    target graph and check that a result object is returned.
    """
    pc = pycausal()
    pc.start_vm()
    try:
        aitia = App()
        data_dir = os.path.join(self.data_dir, "charity.txt")
        df = pd.read_table(data_dir, sep="\t")

        # just need a test graph
        dot_str = aitia.algo_runner.algo_pc(df, pc)

        # algo list
        algorithm_list = [
            aitia.algo_runner.PC,
            aitia.algo_runner.FCI,
        ]

        analysis_results = aitia._run_causal_algorithms(
            df,
            algorithm_list=algorithm_list,
            target_graph_str=dot_str,
            pc=pc)
    finally:
        # the vm started for this test is stopped again, also on failure
        pc.stop_vm()

    self.assertIsNotNone(analysis_results)
def run(df, pc=None):
    """
    Run ``bayesEst`` against the dataframe to return a dot format causal graph.

    :param df: dataframe handed to ``bayesEst``
    :param pc: optional pycausal instance; when supplied, its java vm is neither
        started nor stopped here. When omitted, a new instance is created and
        its java vm is started and stopped within this call.
    :return: dot graph string, or None if the run raised an exception
    """
    # Remember whether the vm was started here. Testing `pc is None` after the
    # local instance has been assigned is always False, so the vm would never
    # be stopped.
    single_run = False
    dot_str = None
    try:
        # start java vm and get algo runner
        if pc is None:
            pc = pycausal()
            pc.start_vm()
            single_run = True
        bayes_est = s.bayesEst(df,
                               depth=-1,
                               alpha=0.05,
                               verbose=AlgorithmConstants.VERBOSE)
        graph = bayes_est.getTetradGraph()
        dot_str = pc.tetradGraphToDot(graph)
    except Exception as e:
        _logger.error(str(e))
        print(str(e))
    finally:
        # stop java vm in `finally` so a vm started here is not left running
        # when the estimation fails
        if single_run:
            try:
                pc.stop_vm()
            except Exception as e:
                # a failing shutdown is logged, never raised to the caller
                _logger.error(str(e))
                print(str(e))
    return dot_str
def run_mixed(df, pc=None):
    """
    Run the 'gfci' algorithm (test 'cg-lr-test', score 'cg-bic-score') against
    a mixed dataframe to return a dot format causal graph.

    :param df: dataframe handed to the tetrad runner as ``dfs``
    :param pc: optional pycausal instance; when supplied, its java vm is neither
        started nor stopped here. When omitted, a new instance is created and
        its java vm is started and stopped within this call.
    :return: dot graph string, or None if the run raised an exception
    """
    single_run = False
    dot_str = None
    try:
        # start java vm and get algo runner
        if pc is None:
            pc = pycausal()
            pc.start_vm()
            single_run = True
        tetrad = s.tetradrunner()
        tetrad.run(algoId='gfci',
                   dfs=df,
                   testId='cg-lr-test',
                   scoreId='cg-bic-score',
                   dataType=AlgorithmConstants.MIXED,
                   numCategoriesToDiscretize=4,
                   maxDegree=3,
                   maxPathLength=-1,
                   completeRuleSetUsed=False,
                   faithfulnessAssumed=True,
                   verbose=AlgorithmConstants.VERBOSE)
        graph = tetrad.getTetradGraph()
        dot_str = pc.tetradGraphToDot(graph)
    except Exception as e:
        _logger.error(str(e))
        print(str(e))
    finally:
        # shutdown java vm in `finally` so a vm started here is not left
        # running when the algorithm run fails
        if single_run:
            try:
                pc.stop_vm()
            except Exception as e:
                # a failing shutdown is logged, never raised to the caller
                _logger.error(str(e))
                print(str(e))
    return dot_str
def _run_causal_algorithms(self, incoming_df, requested_features=None, feature_selection_method=None,
                           n_features=None, algorithm_list=None, target_graph_str=None,
                           latent_edges=None, pc=None, verbose=True):
    """
    Internal. Runs an analysis on the supplied dataframe.
    This can take a PyCausalWrapper if multiple runs are being done.

    :param incoming_df: dataframe passed to every causal algorithm
    :param requested_features: stored on each result as ``feature_list``
    :param feature_selection_method: stored on each result
    :param n_features: stored on each result as ``num_features_requested``
    :param algorithm_list: passed to ``_get_causal_algorithms`` to obtain the
        entries to run; each entry is indexed as ``[0]`` (label stored on the
        result and printed) and ``[1]`` (callable passed to ``_discover_graph``)
    :param target_graph_str: passed to ``_add_causal_metrics``
    :param latent_edges: stored on each result and used to build the graph with
        latent edges; None (the default) is treated as an empty list
    :param pc: optional pycausal instance; when omitted, one is created and its
        java vm is started and stopped within this call
    :param verbose: print a progress line per algorithm when True
    :return: the value returned by ``_add_causal_metrics`` for the filtered results
    """
    # a fresh list per call: a mutable default would be shared by every result
    # object of every call that relies on the default
    if latent_edges is None:
        latent_edges = []

    analysis_results = AnalysisResults()

    # get py-causal if needed
    pc_supplied = pc is not None
    if not pc_supplied:
        pc = pycausal()
        pc.start_vm()

    try:
        for algo in self._get_causal_algorithms(algorithm_list):
            # object to store run result
            analysis_result = SingleAnalysisResult()
            analysis_result.feature_selection_method = feature_selection_method
            analysis_result.feature_list = requested_features
            analysis_result.num_features_requested = n_features
            analysis_result.causal_algorithm = algo[0]
            analysis_result.latent_edges = latent_edges

            if verbose:
                print("Running causal discovery using {0}".format(algo[0]))

            # get the graph from the algo and store the dot graph
            dot_str = self._discover_graph(algo[1], incoming_df, pc)
            analysis_result.dot_format_string = dot_str

            # convert the causal graph
            if dot_str is not None:
                analysis_result.causal_graph = self.graph_util.get_causal_graph_from_dot(dot_str)
                nx_graph = self.graph_util.get_digraph_from_dot(dot_str)
                analysis_result.causal_graph_with_latent_edges = \
                    self.graph_util.get_causal_graph_with_latent_edges(nx_graph, latent_edges)

            # every run is recorded here; results are filtered after the loop
            analysis_results.results.append(analysis_result)
    finally:
        # shutdown the java vm if it was started here, also when a run fails
        if not pc_supplied:
            pc.stop_vm()

    # filter the results
    analysis_results_filtered = self._filter_empty_results(analysis_results)

    # add the causal metrics
    return self._add_causal_metrics(analysis_results_filtered, target_graph_str)