Пример #1
0
    def run_continuous(df, pc=None):
        """
        Run the 'fges' algorithm against a continuous dataframe to return a dot format causal graph.

        :param df: dataframe handed to the tetrad runner as ``dfs``
        :param pc: optional pycausal instance; when supplied it is used as-is and
                   is neither started nor stopped here. When omitted, a java vm is
                   started for this call and stopped again before returning.
        :return: dot graph string, or None when the run failed (the error is
                 logged and printed instead of being raised)
        """
        single_run = False
        dot_str = None
        try:
            # start java vm and get algo runner
            if pc is None:
                pc = pycausal()
                pc.start_vm()
                single_run = True

            tetrad = s.tetradrunner()
            tetrad.run(algoId='fges',
                       dfs=df,
                       maxDegree=-1,
                       faithfulnessAssumed=True,
                       verbose=AlgorithmConstants.VERBOSE)
            graph = tetrad.getTetradGraph()
            dot_str = pc.tetradGraphToDot(graph)
        except Exception as e:
            _logger.error(str(e))
            print(str(e))
        finally:
            # shutdown java vm in ``finally`` so a vm started by this call is
            # not left running when the algorithm run raises
            if single_run:
                try:
                    pc.stop_vm()
                except Exception as e:
                    # keep the best-effort contract: report, never raise
                    _logger.error(str(e))
                    print(str(e))
        return dot_str
Пример #2
0
    def run_discrete(df, pc=None):
        """
        Run the 'fges' algorithm against a discrete dataframe to return a dot format causal graph.

        :param df: dataframe handed to the tetrad runner as ``dfs``
        :param pc: optional pycausal instance; when supplied it is used as-is and
                   is neither started nor stopped here. When omitted, a java vm is
                   started for this call and stopped again before returning.
        :return: dot graph string, or None when the run failed (the error is
                 logged and printed instead of being raised)
        """
        single_run = False
        dot_str = None
        try:
            # start java vm and get algo runner
            if pc is None:
                pc = pycausal()
                pc.start_vm()
                single_run = True

            tetrad = s.tetradrunner()
            tetrad.run(algoId='fges',
                       dfs=df,
                       scoreId='bdeu-score',
                       dataType=AlgorithmConstants.DISCRETE,
                       maxDegree=3,
                       faithfulnessAssumed=True,
                       symmetricFirstStep=True,
                       verbose=AlgorithmConstants.VERBOSE)
            graph = tetrad.getTetradGraph()
            dot_str = pc.tetradGraphToDot(graph)
        except Exception as e:
            _logger.error(str(e))
            print(str(e))
        finally:
            # shutdown java vm in ``finally`` so a vm started by this call is
            # not left running when the algorithm run raises
            if single_run:
                try:
                    pc.stop_vm()
                except Exception as e:
                    # keep the best-effort contract: report, never raise
                    _logger.error(str(e))
                    print(str(e))
        return dot_str
Пример #3
0
    def test_multiple_algo_run(self):
        """
        Run several algorithm wrappers against one shared java vm.

        The test passes when every wrapper call returns; the returned values
        themselves (dot strings or None) are collected but not inspected.
        """
        # (wrapper, data file) pairs, in the order they are executed
        runs = [
            (self.pc_util.algo_bayes_est, "sim_discrete_data_20vars_100cases.txt"),
            (self.pc_util.algo_fci, "charity.txt"),
            (self.pc_util.algo_pc, "charity.txt"),
            (self.pc_util.algo_fges_continuous, "charity.txt"),
            (self.pc_util.algo_fges_discrete, "audiology.txt"),
        ]
        dot_str_list = []

        pc = pycausal()
        pc.start_vm()
        try:
            for algo_fn, file_name in runs:
                # each run gets a freshly loaded dataframe
                data_path = os.path.join(self.data_dir, file_name)
                df = pd.read_table(data_path, sep="\t")
                dot_str_list.append(algo_fn(df, pc))
        finally:
            # stop the vm even if a read or a wrapper call raised
            pc.stop_vm()

        self.assertEqual(len(dot_str_list), len(runs))
Пример #4
0
    def run_continuous(df, pc=None):
        """
        Run the 'rfci' algorithm against a continuous dataframe to return a dot format causal graph.

        :param df: dataframe handed to the tetrad runner as ``dfs``
        :param pc: optional pycausal instance; when supplied it is used as-is and
                   is neither started nor stopped here. When omitted, a java vm is
                   started for this call and stopped again before returning.
        :return: dot graph string, or None when the run failed (the error is
                 logged and printed instead of being raised)
        """
        single_run = False
        dot_str = None
        try:
            # start java vm and get algo runner
            if pc is None:
                pc = pycausal()
                pc.start_vm()
                single_run = True

            tetrad = s.tetradrunner()
            tetrad.run(algoId='rfci',
                       dfs=df,
                       testId='fisher-z-test',
                       depth=-1,
                       maxPathLength=-1,
                       completeRuleSetUsed=False,
                       verbose=AlgorithmConstants.VERBOSE)
            graph = tetrad.getTetradGraph()
            dot_str = pc.tetradGraphToDot(graph)
        except Exception as e:
            _logger.error(str(e))
            print(str(e))
        finally:
            # shutdown java vm in ``finally`` so a vm started by this call is
            # not left running when the algorithm run raises
            if single_run:
                try:
                    pc.stop_vm()
                except Exception as e:
                    # keep the best-effort contract: report, never raise
                    _logger.error(str(e))
                    print(str(e))
        return dot_str
Пример #5
0
    def run_discrete(df, pc=None):
        """
        Run the 'rfci' algorithm against a discrete dataframe to return a dot format causal graph.

        :param df: dataframe handed to the tetrad runner as ``dfs``
        :param pc: optional pycausal instance; when supplied it is used as-is and
                   is neither started nor stopped here. When omitted, a java vm is
                   started for this call and stopped again before returning.
        :return: dot graph string, or None when the run failed (the error is
                 logged and printed instead of being raised)
        """
        single_run = False
        dot_str = None
        try:
            # start java vm and get algo runner
            if pc is None:
                pc = pycausal()
                pc.start_vm()
                single_run = True

            tetrad = s.tetradrunner()
            tetrad.run(algoId='rfci',
                       dfs=df,
                       testId='chi-square-test',
                       dataType=AlgorithmConstants.DISCRETE,
                       depth=3,
                       maxPathLength=-1,
                       completeRuleSetUsed=True,
                       verbose=AlgorithmConstants.VERBOSE)
            graph = tetrad.getTetradGraph()
            dot_str = pc.tetradGraphToDot(graph)
        except Exception as e:
            _logger.error(str(e))
            print(str(e))
        finally:
            # shutdown java vm in ``finally`` so a vm started by this call is
            # not left running when the algorithm run raises
            if single_run:
                try:
                    pc.stop_vm()
                except Exception as e:
                    # keep the best-effort contract: report, never raise
                    _logger.error(str(e))
                    print(str(e))
        return dot_str
Пример #6
0
    def run(df, pc=None):
        """
        Run the 'pc-all' algorithm against the dataframe to return a dot format causal graph.

        :param df: dataframe handed to the tetrad runner as ``dfs``
        :param pc: optional pycausal instance; when supplied it is used as-is and
                   is neither started nor stopped here. When omitted, a java vm is
                   started for this call and stopped again before returning.
        :return: dot graph string, or None when the run failed (the error is
                 logged and printed instead of being raised)
        """
        single_run = False
        dot_str = None
        try:
            # start java vm and get algo runner
            if pc is None:
                pc = pycausal()
                pc.start_vm()
                single_run = True

            tetrad = s.tetradrunner()
            tetrad.run(algoId='pc-all',
                       dfs=df,
                       testId='fisher-z-test',
                       fasRule=2,
                       depth=2,
                       conflictRule=1,
                       concurrentFAS=True,
                       useMaxPOrientationHeuristic=True,
                       verbose=AlgorithmConstants.VERBOSE)
            graph = tetrad.getTetradGraph()
            dot_str = pc.tetradGraphToDot(graph)
        except Exception as e:
            _logger.error(str(e))
            print(str(e))
        finally:
            # shutdown java vm in ``finally`` so a vm started by this call is
            # not left running when the algorithm run raises
            if single_run:
                try:
                    pc.stop_vm()
                except Exception as e:
                    # keep the best-effort contract: report, never raise
                    _logger.error(str(e))
                    print(str(e))
        return dot_str
Пример #7
0
    def test_run_full_analysis(self):
        """
        Drive ``App.run_analysis`` with two feature selection methods and two
        causal algorithms, and check that results and a summary come back.

        The java vm started here is shared with the analysis via ``pc`` and is
        not stopped by this test.
        """
        # setup
        vm = pycausal()
        vm.start_vm()
        app = App()
        charity_df = pd.read_table(os.path.join(self.data_dir, "charity.txt"), sep="\t")

        # just need a test graph
        target_dot = app.algo_runner.algo_pc(charity_df, vm)

        # feature selection algos
        selectors = [
            app.feature_selection.LINEAR_REGRESSION,
            app.feature_selection.PRINCIPAL_FEATURE_ANALYSIS,
        ]

        # causal algo list
        causal_algos = [app.algo_runner.PC, app.algo_runner.FCI]

        # run_analysis yields three values; the third is not needed here
        results, summary, _ignored = app.run_analysis(
            charity_df,
            target_graph_str=target_dot,
            n_features=4,
            feature_selection_list=selectors,
            algorithm_list=causal_algos,
            pc=vm,
        )
        self.assertTrue(results is not None)
        self.assertTrue(summary is not None)
Пример #8
0
 def test_run_all_algorithms(self):
     """
     Run every algorithm reported by ``pc_util.get_all_causal_algorithms()``
     against the charity dataset on one shared java vm.

     Passes when all twelve calls return; the returned values are collected
     but not inspected.
     """
     pc = pycausal()
     pc.start_vm()
     try:
         data_path = os.path.join(self.data_dir, "charity.txt")
         df = pd.read_table(data_path, sep="\t")
         # each entry is indexable and position 1 holds the callable
         dot_str_list = [entry[1](df, pc)
                         for entry in self.pc_util.get_all_causal_algorithms()]
     finally:
         # stop the vm even if the read or one of the algorithms raised
         pc.stop_vm()
     self.assertEqual(len(dot_str_list), 12)
Пример #9
0
 def test_run_causal_analysis(self):
     """
     Call ``App._run_causal_algorithms`` with the PC and FCI algorithms and
     check that a result object comes back.

     The java vm started here is passed in via ``pc`` and is not stopped by
     this test.
     """
     vm = pycausal()
     vm.start_vm()
     app = App()
     charity_path = os.path.join(self.data_dir, "charity.txt")
     charity_df = pd.read_table(charity_path, sep="\t")
     # just need a test graph
     target_dot = app.algo_runner.algo_pc(charity_df, vm)
     # algo list
     causal_algos = [app.algo_runner.PC, app.algo_runner.FCI]
     results = app._run_causal_algorithms(
         charity_df,
         algorithm_list=causal_algos,
         target_graph_str=target_dot,
         pc=vm,
     )
     self.assertTrue(results is not None)
Пример #10
0
    def run(df, pc=None):
        """
        Run the bayesEst search against the dataframe to return a dot format causal graph.

        :param df: dataframe handed to ``s.bayesEst``
        :param pc: optional pycausal instance; when supplied it is used as-is and
                   is neither started nor stopped here. When omitted, a java vm is
                   started for this call and stopped again before returning.
        :return: dot graph string, or None when the run failed (the error is
                 logged and printed instead of being raised)
        """
        # remember whether this call owns the vm; ``pc`` itself is rebound
        # below, so testing ``pc is None`` later can never be true
        single_run = False
        dot_str = None
        try:
            # start java vm and get algo runner
            if pc is None:
                pc = pycausal()
                pc.start_vm()
                single_run = True

            bayes_est = s.bayesEst(df, depth=-1, alpha=0.05, verbose=AlgorithmConstants.VERBOSE)
            graph = bayes_est.getTetradGraph()
            dot_str = pc.tetradGraphToDot(graph)
        except Exception as e:
            _logger.error(str(e))
            print(str(e))
        finally:
            # stop java vm in ``finally`` so a vm started by this call is
            # not left running when the search raises
            if single_run:
                try:
                    pc.stop_vm()
                except Exception as e:
                    # keep the best-effort contract: report, never raise
                    _logger.error(str(e))
                    print(str(e))
        return dot_str
Пример #11
0
    def run_mixed(df, pc=None):
        """
        Run the 'gfci' algorithm against a mixed dataframe to return a dot format causal graph.

        :param df: dataframe handed to the tetrad runner as ``dfs``
        :param pc: optional pycausal instance; when supplied it is used as-is and
                   is neither started nor stopped here. When omitted, a java vm is
                   started for this call and stopped again before returning.
        :return: dot graph string, or None when the run failed (the error is
                 logged and printed instead of being raised)
        """
        single_run = False
        dot_str = None
        try:
            # start java vm and get algo runner
            if pc is None:
                pc = pycausal()
                pc.start_vm()
                single_run = True

            tetrad = s.tetradrunner()
            tetrad.run(algoId='gfci',
                       dfs=df,
                       testId='cg-lr-test',
                       scoreId='cg-bic-score',
                       dataType=AlgorithmConstants.MIXED,
                       numCategoriesToDiscretize=4,
                       maxDegree=3,
                       maxPathLength=-1,
                       completeRuleSetUsed=False,
                       faithfulnessAssumed=True,
                       verbose=AlgorithmConstants.VERBOSE)
            graph = tetrad.getTetradGraph()
            dot_str = pc.tetradGraphToDot(graph)
        except Exception as e:
            _logger.error(str(e))
            print(str(e))
        finally:
            # shutdown java vm in ``finally`` so a vm started by this call is
            # not left running when the algorithm run raises
            if single_run:
                try:
                    pc.stop_vm()
                except Exception as e:
                    # keep the best-effort contract: report, never raise
                    _logger.error(str(e))
                    print(str(e))
        return dot_str
Пример #12
0
    def _run_causal_algorithms(self,
                               incoming_df,
                               requested_features=None,
                               feature_selection_method=None,
                               n_features=None,
                               algorithm_list=None,
                               target_graph_str=None,
                               latent_edges=None,
                               pc=None,
                               verbose=True):
        """
        Internal. Runs an analysis on the supplied dataframe.
        This can take a PyCausalWrapper if multiple runs are being done.

        :param incoming_df: dataframe handed to every algorithm
        :param requested_features: recorded on each result as ``feature_list``
        :param feature_selection_method: recorded on each result
        :param n_features: recorded on each result as ``num_features_requested``
        :param algorithm_list: passed to ``self._get_causal_algorithms`` to pick
                               the algorithms to run
        :param target_graph_str: passed to ``self._add_causal_metrics``
        :param latent_edges: recorded on each result and used to build the graph
                             with latent edges; defaults to a fresh empty list
        :param pc: optional pycausal instance; when omitted a java vm is started
                   here and stopped again once all algorithms have run
        :param verbose: print a progress line per algorithm when True
        :return: the filtered results after ``self._add_causal_metrics``
        """
        # a new list per call: the value is stored on every result, so a shared
        # ``[]`` default would carry mutations over into later calls
        if latent_edges is None:
            latent_edges = []

        analysis_results = AnalysisResults()

        # get py-causal if needed
        pc_supplied = pc is not None
        if not pc_supplied:
            pc = pycausal()
            pc.start_vm()

        try:
            algo_list = self._get_causal_algorithms(algorithm_list)

            for algo in algo_list:
                # each entry is indexable: position 0 is stored as the
                # algorithm label, position 1 is the callable
                algo_name, algo_fn = algo[0], algo[1]

                # object to store run result
                analysis_result = SingleAnalysisResult()
                analysis_result.feature_selection_method = feature_selection_method
                analysis_result.feature_list = requested_features
                analysis_result.num_features_requested = n_features
                analysis_result.causal_algorithm = algo_name
                analysis_result.latent_edges = latent_edges

                if verbose:
                    print("Running causal discovery using {0}".format(algo_name))

                # get the graph from the algo and store the dot graph
                dot_str = self._discover_graph(algo_fn, incoming_df, pc)
                analysis_result.dot_format_string = dot_str

                # convert the causal graph; skipped when discovery gave nothing
                if dot_str is not None:
                    causal_graph = self.graph_util.get_causal_graph_from_dot(dot_str)
                    analysis_result.causal_graph = causal_graph
                    nx_graph = self.graph_util.get_digraph_from_dot(dot_str)
                    analysis_result.causal_graph_with_latent_edges = \
                        self.graph_util.get_causal_graph_with_latent_edges(nx_graph, latent_edges)

                analysis_results.results.append(analysis_result)
        finally:
            # shutdown the java vm if it was started here, even when an
            # algorithm or graph conversion raised
            if not pc_supplied:
                pc.stop_vm()

        # filter the results
        analysis_results_filtered = self._filter_empty_results(analysis_results)

        # add the causal metrics
        updated_analysis_results = self._add_causal_metrics(analysis_results_filtered, target_graph_str)

        return updated_analysis_results