Exemplo n.º 1
0
 def init_causal_graph_dot_src(self, df, forbidden_edges, required_edges):
     """Learn the initial causal graph with FGES and cache its DOT source.

     Starts the py-causal JVM, runs Tetrad's ``fges`` algorithm (``sem-bic``
     score, continuous data) on ``df`` under the given edge constraints, and
     stores the resulting edges, nodes and DOT source on the instance
     (``edges``, ``nodes``, ``dot_src_lines``, ``dot_src``, ``init_dot_src``
     and ``uncolored_dot_src``).

     Args:
         df: Data handed to the Tetrad runner as ``dfs``.
         forbidden_edges: Passed to ``pr.knowledge`` as ``forbiddirect``.
         required_edges: Passed to ``pr.knowledge`` as ``requiredirect``.
     """
     vm = pc()
     vm.start_vm()

     runner = s.tetradrunner()
     knowledge = pr.knowledge(
         forbiddirect=forbidden_edges,
         requiredirect=required_edges,
     )
     fges_options = dict(
         algoId='fges',
         dfs=df,
         priorKnowledge=knowledge,
         scoreId='sem-bic',
         dataType='continuous',
         penaltyDiscount=2,
         maxDegree=-1,
         faithfulnessAssumed=True,
         verbose=True,
     )
     runner.run(**fges_options)

     # The JVM is not stopped here; it stays up after this method returns.
     raw_dot = vm.tetradGraphToDot(runner.getTetradGraph())

     self.edges = runner.getEdges()
     self.nodes = runner.getNodes()

     # Normalise the DOT text by round-tripping it through the line form.
     trimmed_dot = self.trim_init_src_string(raw_dot)
     self.dot_src_lines = self.dot_src_to_lines(trimmed_dot)
     self.dot_src = self.lines_to_dot_src(self.dot_src_lines)

     # Both snapshots start out identical to the freshly built source.
     self.init_dot_src = self.dot_src
     self.uncolored_dot_src = self.init_dot_src
Exemplo n.º 2
0
 def learn_fci(self, df, tabu_edges):
     """Learn a causal graph with Tetrad's FCI algorithm and return its edges.

     The JVM is started at the beginning of the call and stopped again before
     returning.

     Args:
         df: Data handed to the Tetrad runner as ``dfs``.
         tabu_edges: Iterable of forbidden edges. Each edge is converted to
             a list and supplied to Tetrad as ``forbiddirect`` knowledge.

     Returns:
         The edge list reported by ``tetradrunner.getEdges()``.
     """
     from pycausal.pycausal import pycausal
     from pycausal import search
     from pycausal import prior as knowledge_api

     vm = pycausal()
     vm.start_vm()

     forbidden = [list(edge) for edge in tabu_edges]
     knowledge = knowledge_api.knowledge(forbiddirect=forbidden)

     runner = search.tetradrunner()
     runner.getAlgorithmParameters(algoId='fci', testId='fisher-z-test')
     # The forbidden-edge knowledge must be passed explicitly; without
     # ``priorKnowledge`` the search ignores ``tabu_edges`` entirely.
     runner.run(algoId='fci',
                dfs=df,
                testId='fisher-z-test',
                priorKnowledge=knowledge,
                depth=-1,
                maxPathLength=-1,
                completeRuleSetUsed=False,
                verbose=False)
     edges = runner.getEdges()

     vm.stop_vm()
     return edges
Exemplo n.º 3
0
     # Predict the target for X with the current model.
     y_pred = model.predict(X)
     
     # Build the dataset D': a copy of the input frame joined with a single
     # column (named after args.target_var) holding the predictions.
     causal_targets = pd.DataFrame(y_pred, columns = [args.target_var])
     # Reset to a default index before the index-based join below.
     causal_targets.reset_index(drop=True, inplace = True)
     causal_df = input_df.copy().join(causal_targets)
 
     
     # Score part 1: mean squared error between y_pred and y.
     mse = mean_squared_error(y_pred, y)
     #print("MSE = ", mse)
     
     # Score part 2: the edges given in args.e become required-edge prior
     # knowledge for the likelihood helper.
     edges = list(map(tuple, args.e))
     #print("Required edges = ", edges)
     prior = p.knowledge(requiredirect= edges)
     # NOTE(review): get_ll_mixed / get_ll_continuous are defined elsewhere;
     # presumably they return a likelihood score of the graph given the data
     # (see the "LL(G|D)" debug print below) - confirm.
     if args.discrete:
         ll = get_ll_mixed(causal_df, prior)
     else:
         ll = get_ll_continuous(causal_df, prior)
     #print("LL(G|D) = ", ll)
     
     # Per-file combined score, weighted by args.l. It is not read again in
     # the visible code; the ranking below recomputes it from normalized values.
     CAM = ll * args.l + mse
     #print("Overall CAM score = ", CAM)
     # Record this file's scores for the cross-file ranking.
     MSE.append(mse)
     LL.append(ll)
     filenames.append(file)
     # End of per-file work.
 # Combine the normalized LL (weighted by args.l) and normalized MSE lists,
 # then print (score, filename) pairs sorted in ascending order.
 combined = normalize(LL) * args.l + normalize(MSE)
 s = sorted(zip(combined, filenames))
 print(s)
Exemplo n.º 4
0
def fges_stem(file_path, sys_iter, SGA_l, A_D):
    """Alternate FGES structure learning and external inference for each run.

    For every run ``i`` in ``range(sys_iter)``, working in
    ``<file_path>/Output/run<i>``:

    1. While ``completeMatrixn.csv`` is missing, learn a graph from
       ``completeMatrix.csv`` with FGES and write ``Edge.csv``,
       ``completeMatrixn.csv`` and ``BIC.txt``.
    2. Collect the BIC values recorded in ``BIC.txt``.
    3. Create the directory for run ``i + 1`` and invoke the external
       ``inferSGAInNetwork_TDI.exe`` until that directory contains
       ``completeMatrix.csv``; then write every BIC value collected so far
       to ``<file_path>/Output/BIC.csv``.

    Args:
        file_path: Base directory holding ``Input/`` and ``Output/run<i>/``.
        sys_iter: Number of runs to process.
        SGA_l: Column names excluded from the "DEG" column list; also used
            as the first temporal tier of the prior knowledge.
        A_D: Object indexable by a list of column names (presumably a
            DataFrame - confirm against the caller). It is narrowed to the
            non-SGA columns each time structure learning runs.

    Returns:
        None. All results are written to disk.
    """
    # The leading 0.0 is kept so BIC.csv always starts with a baseline row.
    BIC_l = [float(0)]

    SGA = pd.DataFrame(SGA_l)
    SGA.columns = ['cause gene name']
    A_D_i = A_D

    for i in range(sys_iter):
        print(i)
        run_dir = file_path + '/Output/run%i' % i

        # Skipped entirely when a previous invocation already produced
        # completeMatrixn.csv for this run.
        while 'completeMatrixn.csv' not in os.listdir(run_dir):
            A_D_i = _learn_fges_structure(run_dir, SGA, SGA_l, A_D_i)

        # BIC values are saved so convergence can be verified afterwards.
        BIC_l.extend(_read_bic_scores(run_dir + '/BIC.txt'))

        next_dir = file_path + '/Output/run%d' % (i + 1)
        mk_dir(next_dir)

        # NOTE(review): the command is assembled by string concatenation and
        # executed through the shell, so paths containing spaces or shell
        # metacharacters will break it; the exit status is not checked.
        command = (
            './MCMC/inferSGAInNetwork_TDI.exe'
            + ' -m ' + run_dir + '/completeMatrixn.csv'
            + ' -i ' + file_path + '/Input/S_A0.csv'
            + ' -e ' + run_dir + '/Edge.csv'
            + ' -o ' + next_dir + '/ -x 50'
        )
        # Re-run the external tool until its output file shows up.
        while 'completeMatrix.csv' not in os.listdir(next_dir):
            os.system(command)
            time.sleep(20)

        pd.DataFrame(BIC_l).to_csv(file_path + '/Output/BIC.csv',
                                   index=False,
                                   header=False)


def _learn_fges_structure(run_dir, SGA, SGA_l, A_D_i):
    """Run FGES on ``run_dir``'s completeMatrix.csv and write its outputs.

    Writes ``Edge.csv``, ``completeMatrixn.csv`` and appends the Tetrad graph
    text to ``BIC.txt`` inside ``run_dir``.

    Returns:
        ``A_D_i`` restricted to the data columns that are not in ``SGA_l``.
    """
    # Imported lazily so the JVM-backed dependency is only required when
    # structure learning actually has to run.
    from pycausal.pycausal import pycausal
    from pycausal import prior as p
    from pycausal import search as s

    df = pd.read_csv(run_dir + '/completeMatrix.csv', header=0, index_col=None)

    vm = pycausal()
    vm.start_vm(java_max_heap_size='6400M')

    # Prior knowledge is derived from the DEG and SGA lists.
    DEG_l = [x for x in df.columns if x not in SGA_l]
    A_D_i = A_D_i[DEG_l]
    forbid = create_knowledge(SGA, SGA_l, A_D_i)
    temporal = [SGA_l, p.ForbiddenWithin(DEG_l)]
    prior = p.knowledge(forbiddirect=forbid, addtemporal=temporal)

    tetrad = s.tetradrunner()
    tetrad.getAlgorithmParameters(algoId='fges', scoreId='bdeu-score')
    tetrad.run(
        algoId='fges',
        dfs=df,
        scoreId='bdeu-score',
        priorKnowledge=prior,
        dataType='discrete',
        structurePrior=1.0,
        samplePrior=1.0,
        maxDegree=100,
        faithfulnessAssumed=True,
        verbose=True,
        symmetricFirstStep=True
    )

    node_l = tetrad.getNodes()
    edge_l = tetrad.getEdges()

    # Each edge string is split on spaces; columns 0 and 2 are the two
    # endpoints (column 1 is the edge symbol between them).
    edge_split_l = [edge.split(' ') for edge in edge_l]
    edge_df = pd.DataFrame(edge_split_l).iloc[:, [0, 2]]
    edge_df.to_csv(run_dir + '/Edge.csv', index=False, header=False)

    # Keep only the columns that appear as nodes in the learned graph.
    new_df = df.loc[:, node_l]
    new_df.to_csv(run_dir + '/completeMatrixn.csv', index=False, header=True)

    # Append the graph text (which carries the BIC line) and close the file
    # deterministically instead of relying on garbage collection.
    with open(run_dir + '/BIC.txt', 'a') as bic_file:
        print(tetrad.getTetradGraph(), file=bic_file)

    return A_D_i


def _read_bic_scores(bic_path):
    """Return the values found on the ``BIC: -...`` lines of ``bic_path``."""
    scores = []
    with open(bic_path, 'r') as bic_txt:
        for line in bic_txt:
            if 'BIC: -' in line:
                # Split on the label rather than slicing fixed offsets so a
                # final line without a trailing newline keeps its last digit.
                scores.append(float(line.split('BIC:', 1)[1].strip()))
    return scores