def calculate_f_D_m(df, edge_list):
    """Return the product of per-variable Gaussian densities of ``df``.

    For every column ``v`` of ``df`` a multivariate normal density of the
    observed column is evaluated. Its mean and covariance are assembled from
    the prior parameters of the edges ``[pa, v]`` (via ``para_pri``) and from
    the noise entry ``epsilon_dict[v]`` loaded from
    ``./data_files/epsilon.pickle``.

    Args:
        df: table of observations; ``df.keys()`` yields the variable names
            and ``df[cols].values`` a 2-D array (presumably a pandas
            DataFrame -- confirm against callers).
        edge_list: edge description handed unchanged to ``parents``.

    Returns:
        The product of all densities, or ``None`` as soon as one density
        evaluates to exactly 0.
    """
    # NOTE(review): pickle.load executes arbitrary code from the file; this
    # is only safe while the pickle is a trusted, locally generated artefact.
    with open("./data_files/epsilon.pickle", "rb") as f:
        epsilon_dict = pickle.load(f)

    likelihood = 1
    pa_dict = parents(edge_list)
    n_rows = len(df)

    for v in df.keys():
        # One (mu, sigma) prior pair per incoming edge pa -> v.
        priors = [para_pri([pa, v]) for pa in pa_dict[v]]
        mu_pri = np.array([prior[0] for prior in priors])
        Sigma_pri = np.diag([prior[1] for prior in priors])

        # Observed values of the parents of v, one row per sample.
        design = df[pa_dict[v]].values

        # epsilon_dict[v] is used as (offset added to the mean, value placed
        # on the covariance diagonal) -- presumably (mean, variance) of the
        # noise term; confirm against the code that writes the pickle.
        noise_cov = np.diag([epsilon_dict[v][1]] * n_rows)
        Sigma = noise_cov + np.dot(np.dot(design, Sigma_pri), design.T)
        mu = (np.dot(design, mu_pri) + epsilon_dict[v][0]).reshape(n_rows, 1)

        value = multivariate_normal.pdf(
            df[[v]].T.values[0].tolist(),
            mean=mu.T[0].tolist(),
            cov=Sigma.tolist(),
        )
        # A zero density would wipe out the whole product; callers treat
        # None as "no usable score".
        if value == 0:
            return None
        likelihood *= value

    return likelihood
def search_covariate(edge_list):
    """Collect the covariates of the X -> Y relation from an edge list.

    The edges are loaded into a directed graph, and every simple path
    between ``'X'`` and ``'Y'`` in its undirected version is inspected:

    * if the node right after ``'X'`` is a parent of ``'X'`` and the node
      right before ``'Y'`` is a parent of ``'Y'``, that last node is
      selected as a covariate;
    * otherwise the node right before ``'Y'`` is remembered as lying on a
      path, and is not selected merely for being a parent of ``'Y'``.

    Finally every parent of ``'Y'`` that was not remembered as lying on such
    a path is selected as well.

    Args:
        edge_list: iterable of node sequences; each one is added to the
            graph with ``nx.add_path`` and the whole list is passed to
            ``parents``.

    Returns:
        list: the selected covariates in order of first discovery, without
        duplicates and never containing ``'X'``.
    """
    DAG = nx.DiGraph()
    for edge in edge_list:
        nx.add_path(DAG, edge)
    UNDAG = DAG.to_undirected()

    pa_dict = parents(edge_list)
    X_pa_list = pa_dict['X']
    Y_pa_list = pa_dict['Y']

    ZZ = []
    ZZ_on_path = []
    for path in nx.all_simple_paths(UNDAG, source='X', target='Y'):
        before_y = path[-2]
        if path[1] in X_pa_list and before_y in Y_pa_list:
            # Several paths can end in the same parent of Y; keep it once.
            if before_y not in ZZ:
                ZZ.append(before_y)
        else:
            ZZ_on_path.append(before_y)

    # Append the parents of Y that were not seen on a non-selected path.
    # The extra "not in ZZ" test avoids re-adding a parent already selected
    # in the loop above (previously this produced entries like ['Z', 'Z']).
    for pa in Y_pa_list:
        if pa not in ZZ_on_path and pa not in ZZ:
            ZZ.append(pa)

    # The original code tried to strip 'X' from ZZ while ZZ was still empty,
    # so the check could never fire; apply it to the finished result instead.
    return [z for z in ZZ if z != 'X']
def detect_k(DAG, k, l):
    """Check that ``k`` and ``l`` cover the relevant neighbours of ``'X'``.

    Every simple path between ``'X'`` and ``'Y'`` in the undirected version
    of ``DAG`` is examined through its first node after ``'X'``. A path is
    acceptable when that node belongs to ``k`` or ``l``. Otherwise the
    function fails if the node is

    * a parent of both ``'X'`` and ``'Y'`` (it should have been in ``k``), or
    * a child of ``'X'`` and a parent of ``'Y'`` (it should have been in
      ``l``).

    Args:
        DAG: directed graph exposing ``edges`` and ``to_undirected()``.
        k: container of node names tested with ``in``.
        l: container of node names tested with ``in``.

    Returns:
        bool: ``False`` as soon as an uncovered node of one of the two kinds
        above is found, ``True`` otherwise.
    """
    edge_list = [list(edge) for edge in DAG.edges]
    pa_dict = parents(edge_list)
    UNDAG = DAG.to_undirected()

    for path in nx.all_simple_paths(UNDAG, source='X', target='Y'):
        first_hop = path[1]
        if first_hop in k or first_hop in l:
            continue

        # Common parent of X and Y that is missing from k.
        missing_from_k = first_hop in pa_dict['X'] and first_hop in pa_dict['Y']
        if missing_from_k:
            return False

        # Child of X and parent of Y that is missing from l.
        missing_from_l = 'X' in pa_dict[first_hop] and first_hop in pa_dict['Y']
        if missing_from_l:
            return False

    return True
def calculate_tau_k_D(df, x_do, k, l):
    """Evaluate the expression for Y under the intervention ``X = x_do``.

    The graph is built from ``k`` and ``l`` with ``init_edge`` and the
    coefficients come from ``calculate_param_pos``. The result is the sum of

    * the offset ``epsilon_dict['Y'][0]``,
    * ``theta_XY * x_do``,
    * for each ``z`` in ``k``: ``theta_<z[-1]>Y * epsilon_dict[z][0]``,
    * for each ``z_`` in ``l``:
      ``(theta_X<z_[-1]> * x_do + epsilon_dict[z_][0]) * theta_<z_[-1]>Y``.

    Args:
        df: table of observations; its keys are passed to ``init_edge`` as
            the variable list.
        x_do: value multiplied with the X coefficients.
        k: iterable of variable names; only the last character of each name
            is used to build the coefficient key.
        l: iterable of variable names, handled like ``k`` but with their own
            coefficients computed from ``df``.

    Returns:
        The accumulated value described above.
    """
    # NOTE(review): pickle.load executes arbitrary code from the file; this
    # is only safe while the pickle is a trusted, locally generated artefact.
    with open("./data_files/epsilon.pickle", "rb") as f:
        epsilon_dict = pickle.load(f)

    variables = list(df.keys())
    pa_dict = parents(init_edge(k=k, l=l, vv=variables))

    total = epsilon_dict['Y'][0]
    mu_pos_y = calculate_param_pos(df, 'Y', pa_dict['Y'])
    total += mu_pos_y['theta_XY'] * x_do

    # Variables in k contribute through their offset only.
    for z in k:
        coef_to_y = mu_pos_y['theta_' + z[-1] + 'Y']
        total += coef_to_y * epsilon_dict[z][0]

    # Variables in l contribute X's effect on them plus their own offset,
    # scaled by their coefficient towards Y.
    for z_ in l:
        mu_pos_z = calculate_param_pos(df, z_, pa_dict[z_])
        suffix = z_[-1]
        z_value = mu_pos_z['theta_' + 'X' + suffix] * x_do + epsilon_dict[z_][0]
        total += z_value * mu_pos_y['theta_' + suffix + 'Y']

    return total
def ges_algo(df):
    """Greedily build an edge list for the variables of ``df``.

    Steps, in order:

    1. Start with one singleton entry ``[v]`` per variable and score it with
       ``calculate_f_D_m``.
    2. For every ordered pair ``(v, w)`` with ``v != w``, tentatively add the
       edge ``[w, v]``. The edge is dropped again when the new score is
       lower than the best score so far; otherwise the new score becomes
       the best score.
    3. Remove the singleton entry of every node that has at least one
       parent, then print the result under the "Predicted CPDAG" banner.
    4. For every pair of nodes joined by edges in both directions, pick one
       direction at random to drop, falling back as ``detect_dag`` dictates.
    5. Re-add a singleton entry for every variable no longer mentioned by
       any entry, then print the result under the "Predicted DAG" banner.

    Args:
        df: table of observations; ``df.keys()`` yields the variable names.

    Returns:
        list | None: the final edge list (entries are ``[parent, child]`` or
        ``[node]``), or ``None`` when comparing scores raises ``TypeError``,
        which is what happens when a score is ``None``.
    """
    import random

    def _drop_direction(edges, drop, keep):
        """Remove ``drop`` from ``edges``, reverting as ``detect_dag`` says.

        NOTE(review): ``detect_dag`` presumably returns a truthy value when
        ``edges`` is NOT a valid DAG (the original comment reads "if it's
        not dag, remove the edge") -- confirm against its definition.
        """
        edges.remove(drop)
        if detect_dag(edges):
            # Dropping this direction was rejected: restore it and drop the
            # opposite direction instead.
            edges.append(drop)
            edges.remove(keep)
            if detect_dag(edges):
                # Neither single direction is accepted: remove both.
                edges.remove(drop)

    nodes = list(df.keys())

    # Initialize with one isolated entry per variable.
    res_list = [[v] for v in nodes]
    res = calculate_f_D_m(df, res_list)

    for v in nodes:
        for w in nodes:
            if v == w:
                continue
            candidate = [w, v]
            res_list.append(candidate)
            target_res = calculate_f_D_m(df, res_list)
            try:
                if target_res < res:
                    res_list.remove(candidate)
                else:
                    res = target_res
            # res or target_res is None when a density was exactly zero.
            except TypeError:
                return None

    # Remove the singleton entries of nodes that now have parents.
    for node, node_parents in parents(res_list).items():
        if len(node_parents) > 0:
            res_list.remove([node])

    print("*****Predicted CPDAG*****")
    print_dag(res_list)

    for v in nodes:
        try:
            pa_list = parents(res_list)
            for w in pa_list[v]:
                if v not in pa_list[w]:
                    continue
                # v and w are parents of each other: decide the arrow
                # randomly.
                if random.randint(0, 1) == 1:
                    _drop_direction(res_list, [v, w], [w, v])
                else:
                    _drop_direction(res_list, [w, v], [v, w])
        except KeyError:
            pass

    # Variables that no longer appear in any entry get their singleton back.
    for v in nodes:
        if not any(v in edge for edge in res_list):
            res_list.append([v])

    print("*****Predicted DAG*****")
    print_dag(res_list)
    return res_list