def test_transform_test(self):
    """Check transform_ts on two linear series using three lags.

    Series 'a' counts 0..99 and series 'b' counts 100..199, so every lagged
    column is a shifted arange and the expected output can be built by hand.
    """
    max_lag = 3
    frame = pd.DataFrame({'a': np.arange(100), 'b': np.arange(100, 200)})

    # Column index -> label: ordered by increasing lag, 'a' before 'b'.
    expected_mapping = {
        0: 'a_t',
        1: 'b_t',
        2: 'a_t-1',
        3: 'b_t-1',
        4: 'a_t-2',
        5: 'b_t-2',
        6: 'a_t-3',
        7: 'b_t-3',
    }

    # Lag k of 'a' is arange(3 - k, 100 - k); 'b' is the same shifted by 100.
    expected_rows = []
    for lag in range(max_lag + 1):
        lagged_a = np.arange(max_lag - lag, 100 - lag)
        expected_rows.append(lagged_a)
        expected_rows.append(lagged_a + 100)
    expected_matrix = np.array(expected_rows).T

    result_mapping, result_matrix = transform_ts(frame, max_lag)

    self.assertDictEqual(expected_mapping, result_mapping)
    self.assertTrue(np.all(expected_matrix == result_matrix))
def pc_chen_modified(indep_test_func, ts_data, p, alpha):
    """Estimate a lagged graph, testing only lagged-to-current candidate edges.

    Args:
        indep_test_func: Accepted for signature compatibility; this function
            does not use it and always passes ``partial_corr_test`` to the
            skeleton estimation.
        ts_data: Time-series container exposing ``shape[1]`` (number of
            series); forwarded to ``transform_ts``.
        p: Lag order forwarded to ``transform_ts``.
        alpha: Significance level forwarded to ``_estimate_skeleton``.

    Returns:
        A directed networkx graph whose nodes are relabelled through the
        mapping returned by ``transform_ts``.
    """
    n_series = ts_data.shape[1]
    labels, lagged = transform_ts(ts_data, p)
    correlations = np.corrcoef(lagged, rowvar=False)

    # Candidate edges connect the first `n_series` columns with all remaining
    # columns; the block is mirrored so the starting graph is undirected.
    n_nodes = lagged.shape[1]
    candidates = np.zeros((n_nodes, n_nodes))
    candidates[n_series:, :n_series] = 1
    candidates = np.maximum(candidates, candidates.T)

    skeleton = nx.from_numpy_matrix(candidates)
    skeleton, _ = _estimate_skeleton(
        skeleton, partial_corr_test, lagged, alpha, corr_matrix=correlations)

    # Keep only edges that run from a higher column index to a lower one.
    directed = skeleton.to_directed()
    not_downward = [(u, v) for u, v in directed.edges() if v >= u]
    directed.remove_edges_from(not_downward)

    return nx.relabel_nodes(directed, labels)
def pc_chen(indep_test_func, ts_data, p, alpha):
    """Estimate a lagged graph starting from a fully connected skeleton.

    Args:
        indep_test_func: Accepted for signature compatibility; this function
            does not use it and always passes ``partial_corr_test`` to the
            skeleton estimation.
        ts_data: Time-series container exposing ``shape[1]`` (number of
            series); forwarded to ``transform_ts``.
        p: Lag order forwarded to ``transform_ts``.
        alpha: Significance level forwarded to ``_estimate_skeleton``.

    Returns:
        The graph produced by ``estimate_cpdag``, with nodes relabelled
        through the mapping returned by ``transform_ts``.
    """
    n_series = ts_data.shape[1]
    labels, lagged = transform_ts(ts_data, p)
    correlations = np.corrcoef(lagged, rowvar=False)

    # Every pair of distinct columns starts out connected (no self-loops).
    n_nodes = lagged.shape[1]
    fully_connected = np.ones((n_nodes, n_nodes)) - np.eye(n_nodes)

    skeleton = nx.from_numpy_matrix(fully_connected)
    skeleton, sep_sets = _estimate_skeleton(
        skeleton, partial_corr_test, lagged, alpha, corr_matrix=correlations)

    # Drop every edge whose target lies outside the first `n_series` columns.
    directed = skeleton.to_directed()
    into_later_columns = [(u, v) for u, v in directed.edges() if v >= n_series]
    directed.remove_edges_from(into_later_columns)

    oriented = estimate_cpdag(directed, sep_sets)
    return nx.relabel_nodes(oriented, labels)
def pc_incremental_subsets(indep_test, ts, alpha=0.05, max_p=20, start=0,
                           steps=1, ic='bic', patiency=1, verbose=False):
    """Grow a lagged graph lag by lag, pruning edges with subset conditioning.

    For each lag order ``p`` the newly available lagged columns are added as
    nodes, connected to the current-time nodes when the unconditional test
    rejects independence, and then pruned by testing each remaining edge
    against every conditioning subset of the other parents. The lag order
    with the lowest information criterion is returned.

    Args:
        indep_test: Callable invoked as
            ``indep_test(data_matrix, x_t, x, cond, corr_matrix=...)`` and
            returning ``(p_value, statistic)``.
        ts: Time-series container exposing ``shape[1]`` (number of series);
            forwarded to ``transform_ts``.
        alpha: Significance level; an edge is kept while ``p_value <= alpha``.
        max_p: Largest lag order considered.
        start: If positive, the graph is seeded with the result of
            ``pc_chen_modified`` at this lag order.
        steps: Increment of the lag order between iterations.
        ic: Information-criterion selector forwarded to ``_graph_ic``.
        patiency: Number of non-improving iterations after which to stop.
        verbose: If true, also return the per-lag graphs, timings and scores.

    Returns:
        The relabelled graph of the best lag order, or the tuple
        ``(graph, graphs, times, bics)`` when ``verbose`` is true.

    Raises:
        KeyError: If no graph was stored for the selected lag order, e.g.
            when ``start == 0`` and no iteration improved on the initial
            infinite score (or no iteration ran at all).
    """
    dim = ts.shape[1]

    # Per-lag bookkeeping, returned when `verbose` is set.
    graphs = {}
    times = {}
    bics = {}

    # Nodes 0..dim-1 are the current-time columns of the transformed matrix.
    present_nodes = range(dim)
    if start > 0:
        node_mapping, data_matrix = transform_ts(ts, start)
        start_time = time()
        # pc_chen_modified returns a graph relabelled through the mapping of
        # transform_ts, whereas everything below (new_nodes, predecessors,
        # _graph_ic in the loop) works on integer column indices. Map the
        # seed graph back to indices so later iterations extend it instead
        # of mixing two node namespaces.
        index_of = {label: idx for idx, label in node_mapping.items()}
        G = nx.relabel_nodes(
            pc_chen_modified(indep_test, ts, start, alpha), index_of)
        times[start] = time() - start_time
        graphs[start] = nx.relabel_nodes(G.copy(), node_mapping)
        bics[start] = _graph_ic(start, dim, data_matrix, G, ic)
        best_bic = bics[start]
        best_p = start
    else:
        G = nx.DiGraph()
        G.add_nodes_from(present_nodes)
        best_bic = np.inf
        best_p = 0

    no_imp = 0
    for p in range(start + steps, max_p + 1, steps):
        start_time = time()
        node_mapping, data_matrix = transform_ts(ts, p)
        corr_matrix = np.corrcoef(data_matrix, rowvar=False)
        new_nodes = list(
            range((p - steps + 1) * dim, min(p + 1, max_p + 1) * dim))

        # Step 1: add the columns that became available with this lag order.
        G.add_nodes_from(new_nodes)

        # Step 2: connect each new node to every current-time node it is not
        # unconditionally independent of.
        for x_t, x in product(present_nodes, new_nodes):
            p_value, _ = indep_test(data_matrix, x_t, x, set(),
                                    corr_matrix=corr_matrix)
            if p_value <= alpha:
                G.add_edge(x, x_t)

        # Step 3: prune edges using conditioning sets of growing size.
        for subset_size in range(1, len(G.nodes())):
            for x_t in present_nodes:
                in_set = set(G.predecessors(x_t))
                if len(in_set) <= subset_size:
                    continue
                for x in in_set:
                    cond_max = in_set - {x}
                    # cond_max is a set, so the combinations are already
                    # unique; iterating lazily avoids building all of them
                    # before the first independence is found.
                    for cond in combinations(cond_max, subset_size):
                        p_value, _ = indep_test(data_matrix, x_t, x, cond,
                                                corr_matrix=corr_matrix)
                        if p_value > alpha:
                            G.remove_edge(x, x_t)
                            break

        graphs[p] = nx.relabel_nodes(G.copy(), node_mapping)
        times[p] = time() - start_time
        bics[p] = _graph_ic(p, dim, data_matrix, G, ic)

        # Early stopping: lower score is better.
        if bics[p] < best_bic:
            best_bic = bics[p]
            best_p = p
            no_imp = 0
        else:
            no_imp += 1
            if no_imp >= patiency:
                break

    best_graph = nx.relabel_nodes(graphs[best_p], node_mapping)
    if verbose:
        return best_graph, graphs, times, bics
    return best_graph
def pc_incremental_pc1(indep_test, ts, alpha=0.05, max_p=20, start=0,
                       steps=1, ic='bic', patiency=1, verbose=False,
                       **kwargs):
    """Grow a lagged graph lag by lag, pruning edges with a PC1-style search.

    For each lag order ``p`` the newly available lagged columns are added as
    nodes and connected to the current-time nodes when the unconditional
    test rejects independence. Each current-time node's parents are then
    pruned by conditioning on the strongest other parents (ranked by
    absolute test statistic), with the conditioning set growing by one per
    pass. The lag order with the lowest information criterion is returned.

    Args:
        indep_test: Callable invoked as
            ``indep_test(data_matrix, x_t, x, cond, corr_matrix=...)`` and
            returning ``(p_value, statistic)``.
        ts: Time-series container exposing ``shape[1]`` (number of series);
            forwarded to ``transform_ts``.
        alpha: Significance level; an edge is kept while ``p_value <= alpha``.
        max_p: Largest lag order considered.
        start: If positive, the graph is seeded with the result of
            ``pc_chen_modified`` at this lag order.
        steps: Increment of the lag order between iterations.
        ic: Information-criterion selector forwarded to ``_graph_ic``.
        patiency: Number of non-improving iterations after which to stop.
        verbose: If true, also return the per-lag graphs, timings and scores.
        **kwargs: Accepted and ignored.

    Returns:
        The relabelled graph of the best lag order, or the tuple
        ``(graph, graphs, times, bics)`` when ``verbose`` is true.

    Raises:
        KeyError: If no graph was stored for the selected lag order, e.g.
            when ``start == 0`` and no iteration improved on the initial
            infinite score (or no iteration ran at all).
    """
    dim = ts.shape[1]

    # Per-lag bookkeeping, returned when `verbose` is set.
    graphs = {}
    times = {}
    bics = {}

    # Nodes 0..dim-1 are the current-time columns of the transformed matrix.
    present_nodes = range(dim)
    if start > 0:
        node_mapping, data_matrix = transform_ts(ts, start)
        start_time = time()
        # pc_chen_modified returns a graph relabelled through the mapping of
        # transform_ts, whereas everything below (new_nodes, predecessors,
        # _graph_ic in the loop) works on integer column indices. Map the
        # seed graph back to indices so later iterations extend it instead
        # of mixing two node namespaces.
        index_of = {label: idx for idx, label in node_mapping.items()}
        G = nx.relabel_nodes(
            pc_chen_modified(indep_test, ts, start, alpha), index_of)
        times[start] = time() - start_time
        graphs[start] = nx.relabel_nodes(G.copy(), node_mapping)
        bics[start] = _graph_ic(start, dim, data_matrix, G, ic)
        best_bic = bics[start]
        best_p = start
    else:
        G = nx.DiGraph()
        G.add_nodes_from(present_nodes)
        best_bic = np.inf
        best_p = 0

    no_imp = 0
    for p in range(start + steps, max_p + 1, steps):
        start_time = time()
        node_mapping, data_matrix = transform_ts(ts, p)
        corr_matrix = np.corrcoef(data_matrix, rowvar=False)
        new_nodes = list(
            range((p - steps + 1) * dim, min(p + 1, max_p + 1) * dim))

        # Step 1: add the columns that became available with this lag order.
        G.add_nodes_from(new_nodes)

        # Step 2: connect each new node to every current-time node it is not
        # unconditionally independent of.
        for x_t, x in product(present_nodes, new_nodes):
            p_value, statistic = indep_test(data_matrix, x_t, x, set(),
                                            corr_matrix=corr_matrix)
            if p_value <= alpha:
                G.add_edge(x, x_t)

        # Step 3: prune the parents of every current-time node.
        for x_t in present_nodes:
            parents = list(set(G.predecessors(x_t)))
            # No cap on the conditioning-set size for now.
            max_cond_dim = float('inf')
            condition_size = 0
            while (condition_size < max_cond_dim
                   and condition_size < len(parents) - 1):
                # Strength of each surviving parent in this pass; the
                # default of +inf makes min() fall back to the statistic.
                parent_stats = defaultdict(lambda: float('inf'))
                for x in parents:
                    other_parents = [e for e in parents if e != x]
                    condition = other_parents[:condition_size]
                    p_value, statistic = indep_test(
                        data_matrix, x_t, x, condition,
                        corr_matrix=corr_matrix)
                    parent_stats[x] = min(parent_stats[x],
                                          np.abs(statistic))
                    if p_value > alpha:
                        G.remove_edge(x, x_t)
                        del parent_stats[x]
                # Strongest parents first, so the next pass conditions on
                # them before weaker ones.
                ranked = sorted(parent_stats.items(),
                                key=lambda item: item[1], reverse=True)
                parents = [node for node, _ in ranked]
                condition_size += 1

        graphs[p] = nx.relabel_nodes(G.copy(), node_mapping)
        times[p] = time() - start_time
        bics[p] = _graph_ic(p, dim, data_matrix, G, ic)

        # Early stopping: lower score is better.
        if bics[p] < best_bic:
            best_bic = bics[p]
            best_p = p
            no_imp = 0
        else:
            no_imp += 1
            if no_imp >= patiency:
                break

    best_graph = nx.relabel_nodes(graphs[best_p], node_mapping)
    if verbose:
        return best_graph, graphs, times, bics
    return best_graph