def getCIGroups(local_data, ds_context=None, scope=None, alpha=0.001, families=None):
    """
    :param local_data: np array
    :param scope: a list of indices of output variables
    :param alpha: threshold
    :param families: obsolete
    :return: np array of clustering

    This function takes a tuple (output, conditional) as input and returns independent groups
    alpha is the cutoff parameter for connected components
    BE CAREFUL WITH SPARSE DATA!
    """
    # data = preproc(local_data, ds_context, None, ohe)
    y, x = get_YX(local_data, ds_context.feature_size)

    # epsilon is assumed to be a small module-level constant that keeps
    # exact-zero p-values from being confused with thresholded entries below
    pvals = testRcoT(y, x) + epsilon

    pvals[pvals > alpha] = 0

    clusters = np.zeros(y.shape[1])
    for i, c in enumerate(connected_components(from_numpy_matrix(pvals))):
        clusters[list(c)] = i + 1

    return split_conditional_data_by_clusters(y, x, clusters, scope, rows=False)
def getIndependentRDCGroups_py(data_slice,
                               threshold,
                               k=None,
                               s=1. / 6.,
                               non_linearity=numpy.sin,
                               n_jobs=1,
                               rand_gen=None):

    rdc_adjacency_matrix = rdc_test(data_slice,
                                    k=k, s=s,
                                    non_linearity=non_linearity,
                                    n_jobs=n_jobs,
                                    rand_gen=rand_gen)

    n_features = len(data_slice.cols)

    #
    # thresholding
    rdc_adjacency_matrix[rdc_adjacency_matrix < threshold] = 0
    # print("thresholding", rdc_adjacency_matrix)

    #
    # getting connected components
    result = numpy.zeros(n_features)
    for i, c in enumerate(connected_components(from_numpy_matrix(rdc_adjacency_matrix))):
        result[list(c)] = i + 1

    return result
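# --- Illustration (not from the original source) ---
# Minimal, self-contained sketch of the pattern shared by the splitting
# functions in this file: threshold a pairwise-dependency matrix, then
# label every connected component as one independent group. The toy matrix
# stands in for the real RDC statistics; imports assume NetworkX < 3.0,
# where from_numpy_matrix still exists.
import numpy
from networkx import from_numpy_matrix
from networkx.algorithms.components import connected_components

adj = numpy.array([[1.0, 0.8, 0.0],
                   [0.8, 1.0, 0.1],
                   [0.0, 0.1, 1.0]])
adj[adj < 0.5] = 0  # thresholding: drop weak dependencies
labels = numpy.zeros(adj.shape[0])
for i, c in enumerate(connected_components(from_numpy_matrix(adj))):
    labels[list(c)] = i + 1
print(labels)  # [1. 1. 2.] -> features 0 and 1 form one group, feature 2 its own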
def plotNetwork(path, corr):
    # Transform it in a links data frame
    # links = corr.stack().reset_index()

    # Build graph
    adj_matrix = corr
    constits_latest = corr.index

    # remove self-loops: zero out entries equal to 1 (within tolerance)
    adj_matrix = np.where((adj_matrix <= 1.000001) & (adj_matrix >= 0.99999), 0, adj_matrix)
    # replace values that are below threshold

    # create undirected graph from adj_matrix
    graph = from_numpy_matrix(adj_matrix, parallel_edges=False, create_using=nx.Graph())

    # set names to cryptos
    graph = nx.relabel.relabel_nodes(graph, dict(zip(range(len(constits_latest)), constits_latest)))

    pos_og = nx.circular_layout(graph, scale=2)
    pos = nx.circular_layout(graph, scale=1.7)
    for p in pos:  # raise text positions
        if pos[p][1] > 1:
            pos[p][1] += 0.15
        if pos[p][1] < -1:
            pos[p][1] -= 0.15
        elif pos[p][0] < 0:
            pos[p][0] -= 0.3
        else:
            pos[p][0] += 0.3

    fig = mpl.figure(figsize=(5, 5))
    nx.draw(graph, pos_og, with_labels=False)
    nx.draw_networkx_labels(graph, pos)
    fig.savefig(path, dpi=300, transparent=True)
    mpl.clf()
    mpl.close()
    return
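# --- Illustration (not from the original source) ---
# Hypothetical invocation of plotNetwork, assuming the version above that
# uses its `corr` argument: build a correlation matrix from random returns
# and plot its network. Ticker names and the output path are made up.
import numpy as np
import pandas as pd

returns = pd.DataFrame(np.random.randn(100, 4),
                       columns=["BTC", "ETH", "XRP", "LTC"])
plotNetwork("network.png", returns.corr())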
def getCIGroups(local_data, ds_context=None, scope=None, alpha=0.001, families=None):
    """
    :param local_data: np array
    :param scope: a list of indices of output variables
    :param alpha: threshold
    :param families: obsolete
    :return: np array of clustering

    This function takes a tuple (output, conditional) as input and returns independent groups
    alpha is the cutoff parameter for connected components
    BE CAREFUL WITH SPARSE DATA!
    """
    data = preproc(local_data, ds_context, None, ohe)

    num_instance = data.shape[0]

    output_mask = np.zeros(data.shape, dtype=bool)  # todo check scope and node.scope again
    output_mask[:, np.arange(len(scope))] = True

    dataOut = data[output_mask].reshape(num_instance, -1)
    dataIn = data[~output_mask].reshape(num_instance, -1)

    assert len(dataIn) > 0
    assert len(dataOut) > 0

    pvals = testRcoT(dataOut, dataIn)

    pvals[pvals > alpha] = 0

    clusters = np.zeros(dataOut.shape[1])
    for i, c in enumerate(connected_components(from_numpy_matrix(pvals))):
        clusters[list(c)] = i + 1

    return split_conditional_data_by_clusters(local_data, clusters, scope, rows=False)
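# --- Illustration (not from the original source) ---
# Toy demonstration of the output/conditional split used above: the first
# len(scope) columns are treated as output variables, the remaining
# columns as conditionals.
import numpy as np

data = np.arange(12).reshape(3, 4)
scope = [0, 1]
output_mask = np.zeros(data.shape, dtype=bool)
output_mask[:, np.arange(len(scope))] = True
dataOut = data[output_mask].reshape(3, -1)   # columns 0 and 1
dataIn = data[~output_mask].reshape(3, -1)   # columns 2 and 3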
def read_sigle_data(data_dir, filename):

    temp = h5py.File(osp.join(data_dir, filename), 'r')

    # read edge and edge attribute ([()] replaces the .value accessor removed in h5py 3.0)
    pcorr = np.abs(temp['pcorr'][()])

    # only keep the top 5% of edges (95th-percentile threshold)
    th = np.percentile(pcorr.reshape(-1), 95)
    pcorr[pcorr < th] = 0  # set a threshold

    num_nodes = pcorr.shape[0]
    G = from_numpy_matrix(pcorr)
    A = nx.to_scipy_sparse_matrix(G)
    adj = A.tocoo()
    edge_att = np.zeros(len(adj.row))
    for i in range(len(adj.row)):
        edge_att[i] = pcorr[adj.row[i], adj.col[i]]
    edge_index = np.stack([adj.row, adj.col])

    edge_index, edge_att = remove_self_loops(torch.from_numpy(edge_index).long(),
                                             torch.from_numpy(edge_att).float())
    edge_index, edge_att = coalesce(edge_index, edge_att, num_nodes, num_nodes)
    att = temp['corr'][()]

    return edge_att.data.numpy(), edge_index.data.numpy(), att, temp['indicator'][()], num_nodes
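# --- Illustration (not from the original source) ---
# The sparsification step of read_sigle_data in isolation, with a random
# symmetric matrix standing in for the partial-correlation data: keep only
# entries at or above the 95th percentile, then read the surviving edges
# out of a COO sparse matrix. Assumes NetworkX < 3.0.
import numpy as np
import networkx as nx

a = np.random.rand(10, 10)
pcorr = np.abs(a + a.T)                    # symmetric stand-in for 'pcorr'
th = np.percentile(pcorr.reshape(-1), 95)
pcorr[pcorr < th] = 0
adj = nx.to_scipy_sparse_matrix(nx.from_numpy_matrix(pcorr)).tocoo()
edge_index = np.stack([adj.row, adj.col])  # shape (2, n_edges)
edge_att = pcorr[adj.row, adj.col]         # matching edge weights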
def process_single_data(index, use_gdc=False):
    # key to how we adapt to our model
    # read edge and edge attribute, partial correlation
    PTE_data = np.load("/home/wenhuicu/ImagePTE/pte_gcn/PRGNN_fMRI-main/PTE_parPearson_BCI-DNI.npz")
    NONPTE_data = np.load("/home/wenhuicu/ImagePTE/pte_gcn/PRGNN_fMRI-main/NONPTE_parPearson_BCI-DNI.npz")

    if index < PTE_data["conn_mat"].shape[0]:
        data = PTE_data
        new_index = index
        label = 1
    else:
        data = NONPTE_data
        new_index = index - PTE_data["conn_mat"].shape[0]
        label = 0

    pcorr = np.abs(data['partial_mat'][new_index])

    # only keep the top 5% of edges (95th-percentile threshold)
    th = np.percentile(pcorr.reshape(-1), 95)
    pcorr[pcorr < th] = 0  # set a threshold

    num_nodes = pcorr.shape[0]
    G = from_numpy_matrix(pcorr)
    A = nx.to_scipy_sparse_matrix(G)
    adj = A.tocoo()
    edge_att = np.zeros(len(adj.row))
    for i in range(len(adj.row)):
        edge_att[i] = pcorr[adj.row[i], adj.col[i]]
    edge_index = np.stack([adj.row, adj.col])

    edge_index, edge_att = remove_self_loops(torch.from_numpy(edge_index),
                                             torch.from_numpy(edge_att))
    edge_index = edge_index.long()
    edge_index, edge_att = coalesce(edge_index, edge_att, num_nodes, num_nodes)

    att = data['conn_mat'][new_index]
    att_torch = torch.from_numpy(att).float()
    y_torch = torch.from_numpy(np.array(label)).long()  # classification

    data = Data(x=att_torch, edge_index=edge_index.long(), y=y_torch, edge_attr=edge_att)

    if use_gdc:
        '''
        Implementation of https://papers.nips.cc/paper/2019/hash/23c894276a2c5a16470e6a31f4618d73-Abstract.html
        '''
        data.edge_attr = data.edge_attr.squeeze()
        gdc = GDC(self_loop_weight=1, normalization_in='sym',
                  normalization_out='col',
                  diffusion_kwargs=dict(method='ppr', alpha=0.2),
                  sparsification_kwargs=dict(method='topk', k=20, dim=0),
                  exact=True)
        data = gdc(data)
        return data.edge_attr.data.numpy(), data.edge_index.data.numpy(), data.x.data.numpy(), data.y.data.item(), num_nodes
    else:
        return edge_att.data.numpy(), edge_index.data.numpy(), att, label, num_nodes
def make_disk_graph(X, radius, metric='euclidean'):
    """Make a generalized disk graph, in which points whose distance is
    less than a certain radius are considered adjacent.

    Params:
        X: a 2D numpy array of shape (n_observations, n_features).
        radius: the radius of disks for adjacency.
        metric: string, representing which metric. Options are given by
            sklearn.metrics.pairwise.distance_metrics. Default is 'euclidean'.

    Returns: a networkx simple Graph
    """
    metric = distance_metrics()[metric]
    dist = metric(X)
    adj = np.asarray(dist < radius, dtype=float)
    return from_numpy_matrix(adj, create_using=Graph)
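# --- Illustration (not from the original source) ---
# Example usage of make_disk_graph on a random point cloud. Note that the
# zero diagonal of the distance matrix satisfies `dist < radius`, so each
# node also gets a self-loop.
import numpy as np

X = np.random.rand(20, 2)
G = make_disk_graph(X, radius=0.3)
print(G.number_of_nodes(), G.number_of_edges())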
def process_single_data(index):
    # key to how we adapt to our model
    # read edge and edge attribute, partial correlation
    PTE_data = np.load("/home/wenhuicu/ImagePTE/pte_gcn/PRGNN_fMRI-main/PTE_parPearson_BCI-DNI_aug.npz")
    NONPTE_data = np.load("/home/wenhuicu/ImagePTE/pte_gcn/PRGNN_fMRI-main/NONPTE_parPearson_BCI-DNI_aug.npz")

    if index < PTE_data["conn_mat"].shape[0]:
        data = PTE_data
        new_index = index
    else:
        print(index)
        data = NONPTE_data
        new_index = index - PTE_data["conn_mat"].shape[0]

    pcorr = np.abs(data['partial_mat'][new_index])

    # only keep the top 5% of edges (95th-percentile threshold)
    th = np.percentile(pcorr.reshape(-1), 95)
    pcorr[pcorr < th] = 0  # set a threshold

    num_nodes = pcorr.shape[0]
    G = from_numpy_matrix(pcorr)
    A = nx.to_scipy_sparse_matrix(G)
    adj = A.tocoo()
    edge_att = np.zeros(len(adj.row))
    for i in range(len(adj.row)):
        edge_att[i] = pcorr[adj.row[i], adj.col[i]]
    edge_index = np.stack([adj.row, adj.col])

    edge_index, edge_att = remove_self_loops(torch.from_numpy(edge_index).long(),
                                             torch.from_numpy(edge_att).float())
    edge_index, edge_att = coalesce(edge_index, edge_att, num_nodes, num_nodes)

    # node attribute, Pearson correlation
    node_att = data["conn_mat"][new_index]
    pearson_corr = data["conn_mat"][new_index]
    mean_fmri = np.mean(data['features'][new_index], axis=-1, keepdims=True)
    std_fmri = np.std(data['features'][new_index], axis=-1, keepdims=True)
    # node_att = np.concatenate([pearson_corr, mean_fmri], axis=-1)

    return edge_att.data.numpy(), edge_index.data.numpy(), node_att, num_nodes
def load_from_numpy(self, np_adjacency_matrix):
    """
    Load data from a 2D numpy array interpreted as an adjacency matrix
    into the Graph datatype of NetworkX

    Parameters
    ----------
    np_adjacency_matrix: np.array of shape (Nnodes, Nnodes)
        Adjacency matrix to be converted to graph

    Returns
    -------
    NetworkX graph with nodes labeled by index and directed weights
    given as per the np adjacency matrix
    """
    # self.graph = to_directed(from_numpy_matrix(np.array(np_adjacency_matrix)))
    graph = convert_matrix.from_numpy_matrix(np.array(np_adjacency_matrix), create_using=DiGraph)
    self.nodes = len(graph.nodes)
    return graph
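# --- Illustration (not from the original source) ---
# Hypothetical usage: a weighted 3-node directed cycle. `loader` stands
# for an instance of the class this method belongs to.
import numpy as np

adj = np.array([[0.0, 1.5, 0.0],
                [0.0, 0.0, 2.0],
                [0.5, 0.0, 0.0]])
g = loader.load_from_numpy(adj)
print(g.edges(data=True))  # each directed edge carries its matrix entry as 'weight'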
def make_kernel_graph(X, metric='rbf', cutoff=0, **kwargs):
    """Make a weighted graph, using a pairwise kernel function for weights.

    Params:
        X: a 2D numpy array of shape (n_observations, n_features).
        metric: string or function, the metric to use when calculating the kernel.
            Options are given by sklearn.metrics.pairwise.pairwise_kernels.
            Default is 'rbf'.
        cutoff: float, optional kernel truncation value; entries below it are set to 0.
        **kwargs: passed to pairwise_kernels

    Returns: a networkx weighted Graph
    """
    kernel = pairwise_kernels(X, metric=metric, **kwargs)
    if cutoff:
        kernel[kernel < cutoff] = 0
    return from_numpy_matrix(kernel, create_using=Graph)
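# --- Illustration (not from the original source) ---
# Example usage of make_kernel_graph: RBF-kernel weights over random data,
# truncated at 0.5 so only strongly similar pairs keep an edge. The gamma
# value is passed through **kwargs to sklearn's pairwise_kernels.
import numpy as np

X = np.random.rand(15, 3)
G = make_kernel_graph(X, metric='rbf', cutoff=0.5, gamma=1.0)
print(G.number_of_edges())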
def getIndependentGDTGroups_py(data_slice,
                               threshold,
                               # n_jobs=1,
                               rand_gen=None):

    gdt_adjacency_matrix = pairwise_gdt(data_slice)

    n_features = len(data_slice.cols)

    #
    # thresholding
    gdt_adjacency_matrix[gdt_adjacency_matrix < threshold] = 0
    # print("thresholding", gdt_adjacency_matrix)

    #
    # getting connected components
    result = numpy.zeros(n_features)
    for i, c in enumerate(connected_components(from_numpy_matrix(gdt_adjacency_matrix))):
        result[list(c)] = i + 1

    return result
def getIndependentGroupsStabilityTest(data, alpha=0.001):
    # data = numpy.loadtxt("/Users/alejomc/Dropbox/pspn/spyn/experiments/graphclassification/wl/1mutag.build_wl_corpus.csv", dtype=int, delimiter=",")
    # df = pandas.read_csv('/Users/alejomc/Dropbox/pspn/spyn/experiments/graphclassification/wl/1mutag.build_wl_corpus.csv')
    # df = pandas.read_csv('/Users/alejomc/Dropbox/pspn/spyn/experiments/graphclassification/wl/5nci1.build_wl_corpus.csv')
    df = DataFrame(data, columns=["V" + str(i) for i in range(1, data.shape[1] + 1)])

    # pvals = bonferroniCorrection(computeEstabilityTest(df, 0))
    # compute stability test in parallel, one column at a time
    with Pool() as pool:
        pvals = pool.starmap(computePvals, zip(repeat(df), range(df.shape[1])))
    pvals = numpy.asarray(pvals)

    # convert graph to undirected graph: symmetrize with the element-wise minimum
    for i, j in zip(*numpy.tril_indices(pvals.shape[1])):
        pvals[i, j] = pvals[j, i] = min(pvals[i, j], pvals[j, i])
    pvals[numpy.diag_indices_from(pvals)] = 1

    pvals[pvals > alpha] = 0

    result = numpy.zeros(df.shape[1])
    for i, c in enumerate(connected_components(from_numpy_matrix(pvals))):
        result[list(c)] = i + 1
    return result
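# --- Illustration (not from the original source) ---
# The symmetrization loop above has a vectorized numpy equivalent: take
# the element-wise minimum of the p-value matrix and its transpose, then
# reset the diagonal. A random matrix stands in for the test p-values.
import numpy

pvals = numpy.random.rand(5, 5)
pvals = numpy.minimum(pvals, pvals.T)          # same result as the tril_indices loop
pvals[numpy.diag_indices_from(pvals)] = 1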
def getIndependentRDCGroups_py(local_data, threshold, meta_types, domains,
                               k=None, s=1.0 / 6.0, non_linearity=np.sin,
                               n_jobs=-2, rand_gen=None):
    rdc_adjacency_matrix = rdc_test(
        local_data, meta_types, domains, k=k, s=s,
        non_linearity=non_linearity, n_jobs=n_jobs, rand_gen=rand_gen
    )

    #
    # Why is this necessary?
    #
    rdc_adjacency_matrix[np.isnan(rdc_adjacency_matrix)] = 0
    n_features = local_data.shape[1]

    #
    # thresholding
    rdc_adjacency_matrix[rdc_adjacency_matrix < threshold] = 0
    # logger.info("thresholding %s", rdc_adjacency_matrix)

    #
    # getting connected components
    result = np.zeros(n_features)
    for i, c in enumerate(connected_components(from_numpy_matrix(rdc_adjacency_matrix))):
        result[list(c)] = i + 1

    return result
up_to_conjugation_elements[conjugation_class] = 0
up_to_conjugation_elements[conjugation_class] += 1

graph_labelling = UpToConjugationGraphLabelling(up_to_conjugation_elements, elements_generator)
graph_covering = GraphCovering(graph, representation)

action2 = PGLGroupAction(pgl2)
representation2 = TransitiveActionUnitaryStandardRepresentation(action2, pgl2.get_pf().infinity())
graph_labelling2 = UpToConjugationGraphLabelling(conjugation_classes, pgl2)
graph_covering2 = GraphCovering(graph, representation2)

matching_polynomials = {}
characteristic_polynomials = {}

for labelling, weight in graph_labelling.weighted_labellings(graph):
    adjacency = graph_covering.adjacency(labelling).astype(int)
    lifted_graph = from_numpy_matrix(adjacency, create_using=nx.MultiGraph, parallel_edges=True)
    polynomial = get_matching_polynomial(lifted_graph)
    matching_polynomials[list(labelling.values())[0]] = (polynomial, weight)

for labelling, weight in graph_labelling2.weighted_labellings(graph):
    polynomial = graph_covering2.get_polynomial(labelling)
    characteristic_polynomials[list(labelling.values())[0]] = (polynomial, weight)

s = q + 1
# s = sympy.symbols("s")
m = []
elements = list(characteristic_polynomials.keys())
weights = []
        if final_mat.iloc[i, j] >= final_mat.iloc[j, i]:
            final_mat.iloc[i, j] += final_mat.iloc[j, i]
            final_mat.iloc[j, i] = 0
        else:
            final_mat.iloc[j, i] += final_mat.iloc[i, j]
            final_mat.iloc[i, j] = 0

print(final_mat)

for column in CONFIG.column_names:
    final_mat[column] = np.where(np.abs(final_mat[column]) < .5, 0, 1)

# Save final binary adjacency matrix
final_mat.to_csv("results/final_adjacency_matrix.csv", index=True)

# Draw the DAG
final_DAG = from_numpy_matrix(final_mat.to_numpy(), create_using=nx.DiGraph)
final_DAG = nx.relabel_nodes(
    final_DAG,
    dict(zip(list(range(CONFIG.data_variable_size)), CONFIG.column_names)))
final_DAG.remove_nodes_from(list(nx.isolates(final_DAG)))
nx.draw(
    final_DAG,
    node_color="lightcoral",
    node_size=75,
    font_size=3,
    width=0.5,
    arrowsize=4,
    with_labels=True,
    pos=nx.spring_layout(final_DAG),
)
def get_translations(structure, structural_type='100'):
    assert structural_type in ['100', '110']
    metal = [
        Element.from_Z(z).symbol for z in set(structure.atomic_numbers)
        if Element.from_Z(z).is_metal or Element.from_Z(z).is_metalloid
    ]
    mul_structures, conn_components_, ab_indices = [], [], [0, 1, 2]
    conn_indices = [[2, 1, 1], [1, 2, 1], [1, 1, 2]]
    number_connected_components = [
        conn_comps_sci(adjacency_matrix(structure.__mul__(i)))[0]
        for i in conn_indices
    ]
    c_index = number_connected_components.index(max(number_connected_components))
    ab_indices.remove(c_index)
    extended_structure = structure.__mul__(3)
    extended_components = list(
        conn_comps_netx(from_numpy_matrix(adjacency_matrix(extended_structure))))
    extended_sites = [[extended_structure[i] for i in components]
                      for components in extended_components]
    layers = [
        s for s in extended_sites
        if metal[0] in [site.specie.symbol for site in s]
    ]
    max_coords = [
        max([a.coords[c_index] for a in layer if a.specie.symbol == metal[0]])
        for layer in layers
    ]
    first_layer_index = max_coords.index(sorted(max_coords)[0])
    second_layer_index = max_coords.index(sorted(max_coords)[1])
    first_layer_coords = array([
        a.coords for a in layers[first_layer_index]
        if a.specie.symbol == metal[0]
    ])
    second_layer_coords = array([
        a.coords for a in layers[second_layer_index]
        if a.specie.symbol == metal[0]
    ])
    if structural_type == '110':
        first_layer_coords = first_layer_coords[
            first_layer_coords[:, c_index].argsort()][:int(first_layer_coords.shape[0] / 2), :]
        second_layer_coords = second_layer_coords[
            second_layer_coords[:, c_index].argsort()][:int(second_layer_coords.shape[0] / 2), :]
    a_axis = extended_structure.lattice.matrix[ab_indices][0]
    b_axis = extended_structure.lattice.matrix[ab_indices][1]
    perp = cross(a_axis / norm(a_axis), b_axis / norm(b_axis))
    dir_1 = sorted([
        c[0] - c[1] for c in combinations(first_layer_coords, 2)
        if abs(dot(c[0] - c[1], perp) / norm(c[0] - c[1]) / norm(perp)) < 0.07
    ], key=norm)[0]
    m_dist = norm(dir_1)
    dir_1 = dir_1 / norm(dir_1)
    dir_2 = cross(perp / norm(perp), dir_1 / norm(dir_1))
    a_projections, b_projections = [], []
    for site_coords in first_layer_coords:
        nearest_site = second_layer_coords[KDTree(second_layer_coords).query(site_coords)[1]]
        a_projections.append(dot((site_coords - nearest_site), dir_1))
        b_projections.append(dot((site_coords - nearest_site), dir_2))
    a_translation = min([
        min(abs(m_dist - abs(p) % m_dist), abs(p) % m_dist)
        for p in a_projections
    ]) / m_dist
    if structural_type == '110':
        m_dist = m_dist * sqrt(2)
    b_translation = min([
        min(abs(m_dist - abs(p) % m_dist), abs(p) % m_dist)
        for p in b_projections
    ]) / m_dist
    return sorted([round(a_translation, 2), round(b_translation, 2)])
def getIndependentRDCGroups_py(local_data, threshold, meta_types, domains, scope,
                               l_rfft=None, is_pair=False,
                               k=None, s=1.0 / 6.0, non_linearity=np.sin,
                               n_jobs=-2, rand_gen=None):
    # modified by zhongjie on 04.10.2019, by adding scope and keepComplexPairs
    rdc_adjacency_matrix = rdc_test(
        local_data, meta_types, domains, k=k, s=s,
        non_linearity=non_linearity, n_jobs=n_jobs, rand_gen=rand_gen
    )

    #
    # Why is this necessary?
    #
    rdc_adjacency_matrix[np.isnan(rdc_adjacency_matrix)] = 0
    n_features = local_data.shape[1]

    #
    # Add function to keep correlation between real and imag coefficients.
    # rdc_adjacency_matrix = keepComplexPairs(rdc_adjacency_matrix, scope)
    """
    Additional comments:
    we can do splitting based on real coefs only; in order to achieve that,
    all the correlations between imag-real and imag-imag can be set to 0
    """
    if l_rfft is not None:
        for s_real in scope:
            # select scope that belongs to the REAL part
            if l_rfft - 1 > s_real % (l_rfft * 2) > 0:
                # keep the real and imag coefs connected
                index_real = scope.index(s_real)
                index_imag = scope.index(s_real + l_rfft)
                rdc_adjacency_matrix[index_real, index_imag] = 1
                rdc_adjacency_matrix[index_imag, index_real] = 1

    #
    # thresholding (applied once; the duplicated threshold lines were redundant)
    rdc_adjacency_matrix[rdc_adjacency_matrix < threshold] = 0
    # logger.info("thresholding %s", rdc_adjacency_matrix)

    #
    # getting connected components
    result = np.zeros(n_features)
    for i, c in enumerate(connected_components(from_numpy_matrix(rdc_adjacency_matrix))):
        result[list(c)] = i + 1

    return result
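# --- Illustration (not from the original source) ---
# Toy check of the pairing condition in the function above: with
# l_rfft = 4, a scope entry s_real counts as a real rFFT coefficient when
# 0 < s_real % (2 * l_rfft) < l_rfft - 1, and is paired with the imaginary
# coefficient at s_real + l_rfft. The scope values are made up.
l_rfft = 4
scope = list(range(2 * l_rfft))
for s_real in scope:
    if l_rfft - 1 > s_real % (l_rfft * 2) > 0:
        print(s_real, "pairs with", s_real + l_rfft)
# prints: "1 pairs with 5" and "2 pairs with 6"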