def create_embedding_datamatrix(dm, n_components):
    """Return an all-zero datamatrix shaped to hold an embedding of ``dm``.

    The result keeps the row axis of ``dm`` (row name, a copy of the row
    labels, and a ``.copy()`` of the row metadata) and replaces the column
    axis with ``n_components`` columns labelled ``'LC0'``, ``'LC1'``, ...
    under the column name ``'latent_component'``.

    Parameters
    ----------
    dm : datamatrix
        Source matrix; only its ``rowname``, ``rowlabels``, ``rowmeta``,
        ``matrixname`` and ``shape`` attributes are read.
    n_components : int
        Number of latent-component columns to allocate.

    Returns
    -------
    datamatrix
        A new object named ``'sdae_embedding_of_' + dm.matrixname`` whose
        matrix is ``float32`` zeros of shape ``(dm.shape[0], n_components)``.

    Notes
    -----
    ``dataclasses`` is whatever module the file binds to that name; it must
    expose ``datamatrix`` (presumably the project's own module rather than
    the standard-library one -- confirm against the file's imports).
    """
    return dataclasses.datamatrix(
        rowname=dm.rowname,
        rowlabels=dm.rowlabels.copy(),
        # NOTE: .copy() on the metadata container only; if rowmeta is a
        # plain dict this is a shallow copy and the values stay shared.
        rowmeta=dm.rowmeta.copy(),
        columnname='latent_component',
        columnlabels=np.array(
            ['LC' + str(index) for index in range(n_components)],
            dtype='object'),
        columnmeta={},
        matrixname='sdae_embedding_of_' + dm.matrixname,
        matrix=np.zeros((dm.shape[0], n_components), dtype='float32'),
    )
with open('aligned_matrices_{0}/gene_atb_{1}.pickle'.format( analysis_version, datasetabbrev), mode='rb') as fr: gene_atb = pickle.load(fr) with open('aligned_matrices_{0}/gene_cst_{1}.pickle'.format( analysis_version, datasetabbrev), mode='rb') as fr: gene_cst = pickle.load(fr) # initialize outputs atb_cst = {} atb_cst['test_statistic_values'] = dc.datamatrix( rowname=gene_atb.columnname, rowlabels=gene_atb.columnlabels.copy(), rowmeta=copy.deepcopy(gene_atb.columnmeta), columnname=gene_cst.columnname, columnlabels=gene_cst.columnlabels.copy(), columnmeta=copy.deepcopy(gene_cst.columnmeta), matrixname='atb_cluster_correlation', matrix=np.zeros((gene_atb.shape[1], gene_cst.shape[1]), dtype='float64')) atb_cst['pvalues'] = copy.deepcopy(atb_cst['test_statistic_values']) # computation starttime = time.time() print('starting {0!s} permutations...'.format(numperm)) atb_cst['test_statistic_values'].matrix, atb_cst[ 'pvalues'].matrix = feature_selection_test_function(X=gene_cst.matrix, Y=gene_atb.matrix, numperm=numperm) atb_cst['pvalues'].matrix = atb_cst['pvalues'].matrix.T if batch == 0:
# NOTE(review): `sys`, `custompath` and `custompaths` are defined before this
# excerpt; this statement is presumably the tail of a path-setup loop --
# confirm its nesting against the full file.
sys.path.append(custompath)
del custompath, custompaths

# imports follow the sys.path change so the project package can be resolved
import pickle

import numpy as np

import machinelearning.dataclasses as dc

# load clusters: the pickle holds a 3-tuple of gene symbols, gene ids and
# per-gene cluster ids
with open('clusters.pickle', 'rb') as fr:
    gene_syms, gene_ids, cluster_ids = pickle.load(fr)

# column labels are the distinct cluster ids rendered as strings
unique_cluster_ids = np.array(
    list(map(str, np.unique(cluster_ids))), dtype='object')

# create matrix: genes on rows, clusters on columns, boolean entries
gene_clust = dc.datamatrix(
    rowname='gene_sym',
    rowlabels=gene_syms,
    rowmeta={'gene_id': gene_ids},
    columnname='cluster_id',
    columnlabels=unique_cluster_ids,
    columnmeta={},
    matrixname=(
        'gene_cluster_assignments_from_denoising_autoencoder_applied_to_GTEX'),
    matrix=np.zeros((gene_syms.size, unique_cluster_ids.size), dtype='bool'),
)

# fill one column per cluster: an entry is True where the gene's cluster id
# equals the column label converted back to an integer
for j, cluster_id in enumerate(gene_clust.columnlabels):
    gene_clust.matrix[:, j] = (cluster_ids == int(cluster_id))

# write matrix
with open('gene_cluster_matrix.pickle', 'wb') as fw:
    pickle.dump(gene_clust, fw)
) / np.pi # divide by pi if similarity scores can be negative, otherwise divide by pi/2 else: D = np.arccos(D) / (np.pi / 2) return D else: raise ValueError('invalid distance metric') gene_atb = dc.datamatrix(rowname='GeneSym', rowlabels=np.concatenate( (train.rowlabels, valid.rowlabels, test.rowlabels)), rowmeta={ x: np.concatenate( (train.rowmeta[x], valid.rowmeta[x], test.rowmeta[x])) for x in train.rowmeta }, columnname='Tissue', columnlabels=train.columnlabels.copy(), columnmeta={}, matrixname='zscored_tissue_expression', matrix=np.concatenate( (train.matrix, valid.matrix, test.matrix), 0)) gene_proj = copy.deepcopy(gene_atb) gene_proj.columnlabels = np.array(['X', 'Y'], dtype='object') gene_proj.columnname = 'Neuron' gene_proj.matrixname = '2d_dnn_projection_of_zscored_tissue_expression' gene_proj.matrix = sess.run(h[-1], feed_dict={x: gene_atb.matrix}) gene_proj.updatesizeattribute() gene_proj.updateshapeattribute()