# (Tail of a dict literal whose opening lies above this excerpt.)
},
'DM': {
    'name': 'Insect',
}
}

# Load one network per cell type and store a separate sub-graph for every
# layer under data[layer][celltype]['graph'].
for celltype in celltypes:
    print("Loading celltype: {celltype:s}".format(celltype=celltype))
    # The file name is built from the cell type plus `network` and
    # `threshold_str`, both defined outside this excerpt.
    rGfile = '../../04-network/results/network/{celltype:s}/net-{celltype:s}-{network:s}-{threshold:s}.gpickle'.format(
        celltype=celltype, network=network, threshold=threshold_str)
    # NOTE(review): nx.read_gpickle is not available in NetworkX >= 3.0 —
    # confirm the pinned NetworkX version.
    G = nx.read_gpickle(rGfile)
    # Split the loaded graph into its layers.
    for layer in layers:
        print('Separate layer {layer:s}'.format(layer=layer))
        Gl = get_network_layer(G, layer)
        # Keep the layer graph for the pairwise comparison below.
        data[layer][celltype] = {'graph': Gl}

# Compute Jaccard
r = []
# Visit every pair of cell types (presumably itertools.combinations — confirm
# against the imports) and, for each layer, fetch both graphs' node views.
for celltype_i, celltype_j in combinations(celltypes, 2):
    for layer in layers:
        G_i = data[layer][celltype_i]['graph']
        G_j = data[layer][celltype_j]['graph']

        genes_i = G_i.nodes()
        genes_j = G_j.nodes()
# For "sign indeterminacy" np.random.seed(1) # # Load Network # print('Reading {celltype:s}-{network:s}-{threshold:s} Network'.format( celltype=celltype, network=network, threshold=threshold_str)) rGfile_gpickle = 'results/network/{celltype:s}/net-{celltype:s}-{network:s}-{threshold:s}.gpickle'.format( celltype=celltype, network=network, threshold=threshold_str) G = nx.read_gpickle(rGfile_gpickle) # SVD per Layer for layer in ['HS', 'MM', 'DM']: print('Isolate {layer:s} Layer'.format(layer=layer)) Gt = get_network_layer(G, layer=layer) # dfG = pd.DataFrame(data={ 'gene': [d.get('label', None) for n, d in Gt.nodes(data=True)] }, index=Gt.nodes) # print('Extract Adjacency Matrix') M = nx.to_numpy_matrix(Gt) print('Calculating PCA (sklearn)') pca = PCA(n_components=None, svd_solver='full') res = pca.fit(M).transform(M) # columns = ['{:d}c'.format(i) for i in range(1, components + 1)] df_pca = pd.DataFrame(res[:, 0:components],
# Read the command-line options (`parser` is built outside this excerpt).
args = parser.parse_args()
celltype = args.celltype  # spermatocyte or enterocyte
network = 'full'
# Edge attribute whose values are collected below.
attribute = 'combined_score'
threshold = args.threshold

print('Loading Full Network')
path_net = '../../04-network/results/network/{celltype:s}/'.format(
    celltype=celltype)
rGfile_gpickle = path_net + 'net-{celltype:s}-{network:s}.gpickle'.format(
    celltype=celltype, network=network)
# NOTE(review): nx.read_gpickle is not available in NetworkX >= 3.0 —
# confirm the pinned NetworkX version.
G = nx.read_gpickle(rGfile_gpickle)

# One sub-graph per layer.
print('Separate Layers')
HSG = get_network_layer(G, 'HS')
MMG = get_network_layer(G, 'MM')
DMG = get_network_layer(G, 'DM')

# For each layer: take `attribute` from every edge that carries it, divide by
# 1000 (presumably rescaling a 0-1000 score to 0-1 — confirm), and sort the
# values from largest to smallest. Edges without the attribute are skipped.
print('Get edge weights')
values_HS = sorted([
    d[attribute] / 1000 for i, j, d in HSG.edges(data=True)
    if attribute in d
], reverse=True)
values_MM = sorted([
    d[attribute] / 1000 for i, j, d in MMG.edges(data=True)
    if attribute in d
], reverse=True)
# (This statement continues beyond the excerpt.)
values_DM = sorted([
from utils import get_network_layer, ensurePathExists


if __name__ == '__main__':

    # Threshold of the conserved networks to load; file names spell it with
    # 'p' in place of the decimal point (0.5 -> '0p5').
    threshold = 0.5
    threshold_str = str(threshold).replace('.', 'p')

    # --- Spermatocyte conserved network ---
    path_net = '../../04-network/results/network/{celltype:s}/'.format(celltype='spermatocyte')
    rG_spermatocyte_file_gpickle = path_net + 'net-{celltype:s}-{network:s}-{threshold:s}.gpickle'.format(celltype='spermatocyte', network='conserved', threshold=threshold_str)
    print('Load Spermatocyte graph')
    # NOTE(review): nx.read_gpickle is not available in NetworkX >= 3.0 —
    # confirm the pinned NetworkX version.
    Gs = nx.read_gpickle(rG_spermatocyte_file_gpickle)
    # One sub-graph per layer.
    print('Separate Layers')
    HSGs = get_network_layer(Gs, 'HS')
    MMGs = get_network_layer(Gs, 'MM')
    DMGs = get_network_layer(Gs, 'DM')

    # --- Enterocyte conserved network (same steps; path_net is reused) ---
    path_net = '../../04-network/results/network/{celltype:s}/'.format(celltype='enterocyte')
    rG_enterocyte_file_gpickle = path_net + 'net-{celltype:s}-{network:s}-{threshold:s}.gpickle'.format(celltype='enterocyte', network='conserved', threshold=threshold_str)
    print('Load Enterocyte graph')
    Ge = nx.read_gpickle(rG_enterocyte_file_gpickle)
    print('Separate Layers')
    HSGe = get_network_layer(Ge, 'HS')
    MMGe = get_network_layer(Ge, 'MM')
    DMGe = get_network_layer(Ge, 'DM')

    # (This literal continues beyond the excerpt.)
    dict_data = {
        'HS': {
# Load two views of one cell type -- the full network and the conserved
# network at a fixed threshold -- and split each into its layers.
celltype = 'spermatocyte'
threshold = 0.5
# File names spell the threshold with 'p' in place of the decimal point.
threshold_str = str(threshold).replace('.', 'p')

# Both pickles sit in the same per-cell-type results directory.
path_net = '../../04-network/results/network/{celltype:s}/'.format(celltype=celltype)

# --- Full network ---
# NOTE(review): nx.read_gpickle is not available in NetworkX >= 3.0 --
# confirm the pinned NetworkX version.
network = 'full'
rG_full_file_gpickle = path_net + 'net-{celltype:s}-{network:s}.gpickle'.format(
    network=network, celltype=celltype)
print('Load {celltype:s} {network:s} graph'.format(network=network, celltype=celltype))
Gf = nx.read_gpickle(rG_full_file_gpickle)

print('Separate Layers')
HSGf, MMGf, DMGf = [get_network_layer(Gf, code) for code in ('HS', 'MM', 'DM')]

# --- Conserved network (file name additionally carries the threshold) ---
network = 'conserved'
rG_con_file_gpickle = path_net + 'net-{celltype:s}-{network:s}-{threshold:s}.gpickle'.format(
    threshold=threshold_str, network=network, celltype=celltype)
print('Load {celltype:s} {network:s} graph'.format(network=network, celltype=celltype))
Gc = nx.read_gpickle(rG_con_file_gpickle)

print('Separate Layers')
HSGc, MMGc = [get_network_layer(Gc, code) for code in ('HS', 'MM')]
# Load two views of one cell type -- the thresholded network and the
# conserved network, both at the same threshold -- and split each into its
# layers.
celltype = 'enterocyte'
threshold = 0.5
# File names spell the threshold with 'p' in place of the decimal point.
threshold_str = str(threshold).replace('.', 'p')

# Both pickles sit in the same per-cell-type results directory.
path_net = '../../04-network/results/network/{celltype:s}/'.format(celltype=celltype)

# --- Thresholded network ---
# NOTE(review): nx.read_gpickle is not available in NetworkX >= 3.0 --
# confirm the pinned NetworkX version.
network = 'thr'
rG_thr_file_gpickle = path_net + 'net-{celltype:s}-{network:s}-{threshold:s}.gpickle'.format(
    threshold=threshold_str, network=network, celltype=celltype)
print('Load {celltype:s} {network:s} graph'.format(network=network, celltype=celltype))
Gt = nx.read_gpickle(rG_thr_file_gpickle)

print('Separate Layers')
HSGt, MMGt, DMGt = [get_network_layer(Gt, code) for code in ('HS', 'MM', 'DM')]

# --- Conserved network ---
network = 'conserved'
rG_con_file_gpickle = path_net + 'net-{celltype:s}-{network:s}-{threshold:s}.gpickle'.format(
    threshold=threshold_str, network=network, celltype=celltype)
print('Load {celltype:s} {network:s} graph'.format(network=network, celltype=celltype))
Gc = nx.read_gpickle(rG_con_file_gpickle)

print('Separate Layers')
HSGc, MMGc = [get_network_layer(Gc, code) for code in ('HS', 'MM')]
# (Tail of an argument-definition call whose opening lies above this excerpt.)
default='spermatocyte',
type=str,
choices=['spermatocyte', 'enterocyte'],
help="Cell type. Must be either 'spermatocyte' or 'enterocyte'. Defaults to spermatocyte"
)
args = parser.parse_args()
celltype = args.celltype  # spermatocyte or enterocyte

print('Reading Network')
rGfile_gpickle = 'results/net-{celltype:s}.gpickle'.format(
    celltype=celltype)
# NOTE(review): nx.read_gpickle is not available in NetworkX >= 3.0 —
# confirm the pinned NetworkX version.
G = nx.read_gpickle(rGfile_gpickle)

# Only the 'DM' layer is used from here on.
DMG = get_network_layer(G, 'DM')

# Collect the 'core' and 'label' node attributes into one table indexed by
# node; nodes with no 'core' value end up as False after the fillna.
core_DM = nx.get_node_attributes(DMG, name='core')
gene_DM = nx.get_node_attributes(DMG, name='label')
df_DM_m = pd.DataFrame(data={'gene': gene_DM, 'core': core_DM})
df_DM_m['core'] = df_DM_m['core'].fillna(False)

# `r` accumulates result rows. The first row describes an untouched copy of
# the DM layer: None, node count, edge count, and the number of nodes whose
# 'core' attribute equals True.
r = []
Gt = DMG.copy()
# (This call continues beyond the excerpt.)
r.append([
    None,
    Gt.number_of_nodes(),
    Gt.number_of_edges(),
    len([
        i for i, d in Gt.nodes(data=True)
        # NOTE(review): "== True" matches only values equal to True, not
        # every truthy value — confirm that is intended.
        if d.get('core', False) == True
    ])
# One bucket per layer; each bucket maps a cell type to the set of nodes
# found in that layer of the cell type's conserved network.
data = dict(HS={}, MM={}, DM={})

print('-- Conserved --')
for celltype in celltypes:
    print('Loading {celltype:s} {network:s} {threshold:s}'.format(
        threshold=threshold_str, network='conserved', celltype=celltype))
    path_net = '../../04-network/results/network/{celltype:s}/'.format(celltype=celltype)
    rGc_file_gpickle = path_net + 'net-{celltype:s}-{network:s}-{threshold:s}.gpickle'.format(
        threshold=threshold_str, network='conserved', celltype=celltype)
    # NOTE(review): nx.read_gpickle is not available in NetworkX >= 3.0 --
    # confirm the pinned NetworkX version.
    Gc = nx.read_gpickle(rGc_file_gpickle)

    for layer in layers:
        print('Separating layer {layer:s}'.format(layer=layer))
        Gcl = get_network_layer(Gc, layer)
        conserved_genes = data[layer][celltype] = set(Gcl.nodes())

# Report, layer by layer, every region yielded by venn_count.
for layer in layers:
    print('Calculating venn {layer:s} conserved'.format(layer=layer))
    ns = data[layer]
    for intersected, unioned, count in venn_count(ns):
        inside = ' & '.join(sorted(intersected))
        # The " - a - b" suffix is printed only when `unioned` is non-empty.
        if unioned:
            outside = ' - ' + ' - '.join(sorted(unioned))
        else:
            outside = ''
        print('|{}{}| = {}'.format(inside, outside, count))

#
# Non-conserved
#