示例#1
0
        },
        'DM': {
            'name': 'Insect',
        }
    }

    # Load every cell type's pickled network once, then split it into layers.
    for celltype in celltypes:
        print(f"Loading celltype: {celltype:s}")
        # Relative path to the pickled graph for this cell type / network / threshold.
        rGfile = (
            f'../../04-network/results/network/{celltype:s}/'
            f'net-{celltype:s}-{network:s}-{threshold_str:s}.gpickle'
        )
        G = nx.read_gpickle(rGfile)
        for layer in layers:
            print(f'Separate layer {layer:s}')
            Gl = get_network_layer(G, layer)
            # `data[layer]` must already exist; each cell type gets its own entry.
            data[layer][celltype] = dict(graph=Gl)

    # Compute Jaccard
    # `r` starts out empty; presumably it accumulates one result row per
    # (cell-type pair, layer) -- the remainder of the loop body is not shown here.
    r = []
    # Iterate over pairs of cell types (assumes `combinations` is
    # itertools.combinations -- confirm against the file's imports).
    for celltype_i, celltype_j in combinations(celltypes, 2):

        for layer in layers:

            # Layer graphs looked up as data[layer][celltype]['graph'].
            G_i = data[layer][celltype_i]['graph']
            G_j = data[layer][celltype_j]['graph']

            # Node collections of the two graphs being compared.
            genes_i = G_i.nodes()
            genes_j = G_j.nodes()
示例#2
0
    # Seed NumPy's global RNG; the author added this for "sign indeterminacy".
    np.random.seed(1)

    # Load the pickled network selected by cell type, network kind and threshold.
    print(f'Reading {celltype:s}-{network:s}-{threshold_str:s} Network')
    rGfile_gpickle = (
        f'results/network/{celltype:s}/'
        f'net-{celltype:s}-{network:s}-{threshold_str:s}.gpickle'
    )
    G = nx.read_gpickle(rGfile_gpickle)

    # Per-layer decomposition. The author labels this "SVD"; the code below
    # runs PCA with svd_solver='full' on each layer's adjacency matrix.
    for layer in ['HS', 'MM', 'DM']:
        print('Isolate {layer:s} Layer'.format(layer=layer))
        Gt = get_network_layer(G, layer=layer)
        # One row per node of the layer graph, indexed by node, holding the
        # node's 'label' attribute (None when the attribute is absent).
        dfG = pd.DataFrame(data={
            'gene': [d.get('label', None) for n, d in Gt.nodes(data=True)]
        },
                           index=Gt.nodes)
        #
        print('Extract Adjacency Matrix')
        # NOTE(review): nx.to_numpy_matrix was removed in NetworkX 3.0
        # (nx.to_numpy_array is the documented replacement) -- confirm which
        # NetworkX version this script is pinned to.
        M = nx.to_numpy_matrix(Gt)

        print('Calculating PCA (sklearn)')
        # Assumes PCA is sklearn.decomposition.PCA (per the message above).
        # n_components=None does not restrict the number of components here;
        # fit and transform are both applied to the same matrix M.
        pca = PCA(n_components=None, svd_solver='full')
        res = pca.fit(M).transform(M)
        # Column labels '1c', '2c', ... up to `components` (defined elsewhere).
        columns = ['{:d}c'.format(i) for i in range(1, components + 1)]
        df_pca = pd.DataFrame(res[:, 0:components],
    args = parser.parse_args()

    # Run configuration: command-line values first, then fixed settings.
    celltype = args.celltype  # spermatocyte or enterocyte
    threshold = args.threshold
    network = 'full'
    attribute = 'combined_score'

    print('Loading Full Network')
    path_net = f'../../04-network/results/network/{celltype:s}/'
    rGfile_gpickle = path_net + f'net-{celltype:s}-{network:s}.gpickle'
    G = nx.read_gpickle(rGfile_gpickle)

    print('Separate Layers')
    HSG, MMG, DMG = [
        get_network_layer(G, layer_id) for layer_id in ('HS', 'MM', 'DM')
    ]

    print('Get edge weights')
    # Per layer: take `attribute` from every edge that carries it, divide by
    # 1000, and order the values from largest to smallest.
    values_HS = sorted(
        (attrs[attribute] / 1000
         for _, _, attrs in HSG.edges(data=True) if attribute in attrs),
        reverse=True)
    values_MM = sorted(
        (attrs[attribute] / 1000
         for _, _, attrs in MMG.edges(data=True) if attribute in attrs),
        reverse=True)
    values_DM = sorted([
from utils import get_network_layer, ensurePathExists


if __name__ == '__main__':

    # Threshold tag used in file names: the decimal point is written as 'p'.
    threshold = 0.5
    threshold_str = str(threshold).replace('.', 'p')

    # --- Spermatocyte conserved network ---
    path_net = '../../04-network/results/network/spermatocyte/'
    rG_spermatocyte_file_gpickle = (
        path_net + f'net-spermatocyte-conserved-{threshold_str:s}.gpickle')

    print('Load Spermatocyte graph')
    Gs = nx.read_gpickle(rG_spermatocyte_file_gpickle)

    print('Separate Layers')
    HSGs, MMGs, DMGs = [
        get_network_layer(Gs, layer_id) for layer_id in ('HS', 'MM', 'DM')
    ]

    # --- Enterocyte conserved network ---
    path_net = '../../04-network/results/network/enterocyte/'
    rG_enterocyte_file_gpickle = (
        path_net + f'net-enterocyte-conserved-{threshold_str:s}.gpickle')
    print('Load Enterocyte graph')
    Ge = nx.read_gpickle(rG_enterocyte_file_gpickle)

    print('Separate Layers')
    HSGe, MMGe, DMGe = [
        get_network_layer(Ge, layer_id) for layer_id in ('HS', 'MM', 'DM')
    ]

    dict_data = {
        'HS': {
    celltype = 'spermatocyte'
    threshold = 0.5
    # Threshold tag for file names: the decimal point is written as 'p'.
    threshold_str = str(threshold).replace('.', 'p')

    # --- Full network (file name carries no threshold tag) ---
    network = 'full'
    path_net = f'../../04-network/results/network/{celltype:s}/'
    rG_full_file_gpickle = path_net + f'net-{celltype:s}-{network:s}.gpickle'
    print(f'Load {celltype:s} {network:s} graph')
    Gf = nx.read_gpickle(rG_full_file_gpickle)

    print('Separate Layers')
    HSGf, MMGf, DMGf = [
        get_network_layer(Gf, layer_id) for layer_id in ('HS', 'MM', 'DM')
    ]

    # --- Conserved network (same directory, thresholded file name) ---
    network = 'conserved'
    path_net = f'../../04-network/results/network/{celltype:s}/'
    rG_con_file_gpickle = (
        path_net + f'net-{celltype:s}-{network:s}-{threshold_str:s}.gpickle')
    print(f'Load {celltype:s} {network:s} graph')
    Gc = nx.read_gpickle(rG_con_file_gpickle)

    print('Separate Layers')
    HSGc, MMGc = [get_network_layer(Gc, layer_id) for layer_id in ('HS', 'MM')]
    celltype = 'enterocyte'
    threshold = 0.5
    # Threshold tag for file names: the decimal point is written as 'p'.
    threshold_str = str(threshold).replace('.', 'p')

    # --- Thresholded ('thr') network ---
    network = 'thr'
    path_net = f'../../04-network/results/network/{celltype:s}/'
    rG_thr_file_gpickle = (
        path_net + f'net-{celltype:s}-{network:s}-{threshold_str:s}.gpickle')
    print(f'Load {celltype:s} {network:s} graph')
    Gt = nx.read_gpickle(rG_thr_file_gpickle)

    print('Separate Layers')
    HSGt, MMGt, DMGt = [
        get_network_layer(Gt, layer_id) for layer_id in ('HS', 'MM', 'DM')
    ]

    # --- Conserved network (same directory and threshold tag) ---
    network = 'conserved'
    path_net = f'../../04-network/results/network/{celltype:s}/'
    rG_con_file_gpickle = (
        path_net + f'net-{celltype:s}-{network:s}-{threshold_str:s}.gpickle')
    print(f'Load {celltype:s} {network:s} graph')
    Gc = nx.read_gpickle(rG_con_file_gpickle)

    print('Separate Layers')
    HSGc, MMGc = [get_network_layer(Gc, layer_id) for layer_id in ('HS', 'MM')]
        default='spermatocyte',
        type=str,
        choices=['spermatocyte', 'enterocyte'],
        help=
        "Cell type. Must be either 'spermatocyte' or 'enterocyte'. Defaults to spermatocyte"
    )
    args = parser.parse_args()

    celltype = args.celltype  # spermatocyte or enterocyte

    print('Reading Network')
    rGfile_gpickle = f'results/net-{celltype:s}.gpickle'
    G = nx.read_gpickle(rGfile_gpickle)

    # Only the 'DM' layer is used from here on.
    DMG = get_network_layer(G, 'DM')

    # Node attributes of the DM layer, combined into one table with a
    # 'gene' column (from 'label') and a 'core' column.
    gene_DM = nx.get_node_attributes(DMG, name='label')
    core_DM = nx.get_node_attributes(DMG, name='core')
    df_DM_m = pd.DataFrame({'gene': gene_DM, 'core': core_DM})
    # Missing 'core' values are filled with False.
    df_DM_m['core'] = df_DM_m['core'].fillna(value=False)

    # Working copy of the DM layer plus an empty list of result rows.
    Gt = DMG.copy()
    r = []
    r.append([
        None,
        Gt.number_of_nodes(),
        Gt.number_of_edges(),
        len([
            i for i, d in Gt.nodes(data=True) if d.get('core', False) == True
        ])
    # One empty {celltype: gene-set} mapping per layer.
    data = {layer_id: {} for layer_id in ('HS', 'MM', 'DM')}
    print('-- Conserved --')
    for celltype in celltypes:
        print(f'Loading {celltype:s} conserved {threshold_str:s}')

        path_net = f'../../04-network/results/network/{celltype:s}/'
        rGc_file_gpickle = (
            path_net + f'net-{celltype:s}-conserved-{threshold_str:s}.gpickle')
        Gc = nx.read_gpickle(rGc_file_gpickle)

        for layer in layers:
            print(f'Separating layer {layer:s}')
            Gcl = get_network_layer(Gc, layer)
            # Node identifiers of this layer's conserved subgraph.
            conserved_genes = set(Gcl.nodes())

            data[layer][celltype] = conserved_genes

    for layer in layers:
        print(f'Calculating venn {layer:s} conserved')

        ns = data[layer]

        # Each item from venn_count is printed as
        # '|A & B - C| = count'; the ' - ...' part is omitted when the
        # second collection is empty.
        for intersected, unioned, count in venn_count(ns):
            print('|{}{}| = {}'.format(
                ' & '.join(sorted(intersected)),
                (' - ' + ' - '.join(sorted(unioned))) if unioned else '',
                count,
            ))

    #
    # Non-conserved
    #