def plot_MA(df, core=None, pool=None, file='image.pdf', title="plotMA",
            c_up='#ff9896', c_not='black', c_down='#aec7e8'
            ):
    """Draw an MA plot (logFC against average logCPM) and save it to ``file``.

    Rows of ``df`` are split into three groups with fixed cut-offs
    (FDR <= 0.05 and |logFC| >= log2(2)): up-regulated, down-regulated and
    everything else. The size of each group is printed, each group is drawn
    as its own scatter layer, and dashed guide lines mark zero and the
    positive/negative logFC cut-off.

    Args:
        df: DataFrame with (at least) the columns 'FDR', 'logFC' and 'logCPM'.
        core: Unused by this function; kept so existing callers keep working.
        pool: Unused by this function; kept so existing callers keep working.
        file: Output path. It is handed to ``ensurePathExists`` and then to
            ``Figure.savefig``.
        title: Title placed above the axes.
        c_up: Colour of the up-regulated points.
        c_not: Colour of the non-significant points.
        c_down: Colour of the down-regulated points.

    Returns:
        None. The figure is written to ``file`` and then closed.
    """
    marker_size = 5
    edge_width = 0
    min_logfc = math.log2(2)
    max_fdr = 0.05

    # Divide data into DGE blocks. Boolean masks (rather than index look-ups)
    # keep the split correct even when the index contains duplicate labels.
    significant = (df['FDR'] <= max_fdr) & (df['logFC'].abs() >= min_logfc)
    is_up = significant & (df['logFC'] >= 0)
    is_down = significant & (df['logFC'] <= 0)
    dfU = df.loc[is_up, :]
    dfD = df.loc[is_down, :]
    dfN = df.loc[~(is_up | is_down), :]

    # Counts
    n_up, n_down, n_not = len(dfU), len(dfD), len(dfN)
    print("Up  : {rest:d} rest".format(rest=n_up))
    print("Down: {rest:d} rest".format(rest=n_down))
    print("Not : {rest:d} rest".format(rest=n_not))

    fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(4, 3))
    try:
        # Significant points sit above (zorder) the smaller background points.
        ax.scatter(dfU['logCPM'], dfU['logFC'], c=c_up, s=marker_size,
                   lw=edge_width, marker='o', zorder=3, rasterized=True)
        ax.scatter(dfD['logCPM'], dfD['logFC'], c=c_down, s=marker_size,
                   lw=edge_width, marker='o', zorder=3, rasterized=True)
        ax.scatter(dfN['logCPM'], dfN['logFC'], c=c_not, s=marker_size / 3,
                   lw=edge_width, marker='o', zorder=2, rasterized=True)

        # Guide lines at the logFC cut-offs and at zero.
        ax.axhline(y=-min_logfc, color='b', lw=1, linestyle='--', zorder=5)
        ax.axhline(y=0, color='gray', lw=1, linestyle='--', zorder=5)
        ax.axhline(y=+min_logfc, color='b', lw=1, linestyle='--', zorder=5)

        ax.set_xlim(-1, 18)
        ax.set_ylim(-15, 15)

        # Labels
        ax.set_title(title)
        ax.set_ylabel('logFC')
        ax.set_xlabel('Average logCPM')

        # Layout
        fig.subplots_adjust(left=0.17, bottom=0.17, right=0.97, top=0.90)

        # Save
        ensurePathExists(file)
        fig.savefig(file, dpi=300)
    finally:
        # Always release the figure; otherwise repeated calls accumulate
        # open figures in pyplot's global registry.
        plt.close(fig)
def plot(df, species, phase, figsize=(4, 3)):
    """Plot per-column read-count box plots with sampled row profiles on top.

    A random 30% sample of the rows of ``df`` is drawn as faint lines across
    the columns, and a box plot (no fliers) is drawn for every column. The
    figure is saved under ``images/phase-reads/`` and then closed.

    Args:
        df: DataFrame whose columns are placed along the x axis (their labels
            become the tick labels) and whose rows are the plotted profiles.
        species: One of 'HS', 'MM' or 'DM'; selects the display name and the
            line colour. Any other value raises ``KeyError``.
        phase: Phase label used in the title and in the output file name.
        figsize: Figure size passed to ``plt.subplots``.

    Returns:
        None.
    """
    # Resolve the species first so an unknown code fails before a figure
    # is allocated.
    speciest = {'HS': 'Human', 'MM': 'Mouse', 'DM': 'Insect'}[species]
    color = {'HS': '#2ca02c', 'MM': '#7f7f7f', 'DM': '#ff7f0e'}[species]
    positions = list(range(len(df.columns)))

    fig, ax = plt.subplots(figsize=figsize)
    try:
        # Transposed so that every sampled row becomes one line over columns.
        ax.plot(positions,
                df.sample(frac=0.3).T,
                color=color,
                lw=1,
                alpha=0.05,
                zorder=5,
                rasterized=False)
        ax.boxplot(df.values,
                   positions=positions,
                   meanline=True,
                   notch=False,
                   widths=0.6,
                   showfliers=False,
                   medianprops={
                       'color': 'black',
                       'lw': 1
                   },
                   zorder=4)

        ax.set_title('{species:s} meiotic {phase:s}'.format(species=speciest,
                                                            phase=phase))
        ax.set_xticks(positions)
        ax.set_xticklabels(df.columns, rotation=90)
        ax.set_ylabel('Read counts (L2 norm)')

        # tight_layout first, then widen the bottom margin for the rotated
        # tick labels.
        fig.tight_layout()
        fig.subplots_adjust(bottom=0.32, top=0.90)

        wIMGfile = 'images/phase-reads/img-{species:s}-meiotic-{phase:s}.pdf'.format(
            species=species, phase=phase)
        ensurePathExists(wIMGfile)
        fig.savefig(wIMGfile, dpi=150)
    finally:
        # Release the figure so repeated calls do not pile up open figures.
        plt.close(fig)
        ]
        id_gene_MM = [
            dict_MM_id_gene_to_id_string[n] for n in id_string_MM
            if n in dict_MM_id_gene_to_id_string
        ]
        id_gene_DM = [
            dict_DM_id_gene_to_id_string[n] for n in id_string_DM
            if n in dict_DM_id_gene_to_id_string
        ]
        # only ids already in graph
        id_gene_HS = [n for n in id_gene_HS if n in set_all_node_ids]
        id_gene_MM = [n for n in id_gene_MM if n in set_all_node_ids]
        id_gene_DM = [n for n in id_gene_DM if n in set_all_node_ids]
        # all pairs for each pairwise product
        all_pairs = chain(product(*[id_gene_HS, id_gene_MM]),
                          product(*[id_gene_HS, id_gene_DM]),
                          product(*[id_gene_MM, id_gene_DM]))
        cross_edges.extend(all_pairs)
    G.add_edges_from(cross_edges, type='cross')

    ##
    # Export
    ##
    print('Exporting')
    wGfile_gpickle = 'results/network/net-{network:s}.gpickle'.format(
        network=network)
    ensurePathExists(wGfile_gpickle)
    nx.write_gpickle(G, wGfile_gpickle)

    print('Done.')
Пример #4
0
    columns = ['id_string_HS', 'id_string_MM', 'id_string_DM']
    for column in columns:
        df[column] = df[column].apply(lambda x: ",".join([str(y) for y in x]))

    columns = [
        'HS_CyteGonia', 'MM_CyteGonia', 'DM_MiddleApical', 'HS_TidCyte',
        'MM_TidCyte', 'DM_BasalMiddle', 'biotype_HS', 'biotype_MM',
        'biotype_DM', 'id_gene_HS', 'id_gene_MM', 'id_gene_DM', 'gene_HS',
        'gene_MM', 'gene_DM'
    ]

    # Export
    print("> Exporting")
    wCSVFile = 'results/pipeline-{pipeline:s}/meta_meiotic_genes.csv'.format(
        pipeline=pipeline)
    ensurePathExists(wCSVFile)
    df.to_csv(wCSVFile)

    # HS
    wCSVFileHS = 'results/pipeline-{pipeline:s}/HS_meiotic_genes.csv'.format(
        pipeline=pipeline)
    ensurePathExists(wCSVFileHS)
    df_HS.to_csv(wCSVFileHS)

    # MM
    wCSVFileMM = 'results/pipeline-{pipeline:s}/MM_meiotic_genes.csv'.format(
        pipeline=pipeline)
    ensurePathExists(wCSVFileMM)
    df_MM.to_csv(wCSVFileMM)

    wCSVFileDM = 'results/pipeline-{pipeline:s}/DM_meiotic_genes.csv'.format(
Пример #5
0
def plot_goea(df, celltype='spermatocyte', species='HS', facecolor='red', ns='BP'):
    """Draw a horizontal bar chart of the ten most enriched GO terms.

    The input is restricted to namespace ``ns`` and to terms with
    ``depth >= 5``. Terms are scored with ``1 - log(p_fdr_bh)`` and the ten
    highest scores are plotted, one bar per term, with the (possibly
    shortened) term name written inside the bar. The figure is saved under
    ``images/goea-bars/``.

    Args:
        df: DataFrame with the columns 'NS', 'depth', 'p_fdr_bh', 'name'
            and 'GO'. It is copied, so the caller's frame is not modified.
        celltype: Cell type label, used in the log message and file name.
        species: Key into the module-level ``dict_species``; also part of
            the file name.
        facecolor: Fill colour of the bars.
        ns: Namespace to keep; key into the module-level ``dict_ns``.

    Returns:
        None. Returns early (after printing a notice) when no term is left.
    """
    max_name_length = 75  # longer term names are cut and suffixed with '..'

    def shorten(label):
        if len(label) > max_name_length:
            return label[0:max_name_length] + '..'
        return label

    # Select the namespace, then trim shallow terms.
    terms = df.copy()
    terms = terms.loc[(terms['NS'] == ns), :]
    terms = terms.loc[(terms['depth'] >= 5), :]

    # log(0) is undefined: exact zeros become the smallest positive float.
    is_zero = terms['p_fdr_bh'] == 0.0
    terms.loc[is_zero, 'p_fdr_bh'] = np.nextafter(0, 1)
    terms['1-log(p)'] = 1 - (np.log(terms['p_fdr_bh']))

    print('Plotting GOEA Bars: {celltype:s} {species} {ns:s}'.format(celltype=celltype, species=species, ns=ns))
    species_str = dict_species[species]
    ns_str = dict_ns[ns]

    # Ten best terms, re-sorted ascending so the best ends up as the top bar.
    ranked = terms.sort_values('1-log(p)', ascending=False)
    dft10 = ranked.iloc[:10, :].sort_values('1-log(p)', ascending=True)
    dft10['name'] = dft10['name'].apply(shorten)

    if dft10.shape[0] == 0:
        print('No significant GOs.')
        return

    title = 'GO enrichment - {species:s} {ns:s}'.format(species=species_str, ns=ns_str)

    # Plot
    fig, ax = plt.subplots(figsize=(4.7, 3.0))
    ind = np.arange(0, len(dft10), 1)
    bar_lengths = 1 - np.log(dft10['p_fdr_bh'])
    bp = ax.barh(ind, bar_lengths, height=0.8, facecolor=facecolor, zorder=4)
    ax.set_title(title, fontsize='large')

    # Term names go inside the bars, slightly right of each bar's left edge.
    maxx = ax.get_xlim()[1]
    x_offset = 0.01 * maxx
    for bar, name in zip(bp.patches, dft10['name'].tolist()):
        tx = bar.get_x() + x_offset
        ty = bar.get_y() + (bar.get_height() / 2)
        ax.text(x=tx, y=ty, s=name, ha='left', va='center', fontsize='x-small', zorder=5)

    # Reference lines at p = 0.01 and p = 0.05 on the same scale.
    ax.axvline(x=(1 - math.log(0.01)), color='#666666', ls='dotted')
    ax.axvline(x=(1 - math.log(0.05)), color='#c7c7c7', ls='dashed')
    ax.set_yticks(ind)
    ax.set_yticklabels(dft10['GO'])
    ax.set_xlabel(r'$1 - $log($p$-value)')
    # Fixed to ten slots so bar size is the same whatever the term count.
    ax.set_ylim(-0.7, (10 - 0.3))
    ax.grid(axis='x', zorder=1)

    plt.subplots_adjust(left=0.21, right=0.97, bottom=0.17, top=0.89)

    wIMGFile = 'images/goea-bars/img-goea-bars-{celltype:s}-{species:s}-core-genes-{ns:s}.pdf'.format(celltype=celltype, species=species, ns=ns)
    print(wIMGFile)
    ensurePathExists(wIMGFile)
    plt.savefig(wIMGFile, dpi=300, bbox_inches=None, pad_inches=0.0)
    plt.close()
Пример #6
0
            FROM dw_interaction i
            WHERE
                i.age IS NOT NULL
            GROUP BY i.id_patient, i.age
        ) as t
        GROUP BY t.age
    """
    dfi = pd.read_sql(sqli, con=engine, index_col='age')
    # Map age to age_group
    dfi['age_group'] = map_age_to_age_group(dfi.index)
    # Group by age_group
    dfi = dfi.groupby('age_group').agg({'patient-inter': 'sum'})

    # Concat Results
    dfr = pd.concat([dfp, dfc, dfi], axis='columns', sort='False').fillna(0)

    # Relative Risk of CoAdministration (per gender)
    dfr['RRC^{g=F}'] = (dfr['patient-coadmin'] / dfr['patient']) / (
        dfr.loc['Male', 'patient-coadmin'] / dfr.loc['Male', 'patient'])

    # Relative Risk of Interaction (per gender)
    dfr['RRI^{g=F}'] = (dfr['patient-inter'] / dfr['patient']) / (
        dfr.loc['Male', 'patient-inter'] / dfr.loc['Male', 'patient'])

    print(dfr)

    # Export
    wCSVfile = 'results/age.csv'
    ensurePathExists(wCSVfile)
    dfr.to_csv(wCSVfile)
Пример #7
0
                    'matches': []
                }
                for match in s.get_unique_matches():
                    for mid in match.id:
                        mj['matches'].append({
                            'id': mid,
                            'id_parent': dict_id_parent[mid],
                            'token': dict_token[mid],
                            'parent': dict_parent[mid],
                            'type': dict_type[mid]
                        })
                list_post_mentions.append(mj)

        print(
            'nr_posts: {n_posts:d} | nr_matched_posts: {n_posts_with_matches:d}'
            .format(n_posts=n_posts,
                    n_posts_with_matches=n_posts_with_matches))

        if n_posts_with_matches <= 0:
            print('> NO MATCHED POSTS, SKIPPING')
            continue

    # to DataFrame
    dfR = pd.DataFrame(list_post_mentions)

    # Export
    wCSVfile = '../tmp-data/01-instagram-epilepsy-mentions-{dicttimestamp:s}.csv.gz'.format(
        dicttimestamp=dicttimestamp)
    utils.ensurePathExists(wCSVfile)
    dfR.to_csv(wCSVfile)
    ax.set_ylabel('Weight')
    ax.set_xlabel('Edge rank')
    ax.set_xscale('log')

    axin.set_ylabel('Probability', fontsize='small')
    axin.set_xlabel('Weight', fontsize='small')
    axin.set_xticks([0.2, 0.5, 1.0])

    # Legend
    ax.legend(handles=(phs, pmm, pdm),
              labels=('Human', 'Mouse', 'Insect'),
              loc='lower left')

    # Grid
    ax.grid(zorder=1)
    axin.grid(zorder=1)

    plt.subplots_adjust(left=0.12,
                        right=0.97,
                        bottom=0.12,
                        top=0.92,
                        wspace=0,
                        hspace=0)
    img_path = 'images/net-edge-attributes/{celltype:s}/'.format(
        celltype=celltype)
    file = img_path + 'img-net-{celltype:s}-full-edge-{attribute:s}-dist.pdf'.format(
        celltype=celltype, attribute=attribute)
    ensurePathExists(file)
    fig.savefig(file)
    plt.close()
Пример #9
0
        print('Computing Backbone ({layer:s})'.format(layer=layer))

        print('Create empty graph ({layer:s})'.format(layer=layer))
        B = nx.Graph()
        B.add_nodes_from(Gtmp.nodes())
        B.add_edges_from(Gtmp.edges())
        #
        # Compute Backbones
        #
        print('Dijkstra ({layer:s})'.format(layer=layer))
        dict_edges_backbone, dict_edges_s_values = compute_metric_backbone(
            Gtmp)

        # To DataFrame
        dfB = pd.DataFrame({
            'backbone': dict_edges_backbone,
            's_values': dict_edges_s_values
        })

        ##
        # Export
        ##
        print('Exporting ({layer:s})'.format(layer=layer))
        wBfile = 'results/backbone/{celltype:s}/net-{celltype:s}-{network:s}-{threshold:s}-{layer:s}-backbone.csv.gz'.format(
            celltype=celltype,
            network=network,
            threshold=threshold_str,
            layer=layer)
        ensurePathExists(wBfile)
        dfB.to_csv(wBfile)
Пример #10
0
def plot_distance_and_angles(celltype, network, threshold, layer, radius_window, radius_overlap, angle_window, angle_overlap):
    """Plot distance/angle scatter plots with entropy curves per component pair.

    For each pair of consecutive components (1-2 up to 9-10) one figure is
    written under ``images/pca-entropy/``: the points of the distance/angle
    table as a scatter, the normalised and smoothed entropy on a secondary
    y axis, and a vertical line at the ``radius-start`` of every row of the
    entropy table that has a non-null ``cut-rank``.

    Args:
        celltype, network, layer: Strings used to build the input and
            output file names.
        threshold: Threshold value; '.' is replaced by 'p' for file names.
        radius_window, radius_overlap: Floats shown in the x axis label.
        angle_window, angle_overlap: Integers shown in the y axis label
            (they are formatted with ``:d``).
    """
    threshold_str = str(threshold).replace('.', 'p')
    name_parts = dict(celltype=celltype, network=network, threshold=threshold_str, layer=layer)

    print('Plotting Distance & Angles for {celltype:s}-{network:s}-{threshold:s}-{layer:s}'.format(**name_parts))
    rDiAnFile = '../../04-network/results/pca/{celltype:s}/{layer:s}/pca-{celltype:s}-{network:s}-{threshold:s}-{layer:s}-dian.csv.gz'.format(**name_parts)
    rEntrFile = '../../04-network/results/pca/{celltype:s}/{layer:s}/pca-{celltype:s}-{network:s}-{threshold:s}-{layer:s}-entropy.csv.gz'.format(**name_parts)

    df_dian = pd.read_csv(rDiAnFile, index_col=0)
    df_ent = pd.read_csv(rEntrFile, index_col=0)
    # Cut points: entropy rows that carry a cut rank, ordered per dimension.
    has_cut = df_ent['cut-rank'].notnull()
    df_cp = df_ent.loc[has_cut, :].sort_values(['dim', 'cut-rank'])

    # Endless colour/linestyle cycle; it is shared by all figures, so it is
    # not restarted between dimensions.
    color_cycle = cycler(color=['#1f77b4', '#ff7f0e', '#2ca02c'])
    style_cycle = cycler(linestyle=['solid', 'dashed', 'dotted'])
    cyc = (color_cycle + style_cycle)()

    # Settings that are identical for every figure.
    facecolors = '#c7c7c7'
    edgecolors = 'black'
    yticks = [-180, -135, -90, -45, 0, 45, 90, 135, 180]
    yticklabels = [r'$-\pi$', r'$-\frac{3\pi}{4}$', r'$-\frac{\pi}{2}$', r'$-\frac{\pi}{4}$', r'$0$', r'$\frac{\pi}{4}$', r'$\frac{\pi}{2}$', r'$\frac{3\pi}{4}$', r'$\pi$']
    xlabel = r'radius ($\theta_w = {radius_window:.2f}, \theta_o = {radius_overlap:.2f}$)'
    ylabel = r'angle ($\varphi_w = {angle_window:d}, \varphi_o = {angle_overlap:d}$)'

    for dim in range(1, 10):
        next_dim = dim + 1
        print('- Dim: {:d}'.format(dim))
        df_ent_tmp = df_ent.loc[df_ent['dim'] == dim].copy()
        df_cp_tmp = df_cp.loc[(df_cp['dim'] == dim), :]

        fig, ax = plt.subplots(figsize=(3.66, 3))
        axt = ax.twinx()

        # Column names of this component pair in the distance/angle table.
        cx = str(dim) + 'c'
        cy = str(next_dim) + 'c'
        dist_label = '{cx:s}-{cy:s}-dist'.format(cx=cx, cy=cy)
        angle_label = '{cx:s}-{cy:s}-angle'.format(cx=cx, cy=cy)

        df_dian = df_dian.sort_values([dist_label, angle_label], ascending=[True, True])
        xs = df_dian[dist_label]
        ys = df_dian[angle_label]

        ax.scatter(xs, ys, c=facecolors, marker='o', edgecolors=edgecolors, lw=0.2, s=10, zorder=4, rasterized=True)
        # Entropy on the secondary axis: raw values as dots, smoothed as line.
        axt.plot(df_ent_tmp['radius-start'], df_ent_tmp['entropy-norm'], color='#d62728', zorder=6, marker='.', markersize=3, lw=0)
        axt.plot(df_ent_tmp['radius-start'], df_ent_tmp['entropy-smooth'], color='#ff9896', zorder=5)

        # One vertical line per cut point; only the radius is needed here.
        for _idx, _cut_rank, radius in df_cp_tmp[['cut-rank', 'radius-start']].to_records():
            ax.axvline(x=radius, zorder=6, **next(cyc))

        ax.set_title('Components {dim1} and {dim2}'.format(dim1=dim, dim2=next_dim))
        ax.set_xlabel(xlabel.format(radius_window=radius_window, radius_overlap=radius_overlap))
        ax.set_ylabel(ylabel.format(angle_window=angle_window, angle_overlap=angle_overlap))
        ax.set_yticks(yticks)
        ax.set_yticklabels(yticklabels, fontsize='medium')

        axt.set_ylabel('entropy (normed)')
        axt.set_ylim(0, 1)
        ax.grid()

        plt.subplots_adjust(left=0.17, right=0.84, bottom=0.17, top=0.89)
        wIMGFile = 'images/pca-entropy/{celltype:s}/{layer:s}/img-entropy-{celltype:s}-{network:s}-{threshold:s}-{layer:s}-C{dimx:d}x{dimy:d}.pdf'.format(dimx=dim, dimy=next_dim, **name_parts)
        ensurePathExists(wIMGFile)
        plt.savefig(wIMGFile, dpi=300)
        plt.close()
Пример #11
0
                layer=layer))
    rPCAFile = 'results/pca/{celltype:s}/{layer:s}/pca-{celltype:s}-{network:s}-{threshold:s}-{layer:s}-dim.csv.gz'.format(
        celltype=celltype,
        network=network,
        threshold=threshold_str,
        layer=layer)
    wDiAnFile = 'results/pca/{celltype:s}/{layer:s}/pca-{celltype:s}-{network:s}-{threshold:s}-{layer:s}-dian.csv.gz'.format(
        celltype=celltype,
        network=network,
        threshold=threshold_str,
        layer=layer)
    wEntrFile = 'results/pca/{celltype:s}/{layer:s}/pca-{celltype:s}-{network:s}-{threshold:s}-{layer:s}-entropy.csv.gz'.format(
        celltype=celltype,
        network=network,
        threshold=threshold_str,
        layer=layer)
    #
    df_pca = pd.read_csv(rPCAFile, index_col=0, encoding='utf-8')
    #
    df_ent, df_dian = compute_entropy(df_pca,
                                      radius_window=radius_window,
                                      radius_overlap=radius_overlap,
                                      angle_window=angle_window,
                                      angle_overlap=angle_overlap,
                                      components=9)
    #
    ensurePathExists(wDiAnFile)
    ensurePathExists(wEntrFile)
    df_ent.to_csv(wEntrFile)
    df_dian.to_csv(wDiAnFile)
Пример #12
0
def plot_goea(celltype='spermatocyte', network='thr', threshold=0.5, layer='DM', modules=None):
    """Plot, for every module, a bar chart of its ten most enriched GO terms.

    The GOEA table is read from ``results/goea/``, restricted to terms with
    ``depth >= 5`` and scored with ``1 - log(p_fdr_bh)``. For each entry of
    ``modules`` the ten best-scoring terms of that module are drawn as
    horizontal bars and saved under ``images/goea-bars/``. Modules without
    any term are reported and skipped.

    Args:
        celltype, network, layer: Strings used to build file names; ``layer``
            is also a key into the module-level ``dict_specie``.
        threshold: Threshold value; '.' is replaced by 'p' for file names.
        modules: Iterable of dicts with the keys 'id' (int), 'name' (str,
            a key of the module-level ``dict_replace``) and 'facecolor'.
            ``None`` (the default) means no modules.

    Returns:
        None.
    """
    # Derived from ``threshold`` here so the function does not depend on a
    # module-level ``threshold_str`` being defined by the caller.
    threshold_str = str(threshold).replace('.', 'p')
    if modules is None:
        modules = []

    rCSVFile = 'results/goea/{celltype:s}/goea-{celltype:s}-{network:s}-{threshold:s}-{layer:s}.csv.gz'.format(celltype=celltype, network=network, threshold=threshold_str, layer=layer)
    df = pd.read_csv(rCSVFile)

    # Trim. The explicit copy makes the assignments below write to an
    # independent frame instead of a slice of the one that was read.
    df = df.loc[(df['depth'] >= 5), :].copy()

    # All zeros are set to the smallest computable float (log(0) is undefined)
    df.loc[df['p_fdr_bh'] == 0.0, 'p_fdr_bh'] = np.nextafter(0, 1)
    df['1-log(p)'] = 1 - (np.log(df['p_fdr_bh']))

    print('Plotting GOEA Bars: {celltype:s} - {network:s} - {threshold:s} - {layer}'.format(celltype=celltype, network=network, threshold=threshold_str, layer=layer))
    specie = dict_specie[layer]

    sl = 75  # string slice: longer term names are cut and suffixed with '..'

    for module in modules:

        mid = module['id']
        mname = module['name']

        print("Module: M{mid:d}-{mname:s}".format(mid=mid, mname=mname))
        facecolor = module['facecolor']

        dft = df.loc[(df['module-id'] == mid), :].sort_values('1-log(p)', ascending=False)
        # Ten best terms, re-sorted ascending so the best ends up as top bar.
        dft10 = dft.iloc[:10, :].sort_values('1-log(p)', ascending=True)

        if len(dft10) == 0:
            print('No significant GOs.')
            continue

        dft10['name'] = dft10['name'].apply(lambda x: x[0:sl] + '..' if len(x) > sl else x)

        # Plot
        fig, ax = plt.subplots(figsize=(4.7, 3.0))

        title = 'GOEA-{specie:s} {celltype:s} M{mid:d}-{mname:s}'.format(specie=specie, celltype=celltype, mid=mid, mname=dict_replace[mname])

        ind = np.arange(0, len(dft10), 1)
        bp = ax.barh(ind, 1 - np.log(dft10['p_fdr_bh']), height=0.8, facecolor=facecolor, zorder=4)
        ax.set_title(title, fontsize='large')

        # Term names go inside the bars, slightly right of the left edge.
        _, maxx = ax.get_xlim()
        for bar, name in zip(bp.patches, dft10['name'].tolist()):
            tx = bar.get_x() + (0.01 * maxx)
            ty = bar.get_y() + (bar.get_height() / 2)
            ax.text(x=tx, y=ty, s=name, ha='left', va='center', fontsize='x-small', zorder=5)

        # Reference lines at p = 0.01 and p = 0.05 on the same scale.
        ax.axvline(x=(1 - math.log(0.01)), color='#666666', ls='dotted')
        ax.axvline(x=(1 - math.log(0.05)), color='#c7c7c7', ls='dashed')
        ax.set_yticks(ind)
        ax.set_yticklabels(dft10['GO'])
        ax.set_xlabel(r'$1 - $log($p$-value)')
        # Fixed to ten slots so bar size is the same whatever the term count.
        ax.set_ylim(-0.7, (10 - 0.3))
        ax.grid(axis='x', zorder=1)

        fig.subplots_adjust(left=0.21, right=0.97, bottom=0.17, top=0.89)

        wIMGFile = 'images/goea-bars/{celltype:s}/{layer:s}/img-goea-bars-{celltype:s}-{network:s}-{threshold:s}-{layer:s}-M{mid:d}.pdf'.format(celltype=celltype, network=network, threshold=threshold_str, layer=layer, mid=mid)
        ensurePathExists(wIMGFile)
        fig.savefig(wIMGFile, dpi=300, bbox_inches=None, pad_inches=0.0)
        plt.close(fig)
Пример #13
0
def _goea_word_frequencies(dfA, stop_words):
    """Return a ``{word: weight}`` dict built from the GO term names in ``dfA``.

    Every term name is stripped of '-', ',', '.' and "'" and split on spaces;
    words found in ``stop_words`` are dropped. Each remaining word carries
    the '1-log(p)' score of its term. Per word, the weight is the rank of
    its occurrence count multiplied by the rank of its summed score.
    """
    records = []
    for i, row in dfA.iterrows():
        pvalue = row['1-log(p)']
        name = row['name'].replace('-', ' ').replace(',', '').replace('.', '').replace("'", '')
        for word in name.split(' '):
            if word not in stop_words:
                records.append((i, word, pvalue))

    dfr = pd.DataFrame(records, columns=['id', 'name', 'pvalue']).set_index('id')
    # Merge the two spellings so they are counted as one word.
    dfr['name'] = dfr['name'].replace('proteasomal', 'proteasome')

    dfrg = dfr.groupby('name').agg({'pvalue': ['count', 'sum']})
    dfrg.columns = dfrg.columns.droplevel()
    dfrg['frequency'] = dfrg['count'].rank(method='min') * dfrg['sum'].rank(method='min')
    dfrg.sort_values('frequency', ascending=False, inplace=True)

    return dfrg.reset_index().set_index('name')['frequency'].to_dict()


def plot_wordcloud(celltype='spermatocyte', network='thr', threshold=0.5, layer='DM', modules=None):
    """Plot, for every module, a word cloud of its enriched GO term names.

    The GOEA table is read from ``results/goea/``, restricted to terms with
    ``depth >= 5`` and scored with ``1 - log(p_fdr_bh)``. For each entry of
    ``modules`` a word cloud is generated from the words of the module's GO
    term names, recoloured so that words listed in the module-level
    ``data_text_color[mid]`` use the module colour and all others are
    black, and saved under ``images/goea-wordcloud/``. Modules without any
    term are reported and skipped.

    Args:
        celltype, network, layer: Strings used to build file names; ``layer``
            is also a key into the module-level ``dict_specie``.
        threshold: Threshold value; '.' is replaced by 'p' for file names.
        modules: Iterable of dicts with the keys 'id' (int), 'name' (str,
            a key of the module-level ``dict_replace``) and 'facecolor'.
            ``None`` (the default) means no modules.

    Returns:
        None.
    """
    # Derived from ``threshold`` here so the function does not depend on a
    # module-level ``threshold_str`` being defined by the caller.
    threshold_str = str(threshold).replace('.', 'p')
    if modules is None:
        modules = []

    celltype_str = celltype.title()
    rCSVFile = 'results/goea/{celltype:s}/goea-{celltype:s}-{network:s}-{threshold:s}-{layer:s}.csv.gz'.format(celltype=celltype, network=network, threshold=threshold_str, layer=layer)
    df = pd.read_csv(rCSVFile)

    # Trim. The explicit copy makes the assignments below write to an
    # independent frame instead of a slice of the one that was read.
    df = df.loc[(df['depth'] >= 5), :].copy()

    # All zeros are set to the smallest computable float (log(0) is undefined)
    df.loc[df['p_fdr_bh'] == 0.0, 'p_fdr_bh'] = np.nextafter(0, 1)
    df['1-log(p)'] = 1 - (np.log(df['p_fdr_bh']))

    specie = dict_specie[layer]

    english_stopwords = stopwords.words('english')
    # Set copy for the per-word membership tests in the frequency helper.
    stop_word_set = set(english_stopwords)
    print('Plotting GOEA Wordcloud: {celltype:s} - {network:s} - {threshold:s} - {layer}'.format(celltype=celltype, network=network, threshold=threshold_str, layer=layer))

    def all_black(*args, **kwargs):
        # Placeholder colour used while the cloud is generated; the final
        # colours are applied by ``recolor`` below.
        return (0, 0, 0)

    for module in modules:

        mid = module['id']
        mname = module['name']
        text_color = module['facecolor']

        print("Module: M{mid:d}-{mname:s}".format(mid=mid, mname=mname))

        # WordCloud
        dft = df.loc[(df['module-id'] == mid), :]
        text = ' '.join(dft['name'].tolist())

        if len(text) == 0:
            print('No significant GOs.')
            continue

        fig, ax = plt.subplots(figsize=(4.0, 3.0))

        wordcloud = WordCloud(background_color='white', max_font_size=45, width=400, height=300, stopwords=english_stopwords, relative_scaling='auto', colormap='tab10', color_func=all_black, collocation_threshold=20)

        frequencies = _goea_word_frequencies(dft, stop_word_set)
        wordcloud.generate_from_frequencies(frequencies)

        def highlight_module_words(word, font_size, position, orientation, random_state=None, **kwargs):
            if word in data_text_color[mid]:
                return text_color
            else:
                return 'black'
        # Recolor
        wordcloud.recolor(color_func=highlight_module_words)

        title = 'GOEA-{specie:s} {celltype:s} M{mid:d}-{mname:s}'.format(specie=specie, celltype=celltype_str, mid=mid, mname=dict_replace[mname])
        ax.set_title(title)

        ax.imshow(wordcloud, interpolation='bilinear')

        ax.set_xticks([])
        ax.set_yticks([])

        fig.subplots_adjust(left=0.03, right=0.97, bottom=0.17, top=0.89)

        wIMGFile = 'images/goea-wordcloud/{celltype:s}/{layer:s}/img-goea-wc-{celltype:s}-{network:s}-{threshold:s}-{layer:s}-mod-{mid:d}.pdf'.format(celltype=celltype, network=network, threshold=threshold_str, layer=layer, mid=mid)
        ensurePathExists(wIMGFile)
        fig.savefig(wIMGFile, dpi=300, bbox_inches=None, pad_inches=0.0)
        plt.close(fig)
 # json
 print('> json')
 jsondata = {
     'directed':
     False,
     'graph': [],
     'nodes': [{
         'id': i,
         **d
     } for i, d in Gtc.nodes(data=True)],
     'edges': [{
         'from': i,
         'to': j,
         **d
     } for i, j, d in Gtc.edges(data=True)]
 }
 wGtcfile_json = 'results/json/net_{network:s}-{level:s}-{layer:s}-SVD-{component:s}.json'.format(
     network=network,
     level=levelstr,
     layer=layer,
     component=component)
 ensurePathExists(wGtcfile_json)
 with open(wGtcfile_json, 'w') as outfile:
     json.dump(jsondata, outfile, indent=4)
 """
 # graphml
 print('> graphml')
 wGtcfile_graphml = 'results/graphml/net_{network:s}-{layer:s}-SVD-{component:s}.graphml'.format(network=network, layer=layerstr, component=component)
 ensurePathExists(wGtcfile_graphml)
 nx.write_graphml(Gtc, wDMGfile_graphml)
 """
Пример #15
0
def plot_indianapolis_map(gdf=None,
                          var=None,
                          vmin=None,
                          vmax=None,
                          cmap=None,
                          title='',
                          legend_label='',
                          legend_format=None,
                          wIMGfile=None):
    """Draw a choropleth of ``gdf[var]`` with reference layers and save it.

    On top of the coloured ``gdf`` layer the function draws, from
    module-level objects, the boundaries of ``gdp1``, the boundaries of
    ``gcounties`` and the ``ghighways`` layer. Every ``gdp1`` row whose
    representative point lies inside ``zoom_polygon`` is labelled with its
    'ZCTA5CE10' value, the outline of ``zoom_polygon`` is drawn, and the
    view is limited to that polygon's bounds.

    Args:
        gdf: Object whose ``plot`` method draws the coloured layer.
        var: Column of ``gdf`` used for colouring.
        vmin, vmax: Colour scale limits.
        cmap: Colour map.
        title: Axes title.
        legend_label: Label of the horizontal colour bar.
        legend_format: Tick format of the colour bar.
        wIMGfile: Output path, handed to ``ensurePathExists`` and
            ``Figure.savefig``.
    """

    def to_tens_of_thousands(x, pos):
        # Tick label: the coordinate divided by 1e4, without decimals.
        return "{x:.0f}".format(x=(x / 1e4))

    # Figure with a thin extra axes at the bottom that holds the colour bar.
    fig, ax = plt.subplots(figsize=(6, 6), nrows=1, ncols=1)
    cax = fig.add_axes([0.15, 0.06, 0.70, 0.021])
    ax.set_title(title)

    def label_inside_zoom(row):
        # Write the ZCTA code at a point guaranteed to lie in the geometry,
        # but only for geometries located inside the zoom area.
        anchor = row['geometry'].representative_point()
        if not zoom_polygon.contains(anchor):
            return
        ax.text(x=anchor.x,
                y=anchor.y,
                s=row.get('ZCTA5CE10', ''),
                ha='center',
                fontsize='xx-small',
                zorder=12)

    # Variable
    colorbar_options = {
        'label': legend_label,
        'orientation': 'horizontal',
        'format': legend_format,
    }
    gdf.plot(column=var,
             cmap=cmap,
             ax=ax,
             lw=0,
             legend=True,
             legend_kwds=colorbar_options,
             vmin=vmin,
             vmax=vmax,
             cax=cax,
             zorder=3)

    # Reference layers: ZCTA boundaries, counties, highways.
    gdp1.boundary.plot(ax=ax, lw=1, edgecolor='#c7c7c7', zorder=4)
    gcounties.boundary.plot(ax=ax, lw=1.5, color='#d62728', zorder=8)
    ghighways.plot(ax=ax, lw=1, color='#7f7f7f', zorder=7)

    # Names
    gdp1.apply(label_inside_zoom, axis=1)

    # Outline of the zoom area.
    outline_x, outline_y = zoom_polygon.exterior.xy
    ax.plot(outline_x, outline_y, color='green', lw=1)

    # Axis labels: one formatter instance per axis.
    ax.xaxis.set_major_formatter(FuncFormatter(to_tens_of_thousands))
    ax.yaxis.set_major_formatter(FuncFormatter(to_tens_of_thousands))

    # Zoom in on the bounding box of the zoom polygon.
    west, south, east, north = zoom_polygon.bounds
    ax.set_xlim(west, east)
    ax.set_ylim(south, north)

    # Save
    margins = dict(left=0.10, right=0.95, bottom=0.14, top=0.93,
                   wspace=0.0, hspace=0.0)
    plt.subplots_adjust(**margins)
    ensurePathExists(wIMGfile)
    fig.savefig(wIMGfile)
    plt.close()
Пример #16
0
            # Generate dfX
            dfNet = pd.DataFrame.from_dict(dict(Gtc.nodes(data=True)),
                                           orient='index')

            # Merge DataFrames
            dfX = pd.concat([dfNet, dfSVD], axis='columns')

            # Calculate y
            """
            def calc_y(r):
                if r.get('mean-fert-rate', 1.0) < 0.7:
                    return True
                elif not pd.isnull(r.get('known-DM-phenotype', None)):
                    return True
                elif not pd.isnull(r.get('new-DM-phenotype', None)):
                    return True
                else:
                    return False
            dfX['y'] = dfX.apply(calc_y, axis='columns')
            """

            ##
            # Export
            ##
            print('Saving results to .CSV')
            wMLFile = 'results/ml/{celltype:s}/{layer:s}/ml-{celltype:s}-{layer:s}-mod-{mid:d}.csv.gz'.format(
                celltype=celltype, layer=layer, mid=mid)
            ensurePathExists(wMLFile)
            dfX.to_csv(wMLFile)
Пример #17
0
        #
        columns = ['{:d}c'.format(i) for i in range(1, components + 1)]
        df_pca = pd.DataFrame(res[:, 0:components],
                              columns=columns,
                              index=dfG.index)
        df_pca = pd.concat([dfG, df_pca], axis='columns')
        #
        s_pca_var = pd.Series(pca.explained_variance_ratio_,
                              index=range(1, (res.shape[1] + 1)),
                              name='explained_variance_ratio')

        print('Saving results to .CSV')
        wPCAFile = 'results/pca/{celltype:s}/{layer:s}/pca-{celltype:s}-{network:s}-{threshold:s}-{layer:s}-dim.csv.gz'.format(
            celltype=celltype,
            network=network,
            threshold=threshold_str,
            layer=layer)
        wSFile = 'results/pca/{celltype:s}/{layer:s}/pca-{celltype:s}-{network:s}-{threshold:s}-{layer:s}-s.csv.gz'.format(
            celltype=celltype,
            network=network,
            threshold=threshold_str,
            layer=layer)
        #
        ensurePathExists(wPCAFile)
        ensurePathExists(wSFile)
        #
        df_pca.to_csv(wPCAFile)
        s_pca_var.to_csv(wSFile, header=True)

    print('Done.')
Пример #18
0
            nx.set_node_attributes(Gt, values=dict_conserved, name='conserved')

        if add_core:
            rCOREFile = '../../02-core_genes/results/pipeline-core/{layer:s}_meiotic_genes.csv'.format(
                layer=layer)
            dfC = pd.read_csv(rCOREFile, index_col=0)
            dict_core = {gene: True for gene in dfC.index.tolist()}
            #
            nx.set_node_attributes(Gt, values=dict_core, name='core')

        # Remove Isolates
        if remove_isolates:
            isolates = list(nx.isolates(Gt))
            print('Removing {n:d} isolated nodes'.format(n=len(isolates)))
            Gt.remove_nodes_from(isolates)

        # Largest Connected Component
        if only_largest_component:
            Gt = get_network_largest_connected_component(Gt)

        # graphml
        print('Export to graphml')
        if network == 'thr':
            wGtfile_graphml = '../gephi-plotting/results/graphml/{celltype:s}/net-{celltype:s}-{network:s}-{threshold:s}-{layer:s}.graphml'.format(
                celltype=celltype,
                network=network,
                threshold=threshold_str,
                layer=layer)
        ensurePathExists(wGtfile_graphml)
        nx.write_graphml(Gt, wGtfile_graphml)
Пример #19
0
    df_DM = pd.read_csv(
        'results/{pipeline:s}/DM_meiotic_genes.csv'.format(pipeline=pipeline),
        index_col='id_string',
        usecols=['id_gene', 'id_string', 'gene'])

    def map_multiple_ids(x, d):
        x = x.split(',')
        return ','.join([d[i] for i in x])

    df['id_gene_HS'] = df['id_string_HS'].apply(
        map_multiple_ids, args=(df_HS['id_gene'].to_dict(), ))
    df['id_gene_MM'] = df['id_string_MM'].apply(
        map_multiple_ids, args=(df_MM['id_gene'].to_dict(), ))
    df['id_gene_DM'] = df['id_string_DM'].apply(
        map_multiple_ids, args=(df_DM['id_gene'].to_dict(), ))

    df['gene_HS'] = df['id_string_HS'].apply(map_multiple_ids,
                                             args=(df_HS['gene'].to_dict(), ))
    df['gene_MM'] = df['id_string_MM'].apply(map_multiple_ids,
                                             args=(df_MM['gene'].to_dict(), ))
    df['gene_DM'] = df['id_string_DM'].apply(map_multiple_ids,
                                             args=(df_DM['gene'].to_dict(), ))

    print("> Exporting")
    wCSVFile = 'results/{pipeline:s}/meta_meiotic_genes_4Paulo.csv'.format(
        pipeline=pipeline)
    ensurePathExists(wCSVFile)
    df.to_csv(wCSVFile)

    print('done.')
Пример #20
0
    ])
    # first9 = first9 + others
    rects_first9 = ax.bar(ind,
                          first9,
                          width,
                          bottom=others,
                          color='#636363',
                          edgecolor='#969696',
                          lw=1,
                          zorder=9)
    rects_others = ax.bar(ind,
                          others,
                          width,
                          bottom=0,
                          color='#bdbdbd',
                          edgecolor='#d9d9d9',
                          lw=1,
                          zorder=9)

    bar_labels(ax=ax, rects=rects_first9)

    ax.set_xticks(ind)
    ax.set_xticklabels(['Human', 'Mouse', 'Insect'], fontsize='small')
    ax.set_ylabel('Variance')
    ax.set_xlim(-0.4, 2.7)

    plt.subplots_adjust(left=0.21, right=0.97, bottom=0.17, top=0.89)
    ensurePathExists(wIMGFile)
    plt.savefig(wIMGFile, dpi=150, bbox_inches=None, pad_inches=0.0)
    plt.close()
def plot_pca(celltype='spermatocyte',
             network='thr',
             threshold=0.5,
             layer='DM',
             modules=None):
    """Plot the PCA variance bars and the pairwise component projections.

    Reads the `-dim`, `-dian`, `-entropy` and `-s` tables for the given
    celltype/network/threshold/layer combination and writes:

    * one PDF with a bar chart of the first nine values of the `-s` table
      (labelled 'Variance'), and
    * one PDF per consecutive component pair (1x2 ... 8x9) with the scatter
      of the two components, the cut-radius circles of the first component,
      and every module defined on that pair drawn as its bounding box minus
      the cut circle, labelled with the number of points it selects.

    Args:
        celltype: Cell type name used in the input/output paths.
        network: Network name used in the input/output paths.
        threshold: Network threshold; '.' is replaced by 'p' in the paths.
        layer: Species layer, one of 'HS', 'MM' or 'DM' (used for the title).
        modules: Iterable of module dicts. Each is read for 'id', 'name' and
            'dim-coords' (with 'xdim', 'ydim', 'xvals', 'yvals' and
            'radius-rank'), and optionally for 'facecolor', 'edgecolor',
            'hatch' and 'name-loc'. Defaults to no modules.

    Raises:
        KeyError: If `layer` is not a known species code, or a module refers
            to a 'radius-rank' that has no circle on its component.
        ValueError: If a module's 'name-loc' is not of the form
            '<upper|lower> <left|right>'.
    """
    modules = [] if modules is None else modules
    threshold_str = str(threshold).replace('.', 'p')
    # Substitutions shared by every message/path template below.
    fields = dict(celltype=celltype,
                  network=network,
                  threshold=threshold_str,
                  layer=layer)
    #
    print('Plotting PCA for {celltype:s}-{network:s}-{threshold:s}-{layer:s}'.
          format(**fields))
    rPCAFile = '../../04-network/results/pca/{celltype:s}/{layer:s}/pca-{celltype:s}-{network:s}-{threshold:s}-{layer:s}-dim.csv.gz'.format(
        **fields)
    rDiAnFile = '../../04-network/results/pca/{celltype:s}/{layer:s}/pca-{celltype:s}-{network:s}-{threshold:s}-{layer:s}-dian.csv.gz'.format(
        **fields)
    rEntFile = '../../04-network/results/pca/{celltype:s}/{layer:s}/pca-{celltype:s}-{network:s}-{threshold:s}-{layer:s}-entropy.csv.gz'.format(
        **fields)
    rSFile = '../../04-network/results/pca/{celltype:s}/{layer:s}/pca-{celltype:s}-{network:s}-{threshold:s}-{layer:s}-s.csv.gz'.format(
        **fields)

    df_pca = pd.read_csv(rPCAFile, index_col=0)
    df_dian = pd.read_csv(rDiAnFile, index_col=0)
    df_ent = pd.read_csv(rEntFile, index_col=0)
    # `read_csv(squeeze=True)` was removed in pandas 2.0; squeezing the single
    # data column after parsing is the documented replacement.
    s = pd.read_csv(rSFile,
                    index_col=0,
                    header=0,
                    encoding='utf-8').squeeze('columns')

    # Rows of the entropy table that define a cut, ordered by component/rank.
    df_cp = df_ent.loc[df_ent['cut-rank'].notnull(), :].sort_values(
        ['dim', 'cut-rank'])
    # A single repeating style cycle shared by all figures: it is not reset
    # per component pair, so circle styles continue from one figure to the next.
    cyc = (cycler(edgecolor=['#1f77b4', '#ff7f0e', '#2ca02c']) +
           cycler(linestyle=['solid', 'dashed', 'dotted']))()

    # Plot Variance
    s_cumsum = s.cumsum()
    # Number of leading entries whose cumulative sum is still below 0.95.
    n_eigen_95 = s_cumsum[(s_cumsum < 0.95)].shape[0]

    n_bars = 9
    ind = np.arange(n_bars)
    height = s.iloc[:n_bars].values
    width = 0.60
    xticklabels = (ind + 1)

    fig, ax = plt.subplots(figsize=(3, 3))
    ax.bar(ind,
           height,
           width,
           color='#636363',
           edgecolor='#969696',
           zorder=9,
           lw=1)
    ax.set_xticks(ind)
    ax.set_xticklabels(xticklabels)

    species_name = {'HS': 'Human', 'MM': 'Mouse', 'DM': 'Insect'}
    title = '{species:s}'.format(species=species_name[layer])
    ax.set_title(title)

    ax.annotate('95% with {:,d}\nsingular vectors'.format(n_eigen_95),
                xy=(0.97, 0.97),
                xycoords="axes fraction",
                ha='right',
                va='top',
                fontsize='small')
    ax.set_xlabel('Components')
    ax.set_ylabel('Variance')
    ax.grid(axis='y')
    plt.subplots_adjust(left=0.21, right=0.97, bottom=0.17, top=0.89)
    wIMGFile = 'images/pca-entropy/{celltype:s}/{layer:s}/img-pca-{celltype:s}-{network:s}-{threshold:s}-{layer:s}-variance.pdf'.format(
        **fields)
    ensurePathExists(wIMGFile)
    plt.savefig(wIMGFile, dpi=150, bbox_inches=None, pad_inches=0.0)
    plt.close()

    # Plot Projections: one figure per consecutive component pair.
    for dim in range(1, 9):
        print('- Dim: {dim:d}'.format(dim=dim))
        x = str(dim) + 'c'
        y = str(dim + 1) + 'c'
        xs = df_pca[x].tolist()
        ys = df_pca[y].tolist()
        #
        facecolors = '#c7c7c7'
        edgecolors = 'black'

        fig, ax = plt.subplots(figsize=(3, 3))
        ax.scatter(xs,
                   ys,
                   c=facecolors,
                   marker='o',
                   edgecolor=edgecolors,
                   lw=0.2,
                   s=10,
                   zorder=5,
                   rasterized=True)

        # Draw a X at the center
        ax.plot(0, 0, color='#2ca02c', marker='x', ms=16)

        # Draw lines at the center
        ax.axhline(y=0, c='black', lw=0.75, ls='-.', zorder=2)
        ax.axvline(x=0, c='black', lw=0.75, ls='-.', zorder=2)

        ax.set_title('Components {dim1} and {dim2}'.format(dim1=dim,
                                                           dim2=(dim + 1)))
        ax.set_xlabel('Component {dim1:d}'.format(dim1=dim))
        ax.set_ylabel('Component {dim2:d}'.format(dim2=dim + 1))

        ax.grid()

        # y-limits as they stand after the scatter; used to offset the module
        # labels and to decide whether the axis needs extra headroom.
        ylimmin, ylimmax = ax.get_ylim()
        ylimdiff = abs(ylimmax) + abs(ylimmin)
        yperc = 0.035 * ylimdiff
        yspac = 0.8

        # Radius Circles: one per cut defined on this component, by rank.
        df_cp_tmp = df_cp.loc[(df_cp['dim'] == dim), :]
        sg_circles = {}
        for idx, cut_rank, radius in df_cp_tmp[['cut-rank',
                                                'radius-start']].to_records():
            # Shapely Circle (kept for the box-minus-circle geometry below)
            sg_circles[cut_rank] = sg.Point(0, 0).buffer(radius)
            # Mpl Circle
            props = next(cyc)
            mpl_circle = mp.Circle((0, 0),
                                   radius=radius,
                                   facecolor='none',
                                   zorder=6,
                                   **props)
            ax.add_patch(mpl_circle)

        # Draw the modules defined on exactly this (dim, dim + 1) pair.
        for module in modules:
            #
            xc = module['dim-coords']['xdim']
            yc = module['dim-coords']['ydim']
            if (dim != xc) or ((dim + 1) != yc):
                continue
            #
            mid = module['id']
            mname = module['name']

            # only rename a DM-M12
            if mname in dict_replace:
                mname = dict_replace.get(mname)
            #
            cx = '{xc:d}c'.format(xc=xc)  # label 1 component
            cy = '{yc:d}c'.format(yc=yc)  # label 2 component
            cxy = '{xc:d}c-{yc:d}c-dist'.format(xc=xc,
                                                yc=yc)  # label-1c-2c-dist
            #
            facecolor = module.get('facecolor', 'black')
            edgecolor = module.get('edgecolor', 'none')
            hatch = module.get('hatch', None)
            cxl, cxh = module['dim-coords']['xvals']
            cyl, cyh = module['dim-coords']['yvals']
            cut_rank = module['dim-coords']['radius-rank']
            sg_circle = sg_circles[cut_rank]
            # Radius of the circle
            cut_radius = df_ent.loc[((df_ent['dim'] == xc) &
                                     (df_ent['cut-rank'] == cut_rank)),
                                    'radius-start'].squeeze()

            # Select points in module: inside the box (bounds inclusive) and
            # at least `cut_radius` away according to the distance table.
            df_pca_tmp = df_pca.loc[((df_pca[cx] >= cxl) &
                                     (df_pca[cx] <= cxh) &
                                     (df_pca[cy] >= cyl) &
                                     (df_pca[cy] <= cyh) &
                                     (df_dian[cxy] >= cut_radius)),
                                    ['gene', cx, cy]].copy()
            n_points = df_pca_tmp.shape[0]

            name = "M{mid:d}-{mname:s} (n={n:,d})".format(mid=mid,
                                                          mname=mname,
                                                          n=n_points)
            # name loc: validate up front so a typo cannot leave the label
            # position unset or reuse the previous module's position.
            name_loc = module.get('name-loc', 'upper left')
            loc_parts = name_loc.split(' ')
            if (len(loc_parts) != 2 or loc_parts[0] not in ('upper', 'lower')
                    or loc_parts[1] not in ('left', 'right')):
                raise ValueError(
                    "Invalid 'name-loc' {loc!r} for module {mid!r}; expected "
                    "'<upper|lower> <left|right>'".format(loc=name_loc,
                                                          mid=mid))
            name_loc_upper_lower, name_loc_left_right = loc_parts

            if name_loc_upper_lower == 'upper':
                ytext = cyh + yperc
                near_edge = abs(ylimmax - ytext) < 0.5
            else:
                ytext = cyl - yperc
                near_edge = abs(ylimmin - ytext) < 0.5
            # add some space to both y-limits when the label sits near one
            if near_edge:
                ax.set_ylim((ylimmin - yspac, ylimmax + yspac))

            if name_loc_left_right == 'left':
                xtext, ha = cxl, 'left'
            else:
                xtext, ha = cxh, 'right'

            # Module region: bounding box with the cut circle removed.
            sg_box = sg.box(cxl, cyl, cxh, cyh)
            sg_diff = sg_box.difference(sg_circle)

            ax.add_patch(
                descartes.PolygonPatch(sg_diff,
                                       facecolor=facecolor,
                                       edgecolor=edgecolor,
                                       lw=1,
                                       zorder=2,
                                       alpha=1,
                                       hatch=hatch))

            ax.annotate(text=name,
                        xy=(xtext, ytext),
                        fontsize='x-small',
                        zorder=8,
                        fontweight='bold',
                        ha=ha,
                        va='center')

        plt.subplots_adjust(left=0.21, right=0.97, bottom=0.17, top=0.89)
        wIMGFile = 'images/pca-entropy/{celltype:s}/{layer:s}/img-pca-{celltype:s}-{network:s}-{threshold:s}-{layer:s}-C{dimx:d}x{dimy:d}.pdf'.format(
            dimx=dim,
            dimy=(dim + 1),
            **fields)
        ensurePathExists(wIMGFile)
        plt.savefig(wIMGFile, dpi=300, bbox_inches=None, pad_inches=0.0)
        plt.close()
Пример #22
0
                'tau': row['tau-norm'],
                'tau_scaler': row['scaler(tau-norm)'],
                #
                'color': edge_color_hex,
                'gender': gender
            })

    G_patient = G.copy()
    G_tau = G.copy()

    #
    # Set weight
    #
    nx.set_edge_attributes(G_patient,
                           nx.get_edge_attributes(G, 'patient_scaler'),
                           'weight')
    nx.set_edge_attributes(G_tau, nx.get_edge_attributes(G, 'tau_scaler'),
                           'weight')

    #
    # Export
    #
    wGtauFile = 'results/ddi_network_tau.gpickle'
    ensurePathExists(wGtauFile)
    nx.write_gpickle(G_tau, wGtauFile)
    nx.write_graphml(G_tau, 'results/ddi_network_tau.graphml')
    #
    wGpatientFile = 'results/ddi_network_patient.gpickle'
    ensurePathExists(wGpatientFile)
    nx.write_gpickle(G_patient, wGpatientFile)
Пример #23
0
    gdp1.boundary.plot(ax=ax, lw=0.5, edgecolor='#c7c7c7', zorder=4)
    # Counties
    gcounties.boundary.plot(ax=ax, lw=0.75, color='#d62728', zorder=8)
    # Highways
    ghighways.plot(ax=ax, lw=0.8, color='#7f7f7f', zorder=7)

    def xy_format(x, pos):
        """Return the tick value divided by 10,000, rendered with no decimals.

        `pos` is not used; it is accepted because this function is wrapped in
        a FuncFormatter, whose callback receives the value and the position.
        """
        scaled = x / 1e4
        return format(scaled, ".0f")
    
    # Axis Label
    y_formatter = FuncFormatter(xy_format)
    x_formatter = FuncFormatter(xy_format)
    ax.xaxis.set_major_formatter(x_formatter)
    ax.yaxis.set_major_formatter(y_formatter)

    # Plot Zoom Polygon
    ax.plot(*zoom_polygon.exterior.xy, color='green', lw=1.5, zorder=10)

    # Zoom in Indiana
    minx, miny, maxx, maxy = gdp1.total_bounds
    #minx, miny, maxx, maxy = zoom_polygon.bounds
    ax.set_xlim(minx, maxx)
    ax.set_ylim(miny, maxy)

    # Save
    plt.subplots_adjust(left=0.04, right=0.98, bottom=0.07, top=0.95, wspace=0.0, hspace=0.0)
    wIMGfile = 'images/img-indianapolis.pdf'
    ensurePathExists(wIMGfile)
    fig.savefig(wIMGfile)
    plt.close()
Пример #24
0
def export_genes(celltype='spermatocyte',
                 network='thr',
                 threshold=0.5,
                 layer='DM',
                 modules=None):
    """Export the PCA rows that fall inside each module to one CSV file.

    Reads the `-dim`, `-dian` and `-entropy` tables for the given
    celltype/network/threshold/layer combination. For every module it keeps
    the rows of the `-dim` table that lie strictly inside the module's box on
    its two components and whose distance value exceeds the module's cut
    radius, tags them with 'module-id' and 'module-name', and writes the
    concatenation of all modules to the `-modules.csv.gz` file.

    Args:
        celltype: Cell type name used in the input/output paths.
        network: Network name used in the input/output paths.
        threshold: Network threshold; '.' is replaced by 'p' in the paths.
        layer: Species layer code used in the input/output paths.
        modules: Iterable of module dicts. Each is read for 'id', 'name' and
            'dim-coords' (with 'xdim', 'ydim', 'xvals', 'yvals' and
            'radius-rank'). Defaults to no modules, in which case a table
            with the same columns and no rows is exported.
    """
    modules = [] if modules is None else modules
    threshold_str = str(threshold).replace('.', 'p')
    # Substitutions shared by every message/path template below.
    fields = dict(celltype=celltype,
                  network=network,
                  threshold=threshold_str,
                  layer=layer)
    #
    print(
        'Exporting genes. PCA modules of {celltype:s}-{network:s}-{threshold:s}-{layer:s}'
        .format(**fields))
    rPCAFile = '../../04-network/results/pca/{celltype:s}/{layer:s}/pca-{celltype:s}-{network:s}-{threshold:s}-{layer:s}-dim.csv.gz'.format(
        **fields)
    rDiAnFile = '../../04-network/results/pca/{celltype:s}/{layer:s}/pca-{celltype:s}-{network:s}-{threshold:s}-{layer:s}-dian.csv.gz'.format(
        **fields)
    rEntFile = '../../04-network/results/pca/{celltype:s}/{layer:s}/pca-{celltype:s}-{network:s}-{threshold:s}-{layer:s}-entropy.csv.gz'.format(
        **fields)
    #
    wCSVFile = 'results/pca-entropy/{celltype:s}/{layer:s}/pca-{celltype:s}-{network:s}-{threshold:s}-{layer:s}-modules.csv.gz'.format(
        **fields)

    df_pca = pd.read_csv(rPCAFile, index_col=0)
    df_dian = pd.read_csv(rDiAnFile, index_col=0)
    df_ent = pd.read_csv(rEntFile, index_col=0)

    ldfS = []
    for module in modules:

        mid = module['id']
        mname = module['name']
        print("Computing Module {mid:d}-{mname:s}".format(mid=mid,
                                                          mname=mname))
        #
        coords = module['dim-coords']
        xc = coords['xdim']
        yc = coords['ydim']
        ld1 = '{xc:d}c'.format(xc=xc)  # label 1 component
        ld2 = '{yc:d}c'.format(yc=yc)  # label 2 component
        l12d = '{xc:d}c-{yc:d}c-dist'.format(xc=xc, yc=yc)  # label-1c-2c-dist

        x0, x1 = coords['xvals']
        y0, y1 = coords['yvals']
        cut_rank = coords['radius-rank']

        # Radius of the circle
        cut_radius = df_ent.loc[((df_ent['dim'] == xc) &
                                 (df_ent['cut-rank'] == cut_rank)),
                                'radius-start'].squeeze()

        # Select genes in module.
        # NOTE(review): bounds are strict here (>, <), while plot_pca selects
        # with inclusive bounds (>=, <=); rows exactly on a module edge are
        # counted there but not exported here - confirm which is intended.
        in_box = ((df_pca[ld1] > x0) & (df_pca[ld1] < x1) &
                  (df_pca[ld2] > y0) & (df_pca[ld2] < y1))
        outside_cut = df_dian[l12d] > cut_radius
        df_pca_tmp = df_pca.loc[in_box & outside_cut, :].copy()
        #
        df_pca_tmp['module-id'] = mid
        df_pca_tmp['module-name'] = mname

        ldfS.append(df_pca_tmp)

    if ldfS:
        dfR = pd.concat(ldfS, axis=0)
    else:
        # pd.concat raises ValueError on an empty list, which made the
        # default call (no modules) crash; export an empty table instead.
        dfR = df_pca.iloc[0:0].copy()
        dfR['module-id'] = []
        dfR['module-name'] = []
    # Export
    print("Exporting")
    ensurePathExists(wCSVFile)
    dfR.to_csv(wCSVFile)