Exemplo n.º 1
0
def locality(args):
    """Compute network locality for GWAS terms and write the result as TSV.

    Parameters
    ----------
    args : namespace-like object
        Attributes read here: ``out`` (an output path, or ``sys.stdout``),
        ``cob``, ``gwas``, ``sig_edge_zscore`` and ``terms``.  When ``out``
        is a path it is rewritten in place to
        ``<out without '.tsv'>_Locality.tsv``.

    Returns
    -------
    None
        Returns early, before loading anything, when the output file
        already exists.
    """
    log = coblog()
    log("\n"
        "-----------------------\n"
        "   Network Locality    \n"
        "-----------------------\n")
    # Path handling only makes sense for a real file; sys.stdout is handed
    # to pandas untouched (os.path functions would raise on a stream).
    if args.out != sys.stdout:
        args.out = "{}_Locality.tsv".format(args.out.replace(".tsv", ""))
        out_dir = os.path.dirname(args.out)
        if out_dir != "":
            os.makedirs(out_dir, exist_ok=True)
        # Test the file that will actually be written, not a re-suffixed name
        if os.path.exists(args.out):
            log("{} exists! Skipping!".format(args.out))
            return None
    # Grab the COB object
    cob = co.COB(args.cob)
    gwas = co.GWAS(args.gwas)
    # If there is a different score for 'significant', update the COB object
    if args.sig_edge_zscore is not None:
        cob.set_sig_edge_zscore(args.sig_edge_zscore)
    if "all" in args.terms:
        # 'all' requests every term in the GWAS
        terms = gwas.iter_terms()
    else:
        # Otherwise look up each requested term in the GWAS
        terms = (gwas[x] for x in args.terms)

    # One generate_data() result per term becomes one row of the table
    results = pd.DataFrame([generate_data(cob, term, args) for term in terms])
    results.to_csv(args.out, sep="\t", index=False)
Exemplo n.º 2
0
def locality(args):
    """Write a per-term network locality table (tab separated).

    Reads ``args.out``, ``args.cob``, ``args.gwas``, ``args.sig_edge_zscore``
    and ``args.terms``.  Unless ``args.out`` is ``sys.stdout``, it is replaced
    by ``<out without '.tsv'>_Locality.tsv`` and the run is skipped (returning
    None) when that file is already present.
    """
    log = coblog()
    log('\n'
        '-----------------------\n'
        '   Network Locality    \n'
        '-----------------------\n'
    )
    writing_to_file = args.out != sys.stdout
    if writing_to_file:
        # Generate output dirs
        args.out = "{}_Locality.tsv".format(args.out.replace('.tsv', ''))
        parent = os.path.dirname(args.out)
        if parent != '':
            os.makedirs(parent, exist_ok=True)
        # The existence check must look at the final output name itself
        if os.path.exists(args.out):
            log("{} exists! Skipping!".format(args.out))
            return None
    # Grab the COB object
    cob = co.COB(args.cob)
    gwas = co.GWAS(args.gwas)
    # If there is a different score for 'significant', update the COB object
    if args.sig_edge_zscore is not None:
        cob.set_sig_edge_zscore(args.sig_edge_zscore)
    # 'all' selects every GWAS term; otherwise fetch the named terms lazily
    if 'all' in args.terms:
        terms = gwas.iter_terms()
    else:
        terms = (gwas[name] for name in args.terms)

    # Build one row per term and write the table without the row index
    rows = [generate_data(cob, term, args) for term in terms]
    pd.DataFrame(rows).to_csv(args.out, sep='\t', index=False)
Exemplo n.º 3
0
def cob_health(args):
    """Write the health report (plots, summary, QC and GO tables) for a network.

    Every output is named ``<args.out><suffix>`` and is only produced when no
    file of that name exists yet; otherwise a "Skipped" message is logged.

    Parameters
    ----------
    args : namespace-like object
        Attributes read here: ``cob``, ``out``, ``refgen``, ``go``,
        ``min_term_size``, ``max_term_size`` and ``num_bootstraps``.
        ``out`` is replaced by ``'<cob.name>_Health'`` when it is None.
    """
    import warnings  # function-scope import: only needed around savefig

    log = coblog()
    log('\n'
        '-----------------------\n'
        '   Network Health      \n'
        '-----------------------\n'
    )
    cob = co.COB(args.cob)
    if args.out is None:
        args.out = '{}_Health'.format(cob.name)
    prefix = args.out

    def tail_pval(observed, null_values):
        """Share of bootstrap values at least as extreme as ``observed``.

        The tail is chosen by the sign of ``observed``: upper tail when it is
        positive, lower tail otherwise.
        """
        if observed > 0:
            hits = np.count_nonzero(null_values >= observed)
        else:
            hits = np.count_nonzero(null_values <= observed)
        return hits / args.num_bootstraps

    def save_figure(filename):
        """Save the current figure, ignoring FutureWarning raised on the way."""
        # Filtering the warning (instead of catching it as an exception)
        # lets savefig finish even when warnings are escalated to errors.
        with warnings.catch_warnings():
            warnings.simplefilter('ignore', FutureWarning)
            plt.savefig(filename)

    log('Plotting Scores ----------------------------------------------------')
    pcc_png = '{}_CoexPCC_raw.png'.format(prefix)
    if not path.exists(pcc_png):
        cob.plot_scores(pcc_png, pcc=True)
    else:
        log('Skipped Raw.')

    zscore_png = '{}_CoexScore_zscore.png'.format(prefix)
    if not path.exists(zscore_png):
        cob.plot_scores(zscore_png, pcc=False)
    else:
        log('Skipped Norm.')

    log('Plotting Expression ------------------------------------------------')
    expr_raw_png = '{}_Expr_raw.png'.format(prefix)
    if not path.exists(expr_raw_png):
        cob.plot(expr_raw_png, raw=True, cluster_method=None)
    else:
        log('Skipped raw.')
    expr_norm_png = '{}_Expr_norm.png'.format(prefix)
    if not path.exists(expr_norm_png):
        cob.plot(expr_norm_png, raw=False)
    else:
        log('Skipped norm.')

    log('Plotting Cluster Expression-----------------------------------------')
    expr_cluster_png = '{}_Expr_cluster.png'.format(prefix)
    if not path.exists(expr_cluster_png):
        cob.plot(expr_cluster_png, raw=False, avg_by_cluster=True)
    else:
        log('Skipped cluster.')

    log('Printing Summary ---------------------------------------------------')
    summary_txt = '{}.summary.txt'.format(prefix)
    if not path.exists(summary_txt):
        with open(summary_txt, 'w') as OUT:
            # Print out the network summary
            cob.summary(file=OUT)
    else:
        log('Skipped summary.')

    log('Printing QC Statistics ---------------------------------------------')
    if args.refgen is not None:
        qc_txt = '{}_qc_gene.txt'.format(prefix)
        if not path.exists(qc_txt):
            # Break the QC values down per chromosome
            refgen = co.RefGen(args.refgen)
            gene_qc = cob._ft('qc_gene')
            # .copy() so the new column is not written into a filtered view
            gene_qc = gene_qc[gene_qc.pass_membership].copy()
            gene_qc['chrom'] = [
                'chr' + str(refgen[x].chrom) for x in gene_qc.index
            ]
            gene_qc = gene_qc.groupby('chrom').sum()
            # Add totals at the bottom (every column except the first)
            totals = gene_qc.iloc[:, 1:].sum()
            gene_qc.loc['TOTAL'] = totals
            gene_qc.to_csv(qc_txt, sep='\t')
        else:
            log('Skipped QC summary.')

    log('Plotting Degree Distribution ---------------------------------------')
    degree_png = '{}_DegreeDist.png'.format(prefix)
    if not path.exists(degree_png):
        degree = cob.degree['Degree'].values
        fit = powerlaw.Fit(degree, discrete=True, xmin=1)
        # get an axis
        ax = plt.subplot()
        # Empirical CCDF plus the three fitted distributions
        fit.plot_ccdf(ax=ax, color='r', linewidth=3, label='Empirical Data')
        fit.power_law.plot_ccdf(
            ax=ax, color='b', linestyle='--', label='Power law'
        )
        fit.truncated_power_law.plot_ccdf(
            ax=ax, color='k', linestyle='--', label='Truncated Power'
        )
        fit.exponential.plot_ccdf(
            ax=ax, color='g', linestyle='--', label='Exponential'
        )
        ax.set_ylabel("p(Degree≥x)")
        ax.set_xlabel("Degree Frequency")
        ax.legend(loc='best')
        plt.title('{} Degree Distribution'.format(cob.name))
        save_figure(degree_png)
    else:
        log('Skipping Degree Dist.')

    log('Plotting GO --------------------------------------------------------')
    if args.go is not None:
        go_csv = '{}_GO.csv'.format(prefix)
        if not path.exists(go_csv):
            go = co.GOnt(args.go)
            term_ids = []
            term_sizes = []
            density_emp = []
            density_pvals = []
            locality_emp = []
            locality_pvals = []
            terms_tested = 0
            for term in go.iter_terms():
                # Keep only the loci that are contained in the COB
                term.loci = [locus for locus in term.loci if locus in cob]
                if len(term) < args.min_term_size or len(term) > args.max_term_size:
                    continue
                term_ids.append(term.id)
                term_sizes.append(len(term))
                n_loci = len(term.loci)
                # ------ Density
                density = cob.density(term.loci)
                density_emp.append(density)
                # Null distribution: density of equally sized random gene sets
                density_bs = np.array([
                    cob.density(cob.refgen.random_genes(n=n_loci))
                    for _ in range(args.num_bootstraps)
                ])
                density_pvals.append(tail_pval(density, density_bs))

                # ------- Locality
                locality = cob.locality(
                    term.loci, include_regression=True
                ).resid.mean()
                locality_emp.append(locality)
                # Null distribution: mean residual of random gene sets
                locality_bs = np.array([
                    cob.locality(
                        cob.refgen.random_genes(n=n_loci),
                        include_regression=True
                    ).resid.mean()
                    for _ in range(args.num_bootstraps)
                ])
                locality_pvals.append(tail_pval(locality, locality_bs))
                # -------------
                terms_tested += 1
                if terms_tested % 100 == 0 and terms_tested > 0:
                    log('Processed {} terms'.format(terms_tested))
            go_enrichment = pd.DataFrame({
                'id' : term_ids,
                'size' : term_sizes,
                'density' : density_emp,
                'density_pval' : density_pvals,
                'locality' : locality_emp,
                'locality_pval' : locality_pvals
            })
            go_enrichment\
                .sort_values(by='density_pval', ascending=True)\
                .to_csv(go_csv, index=False)
            if terms_tested == 0:
                log.warn('No GO terms met your min/max gene criteria!')
        else:
            go_enrichment = pd.read_csv(go_csv)

        go_png = '{}_GO.png'.format(prefix)
        if path.exists(go_png):
            log('Skipping GO Volcano.')
        elif len(go_enrichment) == 0:
            # The fold-enrichment ratio below divides by the number of terms
            log('No GO terms to plot. Skipping GO Volcano.')
        else:
            volcano = go_enrichment.copy()
            # Convert pvals to -log10.  A p-value of 0 (no bootstrap as
            # extreme as the term) becomes infinite and would not be drawn,
            # so place those points one unit above the largest finite value.
            with np.errstate(divide='ignore'):
                for pval_col in ('density_pval', 'locality_pval'):
                    logp = -1 * np.log10(volcano[pval_col])
                    finite = np.isfinite(logp)
                    ceiling = logp[finite].max() if finite.any() else 0.0
                    logp[~finite] = ceiling + 1
                    volcano[pval_col] = logp
            plt.clf()
            figure, axes = plt.subplots(3, 2, figsize=(12, 12))
            # One column of panels per metric: (column, x label, y label)
            panels = (
                ('density', 'Empirical Density (Z-Score)', 'Density'),
                ('locality', 'Empirical Locality (Residual)', 'Locality'),
            )
            for col, (metric, emp_label, metric_label) in enumerate(panels):
                pval_col = metric + '_pval'
                # 1.3 is roughly -log10(0.05)
                significant = volcano[volcano[pval_col] > 1.3]
                # Row 0: metric value against significance
                axes[0, col].scatter(
                    volcano[metric], volcano[pval_col], alpha=0.05
                )
                axes[0, col].set_xlabel(emp_label)
                axes[0, col].set_ylabel('Bootstraped -log10(p-value)')
                # Observed significant terms over the 5% expected by chance
                fold = len(significant) / (0.05 * len(volcano))
                axes[0, col].axhline(y=-1 * np.log10(0.05), color='red')
                axes[0, col].text(
                    0, -0.2,
                    '{:.3g} Fold Enrichement'.format(fold),
                )
                # Row 1: term size against significance
                axes[1, col].scatter(
                    volcano['size'], volcano[pval_col], alpha=0.05
                )
                axes[1, col].set_xlabel('Term Size')
                axes[1, col].set_ylabel('Bootstrapped -log10(p-value)')
                axes[1, col].axhline(y=-1 * np.log10(0.05), color='red')
                # Row 2: term size against the metric, significant terms in red
                axes[2, col].scatter(
                    volcano['size'], volcano[metric], alpha=0.05
                )
                axes[2, col].scatter(
                    significant['size'], significant[metric],
                    alpha=0.05,
                    color='r'
                )
                axes[2, col].set_ylabel(metric_label)
                axes[2, col].set_xlabel('Term Size')
            # Save Figure
            plt.tight_layout()
            save_figure(go_png)
Exemplo n.º 4
0
def cob_health(args):
    """Write the health report (plots, summary, QC and GO tables) for a network.

    Every output is named ``<args.out><suffix>``.  An output whose file is
    already present is not regenerated; a "Skipped" message is logged instead.

    Parameters
    ----------
    args : namespace-like object
        Attributes read here: ``cob``, ``out``, ``refgen``, ``go``,
        ``max_terms``, ``min_term_size``, ``max_term_size``,
        ``two_tailed_GO`` and ``num_bootstraps``.  ``out`` is replaced by
        ``'<cob.name>_Health'`` when it is None.
    """
    import warnings  # function-scope import: only needed around savefig

    log = coblog()
    log('\n'
        '-----------------------\n'
        '   Network Health      \n'
        '-----------------------\n')
    cob = co.COB(args.cob)
    if args.out is None:
        args.out = '{}_Health'.format(cob.name)
    out = args.out

    def bootstrap_pval(observed, bootstraps):
        """Fraction of bootstrap values at least as extreme as ``observed``.

        Uses the upper tail for a positive ``observed`` and the lower tail
        otherwise.
        """
        if observed > 0:
            extreme = np.count_nonzero(bootstraps >= observed)
        else:
            extreme = np.count_nonzero(bootstraps <= observed)
        return extreme / args.num_bootstraps

    def save_figure(filename):
        """Save the current figure with FutureWarning filtered out."""
        # A filter (rather than `except FutureWarning`) guarantees the file
        # is written even when warnings are configured to raise.
        with warnings.catch_warnings():
            warnings.simplefilter('ignore', FutureWarning)
            plt.savefig(filename)

    log('Plotting Scores ----------------------------------------------------')
    pcc_png = '{}_CoexPCC_raw.png'.format(out)
    if not path.exists(pcc_png):
        cob.plot_scores(pcc_png, pcc=True)
    else:
        log('Skipped Raw.')

    zscore_png = '{}_CoexScore_zscore.png'.format(out)
    if not path.exists(zscore_png):
        cob.plot_scores(zscore_png, pcc=False)
    else:
        log('Skipped Norm.')

    log('Plotting Expression ------------------------------------------------')
    expr_raw_png = '{}_Expr_raw.png'.format(out)
    if not path.exists(expr_raw_png):
        cob.plot(expr_raw_png,
                 include_accession_labels=True,
                 raw=True,
                 cluster_method=None)
    else:
        log('Skipped raw.')
    expr_norm_png = '{}_Expr_norm.png'.format(out)
    if not path.exists(expr_norm_png):
        cob.plot(expr_norm_png,
                 include_accession_labels=True,
                 raw=False,
                 cluster_method='leaf',
                 cluster_accessions=True)
    else:
        log('Skipped norm.')

    log('Plotting Cluster Expression-----------------------------------------')
    expr_cluster_png = '{}_Expr_cluster.png'.format(out)
    if not path.exists(expr_cluster_png):
        cob.plot(expr_cluster_png,
                 include_accession_labels=True,
                 raw=False,
                 cluster_accessions=True,
                 avg_by_cluster=True)
    else:
        log('Skipped cluster.')

    log('Printing Summary ---------------------------------------------------')
    summary_txt = '{}.summary.txt'.format(out)
    if not path.exists(summary_txt):
        with open(summary_txt, 'w') as OUT:
            # Print out the network summary
            cob.summary(file=OUT)
    else:
        log('Skipped summary.')

    log('Printing QC Statistics ---------------------------------------------')
    if args.refgen is not None:
        qc_txt = '{}_qc_gene.txt'.format(out)
        if not path.exists(qc_txt):
            # Break the QC values down per chromosome
            refgen = co.RefGen(args.refgen)
            gene_qc = cob._bcolz('qc_gene')
            # .copy() so the new column is not written into a filtered view
            gene_qc = gene_qc[gene_qc.pass_membership].copy()
            gene_qc['chrom'] = [
                'chr' + str(refgen[x].chrom) for x in gene_qc.index
            ]
            gene_qc = gene_qc.groupby('chrom').sum()
            # Add totals at the bottom (every column except the first)
            totals = gene_qc.iloc[:, 1:].sum()
            gene_qc.loc['TOTAL'] = totals
            gene_qc.to_csv(qc_txt, sep='\t')
        else:
            log('Skipped QC summary.')

    log('Plotting Degree Distribution ---------------------------------------')
    degree_png = '{}_DegreeDist.png'.format(out)
    if not path.exists(degree_png):
        degree = cob.degree['Degree'].values
        # Using powerlaw makes run-time warning the first time you use it.
        # This is still an open issue on the creators github.
        # The creator recommends removing this warning as long as there is
        # a fit.  The suppression is scoped to this section so numpy's
        # error state is restored for the rest of the process.
        with np.errstate(divide='ignore', invalid='ignore'):
            fit = powerlaw.Fit(degree, discrete=True, xmin=1)
            # get an axis
            ax = plt.subplot()
            # Empirical CCDF plus the three fitted distributions
            fit.plot_ccdf(ax=ax,
                          color='r',
                          linewidth=3,
                          label='Empirical Data')
            fit.power_law.plot_ccdf(ax=ax,
                                    color='b',
                                    linestyle='--',
                                    label='Power law')
            fit.truncated_power_law.plot_ccdf(ax=ax,
                                              color='k',
                                              linestyle='--',
                                              label='Truncated Power')
            fit.exponential.plot_ccdf(ax=ax,
                                      color='g',
                                      linestyle='--',
                                      label='Exponential')
        ax.set_ylabel("p(Degree≥x)")
        ax.set_xlabel("Degree Frequency")
        ax.legend(loc='best')
        plt.title('{} Degree Distribution'.format(cob.name))
        save_figure(degree_png)
    else:
        log('Skipping Degree Dist.')

    log('Plotting GO --------------------------------------------------------')
    if args.go is not None:
        go_csv = '{}_GO.csv'.format(out)
        if not path.exists(go_csv):
            go = co.GOnt(args.go)
            term_ids = []
            term_desc = []
            term_sizes = []
            density_emp = []
            density_pvals = []
            locality_emp = []
            locality_pvals = []
            terms_tested = 0
            if args.max_terms is not None:
                log('Limiting to {} GO Terms', args.max_terms)
                terms = go.rand(n=args.max_terms,
                                min_term_size=args.min_term_size,
                                max_term_size=args.max_term_size)
            else:
                terms = go.iter_terms(min_term_size=args.min_term_size,
                                      max_term_size=args.max_term_size)
            for term in terms:
                # Keep only the loci that are contained in the COB, then
                # re-check the size limits on what is left
                term.loci = [locus for locus in term.loci if locus in cob]
                if len(term) < args.min_term_size or len(
                        term) > args.max_term_size:
                    continue
                # Density is computed once and reused for the p-value below
                density = cob.density(term.loci)
                # One-tailed mode only tests terms with positive density
                if args.two_tailed_GO is False and not density > 0:
                    continue
                density_emp.append(density)
                term_ids.append(term.id)
                term_sizes.append(len(term))
                term_desc.append(str(term.desc))
                n_loci = len(term.loci)
                # ------ Density
                # Null distribution: density of equally sized random gene sets
                density_bs = np.array([
                    cob.density(cob.refgen.random_genes(n=n_loci))
                    for _ in range(args.num_bootstraps)
                ])
                density_pvals.append(bootstrap_pval(density, density_bs))

                # ------- Locality
                locality = cob.locality(term.loci,
                                        include_regression=True).resid.mean()
                locality_emp.append(locality)
                # Null distribution: mean residual of random gene sets
                locality_bs = np.array([
                    cob.locality(
                        cob.refgen.random_genes(n=n_loci),
                        include_regression=True
                    ).resid.mean()
                    for _ in range(args.num_bootstraps)
                ])
                locality_pvals.append(bootstrap_pval(locality, locality_bs))
                # -------------
                terms_tested += 1
                if terms_tested % 100 == 0 and terms_tested > 0:
                    log('Processed {} terms'.format(terms_tested))
            go_enrichment = pd.DataFrame({
                'GOTerm': term_ids,
                'desc': term_desc,
                'size': term_sizes,
                'density': density_emp,
                'density_pval': density_pvals,
                'locality': locality_emp,
                'locality_pval': locality_pvals
            })
            go_enrichment\
                .sort_values(by='density_pval', ascending=True)\
                .to_csv(go_csv, index=False)
            if terms_tested == 0:
                log.warn('No GO terms met your min/max gene criteria!')
        else:
            go_enrichment = pd.read_csv(go_csv)

        go_png = '{}_GO.png'.format(out)
        if path.exists(go_png):
            log('Skipping GO Volcano.')
        elif len(go_enrichment) == 0:
            # Both the max() below and the fold-enrichment ratio need rows
            log('No GO terms to plot. Skipping GO Volcano.')
        else:
            volcano = go_enrichment.copy()
            # Convert pvals to -log10
            with np.errstate(divide='ignore'):
                # When no bootstraps are more extreme than the term, the
                # minus log pval is infinite; move those points to one unit
                # above the largest finite value so they are plotted.
                for pval_col in ('density_pval', 'locality_pval'):
                    logp = -1 * np.log10(volcano[pval_col])
                    finite = np.isfinite(logp)
                    ceiling = logp[finite].max() if finite.any() else 0.0
                    logp[~finite] = ceiling + 1
                    volcano[pval_col] = logp
            plt.clf()
            figure, axes = plt.subplots(3, 2, figsize=(12, 12))
            cutoff = -1 * np.log10(0.05)
            # One column of panels per metric: (column, x label, y label)
            panels = (
                ('density', 'Empirical Density (Z-Score)', 'Density'),
                ('locality', 'Empirical Locality (Residual)', 'Locality'),
            )
            for col, (metric, emp_label, metric_label) in enumerate(panels):
                pval_col = metric + '_pval'
                # 1.3 is roughly -log10(0.05)
                significant = volcano[volcano[pval_col] > 1.3]
                # Row 0: metric value against significance
                top = axes[0, col]
                top.scatter(volcano[metric], volcano[pval_col], alpha=0.05)
                top.set_xlabel(emp_label)
                top.set_ylabel('Bootstraped -log10(p-value)')
                # Observed significant terms over the 5% expected by chance
                fold = len(significant) / (0.05 * len(volcano))
                top.axhline(y=cutoff, color='red')
                top.text(min(top.get_xlim()),
                         cutoff,
                         '{:.3g} Fold Enrichement'.format(fold),
                         color='red')
                # Row 1: term size against significance
                middle = axes[1, col]
                middle.scatter(volcano['size'], volcano[pval_col], alpha=0.05)
                middle.set_xlabel('Term Size')
                middle.set_ylabel('Bootstrapped -log10(p-value)')
                middle.axhline(y=cutoff, color='red')
                # Row 2: term size against the metric, significant terms in red
                bottom = axes[2, col]
                bottom.scatter(volcano['size'], volcano[metric], alpha=0.05)
                bottom.scatter(significant['size'],
                               significant[metric],
                               alpha=0.05,
                               color='r')
                bottom.set_ylabel(metric_label)
                bottom.set_xlabel('Term Size')
            # Save Figure
            plt.tight_layout()
            save_figure(go_png)
Exemplo n.º 5
0
def cob_health(args):
    """Write the health (QC) reports for the network named by ``args.cob``.

    Every report is skipped when its output file already exists, so a re-run
    only produces what is missing.  Files are named from the prefix
    ``args.out``; when that is None it is set to ``"<cob.name>_Health"``.

    Outputs:
        <out>.summary.txt           ``cob.summary()``
        <out>_CoexPCC_raw.png       ``cob.plot_scores(..., pcc=True)``
        <out>_CoexScore_zscore.png  ``cob.plot_scores(..., pcc=False)``
        <out>_Expr_norm.png         ``cob.plot_heatmap(...)``
        <out>_qc_gene.txt           per-chromosome QC sums (needs args.refgen)
        <out>_DegreeDist.png        degree CCDF with fitted distributions
        <out>_GO.csv, <out>_GO.png  bootstrapped density / locality of GO
                                    terms (needs args.go)

    Attributes read from ``args``: cob, out, edge_zscore_cutoff, refgen, go,
    two_tailed, max_terms, min_term_size, max_term_size, num_bootstraps.

    Returns None.  ``args.out`` is overwritten when it was None.
    """
    log = coblog()
    log(
        f"\n"
        f"-----------------------------\n"
        f"   Network Health:{args.cob} \n"
        f"-----------------------------\n"
    )
    log(f"\nCreating reports in {os.getcwd()}\n\n")

    cob = co.COB(args.cob)
    if args.out is None:
        args.out = "{}_Health".format(cob.name)
    log(f"Output prefix: {args.out}")
    prefix = args.out

    if args.edge_zscore_cutoff is not None:
        log("Changing Z-Score cutoff to {}", args.edge_zscore_cutoff)
        cob.set_sig_edge_zscore(args.edge_zscore_cutoff)

    log("Printing Summary ---------------------------------------------------")
    summary_file = f"{prefix}.summary.txt"
    if not path.exists(summary_file):
        with open(summary_file, "w") as OUT:
            # Print out the network summary
            cob.summary(file=OUT)
    else:
        log("Skipped summary.")

    log("Plotting Scores ----------------------------------------------------")
    # (file suffix, pcc flag, message logged when the file already exists)
    score_plots = (
        ("_CoexPCC_raw.png", True, "Skipped Raw."),
        ("_CoexScore_zscore.png", False, "Skipped Norm."),
    )
    for suffix, pcc, skipped_msg in score_plots:
        score_file = f"{prefix}{suffix}"
        if not path.exists(score_file):
            cob.plot_scores(score_file, pcc=pcc)
        else:
            log(skipped_msg)

    log("Plotting Expression ------------------------------------------------")
    heatmap_file = f"{prefix}_Expr_norm.png"
    if not path.exists(heatmap_file):
        cob.plot_heatmap(
            heatmap_file,
            include_accession_labels=True,
            raw=False,
            cluster_method="ward",
            cluster_accessions=True,
        )
    else:
        log("Skipped norm.")

    log("Printing QC Statistics ---------------------------------------------")
    if args.refgen is not None:
        qc_file = f"{prefix}_qc_gene.txt"
        if not path.exists(qc_file):
            # Print out the breakdown of QC Values
            refgen = co.RefGen(args.refgen)
            gene_qc = cob._bcolz("qc_gene")
            gene_qc = gene_qc[gene_qc.pass_membership]
            gene_qc["chrom"] = ["chr" + str(refgen[x].chrom) for x in gene_qc.index]
            gene_qc = gene_qc.groupby("chrom").agg(sum, axis=0)
            # Add totals at the bottom.  The first column is left out of the
            # totals.  `.iloc` / `pd.concat` replace `.ix` / `.append`, which
            # no longer exist in current pandas.
            totals = gene_qc.iloc[:, 1:].apply(sum)
            totals.name = "TOTAL"
            total_row = totals.to_frame().T
            # Keep the index label so the header of the file is unchanged.
            total_row.index.name = gene_qc.index.name
            gene_qc = pd.concat([gene_qc, total_row])
            gene_qc.to_csv(qc_file, sep="\t")
        else:
            log("Skipped QC summary.")

    log("Plotting Degree Distribution ---------------------------------------")
    degree_file = f"{prefix}_DegreeDist.png"
    if not path.exists(degree_file):
        degree = cob.degree["Degree"].values
        # Using powerlaw makes run-time warning the first time you use it.
        # This is still an open issue on the creators github.
        # The creator recommends removing this warning as long as there is a fit.
        # The suppression is scoped so the NumPy error state of the rest of
        # the process is left untouched.
        with np.errstate(divide="ignore", invalid="ignore"):
            fit = powerlaw.Fit(degree, discrete=True, xmin=1)
            # get an axis
            ax = plt.subplot()
            # Plot!
            fit.plot_ccdf(ax=ax, color="r", linewidth=3, label="Empirical Data")
            fit.power_law.plot_ccdf(
                ax=ax, linewidth=2, color="b", linestyle=":", label="Power law"
            )
            fit.truncated_power_law.plot_ccdf(
                ax=ax, linewidth=2, color="k", linestyle="-.", label="Truncated Power"
            )
            fit.exponential.plot_ccdf(
                ax=ax, linewidth=2, color="g", linestyle="--", label="Exponential"
            )
        ax.set_ylabel("p(Degree≥x)")
        ax.set_xlabel("Degree Frequency")
        ax.legend(loc="best")
        plt.title("{} Degree Distribution".format(cob.name))
        # Save Fig
        try:
            plt.savefig(degree_file)
        except FutureWarning:
            # This is a matplotlib bug
            pass
    else:
        log("Skipping Degree Dist.")

    if args.go is not None:
        log("Plotting GO --------------------------------------------------------")
        # Set the alpha based on the tails
        alpha = 0.05 / 2 if args.two_tailed else 0.05
        go_csv = f"{prefix}_GO.csv"
        go_png = f"{prefix}_GO.png"
        # Generate the GO Table
        if not path.exists(go_csv):
            go = co.GOnt(args.go)
            rows = []
            # max_terms limits the number of GO terms tested (sub-sampling)
            if args.max_terms is not None:
                log("Limiting to {} GO Terms", args.max_terms)
                terms = go.rand(
                    n=args.max_terms,
                    min_term_size=args.min_term_size,
                    max_term_size=args.max_term_size,
                )
            else:
                # Else do the whole set (default is terms between 10 and 300 genes)
                terms = go.iter_terms(
                    min_term_size=args.min_term_size, max_term_size=args.max_term_size
                )
            for term in terms:
                # Some terms will lose genes that are not in networks
                term.loci = [locus for locus in term.loci if locus in cob]
                # Skip terms that are not an adequate size
                if not (args.min_term_size <= len(term) <= args.max_term_size):
                    continue
                n_loci = len(term.loci)

                # ------ Density
                density = cob.density(term.loci)
                density_bs = np.array(
                    [
                        cob.density(cob.refgen.random_genes(n=n_loci))
                        for _ in range(args.num_bootstraps)
                    ]
                )
                # The p-value is the fraction of bootstraps at least as
                # extreme as the term, in the direction of the term's sign.
                if density > 0:
                    extreme = np.count_nonzero(density_bs >= density)
                else:
                    extreme = np.count_nonzero(density_bs <= density)
                density_pval = extreme / args.num_bootstraps

                # ------- Locality
                locality = cob.locality(term.loci, include_regression=True).resid.mean()
                locality_bs = np.array(
                    [
                        cob.locality(
                            cob.refgen.random_genes(n=n_loci),
                            include_regression=True,
                        ).resid.mean()
                        for _ in range(args.num_bootstraps)
                    ]
                )
                if locality > 0:
                    extreme = np.count_nonzero(locality_bs >= locality)
                else:
                    extreme = np.count_nonzero(locality_bs <= locality)
                locality_pval = extreme / args.num_bootstraps

                rows.append(
                    (
                        term.id,
                        str(term.desc),
                        len(term),
                        density,
                        density_pval,
                        locality,
                        locality_pval,
                    )
                )
                if len(rows) % 100 == 0:
                    log("Processed {} terms".format(len(rows)))
            go_enrichment = pd.DataFrame(
                rows,
                columns=[
                    "GOTerm",
                    "desc",
                    "size",
                    "density",
                    "density_pval",
                    "locality",
                    "locality_pval",
                ],
            )
            # Calculate significance
            go_enrichment["density_significant"] = go_enrichment.density_pval < alpha
            go_enrichment["locality_significant"] = go_enrichment.locality_pval < alpha
            # Calculate bonferonni.  max(..., 1) keeps an empty table from
            # dividing by zero before the warning below can be issued.
            bonferroni_alpha = alpha / max(len(go_enrichment), 1)
            go_enrichment["density_bonferroni"] = (
                go_enrichment.density_pval < bonferroni_alpha
            )
            go_enrichment["locality_bonferroni"] = (
                go_enrichment.locality_pval < bonferroni_alpha
            )
            # Store the GO results in a CSV
            go_enrichment.sort_values(by="density_pval", ascending=True).to_csv(
                go_csv, index=False
            )
            if not rows:
                log.warn("No GO terms met your min/max gene criteria!")
        else:
            go_enrichment = pd.read_csv(go_csv, sep=",")

        if path.exists(go_png):
            log("Skipping GO Volcano.")
        elif len(go_enrichment) == 0:
            # Nothing to draw, and the fold enrichment would divide by zero.
            log("No GO terms to plot. Skipping GO Volcano.")
        else:
            log_alpha = -1 * np.log10(alpha)
            # Convert pvals to log10
            with np.errstate(divide="ignore"):
                for pval_col in ("density_pval", "locality_pval"):
                    # When no bootstraps are more extreme than the term, the
                    # minus log pval yields an infinite
                    neg_log = -1 * np.log10(go_enrichment[pval_col])
                    finite = np.isfinite(neg_log)
                    # Fix the infinites so they are plotted: one unit above
                    # the largest finite value, or above the significance
                    # line when there is no finite value at all.
                    if finite.any():
                        cap = neg_log[finite].max() + 1
                    else:
                        cap = log_alpha + 1
                    go_enrichment[pval_col] = neg_log.where(finite, cap)
            plt.clf()
            # Calculate the transparency based on the number of terms
            transparency_alpha = 0.05 if len(go_enrichment) > 20 else 1

            # --------------------------------------------------------------------
            # Start Plotting: one column of three panels per metric
            figure, axes = plt.subplots(3, 2, figsize=(12, 12))
            panels = (
                (0, "density", "Empirical Density (Z-Score)", "Density Health", "Density"),
                (
                    1,
                    "locality",
                    "Empirical Locality (Residual)",
                    "Locality Health",
                    "Locality",
                ),
            )
            for col, metric, xlabel, title, ylabel in panels:
                volcano, pval_by_size, raw_by_size = axes[:, col]
                pvals = go_enrichment[f"{metric}_pval"]
                significant = pvals > log_alpha
                # Volcano: empirical value against -log10(p-value)
                volcano.scatter(
                    go_enrichment[metric],
                    pvals,
                    alpha=transparency_alpha,
                    color="blue",
                )
                volcano.set_xlabel(xlabel)
                volcano.set_ylabel("Bootstraped -log10(p-value)")
                # Observed significant terms relative to the number expected
                # by chance at this alpha.
                fold = np.count_nonzero(significant) / (alpha * len(go_enrichment))
                volcano.axhline(y=log_alpha, color="red")
                volcano.text(
                    min(volcano.get_xlim()),
                    log_alpha + 0.1,
                    "{:.3g} Fold Enrichement".format(fold),
                    color="red",
                )
                volcano.set_title(title)
                # Plot pvalue by term size
                pval_by_size.scatter(
                    go_enrichment["size"],
                    pvals,
                    alpha=transparency_alpha,
                    color="blue",
                )
                pval_by_size.set_xlabel("Term Size")
                pval_by_size.set_ylabel("Bootstrapped -log10(p-value)")
                pval_by_size.axhline(y=log_alpha, color="red")
                # Plot raw value by term size, significant terms in red on top
                raw_by_size.scatter(
                    go_enrichment["size"],
                    go_enrichment[metric],
                    alpha=transparency_alpha,
                    color="blue",
                )
                raw_by_size.scatter(
                    go_enrichment.loc[significant, "size"],
                    go_enrichment.loc[significant, metric],
                    alpha=transparency_alpha,
                    color="r",
                )
                raw_by_size.set_ylabel(ylabel)
                raw_by_size.set_xlabel("Term Size")
            # Save Figure
            plt.tight_layout()
            try:
                plt.savefig(go_png)
            except FutureWarning:
                # This is a matplotlib bug
                pass
            finally:
                # Release the figure created above.
                plt.close(figure)
Exemplo n.º 6
0
def cob_health(args):
    '''Write the health (QC) reports for the network named by ``args.cob``.

    Every report is skipped when its output file already exists, so a re-run
    only produces what is missing.  Files are named from the prefix
    ``args.out``; when that is None it is set to ``'<cob.name>_Health'``.

    Outputs:
        <out>.summary.txt           ``cob.summary()``
        <out>_CoexPCC_raw.png       ``cob.plot_scores(..., pcc=True)``
        <out>_CoexScore_zscore.png  ``cob.plot_scores(..., pcc=False)``
        <out>_Expr_norm.png         ``cob.plot_heatmap(...)``
        <out>_qc_gene.txt           per-chromosome QC sums (needs args.refgen)
        <out>_DegreeDist.png        degree CCDF with fitted distributions
        <out>_GO.csv, <out>_GO.png  bootstrapped density / locality of GO
                                    terms (needs args.go)

    Attributes read from ``args``: cob, out, edge_zscore_cutoff, refgen, go,
    two_tailed, max_terms, min_term_size, max_term_size, num_bootstraps.

    Returns None.  ``args.out`` is overwritten when it was None.
    '''
    log = coblog()
    log(f'\n'
        f'-----------------------------\n'
        f'   Network Health:{args.cob} \n'
        f'-----------------------------\n'
    )
    log(f"\nCreating reports in {os.getcwd()}\n\n")

    cob = co.COB(args.cob)
    if args.out is None:
        args.out = '{}_Health'.format(cob.name)
    log(f'Output prefix: {args.out}')
    prefix = args.out

    if args.edge_zscore_cutoff is not None:
        log("Changing Z-Score cutoff to {}", args.edge_zscore_cutoff)
        cob.set_sig_edge_zscore(args.edge_zscore_cutoff)

    log('Printing Summary ---------------------------------------------------')
    summary_file = f'{prefix}.summary.txt'
    if not path.exists(summary_file):
        with open(summary_file, 'w') as OUT:
            # Print out the network summary
            cob.summary(file=OUT)
    else:
        log('Skipped summary.')

    log('Plotting Scores ----------------------------------------------------')
    # (file suffix, pcc flag, message logged when the file already exists)
    score_plots = (
        ('_CoexPCC_raw.png', True, 'Skipped Raw.'),
        ('_CoexScore_zscore.png', False, 'Skipped Norm.'),
    )
    for suffix, pcc, skipped_msg in score_plots:
        score_file = f'{prefix}{suffix}'
        if not path.exists(score_file):
            cob.plot_scores(score_file, pcc=pcc)
        else:
            log(skipped_msg)

    log('Plotting Expression ------------------------------------------------')
    heatmap_file = f'{prefix}_Expr_norm.png'
    if not path.exists(heatmap_file):
        cob.plot_heatmap(
            heatmap_file,
            include_accession_labels=True,
            raw=False,
            cluster_method='ward',
            cluster_accessions=True
        )
    else:
        log('Skipped norm.')

    log('Printing QC Statistics ---------------------------------------------')
    if args.refgen is not None:
        qc_file = f'{prefix}_qc_gene.txt'
        if not path.exists(qc_file):
            # Print out the breakdown of QC Values
            refgen = co.RefGen(args.refgen)
            gene_qc = cob._bcolz('qc_gene')
            gene_qc = gene_qc[gene_qc.pass_membership]
            gene_qc['chrom'] = ['chr' + str(refgen[x].chrom) for x in gene_qc.index]
            gene_qc = gene_qc.groupby('chrom').agg(sum, axis=0)
            # Add totals at the bottom.  The first column is left out of the
            # totals.  `.iloc` / `pd.concat` replace `.ix` / `.append`, which
            # no longer exist in current pandas.
            totals = gene_qc.iloc[:, 1:].apply(sum)
            totals.name = 'TOTAL'
            total_row = totals.to_frame().T
            # Keep the index label so the header of the file is unchanged.
            total_row.index.name = gene_qc.index.name
            gene_qc = pd.concat([gene_qc, total_row])
            gene_qc.to_csv(qc_file, sep='\t')
        else:
            log('Skipped QC summary.')

    log('Plotting Degree Distribution ---------------------------------------')
    degree_file = f'{prefix}_DegreeDist.png'
    if not path.exists(degree_file):
        degree = cob.degree['Degree'].values
        # Using powerlaw makes run-time warning the first time you use it.
        # This is still an open issue on the creators github.
        # The creator recommends removing this warning as long as there is a fit.
        # The suppression is scoped so the NumPy error state of the rest of
        # the process is left untouched.
        with np.errstate(divide='ignore', invalid='ignore'):
            fit = powerlaw.Fit(degree, discrete=True, xmin=1)
            # get an axis
            ax = plt.subplot()
            # Plot!
            fit.plot_ccdf(ax=ax, color='r', linewidth=3, label='Empirical Data')
            fit.power_law.plot_ccdf(
                ax=ax, linewidth=2, color='b', linestyle=':', label='Power law'
            )
            fit.truncated_power_law.plot_ccdf(
                ax=ax, linewidth=2, color='k', linestyle='-.', label='Truncated Power'
            )
            fit.exponential.plot_ccdf(
                ax=ax, linewidth=2, color='g', linestyle='--', label='Exponential'
            )
        ax.set_ylabel("p(Degree≥x)")
        ax.set_xlabel("Degree Frequency")
        ax.legend(loc='best')
        plt.title('{} Degree Distribution'.format(cob.name))
        # Save Fig
        try:
            plt.savefig(degree_file)
        except FutureWarning:
            # This is a matplotlib bug
            pass
    else:
        log('Skipping Degree Dist.')

    if args.go is not None:
        log('Plotting GO --------------------------------------------------------')
        # Set the alpha based on the tails
        alpha = 0.05 / 2 if args.two_tailed else 0.05
        go_csv = f'{prefix}_GO.csv'
        go_png = f'{prefix}_GO.png'
        # Generate the GO Table
        if not path.exists(go_csv):
            go = co.GOnt(args.go)
            rows = []
            # max_terms limits the number of GO terms tested (sub-sampling)
            if args.max_terms is not None:
                log('Limiting to {} GO Terms', args.max_terms)
                terms = go.rand(
                    n=args.max_terms,
                    min_term_size=args.min_term_size,
                    max_term_size=args.max_term_size
                )
            else:
                # Else do the whole set (default is terms between 10 and 300 genes)
                terms = go.iter_terms(
                    min_term_size=args.min_term_size,
                    max_term_size=args.max_term_size
                )
            for term in terms:
                # Some terms will lose genes that are not in networks
                term.loci = [locus for locus in term.loci if locus in cob]
                # Skip terms that are not an adequate size
                if not (args.min_term_size <= len(term) <= args.max_term_size):
                    continue
                n_loci = len(term.loci)

                # ------ Density
                density = cob.density(term.loci)
                density_bs = np.array([
                    cob.density(cob.refgen.random_genes(n=n_loci))
                    for _ in range(args.num_bootstraps)
                ])
                # The p-value is the fraction of bootstraps at least as
                # extreme as the term, in the direction of the term's sign.
                if density > 0:
                    extreme = np.count_nonzero(density_bs >= density)
                else:
                    extreme = np.count_nonzero(density_bs <= density)
                density_pval = extreme / args.num_bootstraps

                # ------- Locality
                locality = cob.locality(
                    term.loci, include_regression=True
                ).resid.mean()
                locality_bs = np.array([
                    cob.locality(
                        cob.refgen.random_genes(n=n_loci),
                        include_regression=True
                    ).resid.mean()
                    for _ in range(args.num_bootstraps)
                ])
                if locality > 0:
                    extreme = np.count_nonzero(locality_bs >= locality)
                else:
                    extreme = np.count_nonzero(locality_bs <= locality)
                locality_pval = extreme / args.num_bootstraps

                rows.append((
                    term.id,
                    str(term.desc),
                    len(term),
                    density,
                    density_pval,
                    locality,
                    locality_pval,
                ))
                if len(rows) % 100 == 0:
                    log('Processed {} terms'.format(len(rows)))
            go_enrichment = pd.DataFrame(
                rows,
                columns=[
                    'GOTerm',
                    'desc',
                    'size',
                    'density',
                    'density_pval',
                    'locality',
                    'locality_pval',
                ]
            )
            # Calculate significance
            go_enrichment['density_significant'] = go_enrichment.density_pval < alpha
            go_enrichment['locality_significant'] = go_enrichment.locality_pval < alpha
            # Calculate bonferonni.  max(..., 1) keeps an empty table from
            # dividing by zero before the warning below can be issued.
            bonferroni_alpha = alpha / max(len(go_enrichment), 1)
            go_enrichment['density_bonferroni'] = go_enrichment.density_pval < bonferroni_alpha
            go_enrichment['locality_bonferroni'] = go_enrichment.locality_pval < bonferroni_alpha
            # Store the GO results in a CSV
            go_enrichment\
                .sort_values(by='density_pval', ascending=True)\
                .to_csv(go_csv, index=False)
            if not rows:
                log.warn('No GO terms met your min/max gene criteria!')
        else:
            go_enrichment = pd.read_csv(go_csv, sep=',')

        if path.exists(go_png):
            log('Skipping GO Volcano.')
        elif len(go_enrichment) == 0:
            # Nothing to draw, and the fold enrichment would divide by zero.
            log('No GO terms to plot. Skipping GO Volcano.')
        else:
            log_alpha = -1 * np.log10(alpha)
            # Convert pvals to log10
            with np.errstate(divide='ignore'):
                for pval_col in ('density_pval', 'locality_pval'):
                    # When no bootstraps are more extreme than the term, the
                    # minus log pval yields an infinite
                    neg_log = -1 * np.log10(go_enrichment[pval_col])
                    finite = np.isfinite(neg_log)
                    # Fix the infinites so they are plotted: one unit above
                    # the largest finite value, or above the significance
                    # line when there is no finite value at all.
                    if finite.any():
                        cap = neg_log[finite].max() + 1
                    else:
                        cap = log_alpha + 1
                    go_enrichment[pval_col] = neg_log.where(finite, cap)
            plt.clf()
            # Calculate the transparency based on the number of terms
            transparency_alpha = 0.05 if len(go_enrichment) > 20 else 1

            # --------------------------------------------------------------------
            # Start Plotting: one column of three panels per metric
            figure, axes = plt.subplots(3, 2, figsize=(12, 12))
            panels = (
                (0, 'density', 'Empirical Density (Z-Score)', 'Density Health', 'Density'),
                (1, 'locality', 'Empirical Locality (Residual)', 'Locality Health', 'Locality'),
            )
            for col, metric, xlabel, title, ylabel in panels:
                volcano, pval_by_size, raw_by_size = axes[:, col]
                pvals = go_enrichment[f'{metric}_pval']
                significant = pvals > log_alpha
                # Volcano: empirical value against -log10(p-value)
                volcano.scatter(
                    go_enrichment[metric],
                    pvals,
                    alpha=transparency_alpha,
                    color='blue'
                )
                volcano.set_xlabel(xlabel)
                volcano.set_ylabel('Bootstraped -log10(p-value)')
                # Observed significant terms relative to the number expected
                # by chance at this alpha.
                fold = np.count_nonzero(significant) / (alpha * len(go_enrichment))
                volcano.axhline(y=log_alpha, color='red')
                volcano.text(
                    min(volcano.get_xlim()),
                    log_alpha + 0.1,
                    '{:.3g} Fold Enrichement'.format(fold),
                    color='red'
                )
                volcano.set_title(title)
                # Plot pvalue by term size
                pval_by_size.scatter(
                    go_enrichment['size'],
                    pvals,
                    alpha=transparency_alpha,
                    color='blue'
                )
                pval_by_size.set_xlabel('Term Size')
                pval_by_size.set_ylabel('Bootstrapped -log10(p-value)')
                pval_by_size.axhline(y=log_alpha, color='red')
                # Plot raw value by term size, significant terms in red on top
                raw_by_size.scatter(
                    go_enrichment['size'],
                    go_enrichment[metric],
                    alpha=transparency_alpha,
                    color='blue'
                )
                raw_by_size.scatter(
                    go_enrichment.loc[significant, 'size'],
                    go_enrichment.loc[significant, metric],
                    alpha=transparency_alpha,
                    color='r'
                )
                raw_by_size.set_ylabel(ylabel)
                raw_by_size.set_xlabel('Term Size')
            # Save Figure
            plt.tight_layout()
            try:
                plt.savefig(go_png)
            except FutureWarning:
                # This is a matplotlib bug
                pass
            finally:
                # Release the figure created above.
                plt.close(figure)