示例#1
0
def _func(
    self,
    key,
    OUTDIR,
    fig_meta,
    meta_chip,
    chipseq_targets_peaks_file,
):
    """Plot ChIP-seq signal around target peaks: a boxplot and a pile-up heatmap.

    Steps, as performed below:

    1. Select the accessions that have a non-null value in the ``key`` column
       of ``fig_meta`` and order them by that (integer-cast) value.
    2. Reindex ``meta_chip`` by those accessions and extract signal from the
       files in its ``RPKMFile`` column around the intervals in
       ``chipseq_targets_peaks_file`` (radius 100, step 10, 6 workers).
    3. Inside ``pyext.getPathStack([OUTDIR], force=1)`` save two figures:
       ``{key}.png`` (boxplot of ``sutil.log2p1`` applied to the
       ``pyutil.colGroupMean`` table) and ``{key}_pileup_heatmap.png``
       (column means pivoted to a ``pos`` x ``bwFile`` table).

    Args:
        self: unused.
        key: currently ignored -- see the NOTE(review) below.
        OUTDIR: directory handed to ``pyext.getPathStack``; presumably the
            working directory the figures are written into (confirm against
            ``pyext``).
        fig_meta: table indexed by data accession; must contain column ``key``.
        meta_chip: table indexed by data accession with an ``RPKMFile`` column
            (and whatever fields ``'{bname}_{index}'`` needs).
        chipseq_targets_peaks_file: path of the peak/bed file to extract around.

    Returns:
        None. The function is used for its figure-saving side effects.
    """
    from pymisca.plotters import plotters

    # NOTE(review): the caller-supplied ``key`` is overwritten with a fixed
    # figure key, so the argument has no effect. Kept as-is to preserve the
    # current output file names -- confirm whether this pin is still wanted.
    # ``pyext.f`` below resolves ``{key}`` from this local name.
    key = 'figS4E_0905'

    # Accessions flagged for this figure, ordered by their integer rank.
    df = fig_meta[[key]].dropna().astype(int).sort_values(key)
    DATA_ACC_LIST = df.index.tolist()
    mcurr = meta_chip.reindex(DATA_ACC_LIST)

    res = sutil.extract_bigwig_multiple(
        bwFiles=mcurr['RPKMFile'],
        bedFile=chipseq_targets_peaks_file,
        radius=100,
        stepSize=10,
        NCORE=6,
        outIndex=pyext.df__format(mcurr, '{bname}_{index}'),
    )

    dfc = pyutil.colGroupMean(res).apply(sutil.log2p1)

    with pyext.getPathStack([OUTDIR], force=1):
        # Figure 1: per-group distribution of the transformed signal.
        plt.figure(figsize=[12, 8])
        dfc.boxplot(rot='vertical')
        plt.ylabel('average binding in RPKM')
        plotters.fig__save(plt.gcf(), pyext.f('{key}.png'))

        # Figure 2: mean over rows, reshaped so rows are positions and columns
        # are bigwig files. ``values=0`` is the unnamed column produced by
        # ``to_frame()``.
        col_means = res.mean(axis=0)
        pile = col_means.to_frame().reset_index().pivot_table(
            index='bwFile',
            columns='pos',
            values=0,
        )
        pile = pile.T
        pyvis.df__heatmap(pile, figsize=[12, 12])
        plotters.fig__save(plt.gcf(), pyext.f('{key}_pileup_heatmap.png'))
示例#2
0
def _func(
    self,
    key,
    signature_datasets,
    signature_profile,
    signature_CUTOFF,
    pyvis,
):
    """Pick the rows of ``signature_datasets`` that score above a cutoff.

    Each row is scored by its dot product with ``signature_profile``; the
    scores are passed through ``pyext.dist2ppf`` and rows whose resulting value
    exceeds ``signature_CUTOFF`` are kept. (Presumably ``dist2ppf`` yields a
    percentile-like rank, so a cutoff of 0.99 would keep the top 1% --
    confirm against ``pyext``.)

    Args:
        self: unused.
        key: tag embedded in the name of the listing file that is written.
        signature_datasets: table providing ``.dot`` and ``.index``.
        signature_profile: weights multiplied against each row.
        signature_CUTOFF: threshold compared against the ``dist2ppf`` values.
        pyvis: plotting helper; only touched by the disabled diagnostic plot.

    Returns:
        The index labels of the selected rows. As a side effect they are also
        written with ``pyext.printlines`` to ``_temp-{key}-.it``.
    """
    signature_score = signature_datasets.dot(signature_profile)
    ppf = pyext.dist2ppf(signature_score)

    # Diagnostic scatter of score against ppf; permanently switched off here.
    silent = 1
    if not silent:
        fig, ax = plt.subplots(1, 1)
        ax.set_ylabel('signature_score')
        ax.set_xlabel('percentage')
        plt.scatter(ppf, signature_score)
        ax = plt.gca()
        ax.set_xlim(0.95, 1.01)
        ax.grid(1)
        pyvis.abline(x0=signature_CUTOFF)

    above_cutoff = ppf > signature_CUTOFF
    _targets = signature_datasets.index[above_cutoff]

    # ``pyext.f`` fills ``{key}`` from this function's ``key`` argument.
    listing_file = pyext.f('_temp-{key}-.it')
    pyext.printlines(_targets, listing_file)
    return _targets
示例#3
0
 def _worker(sample):
     """Render the common template for one sample and write it to disk.

     The sample is wrapped in an ``AttrDict``, given a ``title`` made of six of
     its fields joined with underscores, and the text returned by
     ``pyext.jf2(template_common())`` is stripped line by line, stored as
     ``sample.soft_text`` and written to ``OUTDIR/<data_acc>.soft.txt``.

     NOTE(review): ``pyext.jf2`` presumably fills the template from this
     function's variables (``sample`` in particular), so the local names are
     kept as they were -- confirm before renaming anything here.
     """
     sample = attrdict.AttrDict(sample)
     sample.title = "_".join([
         sample[k]
         for k in "data_acc,age,tissue,genotype,ztime,temperature".split(",")
     ])

     res = pyext.jf2(template_common())
     # Drop leading/trailing whitespace on every line of the rendered text.
     res = '\n'.join([x.strip() for x in res.splitlines()])
     sample.soft_text = res

     pyext.printlines(
         [sample.soft_text],
         OUTDIR / pyext.f("{sample.data_acc}.soft.txt"),
     )
# Write one auto-filled SOFT text file per sample into WORKDIR()/get_soft_text.
OUTDIR = WORKDIR() / "get_soft_text"

pyext.real__dir(dirname=OUTDIR)
_samples = ns.sample_init_full()

for sample in _samples:
    # A single pre-formatted argument prints the same text under the Python 2
    # print statement and the Python 3 print function (the previous
    # multi-argument print statement was a syntax error on Python 3).
    print('[template_finalise] %s ...' % (sample['data_acc'],))
    try:
        ns.sample_template_find_curated(sample)
        ns.sample_template_finalise(sample)
        res = sample['template_final']
        # Strip surrounding whitespace from every line of the final template.
        res = '\n'.join([x.strip() for x in res.splitlines()])
        sample.soft_text = res
        # ``pyext.f`` resolves ``{sample.data_acc}`` from the loop variable.
        pyext.printlines([sample.soft_text], OUTDIR /
                         pyext.f("{sample.data_acc}.autofilled.soft.txt"))
    except Exception as e:
        # Deliberately best-effort: report the failure and move on so one bad
        # sample does not abort the remaining ones.
        print('FAILED')
        print(str(e))

template = u'''
^SERIES = 0829-polyq
!Series_title = RNA-Seq and ChIP-Seq profiling of ELF3, an prion-like domain-containig in ELF3 that functions as a
thermosensor in Arabidopsis.
!Series_summary = Temperature is a major environmental variable governing plant growth and
development. ELF3 contains a polyglutamine (polyQ) repeat 8–10, embedded within a predicted prion domain (PrD). We find the length of the polyQ repeat correlates with thermal responsiveness. Plants from hotter climates appear to have lost the PrD domain, and these versions of ELF3 are stable at high temperature and lack thermal responsiveness. ELF3 temperature sensitivity is also modulated by the levels of ELF4, indicating that ELF4 can stabilise ELF3 function. This RNA-Seq dataset provides evidence for the hypothetical ELF3 function of temperature sensing .
!Series_overall design = Single samples were taken at each time point. RNA-Seqs and ChIP-Seqs were performed for different genotypes at different temperature and objective time. 
!Series_contributor = Jaehoon Jung
!Series_contributor = Katja, Jaeger
!Series_contributor = Feng, Geng
{% for sample in _samples %}
示例#5
0
# --- Expression table -------------------------------------------------------
# ``rnaseq_raw`` keeps the table exactly as loaded; ``rnaseq`` is a copy whose
# values are replaced by the result of applying ``pyext.log2p1``.
rnaseq_raw = pyext.readData(
    _get_file('/home/feng/envs/Fig_POLYQ/rnaseq.pk'))
# rnaseq = rnaseq_raw = pyext.readData(pyext.f('{SRCDIR}/static.envs.Fig_POLYQ.rnaseq.pk'))
rnaseq = rnaseq_raw.copy()
rnaseq.loc[:] = rnaseq.apply(pyext.log2p1)
job['rnaseq'] = rnaseq

# --- Dataset metadata -------------------------------------------------------
# Column names are upper-cased before the display name is built, which is why
# the format template refers to TEMP / ZTIME / GTYPE.
mcurr0 = pyext.readData(
    _get_file('/home/feng/static/results/0318-makeRNA-polyQ/mcurr0.csv'))
# mcurr0 = pyext.readData( pyext.f('{SRCDIR}/static.results.0318-makeRNA-polyQ.mcurr0.csv') )
mcurr0.columns = mcurr0.columns.str.upper()
mcurr0['DISP_NAME'] = pyext.df__format(
    mcurr0,
    '{TEMP}-{ZTIME}-{GTYPE}-{index}',
)
job['datasets_meta'] = mcurr0

# --- Marker genes -----------------------------------------------------------
# Rows of the key table whose ``BioName`` is one of ``markers``; published under
# both module-level names and in the job.
keyDF = pyext.readData(pyext.f('{SRCDIR}/key_ath.csv'))
markers = ['LUX']
keyDFC = keyDF.query('BioName in %s' % markers)
markers_df = keyDFC
job['markers_df'] = markers_df
#####
###############################

# geneDB =pyext.read__buffer(buf,ext='csv')

# markers = ['ATHB2','HFR1','LUX','GI','PRR7']
# markers = ['ATHB2','HFR1','LUX','GI','PRR7']

job['__doc__'] = '''
signature_profile <-- (biological_marker or pca_eigenvector )

signature_profile + signature_dataset --> signature_targets