示例#1
0
    # Print one tab-separated row per entry of `stats`: the key, the raw count,
    # and the count as a percentage of stats['all'] and of stats['enhancers'].
    # NOTE(review): Python 2 idioms (print statement, iteritems). If the counts
    # are ints and true division is not enabled for this module, `/` truncates
    # before the `* 100` -- confirm.
    for k, count in stats.iteritems():
        print '{}\t{}\t{:.2f}\t{:.2f}'.format(
            k,
            count,
            count / stats['all'] * 100,
            count / stats['enhancers'] * 100,
        )

    # Split `data` into named subsets according to the me2_id / ac_id columns,
    # then pass each subset to yzer.run_homer.
    subsets = {}
    if True:
        subsets['with_me2'] = data[(data['me2_id'] > 0)]
        subsets['with_ac'] = data[(data['ac_id'] > 0)]
        # 'inactive' = rows where both ac_id and me2_id are 0.
        subsets['inactive'] = data[(data['ac_id'] == 0)
                                   & (data['me2_id'] == 0)]

        for k, subset in subsets.iteritems():
            # Copy the foxp3_* columns into generic id/start/end columns
            # (presumably the columns run_homer reads -- confirm).
            # NOTE(review): `subset` is a boolean-mask selection of `data`, so
            # pandas may emit SettingWithCopyWarning here -- confirm.
            first_peak = 'foxp3'
            subset['id'] = subset[first_peak + '_id']
            subset['start'] = subset[first_peak + '_start']
            subset['end'] = subset[first_peak + '_end']

            # The run name is 'foxp3_enh_<subset key>'.
            yzer.run_homer(subset,
                           first_peak + '_enh_' + k,
                           motif_dirpath,
                           cpus=6,
                           center=True,
                           reverse=False,
                           preceding=False,
                           size=200,
                           length=[8, 10, 12, 15])
示例#2
0
    # For every entry in SAMPLES, load that sample's enhancer table and run
    # HOMER on the rows that meet the tag-count threshold.
    yzer = MotifAnalyzer()

    dirpath = 'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/' +\
        'Miscellaneous_Collaborations/Rodrigo_CD8s_2014_09/Enhancers'
    dirpath = yzer.get_path(dirpath)

    for cond, seq, breed in SAMPLES:
        # The table is read from <dirpath>/<prefix>/<prefix>_enhancers.txt.
        sample_prefix = sample_name(cond, seq, breed)
        sample_dirpath = yzer.get_filename(dirpath, sample_prefix)
        filename = yzer.get_filename(sample_dirpath,
                                     sample_prefix + '_enhancers.txt')

        data = yzer.import_file(filename)
        # Replace missing values with 0 so the comparisons below are defined.
        data = data.fillna(0)

        if True:

            # The threshold is looked up from `seq` (get_threshold is defined
            # elsewhere in the file).
            min_thresh = get_threshold(seq)

            subdata = data[data['tag_count'] >= min_thresh]
            # NOTE(review): mock=True is presumably a dry-run switch -- confirm
            # against MotifAnalyzer.run_homer.
            yzer.run_homer(subdata,
                           'all',
                           sample_dirpath,
                           cpus=10,
                           center=True,
                           reverse=False,
                           preceding=False,
                           size=200,
                           length=[8, 10, 12],
                           mock=True)
示例#3
0
        # Build the '<peptide>_<ab>' directory path for each peptide. The
        # HOMER runs inside are currently switched off by `if False`.
        for peptide in ('K99A', 'NoPep', 'PCC'):
            pep_dirpath = yzer.get_filename(dirpath,
                                            '{}_{}'.format(peptide, ab))

            if False:
                filename = yzer.get_filename(
                    pep_dirpath, '{}_{}_enhancers.txt'.format(peptide, ab))

                data = yzer.import_file(filename)
                data = data.fillna(0)

                # Run 1: every row of the table.
                yzer.run_homer(data,
                               'all',
                               pep_dirpath,
                               cpus=6,
                               center=True,
                               reverse=False,
                               preceding=False,
                               size=200,
                               length=[8, 10, 12, 15],
                               mock=True)

                # Run 2: only rows with tag_count >= 10.
                yzer.run_homer(data[data['tag_count'] >= 10],
                               'tag_thresh_10',
                               pep_dirpath,
                               cpus=6,
                               center=True,
                               reverse=False,
                               preceding=False,
                               size=200,
                               length=[8, 10, 12, 15],
                               mock=True)
示例#4
0
                                             '{}_{}'.format(condition, ab))

            # Disabled: HOMER on rows with tag_count strictly above min_thresh.
            if False:
                filename = yzer.get_filename(
                    cond_dirpath, '{}_{}_enhancers.txt'.format(condition, ab))

                data = yzer.import_file(filename)
                data = data.fillna(0)

                min_thresh = 20
                # Note that `data` itself is overwritten by the filtered rows.
                data = data[data['tag_count'] > min_thresh]
                # The run name embeds the threshold: 'filtered_20'.
                yzer.run_homer(data,
                               'filtered_{}'.format(min_thresh),
                               cond_dirpath,
                               cpus=6,
                               center=True,
                               reverse=False,
                               preceding=False,
                               size=200,
                               length=[8, 10, 12, 15],
                               mock=True)

            # Enabled: load the condition's enhancer table and keep rows whose
            # tag_count exceeds min_thresh while 'tag_count(2)' does not.
            if True:
                filename = yzer.get_filename(
                    cond_dirpath, '{}_{}_enhancers.txt'.format(condition, ab))

                data = yzer.import_file(filename)
                data = data.fillna(0)

                min_thresh = 20
                # NOTE(review): 'tag_count(2)' is presumably the tag count of a
                # second, merged peak set -- confirm against the input file.
                subdata = data[data['tag_count'] > min_thresh]
                subdata = subdata[subdata['tag_count(2)'] <= min_thresh]
示例#5
0
    # Load the sample's enhancer table, keep rows at or above the tag-count
    # threshold, and run HOMER on two subsets defined relative to
    # naive_atac_tag_count.
    filename = yzer.get_filename(sample_dirpath,
                                 sample_prefix + '_enhancers.txt')

    data = yzer.import_file(filename)
    data = data.fillna(0)

    min_thresh = get_threshold(seq)
    data = data[data['tag_count'] >= min_thresh]

    # Fold-change factor used for the "grows" comparison below.
    fold = 2
    if True:
        # ATAC peaks that are absent in the FOXO1 competent
        # (naive_atac_tag_count is below min_thresh).
        ko_new = data[
            data['naive_atac_tag_count'] < min_thresh]
        yzer.run_homer(ko_new,
                       'ko_new', sample_dirpath,
                       cpus=10, center=True, reverse=False, preceding=False,
                       size=200, length=[8, 10, 12], mock=True)
        print('ko_new', len(ko_new))

        # ATAC peaks exist in the naive sample (>= min_thresh), but are less
        # than 1/fold (half) as big as this sample's tag_count.
        ko_grows = data[
            (data['naive_atac_tag_count'] >= min_thresh) &
            (data['naive_atac_tag_count'] * fold < data['tag_count'])]
        yzer.run_homer(ko_grows,
                       'ko_grows', sample_dirpath,
                       cpus=10, center=True, reverse=False, preceding=False,
                       size=200, length=[8, 10, 12], mock=True)
        print('ko_grows', len(ko_grows))

        # The sum of new and bigger
        ko_dependent = data[
示例#6
0
    )
    # HOMER output goes under <base_dirpath>/motifs/; the input table is
    # <base_dirpath>/transcript_vectors.txt with missing values set to 0.
    dirpath = yzer.get_and_create_path(base_dirpath, 'motifs/')
    filename = yzer.get_filename(base_dirpath, 'transcript_vectors.txt')
    data = yzer.import_file(filename)
    data = data.fillna(0)

    # Promoters (this whole section is currently disabled by `if False`).
    if False:
        # Keep rows flagged has_refseq == 1 with transcript_score >= 4.
        refseq = data[data['has_refseq'] == 1]
        refseq = refseq[refseq['transcript_score'] >= 4]
        if True:
            yzer.run_homer(refseq,
                           'refseq_promoter',
                           dirpath,
                           cpus=6,
                           center=False,
                           reverse=False,
                           preceding=True,
                           size=400,
                           length=[8, 10, 12, 15])

        # Path to the regions file under refseq_promoter/. It is not used in
        # the lines visible here (presumably a background set for later runs
        # -- confirm).
        bg = yzer.get_filename(
            dirpath, 'refseq_promoter/refseq_promoter_regions_for_homer.txt')

        # Rows of `refseq` with balb_nod_notx_1h_fc <= -1.
        subset = refseq[refseq['balb_nod_notx_1h_fc'] <= -1]
        yzer.run_homer(subset,
                       'promoter_overlap_notx_1h_nod_down',
                       dirpath,
                       cpus=6,
                       center=False,
                       reverse=False,
示例#7
0
            # Directory for this peptide/antibody pair: '<peptide>_<ab>'.
            pep_dirpath = yzer.get_filename(dirpath,
                                            '{}_{}'.format(peptide, ab))

            if True:
                # Load '<peptide>_<ab>_enhancers.txt' and zero-fill missing
                # values.
                filename = yzer.get_filename(
                    pep_dirpath, '{}_{}_enhancers.txt'.format(peptide, ab))

                data = yzer.import_file(filename)
                data = data.fillna(0)

                if True:
                    # HOMER on every row of the table.
                    yzer.run_homer(data,
                                   'all',
                                   pep_dirpath,
                                   cpus=8,
                                   center=True,
                                   reverse=False,
                                   preceding=False,
                                   size=200,
                                   length=[8, 10, 12],
                                   mock=True)

                if True:
                    subset = data[(data['id(2)'] == 0) & (data['id(3)'] == 0)]
                    yzer.run_homer(subset,
                                   'only',
                                   pep_dirpath,
                                   cpus=8,
                                   center=True,
                                   reverse=False,
                                   preceding=False,
                                   size=200,
示例#8
0
        # Get venn-diagram sets for foxp3/me2: rows of `treg` with no naive
        # peak, rows of `naive` with no treg peak, and rows of `treg` that
        # also have a naive peak.
        only_treg = treg[treg['naive_id'] == 0]
        only_naive = naive[naive['treg_id'] == 0]
        shared = treg[treg['naive_id'] > 0]
        print len(only_treg), len(only_naive), len(shared)

        # Three parallel lists: the data set, the column prefix supplying its
        # id/start/end, and the HOMER run name (formatted with `antibody`).
        datasets = [treg, naive, only_treg, only_naive, shared]
        main_peak = ['treg', 'naive', 'treg', 'naive', 'treg']
        names = [
            x.format(antibody)
            for x in ('all_treg_{0}_enhancers', 'all_naive_{0}_enhancers',
                      'only_treg_{0}_enhancers', 'only_naive_{0}_enhancers',
                      'shared_{0}_enhancers')
        ]
        for i, subset in enumerate(datasets):
            # Only the first two entries (all treg, all naive) are processed;
            # the only_*/shared sets are skipped.
            if i > 1: continue
            # NOTE(review): for i in (0, 1) `subset` is `treg` / `naive`
            # itself, so these assignments add columns to those frames.
            subset['id'] = subset['{0}_id'.format(main_peak[i])]
            subset['start'] = subset['{0}_start'.format(main_peak[i])]
            subset['end'] = subset['{0}_end'.format(main_peak[i])]

            yzer.run_homer(subset,
                           names[i],
                           motif_dirpath,
                           cpus=6,
                           center=True,
                           reverse=False,
                           preceding=False,
                           size=200,
                           length=[8, 10, 12, 15])
        # Load the table at `filename` and zero-fill missing values.
        data = yzer.import_file(filename)
        data = data.fillna(0)

        min_thresh = 20

        # Disabled section: subsets are defined by the four tag-count columns.
        # NOTE(review): which sample each 'tag_count(N)' column belongs to is
        # not visible here -- confirm against the input file.
        if False:
            # Columns 1 and 2 above min_thresh, columns 3 and 4 at or below 0.
            subdata = data[data['tag_count'] > min_thresh]
            subdata = subdata[subdata['tag_count(2)'] > min_thresh]
            subdata = subdata[subdata['tag_count(3)'] <= 0]
            subdata = subdata[subdata['tag_count(4)'] <= 0]
            subdir = 'treg_shared_' + ab
            yzer.run_homer(subdata,
                           subdir,
                           motif_dirpath,
                           cpus=6,
                           center=True,
                           reverse=False,
                           preceding=False,
                           size=200,
                           length=[8, 10, 12, 15],
                           mock=True)
            # Also write the selected rows as a tab-separated file at
            # <motif_dirpath>/<subdir>/<subdir>_enhancers.txt.
            output_file = yzer.get_filename(motif_dirpath, subdir,
                                            subdir + '_enhancers.txt')
            subdata.to_csv(output_file, header=True, index=False, sep='\t')

            # Column 1 above min_thresh, columns 2-4 at or below 0.
            subdata = data[data['tag_count'] > min_thresh]
            subdata = subdata[subdata['tag_count(2)'] <= 0]
            subdata = subdata[subdata['tag_count(3)'] <= 0]
            subdata = subdata[subdata['tag_count(4)'] <= 0]
            subdir = 'ntreg_only_' + ab
            yzer.run_homer(subdata,
                           subdir,
示例#10
0
                     '{}_{}'.format(condition, ab))
     
     # Disabled: HOMER on the `cutoff` rows with the highest tag_count.
     if False:
         filename = yzer.get_filename(cond_dirpath, 
                         '{}_{}_enhancers.txt'.format(condition, ab))
         
         data = yzer.import_file(filename)
         data = data.fillna(0)
         
         min_thresh = 0
         cutoff = 10000
         data = data[data['tag_count'] > min_thresh]
         # Sort descending by tag_count and keep the first `cutoff` rows.
         # NOTE(review): DataFrame.sort was removed from pandas (replaced by
         # sort_values); this line needs updating before re-enabling on a
         # current pandas.
         data = data.sort('tag_count', ascending=False)[:cutoff]
         
         # The run name embeds the cutoff: 'top_10000'.
         yzer.run_homer(data, 
                 'top_{}'.format(cutoff), cond_dirpath,
                 cpus=6, center=True, reverse=False, preceding=False, 
                 size=200, length=[8, 10, 12, 15], mock=True)
 
 # Second pass over the itreg/treg conditions; the body is disabled.
 for condition in ('itreg','treg'):
     cond_dirpath = yzer.get_filename(dirpath, 
                     '{}_{}'.format(condition, ab))
     if False:
         filename = yzer.get_filename(cond_dirpath, 
                         '{}_{}_enhancers.txt'.format(condition, ab))
         
         data = yzer.import_file(filename)
         data = data.fillna(0)
         
         # With min_thresh = 0 this keeps rows with a strictly positive
         # tag_count.
         min_thresh = 0
         cutoff = 10000
         data = data[data['tag_count'] > min_thresh]
示例#11
0
        # 'all': rows of data['treg'] where the smallest of the per-celltype
        # '<celltype>_id' columns is > 0, i.e. every one of them is > 0.
        data['all'] = data['treg'][data['treg']\
                                   [[c + '_id' for c in celltypes]].min(axis=1) > 0]

        # Special cases
        # Treg and Th1 shared and not shared, regardless of others
        data['treg_th1_shared'] = data['treg'][data['treg']['th1_id'] > 0]
        data['treg_not_th1'] = data['treg'][data['treg']['th1_id'] == 0]
        data['th1_not_treg'] = data['th1'][data['th1']['treg_id'] == 0]

        for k in sorted(data.keys()):
            subset = data[k]
            print k, len(subset)

            if k in celltypes or len(subset) < 1000: continue
            first_peak = k == 'all' and 'naive' or k.split('_')[0]
            subset['id'] = subset[first_peak + '_id']
            subset['start'] = subset[first_peak + '_start']
            subset['end'] = subset[first_peak + '_end']

            if k in ('treg_th1_shared', 'treg_not_th1', 'th1_not_treg'):
                yzer.run_homer(subset,
                               'four_way_venn_' + k,
                               motif_dirpath,
                               cpus=6,
                               center=True,
                               reverse=False,
                               preceding=False,
                               size=200,
                               length=[8, 10, 12, 15])
    # Load the table, keep rows at or above the tag-count threshold, then
    # classify them by comparison with foxo1_ko_naive_atac_tag_count.
    data = yzer.import_file(filename)
    data = data.fillna(0)

    min_thresh = get_threshold(seq)
    data = data[data['tag_count'] >= min_thresh]

    # Two counts within a factor of `fold` of each other are treated as
    # unchanged below.
    fold = 2
    if True:
        # ATAC peaks that are absent in the FOXO1 KO
        # (KO tag count is below min_thresh).
        foxo1_critical = data[
            data['foxo1_ko_naive_atac_tag_count'] < min_thresh]
        yzer.run_homer(foxo1_critical,
                       'foxo1_critical',
                       sample_dirpath,
                       cpus=10,
                       center=True,
                       reverse=False,
                       preceding=False,
                       size=200,
                       length=[8, 10, 12],
                       mock=True)
        print('foxo1_critical', len(foxo1_critical))

        # ATAC peaks that don't change with KO of Foxo1: neither count is more
        # than `fold` times the other.
        foxo1_independent = data[
            (data['tag_count'] * fold >= data['foxo1_ko_naive_atac_tag_count'])
            & (data['foxo1_ko_naive_atac_tag_count'] *
               fold >= data['tag_count'])]
        yzer.run_homer(foxo1_independent,
                       'foxo1_independent',
                       sample_dirpath,
                       cpus=10,
示例#13
0
                                 sample_prefix + '_enhancers.txt')

    # Load the table, keep rows at or above the tag-count threshold, then
    # compare each row's tag_count with lcmv_d12_foxo1_tag_count.
    data = yzer.import_file(filename)
    data = data.fillna(0)

    min_thresh = get_threshold(seq)
    data = data[data['tag_count'] >= min_thresh]

    fold = 2
    if True:
        # Rows whose lcmv_d12_foxo1_tag_count is below the threshold.
        naive_only = data[data['lcmv_d12_foxo1_tag_count'] < min_thresh]
        yzer.run_homer(naive_only,
                       'naive_only',
                       sample_dirpath,
                       cpus=10,
                       center=True,
                       reverse=False,
                       preceding=False,
                       size=200,
                       length=[8, 10, 12],
                       mock=True)
        print('naive_only', len(naive_only))

        # Rows where neither count is more than `fold` times the other.
        shared = data[
            (data['tag_count'] * fold >= data['lcmv_d12_foxo1_tag_count'])
            & (data['lcmv_d12_foxo1_tag_count'] * fold >= data['tag_count'])]
        yzer.run_homer(shared,
                       'shared',
                       sample_dirpath,
                       cpus=10,
                       center=True,
                       reverse=False,
示例#14
0
    # Create <dirpath>/with_me3/go_analysis/0_8_min_lfc. `go_path` is not used
    # in the lines visible here.
    go_path = yzer.get_and_create_path(dirpath, 'with_me3', 'go_analysis',
                                       '0_8_min_lfc')

    data = yzer.import_file(
        yzer.get_filename(dirpath, 'transcript_vectors.txt'))
    data = data.fillna(0)
    # Keep rows where the sum of the naive and act me3 tag counts is positive.
    data = data[data['naive_me3_tag_count'] + data['act_me3_tag_count'] > 0]

    # Disabled motif runs; they differ only in run name and in the
    # center/preceding flags passed to run_homer.
    if False:
        curr_path = yzer.get_and_create_path(dirpath, 'with_me3',
                                             'motif_analysis')

        yzer.run_homer(data,
                       'all_refseq_preceding',
                       curr_path,
                       center=False,
                       reverse=False,
                       preceding=True,
                       size=200,
                       cpus=6)
        yzer.run_homer(data,
                       'all_refseq',
                       curr_path,
                       center=False,
                       reverse=False,
                       preceding=False,
                       size=200,
                       cpus=6)
        yzer.run_homer(data,
                       'all_refseq',
                       curr_path,
                       center=True,
示例#15
0
    # Resolve the analysis directory, then load thio_peak_vectors.txt with
    # missing values set to 0. HOMER output goes under <dirpath>/motifs.
    dirpath = 'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/CD4TCells/H3K4me2/Analysis'
    dirpath = yzer.get_path(dirpath)
    motif_dirpath = yzer.get_filename(dirpath, 'motifs')
    filename = yzer.get_filename(dirpath, 'thio_peak_vectors.txt')
    data = yzer.import_file(filename)
    data = data.fillna(0)

    # me2
    if True:
        # Disabled: HOMER on every row.
        if False:
            yzer.run_homer(data,
                           'thio_all',
                           motif_dirpath,
                           cpus=6,
                           center=True,
                           reverse=False,
                           preceding=False,
                           size=200,
                           length=[8, 10, 12, 15])

        # From here on `data` holds only rows with tss_id == 0 (presumably
        # peaks that do not overlap a TSS -- confirm).
        data = data[data['tss_id'] == 0]
        if True:
            yzer.run_homer(data,
                           'thio_h3k4me2_distal',
                           motif_dirpath,
                           cpus=6,
                           center=True,
                           reverse=False,
                           preceding=False,
                           size=200,
示例#16
0
        # For each condition, load '<condition>_<ab>_enhancers.txt' from the
        # '<condition>_<ab>' directory and run HOMER on every row.
        for condition in ('treg', 'itreg', 'activated'):
            cond_dirpath = yzer.get_filename(dirpath,
                                             '{}_{}'.format(condition, ab))

            if True:
                filename = yzer.get_filename(
                    cond_dirpath, '{}_{}_enhancers.txt'.format(condition, ab))

                data = yzer.import_file(filename)
                # Missing values become 0.
                data = data.fillna(0)

                yzer.run_homer(data,
                               'all',
                               cond_dirpath,
                               cpus=6,
                               center=True,
                               reverse=False,
                               preceding=False,
                               size=200,
                               length=[8, 10, 12, 15],
                               mock=True)

        # Second pass over the itreg/treg conditions. Only the directory path
        # is computed; the loading step is disabled by `if False`.
        for condition in ('itreg', 'treg'):
            cond_dirpath = yzer.get_filename(dirpath,
                                             '{}_{}'.format(condition, ab))
            if False:
                filename = yzer.get_filename(
                    cond_dirpath, '{}_{}_enhancers.txt'.format(condition, ab))

                data = yzer.import_file(filename)
                data = data.fillna(0)
                       #('with_pu_1_kla_dex', data[data['tag_count_5'] >= 10]),
                       ('no_pu_1_kla_dex', data[data['tag_count_5'] < 10]),
                       ('gt_partner', data[data['tag_count'] > 1.2*data['tag_count_2']]),
                       #('lt_partner', data[data['tag_count']*1.2 < data['tag_count_2']]),
                       ('with_partner', data[data['tag_count_2'] >= 10]),
                       ('no_partner', data[data['tag_count_2'] < 10]),
                       #('down_in_dex', data[data['dex_1_lfc'] <= -1]),
                       #('down_in_kla_dex', data[data['kla_dex_1_lfc'] <= -1]),
                       #('down_in_kla', data[data['kla_1_lfc'] <= -1]),
                       #('up_in_dex', data[data['dex_1_lfc'] >= 1]),
                       #('up_in_kla_dex', data[data['kla_dex_1_lfc'] >= 1]),
                       #('up_in_kla', data[data['kla_1_lfc'] >= 1]),
                       #('transrepressed', data[(data['kla_1_lfc'] >= 1) & (data['dex_over_kla_1_lfc'] <= -.58)]),
                       #('up_in_dex_down_in_kla_dex', data[(data['dex_1_lfc'] >= 1) & (data['kla_dex_1_lfc'] - data['dex_1_lfc'] <= -.58)]),
                       ):
     # Loop body: `name`, `dataset`, `peak_type`, `super_name` and `size` are
     # bound by enclosing code that is not visible here.
     # We have multiple copies of peaks if they align to different transcripts
     parent_path = yzer.get_and_create_path(motif_dirpath,  
                                          'peak_motifs_by_transcript_lfc',
                                          peak_type, super_name)
     curr_path = yzer.get_and_create_path(parent_path, name)
     
     # Group them after selecting those that we want: collapse duplicate
     # (id, chr_name) rows into one row holding the mean of the other columns.
     dataset = dataset.groupby(['id','chr_name'],as_index=False).mean()
     
     # Every run except 'all' passes the 'all' run's regions file as `bg`
     # (presumably the HOMER background set -- confirm); 'all' passes None.
     if name != 'all': bg = yzer.get_filename(parent_path, 'all','all','all_regions_for_homer.txt')
     else: bg = None
     
     yzer.run_homer(dataset, name, curr_path, 
                     center=True, reverse=False, preceding=False, size=size,
                     cpus=6, bg=bg)
 
示例#18
0
                    dirpath, 'boxplots_non_refseq_by_p65',
                    'enhancer_like_lose_p65_{0}x_change_dsg_only.txt'.format(
                        ratio)))
            # Copy the enhancer id into a 'glass_transcript_id' column so it
            # can serve as the merge key below.
            enhancers['glass_transcript_id'] = enhancers['id']

            # Limit to peaks and touching transcripts, then pull out peaks that intersect our enhancer set
            data = all_data[all_data['touches'] == 't']
            # how='right' keeps every row of `enhancers`, matched or not.
            # Overlapping column names from `enhancers` get the suffix 'trans'.
            data = data.merge(enhancers,
                              how='right',
                              on='glass_transcript_id',
                              suffixes=['', 'trans'])
            curr_path = yzer.get_and_create_path(motif_dirpath,
                                                 'enhancer_like_lose_p65',
                                                 'ratio_{0}'.format(ratio))
            # Group them after selecting those that we want: one row per
            # (id, chr_name), holding the mean of the other columns.
            data = data.groupby(['id', 'chr_name'], as_index=False).mean()

            #bg = yzer.get_filename(motif_dirpath,
            #        'peak_motifs_by_transcript_lfc', 'p65_kla',
            #        'all','all','all','all_regions_for_homer.txt')

            # bg=None is passed explicitly; the bg path above is commented out.
            yzer.run_homer(data,
                           'ratio_{0}'.format(ratio),
                           curr_path,
                           center=False,
                           reverse=False,
                           preceding=False,
                           size=size,
                           bg=None,
                           cpus=7)
示例#19
0
            # Load this sample's enhancer table, keep rows at or above the
            # 'atac' threshold, and store the result under the sample prefix.
            # `j`, `oth_breed` and `datasets` are bound by enclosing code that
            # is not visible here.
            filename = yzer.get_filename(sample_dirpath,
                                         sample_prefix + '_enhancers.txt')

            data = yzer.import_file(filename)
            data = data.fillna(0)

            min_thresh = get_threshold('atac')

            data = data[data['tag_count'] >= min_thresh]

            datasets[sample_prefix] = data

            # Disabled: HOMER on every thresholded row.
            if False:

                yzer.run_homer(data,
                               'all', sample_dirpath,
                               cpus=10, center=True, reverse=False, preceding=False,
                               size=200, length=[8, 10, 12], mock=True)
            if True:
                # Versus comparable sample in other breed: rows whose
                # '<oth_breed[1][j]>_tag_count' is below the threshold.
                subdata = data[
                    data['{}_tag_count'.format(oth_breed[1][j])] < min_thresh]
                yzer.run_homer(subdata,
                               'not_in_' + oth_breed[1][j], sample_dirpath,
                               cpus=10, center=True, reverse=False,
                               preceding=False,
                               size=200, length=[8, 10, 12], mock=True)

                # Versus hi/lo sample and prior sample if not naive.
                if j > 0:
                    if 'klrghi' in sample_prefix:
                        # NOTE(review): replace() swaps every 'hi' substring
                        # in the prefix, not only the one in 'klrghi'
                        # -- confirm the prefixes contain no other 'hi'.
                        other = sample_prefix.replace('hi', 'lo')