Пример #1
0
@author: karmel

Scatterplots of H3K4me2 peak tag counts by GROseq score
'''
from __future__ import division
from glasslab.dataanalysis.graphing.seq_grapher import SeqGrapher

if __name__ == '__main__':
    yzer = SeqGrapher()
    dirpath = 'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/Glass Atlas/Demo-data'
    dirpath = yzer.get_path(dirpath)

    img_dirpath = yzer.get_and_create_path(dirpath, 'scatterplots')

    data = yzer.import_file(
        yzer.get_filename(dirpath, 'me2_peaks_with_transcripts.txt'))
    data = data.fillna(0)
    data = data.groupby(by='id', as_index=True).mean()
    data['transcript_score'] = data['score(2)']
    ax = yzer.scatterplot(
        data,
        xcolname='transcript_score',
        ycolname='tag_count',
        log=True,
        title='H3K4me2 Tag Count as a Function of Transcript Score',
        xlabel='Glass Atlas Transcript Score',
        ylabel='Normalized H3K4me2 tag count',
        show_2x_range=True,
        plot_regression=True,
        show_count=True,
        show_correlation=True,
Пример #2
0
Created on Mar 4, 2013

@author: karmel
'''
from __future__ import division
from glasslab.dataanalysis.graphing.seq_grapher import SeqGrapher

if __name__ == '__main__':
    # Script body: import the ThioMac and BMDC transcript vector files and
    # collect the get_refseq() result for each of them into `sets`.
    yzer = SeqGrapher()

    dirpath = 'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/NOD_BALBc/ThioMacs/Analysis_2013_02/'
    dirpath_bmdc = 'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/NOD_BALBc/BMDCs/Analysis_2013_03/'
    # Both raw path strings are passed through yzer.get_path() before use.
    dirpath = yzer.get_path(dirpath)
    dirpath_bmdc = yzer.get_path(dirpath_bmdc)
    # Figure path is derived from the ThioMac directory only.
    img_dirpath = yzer.get_and_create_path(dirpath, 'bmdc_vs_thiomac')
    # The same file name is read from each of the two analysis directories.
    thio = yzer.import_file(
        yzer.get_filename(dirpath, 'transcript_vectors.txt'))
    bmdc = yzer.import_file(
        yzer.get_filename(dirpath_bmdc, 'transcript_vectors.txt'))

    # Filled in (thio, bmdc) order by the loop below.
    sets = []

    for data in (thio, bmdc):
        # Missing values become 0 before get_refseq() is applied.
        data = data.fillna(0)

        # NOTE(review): presumably keeps only RefSeq transcripts -- confirm
        # against SeqGrapher.get_refseq.
        refseq = yzer.get_refseq(data)

        # Remove low tag counts (this filter is currently disabled)
        #refseq = refseq[refseq['transcript_score'] >= 4]

        sets.append(refseq)
Пример #3
0
@author: karmel
'''
from __future__ import division
from glasslab.dataanalysis.graphing.seq_grapher import SeqGrapher

if __name__ == '__main__':
    yzer = SeqGrapher()
    dirpath = 'karmel/Desktop/Projects/Classes/Rotations/Finland_2012/GR_Project/'
    dirpath = yzer.get_path(dirpath)
    img_dirpath = yzer.get_and_create_path(dirpath, 'peak_scatterplots')

    if True:
        for main, compare, basal_cond in (('p65', 'GR', 'KLA'), ('GR', 'p65',
                                                                 'Dex')):
            data = yzer.import_file(
                yzer.get_filename(dirpath, 'motifs', 'from_peaks',
                                  '{0}_kla_dex_vectors.txt'.format(main)))

            data = data.fillna(0)
            data = data.groupby(['id', 'chr_name'], as_index=False).mean()

            xcolname, ycolname = 'tag_count_2', 'tag_count'  #'p65_kla_tag_count', 'p65_kla_dex_tag_count',
            data = data[data[ycolname] >= 10]

            cond_1 = (data['tag_count_3'] == 0)
            cond_2 = (data['tag_count_3'] > 0) & (data['tag_count_3'] <
                                                  data['tag_count_4'])
            cond_3 = (data['tag_count_3'] > 0) & (data['tag_count_3'] >=
                                                  data['tag_count_4'])
            ax = None
            for show_points in (True, False):
Пример #4
0
Note: Made font.weight = bold and axes.titlesize = 24, font.size = 16 in matplotlibrc
'''
from __future__ import division
from glasslab.dataanalysis.graphing.seq_grapher import SeqGrapher
from glasslab.utils.functions import nonzero

if __name__ == '__main__':
    yzer = SeqGrapher()
    dirpath = 'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/Glass Atlas/Demo-data'
    dirpath = yzer.get_path(dirpath)

    img_dirpath = yzer.get_and_create_path(dirpath,
                                           'refseq_to_homer/large_gap_500bp')

    data = yzer.import_file(
        yzer.get_filename(dirpath, 'refseq_tag_counts_500bp.txt'))
    data['sum'] = nonzero(data['sum'].fillna(0))

    homer_data = yzer.import_file(
        yzer.get_filename(dirpath, 'RNA_GroSeq_CountsGenes.txt'))
    homer_data['sequence_identifier'] = homer_data['Gene ID']
    homer_data['homer_tag_count'] = nonzero(homer_data[
        'ThioMac-GroSeq-notx-110513/ genes (Total: 12166480.0) normFactor 0.82']
                                            .fillna(0))
    homer_data = homer_data[['sequence_identifier', 'homer_tag_count']]

    merged = data.merge(homer_data, how='inner', on='sequence_identifier')
    merged = merged.fillna(1)

    if True:
        ax = yzer.scatterplot(merged,
Пример #5
0
        'Miscellaneous_Collaborations/Rodrigo_CD8s_2014_09/Promoters'
    dirpath = yzer.get_path(dirpath)

    cond, seq, breed = ('naive', 'atac', '')
    wt_prefix = sample_name(cond, seq, breed)
    ko_prefix = sample_name(cond, seq, 'foxo1_ko_')

    wt_dirpath = yzer.get_filename(dirpath, wt_prefix)
    ko_dirpath = yzer.get_filename(dirpath, ko_prefix)

    wt_filename = yzer.get_filename(wt_dirpath,
                                    wt_prefix + '_promoters.txt')
    ko_filename = yzer.get_filename(ko_dirpath,
                                    ko_prefix + '_promoters.txt')

    wt_data = yzer.import_file(wt_filename)
    wt_data = wt_data.fillna(0)
    ko_data = yzer.import_file(ko_filename)
    ko_data = ko_data.fillna(0)

    min_thresh = get_threshold(seq)
    wt_data = wt_data[wt_data['tag_count'] >= min_thresh]
    ko_data = ko_data[ko_data['tag_count'] >= min_thresh]

    wt_only = wt_data[
        wt_data['foxo1_ko_naive_atac_tag_count'] < min_thresh]

    fold = 2
    both = wt_data[
        (wt_data['foxo1_ko_naive_atac_tag_count']
         * fold >= wt_data['tag_count']) &
Пример #6
0
    yzer = SeqGrapher()
    dirpath = 'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/HiC/'
    dirpath = yzer.get_path(dirpath)
    data_dirpath = yzer.get_filename(dirpath, 'enhancer_sets')

    kla_col = 'kla_lfc'

    tss_only = False
    img_dirpath = yzer.get_and_create_path(
        dirpath, 'interactions_by_kla_lfc', tss_only and 'genic'
        or 'all_interactions', 'lfc_2')

    # File generated in novel_me2_sites
    enhancers = yzer.import_file(
        yzer.get_filename(
            data_dirpath,
            'all_enhancers_with_me2_and_{0}interaction_stats.txt'.format(
                tss_only and 'tss_' or '')))

    for kla_timepoint in ('1h', ):
        enhancers['me2_ratio'] = nonzero(enhancers['me2_kla_6h_tag_count_2'])/\
                                    nonzero(enhancers['me2_notx_tag_count_2'])

        sets = OrderedDict()
        sets['4x GRO in KLA {0}'.format(kla_timepoint)] = enhancers[
            enhancers[kla_col] > 2]
        sets['No change GRO in KLA {0}'.format(kla_timepoint)] = enhancers[
            enhancers[kla_col].abs() <= 1]
        sets['1/4 GRO in KLA {0}'.format(kla_timepoint)] = enhancers[
            enhancers[kla_col] < -2]
Пример #7
0
'''
from __future__ import division
from glasslab.dataanalysis.graphing.seq_grapher import SeqGrapher
from glasslab.utils.functions import nonzero
from glasslab.dataanalysis.misc.gr_project_2012.v1.enhancer_subsets_for_supershift import ucsc_link_cleanup
import numpy

if __name__ == '__main__':
    # Script body: load transcript vectors with nearby peaks and, for each
    # ratio, narrow them down with the chain of column filters below.
    yzer = SeqGrapher()
    dirpath = 'karmel/Desktop/Projects/Classes/Rotations/Finland_2012/GR_Project/'
    dirpath = yzer.get_path(dirpath)
    peak_type = 'p65'
        
    # The output directory name embeds peak_type.
    img_dirpath = yzer.get_and_create_path(dirpath, 'boxplots_non_refseq_by_{0}'.format(peak_type))
    
    transcripts = yzer.import_file(yzer.get_filename(dirpath, 'motifs', 'transcript_vectors_with_nearby_peaks.txt'))
    
    
    if True:
        # Toggle for the optional PU.1 filter applied at the end of the chain.
        pu_1 = False
        for ratio in (1.5, 2, 3):
            # Each filter is applied to the result of the previous one, so a
            # row must satisfy all of them to survive.
            data = transcripts[transcripts['refseq'] == 'f']
            data = data[data['has_infrastructure'] == 0]
            data = data[data['length'] < 6000]
            data = data[data['dex_1_lfc'] < 1]
            data = data[data['kla_1_lfc'] >= 1]
            data = data[data['gr_kla_dex_tag_count'] > 0]
            data = data[data['gr_fa_kla_dex_tag_count'] == 0]
            # Report how many rows are left after the filters above.
            print len(data)
            # NOTE(review): both operands of the sum below are the same column
            # ('pu_1_kla_tag_count'); the second was possibly meant to be a
            # different PU.1 column -- confirm the intended column name.
            if pu_1: data = data[data['pu_1_kla_tag_count']  + data['pu_1_kla_tag_count'] > 0]
            
Пример #8
0
        'Miscellaneous_Collaborations/Rodrigo_CD8s_2014_09/Enhancers_set2'
    dirpath = yzer.get_path(dirpath)

    save_path = yzer.get_and_create_path(
        dirpath, 'Figures', 'Enhancer_counts')

    datasets = {}
    breed_sets = get_breed_sets()
    for i, (samples, short_names) in enumerate(breed_sets):
        oth_breed = breed_sets[1 - i]
        for j, sample_prefix in enumerate(short_names):
            sample_dirpath = yzer.get_filename(dirpath, sample_prefix)
            filename = yzer.get_filename(sample_dirpath,
                                         sample_prefix + '_enhancers.txt')

            data = yzer.import_file(filename)
            data = data.fillna(0)

            min_thresh = get_threshold('atac')

            data = data[data['tag_count'] >= min_thresh]

            datasets[sample_prefix] = data

    # How many denovo d7 enhancers are also in foxo1 kos?
    for celltype in ('hi', 'lo'):
        d7 = datasets['klrg{}_d7'.format(celltype)]
        de_novo = d7[d7['d0_tag_count'] < min_thresh]

        all_shared = d7[
            'foxo1_ko_klrg{}_d7_tag_count'.format(celltype)] >= min_thresh
Пример #9
0
@author: karmel
'''
from __future__ import division
from glasslab.dataanalysis.graphing.seq_grapher import SeqGrapher
from glasslab.utils.functions import nonzero
from glasslab.dataanalysis.motifs.motif_analyzer import MotifAnalyzer

if __name__ == '__main__':
    yzer = SeqGrapher()

    dirpath = 'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/CD4TCells/Oshea_enhancers/ctcf_across_celltypes'
    dirpath = yzer.get_path(dirpath)
    img_dirpath = yzer.get_and_create_path(dirpath, 'figures')

    dp = yzer.import_file(
        yzer.get_filename(dirpath, 'dp_with_thiomac_ctcf.txt')).fillna(0)
    thio = yzer.import_file(
        yzer.get_filename(dirpath, 'thiomac_with_dp_ctcf.txt')).fillna(0)

    # Get venn-diagram sets
    only_dp = dp[dp['thiomac_ctcf_tag_count'] == 0]
    only_thio = thio[thio['dp_ctcf_tag_count'] == 0]
    shared = dp[dp['thiomac_ctcf_tag_count'] != 0]
    shared_check = thio[thio['dp_ctcf_tag_count'] != 0]
    print len(only_dp), len(only_thio), len(shared), len(shared_check)

    data = shared.append(only_dp, ignore_index=True)
    data = data.append(only_thio, ignore_index=True)

    data['dp_nonzero'] = nonzero(data['dp_ctcf_tag_count'])
    data['thio_nonzero'] = nonzero(data['thiomac_ctcf_tag_count'])
Пример #10
0
'''
Created on May 10, 2012

@author: karmel
'''
from glasslab.dataanalysis.graphing.seq_grapher import SeqGrapher
import os
from matplotlib import pyplot

if __name__ == '__main__':
    grapher = SeqGrapher()

    dirpath = '/Users/karmel/Desktop/Projects/GlassLab/Notes_and_Reports/ncRNA_josh/'
    filename = os.path.join(dirpath, 'refseq_predictions.tsv')
    evidence_f = os.path.join(dirpath, 'refseq_evidence.orf')
    data = grapher.import_file(filename)
    evidence = grapher.import_file(evidence_f)

    data['score_orf'] = evidence['score']
    data = data[data['score_orf'] < 200]

    data_coding = data[data['score'] >= 0]
    data_noncoding = data[data['score'] < 0]
    ax = grapher.scatterplot(data_coding,
                             'score_orf',
                             'score',
                             log=False,
                             color='blue',
                             label='Predicted Coding',
                             add_noise=False,
                             show_2x_range=False,
Пример #11
0
    df['enhancer_id'] = group['id_2'].mean()
    df['enhancer_lfc'] = group['p65_tag_count_2'].mean()
    if f_condition: df = df[f_condition(df)]
    return df


if __name__ == '__main__':
    # Script body: load interaction pairs and transcript vectors, add
    # length-normalised tag-count columns, and join transcripts onto the
    # interactions by 'id'.
    yzer = SeqGrapher()
    dirpath = 'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/HiC/'
    dirpath = yzer.get_path(dirpath)
    data_dirpath = yzer.get_filename(dirpath, 'enhancer_sets')
    img_dirpath = yzer.get_and_create_path(dirpath, 'enhancer_rewiring_lfc',
                                           'p65_tags')

    interactions = yzer.import_file(
        yzer.get_filename(data_dirpath,
                          'transcript_pairs_refseq_with_me2.txt'))
    # Keep only rows whose 'count' is greater than 1.
    interactions = interactions[interactions['count'] > 1]

    transcripts = yzer.import_file(
        yzer.get_filename(data_dirpath, 'transcript_vectors.txt'))

    # Tag count divided by transcript length, scaled by 1000.
    # NOTE(review): under Python 2 this may be integer division if both
    # columns are integer-typed and `from __future__ import division` is not
    # in effect for this module -- confirm.
    transcripts['kla_6h_rpbp'] = transcripts['kla_6h_tag_count'] / (
        transcripts['length']) * 1000
    transcripts['kla_rpbp'] = transcripts['kla_tag_count'] / (
        transcripts['length']) * 1000

    # Associate gene id: left join keeps every interaction row, matched or not.
    interactions = interactions.merge(transcripts, how='left', on='id')

    # Copy 'id' into 'id_2'; presumably the key for a later join on the
    # second member of each pair -- confirm against the rest of the script.
    transcripts['id_2'] = transcripts['id']
Пример #12
0
@author: karmel
'''
from __future__ import division
from glasslab.dataanalysis.graphing.seq_grapher import SeqGrapher
from glasslab.utils.functions import nonzero

if __name__ == '__main__':
    yzer = SeqGrapher()

    dirpath = 'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/CD4TCells/Oshea_enhancers/peak_overlaps'
    dirpath = yzer.get_path(dirpath)
    img_dirpath = yzer.get_and_create_path(dirpath, 'figures')

    peak = 'p300'
    th1 = yzer.import_file(
        yzer.get_filename(dirpath,
                          'th1_with_th2_{0}.txt'.format(peak))).fillna(0)
    th2 = yzer.import_file(
        yzer.get_filename(dirpath,
                          'th2_with_th1_{0}.txt'.format(peak))).fillna(0)
    th1_with_ctcf_motif = yzer.import_file(
        yzer.get_filename(dirpath, 'motifs', 'th_p300_enhancers_ctcf',
                          'th1_only_{0}_with_ctcf_motif.txt'.format(peak)))
    th2_with_ctcf_motif = yzer.import_file(
        yzer.get_filename(dirpath, 'motifs', 'th_p300_enhancers_ctcf',
                          'th2_only_{0}_with_ctcf_motif.txt'.format(peak)))
    shared_with_ctcf_motif = yzer.import_file(
        yzer.get_filename(dirpath, 'motifs', 'th_p300_enhancers_ctcf',
                          'th_shared_{0}_with_ctcf_motif.txt'.format(peak)))

    # Filter out promoters
Пример #13
0
'''
from __future__ import division
from glasslab.dataanalysis.graphing.seq_grapher import SeqGrapher
import random

if __name__ == '__main__':
    yzer = SeqGrapher()
    dirpath = 'karmel/Desktop/Projects/Classes/Rotations/Finland_2012/GR_Project/'
    dirpath = yzer.get_path(dirpath)
    img_dirpath = yzer.get_and_create_path(dirpath,
                                           'boxplots_nearby_genes_by_p65')

    change_type = 'more'  #'less'
    for ratio in (1.5, 2, 3):
        data = yzer.import_file(
            yzer.get_filename(dirpath, 'motifs',
                              'transcript_vectors_with_nearby_peaks.txt'))
        nearby = yzer.import_file(
            yzer.get_filename(
                img_dirpath,
                'nearest_genes_to_enhancer_like_{1}_p65_{0}x.txt'.format(
                    str(ratio).replace('.', '_'), change_type)))
        colname = 'dex_over_kla_1_lfc'

        pausing = True
        if pausing:
            colname = 'pausing_ratio_ratio'

            # We want previously calculated bucket scores,
            # Joined to old transcripts because we have since updated IDs
            bucket_scores = yzer.import_file(
Пример #14
0
We count the interactions connected to each transcript and
draw a boxplot. In order to most easily pull in all the 
zero-interaction enhancers, we load those with a separate query
and use the difference in count.
'''
from glasslab.dataanalysis.graphing.seq_grapher import SeqGrapher

if __name__ == '__main__':
    yzer = SeqGrapher()
    dirpath = 'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/HiC/novel_enhancers'
    dirpath = yzer.get_path(dirpath)

    img_dirpath = yzer.get_and_create_path(dirpath, 'boxplots_by_me2_in_notx')

    data = {}
    data['all_enhancers_with_less_me2'] = yzer.import_file(
        yzer.get_filename(dirpath, 'all_enhancers_with_less_me2_in_notx.txt'))
    data['all_enhancers_with_me2'] = yzer.import_file(
        yzer.get_filename(dirpath, 'all_enhancers_with_me2_in_notx.txt'))
    data['interacting_in_notx_with_less_me2'] = yzer.import_file(
        yzer.get_filename(
            dirpath,
            'interacting_in_notx_enhancers_with_less_me2_in_notx.txt'))
    data['interacting_in_notx_with_me2'] = yzer.import_file(
        yzer.get_filename(
            dirpath, 'interacting_in_notx_enhancers_with_me2_in_notx.txt'))
    data['interacting_in_kla_30m_with_less_me2'] = yzer.import_file(
        yzer.get_filename(
            dirpath,
            'interacting_in_kla_30m_enhancers_with_less_me2_in_notx.txt'))
    data['interacting_in_kla_30m_with_me2'] = yzer.import_file(
        yzer.get_filename(
Пример #15
0
if __name__ == '__main__':
    # Script body: combine per-gene counts with the full RefSeq list (so
    # genes absent from the counts file get filled-in zeros) and split the
    # result by sequencing_run_id.
    enhancer_counts = True # Are we looking at enhancer interactions (False) or counts (True)?
    
    yzer = SeqGrapher()
    dirpath = 'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/HiC/enhancers_by_gene_length'
    dirpath = yzer.get_path(dirpath)
    
    img_dirpath = yzer.get_and_create_path(dirpath, 'scatterplots')

    # and/or conditional: yields 'enhancer' when enhancer_counts is truthy,
    # otherwise 'interaction'. The value is used in the input file name below.
    counted = enhancer_counts and 'enhancer' or 'interaction'
    
    # The first set has length with interaction counts;
    # the second has length for all transcripts, even those without interactions.
    # We want to merge such that we add the interaction-less genes with a count of 0.
    data = yzer.import_file(yzer.get_filename(dirpath,'{0}_counts_by_refseq.txt'.format(counted)))
    all_data = yzer.import_file(yzer.get_filename(dirpath,'refseq_all.txt'))
    # Drop ids already present in `data` so the concat adds no duplicates.
    all_data = all_data[~all_data['id'].isin(data['id'])]
    data = pandas.concat([data, all_data])
    # Columns missing from the appended rows come out of concat as NaN;
    # fillna(0) turns them into 0 (including sequencing_run_id, used below).
    data = data.reset_index().fillna(0)
    
    # Split rows by sequencing run; 0 marks the rows that received the fill value.
    notx = data[data['sequencing_run_id'] == 765]
    kla_30m = data[data['sequencing_run_id'] == 766]
    kla_4h = data[data['sequencing_run_id'] == 773]
    no_intxns = data[data['sequencing_run_id'] == 0]
    
    # Zero won't show up in a log plot, so add one.
    # NOTE(review): no_intxns is a boolean-mask selection from `data`;
    # assigning a column on it can trigger pandas' SettingWithCopyWarning --
    # consider taking an explicit .copy() first.
    no_intxns['count'] = 1
    
    
    ax = yzer.scatterplot(no_intxns, 
Пример #16
0
def ucsc_link_cleanup(data):
    '''
    Rewrite the 'ucsc_link_nod' column of ``data`` in place and return ``data``.

    Each value has 'nod_balbc' replaced by 'gr_project_2012' and is then
    wrapped in an HTML anchor labelled "UCSC" that opens in a new tab.
    The same object that was passed in is returned.
    '''
    def _as_anchor(url):
        # Point the link at the gr_project_2012 name before wrapping it.
        retargeted = url.replace('nod_balbc', 'gr_project_2012')
        return '<a href={0} target="_blank">UCSC</a>'.format(retargeted)

    data['ucsc_link_nod'] = data['ucsc_link_nod'].map(_as_anchor)
    return data


if __name__ == '__main__':
    yzer = SeqGrapher()
    dirpath = 'karmel/Desktop/Projects/Classes/Rotations/Finland_2012/GR_Project/'
    dirpath = yzer.get_path(dirpath)
    save_dirpath = yzer.get_and_create_path(dirpath,
                                            'subgroups_for_supershift')

    transcripts = yzer.import_file(
        yzer.get_filename(dirpath, 'motifs', 'transcript_vectors.txt'))

    data = transcripts[transcripts['refseq'] == 'f']
    data = data[data['has_infrastructure'] == 0]
    data = data[data['length'] < 6000]
    data = data[data['dex_1_lfc'] < 1]
    data = data[data['kla_1_lfc'] >= 1]

    data = data.fillna(0)

    data = ucsc_link_cleanup(data)

    if False:
        # First get sets for Negative controls
        tfs = ['p65', 'pu_1', 'gr', 'gr_fa']
        for tf in tfs:
Пример #17
0
@author: karmel

Note: Made font.weight = normal and axes.titlesize = 24 in matplotlibrc
'''
from __future__ import division
from glasslab.dataanalysis.graphing.seq_grapher import SeqGrapher
from glasslab.dataanalysis.misc.demoatlas.rpkm_to_score import PrettyAxisGrapher

if __name__ == '__main__':
    yzer = SeqGrapher()
    dirpath = 'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/Glass Atlas/NAR_review_data/Post-gene'
    dirpath = yzer.get_path(dirpath)
    img_dirpath = yzer.get_and_create_path(dirpath, 'scatterplots')

    data = yzer.import_file(
        yzer.get_filename(dirpath, 'post_gene_transcripts.txt'))
    refseq = yzer.import_file(
        yzer.get_filename(dirpath, 'all_expressed_refseq.txt'))

    refseq_with_runoff = refseq[refseq['id'].isin(data['gene_id'])]
    refseq_no_runoff = refseq[~refseq['id'].isin(data['gene_id'])]
    if False:
        print len(refseq_no_runoff)
        print refseq_no_runoff.tail(100).to_string()

    # Calculate length of runoff
    data[
        'length'] = data['transcription_end'] - data['transcription_start'] + 1
    data['gene_length'] = data['gene_end'] - data['gene_start'] + 1

    # What might be correlated with length of runoff?
kla_col = 'kla_6h_lfc'

if __name__ == '__main__':
    yzer = SeqGrapher()
    dirpath = 'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/HiC/'
    dirpath = yzer.get_path(dirpath)
    data_dirpath = yzer.get_filename(dirpath, 'enhancer_sets')
    img_dirpath = yzer.get_and_create_path(dirpath,
                                           'genes_to_average_enhancer_lfc')

    keys = ('all', 'notx', 'kla', 'notx_only', 'kla_only', 'shared_enh')

    if True:
        interactions = yzer.import_file(
            yzer.get_filename(data_dirpath,
                              'transcript_pairs_refseq_with_me2.txt'))
        interactions = interactions[interactions['count'] > 1]

        all_transcripts = yzer.import_file(
            yzer.get_filename(data_dirpath, 'transcript_vectors.txt'))

        transcripts = all_transcripts[['id', 'kla_lfc', 'kla_6h_lfc']]

        # Associate gene id
        interactions = interactions.merge(transcripts, how='left', on='id')

        transcripts['id_2'] = transcripts['id']
        transcripts = transcripts.drop(['id'], axis=1)
        interactions = interactions.merge(transcripts,
                                          how='left',
Пример #19
0
'''
from glasslab.dataanalysis.graphing.seq_grapher import SeqGrapher
from glasslab.utils.functions import nonzero
import numpy

if __name__ == '__main__':
    # Script body: load interaction pairs and transcript vectors, then for
    # each me2 timepoint add log2 ratio columns (KLA vs. notx me2 tag counts)
    # for both members of every pair.
    yzer = SeqGrapher()
    dirpath = 'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/HiC/'
    dirpath = yzer.get_path(dirpath)
    data_dirpath = yzer.get_filename(dirpath, 'enhancer_sets')
    img_dirpath = yzer.get_and_create_path(dirpath, 'gene_enhancer_me2_lfc',
                                           'scatterplots')

    interactions = yzer.import_file(
        yzer.get_filename(
            data_dirpath,
            'transcript_pairs_enhancer_with_anything_with_me2_inc_me2_counts.txt'
        ))
    # Keep only rows whose 'count' is greater than 1.
    interactions = interactions[interactions['count'] > 1]
    all_transcripts = yzer.import_file(
        yzer.get_filename(data_dirpath, 'transcript_vectors.txt'))

    for me2_timepoint in ('6h', '24h'):
        # Column names for this timepoint; the '_2' suffix selects the
        # corresponding '_2' source columns in `interactions`.
        me2_col = 'me2_{0}_ratio'.format(me2_timepoint)
        kla_col = 'kla_lfc'
        col_set = [me2_col + '_2', kla_col + '_2', kla_col, me2_col]

        # log2(KLA me2 tags / notx me2 tags), each count passed through
        # nonzero() first (presumably to avoid zeros in the ratio -- confirm
        # against glasslab.utils.functions.nonzero).
        # NOTE(review): the import block right after this module's docstring
        # has no `from __future__ import division`, so under Python 2 this
        # `/` may floor integer-typed counts -- confirm the column dtypes.
        interactions[me2_col] = numpy.log2(nonzero(interactions['me2_kla_{0}_tag_count'.format(me2_timepoint)])/\
                                            nonzero(interactions['me2_notx_tag_count']))
        interactions[me2_col + '_2'] = numpy.log2(nonzero(interactions['me2_kla_{0}_tag_count_2'.format(me2_timepoint)])/\
                                            nonzero(interactions['me2_notx_tag_count_2']))
Пример #20
0
@author: karmel

Note: Made font.weight = bold and axes.titlesize = 24, font.size = 16 in matplotlibrc
'''
from __future__ import division
from glasslab.dataanalysis.graphing.seq_grapher import SeqGrapher
from glasslab.utils.functions import nonzero

if __name__ == '__main__':
    yzer = SeqGrapher()
    dirpath = 'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/Glass Atlas/NAR_review_data/vs_homer'
    dirpath = yzer.get_path(dirpath)

    img_dirpath = yzer.get_and_create_path(dirpath, 'scatterplots')

    data = yzer.import_file(
        yzer.get_filename(dirpath, 'tag_count_by_refseq.txt'))
    data['sum'] = nonzero(data['sum'].fillna(0))

    homer_data = yzer.import_file(
        yzer.get_filename(dirpath, 'RNA_GroSeq_CountsGenes.txt'))
    homer_data['sequence_identifier'] = homer_data['Gene ID']
    homer_data['homer_tag_count'] = nonzero(homer_data[
        'ThioMac-GroSeq-notx-110513/ genes (Total: 12166480.0) normFactor 0.82']
                                            .fillna(0))
    homer_data = homer_data[['sequence_identifier', 'homer_tag_count']]

    merged = data.merge(homer_data, how='inner', on='sequence_identifier')
    merged = merged.fillna(1)

    if True:
        ax = yzer.scatterplot(merged,
Created on Oct 8, 2012

@author: karmel
'''
from glasslab.dataanalysis.graphing.seq_grapher import SeqGrapher
from matplotlib import pyplot
from glasslab.dataanalysis.misc.gr_project_2012.boxplots_redistribution_pairs import get_high_quality_pairs

if __name__ == '__main__':
    yzer = SeqGrapher()

    dirpath = 'karmel/Desktop/Projects/Classes/Rotations/Finland_2012/GR_Project/'
    dirpath = yzer.get_path(dirpath)
    motif_dirpath = yzer.get_filename(dirpath, 'motifs', 'from_peaks')

    transcripts = yzer.import_file(
        yzer.get_filename(dirpath, 'motifs', 'transcript_vectors.txt'))
    transcripts['glass_transcript_id'] = transcripts['id']

    if True:
        all_data = yzer.import_file(
            yzer.get_filename(
                dirpath, 'redistribution',
                'p65_peaks_bigger_in_kla_dex_with_nearby_bigger_kla_peaks.txt')
        )

        data = get_high_quality_pairs(all_data, transcripts)

        data = data.groupby(['id', 'chr_name'], as_index=False).mean()

        gr_dex_gt_kla_dex = sum(
            data['tag_count_3'] * 1.5 < data['tag_count_4'])
Пример #22
0
Created on Feb 14, 2013

@author: karmel
'''
from __future__ import division
from glasslab.dataanalysis.graphing.seq_grapher import SeqGrapher
from glasslab.utils.functions import nonzero

if __name__ == '__main__':
    yzer = SeqGrapher()

    dirpath = 'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/CD4TCells/Oshea_enhancers/ctcf_stat1_overlap'
    dirpath = yzer.get_path(dirpath)
    img_dirpath = yzer.get_and_create_path(dirpath, 'figures')

    data = yzer.import_file(
        yzer.get_filename(dirpath, 'ctcf_with_stat1_binding.txt')).fillna(0)
    with_stat1 = data[data['p2_tag_count'] > 0]
    without_stat1 = data[data['p2_tag_count'] == 0]

    if True:
        ax = yzer.piechart(
            [len(with_stat1), len(without_stat1)],
            ['CTCF sites with STAT1', 'CTCF sites without STAT1'],
            title='DP Thymocyte CTCF Sites with STAT1 in Th1 Cells',
            save_dir=img_dirpath,
            show_plot=True)
    data['tag_count_nonzero'] = nonzero(data['tag_count'])
    data['p2_tag_count_nonzero'] = nonzero(data['p2_tag_count'])
    ax = yzer.scatterplot(
        data,
        'tag_count_nonzero',
Пример #23
0
'''
from glasslab.dataanalysis.graphing.seq_grapher import SeqGrapher

if __name__ == '__main__':
    yzer = SeqGrapher()
    dirpath = 'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/GR_Analysis/cpg_island_promoters'
    dirpath = yzer.get_path(dirpath)

    for rep in (4, 3, 1):
        img_dirpath = yzer.get_and_create_path(dirpath,
                                               'boxplots_by_expression',
                                               'genes_with_gr',
                                               'rep{0}'.format(rep),
                                               'transrepressed')

        data = yzer.import_file(
            yzer.get_filename(dirpath, 'transcript_vectors.txt'))
        data['ucsc_link_nod'] = data['ucsc_link_nod'].apply(
            lambda s: s.replace('nod_balbc', 'gr_project_2012'))
        data = data.fillna(0)

        data = data[(data['kla_{0}_lfc'.format(rep)] >= 1)
                    & (data['dex_over_kla_{0}_lfc'.format(rep)] <= -.58)]

        # 2006
        secondary_response = data[data['gene_names'].isin([
            '{Il12b}', '{Il6}', '{Nos2}', '{Mx1}', '{Mx2}', '{Marco}',
            '{Cmpk2}', '{Rsad2}'
        ])]
        delayed = data[data['gene_names'].isin([
            '{Ccl5}', '{Saa3}', '{Ifnb1}', '{Ccl2}', '{Ifit1}', '{Ifit3}',
            '{Peli1}', '{Cxcl10}', '{Traf1}'
Пример #24
0
if __name__ == '__main__':
    yzer = SeqGrapher()
    dirpath = 'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/HiC/'
    dirpath = yzer.get_path(dirpath)
    data_dirpath = yzer.get_filename(dirpath, 'enhancer_sets')

    kla_col = 'kla_lfc'

    tss_only = False
    img_dirpath = yzer.get_and_create_path(
        dirpath, 'novel_me2_sites', tss_only and 'genic' or 'all_interactions',
        'ratio_10')

    if False:
        enhancers = yzer.import_file(
            yzer.get_filename(data_dirpath,
                              'all_distal_enhancers_inc_me2.txt'))

        all_transcripts = yzer.import_file(
            yzer.get_filename(data_dirpath, 'transcript_vectors.txt'))
        transcripts = all_transcripts[['id', kla_col]]
        enhancers = enhancers.merge(transcripts, how='left', on='id')

        if tss_only:
            interactions = yzer.import_file(
                yzer.get_filename(data_dirpath,
                                  'transcript_pairs_refseq_with_me2.txt'))
        else:
            interactions = yzer.import_file(
                yzer.get_filename(
                    data_dirpath,
from glasslab.dataanalysis.graphing.seq_grapher import SeqGrapher
from matplotlib import pyplot
from glasslab.utils.functions import nonzero
from glasslab.dataanalysis.misc.gr_project_2012.v1.elongation import total_tags_per_run

if __name__ == '__main__':
    yzer = SeqGrapher()
    dirpath = 'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/GR_Analysis/enhancer_classification'
    dirpath = yzer.get_path(dirpath)

    consistent = False
    img_dirpath = yzer.get_and_create_path(
        dirpath, 'boxplots_by_expression', consistent and 'consistent'
        or 'rep1')

    data = yzer.import_file(
        yzer.get_filename(dirpath, 'enhancers_with_nearest_gene.txt'))
    data['ucsc_link_nod'] = data['ucsc_link_nod'].apply(
        lambda s: s.replace('nod_balbc', 'gr_project_2012'))

    draw_pies = True
    min_tags = 30
    ratio = 1.5
    # Make sure we have dimethyl
    data = data[data.filter(like='h3k4me2').max(axis=1) > min_tags]
    data = data[data['minimal_distance'] >= 1000]

    transcripts = yzer.import_file(
        yzer.get_filename(dirpath, 'transcript_vectors.txt'))
    transcripts['nearest_refseq_transcript_id'] = transcripts['id']
    data = data.merge(transcripts,
                      how='left',
Пример #26
0
from __future__ import division
from glasslab.dataanalysis.graphing.seq_grapher import SeqGrapher
from glasslab.utils.functions import nonzero
from glasslab.dataanalysis.motifs.motif_analyzer import MotifAnalyzer

if __name__ == '__main__':
    yzer = SeqGrapher()

    dirpath = 'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/CD4TCells/Oshea_enhancers/peak_overlaps'
    dirpath = yzer.get_path(dirpath)
    img_dirpath = yzer.get_and_create_path(dirpath, 'figures')

    peak_pretty = 'p300'
    peak = peak_pretty.lower()
    th1 = yzer.import_file(
        yzer.get_filename(dirpath,
                          'th1_with_th2_{0}.txt'.format(peak))).fillna(0)
    th2 = yzer.import_file(
        yzer.get_filename(dirpath,
                          'th2_with_th1_{0}.txt'.format(peak))).fillna(0)

    # Filter out promoters
    th1 = th1[th1['tss_id'] == 0]
    th2 = th2[th2['tss_id'] == 0]

    # Get venn-diagram sets
    only_th1 = th1[th1['p2_id'] == 0]
    only_th2 = th2[th2['p2_id'] == 0]
    shared = th1[th1['p2_id'] != 0]
    shared_check = th2[th2['p2_id'] != 0]
    print len(only_th1), len(only_th2), len(shared), len(shared_check)
Пример #27
0
Created on Sep 7, 2012

@author: karmel
'''
from __future__ import division
from glasslab.dataanalysis.graphing.seq_grapher import SeqGrapher


if __name__ == '__main__':
    # Load transcript_vectors.txt from the 'motifs' subdirectory of the
    # GR_Project directory; the section below is a disabled analysis.
    grapher = SeqGrapher()
    base_dirpath = 'karmel/Desktop/Projects/Classes/Rotations/Finland_2012/GR_Project/'
    base_dirpath = grapher.get_path(base_dirpath)
    # get_filename is used here to build the path of a subdirectory.
    dirpath = grapher.get_filename(base_dirpath, 'motifs')
    filename = grapher.get_filename(dirpath, 'transcript_vectors.txt')
    
    data = grapher.import_file(filename)
    
    
    # Boxplots for gr_dex peaks by lfc in Dex
    # Disabled section: the condition is hard-coded to False, so nothing
    # inside this branch runs unless it is flipped by hand.
    if False:
        #data = data[data['distal'] == 't']
        data = data[data['has_refseq'] == 1]
        
        # Split rows into three groups by dex_1_lfc: <= -1, >= 1, and
        # strictly between -1 and 1.
        # NOTE(review): "lfc" is presumably log fold change -- confirm.
        down = data[data['dex_1_lfc'] <= -1]
        up = data[data['dex_1_lfc'] >= 1]
        nc = data[abs(data['dex_1_lfc']) < 1]
        
        key = 'p65_kla_tag_count'
        # NOTE(review): this first `datasets` value is overwritten by the
        # very next statement, so only the difference
        # (p65_kla_dex_tag_count - p65_kla_tag_count) per group survives.
        datasets = [down[key],nc[key],up[key]]
        datasets = [d['p65_kla_dex_tag_count'] - d[key] for d in [down, nc, up]]
        
Пример #28
0
Note: Made font.weight = bold and axes.titlesize = 24 in matplotlibrc
'''
from __future__ import division
from glasslab.dataanalysis.graphing.seq_grapher import SeqGrapher
from glasslab.utils.functions import pandas_min
from glasslab.dataanalysis.misc.demoatlas.rpkm_to_score import PrettyAxisGrapher


if __name__ == '__main__':
    # Load the 'within_1kb_gap_500bp_with_nc' table and the expressed
    # RefSeq table, split the RefSeq rows by whether their id appears in
    # the first table's gene_id column, and add length columns.
    yzer = SeqGrapher()
    dirpath = 'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/Glass Atlas/Post_gene_transcripts'
    dirpath = yzer.get_path(dirpath)
    img_dirpath = yzer.get_and_create_path(dirpath, 'scatterplots')

    data = yzer.import_file(
        yzer.get_filename(dirpath, 'within_1kb_gap_500bp_with_nc.txt'))
    refseq = yzer.import_file(
        yzer.get_filename(dirpath, 'expressed_refseq_gap_500bp.txt'))

    # RefSeq rows whose id is / is not referenced by data['gene_id'].
    refseq_with_runoff = refseq[refseq['id'].isin(data['gene_id'])]
    refseq_no_runoff = refseq[~refseq['id'].isin(data['gene_id'])]

    # Debug toggle: dump the count and the last 100 rows of the RefSeq
    # entries that have no matching row in `data`.
    if True:
        # A parenthesised single argument prints identically on Python 2
        # and is also valid Python 3 syntax.
        print(len(refseq_no_runoff))
        print(refseq_no_runoff.tail(100).to_string())

    # Calculate length of runoff
    # NOTE(review): the "+ 1" assumes start/end coordinates are both
    # inclusive -- confirm against the source tables.
    data['length'] = data['transcription_end'] - data['transcription_start'] + 1
    data['gene_length'] = data['gene_end'] - data['gene_start'] + 1

    # What might be correlated with length of runoff?
    # Disabled section: the condition is hard-coded to False.
    if False:
        yzer.scatterplot(data, 'gene_length', 'length', log=True)
Пример #29
0
'''
Created on Jan 30, 2013

@author: karmel
'''
from __future__ import division
from glasslab.dataanalysis.graphing.seq_grapher import SeqGrapher
from collections import OrderedDict

if __name__ == '__main__':
    # Load the transcript vectors and three .cdt vector tables ('all',
    # 'rewired', 'shared') from the HiC directory, then iterate over them.
    yzer = SeqGrapher()
    dirpath = 'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/HiC/'
    dirpath = yzer.get_path(dirpath)
    # get_filename is used here to build the path of a subdirectory.
    data_dirpath = yzer.get_filename(dirpath, 'enhancer_rewiring_lfc')
    transcripts = yzer.import_file(
        yzer.get_filename(dirpath, 'enhancer_sets', 'transcript_vectors.txt'))

    # Built from a tuple of (name, table) pairs; an OrderedDict iterates
    # in this insertion order. The commented-out entries are the
    # corresponding tables from the 'kla_6h' subdirectory.
    sets = OrderedDict((
        ('all',
         yzer.import_file(yzer.get_filename(data_dirpath, 'all_vectors.cdt'))),
        #('all_6h', yzer.import_file(yzer.get_filename(data_dirpath,'kla_6h','all_vectors.cdt'))),
        ('rewired',
         yzer.import_file(
             yzer.get_filename(data_dirpath, 'rewired_vectors.cdt'))),
        #('rewired_6h', yzer.import_file(yzer.get_filename(data_dirpath,'kla_6h','rewired_vectors.cdt'))),
        ('shared',
         yzer.import_file(yzer.get_filename(data_dirpath,
                                            'shared_vectors.cdt'))),
    ))

    # NOTE(review): the body of this loop is not present in this excerpt;
    # what is done with each (key, val) pair cannot be determined here.
    for key, val in sets.items():
Пример #30
0
'''
Created on Oct 26, 2012

@author: karmel
'''
from glasslab.dataanalysis.graphing.seq_grapher import SeqGrapher

if __name__ == '__main__':
    yzer = SeqGrapher()
    dirpath = 'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/GR_Analysis/'
    dirpath = yzer.get_path(dirpath)
    img_dirpath = yzer.get_and_create_path(dirpath, 'cpg_island_promoters',
                                           'piecharts', 'by_genes_with_gr')

    data = yzer.import_file(
        yzer.get_filename(dirpath, 'enhancer_classification',
                          'enhancers_with_nearest_gene.txt'))
    data['ucsc_link_nod'] = data['ucsc_link_nod'].apply(
        lambda s: s.replace('nod_balbc', 'gr_project_2012'))

    min_tags = 30
    # Make sure we have dimethyl
    data = data[data.filter(like='h3k4me2').max(axis=1) > min_tags]
    data = data[data['minimal_distance'] >= 1000]

    #data = yzer.collapse_strands(data)

    transcripts = yzer.import_file(
        yzer.get_filename(dirpath, 'cpg_island_promoters',
                          'transcript_vectors.txt'))
    transcripts['nearest_refseq_transcript_id'] = transcripts['id']