Exemplo n.º 1
0
            'enhancer_lfc': Series(),
            'kla_4h_lfc': Series(kla_4h[kla_col], index=range(len(kla_4h))),
            'notx_lfc': Series(notx[kla_col], index=range(len(notx))),
        }, )
    df['enhancer_id'] = group['id_2'].mean()
    df['enhancer_lfc'] = group['p65_tag_count_2'].mean()
    if f_condition: df = df[f_condition(df)]
    return df


if __name__ == '__main__':
    yzer = SeqGrapher()
    dirpath = 'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/HiC/'
    dirpath = yzer.get_path(dirpath)
    data_dirpath = yzer.get_filename(dirpath, 'enhancer_sets')
    img_dirpath = yzer.get_and_create_path(dirpath, 'enhancer_rewiring_lfc',
                                           'p65_tags')

    interactions = yzer.import_file(
        yzer.get_filename(data_dirpath,
                          'transcript_pairs_refseq_with_me2.txt'))
    interactions = interactions[interactions['count'] > 1]

    transcripts = yzer.import_file(
        yzer.get_filename(data_dirpath, 'transcript_vectors.txt'))

    transcripts['kla_6h_rpbp'] = transcripts['kla_6h_tag_count'] / (
        transcripts['length']) * 1000
    transcripts['kla_rpbp'] = transcripts['kla_tag_count'] / (
        transcripts['length']) * 1000

    # Associate gene id
@author: karmel
'''
from glasslab.dataanalysis.graphing.seq_grapher import SeqGrapher
from matplotlib import pyplot
from glasslab.utils.functions import nonzero
from glasslab.dataanalysis.misc.gr_project_2012.v1.elongation import total_tags_per_run

if __name__ == '__main__':
    yzer = SeqGrapher()
    dirpath = 'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/GR_Analysis/enhancer_classification'
    dirpath = yzer.get_path(dirpath)

    consistent = False
    img_dirpath = yzer.get_and_create_path(
        dirpath, 'boxplots_by_expression', consistent and 'consistent'
        or 'rep1')

    data = yzer.import_file(
        yzer.get_filename(dirpath, 'enhancers_with_nearest_gene.txt'))
    data['ucsc_link_nod'] = data['ucsc_link_nod'].apply(
        lambda s: s.replace('nod_balbc', 'gr_project_2012'))

    draw_pies = True
    min_tags = 30
    ratio = 1.5
    # Make sure we have dimethyl
    data = data[data.filter(like='h3k4me2').max(axis=1) > min_tags]
    data = data[data['minimal_distance'] >= 1000]

    transcripts = yzer.import_file(
Exemplo n.º 3
0
'''
Created on Mar 4, 2013

@author: karmel
'''
from __future__ import division
from glasslab.dataanalysis.graphing.seq_grapher import SeqGrapher

if __name__ == '__main__':
    yzer = SeqGrapher()

    dirpath = 'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/NOD_BALBc/ThioMacs/Analysis_2013_02/'
    dirpath_bmdc = 'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/NOD_BALBc/BMDCs/Analysis_2013_03/'
    dirpath = yzer.get_path(dirpath)
    dirpath_bmdc = yzer.get_path(dirpath_bmdc)
    img_dirpath = yzer.get_and_create_path(dirpath, 'bmdc_vs_thiomac')
    thio = yzer.import_file(
        yzer.get_filename(dirpath, 'transcript_vectors.txt'))
    bmdc = yzer.import_file(
        yzer.get_filename(dirpath_bmdc, 'transcript_vectors.txt'))

    sets = []

    for data in (thio, bmdc):
        data = data.fillna(0)

        refseq = yzer.get_refseq(data)

        # Remove low tag counts
        #refseq = refseq[refseq['transcript_score'] >= 4]
Exemplo n.º 4
0
Created on Mar 11, 2013

@author: karmel

Note: Made font.weight = bold and axes.titlesize = 24, font.size = 16 in matplotlibrc
'''
from __future__ import division
from glasslab.dataanalysis.graphing.seq_grapher import SeqGrapher
from glasslab.utils.functions import nonzero

if __name__ == '__main__':
    yzer = SeqGrapher()
    dirpath = 'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/Glass Atlas/Demo-data'
    dirpath = yzer.get_path(dirpath)

    img_dirpath = yzer.get_and_create_path(dirpath,
                                           'refseq_to_homer/large_gap_500bp')

    data = yzer.import_file(
        yzer.get_filename(dirpath, 'refseq_tag_counts_500bp.txt'))
    data['sum'] = nonzero(data['sum'].fillna(0))

    homer_data = yzer.import_file(
        yzer.get_filename(dirpath, 'RNA_GroSeq_CountsGenes.txt'))
    homer_data['sequence_identifier'] = homer_data['Gene ID']
    homer_data['homer_tag_count'] = nonzero(homer_data[
        'ThioMac-GroSeq-notx-110513/ genes (Total: 12166480.0) normFactor 0.82']
                                            .fillna(0))
    homer_data = homer_data[['sequence_identifier', 'homer_tag_count']]

    merged = data.merge(homer_data, how='inner', on='sequence_identifier')
    merged = merged.fillna(1)
Exemplo n.º 5
0
@author: karmel

Plot supplementary figure showing Hah et al error rates 
against MAX EDGE values when Vespucci is built without knowledge
of RefSeq boundaries.
 
'''
from __future__ import division
from glasslab.dataanalysis.graphing.seq_grapher import SeqGrapher

if __name__ == '__main__':
    # Supplementary figure: error rate (as defined by Hah et al.) plotted
    # against the MAX_EDGE value used for the build.
    yzer = SeqGrapher()
    dirpath = yzer.get_path(
        'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/Glass Atlas/NAR_review_data/no_refseq'
    )
    img_dirpath = yzer.get_and_create_path(dirpath, 'plots')

    # Hard-coded benchmark results; error_rates[i] belongs to max_edges[i].
    max_edges = [100, 500, 1000, 4000, 5000, 5500, 6000, 10000]
    error_rates = [
        0.388551822833,
        0.372390444765,
        0.263807982126,
        0.124663089396,
        0.121784970634,
        0.121807917409,
        0.123263849815,
        0.142530838464,
    ]
    # The y axis is labelled in percent, so scale the fractions by 100.
    error_pcts = [100 * rate for rate in error_rates]

    # The title is used both on the axes and when saving the figure.
    title = 'Benchmarking without Foreknowledge of RefSeq'
    ax = yzer.set_up_plot()
    yzer.add_title(title, ax)
    yzer.add_axis_labels('MAX_EDGE value',
                         'Error rate defined by Hah et al. (%)')

    yzer.plot(max_edges, error_pcts, '-o')
    yzer.save_plot_with_dir(save_dir=img_dirpath, title=title)
Exemplo n.º 6
0
@author: karmel
'''
from __future__ import division
from glasslab.dataanalysis.graphing.seq_grapher import SeqGrapher
from glasslab.utils.functions import nonzero
from glasslab.dataanalysis.misc.gr_project_2012.v1.enhancer_subsets_for_supershift import ucsc_link_cleanup
import numpy

if __name__ == '__main__':
    yzer = SeqGrapher()
    dirpath = 'karmel/Desktop/Projects/Classes/Rotations/Finland_2012/GR_Project/'
    dirpath = yzer.get_path(dirpath)
    peak_type = 'p65'
        
    img_dirpath = yzer.get_and_create_path(dirpath, 'boxplots_non_refseq_by_{0}'.format(peak_type))
    
    transcripts = yzer.import_file(yzer.get_filename(dirpath, 'motifs', 'transcript_vectors_with_nearby_peaks.txt'))
    
    
    if True:
        pu_1 = False
        for ratio in (1.5, 2, 3):
            data = transcripts[transcripts['refseq'] == 'f']
            data = data[data['has_infrastructure'] == 0]
            data = data[data['length'] < 6000]
            data = data[data['dex_1_lfc'] < 1]
            data = data[data['kla_1_lfc'] >= 1]
            data = data[data['gr_kla_dex_tag_count'] > 0]
            data = data[data['gr_fa_kla_dex_tag_count'] == 0]
            print len(data)
Exemplo n.º 7
0
'''
Created on Oct 26, 2012

@author: karmel
'''
from glasslab.dataanalysis.graphing.seq_grapher import SeqGrapher
from matplotlib import pyplot

if __name__ == '__main__':
    yzer = SeqGrapher()
    dirpath = 'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/GR_Analysis/enhancer_classification'
    dirpath = yzer.get_path(dirpath)
    img_dirpath = yzer.get_and_create_path(dirpath, 'piecharts_for_genes_by_mechanism')
    
    data = yzer.import_file(yzer.get_filename(dirpath, 'enhancers_with_nearest_gene.txt'))
    data['ucsc_link_nod'] = data['ucsc_link_nod'].apply(lambda s: s.replace('nod_balbc','gr_project_2012'))
    
    draw_pies = True
    min_tags = 30
    ratio = 1.5
    # Make sure we have dimethyl
    data = data[data.filter(like='h3k4me2').max(axis=1) > min_tags]
    data = data[data['minimal_distance'] >= 1000]
    
    #data = yzer.collapse_strands(data)
    
    transcripts = yzer.import_file(yzer.get_filename(dirpath, 'transcript_vectors.txt'))
    transcripts['nearest_refseq_transcript_id'] = transcripts['id']
    # Join, keeping all transcripts
    data = data.merge(transcripts, how='left', on='nearest_refseq_transcript_id', suffixes=['','_trans'])
    
Exemplo n.º 8
0
from random import shuffle

if __name__ == '__main__':
    yzer = SeqGrapher()
    dirpath = 'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/GR_Analysis/hic_domains'
    dirpath = yzer.get_path(dirpath)

    data = yzer.import_file(
        yzer.get_filename(dirpath, 'transcript_vectors.txt'))
    data['ucsc_link_nod'] = data['ucsc_link_nod'].apply(
        lambda s: s.replace('nod_balbc', 'gr_project_2012'))
    data = data.fillna(0)

    for rep in (4, 3, 1):
        img_dirpath = yzer.get_and_create_path(dirpath,
                                               'fold_change_per_domain',
                                               'all_transcripts',
                                               'rep{0}'.format(rep))

        kla_key = 'kla_{0}_lfc'.format(rep)
        dex_kla_key = 'dex_over_kla_{0}_lfc'.format(rep)

        shuffled = data['domain_id'].values.copy()
        shuffle(shuffled)
        data['shuffled_domain_id'] = shuffled

        data['up_in_kla'] = data[kla_key] > 1
        data['repressed'] = data[dex_kla_key] <= -.58
        data['transrepressed'] = (data[kla_key] > 1) & (data[dex_kla_key] <=
                                                        -.58)
        data['count'] = ~data[kla_key].isnull()
For those, we will sort genes in each condition by number
of interactions, and allow for null values when there is a number
mismatch.
'''
from __future__ import division
from glasslab.dataanalysis.graphing.seq_grapher import SeqGrapher
import numpy

kla_col = 'kla_6h_lfc'

if __name__ == '__main__':
    yzer = SeqGrapher()
    dirpath = 'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/HiC/'
    dirpath = yzer.get_path(dirpath)
    data_dirpath = yzer.get_filename(dirpath, 'enhancer_sets')
    img_dirpath = yzer.get_and_create_path(dirpath,
                                           'genes_to_average_enhancer_lfc')

    keys = ('all', 'notx', 'kla', 'notx_only', 'kla_only', 'shared_enh')

    if True:
        interactions = yzer.import_file(
            yzer.get_filename(data_dirpath,
                              'transcript_pairs_refseq_with_me2.txt'))
        interactions = interactions[interactions['count'] > 1]

        all_transcripts = yzer.import_file(
            yzer.get_filename(data_dirpath, 'transcript_vectors.txt'))

        transcripts = all_transcripts[['id', 'kla_lfc', 'kla_6h_lfc']]

        # Associate gene id
Exemplo n.º 10
0
from glasslab.dataanalysis.graphing.seq_grapher import SeqGrapher


def ucsc_link_cleanup(data):
    '''
    Turn the ``ucsc_link_nod`` column into HTML anchor tags.

    Every value has 'nod_balbc' replaced by 'gr_project_2012' and is then
    embedded as the href of an anchor with target="_blank" and the link
    text "UCSC". The column is overwritten on the frame that was passed
    in, and that same frame is returned.
    '''
    anchor_template = '<a href={0} target="_blank">UCSC</a>'

    def to_anchor(url):
        # Swap the session name first, then wrap the URL in the anchor.
        return anchor_template.format(
            url.replace('nod_balbc', 'gr_project_2012'))

    data['ucsc_link_nod'] = data['ucsc_link_nod'].map(to_anchor)
    return data


if __name__ == '__main__':
    yzer = SeqGrapher()
    dirpath = 'karmel/Desktop/Projects/Classes/Rotations/Finland_2012/GR_Project/'
    dirpath = yzer.get_path(dirpath)
    save_dirpath = yzer.get_and_create_path(dirpath,
                                            'subgroups_for_supershift')

    transcripts = yzer.import_file(
        yzer.get_filename(dirpath, 'motifs', 'transcript_vectors.txt'))

    data = transcripts[transcripts['refseq'] == 'f']
    data = data[data['has_infrastructure'] == 0]
    data = data[data['length'] < 6000]
    data = data[data['dex_1_lfc'] < 1]
    data = data[data['kla_1_lfc'] >= 1]

    data = data.fillna(0)

    data = ucsc_link_cleanup(data)

    if False:
Exemplo n.º 11
0
'''
Created on Oct 1, 2012

@author: karmel
'''
from __future__ import division
from glasslab.dataanalysis.graphing.seq_grapher import SeqGrapher

if __name__ == '__main__':
    yzer = SeqGrapher()
    dirpath = 'karmel/Desktop/Projects/Classes/Rotations/Finland_2012/GR_Project/'
    dirpath = yzer.get_path(dirpath)
    img_dirpath = yzer.get_and_create_path(dirpath, 'boxplots_from_p65_gr')

    if True:
        for main, compare, basal_cond in (('p65', 'GR', 'KLA'), ('GR', 'p65',
                                                                 'Dex')):
            data = yzer.import_file(
                yzer.get_filename(dirpath, 'motifs', 'from_peaks',
                                  '{0}_kla_dex_vectors.txt'.format(main)))

            data = data.fillna(0)
            data = data.groupby(['id', 'chr_name'], as_index=False).mean()
            data = data[data['tag_count_2'] > 0]

            colname = 'tag_count_diff'
            data[colname] = (data['tag_count'] -
                             data['tag_count_2']) / data['tag_count']

            cond_1 = (data['tag_count_3'] == 0)
            cond_2 = (data['tag_count_3'] > 0) & (data['tag_count_3'] <
Exemplo n.º 12
0
'''
Created on Apr 19, 2013

@author: karmel

Note: Made font.weight = bold and axes.titlesize = 24 in matplotlibrc
'''
from __future__ import division
from glasslab.dataanalysis.graphing.seq_grapher import SeqGrapher

if __name__ == '__main__':
    yzer = SeqGrapher()
    dirpath = 'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/Glass Atlas/Demo-data'
    dirpath = yzer.get_path(dirpath)
    img_dirpath = yzer.get_and_create_path(dirpath, 'figure_4_pie_charts')

    yzer.legend_location = 'lower left'
    pie1 = '''Annotated by RefSeq and/or ncRNA.org    16,945
Unannotated    36,578'''
    pie1 = [row.split('    ') for row in pie1.split('\n')]
    pie1 = zip(*pie1)
    yzer.piechart(map(lambda s: int(s.replace(',', '')), pie1[1]),
                  pie1[0],
                  title='Transcripts with Score >= 2',
                  save_dir=img_dirpath,
                  show_plot=True)

    pie2 = '''Promoter-associated RNA    6,314
Antisense  of RefSeq    5,604
Post-TTS, same-strand    6,940
Other RefSeq Proximal    3,119
Exemplo n.º 13
0
'''
Created on Oct 26, 2012

@author: karmel
'''
from glasslab.dataanalysis.graphing.seq_grapher import SeqGrapher
from matplotlib import pyplot

if __name__ == '__main__':
    yzer = SeqGrapher()
    dirpath = 'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/GR_Analysis/enhancer_classification'
    dirpath = yzer.get_path(dirpath)

    consistent = True
    img_dirpath = yzer.get_and_create_path(
        dirpath, 'piecharts_by_mechanism', consistent and 'consistent'
        or 'by_genes')

    data = yzer.import_file(
        yzer.get_filename(dirpath, 'enhancers_with_nearest_gene.txt'))
    data['ucsc_link_nod'] = data['ucsc_link_nod'].apply(
        lambda s: s.replace('nod_balbc', 'gr_project_2012'))

    draw_pies = True
    min_tags = 30
    ratio = 1.5
    # Make sure we have dimethyl
    data = data[data.filter(like='h3k4me2').max(axis=1) > min_tags]
    data = data[data['minimal_distance'] >= 1000]

    #data = yzer.collapse_strands(data)
Exemplo n.º 14
0
'''
Created on Sep 27, 2014

@author: karmel
'''
from glasslab.dataanalysis.graphing.seq_grapher import SeqGrapher
from glasslab.dataanalysis.misc.rodrigo.samples import get_threshold,\
    get_breed_sets
if __name__ == '__main__':
    yzer = SeqGrapher()

    dirpath = 'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/' +\
        'Miscellaneous_Collaborations/Rodrigo_CD8s_2014_09/Enhancers_set2'
    dirpath = yzer.get_path(dirpath)

    save_path = yzer.get_and_create_path(
        dirpath, 'Figures', 'Enhancer_counts')

    datasets = {}
    breed_sets = get_breed_sets()
    for i, (samples, short_names) in enumerate(breed_sets):
        oth_breed = breed_sets[1 - i]
        for j, sample_prefix in enumerate(short_names):
            sample_dirpath = yzer.get_filename(dirpath, sample_prefix)
            filename = yzer.get_filename(sample_dirpath,
                                         sample_prefix + '_enhancers.txt')

            data = yzer.import_file(filename)
            data = data.fillna(0)

            min_thresh = get_threshold('atac')
Exemplo n.º 15
0
from glasslab.dataanalysis.graphing.seq_grapher import SeqGrapher
from random import shuffle

if __name__ == '__main__':
    yzer = SeqGrapher()
    dirpath = 'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/GR_Analysis/hic_domains'
    dirpath = yzer.get_path(dirpath)

    data = yzer.import_file(
        yzer.get_filename(dirpath, 'transcript_vectors.txt'))
    data['ucsc_link_nod'] = data['ucsc_link_nod'].apply(
        lambda s: s.replace('nod_balbc', 'gr_project_2012'))
    data = data.fillna(0)

    for rep in (4, 3, 1):
        img_dirpath = yzer.get_and_create_path(dirpath, 'lfc_histograms',
                                               'rep{0}'.format(rep))

        kla_key = 'kla_{0}_lfc'.format(rep)
        dex_kla_key = 'dex_over_kla_{0}_lfc'.format(rep)

        data = data[data[kla_key] >= 1]

        shuffled = data['domain_id'].values.copy()
        shuffle(shuffled)
        data['shuffled_domain_id'] = shuffled

        grouped = data.groupby(by='domain_id', as_index=False).mean()
        shuffled_grouped = data.groupby(by='shuffled_domain_id',
                                        as_index=False).mean()

        grouped = grouped[grouped['domain_id'] != 0]
Exemplo n.º 16
0
from __future__ import division
from glasslab.dataanalysis.graphing.seq_grapher import SeqGrapher
from glasslab.utils.functions import nonzero
from collections import OrderedDict

if __name__ == '__main__':
    yzer = SeqGrapher()
    dirpath = 'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/HiC/'
    dirpath = yzer.get_path(dirpath)
    data_dirpath = yzer.get_filename(dirpath, 'enhancer_sets')

    kla_col = 'kla_lfc'

    tss_only = False
    img_dirpath = yzer.get_and_create_path(
        dirpath, 'novel_me2_sites', tss_only and 'genic' or 'all_interactions',
        'ratio_10')

    if False:
        enhancers = yzer.import_file(
            yzer.get_filename(data_dirpath,
                              'all_distal_enhancers_inc_me2.txt'))

        all_transcripts = yzer.import_file(
            yzer.get_filename(data_dirpath, 'transcript_vectors.txt'))
        transcripts = all_transcripts[['id', kla_col]]
        enhancers = enhancers.merge(transcripts, how='left', on='id')

        if tss_only:
            interactions = yzer.import_file(
                yzer.get_filename(data_dirpath,
Exemplo n.º 17
0
'''
Created on Feb 15, 2013

@author: karmel
'''
from __future__ import division
from glasslab.dataanalysis.graphing.seq_grapher import SeqGrapher

if __name__ == '__main__':
    yzer = SeqGrapher()

    dirpath = 'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/NOD_BALBc/ThioMacs/Analysis_2013_02/'
    dirpath = yzer.get_path(dirpath)
    img_dirpath = yzer.get_and_create_path(dirpath, 'srf_ko_targets')
    data = yzer.import_file(
        yzer.get_filename(dirpath, 'transcript_vectors.txt'))

    data = data.fillna(0)
    data = data[data[['nod_notx_1h_tag_count', 'balb_notx_1h_tag_count']].max(
        axis=1) >= 10]
    data = data[(data['has_refseq'] == 1) & (data['transcript_score'] >= 4)]

    # From Amy Sullivan SRF paper
    down_in_srf_ko = [
        'Cnn2', 'Srf', 'Lima1', 'Rhoj', 'Coro1a', 'Il1rn', 'Lsp1', 'LOC277203',
        'Vcl', 'Card11', 'Cbr2', 'Cd83', 'Acta2', 'Actb', 'Tspan7', 'Ebi2',
        'Gpr162', 'Ckb', 'Dhcr24', 'LOC638632', 'Actg2', 'Trim29', 'Ppap2b',
        'Klk1b11', 'Actc1', 'Pcp4l1', 'LOC621324', 'Cdkn1c', 'Slco2b1',
        'Cd24a', 'Pdgfa', 'Lrrc58', 'Dnmt3a', 'Slamf9', '1100001H23Rik',
        'Aldoc', 'Cd28', '1500003O03Rik', 'Rab15', 'Pld4', 'Pilra', 'Xlr',
        'Tgm1', 'Lcp1', 'Fstl1', 'Slc40a1', 'Usp24', 'Jup', 'Cd74', 'Tpm4',
Exemplo n.º 18
0
'''
Created on Feb 12, 2013

@author: karmel
'''
from __future__ import division
from glasslab.dataanalysis.graphing.seq_grapher import SeqGrapher
from glasslab.utils.functions import nonzero
from glasslab.dataanalysis.motifs.motif_analyzer import MotifAnalyzer

if __name__ == '__main__':
    yzer = SeqGrapher()

    dirpath = 'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/CD4TCells/Oshea_enhancers/peak_overlaps'
    dirpath = yzer.get_path(dirpath)
    img_dirpath = yzer.get_and_create_path(dirpath, 'figures')

    peak_pretty = 'p300'
    peak = peak_pretty.lower()
    th1 = yzer.import_file(
        yzer.get_filename(dirpath,
                          'th1_with_th2_{0}.txt'.format(peak))).fillna(0)
    th2 = yzer.import_file(
        yzer.get_filename(dirpath,
                          'th2_with_th1_{0}.txt'.format(peak))).fillna(0)

    # Filter out promoters
    th1 = th1[th1['tss_id'] == 0]
    th2 = th2[th2['tss_id'] == 0]

    # Get venn-diagram sets
Exemplo n.º 19
0

def get_filters_transcript(subdata, xcol, ycol):
    '''
    Build four boolean row masks over ``subdata`` from its log fold changes.

    Reads the 'kla_1_lfc' and 'dex_over_kla_1_lfc' columns and returns,
    in order:

    - down_in_kla: kla_1_lfc <= -1
    - nc_in_kla:   abs(kla_1_lfc) < 1
    - up_in_kla:   kla_1_lfc >= 1 and dex_over_kla_1_lfc > -.58
    - trans:       kla_1_lfc >= 1 and dex_over_kla_1_lfc <= -.58

    ``xcol`` and ``ycol`` are accepted but not used here.
    '''
    kla_lfc = subdata['kla_1_lfc']
    dex_over_kla_lfc = subdata['dex_over_kla_1_lfc']

    down_in_kla = kla_lfc <= -1
    nc_in_kla = kla_lfc.abs() < 1

    # `&` binds tighter than `>=`, so each comparison must be evaluated
    # (or parenthesized) before the masks are combined. Without that,
    # `kla >= 1 & mask` is parsed as `kla >= (1 & mask)`.
    induced_by_kla = kla_lfc >= 1

    # Split the induced rows by the Dex response so that the two masks
    # are mutually exclusive. Rows with a missing Dex value land in neither.
    up_in_kla = induced_by_kla & (dex_over_kla_lfc > -.58)
    trans = induced_by_kla & (dex_over_kla_lfc <= -.58)
    return down_in_kla, nc_in_kla, up_in_kla, trans


if __name__ == '__main__':
    yzer = SeqGrapher()
    dirpath = 'karmel/Desktop/Projects/Classes/Rotations/Finland_2012/GR_Project/'
    dirpath = yzer.get_path(dirpath)
    img_dirpath = yzer.get_and_create_path(dirpath, 'bargraphs_from_p65_gr')

    transcripts = yzer.import_file(
        yzer.get_filename(dirpath, 'motifs', 'transcript_vectors.txt'))
    transcripts['glass_transcript_id'] = transcripts['id']

    if True:
        for main, compare, basal_cond in (('p65', 'GR', 'KLA'), ('GR', 'p65',
                                                                 'Dex')):
            data = yzer.import_file(
                yzer.get_filename(dirpath, 'motifs', 'from_peaks',
                                  '{0}_kla_dex_vectors.txt'.format(main)))

            data = data.merge(transcripts,
                              how='left',
                              on='glass_transcript_id',
@author: karmel

Do novel interactions gain or lose me2? 
'''
from __future__ import division
from glasslab.dataanalysis.graphing.seq_grapher import SeqGrapher
from glasslab.utils.functions import nonzero

if __name__ == '__main__':
    yzer = SeqGrapher()
    dirpath = 'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/HiC/'
    dirpath = yzer.get_path(dirpath)
    data_dirpath = yzer.get_filename(dirpath, 'enhancer_sets')
    img_dirpath = yzer.get_and_create_path(dirpath,
                                           'novel_enhancer_me2_change',
                                           'all_interactions')

    interactions = yzer.import_file(
        yzer.get_filename(
            data_dirpath,
            'transcript_pairs_enhancer_with_anything_with_me2_inc_me2_counts.txt'
        ))
    interactions = interactions[interactions['count'] > 1]

    interactions = interactions.fillna(0)

    # Key on peak id, not enhancer id, which could be bidirectional
    #interactions['id_2'] = interactions['h3k4me2_id']
    interactions['hash'] = interactions.apply(
        lambda row: '{0}.{1}'.format(row['id'], row['id_2']), axis=1)
Exemplo n.º 21
0
'''
Created on Oct 1, 2012

@author: karmel
'''
from __future__ import division
from glasslab.dataanalysis.graphing.seq_grapher import SeqGrapher
from matplotlib import pyplot
import numpy

if __name__ == '__main__':
    yzer = SeqGrapher()
    dirpath = 'karmel/Desktop/Projects/Classes/Rotations/Finland_2012/GR_Project/'
    dirpath = yzer.get_path(dirpath)
    img_dirpath = yzer.get_and_create_path(dirpath, 'piecharts_from_p65_gr')

    if True:
        for main, compare, basal_cond in (
            ('GR', 'p65', 'Dex'),
            ('p65', 'GR', 'KLA'),
        ):
            data = yzer.import_file(
                yzer.get_filename(dirpath, 'motifs', 'from_peaks',
                                  '{0}_kla_dex_vectors.txt'.format(main)))

            # Get nearby peaks first
            ids_with_nearby = data[
                (data['distance_to_tss_2'].isnull() == False)
                & (data['distance_to_peak_2'] <= 1000)]['id']

            data = data.fillna(0)
@author: karmel

Do novel interactions gain or lose me2? 
'''
from __future__ import division
from glasslab.dataanalysis.graphing.seq_grapher import SeqGrapher
from glasslab.utils.functions import nonzero

if __name__ == '__main__':
    yzer = SeqGrapher()
    dirpath = 'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/HiC/'
    dirpath = yzer.get_path(dirpath)
    data_dirpath = yzer.get_filename(dirpath, 'enhancer_sets')
    img_dirpath = yzer.get_and_create_path(dirpath,
                                           'novel_interactions_kla_lfc',
                                           'all_interactions')

    interactions = yzer.import_file(
        yzer.get_filename(
            data_dirpath,
            'transcript_pairs_enhancer_with_anything_with_me2_inc_me2_counts.txt'
        ))
    interactions = interactions[interactions['count'] > 1]

    all_transcripts = yzer.import_file(
        yzer.get_filename(data_dirpath, 'transcript_vectors.txt'))

    kla_col = 'kla_lfc'

    transcripts = all_transcripts[['id', kla_col]]
Exemplo n.º 23
0
@author: karmel

Do the gene groups outlined in Ramirez-Carrozzi 2006 and 2009 correlate
with expression changes in Dex+KLA?
'''
from glasslab.dataanalysis.graphing.seq_grapher import SeqGrapher

if __name__ == '__main__':
    yzer = SeqGrapher()
    dirpath = 'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/GR_Analysis/cpg_island_promoters'
    dirpath = yzer.get_path(dirpath)

    for rep in (4, 3, 1):
        img_dirpath = yzer.get_and_create_path(dirpath,
                                               'boxplots_by_expression',
                                               'genes_with_gr',
                                               'rep{0}'.format(rep),
                                               'transrepressed')

        data = yzer.import_file(
            yzer.get_filename(dirpath, 'transcript_vectors.txt'))
        data['ucsc_link_nod'] = data['ucsc_link_nod'].apply(
            lambda s: s.replace('nod_balbc', 'gr_project_2012'))
        data = data.fillna(0)

        data = data[(data['kla_{0}_lfc'.format(rep)] >= 1)
                    & (data['dex_over_kla_{0}_lfc'.format(rep)] <= -.58)]

        # 2006
        secondary_response = data[data['gene_names'].isin([
            '{Il12b}', '{Il6}', '{Nos2}', '{Mx1}', '{Mx2}', '{Marco}',
Exemplo n.º 24
0
'''
Created on Feb 12, 2013

@author: karmel
'''
from __future__ import division
from glasslab.dataanalysis.graphing.seq_grapher import SeqGrapher
from glasslab.dataanalysis.motifs.motif_analyzer import MotifAnalyzer

if __name__ == '__main__':
    yzer = SeqGrapher()

    dirpath = 'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/NOD_BALBc/ThioMacs/Analysis_2013_02/'
    dirpath = yzer.get_path(dirpath)
    img_dirpath = yzer.get_and_create_path(dirpath, 'srf_binding')
    data = yzer.import_file(
        yzer.get_filename(dirpath, 'transcript_vectors.txt'))

    data = data.fillna(0)
    data = data[data[['nod_notx_1h_tag_count', 'balb_notx_1h_tag_count']].max(
        axis=1) >= 10]

    subsets = [
        data,
        data[(data['has_refseq'] == 1) & (data['transcript_score'] >= 4)],
        data[(data['distal'] == 't') & (data['h3k4me2_tag_count'] > 10)]
    ]

    # Add in nearest genes for enhancers
    enh = subsets[2].copy()
    nearest_genes = yzer.import_file(
Exemplo n.º 25
0
    wt_only = wt_data[
        wt_data['foxo1_ko_naive_atac_tag_count'] < min_thresh]

    fold = 2
    both = wt_data[
        (wt_data['foxo1_ko_naive_atac_tag_count']
         * fold >= wt_data['tag_count']) &
        (wt_data['tag_count'] * fold >=
         wt_data['foxo1_ko_naive_atac_tag_count'])
    ]

    ko_only = ko_data[
        ko_data['naive_atac_tag_count'] < min_thresh]

    save_path = yzer.get_and_create_path(
        dirpath, 'Figures', 'Foxo1_group_promoters_overlaps')

    groups = [wt_only, both, ko_only]
    labels = ['WT only', 'WT and KO', 'Foxo1 KO only']

    if True:
        yzer.boxplot([gp['naive_foxo1_tag_count'] for gp in groups],
                     labels,
                     title='Foxo1 tags in ATAC-seq regions by group',
                     ylabel='Foxo1 peak tag count', save_dir=save_path,
                     show_plot=False)
        yzer.boxplot([gp['lcmv_d12_foxo1_tag_count'] for gp in groups],
                     labels,
                     title='LCMV d12 Foxo1 tags in ATAC-seq regions by group',
                     ylabel='Foxo1 peak tag count', save_dir=save_path,
                     show_plot=False)
Exemplo n.º 26
0
from __future__ import division
from glasslab.dataanalysis.graphing.seq_grapher import SeqGrapher
from glasslab.utils.functions import nonzero
from collections import OrderedDict

if __name__ == '__main__':
    yzer = SeqGrapher()
    dirpath = 'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/HiC/'
    dirpath = yzer.get_path(dirpath)
    data_dirpath = yzer.get_filename(dirpath, 'enhancer_sets')

    kla_col = 'kla_lfc'

    tss_only = False
    img_dirpath = yzer.get_and_create_path(
        dirpath, 'interactions_by_kla_lfc', tss_only and 'genic'
        or 'all_interactions', 'lfc_2')

    # File generated in novel_me2_sites
    enhancers = yzer.import_file(
        yzer.get_filename(
            data_dirpath,
            'all_enhancers_with_me2_and_{0}interaction_stats.txt'.format(
                tss_only and 'tss_' or '')))

    for kla_timepoint in ('1h', ):
        enhancers['me2_ratio'] = nonzero(enhancers['me2_kla_6h_tag_count_2'])/\
                                    nonzero(enhancers['me2_notx_tag_count_2'])

        sets = OrderedDict()
        sets['4x GRO in KLA {0}'.format(kla_timepoint)] = enhancers[
Exemplo n.º 27
0
'''
Created on Apr 19, 2013

@author: karmel

Note: Made font.weight = bold and axes.titlesize = 24 in matplotlibrc
'''
from __future__ import division
from glasslab.dataanalysis.graphing.seq_grapher import SeqGrapher

if __name__ == '__main__':
    yzer = SeqGrapher()
    dirpath = 'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/Glass Atlas/NAR_review_data/'
    dirpath = yzer.get_path(dirpath)
    img_dirpath = yzer.get_and_create_path(dirpath, 'hg19_mcf7_pie_charts')

    yzer.legend_location = 'lower left'
    pie1 = '''Annotated by RefSeq and/or ncRNA.org    14,022
Unannotated    67,046'''
    pie1 = [row.split('    ') for row in pie1.split('\n')]
    pie1 = zip(*pie1)
    yzer.piechart(map(lambda s: int(s.replace(',', '')), pie1[1]),
                  pie1[0],
                  title='Hah et al MCF-7 Transcripts\nwith Score >= 1',
                  save_dir=img_dirpath,
                  show_plot=True)

    pie2 = '''Promoter-associated RNA    7,055
Antisense of RefSeq    7,539
Other RefSeq Proximal    13,664
Distal with H3K4me2    2,352
Exemplo n.º 28
0
Created on Jan 3, 2013

@author: karmel

Plot gene-enhancer me2 LFC; do we see correlation?
'''
from glasslab.dataanalysis.graphing.seq_grapher import SeqGrapher
from glasslab.utils.functions import nonzero
import numpy

if __name__ == '__main__':
    yzer = SeqGrapher()
    dirpath = 'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/HiC/'
    dirpath = yzer.get_path(dirpath)
    data_dirpath = yzer.get_filename(dirpath, 'enhancer_sets')
    img_dirpath = yzer.get_and_create_path(dirpath, 'gene_enhancer_me2_lfc',
                                           'scatterplots')

    interactions = yzer.import_file(
        yzer.get_filename(
            data_dirpath,
            'transcript_pairs_enhancer_with_anything_with_me2_inc_me2_counts.txt'
        ))
    interactions = interactions[interactions['count'] > 1]
    all_transcripts = yzer.import_file(
        yzer.get_filename(data_dirpath, 'transcript_vectors.txt'))

    for me2_timepoint in ('6h', '24h'):
        me2_col = 'me2_{0}_ratio'.format(me2_timepoint)
        kla_col = 'kla_lfc'
        col_set = [me2_col + '_2', kla_col + '_2', kla_col, me2_col]
Exemplo n.º 29
0
'''
Created on Jul 11, 2012

@author: karmel
'''
from __future__ import division
from glasslab.dataanalysis.graphing.seq_grapher import SeqGrapher

if __name__ == '__main__':
    yzer = SeqGrapher()
    dirpath = 'karmel/Desktop/Projects/Classes/Rotations/Finland_2012/GR_Project/'
    dirpath = yzer.get_path(dirpath)
    img_dirpath = yzer.get_and_create_path(dirpath, 'peak_scatterplots')

    if True:
        for main, compare, basal_cond in (('p65', 'GR', 'KLA'), ('GR', 'p65',
                                                                 'Dex')):
            data = yzer.import_file(
                yzer.get_filename(dirpath, 'motifs', 'from_peaks',
                                  '{0}_kla_dex_vectors.txt'.format(main)))

            data = data.fillna(0)
            data = data.groupby(['id', 'chr_name'], as_index=False).mean()

            xcolname, ycolname = 'tag_count_2', 'tag_count'  #'p65_kla_tag_count', 'p65_kla_dex_tag_count',
            data = data[data[ycolname] >= 10]

            cond_1 = (data['tag_count_3'] == 0)
            cond_2 = (data['tag_count_3'] > 0) & (data['tag_count_3'] <
                                                  data['tag_count_4'])
            cond_3 = (data['tag_count_3'] > 0) & (data['tag_count_3'] >=
Exemplo n.º 30
0
'''
Created on Nov 7, 2012

@author: karmel
'''
from __future__ import division
from glasslab.dataanalysis.graphing.seq_grapher import SeqGrapher
from glasslab.dataanalysis.misc.cd4tcell_finland_2012.resources import comparison_sets,\
    pretty_names

if __name__ == '__main__':
    yzer = SeqGrapher()
    dirpath = 'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/CD4TCells_Finland_2012/Analysis_2013_02'
    dirpath = yzer.get_path(dirpath)
    img_dirpath = yzer.get_and_create_path(dirpath, 'with_me3',
                                           'basic_scatterplots')

    data = yzer.import_file(
        yzer.get_filename(dirpath, 'transcript_vectors.txt'))
    data = data.fillna(0)
    data = data[data['naive_me3_tag_count'] + data['act_me3_tag_count'] > 0]

    for key1, key2, norm_factor in comparison_sets:
        name1 = pretty_names[key1[:-1]] + key1[-1:]
        name2 = pretty_names[key2[:-1]] + key2[-1:]

        data_normed = yzer.normalize(data, key2 + '_tag_count', norm_factor)
        ax = yzer.scatterplot(
            data_normed,
            key1 + '_tag_count',
            key2 + '_tag_count_norm',