@author: karmel Scatterplots of H3K4me2 peak tag counts by GROseq score ''' from __future__ import division from glasslab.dataanalysis.graphing.seq_grapher import SeqGrapher if __name__ == '__main__': yzer = SeqGrapher() dirpath = 'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/Glass Atlas/Demo-data' dirpath = yzer.get_path(dirpath) img_dirpath = yzer.get_and_create_path(dirpath, 'scatterplots') data = yzer.import_file( yzer.get_filename(dirpath, 'me2_peaks_with_transcripts.txt')) data = data.fillna(0) data = data.groupby(by='id', as_index=True).mean() data['transcript_score'] = data['score(2)'] ax = yzer.scatterplot( data, xcolname='transcript_score', ycolname='tag_count', log=True, title='H3K4me2 Tag Count as a Function of Transcript Score', xlabel='Glass Atlas Transcript Score', ylabel='Normalized H3K4me2 tag count', show_2x_range=True, plot_regression=True, show_count=True, show_correlation=True,
Created on Mar 4, 2013 @author: karmel ''' from __future__ import division from glasslab.dataanalysis.graphing.seq_grapher import SeqGrapher if __name__ == '__main__': yzer = SeqGrapher() dirpath = 'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/NOD_BALBc/ThioMacs/Analysis_2013_02/' dirpath_bmdc = 'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/NOD_BALBc/BMDCs/Analysis_2013_03/' dirpath = yzer.get_path(dirpath) dirpath_bmdc = yzer.get_path(dirpath_bmdc) img_dirpath = yzer.get_and_create_path(dirpath, 'bmdc_vs_thiomac') thio = yzer.import_file( yzer.get_filename(dirpath, 'transcript_vectors.txt')) bmdc = yzer.import_file( yzer.get_filename(dirpath_bmdc, 'transcript_vectors.txt')) sets = [] for data in (thio, bmdc): data = data.fillna(0) refseq = yzer.get_refseq(data) # Remove low tag counts #refseq = refseq[refseq['transcript_score'] >= 4] sets.append(refseq)
@author: karmel ''' from __future__ import division from glasslab.dataanalysis.graphing.seq_grapher import SeqGrapher if __name__ == '__main__': yzer = SeqGrapher() dirpath = 'karmel/Desktop/Projects/Classes/Rotations/Finland_2012/GR_Project/' dirpath = yzer.get_path(dirpath) img_dirpath = yzer.get_and_create_path(dirpath, 'peak_scatterplots') if True: for main, compare, basal_cond in (('p65', 'GR', 'KLA'), ('GR', 'p65', 'Dex')): data = yzer.import_file( yzer.get_filename(dirpath, 'motifs', 'from_peaks', '{0}_kla_dex_vectors.txt'.format(main))) data = data.fillna(0) data = data.groupby(['id', 'chr_name'], as_index=False).mean() xcolname, ycolname = 'tag_count_2', 'tag_count' #'p65_kla_tag_count', 'p65_kla_dex_tag_count', data = data[data[ycolname] >= 10] cond_1 = (data['tag_count_3'] == 0) cond_2 = (data['tag_count_3'] > 0) & (data['tag_count_3'] < data['tag_count_4']) cond_3 = (data['tag_count_3'] > 0) & (data['tag_count_3'] >= data['tag_count_4']) ax = None for show_points in (True, False):
Note: Made font.weight = bold and axes.titlesize = 24, font.size = 16 in matplotlibrc ''' from __future__ import division from glasslab.dataanalysis.graphing.seq_grapher import SeqGrapher from glasslab.utils.functions import nonzero if __name__ == '__main__': yzer = SeqGrapher() dirpath = 'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/Glass Atlas/Demo-data' dirpath = yzer.get_path(dirpath) img_dirpath = yzer.get_and_create_path(dirpath, 'refseq_to_homer/large_gap_500bp') data = yzer.import_file( yzer.get_filename(dirpath, 'refseq_tag_counts_500bp.txt')) data['sum'] = nonzero(data['sum'].fillna(0)) homer_data = yzer.import_file( yzer.get_filename(dirpath, 'RNA_GroSeq_CountsGenes.txt')) homer_data['sequence_identifier'] = homer_data['Gene ID'] homer_data['homer_tag_count'] = nonzero(homer_data[ 'ThioMac-GroSeq-notx-110513/ genes (Total: 12166480.0) normFactor 0.82'] .fillna(0)) homer_data = homer_data[['sequence_identifier', 'homer_tag_count']] merged = data.merge(homer_data, how='inner', on='sequence_identifier') merged = merged.fillna(1) if True: ax = yzer.scatterplot(merged,
'Miscellaneous_Collaborations/Rodrigo_CD8s_2014_09/Promoters' dirpath = yzer.get_path(dirpath) cond, seq, breed = ('naive', 'atac', '') wt_prefix = sample_name(cond, seq, breed) ko_prefix = sample_name(cond, seq, 'foxo1_ko_') wt_dirpath = yzer.get_filename(dirpath, wt_prefix) ko_dirpath = yzer.get_filename(dirpath, ko_prefix) wt_filename = yzer.get_filename(wt_dirpath, wt_prefix + '_promoters.txt') ko_filename = yzer.get_filename(ko_dirpath, ko_prefix + '_promoters.txt') wt_data = yzer.import_file(wt_filename) wt_data = wt_data.fillna(0) ko_data = yzer.import_file(ko_filename) ko_data = ko_data.fillna(0) min_thresh = get_threshold(seq) wt_data = wt_data[wt_data['tag_count'] >= min_thresh] ko_data = ko_data[ko_data['tag_count'] >= min_thresh] wt_only = wt_data[ wt_data['foxo1_ko_naive_atac_tag_count'] < min_thresh] fold = 2 both = wt_data[ (wt_data['foxo1_ko_naive_atac_tag_count'] * fold >= wt_data['tag_count']) &
# Split the enhancer table into three groups by the value in `kla_col`
# (> 2, |value| <= 1, < -2), after adding an me2 KLA-6h / notx ratio column.
yzer = SeqGrapher()
dirpath = 'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/HiC/'
dirpath = yzer.get_path(dirpath)
data_dirpath = yzer.get_filename(dirpath, 'enhancer_sets')
kla_col = 'kla_lfc'
tss_only = False

# tss_only picks both the output subdirectory and the input file variant.
img_dirpath = yzer.get_and_create_path(
    dirpath, 'interactions_by_kla_lfc',
    'genic' if tss_only else 'all_interactions', 'lfc_2')

# File generated in novel_me2_sites
enhancers = yzer.import_file(
    yzer.get_filename(
        data_dirpath,
        'all_enhancers_with_me2_and_{0}interaction_stats.txt'.format(
            'tss_' if tss_only else '')))

for kla_timepoint in ('1h', ):
    # nonzero() is applied to numerator and denominator before dividing.
    enhancers['me2_ratio'] = (
        nonzero(enhancers['me2_kla_6h_tag_count_2'])
        / nonzero(enhancers['me2_notx_tag_count_2']))

    # Insertion order is kept by the OrderedDict: up, unchanged, down.
    sets = OrderedDict()
    sets['4x GRO in KLA {0}'.format(kla_timepoint)] = enhancers[
        enhancers[kla_col] > 2]
    sets['No change GRO in KLA {0}'.format(kla_timepoint)] = enhancers[
        enhancers[kla_col].abs() <= 1]
    sets['1/4 GRO in KLA {0}'.format(kla_timepoint)] = enhancers[
        enhancers[kla_col] < -2]
''' from __future__ import division from glasslab.dataanalysis.graphing.seq_grapher import SeqGrapher from glasslab.utils.functions import nonzero from glasslab.dataanalysis.misc.gr_project_2012.v1.enhancer_subsets_for_supershift import ucsc_link_cleanup import numpy if __name__ == '__main__': yzer = SeqGrapher() dirpath = 'karmel/Desktop/Projects/Classes/Rotations/Finland_2012/GR_Project/' dirpath = yzer.get_path(dirpath) peak_type = 'p65' img_dirpath = yzer.get_and_create_path(dirpath, 'boxplots_non_refseq_by_{0}'.format(peak_type)) transcripts = yzer.import_file(yzer.get_filename(dirpath, 'motifs', 'transcript_vectors_with_nearby_peaks.txt')) if True: pu_1 = False for ratio in (1.5, 2, 3): data = transcripts[transcripts['refseq'] == 'f'] data = data[data['has_infrastructure'] == 0] data = data[data['length'] < 6000] data = data[data['dex_1_lfc'] < 1] data = data[data['kla_1_lfc'] >= 1] data = data[data['gr_kla_dex_tag_count'] > 0] data = data[data['gr_fa_kla_dex_tag_count'] == 0] print len(data) if pu_1: data = data[data['pu_1_kla_tag_count'] + data['pu_1_kla_tag_count'] > 0]
'Miscellaneous_Collaborations/Rodrigo_CD8s_2014_09/Enhancers_set2' dirpath = yzer.get_path(dirpath) save_path = yzer.get_and_create_path( dirpath, 'Figures', 'Enhancer_counts') datasets = {} breed_sets = get_breed_sets() for i, (samples, short_names) in enumerate(breed_sets): oth_breed = breed_sets[1 - i] for j, sample_prefix in enumerate(short_names): sample_dirpath = yzer.get_filename(dirpath, sample_prefix) filename = yzer.get_filename(sample_dirpath, sample_prefix + '_enhancers.txt') data = yzer.import_file(filename) data = data.fillna(0) min_thresh = get_threshold('atac') data = data[data['tag_count'] >= min_thresh] datasets[sample_prefix] = data # How many denovo d7 enhancers are also in foxo1 kos? for celltype in ('hi', 'lo'): d7 = datasets['klrg{}_d7'.format(celltype)] de_novo = d7[d7['d0_tag_count'] < min_thresh] all_shared = d7[ 'foxo1_ko_klrg{}_d7_tag_count'.format(celltype)] >= min_thresh
@author: karmel ''' from __future__ import division from glasslab.dataanalysis.graphing.seq_grapher import SeqGrapher from glasslab.utils.functions import nonzero from glasslab.dataanalysis.motifs.motif_analyzer import MotifAnalyzer if __name__ == '__main__': yzer = SeqGrapher() dirpath = 'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/CD4TCells/Oshea_enhancers/ctcf_across_celltypes' dirpath = yzer.get_path(dirpath) img_dirpath = yzer.get_and_create_path(dirpath, 'figures') dp = yzer.import_file( yzer.get_filename(dirpath, 'dp_with_thiomac_ctcf.txt')).fillna(0) thio = yzer.import_file( yzer.get_filename(dirpath, 'thiomac_with_dp_ctcf.txt')).fillna(0) # Get venn-diagram sets only_dp = dp[dp['thiomac_ctcf_tag_count'] == 0] only_thio = thio[thio['dp_ctcf_tag_count'] == 0] shared = dp[dp['thiomac_ctcf_tag_count'] != 0] shared_check = thio[thio['dp_ctcf_tag_count'] != 0] print len(only_dp), len(only_thio), len(shared), len(shared_check) data = shared.append(only_dp, ignore_index=True) data = data.append(only_thio, ignore_index=True) data['dp_nonzero'] = nonzero(data['dp_ctcf_tag_count']) data['thio_nonzero'] = nonzero(data['thiomac_ctcf_tag_count'])
''' Created on May 10, 2012 @author: karmel ''' from glasslab.dataanalysis.graphing.seq_grapher import SeqGrapher import os from matplotlib import pyplot if __name__ == '__main__': grapher = SeqGrapher() dirpath = '/Users/karmel/Desktop/Projects/GlassLab/Notes_and_Reports/ncRNA_josh/' filename = os.path.join(dirpath, 'refseq_predictions.tsv') evidence_f = os.path.join(dirpath, 'refseq_evidence.orf') data = grapher.import_file(filename) evidence = grapher.import_file(evidence_f) data['score_orf'] = evidence['score'] data = data[data['score_orf'] < 200] data_coding = data[data['score'] >= 0] data_noncoding = data[data['score'] < 0] ax = grapher.scatterplot(data_coding, 'score_orf', 'score', log=False, color='blue', label='Predicted Coding', add_noise=False, show_2x_range=False,
df['enhancer_id'] = group['id_2'].mean() df['enhancer_lfc'] = group['p65_tag_count_2'].mean() if f_condition: df = df[f_condition(df)] return df if __name__ == '__main__': yzer = SeqGrapher() dirpath = 'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/HiC/' dirpath = yzer.get_path(dirpath) data_dirpath = yzer.get_filename(dirpath, 'enhancer_sets') img_dirpath = yzer.get_and_create_path(dirpath, 'enhancer_rewiring_lfc', 'p65_tags') interactions = yzer.import_file( yzer.get_filename(data_dirpath, 'transcript_pairs_refseq_with_me2.txt')) interactions = interactions[interactions['count'] > 1] transcripts = yzer.import_file( yzer.get_filename(data_dirpath, 'transcript_vectors.txt')) transcripts['kla_6h_rpbp'] = transcripts['kla_6h_tag_count'] / ( transcripts['length']) * 1000 transcripts['kla_rpbp'] = transcripts['kla_tag_count'] / ( transcripts['length']) * 1000 # Associate gene id interactions = interactions.merge(transcripts, how='left', on='id') transcripts['id_2'] = transcripts['id']
@author: karmel ''' from __future__ import division from glasslab.dataanalysis.graphing.seq_grapher import SeqGrapher from glasslab.utils.functions import nonzero if __name__ == '__main__': yzer = SeqGrapher() dirpath = 'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/CD4TCells/Oshea_enhancers/peak_overlaps' dirpath = yzer.get_path(dirpath) img_dirpath = yzer.get_and_create_path(dirpath, 'figures') peak = 'p300' th1 = yzer.import_file( yzer.get_filename(dirpath, 'th1_with_th2_{0}.txt'.format(peak))).fillna(0) th2 = yzer.import_file( yzer.get_filename(dirpath, 'th2_with_th1_{0}.txt'.format(peak))).fillna(0) th1_with_ctcf_motif = yzer.import_file( yzer.get_filename(dirpath, 'motifs', 'th_p300_enhancers_ctcf', 'th1_only_{0}_with_ctcf_motif.txt'.format(peak))) th2_with_ctcf_motif = yzer.import_file( yzer.get_filename(dirpath, 'motifs', 'th_p300_enhancers_ctcf', 'th2_only_{0}_with_ctcf_motif.txt'.format(peak))) shared_with_ctcf_motif = yzer.import_file( yzer.get_filename(dirpath, 'motifs', 'th_p300_enhancers_ctcf', 'th_shared_{0}_with_ctcf_motif.txt'.format(peak))) # Filter out promoters
''' from __future__ import division from glasslab.dataanalysis.graphing.seq_grapher import SeqGrapher import random if __name__ == '__main__': yzer = SeqGrapher() dirpath = 'karmel/Desktop/Projects/Classes/Rotations/Finland_2012/GR_Project/' dirpath = yzer.get_path(dirpath) img_dirpath = yzer.get_and_create_path(dirpath, 'boxplots_nearby_genes_by_p65') change_type = 'more' #'less' for ratio in (1.5, 2, 3): data = yzer.import_file( yzer.get_filename(dirpath, 'motifs', 'transcript_vectors_with_nearby_peaks.txt')) nearby = yzer.import_file( yzer.get_filename( img_dirpath, 'nearest_genes_to_enhancer_like_{1}_p65_{0}x.txt'.format( str(ratio).replace('.', '_'), change_type))) colname = 'dex_over_kla_1_lfc' pausing = True if pausing: colname = 'pausing_ratio_ratio' # We want previously calculated bucket scores, # Joined to old transcripts because we have since updated IDs bucket_scores = yzer.import_file(
We count the interactions connected to each transcript and draw a boxplot. In order to most easily pull in all the zero-interaction enhancers, we load those with a separate query and use the difference in count. ''' from glasslab.dataanalysis.graphing.seq_grapher import SeqGrapher if __name__ == '__main__': yzer = SeqGrapher() dirpath = 'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/HiC/novel_enhancers' dirpath = yzer.get_path(dirpath) img_dirpath = yzer.get_and_create_path(dirpath, 'boxplots_by_me2_in_notx') data = {} data['all_enhancers_with_less_me2'] = yzer.import_file( yzer.get_filename(dirpath, 'all_enhancers_with_less_me2_in_notx.txt')) data['all_enhancers_with_me2'] = yzer.import_file( yzer.get_filename(dirpath, 'all_enhancers_with_me2_in_notx.txt')) data['interacting_in_notx_with_less_me2'] = yzer.import_file( yzer.get_filename( dirpath, 'interacting_in_notx_enhancers_with_less_me2_in_notx.txt')) data['interacting_in_notx_with_me2'] = yzer.import_file( yzer.get_filename( dirpath, 'interacting_in_notx_enhancers_with_me2_in_notx.txt')) data['interacting_in_kla_30m_with_less_me2'] = yzer.import_file( yzer.get_filename( dirpath, 'interacting_in_kla_30m_enhancers_with_less_me2_in_notx.txt')) data['interacting_in_kla_30m_with_me2'] = yzer.import_file( yzer.get_filename(
if __name__ == '__main__': enhancer_counts = True # Are we looking at enhancer interactions (False) or counts (True)? yzer = SeqGrapher() dirpath = 'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/HiC/enhancers_by_gene_length' dirpath = yzer.get_path(dirpath) img_dirpath = yzer.get_and_create_path(dirpath, 'scatterplots') counted = enhancer_counts and 'enhancer' or 'interaction' # The first set has length with interaction counts; # the second has length for all transcripts, even those without interactions. # We want to merge such that we add the interaction-less genes with a count of 0. data = yzer.import_file(yzer.get_filename(dirpath,'{0}_counts_by_refseq.txt'.format(counted))) all_data = yzer.import_file(yzer.get_filename(dirpath,'refseq_all.txt')) all_data = all_data[~all_data['id'].isin(data['id'])] data = pandas.concat([data, all_data]) data = data.reset_index().fillna(0) notx = data[data['sequencing_run_id'] == 765] kla_30m = data[data['sequencing_run_id'] == 766] kla_4h = data[data['sequencing_run_id'] == 773] no_intxns = data[data['sequencing_run_id'] == 0] # Zero won't show up in a log plot, so add one. no_intxns['count'] = 1 ax = yzer.scatterplot(no_intxns,
def ucsc_link_cleanup(data): data['ucsc_link_nod'] = data['ucsc_link_nod'].map( lambda x: '<a href={0} target="_blank">UCSC</a>'.format( x.replace('nod_balbc', 'gr_project_2012'))) return data if __name__ == '__main__': yzer = SeqGrapher() dirpath = 'karmel/Desktop/Projects/Classes/Rotations/Finland_2012/GR_Project/' dirpath = yzer.get_path(dirpath) save_dirpath = yzer.get_and_create_path(dirpath, 'subgroups_for_supershift') transcripts = yzer.import_file( yzer.get_filename(dirpath, 'motifs', 'transcript_vectors.txt')) data = transcripts[transcripts['refseq'] == 'f'] data = data[data['has_infrastructure'] == 0] data = data[data['length'] < 6000] data = data[data['dex_1_lfc'] < 1] data = data[data['kla_1_lfc'] >= 1] data = data.fillna(0) data = ucsc_link_cleanup(data) if False: # First get sets for Negative controls tfs = ['p65', 'pu_1', 'gr', 'gr_fa'] for tf in tfs:
@author: karmel Note: Made font.weight = normal and axes.titlesize = 24 in matplotlibrc ''' from __future__ import division from glasslab.dataanalysis.graphing.seq_grapher import SeqGrapher from glasslab.dataanalysis.misc.demoatlas.rpkm_to_score import PrettyAxisGrapher if __name__ == '__main__': yzer = SeqGrapher() dirpath = 'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/Glass Atlas/NAR_review_data/Post-gene' dirpath = yzer.get_path(dirpath) img_dirpath = yzer.get_and_create_path(dirpath, 'scatterplots') data = yzer.import_file( yzer.get_filename(dirpath, 'post_gene_transcripts.txt')) refseq = yzer.import_file( yzer.get_filename(dirpath, 'all_expressed_refseq.txt')) refseq_with_runoff = refseq[refseq['id'].isin(data['gene_id'])] refseq_no_runoff = refseq[~refseq['id'].isin(data['gene_id'])] if False: print len(refseq_no_runoff) print refseq_no_runoff.tail(100).to_string() # Calculate length of runoff data[ 'length'] = data['transcription_end'] - data['transcription_start'] + 1 data['gene_length'] = data['gene_end'] - data['gene_start'] + 1 # What might be correlated with length of runoff?
kla_col = 'kla_6h_lfc' if __name__ == '__main__': yzer = SeqGrapher() dirpath = 'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/HiC/' dirpath = yzer.get_path(dirpath) data_dirpath = yzer.get_filename(dirpath, 'enhancer_sets') img_dirpath = yzer.get_and_create_path(dirpath, 'genes_to_average_enhancer_lfc') keys = ('all', 'notx', 'kla', 'notx_only', 'kla_only', 'shared_enh') if True: interactions = yzer.import_file( yzer.get_filename(data_dirpath, 'transcript_pairs_refseq_with_me2.txt')) interactions = interactions[interactions['count'] > 1] all_transcripts = yzer.import_file( yzer.get_filename(data_dirpath, 'transcript_vectors.txt')) transcripts = all_transcripts[['id', 'kla_lfc', 'kla_6h_lfc']] # Associate gene id interactions = interactions.merge(transcripts, how='left', on='id') transcripts['id_2'] = transcripts['id'] transcripts = transcripts.drop(['id'], axis=1) interactions = interactions.merge(transcripts, how='left',
''' from glasslab.dataanalysis.graphing.seq_grapher import SeqGrapher from glasslab.utils.functions import nonzero import numpy if __name__ == '__main__': yzer = SeqGrapher() dirpath = 'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/HiC/' dirpath = yzer.get_path(dirpath) data_dirpath = yzer.get_filename(dirpath, 'enhancer_sets') img_dirpath = yzer.get_and_create_path(dirpath, 'gene_enhancer_me2_lfc', 'scatterplots') interactions = yzer.import_file( yzer.get_filename( data_dirpath, 'transcript_pairs_enhancer_with_anything_with_me2_inc_me2_counts.txt' )) interactions = interactions[interactions['count'] > 1] all_transcripts = yzer.import_file( yzer.get_filename(data_dirpath, 'transcript_vectors.txt')) for me2_timepoint in ('6h', '24h'): me2_col = 'me2_{0}_ratio'.format(me2_timepoint) kla_col = 'kla_lfc' col_set = [me2_col + '_2', kla_col + '_2', kla_col, me2_col] interactions[me2_col] = numpy.log2(nonzero(interactions['me2_kla_{0}_tag_count'.format(me2_timepoint)])/\ nonzero(interactions['me2_notx_tag_count'])) interactions[me2_col + '_2'] = numpy.log2(nonzero(interactions['me2_kla_{0}_tag_count_2'.format(me2_timepoint)])/\ nonzero(interactions['me2_notx_tag_count_2']))
@author: karmel Note: Made font.weight = bold and axes.titlesize = 24, font.size = 16 in matplotlibrc ''' from __future__ import division from glasslab.dataanalysis.graphing.seq_grapher import SeqGrapher from glasslab.utils.functions import nonzero if __name__ == '__main__': yzer = SeqGrapher() dirpath = 'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/Glass Atlas/NAR_review_data/vs_homer' dirpath = yzer.get_path(dirpath) img_dirpath = yzer.get_and_create_path(dirpath, 'scatterplots') data = yzer.import_file( yzer.get_filename(dirpath, 'tag_count_by_refseq.txt')) data['sum'] = nonzero(data['sum'].fillna(0)) homer_data = yzer.import_file( yzer.get_filename(dirpath, 'RNA_GroSeq_CountsGenes.txt')) homer_data['sequence_identifier'] = homer_data['Gene ID'] homer_data['homer_tag_count'] = nonzero(homer_data[ 'ThioMac-GroSeq-notx-110513/ genes (Total: 12166480.0) normFactor 0.82'] .fillna(0)) homer_data = homer_data[['sequence_identifier', 'homer_tag_count']] merged = data.merge(homer_data, how='inner', on='sequence_identifier') merged = merged.fillna(1) if True: ax = yzer.scatterplot(merged,
Created on Oct 8, 2012 @author: karmel ''' from glasslab.dataanalysis.graphing.seq_grapher import SeqGrapher from matplotlib import pyplot from glasslab.dataanalysis.misc.gr_project_2012.boxplots_redistribution_pairs import get_high_quality_pairs if __name__ == '__main__': yzer = SeqGrapher() dirpath = 'karmel/Desktop/Projects/Classes/Rotations/Finland_2012/GR_Project/' dirpath = yzer.get_path(dirpath) motif_dirpath = yzer.get_filename(dirpath, 'motifs', 'from_peaks') transcripts = yzer.import_file( yzer.get_filename(dirpath, 'motifs', 'transcript_vectors.txt')) transcripts['glass_transcript_id'] = transcripts['id'] if True: all_data = yzer.import_file( yzer.get_filename( dirpath, 'redistribution', 'p65_peaks_bigger_in_kla_dex_with_nearby_bigger_kla_peaks.txt') ) data = get_high_quality_pairs(all_data, transcripts) data = data.groupby(['id', 'chr_name'], as_index=False).mean() gr_dex_gt_kla_dex = sum( data['tag_count_3'] * 1.5 < data['tag_count_4'])
Created on Feb 14, 2013 @author: karmel ''' from __future__ import division from glasslab.dataanalysis.graphing.seq_grapher import SeqGrapher from glasslab.utils.functions import nonzero if __name__ == '__main__': yzer = SeqGrapher() dirpath = 'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/CD4TCells/Oshea_enhancers/ctcf_stat1_overlap' dirpath = yzer.get_path(dirpath) img_dirpath = yzer.get_and_create_path(dirpath, 'figures') data = yzer.import_file( yzer.get_filename(dirpath, 'ctcf_with_stat1_binding.txt')).fillna(0) with_stat1 = data[data['p2_tag_count'] > 0] without_stat1 = data[data['p2_tag_count'] == 0] if True: ax = yzer.piechart( [len(with_stat1), len(without_stat1)], ['CTCF sites with STAT1', 'CTCF sites without STAT1'], title='DP Thymocyte CTCF Sites with STAT1 in Th1 Cells', save_dir=img_dirpath, show_plot=True) data['tag_count_nonzero'] = nonzero(data['tag_count']) data['p2_tag_count_nonzero'] = nonzero(data['p2_tag_count']) ax = yzer.scatterplot( data, 'tag_count_nonzero',
''' from glasslab.dataanalysis.graphing.seq_grapher import SeqGrapher if __name__ == '__main__': yzer = SeqGrapher() dirpath = 'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/GR_Analysis/cpg_island_promoters' dirpath = yzer.get_path(dirpath) for rep in (4, 3, 1): img_dirpath = yzer.get_and_create_path(dirpath, 'boxplots_by_expression', 'genes_with_gr', 'rep{0}'.format(rep), 'transrepressed') data = yzer.import_file( yzer.get_filename(dirpath, 'transcript_vectors.txt')) data['ucsc_link_nod'] = data['ucsc_link_nod'].apply( lambda s: s.replace('nod_balbc', 'gr_project_2012')) data = data.fillna(0) data = data[(data['kla_{0}_lfc'.format(rep)] >= 1) & (data['dex_over_kla_{0}_lfc'.format(rep)] <= -.58)] # 2006 secondary_response = data[data['gene_names'].isin([ '{Il12b}', '{Il6}', '{Nos2}', '{Mx1}', '{Mx2}', '{Marco}', '{Cmpk2}', '{Rsad2}' ])] delayed = data[data['gene_names'].isin([ '{Ccl5}', '{Saa3}', '{Ifnb1}', '{Ccl2}', '{Ifit1}', '{Ifit3}', '{Peli1}', '{Cxcl10}', '{Traf1}'
if __name__ == '__main__': yzer = SeqGrapher() dirpath = 'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/HiC/' dirpath = yzer.get_path(dirpath) data_dirpath = yzer.get_filename(dirpath, 'enhancer_sets') kla_col = 'kla_lfc' tss_only = False img_dirpath = yzer.get_and_create_path( dirpath, 'novel_me2_sites', tss_only and 'genic' or 'all_interactions', 'ratio_10') if False: enhancers = yzer.import_file( yzer.get_filename(data_dirpath, 'all_distal_enhancers_inc_me2.txt')) all_transcripts = yzer.import_file( yzer.get_filename(data_dirpath, 'transcript_vectors.txt')) transcripts = all_transcripts[['id', kla_col]] enhancers = enhancers.merge(transcripts, how='left', on='id') if tss_only: interactions = yzer.import_file( yzer.get_filename(data_dirpath, 'transcript_pairs_refseq_with_me2.txt')) else: interactions = yzer.import_file( yzer.get_filename( data_dirpath,
from glasslab.dataanalysis.graphing.seq_grapher import SeqGrapher from matplotlib import pyplot from glasslab.utils.functions import nonzero from glasslab.dataanalysis.misc.gr_project_2012.v1.elongation import total_tags_per_run if __name__ == '__main__': yzer = SeqGrapher() dirpath = 'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/GR_Analysis/enhancer_classification' dirpath = yzer.get_path(dirpath) consistent = False img_dirpath = yzer.get_and_create_path( dirpath, 'boxplots_by_expression', consistent and 'consistent' or 'rep1') data = yzer.import_file( yzer.get_filename(dirpath, 'enhancers_with_nearest_gene.txt')) data['ucsc_link_nod'] = data['ucsc_link_nod'].apply( lambda s: s.replace('nod_balbc', 'gr_project_2012')) draw_pies = True min_tags = 30 ratio = 1.5 # Make sure we have dimethyl data = data[data.filter(like='h3k4me2').max(axis=1) > min_tags] data = data[data['minimal_distance'] >= 1000] transcripts = yzer.import_file( yzer.get_filename(dirpath, 'transcript_vectors.txt')) transcripts['nearest_refseq_transcript_id'] = transcripts['id'] data = data.merge(transcripts, how='left',
from __future__ import division from glasslab.dataanalysis.graphing.seq_grapher import SeqGrapher from glasslab.utils.functions import nonzero from glasslab.dataanalysis.motifs.motif_analyzer import MotifAnalyzer if __name__ == '__main__': yzer = SeqGrapher() dirpath = 'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/CD4TCells/Oshea_enhancers/peak_overlaps' dirpath = yzer.get_path(dirpath) img_dirpath = yzer.get_and_create_path(dirpath, 'figures') peak_pretty = 'p300' peak = peak_pretty.lower() th1 = yzer.import_file( yzer.get_filename(dirpath, 'th1_with_th2_{0}.txt'.format(peak))).fillna(0) th2 = yzer.import_file( yzer.get_filename(dirpath, 'th2_with_th1_{0}.txt'.format(peak))).fillna(0) # Filter out promoters th1 = th1[th1['tss_id'] == 0] th2 = th2[th2['tss_id'] == 0] # Get venn-diagram sets only_th1 = th1[th1['p2_id'] == 0] only_th2 = th2[th2['p2_id'] == 0] shared = th1[th1['p2_id'] != 0] shared_check = th2[th2['p2_id'] != 0] print len(only_th1), len(only_th2), len(shared), len(shared_check)
Created on Sep 7, 2012 @author: karmel ''' from __future__ import division from glasslab.dataanalysis.graphing.seq_grapher import SeqGrapher if __name__ == '__main__': grapher = SeqGrapher() base_dirpath = 'karmel/Desktop/Projects/Classes/Rotations/Finland_2012/GR_Project/' base_dirpath = grapher.get_path(base_dirpath) dirpath = grapher.get_filename(base_dirpath, 'motifs') filename = grapher.get_filename(dirpath, 'transcript_vectors.txt') data = grapher.import_file(filename) # Boxplots for gr_dex peaks by lfc in Dex if False: #data = data[data['distal'] == 't'] data = data[data['has_refseq'] == 1] down = data[data['dex_1_lfc'] <= -1] up = data[data['dex_1_lfc'] >= 1] nc = data[abs(data['dex_1_lfc']) < 1] key = 'p65_kla_tag_count' datasets = [down[key],nc[key],up[key]] datasets = [d['p65_kla_dex_tag_count'] - d[key] for d in [down, nc, up]]
Note: Made font.weight = bold and axes.titlesize = 24 in matplotlibrc ''' from __future__ import division from glasslab.dataanalysis.graphing.seq_grapher import SeqGrapher from glasslab.utils.functions import pandas_min from glasslab.dataanalysis.misc.demoatlas.rpkm_to_score import PrettyAxisGrapher if __name__ == '__main__': yzer = SeqGrapher() dirpath = 'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/Glass Atlas/Post_gene_transcripts' dirpath = yzer.get_path(dirpath) img_dirpath = yzer.get_and_create_path(dirpath, 'scatterplots') data = yzer.import_file(yzer.get_filename(dirpath,'within_1kb_gap_500bp_with_nc.txt')) refseq = yzer.import_file(yzer.get_filename(dirpath,'expressed_refseq_gap_500bp.txt')) refseq_with_runoff = refseq[refseq['id'].isin(data['gene_id'])] refseq_no_runoff = refseq[~refseq['id'].isin(data['gene_id'])] if True: print len(refseq_no_runoff) print refseq_no_runoff.tail(100).to_string() # Calculate length of runoff data['length'] = data['transcription_end'] - data['transcription_start'] + 1 data['gene_length'] = data['gene_end'] - data['gene_start'] + 1 # What might be correlated with length of runoff? if False: yzer.scatterplot(data, 'gene_length', 'length', log=True)
''' Created on Jan 30, 2013 @author: karmel ''' from __future__ import division from glasslab.dataanalysis.graphing.seq_grapher import SeqGrapher from collections import OrderedDict if __name__ == '__main__': yzer = SeqGrapher() dirpath = 'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/HiC/' dirpath = yzer.get_path(dirpath) data_dirpath = yzer.get_filename(dirpath, 'enhancer_rewiring_lfc') transcripts = yzer.import_file( yzer.get_filename(dirpath, 'enhancer_sets', 'transcript_vectors.txt')) sets = OrderedDict(( ('all', yzer.import_file(yzer.get_filename(data_dirpath, 'all_vectors.cdt'))), #('all_6h', yzer.import_file(yzer.get_filename(data_dirpath,'kla_6h','all_vectors.cdt'))), ('rewired', yzer.import_file( yzer.get_filename(data_dirpath, 'rewired_vectors.cdt'))), #('rewired_6h', yzer.import_file(yzer.get_filename(data_dirpath,'kla_6h','rewired_vectors.cdt'))), ('shared', yzer.import_file(yzer.get_filename(data_dirpath, 'shared_vectors.cdt'))), )) for key, val in sets.items():
'''
Created on Oct 26, 2012

@author: karmel
'''
from glasslab.dataanalysis.graphing.seq_grapher import SeqGrapher

if __name__ == '__main__':
    # Load the enhancers-with-nearest-gene table, repoint its UCSC links,
    # filter it by h3k4me2 tag count and minimal distance, and load the
    # transcript vectors with an extra join-key column.
    yzer = SeqGrapher()
    dirpath = yzer.get_path(
        'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/GR_Analysis/')
    img_dirpath = yzer.get_and_create_path(dirpath, 'cpg_island_promoters',
                                           'piecharts', 'by_genes_with_gr')

    enhancers_filename = yzer.get_filename(dirpath, 'enhancer_classification',
                                           'enhancers_with_nearest_gene.txt')
    data = yzer.import_file(enhancers_filename)
    data['ucsc_link_nod'] = data['ucsc_link_nod'].apply(
        lambda s: s.replace('nod_balbc', 'gr_project_2012'))

    min_tags = 30
    # Make sure we have dimethyl: the largest value across the columns
    # whose name contains 'h3k4me2' must exceed min_tags.
    max_me2_tags = data.filter(like='h3k4me2').max(axis=1)
    data = data[max_me2_tags > min_tags]
    data = data[data['minimal_distance'] >= 1000]
    # Strand collapsing is currently disabled:
    #data = yzer.collapse_strands(data)

    transcripts_filename = yzer.get_filename(dirpath, 'cpg_island_promoters',
                                             'transcript_vectors.txt')
    transcripts = yzer.import_file(transcripts_filename)
    # Copy of 'id' under the name 'nearest_refseq_transcript_id'.
    transcripts['nearest_refseq_transcript_id'] = transcripts['id']