'enhancer_lfc': Series(), 'kla_4h_lfc': Series(kla_4h[kla_col], index=range(len(kla_4h))), 'notx_lfc': Series(notx[kla_col], index=range(len(notx))), }, ) df['enhancer_id'] = group['id_2'].mean() df['enhancer_lfc'] = group['p65_tag_count_2'].mean() if f_condition: df = df[f_condition(df)] return df if __name__ == '__main__': yzer = SeqGrapher() dirpath = 'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/HiC/' dirpath = yzer.get_path(dirpath) data_dirpath = yzer.get_filename(dirpath, 'enhancer_sets') img_dirpath = yzer.get_and_create_path(dirpath, 'enhancer_rewiring_lfc', 'p65_tags') interactions = yzer.import_file( yzer.get_filename(data_dirpath, 'transcript_pairs_refseq_with_me2.txt')) interactions = interactions[interactions['count'] > 1] transcripts = yzer.import_file( yzer.get_filename(data_dirpath, 'transcript_vectors.txt')) transcripts['kla_6h_rpbp'] = transcripts['kla_6h_tag_count'] / ( transcripts['length']) * 1000 transcripts['kla_rpbp'] = transcripts['kla_tag_count'] / ( transcripts['length']) * 1000 # Associate gene id
@author: karmel ''' from glasslab.dataanalysis.graphing.seq_grapher import SeqGrapher from matplotlib import pyplot from glasslab.utils.functions import nonzero from glasslab.dataanalysis.misc.gr_project_2012.v1.elongation import total_tags_per_run if __name__ == '__main__': yzer = SeqGrapher() dirpath = 'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/GR_Analysis/enhancer_classification' dirpath = yzer.get_path(dirpath) consistent = False img_dirpath = yzer.get_and_create_path( dirpath, 'boxplots_by_expression', consistent and 'consistent' or 'rep1') data = yzer.import_file( yzer.get_filename(dirpath, 'enhancers_with_nearest_gene.txt')) data['ucsc_link_nod'] = data['ucsc_link_nod'].apply( lambda s: s.replace('nod_balbc', 'gr_project_2012')) draw_pies = True min_tags = 30 ratio = 1.5 # Make sure we have dimethyl data = data[data.filter(like='h3k4me2').max(axis=1) > min_tags] data = data[data['minimal_distance'] >= 1000] transcripts = yzer.import_file(
''' Created on Mar 4, 2013 @author: karmel ''' from __future__ import division from glasslab.dataanalysis.graphing.seq_grapher import SeqGrapher if __name__ == '__main__': yzer = SeqGrapher() dirpath = 'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/NOD_BALBc/ThioMacs/Analysis_2013_02/' dirpath_bmdc = 'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/NOD_BALBc/BMDCs/Analysis_2013_03/' dirpath = yzer.get_path(dirpath) dirpath_bmdc = yzer.get_path(dirpath_bmdc) img_dirpath = yzer.get_and_create_path(dirpath, 'bmdc_vs_thiomac') thio = yzer.import_file( yzer.get_filename(dirpath, 'transcript_vectors.txt')) bmdc = yzer.import_file( yzer.get_filename(dirpath_bmdc, 'transcript_vectors.txt')) sets = [] for data in (thio, bmdc): data = data.fillna(0) refseq = yzer.get_refseq(data) # Remove low tag counts #refseq = refseq[refseq['transcript_score'] >= 4]
Created on Mar 11, 2013 @author: karmel Note: Made font.weight = bold and axes.titlesize = 24, font.size = 16 in matplotlibrc ''' from __future__ import division from glasslab.dataanalysis.graphing.seq_grapher import SeqGrapher from glasslab.utils.functions import nonzero if __name__ == '__main__': yzer = SeqGrapher() dirpath = 'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/Glass Atlas/Demo-data' dirpath = yzer.get_path(dirpath) img_dirpath = yzer.get_and_create_path(dirpath, 'refseq_to_homer/large_gap_500bp') data = yzer.import_file( yzer.get_filename(dirpath, 'refseq_tag_counts_500bp.txt')) data['sum'] = nonzero(data['sum'].fillna(0)) homer_data = yzer.import_file( yzer.get_filename(dirpath, 'RNA_GroSeq_CountsGenes.txt')) homer_data['sequence_identifier'] = homer_data['Gene ID'] homer_data['homer_tag_count'] = nonzero(homer_data[ 'ThioMac-GroSeq-notx-110513/ genes (Total: 12166480.0) normFactor 0.82'] .fillna(0)) homer_data = homer_data[['sequence_identifier', 'homer_tag_count']] merged = data.merge(homer_data, how='inner', on='sequence_identifier') merged = merged.fillna(1)
@author: karmel Plot supplementary figure showing Hah et al error rates against MAX EDGE values when Vespucci is built without knowledge of RefSeq boundaries. ''' from __future__ import division from glasslab.dataanalysis.graphing.seq_grapher import SeqGrapher if __name__ == '__main__': yzer = SeqGrapher() dirpath = 'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/Glass Atlas/NAR_review_data/no_refseq' dirpath = yzer.get_path(dirpath) img_dirpath = yzer.get_and_create_path(dirpath, 'plots') ax = yzer.set_up_plot() title = 'Benchmarking without Foreknowledge of RefSeq' yzer.add_title(title, ax) yzer.add_axis_labels('MAX_EDGE value', 'Error rate defined by Hah et al. (%)') max_edges = [100, 500, 1000, 4000, 5000, 5500, 6000, 10000] error_rates = [ 0.388551822833, 0.372390444765, 0.263807982126, 0.124663089396, 0.121784970634, 0.121807917409, 0.123263849815, 0.142530838464 ] error_pcts = [e * 100 for e in error_rates] yzer.plot(max_edges, error_pcts, '-o') yzer.save_plot_with_dir(save_dir=img_dirpath, title=title)
@author: karmel ''' from __future__ import division from glasslab.dataanalysis.graphing.seq_grapher import SeqGrapher from glasslab.utils.functions import nonzero from glasslab.dataanalysis.misc.gr_project_2012.v1.enhancer_subsets_for_supershift import ucsc_link_cleanup import numpy if __name__ == '__main__': yzer = SeqGrapher() dirpath = 'karmel/Desktop/Projects/Classes/Rotations/Finland_2012/GR_Project/' dirpath = yzer.get_path(dirpath) peak_type = 'p65' img_dirpath = yzer.get_and_create_path(dirpath, 'boxplots_non_refseq_by_{0}'.format(peak_type)) transcripts = yzer.import_file(yzer.get_filename(dirpath, 'motifs', 'transcript_vectors_with_nearby_peaks.txt')) if True: pu_1 = False for ratio in (1.5, 2, 3): data = transcripts[transcripts['refseq'] == 'f'] data = data[data['has_infrastructure'] == 0] data = data[data['length'] < 6000] data = data[data['dex_1_lfc'] < 1] data = data[data['kla_1_lfc'] >= 1] data = data[data['gr_kla_dex_tag_count'] > 0] data = data[data['gr_fa_kla_dex_tag_count'] == 0] print len(data)
''' Created on Oct 26, 2012 @author: karmel ''' from glasslab.dataanalysis.graphing.seq_grapher import SeqGrapher from matplotlib import pyplot if __name__ == '__main__': yzer = SeqGrapher() dirpath = 'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/GR_Analysis/enhancer_classification' dirpath = yzer.get_path(dirpath) img_dirpath = yzer.get_and_create_path(dirpath, 'piecharts_for_genes_by_mechanism') data = yzer.import_file(yzer.get_filename(dirpath, 'enhancers_with_nearest_gene.txt')) data['ucsc_link_nod'] = data['ucsc_link_nod'].apply(lambda s: s.replace('nod_balbc','gr_project_2012')) draw_pies = True min_tags = 30 ratio = 1.5 # Make sure we have dimethyl data = data[data.filter(like='h3k4me2').max(axis=1) > min_tags] data = data[data['minimal_distance'] >= 1000] #data = yzer.collapse_strands(data) transcripts = yzer.import_file(yzer.get_filename(dirpath, 'transcript_vectors.txt')) transcripts['nearest_refseq_transcript_id'] = transcripts['id'] # Join, keeping all transcripts data = data.merge(transcripts, how='left', on='nearest_refseq_transcript_id', suffixes=['','_trans'])
from random import shuffle if __name__ == '__main__': yzer = SeqGrapher() dirpath = 'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/GR_Analysis/hic_domains' dirpath = yzer.get_path(dirpath) data = yzer.import_file( yzer.get_filename(dirpath, 'transcript_vectors.txt')) data['ucsc_link_nod'] = data['ucsc_link_nod'].apply( lambda s: s.replace('nod_balbc', 'gr_project_2012')) data = data.fillna(0) for rep in (4, 3, 1): img_dirpath = yzer.get_and_create_path(dirpath, 'fold_change_per_domain', 'all_transcripts', 'rep{0}'.format(rep)) kla_key = 'kla_{0}_lfc'.format(rep) dex_kla_key = 'dex_over_kla_{0}_lfc'.format(rep) shuffled = data['domain_id'].values.copy() shuffle(shuffled) data['shuffled_domain_id'] = shuffled data['up_in_kla'] = data[kla_key] > 1 data['repressed'] = data[dex_kla_key] <= -.58 data['transrepressed'] = (data[kla_key] > 1) & (data[dex_kla_key] <= -.58) data['count'] = ~data[kla_key].isnull()
For those, we will sort genes in each condition by number of interactions, and allow for null values when there is a number mismatch. ''' from __future__ import division from glasslab.dataanalysis.graphing.seq_grapher import SeqGrapher import numpy kla_col = 'kla_6h_lfc' if __name__ == '__main__': yzer = SeqGrapher() dirpath = 'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/HiC/' dirpath = yzer.get_path(dirpath) data_dirpath = yzer.get_filename(dirpath, 'enhancer_sets') img_dirpath = yzer.get_and_create_path(dirpath, 'genes_to_average_enhancer_lfc') keys = ('all', 'notx', 'kla', 'notx_only', 'kla_only', 'shared_enh') if True: interactions = yzer.import_file( yzer.get_filename(data_dirpath, 'transcript_pairs_refseq_with_me2.txt')) interactions = interactions[interactions['count'] > 1] all_transcripts = yzer.import_file( yzer.get_filename(data_dirpath, 'transcript_vectors.txt')) transcripts = all_transcripts[['id', 'kla_lfc', 'kla_6h_lfc']] # Associate gene id
from glasslab.dataanalysis.graphing.seq_grapher import SeqGrapher


def ucsc_link_cleanup(data):
    '''
    Rewrite the ``ucsc_link_nod`` column of ``data`` as HTML anchor tags.

    For every value in the column, the substring 'nod_balbc' is replaced
    with 'gr_project_2012' and the result is wrapped in an
    ``<a href=... target="_blank">UCSC</a>`` element.

    The new column is assigned back onto ``data`` itself, and that same
    object is returned.

    Assumes every value in the column is a string (``.replace`` is called
    on each one) -- TODO confirm there are no nulls upstream.
    '''
    data['ucsc_link_nod'] = data['ucsc_link_nod'].map(
        lambda x: '<a href={0} target="_blank">UCSC</a>'.format(
            x.replace('nod_balbc', 'gr_project_2012')))
    return data


if __name__ == '__main__':
    yzer = SeqGrapher()

    # Resolve the project directory and make sure the output directory
    # for the saved subgroups exists.
    dirpath = 'karmel/Desktop/Projects/Classes/Rotations/Finland_2012/GR_Project/'
    dirpath = yzer.get_path(dirpath)
    save_dirpath = yzer.get_and_create_path(dirpath,
                                            'subgroups_for_supershift')

    transcripts = yzer.import_file(
        yzer.get_filename(dirpath, 'motifs', 'transcript_vectors.txt'))

    # Successively narrow the transcript table; each filter is applied to
    # the result of the previous one.
    data = transcripts[transcripts['refseq'] == 'f']
    data = data[data['has_infrastructure'] == 0]
    data = data[data['length'] < 6000]
    data = data[data['dex_1_lfc'] < 1]
    data = data[data['kla_1_lfc'] >= 1]

    # Nulls become 0 before the link column is rewritten.
    data = data.fillna(0)
    data = ucsc_link_cleanup(data)

    # NOTE(review): the visible source is cut off here; the body of this
    # disabled branch is outside the view.
    if False:
''' Created on Oct 1, 2012 @author: karmel ''' from __future__ import division from glasslab.dataanalysis.graphing.seq_grapher import SeqGrapher if __name__ == '__main__': yzer = SeqGrapher() dirpath = 'karmel/Desktop/Projects/Classes/Rotations/Finland_2012/GR_Project/' dirpath = yzer.get_path(dirpath) img_dirpath = yzer.get_and_create_path(dirpath, 'boxplots_from_p65_gr') if True: for main, compare, basal_cond in (('p65', 'GR', 'KLA'), ('GR', 'p65', 'Dex')): data = yzer.import_file( yzer.get_filename(dirpath, 'motifs', 'from_peaks', '{0}_kla_dex_vectors.txt'.format(main))) data = data.fillna(0) data = data.groupby(['id', 'chr_name'], as_index=False).mean() data = data[data['tag_count_2'] > 0] colname = 'tag_count_diff' data[colname] = (data['tag_count'] - data['tag_count_2']) / data['tag_count'] cond_1 = (data['tag_count_3'] == 0) cond_2 = (data['tag_count_3'] > 0) & (data['tag_count_3'] <
''' Created on Apr 19, 2013 @author: karmel Note: Made font.weight = bold and axes.titlesize = 24 in matplotlibrc ''' from __future__ import division from glasslab.dataanalysis.graphing.seq_grapher import SeqGrapher if __name__ == '__main__': yzer = SeqGrapher() dirpath = 'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/Glass Atlas/Demo-data' dirpath = yzer.get_path(dirpath) img_dirpath = yzer.get_and_create_path(dirpath, 'figure_4_pie_charts') yzer.legend_location = 'lower left' pie1 = '''Annotated by RefSeq and/or ncRNA.org 16,945 Unannotated 36,578''' pie1 = [row.split(' ') for row in pie1.split('\n')] pie1 = zip(*pie1) yzer.piechart(map(lambda s: int(s.replace(',', '')), pie1[1]), pie1[0], title='Transcripts with Score >= 2', save_dir=img_dirpath, show_plot=True) pie2 = '''Promoter-associated RNA 6,314 Antisense of RefSeq 5,604 Post-TTS, same-strand 6,940 Other RefSeq Proximal 3,119
''' Created on Oct 26, 2012 @author: karmel ''' from glasslab.dataanalysis.graphing.seq_grapher import SeqGrapher from matplotlib import pyplot if __name__ == '__main__': yzer = SeqGrapher() dirpath = 'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/GR_Analysis/enhancer_classification' dirpath = yzer.get_path(dirpath) consistent = True img_dirpath = yzer.get_and_create_path( dirpath, 'piecharts_by_mechanism', consistent and 'consistent' or 'by_genes') data = yzer.import_file( yzer.get_filename(dirpath, 'enhancers_with_nearest_gene.txt')) data['ucsc_link_nod'] = data['ucsc_link_nod'].apply( lambda s: s.replace('nod_balbc', 'gr_project_2012')) draw_pies = True min_tags = 30 ratio = 1.5 # Make sure we have dimethyl data = data[data.filter(like='h3k4me2').max(axis=1) > min_tags] data = data[data['minimal_distance'] >= 1000] #data = yzer.collapse_strands(data)
''' Created on Sep 27, 2014 @author: karmel ''' from glasslab.dataanalysis.graphing.seq_grapher import SeqGrapher from glasslab.dataanalysis.misc.rodrigo.samples import get_threshold,\ get_breed_sets if __name__ == '__main__': yzer = SeqGrapher() dirpath = 'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/' +\ 'Miscellaneous_Collaborations/Rodrigo_CD8s_2014_09/Enhancers_set2' dirpath = yzer.get_path(dirpath) save_path = yzer.get_and_create_path( dirpath, 'Figures', 'Enhancer_counts') datasets = {} breed_sets = get_breed_sets() for i, (samples, short_names) in enumerate(breed_sets): oth_breed = breed_sets[1 - i] for j, sample_prefix in enumerate(short_names): sample_dirpath = yzer.get_filename(dirpath, sample_prefix) filename = yzer.get_filename(sample_dirpath, sample_prefix + '_enhancers.txt') data = yzer.import_file(filename) data = data.fillna(0) min_thresh = get_threshold('atac')
from glasslab.dataanalysis.graphing.seq_grapher import SeqGrapher from random import shuffle if __name__ == '__main__': yzer = SeqGrapher() dirpath = 'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/GR_Analysis/hic_domains' dirpath = yzer.get_path(dirpath) data = yzer.import_file( yzer.get_filename(dirpath, 'transcript_vectors.txt')) data['ucsc_link_nod'] = data['ucsc_link_nod'].apply( lambda s: s.replace('nod_balbc', 'gr_project_2012')) data = data.fillna(0) for rep in (4, 3, 1): img_dirpath = yzer.get_and_create_path(dirpath, 'lfc_histograms', 'rep{0}'.format(rep)) kla_key = 'kla_{0}_lfc'.format(rep) dex_kla_key = 'dex_over_kla_{0}_lfc'.format(rep) data = data[data[kla_key] >= 1] shuffled = data['domain_id'].values.copy() shuffle(shuffled) data['shuffled_domain_id'] = shuffled grouped = data.groupby(by='domain_id', as_index=False).mean() shuffled_grouped = data.groupby(by='shuffled_domain_id', as_index=False).mean() grouped = grouped[grouped['domain_id'] != 0]
from __future__ import division from glasslab.dataanalysis.graphing.seq_grapher import SeqGrapher from glasslab.utils.functions import nonzero from collections import OrderedDict if __name__ == '__main__': yzer = SeqGrapher() dirpath = 'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/HiC/' dirpath = yzer.get_path(dirpath) data_dirpath = yzer.get_filename(dirpath, 'enhancer_sets') kla_col = 'kla_lfc' tss_only = False img_dirpath = yzer.get_and_create_path( dirpath, 'novel_me2_sites', tss_only and 'genic' or 'all_interactions', 'ratio_10') if False: enhancers = yzer.import_file( yzer.get_filename(data_dirpath, 'all_distal_enhancers_inc_me2.txt')) all_transcripts = yzer.import_file( yzer.get_filename(data_dirpath, 'transcript_vectors.txt')) transcripts = all_transcripts[['id', kla_col]] enhancers = enhancers.merge(transcripts, how='left', on='id') if tss_only: interactions = yzer.import_file( yzer.get_filename(data_dirpath,
''' Created on Feb 15, 2013 @author: karmel ''' from __future__ import division from glasslab.dataanalysis.graphing.seq_grapher import SeqGrapher if __name__ == '__main__': yzer = SeqGrapher() dirpath = 'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/NOD_BALBc/ThioMacs/Analysis_2013_02/' dirpath = yzer.get_path(dirpath) img_dirpath = yzer.get_and_create_path(dirpath, 'srf_ko_targets') data = yzer.import_file( yzer.get_filename(dirpath, 'transcript_vectors.txt')) data = data.fillna(0) data = data[data[['nod_notx_1h_tag_count', 'balb_notx_1h_tag_count']].max( axis=1) >= 10] data = data[(data['has_refseq'] == 1) & (data['transcript_score'] >= 4)] # From Amy Sullivan SRF paper down_in_srf_ko = [ 'Cnn2', 'Srf', 'Lima1', 'Rhoj', 'Coro1a', 'Il1rn', 'Lsp1', 'LOC277203', 'Vcl', 'Card11', 'Cbr2', 'Cd83', 'Acta2', 'Actb', 'Tspan7', 'Ebi2', 'Gpr162', 'Ckb', 'Dhcr24', 'LOC638632', 'Actg2', 'Trim29', 'Ppap2b', 'Klk1b11', 'Actc1', 'Pcp4l1', 'LOC621324', 'Cdkn1c', 'Slco2b1', 'Cd24a', 'Pdgfa', 'Lrrc58', 'Dnmt3a', 'Slamf9', '1100001H23Rik', 'Aldoc', 'Cd28', '1500003O03Rik', 'Rab15', 'Pld4', 'Pilra', 'Xlr', 'Tgm1', 'Lcp1', 'Fstl1', 'Slc40a1', 'Usp24', 'Jup', 'Cd74', 'Tpm4',
''' Created on Feb 12, 2013 @author: karmel ''' from __future__ import division from glasslab.dataanalysis.graphing.seq_grapher import SeqGrapher from glasslab.utils.functions import nonzero from glasslab.dataanalysis.motifs.motif_analyzer import MotifAnalyzer if __name__ == '__main__': yzer = SeqGrapher() dirpath = 'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/CD4TCells/Oshea_enhancers/peak_overlaps' dirpath = yzer.get_path(dirpath) img_dirpath = yzer.get_and_create_path(dirpath, 'figures') peak_pretty = 'p300' peak = peak_pretty.lower() th1 = yzer.import_file( yzer.get_filename(dirpath, 'th1_with_th2_{0}.txt'.format(peak))).fillna(0) th2 = yzer.import_file( yzer.get_filename(dirpath, 'th2_with_th1_{0}.txt'.format(peak))).fillna(0) # Filter out promoters th1 = th1[th1['tss_id'] == 0] th2 = th2[th2['tss_id'] == 0] # Get venn-diagram sets
def get_filters_transcript(subdata, xcol, ycol):
    '''
    Build four boolean row masks over ``subdata`` from its ``kla_1_lfc``
    and ``dex_over_kla_1_lfc`` columns.

    Returns a tuple ``(down_in_kla, nc_in_kla, up_in_kla, trans)``:

    * ``down_in_kla`` -- ``kla_1_lfc <= -1``
    * ``nc_in_kla``   -- ``|kla_1_lfc| < 1``
    * ``up_in_kla``   -- ``kla_1_lfc >= 1`` and ``dex_over_kla_1_lfc > -.58``
    * ``trans``       -- ``kla_1_lfc >= 1`` and ``dex_over_kla_1_lfc <= -.58``

    ``xcol`` and ``ycol`` are accepted for signature compatibility but are
    not used by this filter set.
    '''
    kla_lfc = subdata['kla_1_lfc']
    dex_over_kla_lfc = subdata['dex_over_kla_1_lfc']

    down_in_kla = kla_lfc <= -1
    nc_in_kla = kla_lfc.abs() < 1

    # Every comparison is parenthesised (or pre-computed) before combining
    # with ``&``: ``&`` binds tighter than ``>=``, so the previous
    # ``kla >= 1 & (...)`` was evaluated as ``kla >= (1 & (...))``.
    induced = kla_lfc >= 1

    # Both induced groups are derived from the same base mask so that they
    # split the induced rows between them instead of one being a subset of
    # the other.
    up_in_kla = induced & (dex_over_kla_lfc > -.58)
    trans = induced & (dex_over_kla_lfc <= -.58)
    return down_in_kla, nc_in_kla, up_in_kla, trans


if __name__ == '__main__':
    yzer = SeqGrapher()
    dirpath = 'karmel/Desktop/Projects/Classes/Rotations/Finland_2012/GR_Project/'
    dirpath = yzer.get_path(dirpath)
    img_dirpath = yzer.get_and_create_path(dirpath, 'bargraphs_from_p65_gr')

    transcripts = yzer.import_file(
        yzer.get_filename(dirpath, 'motifs', 'transcript_vectors.txt'))
    # Duplicate the id under the name used as the join key below.
    transcripts['glass_transcript_id'] = transcripts['id']

    if True:
        for main, compare, basal_cond in (('p65', 'GR', 'KLA'),
                                          ('GR', 'p65', 'Dex')):
            data = yzer.import_file(
                yzer.get_filename(dirpath, 'motifs', 'from_peaks',
                                  '{0}_kla_dex_vectors.txt'.format(main)))
            # NOTE(review): the visible source is cut off in the middle of
            # the next statement. It is kept verbatim below as a comment
            # rather than completed by guesswork:
            #   data = data.merge(transcripts, how='left', on='glass_transcript_id',
@author: karmel Do novel interactions gain or lose me2? ''' from __future__ import division from glasslab.dataanalysis.graphing.seq_grapher import SeqGrapher from glasslab.utils.functions import nonzero if __name__ == '__main__': yzer = SeqGrapher() dirpath = 'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/HiC/' dirpath = yzer.get_path(dirpath) data_dirpath = yzer.get_filename(dirpath, 'enhancer_sets') img_dirpath = yzer.get_and_create_path(dirpath, 'novel_enhancer_me2_change', 'all_interactions') interactions = yzer.import_file( yzer.get_filename( data_dirpath, 'transcript_pairs_enhancer_with_anything_with_me2_inc_me2_counts.txt' )) interactions = interactions[interactions['count'] > 1] interactions = interactions.fillna(0) # Key on peak id, not enhancer id, which could be bidirectional #interactions['id_2'] = interactions['h3k4me2_id'] interactions['hash'] = interactions.apply( lambda row: '{0}.{1}'.format(row['id'], row['id_2']), axis=1)
''' Created on Oct 1, 2012 @author: karmel ''' from __future__ import division from glasslab.dataanalysis.graphing.seq_grapher import SeqGrapher from matplotlib import pyplot import numpy if __name__ == '__main__': yzer = SeqGrapher() dirpath = 'karmel/Desktop/Projects/Classes/Rotations/Finland_2012/GR_Project/' dirpath = yzer.get_path(dirpath) img_dirpath = yzer.get_and_create_path(dirpath, 'piecharts_from_p65_gr') if True: for main, compare, basal_cond in ( ('GR', 'p65', 'Dex'), ('p65', 'GR', 'KLA'), ): data = yzer.import_file( yzer.get_filename(dirpath, 'motifs', 'from_peaks', '{0}_kla_dex_vectors.txt'.format(main))) # Get nearby peaks first ids_with_nearby = data[ (data['distance_to_tss_2'].isnull() == False) & (data['distance_to_peak_2'] <= 1000)]['id'] data = data.fillna(0)
@author: karmel Do novel interactions gain or lose me2? ''' from __future__ import division from glasslab.dataanalysis.graphing.seq_grapher import SeqGrapher from glasslab.utils.functions import nonzero if __name__ == '__main__': yzer = SeqGrapher() dirpath = 'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/HiC/' dirpath = yzer.get_path(dirpath) data_dirpath = yzer.get_filename(dirpath, 'enhancer_sets') img_dirpath = yzer.get_and_create_path(dirpath, 'novel_interactions_kla_lfc', 'all_interactions') interactions = yzer.import_file( yzer.get_filename( data_dirpath, 'transcript_pairs_enhancer_with_anything_with_me2_inc_me2_counts.txt' )) interactions = interactions[interactions['count'] > 1] all_transcripts = yzer.import_file( yzer.get_filename(data_dirpath, 'transcript_vectors.txt')) kla_col = 'kla_lfc' transcripts = all_transcripts[['id', kla_col]]
@author: karmel Do the gene groups outlined in Ramirez-Carrozzi 2006 and 2009 correlate with expression changes in Dex+KLA? ''' from glasslab.dataanalysis.graphing.seq_grapher import SeqGrapher if __name__ == '__main__': yzer = SeqGrapher() dirpath = 'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/GR_Analysis/cpg_island_promoters' dirpath = yzer.get_path(dirpath) for rep in (4, 3, 1): img_dirpath = yzer.get_and_create_path(dirpath, 'boxplots_by_expression', 'genes_with_gr', 'rep{0}'.format(rep), 'transrepressed') data = yzer.import_file( yzer.get_filename(dirpath, 'transcript_vectors.txt')) data['ucsc_link_nod'] = data['ucsc_link_nod'].apply( lambda s: s.replace('nod_balbc', 'gr_project_2012')) data = data.fillna(0) data = data[(data['kla_{0}_lfc'.format(rep)] >= 1) & (data['dex_over_kla_{0}_lfc'.format(rep)] <= -.58)] # 2006 secondary_response = data[data['gene_names'].isin([ '{Il12b}', '{Il6}', '{Nos2}', '{Mx1}', '{Mx2}', '{Marco}',
''' Created on Feb 12, 2013 @author: karmel ''' from __future__ import division from glasslab.dataanalysis.graphing.seq_grapher import SeqGrapher from glasslab.dataanalysis.motifs.motif_analyzer import MotifAnalyzer if __name__ == '__main__': yzer = SeqGrapher() dirpath = 'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/NOD_BALBc/ThioMacs/Analysis_2013_02/' dirpath = yzer.get_path(dirpath) img_dirpath = yzer.get_and_create_path(dirpath, 'srf_binding') data = yzer.import_file( yzer.get_filename(dirpath, 'transcript_vectors.txt')) data = data.fillna(0) data = data[data[['nod_notx_1h_tag_count', 'balb_notx_1h_tag_count']].max( axis=1) >= 10] subsets = [ data, data[(data['has_refseq'] == 1) & (data['transcript_score'] >= 4)], data[(data['distal'] == 't') & (data['h3k4me2_tag_count'] > 10)] ] # Add in nearest genes for enhancers enh = subsets[2].copy() nearest_genes = yzer.import_file(
wt_only = wt_data[ wt_data['foxo1_ko_naive_atac_tag_count'] < min_thresh] fold = 2 both = wt_data[ (wt_data['foxo1_ko_naive_atac_tag_count'] * fold >= wt_data['tag_count']) & (wt_data['tag_count'] * fold >= wt_data['foxo1_ko_naive_atac_tag_count']) ] ko_only = ko_data[ ko_data['naive_atac_tag_count'] < min_thresh] save_path = yzer.get_and_create_path( dirpath, 'Figures', 'Foxo1_group_promoters_overlaps') groups = [wt_only, both, ko_only] labels = ['WT only', 'WT and KO', 'Foxo1 KO only'] if True: yzer.boxplot([gp['naive_foxo1_tag_count'] for gp in groups], labels, title='Foxo1 tags in ATAC-seq regions by group', ylabel='Foxo1 peak tag count', save_dir=save_path, show_plot=False) yzer.boxplot([gp['lcmv_d12_foxo1_tag_count'] for gp in groups], labels, title='LCMV d12 Foxo1 tags in ATAC-seq regions by group', ylabel='Foxo1 peak tag count', save_dir=save_path, show_plot=False)
from __future__ import division from glasslab.dataanalysis.graphing.seq_grapher import SeqGrapher from glasslab.utils.functions import nonzero from collections import OrderedDict if __name__ == '__main__': yzer = SeqGrapher() dirpath = 'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/HiC/' dirpath = yzer.get_path(dirpath) data_dirpath = yzer.get_filename(dirpath, 'enhancer_sets') kla_col = 'kla_lfc' tss_only = False img_dirpath = yzer.get_and_create_path( dirpath, 'interactions_by_kla_lfc', tss_only and 'genic' or 'all_interactions', 'lfc_2') # File generated in novel_me2_sites enhancers = yzer.import_file( yzer.get_filename( data_dirpath, 'all_enhancers_with_me2_and_{0}interaction_stats.txt'.format( tss_only and 'tss_' or ''))) for kla_timepoint in ('1h', ): enhancers['me2_ratio'] = nonzero(enhancers['me2_kla_6h_tag_count_2'])/\ nonzero(enhancers['me2_notx_tag_count_2']) sets = OrderedDict() sets['4x GRO in KLA {0}'.format(kla_timepoint)] = enhancers[
''' Created on Apr 19, 2013 @author: karmel Note: Made font.weight = bold and axes.titlesize = 24 in matplotlibrc ''' from __future__ import division from glasslab.dataanalysis.graphing.seq_grapher import SeqGrapher if __name__ == '__main__': yzer = SeqGrapher() dirpath = 'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/Glass Atlas/NAR_review_data/' dirpath = yzer.get_path(dirpath) img_dirpath = yzer.get_and_create_path(dirpath, 'hg19_mcf7_pie_charts') yzer.legend_location = 'lower left' pie1 = '''Annotated by RefSeq and/or ncRNA.org 14,022 Unannotated 67,046''' pie1 = [row.split(' ') for row in pie1.split('\n')] pie1 = zip(*pie1) yzer.piechart(map(lambda s: int(s.replace(',', '')), pie1[1]), pie1[0], title='Hah et al MCF-7 Transcripts\nwith Score >= 1', save_dir=img_dirpath, show_plot=True) pie2 = '''Promoter-associated RNA 7,055 Antisense of RefSeq 7,539 Other RefSeq Proximal 13,664 Distal with H3K4me2 2,352
Created on Jan 3, 2013 @author: karmel Plot gen-enhancer me2 LFC; do we see correlation? ''' from glasslab.dataanalysis.graphing.seq_grapher import SeqGrapher from glasslab.utils.functions import nonzero import numpy if __name__ == '__main__': yzer = SeqGrapher() dirpath = 'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/HiC/' dirpath = yzer.get_path(dirpath) data_dirpath = yzer.get_filename(dirpath, 'enhancer_sets') img_dirpath = yzer.get_and_create_path(dirpath, 'gene_enhancer_me2_lfc', 'scatterplots') interactions = yzer.import_file( yzer.get_filename( data_dirpath, 'transcript_pairs_enhancer_with_anything_with_me2_inc_me2_counts.txt' )) interactions = interactions[interactions['count'] > 1] all_transcripts = yzer.import_file( yzer.get_filename(data_dirpath, 'transcript_vectors.txt')) for me2_timepoint in ('6h', '24h'): me2_col = 'me2_{0}_ratio'.format(me2_timepoint) kla_col = 'kla_lfc' col_set = [me2_col + '_2', kla_col + '_2', kla_col, me2_col]
''' Created on Jul 11, 2012 @author: karmel ''' from __future__ import division from glasslab.dataanalysis.graphing.seq_grapher import SeqGrapher if __name__ == '__main__': yzer = SeqGrapher() dirpath = 'karmel/Desktop/Projects/Classes/Rotations/Finland_2012/GR_Project/' dirpath = yzer.get_path(dirpath) img_dirpath = yzer.get_and_create_path(dirpath, 'peak_scatterplots') if True: for main, compare, basal_cond in (('p65', 'GR', 'KLA'), ('GR', 'p65', 'Dex')): data = yzer.import_file( yzer.get_filename(dirpath, 'motifs', 'from_peaks', '{0}_kla_dex_vectors.txt'.format(main))) data = data.fillna(0) data = data.groupby(['id', 'chr_name'], as_index=False).mean() xcolname, ycolname = 'tag_count_2', 'tag_count' #'p65_kla_tag_count', 'p65_kla_dex_tag_count', data = data[data[ycolname] >= 10] cond_1 = (data['tag_count_3'] == 0) cond_2 = (data['tag_count_3'] > 0) & (data['tag_count_3'] < data['tag_count_4']) cond_3 = (data['tag_count_3'] > 0) & (data['tag_count_3'] >=
''' Created on Nov 7, 2012 @author: karmel ''' from __future__ import division from glasslab.dataanalysis.graphing.seq_grapher import SeqGrapher from glasslab.dataanalysis.misc.cd4tcell_finland_2012.resources import comparison_sets,\ pretty_names if __name__ == '__main__': yzer = SeqGrapher() dirpath = 'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/CD4TCells_Finland_2012/Analysis_2013_02' dirpath = yzer.get_path(dirpath) img_dirpath = yzer.get_and_create_path(dirpath, 'with_me3', 'basic_scatterplots') data = yzer.import_file( yzer.get_filename(dirpath, 'transcript_vectors.txt')) data = data.fillna(0) data = data[data['naive_me3_tag_count'] + data['act_me3_tag_count'] > 0] for key1, key2, norm_factor in comparison_sets: name1 = pretty_names[key1[:-1]] + key1[-1:] name2 = pretty_names[key2[:-1]] + key2[-1:] data_normed = yzer.normalize(data, key2 + '_tag_count', norm_factor) ax = yzer.scatterplot( data_normed, key1 + '_tag_count', key2 + '_tag_count_norm',