def main():
    """Compare two mutation profiles and draw their relative profile as a heatmap.

    Command line: ``[options] <mut1 file> <mut2 file>``.

    Both files are read with ``parse_mutations``; the two resulting profiles
    (and their sequenced-bp values) are combined by
    ``compute_relative_profile``.  The relative profile is printed with
    ``print_table`` and then plotted as a tile heatmap written to the PDF
    named by ``-o``.

    Exits through ``parser.error`` (status 2) unless exactly two positional
    arguments are given.
    """
    usage = 'usage: %prog [options] <mut1 file> <mut2 file>'
    parser = OptionParser(usage)
    # NOTE(review): '-m' is parsed but options.mut_norm is never read in this
    # function -- confirm whether compute_relative_profile should receive it.
    parser.add_option('-m', dest='mut_norm', action='store_true', default=False,
                      help='Normalize by # mutations (as opposed to sequenced bp) [Default: %default]')
    parser.add_option('-o', dest='output_pdf', default='mut_cmp.pdf',
                      help='Output pdf file for heatmap [Default: %default]')
    parser.add_option('-r', dest='raw', action='store_true', default=False,
                      help='Use raw mutation counts (as opposed to normalized for ACGT content) [Default: %default]')
    (options, args) = parser.parse_args()

    if len(args) != 2:
        # parser.error() prints the (expanded) usage itself and exits, so the
        # message only needs to say what is wrong.  Passing the raw usage
        # template here would show a literal '%prog' to the user.
        parser.error('Must provide exactly two mutation files')
    mut1_file, mut2_file = args

    mutation_profile1, seq_bp1 = parse_mutations(mut1_file, options.raw)
    mutation_profile2, seq_bp2 = parse_mutations(mut2_file, options.raw)

    relative_mutation_profile = compute_relative_profile(
        mutation_profile1, seq_bp1, mutation_profile2, seq_bp2)

    print_table(relative_mutation_profile)

    # Flatten the (nt1, nt2)-keyed profile into three parallel columns, one
    # row per ordered pair, for the R data frame.
    nts = ['_', 'A', 'C', 'G', 'T']
    pairs = [(nt1, nt2) for nt1 in nts for nt2 in nts]
    nts1 = [nt1 for nt1, _ in pairs]
    nts2 = [nt2 for _, nt2 in pairs]
    rel = [relative_mutation_profile[pair] for pair in pairs]

    df = ro.DataFrame({
        'nt1': ro.StrVector(nts1),
        'nt2': ro.StrVector(nts2),
        'rel': ro.FloatVector(rel),
    })

    # Heatmap: nt2 on the x axis ('Read'), nt1 on the y axis ('Reference'),
    # tiles filled by the relative value on a diverging colour scale.
    gp = ggplot2.ggplot(df) + \
        ggplot2.aes_string(x='nt2', y='nt1', fill='rel') + \
        ggplot2.geom_tile() + \
        ggplot2.scale_x_discrete('Read') + \
        ggplot2.scale_y_discrete('Reference') + \
        ggplot2.scale_fill_gradient2('log2 enrichment', low='darkblue', mid='white', high='darkred')

    # Save to file; always close the PDF device, even if plotting fails.
    grdevices.pdf(file=options.output_pdf)
    try:
        gp.plot()
    finally:
        grdevices.dev_off()
def gray_plot(data, min=0, max=1, name=""):
    """Build a black-to-white tile plot of ``data``.

    ``data`` is melted with the R ``reshape`` package; the plot maps the
    ``L1`` column to x, ``L2`` to y and ``value`` to the tile fill
    (presumably the column names produced by ``melt`` -- confirm against
    the input type).

    :param data: object handed to ``reshape.melt``.
    :param min: lower limit of the fill gradient (drawn black).
    :param max: upper limit of the fill gradient (drawn white).
    :param name: title of the x axis.
    :return: the assembled ggplot object (not rendered here).
    """
    reshape = importr('reshape')
    melted = reshape.melt(data, id_var=['x', 'y'])

    # Layers are added one at a time, in the same order as a single chained
    # expression would evaluate them.
    plot = ggplot2.ggplot(melted)
    plot = plot + ggplot2.aes_string(x='L1', y='L2')
    plot = plot + ggplot2.geom_tile(ggplot2.aes_string(fill='value'))
    plot = plot + ggplot2.scale_fill_gradient(
        low="black", high="white", limits=FloatVector((min, max)))
    plot = plot + ggplot2.coord_equal()
    plot = plot + ggplot2.scale_x_continuous(name)
    return plot
def gray_plot(data, min=0, max=1, name=""):
    """Return a grayscale tile plot for ``data``.

    The input is reshaped with ``melt`` from the R ``reshape`` package, then
    drawn as tiles with ``L1`` on x, ``L2`` on y and ``value`` as the fill
    (presumably the columns ``melt`` emits -- confirm against the input
    type).

    :param data: object handed to ``reshape.melt``.
    :param min: lower bound of the fill scale, rendered black.
    :param max: upper bound of the fill scale, rendered white.
    :param name: label for the x axis.
    :return: the ggplot object; nothing is drawn by this function.
    """
    reshape = importr('reshape')
    long_form = reshape.melt(data, id_var=['x', 'y'])

    # Each step below adds exactly one component, keeping the original
    # left-to-right construction order.
    pg = ggplot2.ggplot(long_form)
    pg += ggplot2.aes_string(x='L1', y='L2')
    pg += ggplot2.geom_tile(ggplot2.aes_string(fill='value'))
    pg += ggplot2.scale_fill_gradient(
        low="black", high="white", limits=FloatVector((min, max)))
    pg += ggplot2.coord_equal()
    pg += ggplot2.scale_x_continuous(name)
    return pg
def plot_similarity_matrix(self, item_type, image_file, title):
    '''Draw the pairwise crawl similarities of `item_type` as a heat map.

    Reads ``self.similarity[item_type][crawl1][crawl2]``, turns every pair
    into one row of a data frame (crawl names shortened with
    ``MonthlyCrawl.short_name``), and plots the rows as labelled tiles.
    The fill scale is centred on the median similarity.  The data frame and
    the median are printed to stdout.

    When there are more than ``self.MAX_MATRIX_SIZE`` outer crawls, rows
    mentioning any crawl at a position greater than ``MAX_MATRIX_SIZE`` in
    reverse-sorted order are removed before plotting.

    :param item_type: key into ``self.similarity``.
    :param image_file: file name, joined onto ``PLOTDIR``, the plot is saved to.
    :param title: plot title.
    :return: the ggplot object that was saved.
    '''
    similarities = self.similarity[item_type]

    # One dict per column, keyed by a 1-based row number, so the resulting
    # frame is indexed 1..N.
    columns = defaultdict(dict)
    row = 0
    for first in similarities:
        for second in similarities[first]:
            row += 1
            value = similarities[first][second]
            columns['crawl1'][row] = MonthlyCrawl.short_name(first)
            columns['crawl2'][row] = MonthlyCrawl.short_name(second)
            columns['similarity'][row] = value
            columns['sim_rounded'][row] = value  # to be rounded
    frame = pandas.DataFrame(columns)
    print(frame)

    # The median of the similarity values is the midpoint of the colour scale.
    midpoint = frame['similarity'].median()

    # A wide value range gets fewer decimals and larger tile labels.
    spread = frame['similarity'].max() - frame['similarity'].min()
    if spread > .2:
        decimals, textsize, minshown = 2, 2.8, .005
    else:
        decimals, textsize, minshown = 3, 2, .0005
    template = '{0:.' + str(decimals) + 'f}'

    def as_label(value):
        # Values below the display threshold collapse to '0'; otherwise the
        # leading zero(s) are stripped (e.g. '0.12' -> '.12').
        if value >= minshown:
            return template.format(value).lstrip('0')
        return '0'

    frame['sim_rounded'] = frame['sim_rounded'].apply(as_label)
    print('Median of similarities for', item_type, '=', midpoint)

    limit = self.MAX_MATRIX_SIZE
    if len(similarities) > limit:
        ordered = sorted(similarities, reverse=True)
        for position, crawl in enumerate(ordered):
            short_name = MonthlyCrawl.short_name(crawl)
            # Positions 0..limit are kept; anything beyond is removed from
            # both axes.
            if position > limit:
                keep = (frame['crawl1'] != short_name) \
                    & (frame['crawl2'] != short_name)
                frame = frame[keep]

    p = ggplot2.ggplot(frame)
    p = p + ggplot2.aes_string(x='crawl2', y='crawl1',
                               fill='similarity', label='sim_rounded')
    p = p + ggplot2.geom_tile(color="white")
    p = p + ggplot2.scale_fill_gradient2(low="red", high="blue", mid="white",
                                         midpoint=midpoint, space="Lab")
    p = p + GGPLOT2_THEME
    p = p + ggplot2.coord_fixed()
    x_text = ggplot2.element_text(angle=45, vjust=1, hjust=1)
    p = p + ggplot2.theme(**{'axis.text.x': x_text})
    p = p + ggplot2.labs(title=title, x='', y='')
    p = p + ggplot2.geom_text(color='black', size=textsize)

    img_path = os.path.join(PLOTDIR, image_file)
    p.save(img_path)
    return p
def main():
    """Compare two mutation profiles and draw their relative profile as a heatmap.

    Command line: ``[options] <mut1 file> <mut2 file>``.

    Both files are read with ``parse_mutations``; the two resulting profiles
    (and their sequenced-bp values) are combined by
    ``compute_relative_profile``.  The relative profile is printed with
    ``print_table`` and then plotted as a tile heatmap written to the PDF
    named by ``-o``.

    Exits through ``parser.error`` (status 2) unless exactly two positional
    arguments are given.
    """
    usage = 'usage: %prog [options] <mut1 file> <mut2 file>'
    parser = OptionParser(usage)
    # NOTE(review): '-m' is parsed but options.mut_norm is never read in this
    # function -- confirm whether compute_relative_profile should receive it.
    parser.add_option(
        '-m',
        dest='mut_norm',
        action='store_true',
        default=False,
        help='Normalize by # mutations (as opposed to sequenced bp) [Default: %default]')
    parser.add_option(
        '-o',
        dest='output_pdf',
        default='mut_cmp.pdf',
        help='Output pdf file for heatmap [Default: %default]')
    parser.add_option(
        '-r',
        dest='raw',
        action='store_true',
        default=False,
        help='Use raw mutation counts (as opposed to normalized for ACGT content) [Default: %default]')
    (options, args) = parser.parse_args()

    if len(args) != 2:
        # parser.error() prints the (expanded) usage itself and exits, so the
        # message only needs to say what is wrong.  Passing the raw usage
        # template here would show a literal '%prog' to the user.
        parser.error('Must provide exactly two mutation files')
    mut1_file, mut2_file = args

    mutation_profile1, seq_bp1 = parse_mutations(mut1_file, options.raw)
    mutation_profile2, seq_bp2 = parse_mutations(mut2_file, options.raw)

    relative_mutation_profile = compute_relative_profile(
        mutation_profile1, seq_bp1, mutation_profile2, seq_bp2)

    print_table(relative_mutation_profile)

    # Flatten the (nt1, nt2)-keyed profile into three parallel columns, one
    # row per ordered pair, for the R data frame.
    nts = ['_', 'A', 'C', 'G', 'T']
    pairs = [(nt1, nt2) for nt1 in nts for nt2 in nts]
    nts1 = [nt1 for nt1, _ in pairs]
    nts2 = [nt2 for _, nt2 in pairs]
    rel = [relative_mutation_profile[pair] for pair in pairs]

    df = ro.DataFrame({
        'nt1': ro.StrVector(nts1),
        'nt2': ro.StrVector(nts2),
        'rel': ro.FloatVector(rel),
    })

    # Heatmap: nt2 on the x axis ('Read'), nt1 on the y axis ('Reference'),
    # tiles filled by the relative value on a diverging colour scale.
    gp = ggplot2.ggplot(df) + \
        ggplot2.aes_string(x='nt2', y='nt1', fill='rel') + \
        ggplot2.geom_tile() + \
        ggplot2.scale_x_discrete('Read') + \
        ggplot2.scale_y_discrete('Reference') + \
        ggplot2.scale_fill_gradient2('log2 enrichment', low='darkblue', mid='white', high='darkred')

    # Save to file; always close the PDF device, even if plotting fails.
    grdevices.pdf(file=options.output_pdf)
    try:
        gp.plot()
    finally:
        grdevices.dev_off()