chromosome_list, text_output_path) min_dists_list = [] for min_dists_chr_list in min_dists.values(): min_dists_list.extend(min_dists_chr_list) lessthan2kb, btwn2kb10kb, grtrthan10kb = \ integrate_data.bin_distances(min_dists_list) # print len(lessthan2kb), len(btwn2kb10kb), len(grtrthan10kb) jplots.plot_binned_bar_graph(min_dists_list, bins_in_plot, xmin, xmax, 'Min Distance to %s' %genomedat_startcolname, 'Frequency', 'Tandem Duplications by Min Dist to %s' %genomedat_startcolname, plot_output_fname, autoxlim=autoxlim) largest_tss_dic = {} for k, l in all_tandem_dupes.iteritems(): largest_tss_dic[k] = max(l) print 'Largest distances by chromosome: ' print largest_tss_dic
if __name__ == '__main__':
    # Script entry point: runs tandem_data.calculate_tandem_gc_content on the
    # file named by the first command-line argument (resolved relative to
    # _input_dir) and draws two binned bar graphs from the returned records.
    if len(sys.argv) < 2:
        # Passing a string to sys.exit writes it to stderr and terminates
        # with exit status 1, so a missing argument is reported as a failure
        # (a bare sys.exit() would exit with status 0).
        sys.exit('Tandem data must be given on the command line.')

    tandem_fname = sys.argv[1]
    tandem_path = os.path.join(_input_dir, tandem_fname)

    gc_chr_pairlist = tandem_data.calculate_tandem_gc_content(
        tandem_path, 'sequence', 'chromosome_1', save_output,
        output_gc_content_colname, output_seq_length_colname)

    # Each record is read at index 0 and index 2, so records hold at least
    # three fields despite the "pair" in the variable name.
    # NOTE(review): presumably index 0 is the GC fraction and index 2 the
    # sequence length -- confirm against calculate_tandem_gc_content.
    just_gc = [record[0] for record in gc_chr_pairlist]
    seq_lengths = [record[2] for record in gc_chr_pairlist]

    # Sequence lengths: 20 bins, no explicit x-limits, autoxlim enabled.
    jplots.plot_binned_bar_graph(seq_lengths, 20, None, None,
                                 'length_of_sequence (bases)', 'frequency',
                                 'seq_length_across_tandem_dupes',
                                 save_path_length, autoxlim=True)

    # GC values: 30 bins with the x-axis limits fixed at 0 and 1.
    jplots.plot_binned_bar_graph(just_gc, 30, 0, 1,
                                 'gc_content', 'frequency',
                                 'gc_content_across_tandem_dupes',
                                 save_path_gc)
# NOTE(review): `dist_list` is not bound anywhere in the visible code; this
# statement presumably closes a loop that begins above this excerpt --
# confirm its indentation against the full file.
tss_distances_list.extend(dist_list)

# Split the pooled distances into the three bins returned by
# integrate_data.bin_distances (only referenced by the disabled print below).
lessthan2kb, btwn2kb10kb, grtrthan10kb = integrate_data.bin_distances(
    tss_distances_list)
# print lessthan2kb, btwn2kb10kb, grtrthan10kb

# Disabled: write every entry of tss_distances_list as a tab-separated row.
# with open(os.path.join(_output_dir, output_filename), 'wb') as writefile:
#     outwriter = csv.writer(writefile, delimiter='\t')
#     for dist in tss_distances_list:
#         outwriter.writerow(dist)

# The two literals are concatenated before formatting, so the first one must
# end with a space; otherwise the column name runs straight into "across".
plot_title = ('Random distribution of Distances from %s '
              'across genome\n%s points per chromosome'
              % (genome_startcolname, numb_rands_per_chr))

jplots.plot_binned_bar_graph(tss_distances_list, bins_in_plot, xmin, xmax,
                             'distance from %s' % genome_startcolname,
                             'frequency',
                             plot_title,
                             plot_output_fname,
                             autoxlim=autoxlim)
# NOTE(review): `chromosome` is not bound anywhere in the visible code; this
# message presumably ends a per-chromosome loop that begins above this
# excerpt -- confirm its indentation against the full file.
print('Done for %s' % chromosome)

# Dump the three result dictionaries to stdout.
print(distances_dict)
print(coordinates_dict)
print(locations_dict)

# Gather every value of distances_dict into one list, then hand that list to
# list_tools.flatten_list to collapse it.
distances = list_tools.flatten_list(list(distances_dict.values()))

# Plot results: 100 bins, x-limits passed as -100000 and 100000, with
# autoxlim also switched on.
jplots.plot_binned_bar_graph(distances, 100, -100000, 100000,
                             'Distance from Exon', 'Frequency', 'Title',
                             save_path_name, autoxlim=True)
print('Plot saved in %s' % save_path_name)
# Pool the results of tandem_data.get_tandem_tss_distribution over every
# chromosome in chromosome_list into one flat list.
merged_tss_list = []
for chromosome in chromosome_list:
    genome_tss = genome_info.get_tss_locations(
        genome_path, 'chrom', 'txStart', chromosome, chromosome_list)
    dist_from_tss_list = tandem_data.get_tandem_tss_distribution(
        tandem_path, 'start_1', 'chromosome_1', genome_tss, chromosome,
        chromosome_list)
    merged_tss_list += dist_from_tss_list

plot_path = os.path.join(_plot_dir, plot_output_fname)
# NOTE(review): the other plot_binned_bar_graph calls visible in this source
# pass xmin and xmax between the bin count and the x-axis label; here the
# label directly follows the bin count -- confirm against the signature.
jplots.plot_binned_bar_graph(
    merged_tss_list,
    100,
    'Distance from a known TSS',
    'Frequency',
    'Distribution of filtered tandem duplications by TSS',
    plot_path)

total = len(merged_tss_list)

# Work with magnitudes only from here on.
abs_merged_tss_list = [abs(dist) for dist in merged_tss_list]
print(abs_merged_tss_list)
print(len(abs_merged_tss_list))

# Sort the magnitudes, split them with list_tools.chunks (second argument
# 48), and report the [min, max] range of each chunk.
chunked_list = list_tools.chunks(sorted(abs_merged_tss_list), 48)
print(chunked_list)
print(len(chunked_list))
print([[min(chunk), max(chunk)] for chunk in chunked_list])