def prepare_plot_data(samples):
    """Collect bar-plot coordinates of the UMI distributions for each sample.

    For every sample, the alignment file whose path is produced by
    ``prepare_file_name(sample, pt, 'sample_', '.bam')`` is opened, the
    unique and multi distributions are taken from ``umi_dist`` (its third
    return value is ignored), and each distribution is converted with
    ``bar_prep``.

    Note: ``pt`` is not a parameter; it is read from the enclosing scope.

    Args:
        samples: Iterable of sample identifiers.

    Returns:
        dict mapping each sample to the tuple ``(x1, y1, x2, y2)``, where
        ``(x1, y1)`` comes from the unique distribution and ``(x2, y2)``
        from the multi distribution.
    """
    bar_plot_data = {}
    for sample in samples:
        in_file = prepare_file_name(sample, pt, 'sample_', '.bam')
        # The context manager closes the alignment file even if a helper
        # raises. Everything that could still depend on the open handle is
        # kept inside the block.
        with pysam.AlignmentFile(in_file, "rb") as st:
            uniq_dist, multi_dist, _ = umi_dist(st)
            x1, y1 = bar_prep(uniq_dist)
            x2, y2 = bar_prep(multi_dist)
        bar_plot_data[sample] = (x1, y1, x2, y2)
    # Hand the collected data back to the caller; without this the mapping
    # built above would be thrown away.
    return bar_plot_data
# Fragment of the rank1 multimap solver script.
# NOTE(review): this line carries two ''' delimiters and its original line
# breaks are lost, so it cannot be told from here whether the text between
# the delimiters is the disabled (string-literal) part or the live part --
# confirm against the properly formatted file.
# The text between the delimiters: sets up INFO logging, defines `path` and
# `pt`, builds for each sample the BAM path (under `pt`) and the u_tab_/
# u_map_/m_tab_ pickle paths (under `path`), then opens the BAM with pysam
# and loads the three pickles.
# The statement after the second delimiter obtains m_tab from
# build_multimapping_hashtable(st, sample) rather than from the m_tab_ pickle.
# NOTE(review): the pickle files are opened with mode 'r' and never closed;
# on Python 3 pickle.load needs a binary ('rb') handle -- verify the target
# interpreter before re-enabling or reusing this code.
unique_savers_dist(savers, mm_hashtable, uniq_hashtable, draw_bar=True) ''' if __name__ == "__main__": logging.basicConfig(level=logging.INFO) logger.info('Call to rank1 multimap solver package.') path = '/data/UMI/data/MUS/R1/' pt = '/data/UMI/data/MUS/' file_names = [] samples = ['GACCGC'] for sample in samples: f1 = prepare_file_name(sample, pt, 'sample_', '.bam') f2 = prepare_file_name(sample, path, 'u_tab_', '.pkl') f3 = prepare_file_name(sample, path, 'u_map_', '.pkl') f4 = prepare_file_name(sample, path, 'm_tab_', '.pkl') file_names.append((sample, f1, f2, f3, f4)) for sample, f1, f2, f3, f4 in file_names: st = pysam.AlignmentFile(f1, "rb") u_tab = pickle.load(open(f2, 'r')) u_map = pickle.load(open(f3, 'r')) m_tab = pickle.load(open(f4, 'r')) ''' m_tab = build_multimapping_hashtable(st, sample)
# UMI distribution report: for every sample, load the three pickled
# summaries (summ_ut_, summ_mt_, summ_cnt_) found under `path` and log the
# distribution of UMIs among unique and multi reads.
# `samples`, `organism`, `path` and `logger` are read from the enclosing scope.
logger.info('Call to UMI distribution report module. \n')
logger.info('UMI distribution report for %d samples.', len(samples))
logger.info('Organism : %s\n', organism)

# enumerate() advances the running number for each sample; a counter that is
# set once and never incremented would label every sample "Sample no. 1".
for sample_counter, sample in enumerate(samples, 1):
    logger.info('\n')
    logger.info('-' * 100)
    logger.info('Sample no. %d, cellular barcode : %s', sample_counter, sample)
    logger.info('-' * 100)
    logger.info('\n')

    f1 = prepare_file_name(sample, path, 'summ_ut_', '.pkl')
    f2 = prepare_file_name(sample, path, 'summ_mt_', '.pkl')
    f3 = prepare_file_name(sample, path, 'summ_cnt_', '.pkl')

    # Pickles must be read from a binary handle ('rb' works on Python 2 and
    # 3, 'r' fails on Python 3); `with` closes each file right after loading.
    # NOTE: pickle.load can execute arbitrary code -- only load trusted files.
    with open(f1, 'rb') as pkl_file:
        uniq_summary = pickle.load(pkl_file)
    with open(f2, 'rb') as pkl_file:
        multi_summary = pickle.load(pkl_file)
    with open(f3, 'rb') as pkl_file:
        multi_stat_summary = pickle.load(pkl_file)

    logger.info('UMI distribution among unique reads:\n')
    logger.info('-' * 40)
    distribution_report(uniq_summary)
    logger.info('\n')

    logger.info('UMI distribution among multi reads:\n')
    logger.info('-' * 40)
    multi_read_stats_report(multi_stat_summary)
    distribution_report(multi_summary)
# Script entry point: write the double-mapped multi-read report for each
# sample to the console and to isoDmappedReport.log.
# `logger`, `organism` and `path` are read from the enclosing module scope.
if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)
    fh = logging.FileHandler('/data/UMI/data/MUS/DT/isoDmappedReport.log')
    fh.setLevel(logging.INFO)
    logger.addHandler(fh)

    # Bind the sample list before it is reported: logging len(samples) ahead
    # of this assignment either raises NameError or reports the size of an
    # unrelated, earlier `samples` value.
    samples = ['CCGGAC']

    logger.info('Call to double_mapped solver module. \n')
    logger.info('Double_mapped multi reads report for %d samples.', len(samples))
    logger.info('Organism : %s\n', organism)

    for sample_counter, sample in enumerate(samples, 1):
        logger.info('\n')
        logger.info('-' * 100)
        logger.info('Sample no. %d, cellular barcode : %s', sample_counter, sample)
        logger.info('-' * 100)
        logger.info('\n')

        in_file = prepare_file_name(sample, path, 'sample_', '.bam')
        # Close the alignment file as soon as the report for this sample is
        # done, even if iso_report raises.
        with pysam.AlignmentFile(in_file, "rb") as pysam_iter:
            iso_report(pysam_iter)
# Script entry point of the repeat solver: configures INFO-level logging, adds
# a file handler writing to RepeatDistReport.log, and logs a report header.
# It then builds one (input BAM path, output pickle path) pair per sample --
# BAM paths from `pt`, 'r2r_' pickle paths from `path` -- and iterates over
# the pairs, logging a banner, recording a start time and opening the BAM.
# `samples`, `organism`, `pt`, `path` and `logger` are not defined in this
# block; they are read from the enclosing module scope.
# NOTE(review): the loop body appears to continue beyond what is visible here
# (`start_time`, `st` and `out_file` are not used yet, `counter` is not
# advanced, and `st` is not closed) -- confirm against the full file.
if __name__ == "__main__": logging.basicConfig(level=logging.INFO) fh = logging.FileHandler('/data/UMI/data/MUS/RP/RepeatDistReport.log') fh.setLevel(logging.INFO) logger.addHandler(fh) logger.info('Call to Repeat Solver Library.') logger.info('Repeat distribution report for %d samples.' % len(samples)) logger.info('Organism : %s\n' % organism) file_names = [] for sample in samples: f1 = prepare_file_name(sample, pt, 'sample_', '.bam') f2 = prepare_file_name(sample, path, 'r2r_', '.pkl') file_names.append((f1, f2)) counter = 1 for in_file, out_file in file_names: logger.info('\n') logger.info('-' * 100) logger.info('Sample no. %d : %s' % (counter, in_file)) logger.info('-' * 100) logger.info('\n') start_time = time.time() st = pysam.AlignmentFile(in_file, "rb")