Пример #1
0
def prepare_plot_data(samples):
    """Collect bar-plot coordinates of the UMI distributions per sample.

    For every sample the BAM file named by
    ``prepare_file_name(sample, pt, 'sample_', '.bam')`` is opened
    (``pt`` is a module-level name), ``umi_dist`` is run on it, and the
    first two results (unique-read and multi-read distributions) are
    converted with ``bar_prep``.  The four resulting sequences are stored
    under the sample key as ``(x1, y1, x2, y2)``.

    Args:
        samples: Iterable of sample identifiers.

    NOTE(review): in the visible code the collected dictionary is never
    returned, so the call evaluates to ``None`` -- confirm whether a
    ``return bar_plot_data`` was intended.
    """
    bar_plot_data = {}

    for sample in samples:
        in_file = prepare_file_name(sample, pt, 'sample_', '.bam')

        # Use the alignment file as a context manager so the handle is
        # closed after each sample instead of leaking one per iteration.
        with pysam.AlignmentFile(in_file, "rb") as st:
            # The third value returned by umi_dist is not needed here.
            uniq_dist, multi_dist, _ = umi_dist(st)

            x1, y1 = bar_prep(uniq_dist)
            x2, y2 = bar_prep(multi_dist)

        bar_plot_data[sample] = (x1, y1, x2, y2)
Пример #2
0
    unique_savers_dist(savers, mm_hashtable, uniq_hashtable, draw_bar=True)
'''

# Script entry point of the rank1 multimap solver: builds the per-sample
# file names, then opens the BAM file and loads the three pickled tables
# for every sample.
if __name__ == "__main__":

    logging.basicConfig(level=logging.INFO)
    logger.info('Call to rank1 multimap solver package.')

    # Directories handed to prepare_file_name: `pt` for the BAM input,
    # `path` for the pickled tables.
    path = '/data/UMI/data/MUS/R1/'
    pt = '/data/UMI/data/MUS/'

    file_names = []
    samples = ['GACCGC']
    for sample in samples:

        f1 = prepare_file_name(sample, pt, 'sample_', '.bam')
        f2 = prepare_file_name(sample, path, 'u_tab_', '.pkl')
        f3 = prepare_file_name(sample, path, 'u_map_', '.pkl')
        f4 = prepare_file_name(sample, path, 'm_tab_', '.pkl')

        file_names.append((sample, f1, f2, f3, f4))

    for sample, f1, f2, f3, f4 in file_names:

        # Left open on purpose: the handle is bound to a module-level name
        # and may still be needed by code below this point.
        st = pysam.AlignmentFile(f1, "rb")

        # Pickle streams are binary data, so the files are read in 'rb'
        # mode (text mode fails on Python 3) and each handle is closed as
        # soon as its object is loaded.
        # NOTE: pickle.load can execute arbitrary code from the file; only
        # load tables produced by this pipeline.
        with open(f2, 'rb') as pkl_file:
            u_tab = pickle.load(pkl_file)
        with open(f3, 'rb') as pkl_file:
            u_map = pickle.load(pkl_file)
        with open(f4, 'rb') as pkl_file:
            m_tab = pickle.load(pkl_file)
        '''
        m_tab = build_multimapping_hashtable(st, sample)
Пример #3
0
    # Header of the UMI distribution report.
    logger.info('Call to UMI distribution report module. \n')

    logger.info('UMI distribution report for %d samples.' % len(samples))
    logger.info('Organism : %s\n' % organism)

    # 1-based running number shown in each sample banner.
    # NOTE(review): no increment of sample_counter is visible in this
    # excerpt -- confirm it is advanced further down in the loop body.
    sample_counter = 1

    for sample in samples:
        # Banner separating the reports of individual samples.
        logger.info('\n')
        logger.info('-' * 100)
        logger.info('Sample no. %d, cellular barcode : %s' %
                    (sample_counter, sample))
        logger.info('-' * 100)
        logger.info('\n')

        f1 = prepare_file_name(sample, path, 'summ_ut_', '.pkl')
        f2 = prepare_file_name(sample, path, 'summ_mt_', '.pkl')
        f3 = prepare_file_name(sample, path, 'summ_cnt_', '.pkl')

        # Pickle streams are binary data, so the files are read in 'rb'
        # mode (text mode fails on Python 3) and each handle is closed as
        # soon as its object is loaded.
        # NOTE: pickle.load can execute arbitrary code from the file; only
        # load summaries produced by this pipeline.
        with open(f1, 'rb') as pkl_file:
            uniq_summary = pickle.load(pkl_file)
        with open(f2, 'rb') as pkl_file:
            multi_summary = pickle.load(pkl_file)
        with open(f3, 'rb') as pkl_file:
            multi_stat_summary = pickle.load(pkl_file)

        logger.info('UMI distribution among unique reads:\n')
        logger.info('-' * 40)
        distribution_report(uniq_summary)
        logger.info('\n')
        logger.info('UMI distribution among multi reads:\n')
        logger.info('-' * 40)
        multi_read_stats_report(multi_stat_summary)
        distribution_report(multi_summary)
Пример #4
0
# Script entry point of the double_mapped solver: logs a report header and
# runs iso_report on the BAM file of every sample.
if __name__ == "__main__":

    logging.basicConfig(level=logging.INFO)

    # Additionally write the log records to a report file.
    fh = logging.FileHandler('/data/UMI/data/MUS/DT/isoDmappedReport.log')
    fh.setLevel(logging.INFO)
    logger.addHandler(fh)

    # The sample list must exist before the header below reads its length;
    # assigning it afterwards either raises NameError or reports the size
    # of a different, previously defined list.
    samples = ['CCGGAC']

    logger.info('Call to double_mapped solver module. \n')

    logger.info('Double_mapped multi reads report for %d samples.' %
                len(samples))
    logger.info('Organism : %s\n' % organism)

    # 1-based running number shown in each sample banner.
    sample_counter = 1

    for sample in samples:
        # Banner separating the reports of individual samples.
        logger.info('\n')
        logger.info('-' * 100)
        logger.info('Sample no. %d, cellular barcode : %s' %
                    (sample_counter, sample))
        logger.info('-' * 100)
        logger.info('\n')

        in_file = prepare_file_name(sample, path, 'sample_', '.bam')

        # Close the BAM handle as soon as the report for it is finished.
        with pysam.AlignmentFile(in_file, "rb") as pysam_iter:
            iso_report(pysam_iter)

        sample_counter += 1
Пример #5
0
# Script entry point of the repeat solver: logs a report header, builds an
# (input, output) file-name pair per sample and starts processing each pair.
# NOTE(review): `logger`, `samples`, `organism`, `pt` and `path` are not
# defined in this excerpt -- they are expected to exist at module level.
if __name__ == "__main__":

    logging.basicConfig(level=logging.INFO)
    # Additionally write the log records to a report file.
    fh = logging.FileHandler('/data/UMI/data/MUS/RP/RepeatDistReport.log')
    fh.setLevel(logging.INFO)
    logger.addHandler(fh)

    logger.info('Call to Repeat Solver Library.')

    logger.info('Repeat distribution report for %d samples.' % len(samples))
    logger.info('Organism : %s\n' % organism)

    # One (f1, f2) pair per sample: f1 is the '.bam' name built from `pt`,
    # f2 the 'r2r_' '.pkl' name built from `path`.
    file_names = []
    for sample in samples:

        f1 = prepare_file_name(sample, pt, 'sample_', '.bam')
        f2 = prepare_file_name(sample, path, 'r2r_', '.pkl')
        file_names.append((f1, f2))

    # 1-based running number shown in each sample banner.
    # NOTE(review): no increment is visible in this excerpt -- confirm it
    # is advanced further down in the loop body.
    counter = 1

    for in_file, out_file in file_names:
        # Banner separating the reports of individual samples.
        logger.info('\n')
        logger.info('-' * 100)
        logger.info('Sample no. %d : %s' % (counter, in_file))
        logger.info('-' * 100)
        logger.info('\n')

        # Timestamp taken before the BAM file is opened; presumably used
        # for timing further down (not visible here).
        start_time = time.time()

        st = pysam.AlignmentFile(in_file, "rb")