Exemplo n.º 1
0
def post_basecalling(BASECALLER_RESULTS,expName,resultsName,flows):
    """Generate basecaller statistics and plots for every available dataset.

    Reads ``datasets_basecaller.json`` from BASECALLER_RESULTS, runs ionstats
    on each dataset whose basecaller BAM exists on disk, merges the
    per-dataset statistics into ``ionstats_basecaller.json`` and renders the
    read-length and quality plots from the merged file.  All outputs are
    written into BASECALLER_RESULTS.

    Args:
        BASECALLER_RESULTS: Directory containing ``datasets_basecaller.json``
            and the BAM files it references.
        expName: Not used by this function; kept so existing callers work.
        resultsName: Not used by this function; kept so existing callers work.
        flows: Flow count (anything ``int()`` accepts) used to scale the
            x-axis of the plots.  If it cannot be converted, the axis limit
            falls back to 400.

    Raises:
        Exception: If ``datasets_basecaller.json`` is missing or cannot be
            read and parsed as JSON.
    """

    datasets_basecaller_path = os.path.join(BASECALLER_RESULTS,"datasets_basecaller.json")

    if not os.path.exists(datasets_basecaller_path):
        printtime("ERROR: %s does not exist" % datasets_basecaller_path)
        raise Exception("ERROR: %s does not exist" % datasets_basecaller_path)

    try:
        # The context manager closes the file even when json.load() raises.
        with open(datasets_basecaller_path,'r') as f:
            datasets_basecaller = json.load(f)
    except Exception:
        # Deliberately not a bare except: Ctrl-C / SystemExit must propagate.
        printtime("ERROR: problem parsing %s" % datasets_basecaller_path)
        raise Exception("ERROR: problem parsing %s" % datasets_basecaller_path)

    try:
        # Round 1.4% of the flow count up, then scale to a multiple of 50.
        graph_max_x = int(50 * math.ceil(0.014 * int(flows)))
    except (TypeError, ValueError, OverflowError):
        # flows was None, non-numeric, or not representable as an int.
        graph_max_x = 400

    merged_stats_path = os.path.join(BASECALLER_RESULTS,'ionstats_basecaller.json')

    quality_file_list = []
    for dataset in datasets_basecaller["datasets"]:
        bam_path = os.path.join(BASECALLER_RESULTS, dataset['basecaller_bam'])
        if not os.path.exists(bam_path):
            # No BAM was produced for this dataset; nothing to analyse.
            continue

        dataset_stats_path = os.path.join(
                BASECALLER_RESULTS, dataset['file_prefix']+'.ionstats_basecaller.json')

        # Call ionstats utility to generate alignment-independent metrics for current unmapped BAM
        ionstats.generate_ionstats_basecaller(
                bam_path,
                dataset_stats_path,
                graph_max_x)

        # Plot read length sparkline
        ionstats_plots.read_length_sparkline(
                dataset_stats_path,
                os.path.join(BASECALLER_RESULTS, dataset['file_prefix']+'.sparkline.png'),
                graph_max_x)

        quality_file_list.append(dataset_stats_path)

    # Merge ionstats_basecaller files from individual barcodes/dataset
    ionstats.reduce_stats(quality_file_list,merged_stats_path)

    # Generate legacy stats file: quality.summary
    # Joining with '' yields the results directory with a trailing separator.
    ionstats.generate_legacy_basecaller_files(
            merged_stats_path,
            os.path.join(BASECALLER_RESULTS,''))

    # Plot classic read length histogram
    ionstats_plots.old_read_length_histogram(
            merged_stats_path,
            os.path.join(BASECALLER_RESULTS,'readLenHisto.png'),
            graph_max_x)

    # Plot new read length histogram
    ionstats_plots.read_length_histogram(
            merged_stats_path,
            os.path.join(BASECALLER_RESULTS,'readLenHisto2.png'),
            graph_max_x)

    # Plot quality value histogram
    ionstats_plots.quality_histogram(
        merged_stats_path,
        os.path.join(BASECALLER_RESULTS,'quality_histogram.png'))

    printtime("Finished basecaller post processing")
Exemplo n.º 2
0
def create_ionstats(
        BASECALLER_RESULTS,
        ALIGNMENT_RESULTS,
        basecaller_meta_information,
        basecaller_datasets,
        graph_max_x,
        activate_barcode_filter):

    # TEST
    basecaller_bam_file_list = []
    alignment_bam_file_list = []


    ionstats_alignment_file_list = []
    ionstats_alignment_h5_file_list = []

    ionstats_basecaller_file_list = []

    for dataset in basecaller_datasets["datasets"]:

        keep_dataset = False
        for rg_name in dataset["read_groups"]:
            if not basecaller_datasets["read_groups"][rg_name].get('filtered',False):
                keep_dataset = True
        filtered = not keep_dataset

        # filter out based on flag
        if activate_barcode_filter:
            if filtered:
                continue

        # skip non-existing bam file
        if int(dataset["read_count"]) == 0:
            continue

        read_group = dataset['read_groups'][0]
        reference = basecaller_datasets['read_groups'][read_group]['reference']
        if reference and not filtered:

            # TEST
            alignment_bam_file_list.append(os.path.join(ALIGNMENT_RESULTS, dataset['file_prefix']+'.bam'))

            ionstats.generate_ionstats_alignment(
                [os.path.join(ALIGNMENT_RESULTS, dataset['file_prefix']+'.bam')],
                os.path.join(ALIGNMENT_RESULTS, dataset['file_prefix']+'.ionstats_alignment.json'),
                os.path.join(ALIGNMENT_RESULTS, dataset['file_prefix']+'.ionstats_error_summary.h5'),
                basecaller_meta_information,
                graph_max_x)

            ionstats_alignment_file_list.append(os.path.join(ALIGNMENT_RESULTS, dataset['file_prefix']+'.ionstats_alignment.json'))
            ionstats_alignment_h5_file_list.append(os.path.join(ALIGNMENT_RESULTS, dataset['file_prefix']+'.ionstats_error_summary.h5'))
        else:

            # TEST
            basecaller_bam_file_list.append(os.path.join(BASECALLER_RESULTS, dataset['basecaller_bam']))

            ionstats.generate_ionstats_basecaller(
                [os.path.join(BASECALLER_RESULTS, dataset['basecaller_bam'])],
                os.path.join(BASECALLER_RESULTS, dataset['file_prefix']+'.ionstats_basecaller.json'),
                os.path.join(BASECALLER_RESULTS, dataset['file_prefix']+'.ionstats_error_summary.h5'), # TODO, not needed
                basecaller_meta_information,
                graph_max_x)

            ionstats_basecaller_file_list.append(os.path.join(BASECALLER_RESULTS, dataset['file_prefix']+'.ionstats_basecaller.json'))


    # Merge ionstats files from individual (barcoded) datasets
    if len(ionstats_alignment_file_list) > 0:
        ionstats.reduce_stats(ionstats_alignment_file_list,os.path.join(ALIGNMENT_RESULTS,'ionstats_alignment.json'))
    else: # barcode classification filtered all barcodes or no reads available
        # TODO: ionstats needs to produce initial json file
        try:
            #cmd = "echo $'@HD\tVN:1.5\tSO:coordinate\n@SQ\tSN:ref\tLN:4\n@RG\tID:filename\tSM:filename' | samtools view -F4 -S -b - > empty_dummy.bam"
            cmd  = "echo  '@HD\tVN:1.5\tSO:coordinate\n@SQ\tSN:ref\tLN:4\n@RG\tID:filename\tSM:filename' | samtools view -F4 -S -b - > empty_dummy.bam"

            printtime("DEBUG: Calling '%s':" % cmd)
            ret = subprocess.call(cmd,shell=True)
            if ret != 1:
                printtime("ERROR: empty bam file generation failed, return code: %d" % ret)
                raise RuntimeError('exit code: %d' % ret)

            ionstats.generate_ionstats_alignment(
                ['empty_dummy.bam'],
                os.path.join(ALIGNMENT_RESULTS, 'ionstats_alignment.json'),
                os.path.join(ALIGNMENT_RESULTS, 'ionstats_error_summary.h5'),
                basecaller_meta_information,
                graph_max_x)

        except:
            pass

    if len(ionstats_basecaller_file_list) > 0:
        ionstats.reduce_stats(ionstats_basecaller_file_list,os.path.join(BASECALLER_RESULTS,'ionstats_tmp_basecaller.json'))
    else: # barcode classification filtered all barcodes or no reads available
        # TODO: ionstats needs to produce initial json file
        try:
            #cmd = "echo $'@HD\tVN:1.5\tSO:coordinate\n@SQ\tSN:ref\tLN:4\n@RG\tID:filename\tSM:filename' | samtools view -F4 -S -b - > empty_dummy.bam"
            cmd  = "echo  '@HD\tVN:1.5\tSO:coordinate\n@SQ\tSN:ref\tLN:4\n@RG\tID:filename\tSM:filename' | samtools view -F4 -S -b - > empty_dummy.bam"

            printtime("DEBUG: Calling '%s':" % cmd)
            ret = subprocess.call(cmd,shell=True)
            if ret != 1:
                printtime("ERROR: empty bam file generation failed, return code: %d" % ret)
                raise RuntimeError('exit code: %d' % ret)

            ionstats.generate_ionstats_basecaller(
                ['empty_dummy.bam'],
                os.path.join(BASECALLER_RESULTS, 'ionstats_tmp_basecaller.json'),
                os.path.join(BASECALLER_RESULTS, 'ionstats_tmp_error_summary.h5'), # TODO, not needed
                basecaller_meta_information,
                graph_max_x)
        except:
            pass


    ionstatslist = []
    a = os.path.join(ALIGNMENT_RESULTS,'ionstats_alignment.json')
    b = os.path.join(BASECALLER_RESULTS,'ionstats_tmp_basecaller.json')
    if os.path.exists(a):
        ionstatslist.append(a)
    if os.path.exists(b):
        ionstatslist.append(b)
    if len(ionstatslist) > 0:
        ionstats.reduce_stats( ionstatslist, os.path.join(BASECALLER_RESULTS,'ionstats_basecaller_with_aligninfos.json'))
        ionstats.reduce_stats( reversed(ionstatslist), os.path.join(BASECALLER_RESULTS,'ionstats_basecaller.json'))
    if len(ionstats_alignment_h5_file_list) > 0 and basecaller_meta_information:
        ionstats.reduce_stats_h5(ionstats_alignment_h5_file_list,os.path.join(ALIGNMENT_RESULTS,'ionstats_error_summary.h5'))

    '''