Exemplo n.º 1
0
def _existing_block_files(dirs, folder, filename):
    """Return the paths ``<block_dir>/<folder>/<filename>`` that exist on disk."""
    # TODO, remove this check and provide list with valid blocks
    candidates = [
        os.path.join(block_dir, folder, filename) for block_dir in dirs
    ]
    return [path for path in candidates if os.path.exists(path)]


def merge_ionstats(dirs, BASECALLER_RESULTS, ALIGNMENT_RESULTS,
                   basecaller_datasets):
    """Merge per-block ionstats files into composite files.

    For every dataset that is not filtered out, the per-block JSON statistics
    are reduced into ``<file_prefix>.composite_allblocks_<ionstats file>``.
    Alignment statistics are used when the dataset's first read group has a
    reference, basecaller statistics otherwise. For datasets with a reference
    the per-block ``<file_prefix>.ionstats_error_summary.h5`` files are
    reduced as well.

    Afterwards the un-prefixed per-block ``ionstats_alignment.json`` and
    ``ionstats_error_summary.h5`` files are reduced, and the per-dataset
    composites are reduced once more into ``composite_allbarcodes_*`` files
    (DEBUG: used to check if merging is commutative).

    Any exception raised while merging is logged through ``printtime`` and
    ``traceback.print_exc``; it is not propagated to the caller.

    Args:
        dirs: block directories; each is joined with the results folder.
        BASECALLER_RESULTS: folder name/path of the basecaller results.
        ALIGNMENT_RESULTS: folder name/path of the alignment results.
        basecaller_datasets: dict with a ``"datasets"`` list (entries carry
            ``"read_groups"`` and ``"file_prefix"``) and a ``"read_groups"``
            dict keyed by read group name (entries carry ``"reference"`` and
            optionally ``"filtered"``).
    """
    try:
        composite_filename_list = []
        composite_h5_filename_list = []

        read_group_info = basecaller_datasets["read_groups"]

        for dataset in basecaller_datasets["datasets"]:
            read_groups = dataset["read_groups"]

            # filter out based on flag: a dataset is kept as soon as one of
            # its read groups is not marked as filtered
            keep_dataset = any(
                not read_group_info[rg_name].get('filtered', False)
                for rg_name in read_groups)
            if not keep_dataset:
                # Report every read group of the dataset instead of relying
                # on a loop variable that is unbound for an empty list.
                printtime("INFO: filter out %s" % ", ".join(read_groups))
                continue

            file_prefix = dataset['file_prefix']
            reference = read_group_info[read_groups[0]]['reference']

            if reference:
                ionstats_folder = ALIGNMENT_RESULTS
                ionstats_file = 'ionstats_alignment.json'
            else:
                ionstats_folder = BASECALLER_RESULTS
                ionstats_file = 'ionstats_basecaller.json'

            block_filename_list = _existing_block_files(
                dirs, ionstats_folder, file_prefix + '.' + ionstats_file)
            composite_filename = os.path.join(
                ionstats_folder,
                file_prefix + '.composite_allblocks_' + ionstats_file)
            ionstats.reduce_stats(block_filename_list, composite_filename)
            composite_filename_list.append(composite_filename)

            if reference:
                h5_name = file_prefix + '.ionstats_error_summary.h5'
                block_h5_filename_list = _existing_block_files(
                    dirs, ALIGNMENT_RESULTS, h5_name)
                composite_h5_filename = os.path.join(ALIGNMENT_RESULTS,
                                                     h5_name)
                ionstats.reduce_stats_h5(block_h5_filename_list,
                                         composite_h5_filename)
                composite_h5_filename_list.append(composite_h5_filename)

        # Block-level (un-prefixed) alignment statistics.
        block_filename_list = _existing_block_files(
            dirs, ALIGNMENT_RESULTS, 'ionstats_alignment.json')
        ionstats.reduce_stats(
            block_filename_list,
            os.path.join(ALIGNMENT_RESULTS,
                         'composite_allblocks_ionstats_alignment.json'))

        # Block-level error summaries; only merged when at least one exists.
        block_h5_filename_list = _existing_block_files(
            dirs, ALIGNMENT_RESULTS, 'ionstats_error_summary.h5')
        if block_h5_filename_list:
            ionstats.reduce_stats_h5(
                block_h5_filename_list,
                os.path.join(ALIGNMENT_RESULTS,
                             'ionstats_error_summary.h5'))  # composite_allblocks

        # DEBUG: this is used to check if merging is commutative, the length
        # check is necessary in case all datasets are 'filtered' (e.g.)
        if composite_filename_list:
            ionstats.reduce_stats(
                composite_filename_list,
                os.path.join(ALIGNMENT_RESULTS,
                             'composite_allbarcodes_ionstats_alignment.json'))
        if composite_h5_filename_list:
            ionstats.reduce_stats_h5(
                composite_h5_filename_list,
                os.path.join(
                    ALIGNMENT_RESULTS,
                    'composite_allbarcodes_ionstats_error_summary.h5'))

    except Exception:
        # Merging is best-effort, but KeyboardInterrupt/SystemExit must still
        # be able to stop the process, hence no bare ``except:``.
        printtime("ERROR: Failed to merge ionstats_alignment.json")
        traceback.print_exc()
Exemplo n.º 2
0
def _existing_block_files(dirs, folder, filename):
    """Return the paths ``<block_dir>/<folder>/<filename>`` that exist on disk."""
    # TODO, remove this check and provide list with valid blocks
    candidates = [os.path.join(block_dir, folder, filename) for block_dir in dirs]
    return [path for path in candidates if os.path.exists(path)]


def merge_ionstats(dirs, BASECALLER_RESULTS, ALIGNMENT_RESULTS, basecaller_datasets):
    """Merge per-block ionstats files into composite files.

    For every dataset that is not filtered out, the per-block JSON statistics
    are reduced into ``<file_prefix>.composite_allblocks_<ionstats file>``.
    Alignment statistics are used when the dataset's first read group has a
    reference, basecaller statistics otherwise. For datasets with a reference
    the per-block ``<file_prefix>.ionstats_error_summary.h5`` files are
    reduced as well.

    Afterwards the un-prefixed per-block ``ionstats_alignment.json`` and
    ``ionstats_error_summary.h5`` files are reduced, and the per-dataset
    composites are reduced once more into ``composite_allbarcodes_*`` files
    (DEBUG: used to check if merging is commutative).

    Any exception raised while merging is logged through ``printtime`` and
    ``traceback.print_exc``; it is not propagated to the caller.

    Args:
        dirs: block directories; each is joined with the results folder.
        BASECALLER_RESULTS: folder name/path of the basecaller results.
        ALIGNMENT_RESULTS: folder name/path of the alignment results.
        basecaller_datasets: dict with a ``"datasets"`` list (entries carry
            ``"read_groups"`` and ``"file_prefix"``) and a ``"read_groups"``
            dict keyed by read group name (entries carry ``"reference"`` and
            optionally ``"filtered"``).
    """
    try:
        composite_filename_list = []
        composite_h5_filename_list = []

        read_group_info = basecaller_datasets["read_groups"]

        for dataset in basecaller_datasets["datasets"]:
            read_groups = dataset["read_groups"]

            # filter out based on flag: a dataset is kept as soon as one of
            # its read groups is not marked as filtered
            keep_dataset = any(
                not read_group_info[rg_name].get('filtered', False)
                for rg_name in read_groups)
            if not keep_dataset:
                # Report every read group of the dataset instead of relying
                # on a loop variable that is unbound for an empty list.
                printtime("INFO: filter out %s" % ", ".join(read_groups))
                continue

            file_prefix = dataset['file_prefix']
            reference = read_group_info[read_groups[0]]['reference']

            if reference:
                ionstats_folder = ALIGNMENT_RESULTS
                ionstats_file = 'ionstats_alignment.json'
            else:
                ionstats_folder = BASECALLER_RESULTS
                ionstats_file = 'ionstats_basecaller.json'

            block_filename_list = _existing_block_files(
                dirs, ionstats_folder, file_prefix + '.' + ionstats_file)
            composite_filename = os.path.join(
                ionstats_folder, file_prefix + '.composite_allblocks_' + ionstats_file)
            ionstats.reduce_stats(block_filename_list, composite_filename)
            composite_filename_list.append(composite_filename)

            if reference:
                h5_name = file_prefix + '.ionstats_error_summary.h5'
                block_h5_filename_list = _existing_block_files(dirs, ALIGNMENT_RESULTS, h5_name)
                composite_h5_filename = os.path.join(ALIGNMENT_RESULTS, h5_name)
                ionstats.reduce_stats_h5(block_h5_filename_list, composite_h5_filename)
                composite_h5_filename_list.append(composite_h5_filename)

        # Block-level (un-prefixed) alignment statistics.
        block_filename_list = _existing_block_files(
            dirs, ALIGNMENT_RESULTS, 'ionstats_alignment.json')
        ionstats.reduce_stats(
            block_filename_list,
            os.path.join(ALIGNMENT_RESULTS, 'composite_allblocks_ionstats_alignment.json'))

        # Block-level error summaries; only merged when at least one exists.
        block_h5_filename_list = _existing_block_files(
            dirs, ALIGNMENT_RESULTS, 'ionstats_error_summary.h5')
        if block_h5_filename_list:
            ionstats.reduce_stats_h5(
                block_h5_filename_list,
                os.path.join(ALIGNMENT_RESULTS, 'ionstats_error_summary.h5'))  # composite_allblocks

        # DEBUG: this is used to check if merging is commutative, the length
        # check is necessary in case all datasets are 'filtered' (e.g.)
        if composite_filename_list:
            ionstats.reduce_stats(
                composite_filename_list,
                os.path.join(ALIGNMENT_RESULTS, 'composite_allbarcodes_ionstats_alignment.json'))
        if composite_h5_filename_list:
            ionstats.reduce_stats_h5(
                composite_h5_filename_list,
                os.path.join(ALIGNMENT_RESULTS, 'composite_allbarcodes_ionstats_error_summary.h5'))

    except Exception:
        # Merging is best-effort, but KeyboardInterrupt/SystemExit must still
        # be able to stop the process, hence no bare ``except:``.
        printtime("ERROR: Failed to merge ionstats_alignment.json")
        traceback.print_exc()
Exemplo n.º 3
0
def _create_empty_dummy_bam():
    """Write a header-only ``empty_dummy.bam`` via ``echo | samtools view``.

    The file name is relative, so the BAM lands in the working directory of
    the spawned shell.

    Raises:
        RuntimeError: if the shell pipeline's return code is not 1.
    """
    cmd = "echo  '@HD\tVN:1.5\tSO:coordinate\n@SQ\tSN:ref\tLN:4\n@RG\tID:filename\tSM:filename' | samtools view -F4 -S -b - > empty_dummy.bam"

    printtime("DEBUG: Calling '%s':" % cmd)
    ret = subprocess.call(cmd, shell=True)
    # NOTE(review): a return code of 1 is what this code accepts as success;
    # presumably the samtools build in use exits with 1 on a header-only
    # input. Confirm before changing this to the conventional ``!= 0``.
    if ret != 1:
        printtime(
            "ERROR: empty bam file generation failed, return code: %d" % ret)
        raise RuntimeError('exit code: %d' % ret)


def create_ionstats(BASECALLER_RESULTS, ALIGNMENT_RESULTS,
                    basecaller_meta_information, basecaller_datasets,
                    graph_max_x, activate_barcode_filter, evaluate_hp):
    """Generate per-dataset ionstats files and merge them.

    Each dataset with a non-zero read count gets either alignment statistics
    (its first read group has a reference and the dataset is not filtered)
    or basecaller statistics. The per-dataset files are then reduced into
    ``ionstats_alignment.json`` and ``ionstats_tmp_basecaller.json``; when a
    list is empty, a placeholder is generated from a header-only BAM instead
    (best effort: failures are logged and otherwise ignored). Finally, the
    merged files that exist are combined into
    ``ionstats_basecaller_with_aligninfos.json`` and
    ``ionstats_basecaller.json``, and the per-dataset error summaries are
    reduced when ``evaluate_hp`` is set.

    Args:
        BASECALLER_RESULTS: folder name/path of the basecaller results.
        ALIGNMENT_RESULTS: folder name/path of the alignment results.
        basecaller_meta_information: forwarded to
            ``ionstats.generate_ionstats_alignment`` when ``evaluate_hp`` is
            true; must also be truthy for the final h5 merge to run.
        basecaller_datasets: dict with a ``"datasets"`` list and a
            ``"read_groups"`` dict keyed by read group name.
        graph_max_x: forwarded unchanged to the ionstats generators.
        activate_barcode_filter: when true, filtered datasets are skipped.
        evaluate_hp: when true, ``.ionstats_error_summary.h5`` files are
            produced and merged as well.
    """
    # TEST
    basecaller_bam_file_list = []
    alignment_bam_file_list = []

    ionstats_alignment_file_list = []
    ionstats_alignment_h5_file_list = []
    ionstats_basecaller_file_list = []

    merged_alignment_json = os.path.join(ALIGNMENT_RESULTS,
                                         'ionstats_alignment.json')
    merged_alignment_h5 = os.path.join(ALIGNMENT_RESULTS,
                                       'ionstats_error_summary.h5')
    tmp_basecaller_json = os.path.join(BASECALLER_RESULTS,
                                       'ionstats_tmp_basecaller.json')

    read_group_info = basecaller_datasets["read_groups"]

    for dataset in basecaller_datasets["datasets"]:

        # A dataset counts as filtered only if all its read groups are.
        filtered = all(
            read_group_info[rg_name].get('filtered', False)
            for rg_name in dataset["read_groups"])

        # filter out based on flag
        if activate_barcode_filter and filtered:
            continue

        # skip non-existing bam file
        if int(dataset["read_count"]) == 0:
            continue

        file_prefix = dataset['file_prefix']
        read_group = dataset['read_groups'][0]
        reference = read_group_info[read_group]['reference']

        if reference and not filtered:
            alignment_bam = os.path.join(ALIGNMENT_RESULTS,
                                         file_prefix + '.bam')
            alignment_json = os.path.join(
                ALIGNMENT_RESULTS, file_prefix + '.ionstats_alignment.json')
            alignment_h5 = os.path.join(
                ALIGNMENT_RESULTS, file_prefix + '.ionstats_error_summary.h5')

            # TEST
            alignment_bam_file_list.append(alignment_bam)

            ionstats.generate_ionstats_alignment(
                [alignment_bam],
                alignment_json,
                alignment_h5 if evaluate_hp else None,
                basecaller_meta_information if evaluate_hp else None,
                graph_max_x)

            ionstats_alignment_file_list.append(alignment_json)
            if evaluate_hp:
                ionstats_alignment_h5_file_list.append(alignment_h5)
        else:
            basecaller_bam = os.path.join(BASECALLER_RESULTS,
                                          dataset['basecaller_bam'])
            basecaller_json = os.path.join(
                BASECALLER_RESULTS, file_prefix + '.ionstats_basecaller.json')

            # TEST
            basecaller_bam_file_list.append(basecaller_bam)

            ionstats.generate_ionstats_basecaller(
                [basecaller_bam], basecaller_json, graph_max_x)

            ionstats_basecaller_file_list.append(basecaller_json)

    # Merge ionstats files from individual (barcoded) datasets
    if ionstats_alignment_file_list:
        ionstats.reduce_stats(ionstats_alignment_file_list,
                              merged_alignment_json)
    else:  # barcode classification filtered all barcodes or no reads available
        # TODO: ionstats needs to produce initial json file
        try:
            _create_empty_dummy_bam()
            ionstats.generate_ionstats_alignment(
                ['empty_dummy.bam'],
                merged_alignment_json,
                merged_alignment_h5 if evaluate_hp else None,
                basecaller_meta_information if evaluate_hp else None,
                graph_max_x)
        except Exception as err:
            # Still best-effort, but no longer silent.
            printtime(
                "WARNING: could not create placeholder ionstats_alignment.json: %s"
                % err)

    if ionstats_basecaller_file_list:
        ionstats.reduce_stats(ionstats_basecaller_file_list,
                              tmp_basecaller_json)
    else:  # barcode classification filtered all barcodes or no reads available
        # TODO: ionstats needs to produce initial json file
        try:
            _create_empty_dummy_bam()
            ionstats.generate_ionstats_basecaller(
                ['empty_dummy.bam'], tmp_basecaller_json, graph_max_x)
        except Exception as err:
            # Still best-effort, but no longer silent.
            printtime(
                "WARNING: could not create placeholder ionstats_tmp_basecaller.json: %s"
                % err)

    # Combine whichever merged files actually exist, alignment first.
    ionstatslist = [
        path for path in (merged_alignment_json, tmp_basecaller_json)
        if os.path.exists(path)
    ]
    if ionstatslist:
        ionstats.reduce_stats(
            ionstatslist,
            os.path.join(BASECALLER_RESULTS,
                         'ionstats_basecaller_with_aligninfos.json'))
        # Same inputs in reverse order for the plain basecaller summary.
        ionstats.reduce_stats(
            reversed(ionstatslist),
            os.path.join(BASECALLER_RESULTS, 'ionstats_basecaller.json'))
    if (evaluate_hp and ionstats_alignment_h5_file_list
            and basecaller_meta_information):
        ionstats.reduce_stats_h5(ionstats_alignment_h5_file_list,
                                 merged_alignment_h5)
    '''
Exemplo n.º 4
0
def _create_empty_dummy_bam():
    """Write a header-only ``empty_dummy.bam`` via ``echo | samtools view``.

    The file name is relative, so the BAM lands in the working directory of
    the spawned shell.

    Raises:
        RuntimeError: if the shell pipeline's return code is not 1.
    """
    cmd = "echo  '@HD\tVN:1.5\tSO:coordinate\n@SQ\tSN:ref\tLN:4\n@RG\tID:filename\tSM:filename' | samtools view -F4 -S -b - > empty_dummy.bam"

    printtime("DEBUG: Calling '%s':" % cmd)
    ret = subprocess.call(cmd, shell=True)
    # NOTE(review): a return code of 1 is what this code accepts as success;
    # presumably the samtools build in use exits with 1 on a header-only
    # input. Confirm before changing this to the conventional ``!= 0``.
    if ret != 1:
        printtime("ERROR: empty bam file generation failed, return code: %d" % ret)
        raise RuntimeError('exit code: %d' % ret)


def create_ionstats(
        BASECALLER_RESULTS,
        ALIGNMENT_RESULTS,
        basecaller_meta_information,
        basecaller_datasets,
        graph_max_x,
        activate_barcode_filter):
    """Generate per-dataset ionstats files and merge them.

    Each dataset with a non-zero read count gets either alignment statistics
    (its first read group has a reference and the dataset is not filtered)
    or basecaller statistics. The per-dataset files are then reduced into
    ``ionstats_alignment.json`` and ``ionstats_tmp_basecaller.json``; when a
    list is empty, a placeholder is generated from a header-only BAM instead
    (best effort: failures are logged and otherwise ignored). Finally, the
    merged files that exist are combined into
    ``ionstats_basecaller_with_aligninfos.json`` and
    ``ionstats_basecaller.json``, and the per-dataset error summaries are
    reduced into ``ionstats_error_summary.h5``.

    Args:
        BASECALLER_RESULTS: folder name/path of the basecaller results.
        ALIGNMENT_RESULTS: folder name/path of the alignment results.
        basecaller_meta_information: forwarded to the ionstats generators;
            must also be truthy for the final h5 merge to run.
        basecaller_datasets: dict with a ``"datasets"`` list and a
            ``"read_groups"`` dict keyed by read group name.
        graph_max_x: forwarded unchanged to the ionstats generators.
        activate_barcode_filter: when true, filtered datasets are skipped.
    """
    # TEST
    basecaller_bam_file_list = []
    alignment_bam_file_list = []

    ionstats_alignment_file_list = []
    ionstats_alignment_h5_file_list = []
    ionstats_basecaller_file_list = []

    merged_alignment_json = os.path.join(ALIGNMENT_RESULTS, 'ionstats_alignment.json')
    merged_alignment_h5 = os.path.join(ALIGNMENT_RESULTS, 'ionstats_error_summary.h5')
    tmp_basecaller_json = os.path.join(BASECALLER_RESULTS, 'ionstats_tmp_basecaller.json')

    read_group_info = basecaller_datasets["read_groups"]

    for dataset in basecaller_datasets["datasets"]:

        # A dataset counts as filtered only if all its read groups are.
        filtered = all(
            read_group_info[rg_name].get('filtered', False)
            for rg_name in dataset["read_groups"])

        # filter out based on flag
        if activate_barcode_filter and filtered:
            continue

        # skip non-existing bam file
        if int(dataset["read_count"]) == 0:
            continue

        file_prefix = dataset['file_prefix']
        read_group = dataset['read_groups'][0]
        reference = read_group_info[read_group]['reference']

        if reference and not filtered:
            alignment_bam = os.path.join(ALIGNMENT_RESULTS, file_prefix + '.bam')
            alignment_json = os.path.join(
                ALIGNMENT_RESULTS, file_prefix + '.ionstats_alignment.json')
            alignment_h5 = os.path.join(
                ALIGNMENT_RESULTS, file_prefix + '.ionstats_error_summary.h5')

            # TEST
            alignment_bam_file_list.append(alignment_bam)

            ionstats.generate_ionstats_alignment(
                [alignment_bam],
                alignment_json,
                alignment_h5,
                basecaller_meta_information,
                graph_max_x)

            ionstats_alignment_file_list.append(alignment_json)
            ionstats_alignment_h5_file_list.append(alignment_h5)
        else:
            basecaller_bam = os.path.join(BASECALLER_RESULTS, dataset['basecaller_bam'])
            basecaller_json = os.path.join(
                BASECALLER_RESULTS, file_prefix + '.ionstats_basecaller.json')

            # TEST
            basecaller_bam_file_list.append(basecaller_bam)

            ionstats.generate_ionstats_basecaller(
                [basecaller_bam],
                basecaller_json,
                os.path.join(BASECALLER_RESULTS,
                             file_prefix + '.ionstats_error_summary.h5'),  # TODO, not needed
                basecaller_meta_information,
                graph_max_x)

            ionstats_basecaller_file_list.append(basecaller_json)

    # Merge ionstats files from individual (barcoded) datasets
    if ionstats_alignment_file_list:
        ionstats.reduce_stats(ionstats_alignment_file_list, merged_alignment_json)
    else:  # barcode classification filtered all barcodes or no reads available
        # TODO: ionstats needs to produce initial json file
        try:
            _create_empty_dummy_bam()
            ionstats.generate_ionstats_alignment(
                ['empty_dummy.bam'],
                merged_alignment_json,
                merged_alignment_h5,
                basecaller_meta_information,
                graph_max_x)
        except Exception as err:
            # Still best-effort, but no longer silent.
            printtime(
                "WARNING: could not create placeholder ionstats_alignment.json: %s" % err)

    if ionstats_basecaller_file_list:
        ionstats.reduce_stats(ionstats_basecaller_file_list, tmp_basecaller_json)
    else:  # barcode classification filtered all barcodes or no reads available
        # TODO: ionstats needs to produce initial json file
        try:
            _create_empty_dummy_bam()
            ionstats.generate_ionstats_basecaller(
                ['empty_dummy.bam'],
                tmp_basecaller_json,
                os.path.join(BASECALLER_RESULTS,
                             'ionstats_tmp_error_summary.h5'),  # TODO, not needed
                basecaller_meta_information,
                graph_max_x)
        except Exception as err:
            # Still best-effort, but no longer silent.
            printtime(
                "WARNING: could not create placeholder ionstats_tmp_basecaller.json: %s" % err)

    # Combine whichever merged files actually exist, alignment first.
    ionstatslist = [
        path for path in (merged_alignment_json, tmp_basecaller_json)
        if os.path.exists(path)
    ]
    if ionstatslist:
        ionstats.reduce_stats(
            ionstatslist,
            os.path.join(BASECALLER_RESULTS, 'ionstats_basecaller_with_aligninfos.json'))
        # Same inputs in reverse order for the plain basecaller summary.
        ionstats.reduce_stats(
            reversed(ionstatslist),
            os.path.join(BASECALLER_RESULTS, 'ionstats_basecaller.json'))
    if ionstats_alignment_h5_file_list and basecaller_meta_information:
        ionstats.reduce_stats_h5(ionstats_alignment_h5_file_list, merged_alignment_h5)

    '''