Exemplo n.º 1
0
def check_format_ad_dp_tags(vcf, pcgr_directory, config_options, logger):
    """Check that the configured allelic-support INFO tags have the right Type.

    Reads the five tag names stored under ``config_options['allelic_support']``
    (``tumor_dp_tag``, ``tumor_af_tag``, ``normal_dp_tag``, ``normal_af_tag``,
    ``call_conf_tag``), hands each of them to
    ``annoutils.detect_reserved_info_tag`` and then scans the INFO entries of
    the VCF header for them.

    Args:
        vcf: Object exposing ``header_iter()``; each yielded element must
            provide ``info()`` returning a mapping of header fields
            (presumably a ``cyvcf2.VCF`` -- confirm against the caller).
        pcgr_directory: Not used by this function; kept so that existing
            callers keep working.
        config_options: Nested mapping holding the ``allelic_support`` section.
        logger: Logger receiving the info and warning messages.

    Returns:
        The result of ``pcgr_error_message(err_msg, logger)`` as soon as a
        configured tag is declared in the header with an unexpected Type,
        otherwise ``0``.
    """
    allelic_support = config_options['allelic_support']

    # (config key, Type required in the VCF header, description for messages).
    # The order of this table is the order in which header matches are tested
    # and logged for each header line.
    tag_specs = (
        ('tumor_dp_tag', 'Integer', 'tumor variant sequencing depth'),
        ('tumor_af_tag', 'Float', 'tumor variant allelic fraction'),
        ('normal_dp_tag', 'Integer', 'normal/control variant sequencing depth'),
        ('normal_af_tag', 'Float', 'normal/control allelic fraction'),
        ('call_conf_tag', 'String', 'variant call confidence'),
    )
    tags = {}
    found = {}
    for name, _, _ in tag_specs:
        tags[name] = allelic_support[name]
        found[name] = False

    for name in ('tumor_dp_tag', 'normal_dp_tag', 'tumor_af_tag',
                 'normal_af_tag', 'call_conf_tag'):
        annoutils.detect_reserved_info_tag(tags[name], name, logger)

    for element in vcf.header_iter():
        header = element.info()
        if 'ID' not in header or 'HeaderType' not in header:
            continue
        if header['HeaderType'] != 'INFO':
            continue
        # Every spec is tested independently (no early break): two config keys
        # may legitimately point at the same INFO ID.
        for name, expected_type, description in tag_specs:
            if header['ID'] != tags[name]:
                continue
            if header['Type'] != expected_type:
                err_msg = (
                    'INFO tag for {} ({} {}) is not correctly specified in '
                    'input VCF (Type={}), should be Type={}'
                ).format(description, name, tags[name], header['Type'],
                         expected_type)
                return pcgr_error_message(err_msg, logger)
            logger.info('Found INFO tag for {} ({} {}) in input VCF'.format(
                description, name, tags[name]))
            found[name] = True

    # An empty string means "tag not configured", so no warning is due for it.
    for name in ('call_conf_tag', 'tumor_dp_tag', 'tumor_af_tag',
                 'normal_dp_tag', 'normal_af_tag'):
        if tags[name] != '' and not found[name]:
            logger.warning(
                'Could not find the specified {} ({}) in INFO column of '
                'input VCF'.format(name, tags[name]))

    # Depth and allelic fraction are only useful as a pair; warn when exactly
    # one member of a pair was found.
    for dp_name, af_name in (('tumor_dp_tag', 'tumor_af_tag'),
                             ('normal_dp_tag', 'normal_af_tag')):
        if found[dp_name] == found[af_name]:
            continue
        missing = af_name if found[dp_name] else dp_name
        logger.warning(
            "BOTH '{}' AND '{}' need to be specified for use in tumor report "
            "('{}' is missing)".format(dp_name, af_name, missing))

    return 0
Exemplo n.º 2
0
def check_format_ad_dp_tags(vcf, pcgr_directory, config_options, logger):
    """Check that the configured allelic-support INFO tags have the right Type.

    Reads the five tag names stored under ``config_options['allelic_support']``
    (``tumor_dp_tag``, ``tumor_af_tag``, ``control_dp_tag``,
    ``control_af_tag``, ``call_conf_tag``), hands each of them to
    ``annoutils.detect_reserved_info_tag`` and then scans the INFO entries of
    the VCF header for them. Afterwards it warns about configured tags that
    were not found, about tumor-only germline filters that depend on a missing
    ``tumor_af_tag``, and about depth/fraction pairs of which only one member
    was found.

    Args:
        vcf: Object exposing ``header_iter()``; each yielded element must
            provide ``info()`` returning a mapping of header fields
            (presumably a ``cyvcf2.VCF`` -- confirm against the caller).
        pcgr_directory: Not used by this function; kept so that existing
            callers keep working.
        config_options: Nested mapping holding the ``allelic_support`` and
            ``tumor_only`` sections.
        logger: Logger receiving the info and warning messages.

    Returns:
        The result of ``pcgr_error_message(err_msg, logger)`` as soon as a
        configured tag is declared in the header with an unexpected Type,
        otherwise ``0``.
    """
    allelic_support = config_options['allelic_support']

    # (config key, Type required in the VCF header, description for messages).
    # The order of this table is the order in which header matches are tested
    # and logged for each header line.
    tag_specs = (
        ('tumor_dp_tag', 'Integer', 'tumor variant sequencing depth'),
        ('tumor_af_tag', 'Float', 'tumor variant allelic fraction'),
        ('control_dp_tag', 'Integer',
         'normal/control variant sequencing depth'),
        ('control_af_tag', 'Float', 'normal/control allelic fraction'),
        ('call_conf_tag', 'String', 'variant call confidence'),
    )
    tags = {}
    found = {}
    for name, _, _ in tag_specs:
        tags[name] = allelic_support[name]
        found[name] = False

    for name in ('tumor_dp_tag', 'control_dp_tag', 'tumor_af_tag',
                 'control_af_tag', 'call_conf_tag'):
        annoutils.detect_reserved_info_tag(tags[name], name, logger)

    for element in vcf.header_iter():
        header = element.info()
        if 'ID' not in header or 'HeaderType' not in header:
            continue
        if header['HeaderType'] != 'INFO':
            continue
        # Every spec is tested independently (no early break): two config keys
        # may legitimately point at the same INFO ID.
        for name, expected_type, description in tag_specs:
            if header['ID'] != tags[name]:
                continue
            if header['Type'] != expected_type:
                err_msg = (
                    'INFO tag for {} ({} {}) is not correctly specified in '
                    'input VCF (Type={}), should be Type={}'
                ).format(description, name, tags[name], header['Type'],
                         expected_type)
                return pcgr_error_message(err_msg, logger)
            logger.info('Found INFO tag for {} ({} {}) in input VCF'.format(
                description, name, tags[name]))
            found[name] = True

    # An empty string means "tag not configured", so no warning is due for it.
    for name in ('call_conf_tag', 'tumor_dp_tag', 'tumor_af_tag',
                 'control_dp_tag', 'control_af_tag'):
        if tags[name] != '' and not found[name]:
            logger.warning(
                'Could not find the specified {} ({}) in INFO column of '
                'input VCF'.format(name, tags[name]))

    # NOTE: a missing tumor_af_tag only produces warnings here; turning it
    # into a hard error when AF-based tumor-only filtering is switched on is
    # not implemented.
    tumor_only_options = config_options['tumor_only']
    for option, zygosity in (('exclude_likely_hom_germline', 'homozygous'),
                             ('exclude_likely_het_germline', 'heterozygous')):
        if (tumor_only_options[option] is True
                and tumor_only_options['vcf_tumor_only'] is True
                and not found['tumor_af_tag']):
            logger.warning(
                'Could not find the specified tumor_af_tag ({}) in INFO '
                'column of input VCF - filtering of {} germline variants in '
                'tumor-only mode will be ignored'.format(
                    tags['tumor_af_tag'], zygosity))

    # Depth and allelic fraction are only useful as a pair; warn when exactly
    # one member of a pair was found.
    for dp_name, af_name in (('tumor_dp_tag', 'tumor_af_tag'),
                             ('control_dp_tag', 'control_af_tag')):
        if found[dp_name] == found[af_name]:
            continue
        missing = af_name if found[dp_name] else dp_name
        logger.warning(
            "BOTH '{}' AND '{}' need to be specified for use in tumor report "
            "('{}' is missing)".format(dp_name, af_name, missing))

    return 0
Exemplo n.º 3
0
def check_format_ad_dp_tags(vcf,
                           tumor_dp_tag,
                           tumor_af_tag,
                           control_dp_tag,
                           control_af_tag,
                           call_conf_tag,
                           exclude_hom_germline,
                           exclude_het_germline,
                           tumor_only,
                           logger):

    """
    Function that checks whether the INFO tags specified for depth/allelic fraction
    are correctly formatted in the VCF header (i.e. Type).

    Each tag is first passed to ``annoutils.detect_reserved_info_tag``; the INFO
    entries of the VCF header are then scanned for the five tags. Afterwards
    warnings are logged for tags that were requested but not found, for
    tumor-only germline filters that depend on a missing ``tumor_af_tag``, and
    for depth/fraction pairs of which only one member was found.

    Args:
        vcf: Object exposing ``header_iter()``; each yielded element must provide
            ``info()`` returning a mapping of header fields (presumably a
            ``cyvcf2.VCF`` -- confirm against the caller).
        tumor_dp_tag: INFO tag for tumor sequencing depth (expected Type=Integer).
        tumor_af_tag: INFO tag for tumor allelic fraction (expected Type=Float).
        control_dp_tag: INFO tag for control sequencing depth (expected Type=Integer).
        control_af_tag: INFO tag for control allelic fraction (expected Type=Float).
        call_conf_tag: INFO tag for variant call confidence (expected Type=String).
        exclude_hom_germline: Only the literal ``True`` enables the related warning.
        exclude_het_germline: Only the literal ``True`` enables the related warning.
        tumor_only: Compared against ``1`` to decide whether tumor-only warnings apply.
        logger: Logger receiving the info and warning messages.

    Returns:
        The result of ``error_message(err_msg, logger)`` as soon as a tag is
        declared in the header with an unexpected Type, otherwise ``0``.
    """

    # (parameter name, configured tag, Type required in the header, description).
    # The order of this table is the order in which header matches are tested
    # and logged for each header line.
    tag_specs = (
        ('tumor_dp_tag', tumor_dp_tag, 'Integer', 'tumor variant sequencing depth'),
        ('tumor_af_tag', tumor_af_tag, 'Float', 'tumor variant allelic fraction'),
        ('control_dp_tag', control_dp_tag, 'Integer', 'normal/control variant sequencing depth'),
        ('control_af_tag', control_af_tag, 'Float', 'normal/control allelic fraction'),
        ('call_conf_tag', call_conf_tag, 'String', 'variant call confidence'),
    )
    tags = {name: tag for name, tag, _, _ in tag_specs}
    found = {name: False for name in tags}

    for name in ('tumor_dp_tag', 'control_dp_tag', 'tumor_af_tag', 'control_af_tag', 'call_conf_tag'):
        annoutils.detect_reserved_info_tag(tags[name], name, logger)

    for element in vcf.header_iter():
        header = element.info()
        if 'ID' not in header or 'HeaderType' not in header:
            continue
        if header['HeaderType'] != 'INFO':
            continue
        # Every spec is tested independently (no early break): two parameters
        # may legitimately point at the same INFO ID.
        for name, tag, expected_type, description in tag_specs:
            if header['ID'] != tag:
                continue
            if header['Type'] != expected_type:
                err_msg = (
                    f'INFO tag for {description} ({name} {tag}) is not correctly specified '
                    f'in input VCF (Type={header["Type"]}), should be Type={expected_type}'
                )
                return error_message(err_msg, logger)
            logger.info(f'Found INFO tag for {description} ({name} {tag}) in input VCF')
            found[name] = True

    # '_NA_' is the value that marks a tag as not requested, so no warning is due for it.
    for name in ('call_conf_tag', 'tumor_dp_tag', 'tumor_af_tag', 'control_dp_tag', 'control_af_tag'):
        if tags[name] != '_NA_' and not found[name]:
            logger.warning(f"Could not find the specified {name} ('{tags[name]}') in INFO column of input VCF")

    # NOTE: a missing tumor_af_tag only produces warnings here; turning it into
    # a hard error when AF-based tumor-only filtering is switched on is not
    # implemented.
    for enabled, zygosity in ((exclude_hom_germline, 'homozygous'), (exclude_het_germline, 'heterozygous')):
        if enabled is True and tumor_only == 1 and not found['tumor_af_tag']:
            logger.warning(
                f"Could not find the specified tumor_af_tag ('{tumor_af_tag}') in INFO column of input VCF"
                f" - filtering of {zygosity} germline variants in tumor-only mode will be ignored"
            )

    # Depth and allelic fraction are only useful as a pair; warn when exactly
    # one member of a pair was found.
    for dp_name, af_name in (('tumor_dp_tag', 'tumor_af_tag'), ('control_dp_tag', 'control_af_tag')):
        if found[dp_name] == found[af_name]:
            continue
        missing = af_name if found[dp_name] else dp_name
        logger.warning(
            f"BOTH '{dp_name}' AND '{af_name}' need to be specified for use in tumor report"
            f" ('{missing}' is missing)"
        )

    return 0