예제 #1
0
def process_test_vcf(orig_vcf, test_vcf, output, conf):
    """
    Compare a test VCF against an original VCF and report the outcome.

    The normalizers and comparators used are the core ones, extended with
    whatever ``load_components`` finds for the given configuration. No
    variant callers are handed to the processor (an empty dict is passed).

    :param orig_vcf: First argument forwarded to ``compare_test_vcf``
    :param test_vcf: Second argument forwarded to ``compare_test_vcf``
    :param output: Handed to ``JsonReporter``; results are reported through it
    :param conf: Configuration object
    """
    # Core normalizers first, then let configured components add/override.
    norm_registry = core_norms.get_normalizers()
    extra_norms = load_components(conf, 'normalizers', 'get_normalizers')
    norm_registry.update(extra_norms)

    # Same layering for comparators.
    comp_registry = core_comps.get_comparators()
    extra_comps = load_components(conf, 'comparators', 'get_comparators')
    comp_registry.update(extra_comps)

    reporter = JsonReporter(output)
    no_callers = {}
    runner = bp.VariantProcessor(no_callers, norm_registry, comp_registry, reporter, conf)
    runner.compare_test_vcf(orig_vcf, test_vcf)
예제 #2
0
def process_vcf(vcf, gt_default, conf, output, callers, fqs=None, snp_info=None, single_batch=False, keep_tmpdir=False, read_depth=250):
    """
    Perform analyses for each variant in the VCF file.

    :param vcf: Path to vcf file containing variants to process
    :param gt_default: Forwarded unchanged to ``process_batch`` (presumably the default genotype - confirm against VariantProcessor)
    :param conf: Configuration object
    :param output: Handed to ``JsonReporter``; results are reported through it
    :param callers: Names of the variant callers to use; if None or empty, every available caller is used
    :param fqs: Optional paths to read files; made absolute and forwarded to ``process_batch`` as ``reads``
    :param snp_info: Forwarded to ``process_batch`` as ``ex_snp``
    :param single_batch: Assume all variants in VCF are part of one batch and process them all simultaneously
    :param keep_tmpdir: Preserve tmpdirs created (otherwise delete them, unless they are flagged)
    :param read_depth: Forwarded to ``process_batch`` as ``read_depth``
    :raises KeyError: If a name in ``callers`` is not among the available variant callers
    """

    # Core components first, then let configured components add/override.
    variant_callers = core_callers.get_callers()
    variant_callers.update(load_components(conf, 'callers', 'get_callers'))

    normalizers = core_norms.get_normalizers()
    normalizers.update(load_components(conf, 'normalizers', 'get_normalizers'))

    comparators = core_comps.get_comparators()
    comparators.update(load_components(conf, 'comparators', 'get_comparators'))

    # Restrict to the requested callers, failing on the first unknown name.
    if callers:
        callers_to_use = {}
        for caller in callers:
            if caller not in variant_callers:
                raise KeyError('No variant caller ' + caller + ' found in callers')
            callers_to_use[caller] = variant_callers[caller]
        variant_callers = callers_to_use

    # Resolve read paths up front so they do not depend on the working
    # directory at the time a batch is processed.
    if fqs is not None:
        fqs = [os.path.abspath(fq) for fq in fqs]

    processor = bp.VariantProcessor(variant_callers, normalizers, comparators, JsonReporter(output), conf)
    logging.info("Processing variants in file %s", vcf)

    # Base name for tmpdirs: the input file name without its vcf/gz extensions.
    vcf_stem = util.strip_extensions(os.path.basename(vcf), ['vcf', 'gz'])

    if single_batch:
        logging.info("Processing all variants as one batch")
        tmp_dir = '{}-tmp'.format(vcf_stem)
        processor.process_batch(vcf, tmp_dir, gt_default, ex_snp=snp_info, keep_tmpdir=keep_tmpdir, read_depth=read_depth, reads=fqs)
        return

    batches = util.batch_variants(vcf, max_batch_size=1000, min_safe_dist=2000)
    batch_count = len(batches)
    for batchnum, batch_vcf in enumerate(batches, 1):
        logging.info('Processing batch #%s of %s', batchnum, batch_count)
        tmp_dir = '{}-batch{:03d}-tmp'.format(vcf_stem, batchnum)
        try:
            processor.process_batch(batch_vcf, tmp_dir, gt_default, ex_snp=snp_info, keep_tmpdir=keep_tmpdir, read_depth=read_depth, reads=fqs)
        finally:
            # The per-batch VCF is a temporary split of the input; remove it
            # even when processing fails so it does not get left behind.
            os.remove(batch_vcf)