def compare_test_vcf(self, raw_orig_vcf, raw_test_vcf):
    """
    Compare the variants in a test VCF against those in an original VCF and
    hand the collected results to the reporter.

    Both paths are made absolute, then all intermediate work happens inside
    a util.TempDir context named after the test VCF. Every normalizer in
    self.normalizers is applied to both VCFs, every comparator in
    self.comparators is run on each normalized pair, and the combined
    comparator output is split into one result per region of the BED built
    from the original variants.

    :param raw_orig_vcf: Path to the VCF containing the original variants
    :param raw_test_vcf: Path to the VCF whose calls are being evaluated
    :raises ValueError: If no original variant can be found for a region
    """
    orig_path = os.path.abspath(raw_orig_vcf)
    test_path = os.path.abspath(raw_test_vcf)
    orig_vars = list(pysam.VariantFile(orig_path))

    scratch_name = util.strip_extensions(test_path, ['gz', 'vcf']) + "-vcomp-" + util.randstr()
    with util.TempDir(dirname=scratch_name):
        orig_vcf = util.bgz_tabix(orig_path, self.conf)
        # Half-calls are stripped from the test VCF before it goes through
        # the same util.bgz_tabix step as the original.
        test_vcf = util.bgz_tabix(util.remove_halfcalls(test_path), self.conf)
        caller_name = util.strip_extensions(test_vcf, ['gz', 'vcf'])
        bed = util.vars_to_bed(orig_vars)

        # var_results is keyed variant -> caller -> normalizer -> comparator.
        var_results = defaultdict(dict)
        bamstats = defaultdict(dict)
        var_quals = self.collect_var_quals({caller_name: test_vcf}, bed, orig_vcf)

        for normalizer_name, normalizer in self.normalizers.iteritems():
            logging.info("--> Running normalizer " + normalizer_name)
            norm_orig = normalizer(orig_vcf, self.conf)
            norm_test = normalizer(test_vcf, self.conf)

            for comparator_name, comparator in self.comparators.iteritems():
                message = "--> Running comparator " + comparator_name
                message = message + " (normalizer " + normalizer_name + ")"
                logging.info(message)

                combined = comparator(norm_orig, norm_test, None, self.conf)
                per_region = split_results(combined, bed)

                for region, region_result in zip(util.read_regions(bed), per_region):
                    match_vars = util.find_matching_var(orig_vcf, region)
                    if not match_vars:
                        raise ValueError('Unable to find original variant from region ' + str(region))

                    # Genotype of the first sample of the first matching
                    # variant, alleles joined with '/'.
                    genotype = "/".join(str(i) for i in match_vars[0].samples[0]['GT'])
                    region_result = compare_single_var(region_result,
                                                       region,
                                                       norm_orig,
                                                       norm_test,
                                                       comparator,
                                                       genotype,
                                                       self.conf)

                    key = var_key(match_vars)
                    by_caller = var_results[key]
                    if caller_name not in by_caller:
                        by_caller[caller_name] = defaultdict(dict)
                    by_caller[caller_name][normalizer_name][comparator_name] = region_result
                    bamstats[key] = {}

        # Reporting is deferred until every normalizer / comparator pair has
        # run, rather than being done inside the loops, so that the output
        # stays grouped by variant and is easier to read.
        # NOTE(review): the original indentation was lost; this call is
        # assumed to belong inside the TempDir context - confirm.
        self.reporter.write_output(var_results, var_quals, bamstats)
def test_rm_halfcalls(self):
    """
    Check the output of util.remove_halfcalls on the half-call test VCF.

    The filtered file must contain exactly three distinct positions, and
    none of the remaining records may have a '.' in the first
    colon-separated field of column 10.

    Assertion failures and unexpected errors propagate to the test runner;
    they must not be caught here, or the test could never fail.
    """
    input_vcf = os.path.join(TestUtils.DATA_PATH, TestUtils.HALFCALL_VCF)

    # Create the file outside the try block: if this raises there is
    # nothing to clean up, and the real error is reported instead of an
    # unbound-variable error from the cleanup code.
    no_halfcalls = util.remove_halfcalls(input_vcf)
    try:
        calls = {}
        with open(no_halfcalls) as fh:
            for line in fh:
                if line.startswith("#"):
                    continue
                toks = line.split()
                # Column 2 is the position, column 10 the first sample field.
                calls[toks[1]] = toks[9]

        self.assertEqual(len(calls), 3)
        for sample_field in calls.values():
            self.assertNotIn(".", sample_field.split(":")[0])
    finally:
        # Always remove the generated file, whether or not the test passed.
        os.remove(no_halfcalls)