Python find_matching_var примеры использования

Язык программирования: Python

Пространство имен/Пакет: vcomp.util

Метод/Функция: find_matching_var

Примеров на hotexamples.com: 4

Python find_matching_var — найдено 4 примера. Это лучшие примеры Python-кода для vcomp.util.find_matching_var, полученные из open-source-проектов. Вы можете поставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.

Пример #1

Показать файл

Файл: batch_processor.py Проект: brendanofallon/varcomp

 def collect_var_quals(self, caller_vars, bed, orig_vcf):
     """
     Gather, per input variant and per caller, the quality of the caller's matching variant.

     :param caller_vars: Mapping of caller name -> variant file that pysam.VariantFile can open
     :param bed: Regions source, read with util.read_regions
     :param orig_vcf: Original (input) variants, searched with util.find_matching_var
     :return: defaultdict indexed as [variant key][caller] holding the value of find_qual()

     NOTE(review): the original notes say missing variants (ref calls) receive MISSING_QUAL;
     that would be decided inside find_qual, which is not visible here - confirm.
     """
     quals_by_variant = defaultdict(dict)
     for region in util.read_regions(bed):
         # The key always comes from the original variant, never from a caller's output
         orig_match = util.find_matching_var(orig_vcf, region)
         variant_id = var_key(orig_match)
         for caller_name in caller_vars:
             caller_path = caller_vars[caller_name]
             # The caller's file is reopened for every region and closed right after the lookup
             with pysam.VariantFile(caller_path) as caller_file:
                 quals_by_variant[variant_id][caller_name] = find_qual(
                     util.find_matching_var(caller_file, region)
                 )
     return quals_by_variant

Пример #2

Показать файл

Файл: batch_processor.py Проект: brendanofallon/varcomp

 def collect_bam_stats(self, bam, bed, orig_vcf):
     """
     Build a dict of bam-level stats, one entry per bed region.

     :param bam: Bam handed unchanged to bam_simulation.gen_bam_stats
     :param bed: Regions source, read with util.read_regions
     :param orig_vcf: Original (input) variants used to derive each entry's key
     :return: defaultdict mapping var_key(original variant matching the region) -> stats
              returned by bam_simulation.gen_bam_stats for that region
     """
     stats_by_variant = defaultdict(dict)
     for region in util.read_regions(bed):
         orig_match = util.find_matching_var(orig_vcf, region)
         variant_id = var_key(orig_match)
         # If two regions yield the same key, the later region's stats replace the earlier ones
         region_stats = bam_simulation.gen_bam_stats(bam, region)
         stats_by_variant[variant_id] = region_stats
     return stats_by_variant

Пример #3

Показать файл

Файл: batch_processor.py Проект: brendanofallon/varcomp

    def compare_test_vcf(self, raw_orig_vcf, raw_test_vcf):
        """
        Compare the variants in a test vcf against those in an original vcf and report the results.

        Every combination of self.normalizers and self.comparators is run over the two files.
        Results are split per region (one region per original variant), compared individually
        with compare_single_var, and finally passed to self.reporter.write_output.

        :param raw_orig_vcf: Path to the vcf holding the original (expected) variants
        :param raw_test_vcf: Path to the vcf holding the variants under test
        :raises ValueError: If no original variant can be found for one of the regions
        :return: None; all output goes through self.reporter
        """
        # Resolve to absolute paths before util.TempDir is entered
        # NOTE(review): presumably TempDir changes the working directory - confirm
        raw_orig_vcf = os.path.abspath(raw_orig_vcf)
        raw_test_vcf = os.path.abspath(raw_test_vcf)
        orig_vars    = list(pysam.VariantFile(raw_orig_vcf))
        tmp_dirname  = util.strip_extensions(raw_test_vcf, ['gz','vcf']) + "-vcomp-" + util.randstr()

        with util.TempDir(dirname=tmp_dirname):
            orig_vcf = util.bgz_tabix(raw_orig_vcf, self.conf)
            test_vcf = util.remove_halfcalls(raw_test_vcf)
            test_vcf = util.bgz_tabix(test_vcf, self.conf)
            # The test file's name (minus extensions) stands in for the caller name in the results
            caller_name = util.strip_extensions(test_vcf, ['gz','vcf'])
            bed = util.vars_to_bed(orig_vars)
            var_results = defaultdict(dict)
            var_quals = self.collect_var_quals({caller_name: test_vcf}, bed, orig_vcf)
            bamstats = defaultdict(dict)

            # .items() rather than .iteritems(): the latter does not exist on Python 3
            for normalizer_name, normalizer in self.normalizers.items():
                logging.info("--> Running normalizer " + normalizer_name)
                normed_orig_vcf   = normalizer(orig_vcf, self.conf)
                normed_caller_vcf = normalizer(test_vcf, self.conf)

                for comparator_name, comparator in self.comparators.items():
                    logging.info("--> Running comparator " + comparator_name + " (normalizer " + normalizer_name + ")")
                    all_results = comparator(normed_orig_vcf, normed_caller_vcf, None, self.conf)
                    single_results = split_results(all_results, bed)
                    # Regions and per-region results are paired up positionally
                    for region, result in zip(util.read_regions(bed), single_results):
                        match_vars = util.find_matching_var(orig_vcf, region)
                        if not match_vars:
                            raise ValueError('Unable to find original variant from region ' + str(region))
                        # The genotype of the first matching variant's first sample is passed as "a/b"
                        result = compare_single_var(result,
                                                    region,
                                                    normed_orig_vcf,
                                                    normed_caller_vcf,
                                                    comparator,
                                                    "/".join(str(i) for i in match_vars[0].samples[0]['GT']),
                                                    self.conf)
                        key = var_key(match_vars)
                        if caller_name not in var_results[key]:
                            var_results[key][caller_name] = defaultdict(dict)
                        var_results[key][caller_name][normalizer_name][comparator_name] = result
                        # No bam-level stats are computed here; each variant gets an empty placeholder
                        bamstats[key] = {}

        # Report all results here instead of within the loops above,
        # because it keeps results organized by variant, which makes them easier to look at
        self.reporter.write_output(var_results, var_quals, bamstats)

Пример #4

Показать файл

Файл: batch_processor.py Проект: brendanofallon/varcomp

    def process_batch(self, vcf, batchname, gt_policy, ex_snp=None, keep_tmpdir=False, read_depth=250, reads=None):
        """
        Process the given batch of variants by creating a fake 'genome' with the variants, simulating reads from it,
         aligning the reads to make a bam file, then using different callers, variant normalizers, and variant
         comparison methods to generate results. The results are handed to self.reporter, whose output needs to
         be parsed by a separate utility to generate anything readable.

        Paths to the reference genome and other settings are read from self.conf.

        :param vcf: .vcf file containing variants to simulate
        :param batchname: Prefix of the temporary working directory's name (a random suffix is appended)
        :param gt_policy: Passed through to self.create_input_vcf
        :param ex_snp: Passed through to self.create_input_vcf (default None)
        :param keep_tmpdir: If true, the temp dir uses util.TempDir.NEVER_DELETE instead of
                            util.TempDir.DELETE_NO_EXCEPTION
        :param read_depth: Passed to bam_simulation.gen_alt_fq when reads are generated (default 250)
        :param reads: Reads to build the bam from; when None they are generated with
                      bam_simulation.gen_alt_fq
        :raises ValueError: If no original variant can be found for one of the regions
        :return: None; all output goes through self.reporter
        """
        raw_vars = list(pysam.VariantFile(vcf))

        tmpdir_del_policy = util.TempDir.DELETE_NO_EXCEPTION
        if keep_tmpdir:
            tmpdir_del_policy = util.TempDir.NEVER_DELETE

        tmp_dirname = batchname + "-" + util.randstr()
        with util.TempDir(dirname=tmp_dirname, del_policy=tmpdir_del_policy):
            ref_path = self.conf.get('main', 'ref_genome')
            var_results = defaultdict(dict)

            orig_vcf, variant_sets = self.create_input_vcf(raw_vars, ex_snp, gt_policy)
            bed = util.vars_to_bed(variant_sets)
            # Only simulate reads when the caller did not supply any
            if reads is None:
                reads = bam_simulation.gen_alt_fq(ref_path, variant_sets, read_depth)
            bam = bam_simulation.gen_alt_bam(ref_path, self.conf, reads)

            caller_variants = self.call_variants(bam, bed)
            bam_stats = self.collect_bam_stats(bam, bed, orig_vcf)
            var_quals = self.collect_var_quals(caller_variants, bed, orig_vcf)

            # .items() rather than .iteritems(): the latter does not exist on Python 3
            for normalizer_name, normalizer in self.normalizers.items():
                logging.info("--> Running normalizer " + normalizer_name)
                normed_orig_vcf = normalizer(orig_vcf, self.conf)

                for caller in caller_variants:
                    normed_caller_vcf = normalizer(caller_variants[caller], self.conf)

                    for comparator_name, comparator in self.comparators.items():
                        logging.info("--> Running comparator " + comparator_name + " (normalizer " + normalizer_name + ")")
                        all_results = comparator(normed_orig_vcf, normed_caller_vcf, None, self.conf)
                        single_results = split_results(all_results, bed)
                        # Regions and per-region results are paired up positionally
                        for region, result in zip(util.read_regions(bed), single_results):
                            match_vars = util.find_matching_var(orig_vcf, region)
                            if not match_vars:
                                raise ValueError('Unable to find original variant from region ' + str(region))
                            # The genotype of the first matching variant's first sample is passed as "a/b"
                            result = compare_single_var(result,
                                                        region,
                                                        normed_orig_vcf,
                                                        normed_caller_vcf,
                                                        comparator,
                                                        "/".join(str(i) for i in match_vars[0].samples[0]['GT']),
                                                        self.conf)
                            key = var_key(match_vars)
                            if caller not in var_results[key]:
                                var_results[key][caller] = defaultdict(dict)
                            var_results[key][caller][normalizer_name][comparator_name] = result
            # Report all results here instead of within the loops above,
            # because it keeps results organized by variant, which makes them easier to look at
            self.reporter.write_output(var_results, var_quals, bam_stats)