Python VCFReader.fetch примеры использования

Язык программирования: Python

Пространство имен/Пакет: medaka.vcf

Класс/Тип: VCFReader

Метод/Функция: fetch

Примеров на hotexamples.com: 3

Python VCFReader.fetch - 3 примера найдено. Это лучшие примеры Python кода для medaka.vcf.VCFReader.fetch, полученные из open source проектов. Вы можете ставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

VCFReader(8)

fetch(3)

Основные методы

VCFReader (8)

fetch (3)

Пример #1

Показать файл

    def test_002_check_split(self):
        """Split the merged VCF and compare each result with its haploid input.

        The files returned by ``split_variants`` are appended to
        ``self.split_fps`` and then paired, in order, with ``self.vcf1`` and
        ``self.vcf2``. Every record of an input file that is not explicitly
        excluded must be matched by exactly one record of the corresponding
        split file with identical chrom, pos, ref and alt.
        """
        self.split_fps.extend(split_variants(self.vcf_merged))

        # 1-based positions to skip. These are loci where two variants were
        # merged and cannot easily be separated without alignment, or where
        # an indel was rolled forwards by the merge-then-split round trip.
        skipped_hap1 = {675, 677, 1582, 1734, 1775}
        skipped_hap2 = {370, 1194}

        cases = zip([self.vcf1, self.vcf2], self.split_fps,
                    [skipped_hap1, skipped_hap2])
        for truth_path, split_path, skipped in cases:
            truth_reader = VCFReader(truth_path)
            split_reader = VCFReader(split_path)

            for truth in truth_reader.fetch():
                one_based_pos = truth.pos + 1
                if one_based_pos in skipped:
                    continue

                # Query the split file over the span of the reference allele
                # plus one extra position.
                window_end = truth.pos + len(truth.ref) + 1
                hits = list(
                    split_reader.fetch(truth.chrom, truth.pos, window_end))
                self.assertEqual(
                    len(hits), 1,
                    'Could not find split variant for {}:{}.'.format(
                        truth.chrom, one_based_pos))

                hit = hits[0]
                for field in ('chrom', 'pos', 'ref', 'alt'):
                    self.assertEqual(
                        getattr(truth, field), getattr(hit, field),
                        'Splitting failed for {}:{} {}.'.format(
                            truth.chrom, one_based_pos, field))

Пример #2

Показать файл

def merge_haploid_vcfs(vcf1, vcf2, vcf_out):
    """Merge SNPs from two haploid VCFs into an unphased diploid vcf.

    Every (chrom, pos) locus seen in either input is written once to the
    output, in sorted position order within each chromosome, with an
    unphased genotype ('0/1', '1/1' or '1/2') and a combined GQ.

    :param vcf1: first haploid VCF, handed to `VCFReader`.
    :param vcf2: second haploid VCF, handed to `VCFReader`.
    :param vcf_out: output VCF, handed to `VCFWriter` in 'w' mode.

    :raises AssertionError: if a locus does not resolve to one of the
        supported cases (a single record on one haplotype only, or exactly
        one record on each haplotype).
    """
    # The GQ is -10*log10(1-p(label)) where p(label) is the medaka consensus
    # probability. To combine two haplotypes we probably want to multiply the
    # (1-p(label)) values, i.e. add the GQ scores. However, for a
    # heterozygous SNP where one of the haplotypes is the reference, we
    # won't have the GQ value of the reference haplotype (no variant was
    # called). Hence, to stay on a common scale, we approximate the missing
    # GQ of the reference haplotype as being equal to that of the
    # non-reference haplotype and set the overall score to double the latter.
    #
    # Both helpers are defined once here rather than inside the per-locus
    # loop, since they do not depend on any loop state.
    def get_gq(v1, v2):
        if len(v1) == 1 and len(v2) == 1:
            return (float(v1[0].sample_dict['GQ'])
                    + float(v2[0].sample_dict['GQ']))
        v = v1[0] if len(v1) == 1 else v2[0]
        return 2 * float(v.sample_dict['GQ'])

    def get_ref(v1, v2):
        # Prefer the reference allele recorded on the first haplotype.
        return v1[0].ref if len(v1) == 1 else v2[0].ref

    loci_by_chrom = defaultdict(set)

    vcf1 = VCFReader(vcf1)
    vcf2 = VCFReader(vcf2)

    # First pass: collect the union of variant positions per chromosome.
    for v in chain(vcf1.fetch(), vcf2.fetch()):
        loci_by_chrom[v.chrom].add(v.pos)

    with VCFWriter(vcf_out, 'w', version='4.1') as vcf_writer:
        for chrom, loci in loci_by_chrom.items():
            for pos in sorted(loci):
                v1 = list(vcf1.fetch(ref_name=chrom, start=pos, end=pos+1))
                v2 = list(vcf2.fetch(ref_name=chrom, start=pos, end=pos+1))

                # Note we output unphased GTs as we might have multiple phased
                # regions and the phase can switch between regions.

                # heterozygous on v1
                if len(v1) == 1 and (len(v2) == 0 or v2[0].alt == ['.']):
                    alt = v1[0].alt
                    gt = '0/1'  # not 1/0 by convention since this is unphased
                # heterozygous on v2
                elif (len(v1) == 0 or v1[0].alt == ['.']) and len(v2) == 1:
                    alt = v2[0].alt
                    gt = '0/1'
                elif len(v1) == 1 and len(v2) == 1:
                    if v1[0].alt == v2[0].alt:  # homozygous snp
                        alt = v1[0].alt
                        gt = '1/1'
                    else:  # heterozygous snp with two different alt alleles
                        alt = v1[0].alt + v2[0].alt
                        gt = '1/2'
                else:
                    # Raised explicitly (instead of a bare `assert`) so the
                    # check is not stripped under `python -O`; the exception
                    # type is kept for backward compatibility.
                    raise AssertionError(
                        'Expected one record per haplotype at {}:{}, '
                        'found {} and {}.'.format(
                            chrom, pos, len(v1), len(v2)))

                gq = get_gq(v1, v2)
                v = Variant(
                    chrom, pos, get_ref(v1, v2), alt=alt, qual=gq,
                    sample_dict={'GT': gt, 'GQ': gq})
                vcf_writer.write_variant(v)

Пример #3

Показать файл

    def test_vcf_annotate(self):
        """Run ``annotate_vcf_n_reads`` and check every annotated record.

        The expected output is the three records below followed by the same
        three records under the contig name "Duplicate". The test checks
        both the number of records written and, in order, that each one
        equals its expected counterpart.
        """
        variants_annotated = [
                Variant('MN908947.3', 29748, 'ACGATCGAGTG', alt=['A'],
                    ident='.', qual=243.965, filt='PASS',
                    info='AR=0,0;DP=200;DPS=100,100;DPSP=199;SC=19484,20327,22036,23215;SR=1,2,98,98',
                    genotype_data=OrderedDict([('GT','1'), ('GQ', '244')])),
                Variant('MN908947.3', 29764, 'TGAACAATGCT',
                    alt=['A'], ident='.', qual=243.965, filt='PASS',
                    info='AR=0,0;DP=200;DPS=100,100;DPSP=199;SC=19970,21140,15773,16751;SR=99,100,0,0',
                    genotype_data=OrderedDict([('GT','1'), ('GQ', '244')])),
                Variant('MN908947.3', 29788, 'TATATGGAAGA',
                     alt=['A'], ident='.', qual=243.965, filt='PASS',
                    info='AR=0,0;DP=199;DPS=99,100;DPSP=197;SC=26174,28129,19085,20315;SR=96,100,1,0',
                    genotype_data=OrderedDict([('GT', '1'), ('GQ','244')]))]

        # Rename the copies directly instead of addressing them by a
        # hard-coded index range in the concatenated list.
        duplicates = deepcopy(variants_annotated)
        for dup in duplicates:
            dup.chrom = "Duplicate"
        variants_annotated = variants_annotated + duplicates

        with tempfile.NamedTemporaryFile() as vcfout:
            # Annotate vcf
            args = Namespace(RG=self.rg, vcf=self.vcf,ref_fasta=self.ref_fasta,
                            bam=self.bam, vcfout=vcfout.name,
                             chunk_size=100000, pad=25, dpsp=True)
            annotate_vcf_n_reads(args)

            # Read in output variants and compare with expected annotated variants
            vcf_reader = VCFReader(vcfout.name)
            annotated = list(vcf_reader.fetch())

            # Without this check an empty or truncated output file would
            # pass, because the loop below only visits records that exist.
            self.assertEqual(
                len(annotated), len(variants_annotated),
                'Expected {} annotated variants, found {}.'.format(
                    len(variants_annotated), len(annotated)))

            for i, (v, expected) in enumerate(
                    zip(annotated, variants_annotated)):
                self.assertEqual(v, expected,
                                 'Annotation failed for variant {}: {} {}.'.format(i, v.chrom, v.pos))