Python Variant.Variant 예제들, variant.Variant.Variant Python 예제들

예제 #1

0

파일 보기

def test_pc_iter_2():
    """Two adjacent substitutions must yield exactly two pseudocontigs.

    The check is run twice, with the third PseudocontigIterator argument set
    to 4 and then to 5.
    """
    seq = 'AAAAAAAAAA'
    #      0123456789
    #          CG

    variants = [
        Variant('t', 't', 4, 'A', 'C', 0.25),
        Variant('t', 't', 5, 'A', 'G', 0.25)
    ]

    def drain(iterator):
        # Keep calling next() until the iterator hands back a falsy value.
        collected = []
        current = iterator.next()
        while current:
            collected.append(current)
            current = iterator.next()
        return collected

    expectations = (
        (4, ('AAACAAA', 'AACGAA')),
        (5, ('AAAACAAAA', 'AAACGAAA')),
    )
    for size, expected_contigs in expectations:
        pcs = drain(PseudocontigIterator(seq, variants, size))
        assert 2 == len(pcs)
        for contig in expected_contigs:
            assert contig in pcs

예제 #2

0

파일 보기

    def group_inversions(cls, adjs):
        """Combine inversion adjacencies into 'INV' variants.

        Adjacencies are ordered by ``(chroms[0], breaks[0])`` and scanned as
        neighbouring pairs. Two neighbours are merged into one variant when
        they share ``chroms[0]``, the second's ``breaks[0]`` is at most 25
        past the first's, and their orientations are ('L', 'L') and
        ('R', 'R') in either order; the ('L', 'L') adjacency is listed first.
        Any other adjacency becomes a single-adjacency variant unless it is
        flagged ``dubious``.

        Returns the list of ``Variant`` objects that were built.
        """
        ordered = sorted(adjs, key=lambda adj: (adj.chroms[0], adj.breaks[0]))

        max_homology = 25
        left_left = ('L', 'L')
        right_right = ('R', 'R')
        variants = []
        last = len(ordered) - 1
        idx = 0
        while idx < last:
            first = ordered[idx]
            second = ordered[idx + 1]
            is_pair = (
                first.chroms[0] == second.chroms[0]
                and second.breaks[0] - first.breaks[0] <= max_homology
                and ((first.orients == left_left and second.orients == right_right)
                     or (first.orients == right_right and second.orients == left_left))
            )
            if is_pair:
                # The ('L', 'L') adjacency always goes first in the pair.
                if first.orients == left_left:
                    adj1, adj2 = first, second
                else:
                    adj1, adj2 = second, first
                variants.append(Variant('INV', [adj1, adj2]))
                idx += 2
                continue

            if not first.dubious:
                variants.append(Variant('INV', [first]))
            idx += 1

        # A trailing adjacency is left over when the scan did not consume it in a pair.
        if idx == last and not ordered[idx].dubious:
            variants.append(Variant('INV', [ordered[idx]]))

        return variants

예제 #3

0

파일 보기

def main():
    """Convert the VCF named by ``options.input_vcf`` into a '|'-separated file.

    A single ``Variant`` object is created from the reader's sample list (and
    ``options.ploidy``; ``organism_type='non_model'`` is added when
    ``options.non_model`` is set). Its header is written first, then one
    entry per record whose FILTER column is empty.

    Reads the module-level ``options``; writes to ``options.output_vcf``.
    """
    global options, args
    separator = '|'

    # The reader pulls records lazily, so the input handle has to stay open
    # for the whole conversion; the ``with`` block closes it afterwards.
    with open(options.input_vcf, 'r') as vcf_handle:
        input_vcf = vcf.Reader(vcf_handle)

        variant_kwargs = {'samples': input_vcf.samples,
                          'ploidy': options.ploidy}
        if options.non_model:
            variant_kwargs['organism_type'] = 'non_model'
        variant = Variant(**variant_kwargs)

        # Open output file
        with open(options.output_vcf, 'w') as output_psv:
            # Generate output file header
            output_psv.write(variant.create_psv_header(separator=separator))

            # Now parse lines in .vcf and output with new format:
            for record in input_vcf:
                # Only output sites that haven't been filtered out. FILTER is
                # an empty list for PASS and None when the column is ".", so
                # test truthiness instead of len() (len(None) raises).
                if not record.FILTER:
                    variant.get_from_record(record=record)
                    output_psv.write(variant.put_to_psv(separator=separator))

예제 #4

0

파일 보기

파일: test_variant.py 프로젝트: icwells/blastnWrapper

def getVariants():
    # Builds the test fixture: a dict mapping chromosome name to a list of Variants
    pid = "DCIS_1"
    rows = ["", ""]
    coordinates = (
        ("1", "100.0", "200.0"),
        ("1", "1025", "1119"),
        ("2", "25006", "25124"),
        ("X", "90045", "90157.5"),
    )
    ret = {}
    for chrom, start, end in coordinates:
        # Every Variant shares the same patient id and the same rows list.
        ret.setdefault(chrom, []).append(Variant(pid, chrom, start, end, rows))
    return ret

예제 #5

0

파일 보기

    def parse_MAF(self):
        ''' Parser for MAF rows. Builds a Variant from the parser's current row. '''

        row, fieldId, header, fn = self.row, self.fieldId, self.header, self.fn

        def whole_part(column):
            # Keep only what precedes a decimal point, e.g. "123.0" -> 123.
            return int(str(row[fieldId[column]]).split('.')[0])

        # NOTE: column names are looked up case-sensitively, so a file using
        # 'Start_Position' instead of 'Start_position' will not match.
        position = whole_part('Start_position')
        dp = whole_part('TTotCov')
        # Variant fraction = variant coverage / total coverage.
        vf = float(row[fieldId['TVarCov']]) / float(dp)
        chrom = str(row[fieldId['Chromosome']])
        ref = str(row[fieldId['Reference_Allele']])
        alt = str(row[fieldId['Tumor_Seq_Allele2']])
        effect, fc = self.eff, self.fc
        # A "-" allele is stored as the empty string.
        ref = "" if ref == "-" else ref
        alt = "" if alt == "-" else alt
        source_name = fn.split('/')[-1]
        return Variant(source=source_name,
                       pos=HTSeq.GenomicPosition(chrom, position),
                       ref=ref,
                       alt=alt,
                       frac=vf,
                       dp=dp,
                       eff=effect.strip(';'),
                       fc=fc.strip(';'))

예제 #6

0

파일 보기

    def group_trls(cls, adjs):
        """Group 2 translocation adjacencies into single reciprocal event.

        Non-dubious adjacencies are ordered by ``(chroms[0], breaks[0])`` and
        scanned as neighbouring pairs. Two neighbours form one 'TRL' variant
        when both chromosomes match, both breakpoints lie within 10000 of
        each other, and their orientations are one of the complementary
        combinations listed below.

        Returns ``(variants, trls_remained)``: the grouped variants and the
        non-dubious adjacencies that were not consumed by any group.
        """
        trls = sorted((adj for adj in adjs if not adj.dubious),
                      key=lambda adj: (adj.chroms[0], adj.breaks[0]))

        # Orientation pairs (first, second) that are accepted as one event.
        complementary = (
            (('L', 'R'), ('R', 'L')),
            (('R', 'L'), ('L', 'R')),
            (('L', 'L'), ('R', 'R')),
            (('R', 'R'), ('L', 'L')),
        )

        grouped_trl_ids = set()
        neighborhood = 10000
        variants = []
        i = 0
        while i < len(trls) - 1:
            first, second = trls[i], trls[i + 1]
            if (first.chroms[0] == second.chroms[0]
                    and first.chroms[1] == second.chroms[1]
                    and abs(second.breaks[0] - first.breaks[0]) <= neighborhood
                    and abs(second.breaks[1] - first.breaks[1]) <= neighborhood
                    and (first.orients, second.orients) in complementary):
                variants.append(Variant('TRL', [first, second]))
                grouped_trl_ids.add(first.id)
                grouped_trl_ids.add(second.id)
                i += 2
            else:
                i += 1

        # The id set must NOT be cleared here: it is what keeps adjacencies
        # that were already grouped out of the "remained" list.
        trls_remained = [trl for trl in trls if trl.id not in grouped_trl_ids]

        return variants, trls_remained

예제 #7

0

파일 보기

파일: mesh.py 프로젝트: alxgmpr/Mesh

    def get_product_skus(self, product):
        # Fetches the product's JSON and turns each entry under 'options'
        # into a Variant(size, SKU, stockStatus); returns the list of them.
        # A missing key in the response is logged and ends the process.
        logt(self.tid, 'fetching product variants')
        query = {
            "expand": "variations,informationBlocks,customisations",
            "channel": "iphone-app"
        }
        variants = []
        try:
            endpoint = "https://commerce.mesh.mx/stores/{}/products/{}".format(self.sitename, product)
            payload = requests.request(
                'GET',
                endpoint,
                headers=self.headers,
                params=query
            ).json()

            options = payload['options']
            for size in options:
                entry = options[size]
                sku = entry['SKU']
                stock_status = entry['stockStatus']
                logt(self.tid,"[size] {}  \t sku {} \t {}".format(size, sku, stock_status))
                variants.append(Variant(size, sku, stock_status))
            return variants
        except KeyError:
            logt(self.tid,"[error] exception while getting product info json")
            exit(-1)

예제 #8

0

파일 보기

    def parse_MuTectOUT(self):
        ''' Parser for MuTect '.out' rows. Builds a Variant from the parser's current row. '''

        row, fieldId, header, fn = self.row, self.fieldId, self.header, self.fn
        chrom, ref, alt = row[0], row[3], row[4]
        effect, fc = self.eff, self.fc
        vf = float(row[fieldId['tumor_f']])
        # Depth is the sum of the tumor reference and alternate read counts.
        ref_reads = int(str(row[fieldId['t_ref_count']]).strip())
        alt_reads = int(str(row[fieldId['t_alt_count']]).strip())
        dp = ref_reads + alt_reads
        position = int(row[fieldId['position']])

        source_name = fn.split('/')[-1]
        return Variant(source=source_name,
                       pos=HTSeq.GenomicPosition(chrom, position),
                       ref=ref,
                       alt=alt,
                       frac=vf,
                       dp=dp,
                       eff=effect.strip(';'),
                       fc=fc.strip(';'))

예제 #9

0

파일 보기

파일: normalisedvcf.py 프로젝트: jiaan-yu/scripts

    def process_vcf(self, cols):
        """Populate this object from the VCF file at ``self.name``.

        The leading ``##`` meta lines are split into INFO and FORMAT header
        dictionaries (the ``##FORMAT=<ID=AF`` line is skipped, other meta
        lines are appended to ``self.meta_info``, and ``##source=`` sets
        ``self.caller``). The first non-``##`` line is stored as
        ``self.header``; every following line is parsed into a ``Variant``,
        reduced to the selected INFO/FORMAT columns and stored in
        ``self.variants`` under its ``variant_key``.

        Args:
            cols: the columns requested by the user; passed to
                ``extract_cols`` and checked for ``'AF'``.

        Returns:
            ``self``.

        Exits the process (``sys.exit``) when no ``##source=`` line was found.
        """
        info_dict, format_dict = {}, {}

        # The handle is closed on every exit path, including sys.exit().
        with open(self.name, 'r') as vcf:
            # Read the meta-information lines from the vcf
            for i, line in enumerate(vcf):
                # Handle exceptions: the AF will be calculated regardless;
                if line.startswith('##FORMAT=<ID=AF'):
                    pass
                # Select the INFO/FORMAT lines
                elif line.startswith('##FORMAT'):
                    vcf_header = VcfHeader(line)
                    format_dict.update({vcf_header.meta_id: vcf_header})
                elif line.startswith('##INFO'):
                    vcf_header = VcfHeader(line)
                    info_dict.update({vcf_header.meta_id: vcf_header})
                # Keep other meta-info lines
                elif line.startswith('##'):
                    if line.startswith('##source='):
                        self.caller = line.replace('##source=', '').strip()
                    self.meta_info.append(line)
                else:
                    break
                # Only extract the (filtered) DP in the format: drop the INFO
                # DP only when FORMAT provides a DP of its own.
                if "DP" in info_dict and "DP" in format_dict:
                    info_dict.pop("DP", None)

            if not self.caller:
                sys.exit("Cannot identify caller from file {}\nPlease add caller "
                         "identify line '##source=(caller name)' to vcf header"
                         .format(self.name))

            # When user specify the AF and vcf does not have, try to calculate
            # that for the user
            if ('AF' in cols) and ('AF' not in info_dict):
                vcf_header = VcfHeader(AF_LINE)
                info_dict.update({vcf_header.meta_id: vcf_header})

            # Select the columns from INFO/FORMAT
            info_cols, format_cols = extract_cols(info_dict, format_dict, cols)

            # Add the INFO line (with caller) / FORMAT (unchanged) to header_list
            self.meta_info += [VcfHeader.write(VcfHeader.add_caller(v,
                               self.caller)) for k, v in info_cols.items()]
            self.meta_info += [VcfHeader.write(v) for k, v in format_cols.items()]

            # ``line`` is the line that ended the meta-information loop.
            self.header = line

            # Continue to read the file, this time the variants
            for j, line in enumerate(vcf):
                variant = Variant().process_variant(line, caller=self.caller)
                if variant.alt == '*':
                    # i is the 0-based index of the header line and j the
                    # 0-based index of this variant after it, so the 1-based
                    # file line number is i + j + 2.
                    print("Warning: Vcf {} line {} has variant with alt=*".format(self.caller, str(i + j + 2)))
                cleaned_variant = Variant.select_info(variant, info_cols, format_cols)
                # The dictionary is query by chr\tpos\tref\talt
                self.variants.update({cleaned_variant.variant_key: cleaned_variant})

        return self

예제 #10

0

파일 보기

    def parse_SomaticIndelDetector(self):
        ''' GATK SomaticIndelDetector vcf parser function. Input: InputParser object. Output: Variant object

        The variant fraction is (second AD value) / DP, both read from the
        sample column named by the last header entry. AD and DP may appear in
        any order in FORMAT. Raises ValueError when either one is missing.
        '''

        row = self.row
        fieldId = self.fieldId
        header = self.header
        fn = self.fn
        chrom = row[0]
        ref = row[3]
        alt = row[4]
        effect = self.eff
        fc = self.fc
        # Below attempts to grab sample ID.
        # assumes that sample ID is the final column in the self.header. always true?
        # if not always true, adopt the parse_mutect solution here as well
        tmpsampID = header[-1]

        format_keys = row[fieldId['FORMAT']].split(':')
        if 'AD' not in format_keys or 'DP' not in format_keys:
            raise ValueError(
                "FORMAT column must contain both AD and DP: {0}".format(
                    row[fieldId['FORMAT']]))

        # Look both values up by key so the result does not depend on AD
        # preceding DP in the FORMAT column.
        sample_values = row[fieldId[tmpsampID]].split(':')
        ALT_count = sample_values[format_keys.index('AD')].split(',')[1]
        # dp is passed on as the raw string from the file.
        dp = sample_values[format_keys.index('DP')]
        vf = float(ALT_count) / float(dp)

        position = int(row[fieldId['POS']])
        var = Variant(source=fn.split('/')[-1],
                      pos=HTSeq.GenomicPosition(chrom, int(position)),
                      ref=ref,
                      alt=alt,
                      frac=vf,
                      dp=dp,
                      eff=effect.strip(';'),
                      fc=fc.strip(';'))
        return var

예제 #11

0

파일 보기

파일: variantSet.py 프로젝트: yilmazOz/legos

    def parseVCF(self, file):
        # Reads the tab-separated VCF at path `file`, skipping lines that start
        # with "#", and appends one Variant per remaining line to the
        # private variants list.
        # NOTE(review): the last token keeps the line's trailing newline, and a
        # QUAL of "." will make float() raise; both are left as they were.

        #load the file to be parsed; the with-block closes it when done
        with open(file, "r") as fileReader:
            #loop over the file
            for line in fileReader:
                #see if line starts with # and skip
                if line.startswith("#"):
                    continue

                #tokenize the line
                lineTokens = line.split("\t")

                #set up the variables just so it clear what we are using
                chromosome = lineTokens[0]
                position = int(lineTokens[1])
                variantId = lineTokens[2]
                referenceAllele = lineTokens[3]
                alternateAllele = lineTokens[4]
                qualityScore = float(lineTokens[5])
                filterFlag = lineTokens[6]
                infoGroup = lineTokens[7]
                formatGroup = lineTokens[8]
                noneGroup = lineTokens[9]

                #create the variant and add it
                variant = Variant(chromosome, position, variantId,
                                  referenceAllele, alternateAllele,
                                  qualityScore, filterFlag, infoGroup,
                                  formatGroup, noneGroup)
                self.__variants.append(variant)

예제 #12

0

파일 보기

    def parse_SamTools(self):
        ''' Parser for samtools vcf rows. Builds a Variant from the first DP4 entry in INFO; returns None when there is none. '''

        row, fieldId, header, fn = self.row, self.fieldId, self.header, self.fn
        chrom, ref, alt = row[0], row[3], row[4]
        effect, fc = self.eff, self.fc
        position = int(row[fieldId['POS']])
        for entry in row[fieldId['INFO']].split(';'):
            if not entry.startswith("DP4="):
                continue
            counts = entry.split('=')[1].split(',')
            # First two counts are summed as reference reads, last two as alternate reads.
            ro = int(counts[0]) + int(counts[1])
            ao = int(counts[2]) + int(counts[3])
            dp = ro + ao
            vf = float(ao) / float(dp)
            source_name = fn.split('/')[-1]
            return Variant(source=source_name,
                           pos=HTSeq.GenomicPosition(chrom, position),
                           ref=ref,
                           alt=alt,
                           frac=vf,
                           dp=dp,
                           eff=effect.strip(';'),
                           fc=fc.strip(';'))
        return None

예제 #13

0

파일 보기

    def parse_VarScan(self):
        ''' Parser for varscan vcf rows. Builds a Variant from the parser's current row. '''

        row, fieldId, header, fn = self.row, self.fieldId, self.header, self.fn
        chrom, ref, alt = row[0], row[3], row[4]
        effect, fc = self.eff, self.fc
        position = int(row[fieldId['POS']])

        def sample_value(index):
            # Value at `index` in the column named by the last header entry.
            return row[fieldId[header[-1]]].split(':')[index]

        for index, key in enumerate(row[fieldId['FORMAT']].split(':')):
            if str(key) == "DP":
                dp = int(sample_value(index))
            if str(key) == "FREQ":
                # FREQ is a percentage such as "12.5%"; store it as a fraction.
                vf = float(str(sample_value(index)).strip('%')) / float(100)

        source_name = fn.split('/')[-1]
        return Variant(source=source_name,
                       pos=HTSeq.GenomicPosition(chrom, position),
                       ref=ref,
                       alt=alt,
                       frac=vf,
                       dp=dp,
                       eff=effect.strip(';'),
                       fc=fc.strip(';'))

예제 #14

0

파일 보기

    def parse_HapCaller(self):
        ''' GATK haplotype caller vcf parser function. Input: InputParser object. Output: Variant object

        DP and AD are read from the sample column named by the last header
        entry. The variant fraction is alt / (ref + alt) from the first two AD
        values, or 0.0 when that sum is zero. When FORMAT has no AD (or no
        DP), the row is printed to stderr and 0.0 is used instead.
        '''

        row = self.row
        fieldId = self.fieldId
        header = self.header
        fn = self.fn
        chrom = row[0]
        ref = row[3]
        alt = row[4]
        effect = self.eff
        fc = self.fc
        position = int(row[fieldId['POS']])

        # None marks "not found in FORMAT"; the INFO column's DP/AF are not used.
        vf = None
        dp = None
        for j, key in enumerate(row[fieldId['FORMAT']].split(':')):
            if key == "DP":
                dp = int(row[fieldId[header[-1]]].split(':')[j])
            if key == "AD":
                ad = str(row[fieldId[header[-1]]].split(':')[j])
                if ',' in ad:
                    ref_count = int(ad.split(',')[0])
                    alt_count = int(ad.split(',')[1])
                    try:
                        vf = float(alt_count) / (float(ref_count) +
                                                 float(alt_count))
                    except ZeroDivisionError:
                        # No reads for either allele.
                        vf = 0.0
                else:
                    abortWithMessage(
                        "Sample {0} may not have Haplotype Caller mutations with no ALT or vf"
                        .format(header[-1]))

        if vf is None:
            print(row, file=sys.stderr)
            vf = 0.0
        if dp is None:
            print(row, file=sys.stderr)
            dp = 0.0
        var = Variant(source=fn.split('/')[-1],
                      pos=HTSeq.GenomicPosition(chrom, int(position)),
                      ref=ref,
                      alt=alt,
                      frac=vf,
                      dp=dp,
                      eff=effect.strip(';'),
                      fc=fc.strip(';'))
        return var

예제 #15

0

파일 보기

    def parse_MiSeq(self):
        ''' Parser for MiSeq vcf rows. Builds a Variant from the parser's current row. '''

        row, fieldId, header, fn = self.row, self.fieldId, self.header, self.fn
        chrom, ref, alt = row[0], row[3], row[4]
        fc, effect = self.fc, self.eff
        for info_item in row[fieldId['INFO']].split(';'):
            if info_item.startswith("DP="):
                dp = info_item.split('=')[1]

            # If the MiSeq software reported functional consequence and effect
            # and none was supplied up front (fc is empty), the MiSeq
            # annotations are used instead.
            if info_item.startswith("FC=") and not fc:
                for annotation in info_item.split('=')[1].split(','):
                    pieces = annotation.split('_')
                    if str(pieces[0]) not in str(fc):
                        fc += str(pieces[0]) + ";"
                    try:
                        if str(pieces[1]) not in str(effect):
                            effect += str(pieces[1]) + ";"
                    except:
                        # NOTE(review): deliberately swallows everything, e.g.
                        # an annotation without a "_effect" part.
                        pass
            elif str(info_item) == "EXON":
                fc += 'EXON'

        # Fall back to "?" when nothing was collected.
        fc = fc if fc else str("?")
        effect = effect if effect else str("?")

        # Only VF is read from the sample column; AD is not used as a fallback.
        for index, key in enumerate(row[fieldId['FORMAT']].split(':')):
            if str(key) == "VF":
                vf = float(row[fieldId[header[-1]]].split(':')[index])

        position = int(row[fieldId['POS']])
        source_name = fn.split('/')[-1]
        return Variant(source=source_name,
                       pos=HTSeq.GenomicPosition(chrom, position),
                       ref=ref,
                       alt=alt,
                       frac=vf,
                       dp=dp,
                       eff=effect.strip(';'),
                       fc=fc.strip(';'))

예제 #16

0

파일 보기

def test_pc_iter_3():
    """Three adjacent substitutions: four expected pseudocontigs must be produced."""
    seq = 'AAAAAAAAAAA'
    #      01234567890
    #          CGT

    variants = [
        Variant('t', 't', position, 'A', base, 0.25)
        for position, base in ((4, 'C'), (5, 'G'), (6, 'T'))
    ]
    it = PseudocontigIterator(seq, variants, 4)

    # Collect until next() returns a falsy value.
    pcs = []
    while True:
        pc = it.next()
        if not pc:
            break
        pcs.append(pc)

    for expected in ('AAACAAA', 'AACGAA', 'ACATA', 'ACGTA'):
        assert expected in pcs

예제 #17

0

파일 보기

파일: variantSummary.py 프로젝트: icwells/blastnWrapper

 def __setVariant__(self, row):
     # Files one row of the variant file under self.variants[patient][chromosome]
     head = self.vhead
     pid = row[head["Patient"]]
     c = self.__setChromosome__(row[head["Chr"]])
     start = row[head["Start"]]
     end = row[head["End"]]
     name = row[head["Name"]]
     # Create the per-patient dict and per-chromosome list on first use.
     per_patient = self.variants.setdefault(pid, {})
     per_patient.setdefault(c, []).append(Variant(pid, c, start, end, row, name))

예제 #18

0

파일 보기

파일: varplotlib.py 프로젝트: ykoski/varplotlib

def variant_from_index_list(idx_list, line):
    # Inputs: idx_list - positions within `line` of, in order: chromosome,
    #   start, end, ref, alt, gene and variant type
    #   line - one line of the input file as a list
    # Returns a Variant built from those fields; spaces in the variant type
    #   become underscores and surrounding whitespace is removed.
    # NOTE(review): the last constructor argument is always None here - confirm
    #   against the Variant signature.
    chrom, start, end, ref, alt, gene = (line[idx_list[k]] for k in range(6))
    var_type = line[idx_list[6]].replace(" ", "_").strip()
    return Variant(chrom, start, end, ref, alt, gene, var_type, None)

예제 #19

0

파일 보기

def test_pc_iter_4():
    """A sequence containing an 'N' at index 3 must produce no pseudocontigs here."""
    seq = 'AAANAAAAA'
    #      012345678
    #          T

    variants = [Variant('t', 't', 4, 'A', 'T', 0.25)]
    it = PseudocontigIterator(seq, variants, 4)

    # Collect until next() returns a falsy value.
    pcs = []
    while True:
        pc = it.next()
        if not pc:
            break
        pcs.append(pc)

    assert 0 == len(pcs)

예제 #20

0

파일 보기

def test_pc_iter_deletion_2():
    """A three-base deletion must produce the single pseudocontig 'AAAAAA'."""
    seq = 'AAAAAAAAA'
    #      012345678
    #         xxx

    variants = [Variant('t', 't', 3, 'AAA', [''], 0.25)]
    it = PseudocontigIterator(seq, variants, 4)

    # Collect until next() returns a falsy value.
    pcs = []
    while True:
        pc = it.next()
        if not pc:
            break
        pcs.append(pc)

    assert 1 == len(pcs)
    assert 'AAAAAA' == pcs[0]

예제 #21

0

파일 보기

파일: varplotlib.py 프로젝트: ykoski/varplotlib

def parse_variants(line):
    # Parses variant information from annovar-annotated vcf-file.
    # Returns one Variant per gene reported by parse_info for this line.
    chromosome, start, end, ref, alt = line[0], line[1], line[2], line[3], line[4]
    info = line[7]
    genes, var_type, af = parse_info(info.split(';'))
    return [
        Variant(chromosome, start, end, ref, alt, gene, var_type, af, None)
        for gene in genes
    ]

예제 #22

0

파일 보기

def test_pc_iter_insertion_2():
    """A 'TT' insertion must produce the single pseudocontig 'AATTAA'."""
    seq = 'AAAAAAAAA'
    #      012345678
    #          ^
    #          TT

    variants = [Variant('t', 't', 4, '', ['TT'], 0.25)]
    it = PseudocontigIterator(seq, variants, 4)

    # Collect until next() returns a falsy value.
    pcs = []
    while True:
        pc = it.next()
        if not pc:
            break
        pcs.append(pc)

    assert 1 == len(pcs)
    assert 'AATTAA' == pcs[0]

예제 #23

0

파일 보기

파일: vaggregatefeature.py 프로젝트: xtmgah/mucor

    def uniqueVariants(self):
        '''Return the set of unique variants from the set of all variants (for this feature).

        Variants are unique by (contig, pos, ref, alt). For each unique tuple
        one Variant is built whose source, frac, dp, eff and fc fields are the
        ", "-joined values of all matching variants, in self.variants order.
        '''
        # Group in a single pass instead of rescanning every variant for every
        # unique tuple. No sorting is done: the result is a set, so any order
        # imposed here would be lost anyway.
        grouped = {}
        for var in self.variants:
            key = (var.pos.chrom, var.pos.pos, var.ref, var.alt)
            grouped.setdefault(key, []).append(var)

        def joined(values):
            # Same text as appending value + ", " repeatedly and then stripping ", ".
            return ", ".join(values).strip(", ")

        # Now construct a returnable set of Variant objects,
        # specifying multiple "sources" in the source field
        uniqueVariants = set()
        for (chrom, position, ref, alt), members in grouped.items():
            pos = HTSeq.GenomicPosition(chrom, position)
            uniqueVar = Variant(
                joined([member.source for member in members]),
                pos,
                ref=ref,
                alt=alt,
                frac=joined([str(member.frac) for member in members]),
                dp=joined([str(member.dp) for member in members]),
                eff=joined([str(member.eff) for member in members]),
                fc=joined([str(member.fc) for member in members]))
            uniqueVariants.add(uniqueVar)

        return uniqueVariants

예제 #24

0

파일 보기

    def parse_IonTorrent(self):
        ''' Ion Torrent vcf parser function. Input: InputParser object. Output: Variant object

        AO, RO and DP are read from the INFO column. A comma-separated AO is
        summed. The variant fraction is AO / (RO + AO). dp is passed on as the
        raw string from the file.
        '''

        row = self.row
        fieldId = self.fieldId
        header = self.header
        fn = self.fn
        chrom = row[0]
        ref = row[3]
        alt = row[4]
        effect = self.eff
        fc = self.fc
        for i in row[fieldId['INFO']].split(';'):
            if i.startswith("AO="):
                tempval = i.split('=')[1]
            if i.startswith("RO="):
                ro = i.split('=')[1]
            if i.startswith("DP="):
                dp = i.split("=")[1]
        if ',' in str(tempval):
            # The int() conversion is what can fail on malformed input, so it
            # has to sit inside the try for the abort message to be reachable.
            try:
                ao = sum(int(numeric_string)
                         for numeric_string in tempval.split(','))
            except ValueError:
                abortWithMessage(
                    "AO should be an int, or a list of ints: AO = {0}/".format(
                        tempval))
        else:
            ao = tempval
        vf = float(ao) / (float(ro) + float(ao))
        # The original re-read POS for deletions (REF longer than an ALT
        # allele); that yields the same value, so no special case is needed.
        position = int(row[fieldId['POS']])
        var = Variant(source=fn.split('/')[-1],
                      pos=HTSeq.GenomicPosition(chrom, int(position)),
                      ref=ref,
                      alt=alt,
                      frac=vf,
                      dp=dp,
                      eff=effect.strip(';'),
                      fc=fc.strip(';'))
        return var

예제 #25

0

파일 보기

    def parse_GenericGATK(self):
        ''' 
        Generic GATK parser function. This was written for the Illumina BaseSpace BWA Enrichment Workflow vcf files, but may apply to more filetypes
        Input: InputParser object. Output: Variant object 

        Depth is the sum of all AD values in the sample column named by the
        last header entry; the variant fraction is (sum of the alternate AD
        values) / depth, or 0 with a printed warning when the depth is zero.
        '''
        row = self.row
        fieldId = self.fieldId
        header = self.header
        fn = self.fn
        chrom = row[0]
        ref = row[3]
        alt = row[4]
        effect = self.eff
        fc = self.fc
        position = int(row[fieldId['POS']])
        for j, key in enumerate(row[fieldId['FORMAT']].split(':')):
            if key != "AD":
                continue
            depths = [
                int(x)
                for x in row[fieldId[header[-1]]].split(':')[j].split(',')
            ]
            ro = depths[0]
            # Sum every alternate allele: taking only the last value fails when
            # the mutation has two alternate alleles in the same VCF line.
            ao = sum(depths[1:])
            dp = ro + ao
            try:
                # one VF for all possible alternate alleles. Nothing unusual,
                # unless the mutation has multiple alt alleles in 1 vcf line
                vf = float(ao) / float(dp)
            except ZeroDivisionError:
                print("\nwarning: no vaf?\n" + str(row) + "\n")
                vf = 0
            break

        var = Variant(source=fn.split('/')[-1],
                      pos=HTSeq.GenomicPosition(chrom, int(position)),
                      ref=ref,
                      alt=alt,
                      frac=vf,
                      dp=dp,
                      eff=effect.strip(';'),
                      fc=fc.strip(';'))
        return var

예제 #26

0

파일 보기

def main():
    # Opens the annotated VCF named by options.input_vcf, creates one Variant
    # from the reader's samples, and for the first record calls
    # load_consequence() on each of its consequences.
    # NOTE(review): quit() is called unconditionally inside the record loop,
    # so the statements after it never run and only the first record is
    # handled. They also use `separator`, `uri`, `auth` and `requests`, none
    # of which are defined in this function, and a bare `raise` with no
    # active exception.
    global options, args
    # **********************
    # store in DBNLVar
    # **********************

    # Define connection parameters and perform connection:
    connection = httplib2.Http(".cache")
    # NOTE(review): credentials are hard-coded in source.
    connection.add_credentials('*****@*****.**',
                               'prueba')

    # Open annotation file and parse each line in it
    annotation_vcf = vcf.Reader(open(options.input_vcf, 'r'))

    # Load metadata in variant object
    variant = Variant(samples=annotation_vcf.samples)

    for record in annotation_vcf:
        # Load variant information in DBNLVar, from consequences
        variant.get_from_record(record=record)
        for consequence in variant.consequences:
            resp = load_consequence(consequence=consequence)

        # Stops the program here; everything below in this loop is unreachable.
        quit()
        # Store consequence non-relating data in DBNLVar
        if not check_record(table='chromosome',
                            value=chrom_to_number(record.CHROM)):
            #payload = {'id': chrom_to_number(record.CHROM), 'name': number_to_chrom(chrom_to_number(record.CHROM))}
            load_record(payload=record)

        # Store consequence relating data in DBNLVar
        for consequence in record.INFO['CSQ']:
            for index, annotation in enumerate(consequence.split(separator)):
                payload = {}

        resp = requests.get(uri + 'chromosome/id/3/24.json', auth=auth)
        print resp.json()
        if resp.status_code == 200:
            content = resp.json()['content']
        else:
            print "ERROR: Problem in query"
            raise
        print content

예제 #27

0

파일 보기

파일: variantSummary.py 프로젝트: icwells/blastnWrapper

 def __setBlastResults__(self, name, infile):
     # Reads infile (one sample per file) into self.results, keyed by chromosome.
     # The delimiter is detected from the first line, which is then handled
     # like every other line.
     delim = None
     with open(infile, "r") as f:
         for lineno, line in enumerate(f):
             if lineno == 0:
                 delim = getDelim(line)
             row = line.strip().split(delim)
             c = row[self.bhead["subjectid"]]
             pas = self.__evaluateRows__(row)
             # Only proceed if there is sufficient match quality and chromosome is present in variants
             if not (pas == True and c in self.variants[name].keys()):
                 continue
             qid = row[self.bhead["queryid"]]
             start = row[self.bhead["sstart"]]
             end = row[self.bhead["send"]]
             self.results.setdefault(c, []).append(Variant(qid, c, start, end, row))

예제 #28

0

파일 보기

def main():
    # Formats two HGVS names for fixed coordinates on transcript
    # ENST00000359218.5, prints them, and then stops via quit().
    # NOTE(review): because of that unconditional quit(), the VCF-to-PSV
    # conversion below it is never reached. `genome` and `get_transcript`
    # are not defined in this function.
    global options, args
    separator = '|'

    # Parse the HGVS name into genomic coordinates and alleles.
    #chrom, offset, ref, alt = hgvs.parse_hgvs_name('ENST00000515609.1:c.30G>T', genome, get_transcript=get_transcript)
    #print chrom, offset, ref, alt

    # Format an HGVS name.
    chrom, offset, ref, alt = ('chr2', 179616770, 'GAA', 'G')
    transcript = get_transcript('ENST00000359218.5')
    hgvs_name = hgvs.format_hgvs_name(chrom, offset, ref, alt, genome,
                                      transcript)
    print hgvs_name
    # Same position and transcript, different alternate allele.
    chrom, offset, ref, alt = ('chr2', 179616770, 'GAA', 'GA')
    transcript = get_transcript('ENST00000359218.5')
    hgvs_name = hgvs.format_hgvs_name(chrom, offset, ref, alt, genome,
                                      transcript)
    # These two HGVSName objects are built but not used afterwards.
    hgvs_var = hgvs.HGVSName(hgvs_name)
    hgvs_str = 'ENST00000359218.5:c.10597+1079_10597+1080delTTinsT'
    hgvs_var2 = hgvs.HGVSName(hgvs_str)

    print hgvs_name
    # Stops the program here; everything below is unreachable.
    quit()

    # Open and parse each line of the vcf file
    input_vcf = vcf.Reader(open(options.input_vcf, 'r'))
    variant = Variant(samples=input_vcf.samples)

    # Open output file
    with open(options.output_vcf, 'w') as output_psv:
        # Generate output file header
        #variant = ConsequenceType(input_vcf.samples)
        output_psv.write(variant.create_psv_header(separator=separator))

        # Now parse lines in .vcf and output with new format:
        for record in input_vcf:
            # Only output sites that hasn't been filtered out
            if len(record.FILTER) == 0:
                #for consequence in range(0, len(record.INFO['CSQ'])):
                variant.get_from_record(record=record)
                output_psv.write(variant.put_to_psv(separator=separator))

예제 #29

0

파일 보기

def variantParser(variantFile):
    # Reads tab-delimited rows from variantFile, skips the first row, and
    # returns a dict mapping column 1 of each remaining row to a Variant
    # built from columns 1, 2, 18, 5, 6 and 4 (in that argument order).
    rows = reader(variantFile, delimiter="\t")
    next(rows, None)  # discard the first row

    returnDict = dict()
    for ro in rows:
        # Pull out the variant determinants.
        iposition, iancestral = ro[1], ro[2]
        icounts, iTotCounts = ro[5], ro[4]
        ifrequency = ro[6]
        isubstitution = ro[18]

        # Map the variant information to its position.
        returnDict[iposition] = Variant(iposition, iancestral, isubstitution,
                                        icounts, ifrequency, iTotCounts)
    return returnDict

예제 #30

0

파일 보기

파일: shopify.py 프로젝트: blitzh0t/Shopify2Open

 def get_product_variants(self, product):
     # Fetches '<product.url>.json' and returns a list with one
     # Variant(id, title) per entry under 'variants'.
     # Returns None when the server answers with an HTTP error status.
     # Raises Exception when `product` is not a Product.
     if not isinstance(product, Product):
         raise Exception('Expected product object')
     log('[{}.json] Getting product variants'.format(product.url), color='blue')
     endpoint = '{}.json'.format(product.url)
     # NOTE(review): verify=False turns off TLS certificate verification.
     r = self.S.get(
         endpoint,
         headers=self.headers,
         verify=False
     )
     try:
         r.raise_for_status()
     except requests.exceptions.HTTPError:
         log('[error][{}][{}.json] Failed to get variants'.format(r.status_code, product.url), color='red')
         return None
     # r.json() returns the decoded body, which is not a context manager, so
     # it must be used directly and not in a `with` statement.
     payload = r.json()
     return [Variant(var['id'], var['title']) for var in payload['variants']]