Exemplo n.º 1
0
 def importIsoggSnps(self):
     '''
     import ISOGG SNPs

     runs the import as a fixed sequence of steps: class-level SNP setup,
     the read* helpers, table parsing, node-list and SNP-list preparation,
     the write* outputs, and finally a multi-allelic check.
     each helper is defined elsewhere; the order below is presumably
     significant (later steps likely consume what earlier ones load),
     so confirm before rearranging.
     '''

     # hand this object to the SNP class so it can set its class-level state
     SNP.setClassVariables(self)

     # read* steps: judging by their names, these load the name sets,
     # omit set, and corrections needed while parsing -- TODO confirm
     self.readPreferredSNPnameSet()
     self.readRepresentativeSNPnameSet()
     self.readIsoggMultiAllelicPosSet()
     self.readIsoggOmitSet()
     self.readIsoggCorrectionsDict()

     # parse the table, then prepare node and SNP orderings
     self.parseIsoggTable()
     self.setDepthFirstNodeList()
     self.sortSNPlistsAndSetRepresentatives()

     # write* steps and the final multi-allelic check
     self.writeIsoggCounts()
     self.writeUniqueSNPtable()
     self.writeNewick()
     self.checkMultiAllelics()
Exemplo n.º 2
0
    def prioritySortSNPlistAndSetHgSNP(self):
        '''
        orders this node's markers by priority and derives its label, self.hgSNP.

        the snp list (or, when it is empty, the dropped marker list) is
        replaced by its priority-sorted version. the standard label pairs the
        truncated haplogroup name with a representative SNP name, joined by
        a hyphen (e.g. R-V88). nodes without any marker borrow the parent's
        label plus '*' (leaf) or '+' (internal), with a numeric suffix added
        when that label is already taken. the final label is recorded in
        Node.hgSNPset.
        '''

        if self.isRoot():
            # the root has no markers: its haplogroup name is the label
            label = self.haplogroup

        elif self.snpList:
            # usual case: label comes from the top-ranked SNP
            self.snpList = SNP.prioritySortMarkerList(self.snpList)
            label = self.mostHighlyRankedSNP.hgSNP

        elif self.droppedMarkerList:
            # fallback: build the label from the top-ranked dropped marker
            self.droppedMarkerList = SNP.prioritySortMarkerList(
                self.droppedMarkerList)
            fallbackName = self.mostHighlyRankedDroppedMarker.name
            label = '%s-%s' % (self.hgTrunc, fallbackName)

        elif self.parent.hgSNP:
            # no markers at all: derive a star/plus label from the parent
            suffix = '*' if self.isLeaf() else '+'
            baseLabel = self.parent.hgSNP + suffix
            label = baseLabel

            # append the smallest positive integer that makes the label unique
            counter = 0
            while label in Node.hgSNPset:
                counter += 1
                label = '%s%d' % (baseLabel, counter)

        else:
            # parent has no label yet: warn and fall back to the haplogroup
            Node.errAndLog('WARNING. Attempted to set star label, ' + \
                           'but parent.hgSNP not set yet: %s\n' % self.haplogroup)
            label = self.haplogroup

        self.hgSNP = label
        Node.hgSNPset.add(self.hgSNP)
Exemplo n.º 3
0
 def snp(self, rsid):
     """Look up a SNP by its integer-only RSID.

     The RSID is used as a key into ``self._genome``. When the key is
     present, the stored genotype is wrapped element-wise in ``Nucleotide``
     and handed to ``_to_snp``. When a ``KeyError`` is raised, an SNP with an
     empty genotype and chromosome/position of 0 is returned instead.
     """
     try:
         record = self._genome[rsid]
         genotype, chromo, position = record
         nucleotides = map(Nucleotide, genotype)
         value = (nucleotides, chromo, position)
         found = _to_snp("rs%d" % rsid, self._orientation, value)
     except KeyError:
         # placeholder SNP for an RSID with no entry
         found = SNP([], "rs%d" % rsid, self._orientation, 0, 0)
     return found
Exemplo n.º 4
0
    def constructSNP(self, name, haplogroup, position, mutation):
        '''
        registers the SNP described by the arguments.

        nothing happens while self.hg2nodeDict is empty. otherwise:
        - if a SNP is already stored for (haplogroup, position) and its alleles
          agree, the new name is attached to it as an alias; if the alleles
          disagree, the program exits with an error.
        - else a new SNP is instantiated and added to the containers. if a SNP
          with the same position already exists and does not carry both
          alleles, the position is noted in self.multiAllelicNewPosSet.

        the ancestral and derived alleles are taken from characters 0 and 3
        of mutation (presumably a string such as 'A->G' -- confirm with caller).
        per the original notes, instantiating a SNP also adds it to the tree,
        which may grow the tree to include the corresponding node.
        '''

        if not self.hg2nodeDict:
            return

        ancestral = mutation[0]
        derived = mutation[3]
        snpKey = (haplogroup, position)

        # a SNP already exists for this haplogroup and position: alias or conflict
        if snpKey in self.snpDict:
            knownSNP = self.snpDict[snpKey]
            if not (knownSNP.isAncestral(ancestral) and
                    knownSNP.isDerived(derived)):
                clashingSNP = SNP(name, haplogroup, position, ancestral,
                                  derived)
                sys.exit('\n\nERROR! Conlicting SNPs:\n%s\n%s\n' % \
                         (knownSNP, clashingSNP))

            knownSNP.addName(name)
            self.snpDict[name] = knownSNP
            return

        # a different SNP shares this position: check for a new multi-allelic site
        if position in self.snpDict:
            alleles = self.snpDict[position].alleleSet
            if not (ancestral in alleles and derived in alleles):
                self.multiAllelicNewPosSet.add(position)

        # typical behavior: create the SNP and index it under every key
        freshSNP = SNP(name, haplogroup, position, ancestral, derived)
        self.snpDict[snpKey] = freshSNP
        self.snpDict[name] = freshSNP
        self.snpDict[position] = freshSNP
        self.snpList.append(freshSNP)
        self.snpPosSet.add(position)
        self.snpNameSet.add(name)
        self.isoggCountsDict['unique'] += 1
Exemplo n.º 5
0
    def updateWithBranchAssessment(self, ancSNPlist, derSNPlist):
        '''
        folds one branch's assessment into this path.

        adds the number of ancestral SNPs to self.numAncestral, appends the
        derived SNPs to self.derSNPlist, and, when any derived SNPs were
        given, resets self.mostDerivedSNP to the most highly ranked of them.
        push-through tracking: if the path has already pushed through, its
        push-through variables are updated with the two counts; otherwise
        they are initialized when this branch is exactly (anc,der)==(1,0).
        '''

        # take both counts up front, before self.derSNPlist is extended
        ancCount = len(ancSNPlist)
        derCount = len(derSNPlist)

        self.numAncestral += ancCount
        self.derSNPlist.extend(derSNPlist)
        if derSNPlist:
            self.mostDerivedSNP = SNP.mostHighlyRankedMarkerOnList(derSNPlist)

        if self.hasPushedThrough:
            self.updatePushThroughVars(ancCount, derCount)
            return

        if ancCount == 1 and derCount == 0:
            self.setPushThroughVars()
Exemplo n.º 6
0
def positions_from_csv(filename, min_allele_freq):
    '''
    Read SNP positions from a CSV file in the following format:
    chrom,pos,freq
    1,1110,0.02631
    1,2271,0.03125
    1,2402,0.03125
    1,4559,0.02777

    The file is resolved relative to the directory containing this module
    (an absolute filename is used as given). The first row is treated as a
    header and discarded. Blank rows are skipped.

    Returns a list of the SNP objects whose freq is strictly greater than
    min_allele_freq, in file order.

    Raises ValueError if a non-blank row does not have exactly three
    fields, or if a field cannot be converted to int/float.
    '''
    path = os.path.join(os.path.dirname(__file__), filename)
    SNPs = []
    with open(path) as f:
        reader = csv.reader(f)
        next(reader, None)  # discard the header row (no-op for an empty file)
        for row in reader:
            # csv.reader yields [] for a blank line; unpacking it would
            # raise ValueError, so skip such rows (e.g. trailing newlines)
            if not row:
                continue
            chrom, pos, freq = row
            snp = SNP(int(chrom), int(pos), float(freq))
            if snp.freq > min_allele_freq:
                SNPs.append(snp)

    return SNPs
Exemplo n.º 7
0
 # Build a SNP object for every usable record in variant_lines.
 # Each variant_line is indexed like a sequence of fields; variant_lines,
 # success_count and fail_count are assumed to be set up before this
 # fragment -- TODO confirm against the enclosing function.
 # NOTE(review): l_count is passed to SNP and printed as the line number, but
 # it is not incremented anywhere in the visible part of the loop -- confirm
 # it is advanced further down.
 l_count = 0
 snps = []
 for variant_line in variant_lines:
     # remove \n from end of line
     # variant_line = variant_line.strip()
     # remove " from lines
     # variant_line[0] = variant_line[0].translate(None, '"')
     # variant_line[-1] = variant_line[-1].translate(None, '"')
     if len(variant_line) >= 16:
         # full record: fields 5 and 6 are comma-separated lists,
         # fields 1 and 10 are converted to int
         success_count += 1
         context = variant_line[5].split(",")
         consequences = variant_line[6].split(",")
         snps.append(
             SNP(l_count, variant_line[0], int(variant_line[1]),
                 variant_line[2], variant_line[3], variant_line[4], context,
                 consequences,
                 variant_line[7], variant_line[8], variant_line[9],
                 int(variant_line[10]), variant_line[11], variant_line[12],
                 variant_line[13], variant_line[14], variant_line[15]))
     elif 16 > len(variant_line) > 4:
         # short record: keep the first six fields and fill the remaining
         # SNP arguments with "." placeholders
         # NOTE(review): a record with exactly 5 fields passes this test
         # but variant_line[5] below would raise IndexError -- confirm
         # whether the lower bound should be 5 rather than 4.
         success_count += 1
         # context = variant_line[5].split(",")
         # consequences = variant_line[6].split(",")
         snps.append(
             SNP(l_count, variant_line[0], int(variant_line[1]),
                 variant_line[2], variant_line[3], variant_line[4],
                 variant_line[5], ".", ".", ".", ".", ".", ".", ".", ".",
                 ".", "."))
     else:
         # record has 4 fields or fewer: report it and count it as a failure
         # NOTE(review): the range quoted in the message describes the
         # accepted lengths, not the condition that reaches this branch.
         print "INVALID DATA (16 >= length > 4) in Line {}".format(l_count)
         print variant_line
         fail_count += 1
Exemplo n.º 8
0
    def mostHighlyRankedSNP(self):
        '''
        returns whatever SNP.mostHighlyRankedMarkerOnList selects
        from self.snpList, i.e. the most highly ranked SNP.
        '''

        topSNP = SNP.mostHighlyRankedMarkerOnList(self.snpList)
        return topSNP
Exemplo n.º 9
0
    def mostHighlyRankedDroppedMarker(self):
        '''
        returns whatever SNP.mostHighlyRankedMarkerOnList selects
        from self.droppedMarkerList, i.e. the most highly ranked dropped marker.
        '''

        topMarker = SNP.mostHighlyRankedMarkerOnList(self.droppedMarkerList)
        return topMarker
Exemplo n.º 10
0
def _to_snp(rsid, orientation, value):
    """Build an SNP from an RSID, an orientation and a 3-item value.

    ``value`` is unpacked as (genotype, chromosome, position); the genotype
    is copied into a new list before being passed to ``SNP``.
    """
    nucleotides, chromosome, pos = value
    genotype_list = list(nucleotides)
    return SNP(genotype_list, rsid, orientation, chromosome, pos)