Пример #1
0
    def getGeneticChanges(self, abspos, variants):
        """ Given the possible variants at chromosomal position abspos, determine
            any "interesting" genetic changes induced by each variant.  That would
            be any changes modifying a splice-site or interfering with the promoter
            region.

            This analysis is currently disabled: the method always returns an
            empty list, whatever abspos and variants are.

            abspos   -- chromosomal position of the variant (unused while disabled)
            variants -- candidate bases at that position (unused while disabled)

            Returns an empty list.
        """
        ### The previous body fetched the txStart-txEnd genome span and then
        ### hit an unconditional "return []", so the per-variant comparison
        ### after it could never run.  That unreachable code also relied on
        ### names that were never defined (positionInSequence, resp) and
        ### returned the string 'Promoter?' instead of a list when the
        ### relative position was negative.  The stub is now explicit and no
        ### longer pays for a sequence fetch whose result was discarded.
        ###
        ### TODO: implement the splice-site / promoter checks.  The disabled
        ### sketch intended, for every variant other than the reference base
        ### or '-', to record a 'NUCLEOTIDE=<ref>/<variant>' entry for the
        ### base at self.getRelpos(abspos) within the txStart-txEnd span.
        return []
Пример #2
0
    def getGeneticChanges(self, abspos, variants):
        """ Given the possible variants at chromosomal position abspos, determine
            any "interesting" genetic changes induced by each variant.  That would
            be any changes modifying a splice-site or interfering with the promoter
            region.

            This analysis is currently disabled: the method always returns an
            empty list, whatever abspos and variants are.

            abspos   -- chromosomal position of the variant (unused while disabled)
            variants -- candidate bases at that position (unused while disabled)

            Returns an empty list.
        """
        ### The previous body fetched the txStart-txEnd genome span and then
        ### hit an unconditional "return []", so the per-variant comparison
        ### after it could never run.  That unreachable code also relied on
        ### names that were never defined (positionInSequence, resp) and
        ### returned the string 'Promoter?' instead of a list when the
        ### relative position was negative.  The stub is now explicit and no
        ### longer pays for a sequence fetch whose result was discarded.
        ###
        ### TODO: implement the splice-site / promoter checks.  The disabled
        ### sketch intended, for every variant other than the reference base
        ### or '-', to record a 'NUCLEOTIDE=<ref>/<variant>' entry for the
        ### base at self.getRelpos(abspos) within the txStart-txEnd span.
        return []
Пример #3
0
    def getProteinChanges(self, abspos, variants):
        """ Determine the amino acid changes caused by each variant base at
            chromosomal position abspos.

            The coding sequence is stitched together from the parts of
            self.exons that overlap cdsStart-cdsEnd.  Each variant is
            substituted at the matching offset, the reference and variant
            sequences are translated (after reverse-complementing when
            self.strand is '-') and the amino acids at the affected codon
            are compared.

            abspos   -- chromosomal position of the variant
            variants -- candidate bases; the reference base and '-' are skipped

            Returns a (changes, warnings) tuple.  changes holds one dict per
            evaluated variant with REF_BASE, VARIANT_BASE and AMINOPOS
            (1-based), plus VARIANT_POS, CODING_LENGTH, PROTEIN_LENGTH,
            REF_AMINO and VAR_AMINO when the amino acid differs.  warnings is
            a Set of warning flag strings.
        """
        import getGenomeSpan

        ### Stitch together the coding sequence ...
        exons = []
        codingLength = 0
        variantPositions = []
        for exonStart, exonEnd in self.exons:
            ### Forget exons that are entirely before cdsStart
            ### or after cdsEnd
            if exonStart <= self.cdsStart and exonEnd <= self.cdsStart:
                continue
            if exonStart >= self.cdsEnd and exonEnd >= self.cdsEnd:
                continue
            ### Otherwise, keep the part of the exon contained in the
            ### range cdsStart-cdsEnd
            fStart = max(exonStart, self.cdsStart)
            fEnd = min(exonEnd, self.cdsEnd)
            exon = getGenomeSpan.getGenomeSpan(self.chrom, fStart, fEnd)
            exons.append(exon)
            codingLength += len(exon)

            ### Determine the relative position within the stitched
            ### sequence.  This has to be measured in bases accumulated so
            ### far; the length of the exon list (what was used before) only
            ### counts exons and produced a meaningless offset.
            ### NOTE(review): the offset is only exact if getGenomeSpan
            ### returns fEnd - fStart bases, in which case abspos == fEnd
            ### points one base past this exon -- confirm the convention.
            if fStart <= abspos <= fEnd:
                variantPositions.append(codingLength - (fEnd - abspos))

        sequence = ''.join(exons)
        minusStrand = (self.strand == '-')

        changes = []
        warnings = Set()
        for positionInSequence in variantPositions:
            refBase = sequence[positionInSequence]

            ### Don't bother calculating a matching base, or deletions
            candidates = [v for v in variants if v not in (refBase, '-')]
            if not candidates:
                continue

            ### The reference translation is the same for every variant at
            ### this position, so do it once.
            mrna = Seq(sequence, alphabet)
            if minusStrand:
                mrna = mrna.reverse_complement()
                ### Reverse-complementing mirrors the sequence, so the
                ### variant base sits at the mirrored offset in the mRNA.
                mrnaPos = len(sequence) - 1 - positionInSequence
            else:
                mrnaPos = positionInSequence
            protein = translator.translate(mrna)

            ### Floor division keeps this usable as an index even where
            ### "/" means true division.
            aminopos = mrnaPos // 3

            ### These two warning flags are here temporarily so we can get
            ### an idea of how often this happens.  Stitched-together coding
            ### sequences are sometimes not divisible by three, so the
            ### remaining bases get truncated on translation; it is a more
            ### specific problem when the SNP of interest is one of the
            ### truncated bases.
            if len(mrna) % 3 != 0:
                warnings.add('MRNA_LENGTH_NOT_3_MULTIPLE')
            if len(protein) <= aminopos:
                ### No change record is produced for such a position (as
                ### before); only the warning is reported.
                warnings.add('PROTEIN_LENGTH_LESS_THAN_AMINOPOS')
                continue

            refAmino = protein[aminopos]
            for variant in candidates:
                ### Substitute the "mutant" base into a copy of the sequence
                mutated = list(sequence)
                mutated[positionInSequence] = variant
                mrna2 = Seq(''.join(mutated), alphabet)
                if minusStrand:
                    mrna2 = mrna2.reverse_complement()
                protein2 = translator.translate(mrna2)
                varAmino = protein2[aminopos]

                change = {}
                change['REF_BASE'] = refBase
                change['VARIANT_BASE'] = variant
                change['AMINOPOS'] = aminopos + 1
                if varAmino != refAmino:
                    change['VARIANT_POS'] = positionInSequence
                    change['CODING_LENGTH'] = len(sequence)
                    change['PROTEIN_LENGTH'] = len(protein)
                    change['REF_AMINO'] = refAmino
                    change['VAR_AMINO'] = varAmino
                changes.append(change)

        return changes, warnings
Пример #4
0
    def getProteinChanges(self, abspos, variants):
        """ Determine the amino acid changes caused by each variant base at
            chromosomal position abspos.

            The coding sequence is stitched together from the parts of
            self.exons that overlap cdsStart-cdsEnd.  Each variant is
            substituted at the matching offset, the reference and variant
            sequences are translated (after reverse-complementing when
            self.strand is '-') and the amino acids at the affected codon
            are compared.

            abspos   -- chromosomal position of the variant
            variants -- candidate bases; the reference base and '-' are skipped

            Returns a (changes, warnings) tuple.  changes holds one dict per
            evaluated variant with REF_BASE, VARIANT_BASE and AMINOPOS
            (1-based), plus VARIANT_POS, CODING_LENGTH, PROTEIN_LENGTH,
            REF_AMINO and VAR_AMINO when the amino acid differs.  warnings is
            a Set of warning flag strings.
        """
        import getGenomeSpan

        ### Stitch together the coding sequence ...
        exons = []
        codingLength = 0
        variantPositions = []
        for exonStart, exonEnd in self.exons:
            ### Forget exons that are entirely before cdsStart
            ### or after cdsEnd
            if exonStart <= self.cdsStart and exonEnd <= self.cdsStart:
                continue
            if exonStart >= self.cdsEnd and exonEnd >= self.cdsEnd:
                continue
            ### Otherwise, keep the part of the exon contained in the
            ### range cdsStart-cdsEnd
            fStart = max(exonStart, self.cdsStart)
            fEnd = min(exonEnd, self.cdsEnd)
            exon = getGenomeSpan.getGenomeSpan(self.chrom, fStart, fEnd)
            exons.append(exon)
            codingLength += len(exon)

            ### Determine the relative position within the stitched
            ### sequence.  This has to be measured in bases accumulated so
            ### far; the length of the exon list (what was used before) only
            ### counts exons and produced a meaningless offset.
            ### NOTE(review): the offset is only exact if getGenomeSpan
            ### returns fEnd - fStart bases, in which case abspos == fEnd
            ### points one base past this exon -- confirm the convention.
            if fStart <= abspos <= fEnd:
                variantPositions.append(codingLength - (fEnd - abspos))

        sequence = ''.join(exons)
        minusStrand = (self.strand == '-')

        changes = []
        warnings = Set()
        for positionInSequence in variantPositions:
            refBase = sequence[positionInSequence]

            ### Don't bother calculating a matching base, or deletions
            candidates = [v for v in variants if v not in (refBase, '-')]
            if not candidates:
                continue

            ### The reference translation is the same for every variant at
            ### this position, so do it once.
            mrna = Seq(sequence, alphabet)
            if minusStrand:
                mrna = mrna.reverse_complement()
                ### Reverse-complementing mirrors the sequence, so the
                ### variant base sits at the mirrored offset in the mRNA.
                mrnaPos = len(sequence) - 1 - positionInSequence
            else:
                mrnaPos = positionInSequence
            protein = translator.translate(mrna)

            ### Floor division keeps this usable as an index even where
            ### "/" means true division.
            aminopos = mrnaPos // 3

            ### These two warning flags are here temporarily so we can get
            ### an idea of how often this happens.  Stitched-together coding
            ### sequences are sometimes not divisible by three, so the
            ### remaining bases get truncated on translation; it is a more
            ### specific problem when the SNP of interest is one of the
            ### truncated bases.
            if len(mrna) % 3 != 0:
                warnings.add('MRNA_LENGTH_NOT_3_MULTIPLE')
            if len(protein) <= aminopos:
                ### No change record is produced for such a position (as
                ### before); only the warning is reported.
                warnings.add('PROTEIN_LENGTH_LESS_THAN_AMINOPOS')
                continue

            refAmino = protein[aminopos]
            for variant in candidates:
                ### Substitute the "mutant" base into a copy of the sequence
                mutated = list(sequence)
                mutated[positionInSequence] = variant
                mrna2 = Seq(''.join(mutated), alphabet)
                if minusStrand:
                    mrna2 = mrna2.reverse_complement()
                protein2 = translator.translate(mrna2)
                varAmino = protein2[aminopos]

                change = {}
                change['REF_BASE'] = refBase
                change['VARIANT_BASE'] = variant
                change['AMINOPOS'] = aminopos + 1
                if varAmino != refAmino:
                    change['VARIANT_POS'] = positionInSequence
                    change['CODING_LENGTH'] = len(sequence)
                    change['PROTEIN_LENGTH'] = len(protein)
                    change['REF_AMINO'] = refAmino
                    change['VAR_AMINO'] = varAmino
                changes.append(change)

        return changes, warnings