def convertTranscriptEffectVEP(self, annStr, hgvsG):
    """
    Builds a GA4GH transcript effect from a single pipe-delimited
    annotation entry of a VEP generated VCF.

    :param annStr: String, one annotation entry; it must split on '|'
        into exactly 23 fields
    :param hgvsG: String, stored as the genomic HGVS annotation
    :return: effect protocol.TranscriptEffect
    :raises ValueError: if annStr does not contain exactly 23 fields
    """
    gaEffect = self._createGaTranscriptEffect()
    # Every field is named so that an entry with an unexpected number
    # of columns fails the unpacking; underscore-prefixed names are
    # present in the entry but not used below.
    (
        altAllele, consequence, _impact, _symbol, _geneName,
        _featureType, transcriptId, _trBiotype, _exon, _intron,
        transcriptHgvs, proteinHgvs, cdnaPosition, _cdsPosition,
        proteinPosition, _aminos, _codons, _existingVar, _distance,
        _strand, _symbolSource, _hgncId, _hgvsOffset,
    ) = annStr.split('|')

    gaEffect.alternateBases = altAllele
    gaEffect.effects = self.convertSeqOntology(consequence)
    gaEffect.featureId = transcriptId

    hgvs = protocol.HGVSAnnotation()
    hgvs.genomic = hgvsG
    hgvs.transcript = transcriptHgvs
    hgvs.protein = proteinHgvs
    gaEffect.hgvsAnnotation = hgvs

    self.addLocations(gaEffect, proteinPosition, cdnaPosition)
    # The id is derived from the effect itself, so it is computed only
    # after the fields above have been populated.
    gaEffect.id = self.getTranscriptEffectId(gaEffect)
    gaEffect.analysisResults = []
    return gaEffect
def _addTranscriptEffectLocations(self, effect, ann):
    """
    Fills in the HGVS annotation and the protein, CDS and cDNA
    locations of the given effect, using ann.start for every value.

    :param effect: the transcript effect object to populate in place
    :param ann: object exposing a ``start`` attribute
    :return: the same effect object that was passed in
    """
    # TODO Make these valid HGVS values
    hgvs = protocol.HGVSAnnotation()
    hgvs.genomic = str(ann.start)
    hgvs.transcript = str(ann.start)
    hgvs.protein = str(ann.start)
    effect.hgvsAnnotation = hgvs

    # Each location gets its own freshly created object holding only
    # the start coordinate.
    for locationName in ("proteinLocation", "CDSLocation", "cDNALocation"):
        location = self._createGaAlleleLocation()
        location.start = ann.start
        setattr(effect, locationName, location)
    return effect
def _createCsqTranscriptEffect(
        self, alt, term, protPos, cdnaPos, featureId):
    """
    Builds a transcript effect from already separated annotation
    values, leaving all HGVS annotation fields unset.

    :param alt: value stored as the effect's alternate bases
    :param term: value passed to convertSeqOntology to obtain the
        effect's list of effects
    :param protPos: protein position, forwarded to addLocations
    :param cdnaPos: cDNA position, forwarded to addLocations
    :param featureId: value stored as the effect's feature id
    :return: the populated transcript effect
    """
    gaEffect = self._createGaTranscriptEffect()
    gaEffect.alternateBases = alt
    gaEffect.effects = self.convertSeqOntology(term)
    gaEffect.featureId = featureId

    # These are not present in the data
    emptyHgvs = protocol.HGVSAnnotation()
    emptyHgvs.genomic = None
    emptyHgvs.transcript = None
    emptyHgvs.protein = None
    gaEffect.hgvsAnnotation = emptyHgvs

    self.addLocations(gaEffect, protPos, cdnaPos)
    # Computed after the fields above are set, since the id is derived
    # from the effect object.
    gaEffect.id = self.getTranscriptEffectId(gaEffect)
    gaEffect.analysisResults = []
    return gaEffect
def testAddLocations(self):
    """
    Checks that addLocations, given "pos/length" style protein and
    cDNA position strings, returns an effect equal to the fully
    populated effect that was passed in.
    """
    hgvs = protocol.HGVSAnnotation()
    hgvs.protein = "NM_001005484.1:p.Ile144Asn"
    hgvs.transcript = "NM_001005484.1:c.431T>A"

    proteinLocation = protocol.AlleleLocation()
    proteinLocation.alternateSequence = "Asn"
    proteinLocation.referenceSequence = "Ile"
    proteinLocation.start = 143

    cdsLocation = protocol.AlleleLocation()
    cdsLocation.alternateSequence = "A"
    cdsLocation.referenceSequence = "T"
    cdsLocation.start = 430

    cdnaLocation = protocol.AlleleLocation()
    cdnaLocation.start = 430

    expected = protocol.TranscriptEffect()
    expected.hgvsAnnotation = hgvs
    expected.proteinLocation = proteinLocation
    expected.cDNALocation = cdnaLocation
    expected.CDSLocation = cdsLocation

    # Position strings whose leading numbers are one greater than the
    # start values stored on the locations above.
    proteinPosition = "144/305"
    cdnaPosition = "431/918"
    actual = self._variantAnnotationSet.addLocations(
        expected, proteinPosition, cdnaPosition)
    self.assertEqual(actual, expected)
def convertTranscriptEffectSnpEff(self, annStr, hgvsG):
    """
    Builds a GA4GH transcript effect from a single pipe-delimited ANN
    entry of a SnpEff generated VCF.

    :param annStr: String, one ANN entry; it must split on '|' into
        exactly 16 fields
    :param hgvsG: String, stored as the genomic HGVS annotation
    :return: effect protocol.TranscriptEffect()
    :raises ValueError: if annStr does not contain exactly 16 fields
    """
    gaEffect = self._createGaTranscriptEffect()
    # SnpEff and VEP don't agree on this :)
    # Every field is named so that an entry with an unexpected number
    # of columns fails the unpacking; underscore-prefixed names are
    # present in the entry but not used below.
    (
        altAllele, consequence, _impact, _geneName, _geneId,
        _featureType, transcriptId, _trBiotype, _rank,
        transcriptHgvs, proteinHgvs, cdnaPosition, _cdsPosition,
        proteinPosition, _distance, _errsWarns,
    ) = annStr.split('|')

    gaEffect.alternateBases = altAllele
    gaEffect.effects = self.convertSeqOntology(consequence)
    gaEffect.featureId = transcriptId

    hgvs = protocol.HGVSAnnotation()
    hgvs.genomic = hgvsG
    hgvs.transcript = transcriptHgvs
    hgvs.protein = proteinHgvs
    gaEffect.hgvsAnnotation = hgvs

    self.addLocations(gaEffect, proteinPosition, cdnaPosition)
    # The id is derived from the effect itself, so it is computed only
    # after the fields above have been populated.
    gaEffect.id = self.getTranscriptEffectId(gaEffect)
    gaEffect.analysisResults = []
    return gaEffect