示例#1
0
 def __init__(self, seq):
     """Estimate the ST.26 size of one parsed sequence.

     Every ``*Length`` attribute is the length of an element value plus the
     matching entry of ``cu.TAG_LENGTH_ST26``; the grand total is stored in
     ``sequenceEstimatedSize``.

     :param seq: parsed sequence object; ``seqIdNo``, ``length``,
         ``molType``, ``residues_nuc``, ``residues_prt`` and ``features``
         are read here.
     """
     self.seq = seq
     self.featureSizeEstimators = []
     self.sequenceEstimatedSize = 0

     tagLengths = cu.TAG_LENGTH_ST26

     # Wrapper elements plus the sequence identifier.
     self.sequenceDataLength = (tagLengths['SequenceData']
                                + len(seq.seqIdNo)
                                + tagLengths['sequenceIDNumber']
                                + tagLengths['INSDSeq'])

     self.INSDSeq_lengthLength = len(seq.length) + tagLengths['INSDSeq_length']

     # Protein sequences are counted with the moltype text 'AA', not 'PRT'.
     moltypeText = 'AA' if seq.molType == 'PRT' else seq.molType
     self.INSDSeq_moltypeLength = len(moltypeText) + tagLengths['INSDSeq_moltype']

     self.INSDSeq_divisionLength = len('PAT') + tagLengths['INSDSeq_division']
     self.INSDSeq_feature_tableLength = tagLengths['INSDSeq_feature-table']

     # The source feature estimator always comes first in the list.
     sourceEstimator = self.getSourceFeatureSizeEstimator(seq)
     self.featureSizeEstimators.append(sourceEstimator)

     sequenceText = seq.residues_nuc
     if seq.molType == 'PRT':
         # Protein residues are measured in their one-letter form.
         sequenceText = cu.oneLetterCode(seq.residues_prt)
     self.INSDSeq_sequenceLength = len(sequenceText) + tagLengths['INSDSeq_sequence']

     for feat in self.seq.features:
         hasKeyOrLocation = not (feat.key == cu.BLANK_PLACEHOLDER
                                 and feat.location == cu.BLANK_PLACEHOLDER)
         if hasKeyOrLocation:
             self.featureSizeEstimators.append(
                 self.getOtherFeatureSizeEstimator(feat))
             continue
         # A feature without key and location only contributes a note
         # qualifier, which is attached to the source feature.
         sourceEstimator['qualifiers'].append({
             'INSDQualifierLength': tagLengths['INSDQualifier'],
             'INSDQualifier_nameLength': len('note') + tagLengths['INSDQualifier_name'],
             'INSDQualifier_valueLength': len(feat.description) + tagLengths['INSDQualifier_value'],
         })

     featuresTotal = 0
     for estimator in self.featureSizeEstimators:
         qualifiersTotal = 0
         for qual in estimator['qualifiers']:
             qualifiersTotal += (qual['INSDQualifierLength']
                                 + qual['INSDQualifier_nameLength']
                                 + qual['INSDQualifier_valueLength'])
         featuresTotal += (estimator['INSDFeatureLength']
                           + estimator['INSDFeature_keyLength']
                           + estimator['INSDFeature_locationLength']
                           + estimator['INSDFeature_qualsLength']
                           + qualifiersTotal)

     self.sequenceEstimatedSize = (self.sequenceDataLength
                                   + self.INSDSeq_lengthLength
                                   + self.INSDSeq_moltypeLength
                                   + self.INSDSeq_divisionLength
                                   + self.INSDSeq_feature_tableLength
                                   + featuresTotal
                                   + self.INSDSeq_sequenceLength)
示例#2
0
    def setSequencesSt26(self):
        """Create and save the ST.26 records for every ST.25 sequence.

        For each sequence yielded by ``self.seql_st25.generateSequence()`` a
        ``Sequence`` is saved, followed by its source ``Feature`` with an
        organism ``Qualifier``, and then one ``Feature`` with a note
        ``Qualifier`` for every ST.25 feature.
        """
        for s25 in self.seql_st25.generateSequence():
            if s25.molType in ('DNA', 'RNA'):
                # Nucleotide sequences keep their molecule type and use
                # lower-case key/qualifier names.
                molType_st26 = s25.molType
                sourceKey, organismName, noteName = 'source', 'organism', 'note'
                residues_st26 = s25.residues_nuc
            else:
                # Everything else is written as an amino-acid sequence with
                # upper-case names and one-letter residues.
                molType_st26 = 'AA'
                sourceKey, organismName, noteName = 'SOURCE', 'ORGANISM', 'NOTE'
                residues_st26 = converter_util.oneLetterCode(s25.residues_prt)

            # otherSeqId is optional, so it is not set on converted sequences.
            s26 = Sequence(sequenceListing=self.seql_st26,
                           sequenceIdNo=s25.seqIdNo,
                           length=s25.length,
                           moltype=molType_st26,
                           division='PAT',
                           residues=residues_st26)
            s26.save()

            # The source feature spans the whole sequence.
            sourceFeature = Feature(sequence=s26,
                                    featureKey=sourceKey,
                                    location='1..%s' % s26.length)
            sourceFeature.save()

            Qualifier(feature=sourceFeature,
                      qualifierName=organismName,
                      qualifierValue=s25.organism).save()

            for f in s25.features:
                convertedFeature = Feature(sequence=s26,
                                           featureKey=f.key,
                                           location=f.location)
                convertedFeature.save()
                Qualifier(feature=convertedFeature,
                          qualifierName=noteName,
                          qualifierValue=f.description).save()
示例#3
0
 def getOtherFeatureSizeEstimator(self, feat):
     """Build the size-estimate record for one non-source feature.

     :param feat: parsed feature; ``key``, ``location`` and ``description``
         are read, plus ``translation`` when the key is ``'CDS'``.
     :returns: dict with the keys ``INSDFeatureLength``,
         ``INSDFeature_keyLength``, ``INSDFeature_locationLength``,
         ``INSDFeature_qualsLength`` and ``qualifiers``. ``qualifiers`` is a
         list of per-qualifier dicts and ``INSDFeature_qualsLength`` is 0
         when that list is empty.
     """
     tagLengths = cu.TAG_LENGTH_ST26
     quals = []

     if feat.description != cu.BLANK_PLACEHOLDER:
         quals.append({
             'INSDQualifierLength': tagLengths['INSDQualifier'],
             'INSDQualifier_nameLength': len('note') + tagLengths['INSDQualifier_name'],
             'INSDQualifier_valueLength': len(feat.description) + tagLengths['INSDQualifier_value'],
         })

     if feat.key == 'CDS':
         # Appended, not assigned: a CDS feature that also has a description
         # carries both a note and a translation qualifier, so both must be
         # counted. Assigning here used to drop the note from the estimate.
         quals.append({
             'INSDQualifierLength': tagLengths['INSDQualifier'],
             'INSDQualifier_nameLength': len('translation') + tagLengths['INSDQualifier_name'],
             'INSDQualifier_valueLength': len(cu.oneLetterCode(feat.translation)) + tagLengths['INSDQualifier_value'],
         })

     # The INSDFeature_quals wrapper is only counted when it has content.
     qualsLength = tagLengths['INSDFeature_quals'] if quals else 0

     return {
         'INSDFeatureLength': tagLengths['INSDFeature'],
         'INSDFeature_keyLength': len(feat.key) + tagLengths['INSDFeature_key'],
         'INSDFeature_locationLength': len(feat.location) + tagLengths['INSDFeature_location'],
         'INSDFeature_qualsLength': qualsLength,
         'qualifiers': quals,
     }
示例#4
0
    def setSequencesSt26(self):
        
#         for s25 in self.seql_st25.sequences:
        for s25 in self.seql_st25.generateSequence():
            print 'seq', s25.seqIdNo
            residues_st26 = ''
            if s25.molType in ('DNA', 'RNA'):
                molType_st26 = s25.molType
                sourceKey = 'source'
                organismQualifierName = 'organism'
                mol_typeQualifierName = 'mol_type'
                mol_typeQualifierValue = 'genomic %s' % s25.molType
                noteQualifierName = 'note'
                residues_st26 = s25.residues_nuc 
            else:
                molType_st26 = 'AA'
                sourceKey = 'SOURCE'
                organismQualifierName = 'ORGANISM'
                mol_typeQualifierName = 'MOL_TYPE'
                mol_typeQualifierValue = 'protein'
                noteQualifierName = 'NOTE'
                residues_st26 = converter_util.oneLetterCode(s25.residues_prt)
            
            s26 = Sequence(sequenceListing = self.seql_st26,
                sequenceIdNo = s25.seqIdNo,
                length = s25.length,
                moltype = molType_st26,
                division = 'PAT',
#                 otherSeqId = '-', #optional, so we don't include it in converted sl
                residues = residues_st26)
            
            s26.save()
            
            sourceFeature = Feature(sequence=s26, 
                                    featureKey = sourceKey,
                                    location = '1..%s' % s26.length)
            sourceFeature.save()
            
            organismQualifier = Qualifier(feature=sourceFeature,
                                          qualifierName=organismQualifierName,
                                          qualifierValue=s25.organism)
            organismQualifier.save()
            
            mol_typeQualifier = Qualifier(feature=sourceFeature,
                                          qualifierName=mol_typeQualifierName,
                                          qualifierValue=mol_typeQualifierValue)
            mol_typeQualifier.save()
            
            for f in s25.features:
                if f.key == seqlutils.DEFAULT_STRING and f.location == seqlutils.DEFAULT_STRING:
                    sourceNoteQualifier = Qualifier(feature=sourceFeature,
                                                  qualifierName=noteQualifierName,
                                                  qualifierValue=f.description)
                    sourceNoteQualifier.save()
                else:
                    currentFeature = Feature(sequence=s26,
                                         featureKey = f.key,
                                         location = f.location)
                    currentFeature.save()
                    if f.description != seqlutils.DEFAULT_STRING:
                        currentQualifier = Qualifier(feature=currentFeature,
                                                  qualifierName=noteQualifierName,
                                                  qualifierValue=f.description)
                        currentQualifier.save()
                    
                    if f.key == 'CDS':
                        translationQualifierValue = converter_util.oneLetterCode(f.translation)
                        translationQualifier = Qualifier(feature=currentFeature,
                                              qualifierName='translation',
                                              qualifierValue=translationQualifierValue)
                        translationQualifier.save()
                
         
        self.successful = True                
示例#5
0
    def setSequenceRows(self):
        """Build the ST.25 vs. ST.26 length rows for every sequence.

        Raw sequences (``self.seql_raw.raw_sequences``) are paired by
        position with the parsed sequences yielded by
        ``self.seql.generateSequence()``, and raw features are paired by
        position with parsed features.

        Rows built inline in this method are 8-item lists:
        ``[ST.25 tag, sequence id, length of raw ST.25 element, length of
        parsed ST.25 value, ST.26 tag length, ST.26 element length, ST.26
        element name, comment]``. Rows returned by
        ``self._getSt25St26Lengths`` are presumably of the same shape
        (helper not visible here; confirm).

        Side effect: ``self.seql.quantity_prt``, ``quantity_nuc`` and
        ``quantity_mix`` are incremented while the parsed sequences are
        collected, so calling this method twice counts them twice.

        :returns: list of rows, in document order.
        :raises IndexError: if there are fewer parsed sequences (or parsed
            features) than raw ones.
        """
        res = []

        parsedSequences = []
        for s in self.seql.generateSequence():
            parsedSequences.append(s)
            # TODO: test
            if s.molType == 'PRT':
                self.seql.quantity_prt += 1
            else:
                self.seql.quantity_nuc += 1
                if s.mixedMode:
                    self.seql.quantity_mix += 1

        # enumerate() gives the position directly. The former
        # raw_sequences.index(seq) lookup was quadratic and returned the
        # first equal element, which mis-pairs duplicates.
        for currentIndex, seq in enumerate(self.seql_raw.raw_sequences):
            parsedSequence = parsedSequences[currentIndex]
            currentSeqId = parsedSequence.seqIdNo

            # ====================== 210 ======================
            currentRow_SequenceData = self._getSt25St26Lengths(
                0, currentSeqId, '-', '-', 'SequenceData',
                'ST.26 specific element')
            res.append(currentRow_SequenceData)

            currentRow210 = self._getSt25St26Lengths(
                210, currentSeqId, seq.seqIdNo, parsedSequence.seqIdNo,
                'sequenceIDNumber', '-')
            res.append(currentRow210)

            currentRow_INSDSeq = self._getSt25St26Lengths(
                0, currentSeqId, '-', '-', 'INSDSeq',
                'ST.26 specific element')
            res.append(currentRow_INSDSeq)

            # ====================== 211 ======================
            currentRow211 = self._getSt25St26Lengths(
                211, currentSeqId, seq.length, parsedSequence.length,
                'INSDSeq_length', cu.BLANK_PLACEHOLDER)
            res.append(currentRow211)

            # ====================== 212 ======================
            moltypeValue = 'AA' if parsedSequence.molType == 'PRT' else parsedSequence.molType

            currentRow212 = [212, currentSeqId, cu.safeLength(seq.molType),
                             cu.safeLength(parsedSequence.molType),
                             cu.TAG_LENGTH_ST26['INSDSeq_moltype'],
                             cu.safeLength(moltypeValue) + cu.TAG_LENGTH_ST26['INSDSeq_moltype'],
                             'INSDSeq_moltype',
                             'PRT replaced by AA for protein raw_sequences' if moltypeValue == 'AA' else cu.BLANK_PLACEHOLDER]
            res.append(currentRow212)

            # ====================== INSDSeq_division ======================
            INSDSeq_division_val = 'PAT'
            currentRow_INSDSeq_division = self._getSt25St26Lengths(
                0, currentSeqId, '-', INSDSeq_division_val,
                'INSDSeq_division', 'ST.26 specific element')
            res.append(currentRow_INSDSeq_division)

            # ====================== INSDSeq_other-seqids ======================
            # optional element, therefore not included in calculations

            # ====================== INSDSeq_feature-table ======================
            currentRow_INSDSeq_feature_table = self._getSt25St26Lengths(
                0, currentSeqId, '-', '-', 'INSDSeq_feature-table',
                'ST.26 specific element')
            res.append(currentRow_INSDSeq_feature_table)

            # ====================== 213 ======================
            # create ST.26 feature source
            currentRow_INSDFeature = [0, currentSeqId, 0, 0,
                                      cu.TAG_LENGTH_ST26['INSDFeature'],
                                      cu.TAG_LENGTH_ST26['INSDFeature'],
                                      'INSDFeature',
                                      'ST.26 mandatory feature source']
            res.append(currentRow_INSDFeature)

            currentRow_INSDFeature_key = [0, currentSeqId, 0, 0,
                                          cu.TAG_LENGTH_ST26['INSDFeature_key'],
                                          len('source') + cu.TAG_LENGTH_ST26['INSDFeature_key'],
                                          'INSDFeature_key',
                                          'ST.26 mandatory feature source']
            res.append(currentRow_INSDFeature_key)

            sourceLocation = '1..%s' % parsedSequence.length
            currentRow_INSDFeature_location = [0, currentSeqId, 0, 0,
                                               cu.TAG_LENGTH_ST26['INSDFeature_location'],
                                               len(sourceLocation) + cu.TAG_LENGTH_ST26['INSDFeature_location'],
                                               'INSDFeature_location',
                                               'ST.26 mandatory feature source']
            res.append(currentRow_INSDFeature_location)

            # The three helpers below close over ``res`` and the current
            # ``currentSeqId``; they are only called within this iteration.
            def append_INSDFeature_quals(msg):
                """Append the row for an INSDFeature_quals wrapper element."""
                res.append([0, currentSeqId, 0, 0,
                            cu.TAG_LENGTH_ST26['INSDFeature_quals'],
                            cu.TAG_LENGTH_ST26['INSDFeature_quals'],
                            'INSDFeature_quals',
                            msg])

            # add first the parent element INSDFeature_quals
            append_INSDFeature_quals('ST.26 mandatory feature source')

            def createQualifier(name, msg):
                """Append the INSDQualifier and INSDQualifier_name rows."""
                currentRow_INSDQualifier = [0, currentSeqId, 0, 0,
                                            cu.TAG_LENGTH_ST26['INSDQualifier'],
                                            cu.TAG_LENGTH_ST26['INSDQualifier'],
                                            'INSDQualifier',
                                            msg]
                res.append(currentRow_INSDQualifier)

                currentRow_INSDQualifier_name = [0, currentSeqId, 0, 0,
                                                 cu.TAG_LENGTH_ST26['INSDQualifier_name'],
                                                 len(name) + cu.TAG_LENGTH_ST26['INSDQualifier_name'],
                                                 'INSDQualifier_name',
                                                 msg]
                res.append(currentRow_INSDQualifier_name)

            def createQualifierValue(tag_st25, element_st25, value_st25, msg):
                """Append the INSDQualifier_value row for an ST.25 value."""
                currentRow_INSDQualifier_value = [tag_st25,
                                                  currentSeqId,
                                                  cu.safeLength(element_st25),
                                                  cu.safeLength(value_st25),
                                                  cu.TAG_LENGTH_ST26['INSDQualifier_value'],
                                                  cu.safeLength(value_st25) + cu.TAG_LENGTH_ST26['INSDQualifier_value'],
                                                  'INSDQualifier_value',
                                                  msg]
                res.append(currentRow_INSDQualifier_value)

            # qualifier organism
            createQualifier('organism', 'ST.26 mandatory qualifier organism')
            createQualifierValue(213, seq.organism,
                                 parsedSequence.organism,
                                 'ST.26 mandatory qualifier organism')

            # qualifier mol_type
            mol_typeValue = 'protein' if parsedSequence.molType == 'PRT' else 'genomic DNA'
            createQualifier('mol_type', 'ST.26 mandatory qualifier mol_type')
            # Built inline rather than through createQualifierValue so that
            # both ST.25 length columns stay 0 for this ST.26-only value.
            res.append([0, currentSeqId, 0, 0,
                        cu.TAG_LENGTH_ST26['INSDQualifier_value'],
                        cu.safeLength(mol_typeValue) + cu.TAG_LENGTH_ST26['INSDQualifier_value'],
                        'INSDQualifier_value',
                        'ST.26 mandatory qualifier mol_type'])

            # end create ST.26 feature source

            # ====================== other features ======================
            parsedFeatures = parsedSequence.features
            # Positional pairing via enumerate(), for the same reason as the
            # sequence loop above.
            for currentFeatureIndex, feat in enumerate(seq.features):
                parsedFeature = parsedFeatures[currentFeatureIndex]
                # A "simple" feature has neither key nor location; only its
                # description (223) produces rows.
                isSimpleFeature = (parsedFeature.key == cu.BLANK_PLACEHOLDER
                                   and parsedFeature.location == cu.BLANK_PLACEHOLDER)
                if not isSimpleFeature:
                    # ====================== 220 ======================
                    currentRow220 = self._getSt25St26Lengths(
                        220, currentSeqId,
                        feat.featureHeader, parsedFeature.featureHeader,
                        'INSDFeature', cu.BLANK_PLACEHOLDER)
                    res.append(currentRow220)

                    # ====================== 221 ======================
                    currentRow221 = self._getSt25St26Lengths(
                        221, currentSeqId,
                        feat.key, parsedFeature.key,
                        'INSDFeature_key', cu.BLANK_PLACEHOLDER)
                    res.append(currentRow221)

                    # ====================== add row for mixed mode translation qualifier ======================
                    if parsedFeature.key == 'CDS':
                        createQualifier('translation', 'ST.26 specific element translation')
                        translationRow = [400, currentSeqId,
                                          0,
                                          cu.safeLength(parsedFeature.translation),
                                          cu.TAG_LENGTH_ST26['INSDQualifier_value'],
                                          (cu.TAG_LENGTH_ST26['INSDQualifier_value'] +
                                           len(cu.oneLetterCode(parsedFeature.translation))),
                                          'INSDQualifier_value', '3-to-1 letter code']
                        res.append(translationRow)

                    # ====================== 222 ======================
                    currentRow222 = self._getSt25St26Lengths(
                        222, currentSeqId,
                        feat.location, parsedFeature.location,
                        'INSDFeature_location', cu.BLANK_PLACEHOLDER)
                    res.append(currentRow222)

                # ====================== 223 ======================
                if parsedFeature.description != cu.BLANK_PLACEHOLDER:  # do not add row if 223 missing!
                    append_INSDFeature_quals('ST.26 mandatory element')
                    createQualifier('note', cu.BLANK_PLACEHOLDER)
                    createQualifierValue(223, feat.description,
                                         parsedFeature.description,
                                         cu.BLANK_PLACEHOLDER)

            # ====================== 400 ======================
            if parsedSequence.molType == 'PRT':
                # Protein residues are measured in one-letter code on the
                # ST.26 side, so the row is built inline.
                parsedResidues = parsedSequence.residues_prt
                currentRow400 = [400, currentSeqId,
                                 cu.safeLength(seq.residues),
                                 cu.safeLength(parsedResidues),
                                 cu.TAG_LENGTH_ST26['INSDSeq_sequence'],
                                 (cu.TAG_LENGTH_ST26['INSDSeq_sequence'] +
                                  len(cu.oneLetterCode(parsedResidues))),
                                 'INSDSeq_sequence', '3-to-1 letter code']
            else:
                parsedResidues = parsedSequence.residues_nuc
                currentRow400 = self._getSt25St26Lengths(
                    400, currentSeqId,
                    seq.residues, parsedResidues,
                    'INSDSeq_sequence', cu.BLANK_PLACEHOLDER)
            res.append(currentRow400)

        return res
示例#6
0
    def setSequencesSt26(self):

        #         for s25 in self.seql_st25.sequences:
        for s25 in self.seql_st25.generateSequence():
            print 'seq', s25.seqIdNo
            residues_st26 = ''
            if s25.molType in ('DNA', 'RNA'):
                molType_st26 = s25.molType
                sourceKey = 'source'
                organismQualifierName = 'organism'
                mol_typeQualifierName = 'mol_type'
                mol_typeQualifierValue = 'genomic %s' % s25.molType
                noteQualifierName = 'note'
                residues_st26 = s25.residues_nuc
            else:
                molType_st26 = 'AA'
                sourceKey = 'SOURCE'
                organismQualifierName = 'ORGANISM'
                mol_typeQualifierName = 'MOL_TYPE'
                mol_typeQualifierValue = 'protein'
                noteQualifierName = 'NOTE'
                residues_st26 = converter_util.oneLetterCode(s25.residues_prt)

            s26 = Sequence(
                sequenceListing=self.seql_st26,
                sequenceIdNo=s25.seqIdNo,
                length=s25.length,
                moltype=molType_st26,
                division='PAT',
                #                 otherSeqId = '-', #optional, so we don't include it in converted sl
                residues=residues_st26)

            s26.save()

            sourceFeature = Feature(sequence=s26,
                                    featureKey=sourceKey,
                                    location='1..%s' % s26.length)
            sourceFeature.save()

            organismQualifier = Qualifier(feature=sourceFeature,
                                          qualifierName=organismQualifierName,
                                          qualifierValue=s25.organism)
            organismQualifier.save()

            mol_typeQualifier = Qualifier(
                feature=sourceFeature,
                qualifierName=mol_typeQualifierName,
                qualifierValue=mol_typeQualifierValue)
            mol_typeQualifier.save()

            for f in s25.features:
                if f.key == seqlutils.DEFAULT_STRING and f.location == seqlutils.DEFAULT_STRING:
                    sourceNoteQualifier = Qualifier(
                        feature=sourceFeature,
                        qualifierName=noteQualifierName,
                        qualifierValue=f.description)
                    sourceNoteQualifier.save()
                else:
                    currentFeature = Feature(sequence=s26,
                                             featureKey=f.key,
                                             location=f.location)
                    currentFeature.save()
                    if f.description != seqlutils.DEFAULT_STRING:
                        currentQualifier = Qualifier(
                            feature=currentFeature,
                            qualifierName=noteQualifierName,
                            qualifierValue=f.description)
                        currentQualifier.save()

                    if f.key == 'CDS':
                        translationQualifierValue = converter_util.oneLetterCode(
                            f.translation)
                        translationQualifier = Qualifier(
                            feature=currentFeature,
                            qualifierName='translation',
                            qualifierValue=translationQualifierValue)
                        translationQualifier.save()

        self.successful = True
示例#7
0
    def setSequenceRows(self):
        res = []

        parsedSequences = []
        for s in self.seql.generateSequence():
            parsedSequences.append(s)
            #             TODO: test
            if s.molType == 'PRT':
                self.seql.quantity_prt += 1
            else:
                self.seql.quantity_nuc += 1
                if s.mixedMode:
                    self.seql.quantity_mix += 1

        for seq in self.seql_raw.raw_sequences:

            currentIndex = self.seql_raw.raw_sequences.index(seq)
            parsedSequence = parsedSequences[currentIndex]
            currentSeqId = parsedSequence.seqIdNo
            # ====================== 210 ======================
            currentRow_SequenceData = self._getSt25St26Lengths(
                0, currentSeqId, '-', '-', 'SequenceData',
                'ST.26 specific element')

            res.append(currentRow_SequenceData)

            currentRow210 = self._getSt25St26Lengths(210, currentSeqId,
                                                     seq.seqIdNo,
                                                     parsedSequence.seqIdNo,
                                                     'sequenceIDNumber', '-')

            res.append(currentRow210)

            currentRow_INSDSeq = self._getSt25St26Lengths(
                0, currentSeqId, '-', '-', 'INSDSeq', 'ST.26 specific element')

            res.append(currentRow_INSDSeq)

            # ====================== 211 ======================
            currentRow211 = self._getSt25St26Lengths(211, currentSeqId,
                                                     seq.length,
                                                     parsedSequence.length,
                                                     'INSDSeq_length',
                                                     cu.BLANK_PLACEHOLDER)
            res.append(currentRow211)

            # ====================== 212 ======================
            moltypeValue = 'AA' if parsedSequence.molType == 'PRT' else parsedSequence.molType

            currentRow212 = [
                212, currentSeqId,
                cu.safeLength(seq.molType),
                cu.safeLength(parsedSequence.molType),
                cu.TAG_LENGTH_ST26['INSDSeq_moltype'],
                cu.safeLength(moltypeValue) +
                cu.TAG_LENGTH_ST26['INSDSeq_moltype'], 'INSDSeq_moltype',
                'PRT replaced by AA for protein raw_sequences'
                if moltypeValue == 'AA' else cu.BLANK_PLACEHOLDER
            ]

            res.append(currentRow212)

            # ====================== INSDSeq_division ======================
            INSDSeq_division_val = 'PAT'
            currentRow_INSDSeq_division = self._getSt25St26Lengths(
                0, currentSeqId, '-', INSDSeq_division_val, 'INSDSeq_division',
                'ST.26 specific element')
            res.append(currentRow_INSDSeq_division)

            # ====================== INSDSeq_other-seqids ======================
            # optional element, therefore not included in calculations

            # ====================== INSDSeq_feature-table ======================
            currentRow_INSDSeq_feature_table = self._getSt25St26Lengths(
                0, currentSeqId, '-', '-', 'INSDSeq_feature-table',
                'ST.26 specific element')
            res.append(currentRow_INSDSeq_feature_table)

            # ====================== 213 ======================
            #             create ST.26 feature source
            currentRow_INSDFeature = [
                0, currentSeqId, 0, 0, cu.TAG_LENGTH_ST26['INSDFeature'],
                cu.TAG_LENGTH_ST26['INSDFeature'], 'INSDFeature',
                'ST.26 mandatory feature source'
            ]
            res.append(currentRow_INSDFeature)

            currentRow_INSDFeature_key = [
                0, currentSeqId, 0, 0, cu.TAG_LENGTH_ST26['INSDFeature_key'],
                len('source') + cu.TAG_LENGTH_ST26['INSDFeature_key'],
                'INSDFeature_key', 'ST.26 mandatory feature source'
            ]

            res.append(currentRow_INSDFeature_key)

            sourceLocation = '1..%s' % parsedSequence.length
            currentRow_INSDFeature_location = [
                0, currentSeqId, 0, 0,
                cu.TAG_LENGTH_ST26['INSDFeature_location'],
                len(sourceLocation) +
                cu.TAG_LENGTH_ST26['INSDFeature_location'],
                'INSDFeature_location', 'ST.26 mandatory feature source'
            ]

            res.append(currentRow_INSDFeature_location)

            def append_INSDFeature_quals(msg):
                res.append([
                    0, currentSeqId, 0, 0,
                    cu.TAG_LENGTH_ST26['INSDFeature_quals'],
                    cu.TAG_LENGTH_ST26['INSDFeature_quals'],
                    'INSDFeature_quals', msg
                ])

#             add first the parent element INSDFeature_quals

            append_INSDFeature_quals('ST.26 mandatory feature source')

            def createQualifier(name, msg):
                currentRow_INSDQualifier = [
                    0, currentSeqId, 0, 0, cu.TAG_LENGTH_ST26['INSDQualifier'],
                    cu.TAG_LENGTH_ST26['INSDQualifier'], 'INSDQualifier', msg
                ]

                res.append(currentRow_INSDQualifier)

                currentRow_INSDQualifier_name = [
                    0, currentSeqId, 0, 0,
                    cu.TAG_LENGTH_ST26['INSDQualifier_name'],
                    len(name) + cu.TAG_LENGTH_ST26['INSDQualifier_name'],
                    'INSDQualifier_name', msg
                ]

                res.append(currentRow_INSDQualifier_name)

            def createQualifierValue(tag_st25, element_st25, value_st25, msg):

                currentRow_INSDQualifier_value = [
                    tag_st25, currentSeqId,
                    cu.safeLength(element_st25),
                    cu.safeLength(value_st25),
                    cu.TAG_LENGTH_ST26['INSDQualifier_value'],
                    cu.safeLength(value_st25) +
                    cu.TAG_LENGTH_ST26['INSDQualifier_value'],
                    'INSDQualifier_value', msg
                ]

                res.append(currentRow_INSDQualifier_value)

#             qualifier organism

            createQualifier('organism', 'ST.26 mandatory qualifier organism')
            createQualifierValue(213, seq.organism, parsedSequence.organism,
                                 'ST.26 mandatory qualifier organism')

            #             qualifier mol_type
            mol_typeValue = 'protein' if parsedSequence.molType == 'PRT' else 'genomic DNA'
            createQualifier('mol_type', 'ST.26 mandatory qualifier mol_type')
            #             createQualifierValue(0, 0, mol_typeValue, 'ST.26 mandatory qualifier mol_type')
            res.append([
                0, currentSeqId, 0, 0,
                cu.TAG_LENGTH_ST26['INSDQualifier_value'],
                cu.safeLength(mol_typeValue) +
                cu.TAG_LENGTH_ST26['INSDQualifier_value'],
                'INSDQualifier_value', 'ST.26 mandatory qualifier mol_type'
            ])

            #             end create ST.26 feature source

            # ====================== other features ======================
            parsedFeatures = parsedSequence.features
            for feat in seq.features:
                currentFeatureIndex = seq.features.index(feat)
                parsedFeature = parsedFeatures[currentFeatureIndex]
                isSimpleFeature = False
                if parsedFeature.key == cu.BLANK_PLACEHOLDER and parsedFeature.location == cu.BLANK_PLACEHOLDER:
                    isSimpleFeature = True
                if not isSimpleFeature:
                    # ====================== 220 ======================
                    currentRow220 = self._getSt25St26Lengths(
                        220, currentSeqId, feat.featureHeader,
                        parsedFeature.featureHeader, 'INSDFeature',
                        cu.BLANK_PLACEHOLDER)
                    res.append(currentRow220)

                    # ====================== 221 ======================
                    currentRow221 = self._getSt25St26Lengths(
                        221, currentSeqId, feat.key, parsedFeature.key,
                        'INSDFeature_key', cu.BLANK_PLACEHOLDER)
                    res.append(currentRow221)

                    # ====================== add row for mixed mode translation qualifier ======================
                    if parsedFeature.key == 'CDS':
                        createQualifier('translation',
                                        'ST.26 specific element translation')
                        translationRow = [
                            400, currentSeqId, 0,
                            cu.safeLength(parsedFeature.translation),
                            cu.TAG_LENGTH_ST26['INSDQualifier_value'],
                            (cu.TAG_LENGTH_ST26['INSDQualifier_value'] +
                             len(cu.oneLetterCode(parsedFeature.translation))),
                            'INSDQualifier_value', '3-to-1 letter code'
                        ]

                        res.append(translationRow)

                    # ====================== 222 ======================
                    currentRow222 = self._getSt25St26Lengths(
                        222, currentSeqId, feat.location,
                        parsedFeature.location, 'INSDFeature_location',
                        cu.BLANK_PLACEHOLDER)
                    res.append(currentRow222)

# ====================== 223 ======================
                if parsedFeature.description != cu.BLANK_PLACEHOLDER:  #do not add row if 223 missing!
                    append_INSDFeature_quals('ST.26 mandatory element')
                    createQualifier('note', cu.BLANK_PLACEHOLDER)
                    createQualifierValue(223, feat.description,
                                         parsedFeature.description,
                                         cu.BLANK_PLACEHOLDER)

# ====================== 400 ======================
            if parsedSequence.molType == 'PRT':
                parsedResidues = parsedSequence.residues_prt
                currentRow400 = [
                    400, currentSeqId,
                    cu.safeLength(seq.residues),
                    cu.safeLength(parsedResidues),
                    cu.TAG_LENGTH_ST26['INSDSeq_sequence'],
                    (cu.TAG_LENGTH_ST26['INSDSeq_sequence'] +
                     len(cu.oneLetterCode(parsedResidues))),
                    'INSDSeq_sequence', '3-to-1 letter code'
                ]

            else:
                parsedResidues = parsedSequence.residues_nuc
                currentRow400 = self._getSt25St26Lengths(
                    400, currentSeqId, seq.residues, parsedResidues,
                    'INSDSeq_sequence', cu.BLANK_PLACEHOLDER)
            res.append(currentRow400)

        return res
示例#8
0
    def test_setSequencesSt26(self):
        """Check the ST.26 sequences, features and qualifiers of the fixtures.

        Reads ``self.sc1``, ``self.sc1004`` and
        ``self.sc1004_seql_st25_sequences``, which are expected to be
        prepared outside this method (presumably in ``setUp`` - confirm).

        Covers four areas:
        * basic sequence attributes (count, moltype, residues, length);
        * feature key/location values;
        * a missing feature description must not yield a 'note' qualifier;
        * mixed mode 'translation' qualifiers and the 'note' qualifier of
          the source feature.
        """
        sequences = self.sc1.seql_st26.sequence_set.all()
        self.assertEqual(4, sequences.count())

        s2 = sequences.get(sequenceIdNo=2)
        s4 = sequences.get(sequenceIdNo=4)

        self.assertEqual('DNA', s2.moltype)
        self.assertEqual('AA', s4.moltype)

        self.assertEqual('ttgaccaagctggggaccccggtcccttgggaccagtggcagaggagtc', s2.residues)

        # NOTE(review): the positional indexing below relies on feature_set
        # and qualifier_set returning rows in a stable order - confirm the
        # models define an ordering.
        features_s2 = s2.feature_set.all()

        self.assertEqual("3'clip", features_s2[1].featureKey)
        self.assertEqual("1..30", features_s2[1].location)

        sequences_1004 = self.sc1004.seql_st26.sequence_set.all()
        sequence_1004_1 = sequences_1004.get(sequenceIdNo=1)
        sequence_1004_7 = sequences_1004.get(sequenceIdNo=7)

        self.assertEqual(903, sequence_1004_1.length)
        features_1004_1 = sequence_1004_1.feature_set.all()

        self.assertEqual("CDS", features_1004_1[1].featureKey)
        self.assertEqual("(1)..(903)", features_1004_1[1].location)

        # A missing feature description must not be converted to an empty
        # 'note' qualifier.
        s1 = sequences.get(sequenceIdNo=1)
        features_s1 = s1.feature_set.all()
        for f in features_s1:
            qualifiers = f.qualifier_set.all()
            for q in qualifiers:
                # assertNotIn reports the offending name on failure.
                self.assertNotIn(q.qualifierName, ['note', "NOTE"])

        # NOTE(review): only the upper-case name 'NOTE' is matched here,
        # while the source-feature lookup at the end of this test uses
        # 'note'. If no qualifier is named 'NOTE' the assertion below never
        # runs - confirm the intended casing.
        features_s4 = s4.feature_set.all()
        for f in features_s4:
            qualifiers = f.qualifier_set.all()
            for q in qualifiers:
                if q.qualifierName == 'NOTE':
                    exp = 'influenza virus A hemagglutinin subtype H9'
                    self.assertEqual(exp, q.qualifierValue)

        # ============== tests for mixed mode ==================================
        # The first qualifier of a CDS feature is expected to be the
        # translation, given as one-letter codes.
        translQualifier_seq1 = features_1004_1[1].qualifier_set.all()[0]
        self.assertEqual("translation", translQualifier_seq1.qualifierName)

        translQualValue_exp = converter_util.oneLetterCode(self.sc1004_seql_st25_sequences[0].residues_prt)
        self.assertEqual(translQualValue_exp, translQualifier_seq1.qualifierValue)

        features_1004_7 = sequence_1004_7.feature_set.all()

        self.assertEqual("CDS", features_1004_7[1].featureKey)
        self.assertEqual("(1)..(84)", features_1004_7[1].location)

        translQualifier7_1 = features_1004_7[1].qualifier_set.all()[0]
        self.assertEqual("translation", translQualifier7_1.qualifierName)

        translQualifier7_2 = features_1004_7[2].qualifier_set.all()[0]
        self.assertEqual("translation", translQualifier7_2.qualifierName)

        translation1 = converter_util.oneLetterCode('MetLysLysSerLeuValLeuLysAlaSerValAlaValAlaThrLeuValProMetLeuSerPheAlaAlaGluGlyGluPhe')
        translation2 = converter_util.oneLetterCode('AspProAlaLysAlaAlaPheAspSerLeuGlnAlaSerAlaThrGluTyrIleGlyTyrAlaTrpAlaMetValValValIleValGlyAlaThrIleGlyIleLysLeuPheLysLysPheThrSerLysAlaSer')

        self.assertEqual(translation1, translQualifier7_1.qualifierValue)
        self.assertEqual(translation2, translQualifier7_2.qualifierValue)

        # ============== tests for simple feature conversion ==================================
        # The first feature (index 0) is treated as the source feature; it
        # must carry a 'note' qualifier with the expected value.
        sourceFeature_1004_7 = features_1004_7[0]
        noteQual = sourceFeature_1004_7.qualifier_set.get(qualifierName='note')
        self.assertEqual('pc89 major coat protein PVIII', noteQual.qualifierValue)