def setRow_header(self):
    """Append the general-information row for the sequence listing header.

    The row compares the length of the raw ST.25 header with the length of
    the cleaned header and records the tag length of the ST.26 root element
    ``ST26SequenceListing``. The ST.26 total equals the bare tag length,
    as the row comment states that the ST.25 header is discarded.
    """
    element_st26 = 'ST26SequenceListing'
    raw_header_length = cu.safeLength(self.seql_raw.seqlHeader)
    clean_header_length = cu.safeLength(self.seql.generalInformation.seqlHeader)
    tag_length = cu.TAG_LENGTH_ST26[element_st26]

    header_row = [
        0,                      # no ST.25 numeric identifier
        0,                      # not tied to a sequence
        raw_header_length,
        clean_header_length,
        tag_length,
        tag_length,             # no value carried over, only the tag
        element_st26,
        'ST.25 seqlHeader discarded',
    ]
    self.generalInformationRows.append(header_row)
def setRow_header(self):
    """Record the header row in ``self.generalInformationRows``.

    Columns: ST.25 tag (0), sequence id (0), raw header length, cleaned
    header length, ST.26 tag length, ST.26 total length (tag only),
    ST.26 element name and a comment.
    """
    st26_name = 'ST26SequenceListing'
    lengths_st25 = [
        cu.safeLength(self.seql_raw.seqlHeader),
        cu.safeLength(self.seql.generalInformation.seqlHeader),
    ]
    # The tag length is used twice: once as the tag length itself and once
    # as the total ST.26 length, since no header text is carried over.
    lengths_st26 = [cu.TAG_LENGTH_ST26[st26_name]] * 2

    self.generalInformationRows.append(
        [0, 0] + lengths_st25 + lengths_st26
        + [st26_name, 'ST.25 seqlHeader discarded']
    )
def createQualifierValue(tag_st25, element_st25, value_st25, msg):
    """Append an ``INSDQualifier_value`` row to ``res``.

    ``res`` and ``currentSeqId`` are not parameters; they are read from the
    surrounding scope.

    Args:
        tag_st25: ST.25 numeric identifier the value originates from.
        element_st25: raw ST.25 element; only its length is recorded.
        value_st25: parsed value; only its length is recorded.
        msg: comment stored in the last column of the row.
    """
    st26_element = 'INSDQualifier_value'
    raw_length = cu.safeLength(element_st25)
    value_length = cu.safeLength(value_st25)
    tag_length = cu.TAG_LENGTH_ST26[st26_element]

    res.append([
        tag_st25,
        currentSeqId,
        raw_length,
        value_length,
        tag_length,
        value_length + tag_length,
        st26_element,
        msg,
    ])
def _getSt25St26Lengths(self, element_st25_tag, seqIdNo, element_st25, value_st25, element_st26, comment):
    """Return one comparison row for an ST.25 element and its ST.26 element.

    Args:
        element_st25_tag: ST.25 numeric identifier stored in column 0.
        seqIdNo: sequence identifier stored in column 1.
        element_st25: raw ST.25 element; its length goes to column 2.
        value_st25: parsed value; its length goes to column 3.
        element_st26: key into ``cu.TAG_LENGTH_ST26``, or ``'-'`` when there
            is no ST.26 element, in which case both ST.26 lengths are 0.
        comment: free text stored in the last column.

    Returns:
        list: ``[tag, seqIdNo, raw length, value length, ST.26 tag length,
        ST.26 tag + value length, element_st26, comment]``.
    """
    raw_length = cu.safeLength(element_st25)
    value_length = cu.safeLength(value_st25)

    if element_st26 == '-':
        # No ST.26 counterpart: nothing is counted on the ST.26 side.
        tag_length = 0
        total_length = 0
    else:
        tag_length = cu.TAG_LENGTH_ST26[element_st26]
        total_length = tag_length + value_length

    return [
        element_st25_tag,
        seqIdNo,
        raw_length,
        value_length,
        tag_length,
        total_length,
        element_st26,
        comment,
    ]
def createQualifierValue(tag_st25, element_st25, value_st25, msg):
    """Add the row describing a qualifier value to the shared ``res`` list.

    The ST.26 total length is the length of the parsed value plus the tag
    length of ``INSDQualifier_value``. ``res`` and ``currentSeqId`` come
    from the enclosing scope.
    """
    name_st26 = 'INSDQualifier_value'
    length_of_value = cu.safeLength(value_st25)
    length_of_tag = cu.TAG_LENGTH_ST26[name_st26]

    row = [tag_st25, currentSeqId, cu.safeLength(element_st25), length_of_value]
    row += [length_of_tag, length_of_value + length_of_tag]
    row += [name_st26, msg]
    res.append(row)
def _getSt25St26Lengths(self, element_st25_tag, seqIdNo, element_st25, value_st25, element_st26, comment):
    """Build the 8-column length row for one ST.25/ST.26 element pair.

    When ``element_st26`` is ``'-'`` the two ST.26 columns are 0; otherwise
    they hold the tag length from ``cu.TAG_LENGTH_ST26`` and that tag length
    plus the length of ``value_st25``.
    """
    st25_lengths = (cu.safeLength(element_st25), cu.safeLength(value_st25))

    has_st26_element = element_st26 != '-'
    st26_tag_length = cu.TAG_LENGTH_ST26[element_st26] if has_st26_element else 0
    st26_total_length = st26_tag_length + st25_lengths[1] if has_st26_element else 0

    row = [element_st25_tag, seqIdNo]
    row.extend(st25_lengths)
    row.extend((st26_tag_length, st26_total_length, element_st26, comment))
    return row
def setRow_prio(self):
    """Append the priority row to ``self.generalInformationRows``.

    Only the first cleaned priority (application number and filing date) is
    measured; if there is none, the value length is 0. The ST.26 tag length
    is the sum of the four elements that make up
    ``EarliestPriorityApplicationIdentification``.
    """
    raw_length = cu.safeLength(self.seql_raw.priorities)

    priorities = self.seql.generalInformation.priority
    if priorities:
        first = priorities[0]
        value_length = cu.safeLength(first[0]) + cu.safeLength(first[1])
    else:
        value_length = 0

    st26_elements = (
        'EarliestPriorityApplicationIdentification',
        'IPOfficeCode',
        'ApplicationNumberText',
        'FilingDate',
    )
    tag_length = sum(cu.TAG_LENGTH_ST26[name] for name in st26_elements)

    self.generalInformationRows.append([
        'prio',
        0,
        raw_length,
        value_length,
        tag_length,
        value_length + tag_length,
        'EarliestPriorityApplicationIdentification',
        'only first ST.25 priority retained, if any',
    ])
def setRow_prio(self):
    """Record the row comparing ST.25 priorities with the ST.26 priority element.

    The parsed-value length covers the application number and filing date
    of the first cleaned priority only (0 when no priority is present).
    """
    tags = cu.TAG_LENGTH_ST26
    row = ['prio', 0, cu.safeLength(self.seql_raw.priorities)]

    cleaned = self.seql.generalInformation.priority
    # Index access rather than unpacking: a priority entry may hold more
    # than the two items that are measured here.
    first_priority_length = (
        cu.safeLength(cleaned[0][0]) + cu.safeLength(cleaned[0][1])
        if cleaned else 0
    )
    row.append(first_priority_length)

    st26_tag_length = (
        tags['EarliestPriorityApplicationIdentification']
        + tags['IPOfficeCode']
        + tags['ApplicationNumberText']
        + tags['FilingDate']
    )
    row.extend([
        st26_tag_length,
        first_priority_length + st26_tag_length,
        'EarliestPriorityApplicationIdentification',
        'only first ST.25 priority retained, if any',
    ])
    self.generalInformationRows.append(row)
def test_safeLength(self):
    """Check ``cu.safeLength`` for ``None``, a short string and a multi-line string."""
    expectations = (
        (0, None),
        (3, 'abc'),
        (20, '<400> 40\r\n\r\nMet Ser'),
    )
    for expected_length, candidate in expectations:
        self.assertEqual(expected_length, cu.safeLength(candidate))
def setSequenceRows(self):
    """Build the ST.25 vs. ST.26 length rows for every sequence.

    Every row is an 8-item list:
    ``[ST.25 tag, seqIdNo, raw ST.25 length, parsed value length,
    ST.26 tag length, ST.26 tag + value length, ST.26 element name,
    comment]``.

    Raw sequences (``self.seql_raw.raw_sequences``) are paired with the
    parsed sequences yielded by ``self.seql.generateSequence()`` strictly by
    position, and likewise the raw and parsed features of each sequence.
    Pairing by position (instead of ``list.index``) avoids a linear scan per
    item and cannot pick the wrong partner when two raw items compare equal.

    Side effects:
        Increments ``self.seql.quantity_prt``, ``quantity_nuc`` and
        ``quantity_mix`` while collecting the parsed sequences.

    Returns:
        list: all rows, in the order the elements are emitted.

    Raises:
        IndexError: if there are fewer parsed than raw sequences, or fewer
            parsed than raw features for a sequence.
    """
    res = []

    def appendSt26Row(seqId, element_st26, valueLength, msg):
        # Row for an element that exists only on the ST.26 side, so both
        # ST.25 columns (tag and lengths) are 0.
        tagLength = cu.TAG_LENGTH_ST26[element_st26]
        res.append([0, seqId, 0, 0, tagLength, valueLength + tagLength,
                    element_st26, msg])

    def createQualifier(seqId, name, msg):
        # An INSDQualifier wrapper followed by its INSDQualifier_name child.
        appendSt26Row(seqId, 'INSDQualifier', 0, msg)
        appendSt26Row(seqId, 'INSDQualifier_name', len(name), msg)

    def createQualifierValue(seqId, tag_st25, element_st25, value_st25, msg):
        # INSDQualifier_value row whose value comes from an ST.25 element.
        tagLength = cu.TAG_LENGTH_ST26['INSDQualifier_value']
        res.append([tag_st25, seqId,
                    cu.safeLength(element_st25), cu.safeLength(value_st25),
                    tagLength, cu.safeLength(value_st25) + tagLength,
                    'INSDQualifier_value', msg])

    parsedSequences = []
    for s in self.seql.generateSequence():
        parsedSequences.append(s)
        # TODO: test
        if s.molType == 'PRT':
            self.seql.quantity_prt += 1
        else:
            self.seql.quantity_nuc += 1
        # NOTE(review): mixed mode is counted independently of molType here;
        # confirm against the intended nesting.
        if s.mixedMode:
            self.seql.quantity_mix += 1

    for currentIndex, seq in enumerate(self.seql_raw.raw_sequences):
        parsedSequence = parsedSequences[currentIndex]
        currentSeqId = parsedSequence.seqIdNo

        # ====================== 210 ======================
        res.append(self._getSt25St26Lengths(
            0, currentSeqId, '-', '-', 'SequenceData', 'ST.26 specific element'))
        res.append(self._getSt25St26Lengths(
            210, currentSeqId, seq.seqIdNo, parsedSequence.seqIdNo,
            'sequenceIDNumber', '-'))
        res.append(self._getSt25St26Lengths(
            0, currentSeqId, '-', '-', 'INSDSeq', 'ST.26 specific element'))

        # ====================== 211 ======================
        res.append(self._getSt25St26Lengths(
            211, currentSeqId, seq.length, parsedSequence.length,
            'INSDSeq_length', cu.BLANK_PLACEHOLDER))

        # ====================== 212 ======================
        moltypeValue = 'AA' if parsedSequence.molType == 'PRT' else parsedSequence.molType
        res.append([
            212, currentSeqId,
            cu.safeLength(seq.molType),
            cu.safeLength(parsedSequence.molType),
            cu.TAG_LENGTH_ST26['INSDSeq_moltype'],
            cu.safeLength(moltypeValue) + cu.TAG_LENGTH_ST26['INSDSeq_moltype'],
            'INSDSeq_moltype',
            'PRT replaced by AA for protein raw_sequences'
            if moltypeValue == 'AA' else cu.BLANK_PLACEHOLDER,
        ])

        # ====================== INSDSeq_division ======================
        INSDSeq_division_val = 'PAT'
        res.append(self._getSt25St26Lengths(
            0, currentSeqId, '-', INSDSeq_division_val, 'INSDSeq_division',
            'ST.26 specific element'))

        # ====================== INSDSeq_other-seqids ======================
        # optional element, therefore not included in calculations

        # ====================== INSDSeq_feature-table ======================
        res.append(self._getSt25St26Lengths(
            0, currentSeqId, '-', '-', 'INSDSeq_feature-table',
            'ST.26 specific element'))

        # ====================== 213 ======================
        # create ST.26 feature source
        sourceMsg = 'ST.26 mandatory feature source'
        appendSt26Row(currentSeqId, 'INSDFeature', 0, sourceMsg)
        appendSt26Row(currentSeqId, 'INSDFeature_key', len('source'), sourceMsg)
        sourceLocation = '1..%s' % parsedSequence.length
        appendSt26Row(currentSeqId, 'INSDFeature_location',
                      len(sourceLocation), sourceMsg)
        # add first the parent element INSDFeature_quals
        appendSt26Row(currentSeqId, 'INSDFeature_quals', 0, sourceMsg)

        # qualifier organism
        createQualifier(currentSeqId, 'organism',
                        'ST.26 mandatory qualifier organism')
        createQualifierValue(currentSeqId, 213, seq.organism,
                             parsedSequence.organism,
                             'ST.26 mandatory qualifier organism')

        # qualifier mol_type; its value row keeps the ST.25 columns at 0
        mol_typeValue = 'protein' if parsedSequence.molType == 'PRT' else 'genomic DNA'
        createQualifier(currentSeqId, 'mol_type',
                        'ST.26 mandatory qualifier mol_type')
        appendSt26Row(currentSeqId, 'INSDQualifier_value',
                      cu.safeLength(mol_typeValue),
                      'ST.26 mandatory qualifier mol_type')
        # end create ST.26 feature source

        # ====================== other features ======================
        parsedFeatures = parsedSequence.features
        for currentFeatureIndex, feat in enumerate(seq.features):
            parsedFeature = parsedFeatures[currentFeatureIndex]

            # A feature with neither key nor location gets no 220-222 rows.
            isSimpleFeature = (parsedFeature.key == cu.BLANK_PLACEHOLDER
                               and parsedFeature.location == cu.BLANK_PLACEHOLDER)

            if not isSimpleFeature:
                # ====================== 220 ======================
                res.append(self._getSt25St26Lengths(
                    220, currentSeqId, feat.featureHeader,
                    parsedFeature.featureHeader, 'INSDFeature',
                    cu.BLANK_PLACEHOLDER))

                # ====================== 221 ======================
                res.append(self._getSt25St26Lengths(
                    221, currentSeqId, feat.key, parsedFeature.key,
                    'INSDFeature_key', cu.BLANK_PLACEHOLDER))

                # ============ row for mixed mode translation qualifier ============
                if parsedFeature.key == 'CDS':
                    createQualifier(currentSeqId, 'translation',
                                    'ST.26 specific element translation')
                    res.append([
                        400, currentSeqId, 0,
                        cu.safeLength(parsedFeature.translation),
                        cu.TAG_LENGTH_ST26['INSDQualifier_value'],
                        (cu.TAG_LENGTH_ST26['INSDQualifier_value']
                         + len(cu.oneLetterCode(parsedFeature.translation))),
                        'INSDQualifier_value', '3-to-1 letter code',
                    ])

                # ====================== 222 ======================
                res.append(self._getSt25St26Lengths(
                    222, currentSeqId, feat.location, parsedFeature.location,
                    'INSDFeature_location', cu.BLANK_PLACEHOLDER))

            # ====================== 223 ======================
            if parsedFeature.description != cu.BLANK_PLACEHOLDER:
                # do not add row if 223 missing!
                appendSt26Row(currentSeqId, 'INSDFeature_quals', 0,
                              'ST.26 mandatory element')
                createQualifier(currentSeqId, 'note', cu.BLANK_PLACEHOLDER)
                createQualifierValue(currentSeqId, 223, feat.description,
                                     parsedFeature.description,
                                     cu.BLANK_PLACEHOLDER)

        # ====================== 400 ======================
        if parsedSequence.molType == 'PRT':
            parsedResidues = parsedSequence.residues_prt
            # The ST.26 length is measured on the one-letter form.
            currentRow400 = [
                400, currentSeqId,
                cu.safeLength(seq.residues),
                cu.safeLength(parsedResidues),
                cu.TAG_LENGTH_ST26['INSDSeq_sequence'],
                (cu.TAG_LENGTH_ST26['INSDSeq_sequence']
                 + len(cu.oneLetterCode(parsedResidues))),
                'INSDSeq_sequence', '3-to-1 letter code',
            ]
        else:
            parsedResidues = parsedSequence.residues_nuc
            currentRow400 = self._getSt25St26Lengths(
                400, currentSeqId, seq.residues, parsedResidues,
                'INSDSeq_sequence', cu.BLANK_PLACEHOLDER)
        res.append(currentRow400)

    return res
def setSequenceRows(self):
    """Return the length-comparison rows (ST.25 vs. ST.26) for all sequences.

    For each raw sequence the rows are emitted in this order: SequenceData,
    210, INSDSeq, 211, 212, INSDSeq_division, INSDSeq_feature-table, the
    ST.26 ``source`` feature with its ``organism`` (213) and ``mol_type``
    qualifiers, the rows of every feature (220, 221, optional translation,
    222, optional 223 note) and finally the residues (400).

    The raw item at position ``i`` is matched with the parsed item at
    position ``i``. Positions come from ``enumerate`` rather than
    ``list.index``: ``index`` rescans the list for every item and returns
    the first *equal* element, which is the wrong partner when two raw
    items compare equal.

    Side effects:
        Updates the ``quantity_prt`` / ``quantity_nuc`` / ``quantity_mix``
        counters on ``self.seql``.

    Returns:
        list: rows of 8 items each (ST.25 tag, seqIdNo, raw length, parsed
        length, ST.26 tag length, ST.26 total length, ST.26 element name,
        comment).

    Raises:
        IndexError: when a raw sequence or raw feature has no parsed
            counterpart at the same position.
    """
    res = []

    parsedSequences = []
    for s in self.seql.generateSequence():
        parsedSequences.append(s)
        # TODO: test
        if s.molType == 'PRT':
            self.seql.quantity_prt += 1
        else:
            self.seql.quantity_nuc += 1
        # NOTE(review): assumed to be checked for every sequence, not only
        # in the non-PRT branch; confirm.
        if s.mixedMode:
            self.seql.quantity_mix += 1

    for currentIndex, seq in enumerate(self.seql_raw.raw_sequences):
        parsedSequence = parsedSequences[currentIndex]
        currentSeqId = parsedSequence.seqIdNo

        # ---- helpers bound to the current sequence id ----
        def append_INSDFeature_quals(msg, currentSeqId=currentSeqId):
            res.append([
                0, currentSeqId, 0, 0,
                cu.TAG_LENGTH_ST26['INSDFeature_quals'],
                cu.TAG_LENGTH_ST26['INSDFeature_quals'],
                'INSDFeature_quals', msg
            ])

        def createQualifier(name, msg, currentSeqId=currentSeqId):
            res.append([
                0, currentSeqId, 0, 0,
                cu.TAG_LENGTH_ST26['INSDQualifier'],
                cu.TAG_LENGTH_ST26['INSDQualifier'],
                'INSDQualifier', msg
            ])
            res.append([
                0, currentSeqId, 0, 0,
                cu.TAG_LENGTH_ST26['INSDQualifier_name'],
                len(name) + cu.TAG_LENGTH_ST26['INSDQualifier_name'],
                'INSDQualifier_name', msg
            ])

        def createQualifierValue(tag_st25, element_st25, value_st25, msg,
                                 currentSeqId=currentSeqId):
            res.append([
                tag_st25, currentSeqId,
                cu.safeLength(element_st25),
                cu.safeLength(value_st25),
                cu.TAG_LENGTH_ST26['INSDQualifier_value'],
                cu.safeLength(value_st25) + cu.TAG_LENGTH_ST26['INSDQualifier_value'],
                'INSDQualifier_value', msg
            ])

        # ====================== 210 ======================
        currentRow_SequenceData = self._getSt25St26Lengths(
            0, currentSeqId, '-', '-', 'SequenceData', 'ST.26 specific element')
        res.append(currentRow_SequenceData)

        currentRow210 = self._getSt25St26Lengths(
            210, currentSeqId, seq.seqIdNo, parsedSequence.seqIdNo,
            'sequenceIDNumber', '-')
        res.append(currentRow210)

        currentRow_INSDSeq = self._getSt25St26Lengths(
            0, currentSeqId, '-', '-', 'INSDSeq', 'ST.26 specific element')
        res.append(currentRow_INSDSeq)

        # ====================== 211 ======================
        currentRow211 = self._getSt25St26Lengths(
            211, currentSeqId, seq.length, parsedSequence.length,
            'INSDSeq_length', cu.BLANK_PLACEHOLDER)
        res.append(currentRow211)

        # ====================== 212 ======================
        moltypeValue = 'AA' if parsedSequence.molType == 'PRT' else parsedSequence.molType
        currentRow212 = [
            212, currentSeqId,
            cu.safeLength(seq.molType),
            cu.safeLength(parsedSequence.molType),
            cu.TAG_LENGTH_ST26['INSDSeq_moltype'],
            cu.safeLength(moltypeValue) + cu.TAG_LENGTH_ST26['INSDSeq_moltype'],
            'INSDSeq_moltype',
            'PRT replaced by AA for protein raw_sequences'
            if moltypeValue == 'AA' else cu.BLANK_PLACEHOLDER
        ]
        res.append(currentRow212)

        # ====================== INSDSeq_division ======================
        INSDSeq_division_val = 'PAT'
        currentRow_INSDSeq_division = self._getSt25St26Lengths(
            0, currentSeqId, '-', INSDSeq_division_val, 'INSDSeq_division',
            'ST.26 specific element')
        res.append(currentRow_INSDSeq_division)

        # ====================== INSDSeq_other-seqids ======================
        # optional element, therefore not included in calculations

        # ====================== INSDSeq_feature-table ======================
        currentRow_INSDSeq_feature_table = self._getSt25St26Lengths(
            0, currentSeqId, '-', '-', 'INSDSeq_feature-table',
            'ST.26 specific element')
        res.append(currentRow_INSDSeq_feature_table)

        # ====================== 213 ======================
        # create ST.26 feature source
        currentRow_INSDFeature = [
            0, currentSeqId, 0, 0,
            cu.TAG_LENGTH_ST26['INSDFeature'],
            cu.TAG_LENGTH_ST26['INSDFeature'],
            'INSDFeature', 'ST.26 mandatory feature source'
        ]
        res.append(currentRow_INSDFeature)

        currentRow_INSDFeature_key = [
            0, currentSeqId, 0, 0,
            cu.TAG_LENGTH_ST26['INSDFeature_key'],
            len('source') + cu.TAG_LENGTH_ST26['INSDFeature_key'],
            'INSDFeature_key', 'ST.26 mandatory feature source'
        ]
        res.append(currentRow_INSDFeature_key)

        sourceLocation = '1..%s' % parsedSequence.length
        currentRow_INSDFeature_location = [
            0, currentSeqId, 0, 0,
            cu.TAG_LENGTH_ST26['INSDFeature_location'],
            len(sourceLocation) + cu.TAG_LENGTH_ST26['INSDFeature_location'],
            'INSDFeature_location', 'ST.26 mandatory feature source'
        ]
        res.append(currentRow_INSDFeature_location)

        # add first the parent element INSDFeature_quals
        append_INSDFeature_quals('ST.26 mandatory feature source')

        # qualifier organism
        createQualifier('organism', 'ST.26 mandatory qualifier organism')
        createQualifierValue(213, seq.organism, parsedSequence.organism,
                             'ST.26 mandatory qualifier organism')

        # qualifier mol_type: built inline so the ST.25 columns stay 0
        mol_typeValue = 'protein' if parsedSequence.molType == 'PRT' else 'genomic DNA'
        createQualifier('mol_type', 'ST.26 mandatory qualifier mol_type')
        res.append([
            0, currentSeqId, 0, 0,
            cu.TAG_LENGTH_ST26['INSDQualifier_value'],
            cu.safeLength(mol_typeValue) + cu.TAG_LENGTH_ST26['INSDQualifier_value'],
            'INSDQualifier_value', 'ST.26 mandatory qualifier mol_type'
        ])
        # end create ST.26 feature source

        # ====================== other features ======================
        parsedFeatures = parsedSequence.features
        for currentFeatureIndex, feat in enumerate(seq.features):
            parsedFeature = parsedFeatures[currentFeatureIndex]

            isSimpleFeature = False
            if (parsedFeature.key == cu.BLANK_PLACEHOLDER
                    and parsedFeature.location == cu.BLANK_PLACEHOLDER):
                isSimpleFeature = True

            if not isSimpleFeature:
                # ====================== 220 ======================
                currentRow220 = self._getSt25St26Lengths(
                    220, currentSeqId, feat.featureHeader,
                    parsedFeature.featureHeader, 'INSDFeature',
                    cu.BLANK_PLACEHOLDER)
                res.append(currentRow220)

                # ====================== 221 ======================
                currentRow221 = self._getSt25St26Lengths(
                    221, currentSeqId, feat.key, parsedFeature.key,
                    'INSDFeature_key', cu.BLANK_PLACEHOLDER)
                res.append(currentRow221)

                # ==== add row for mixed mode translation qualifier ====
                if parsedFeature.key == 'CDS':
                    createQualifier('translation',
                                    'ST.26 specific element translation')
                    translationRow = [
                        400, currentSeqId, 0,
                        cu.safeLength(parsedFeature.translation),
                        cu.TAG_LENGTH_ST26['INSDQualifier_value'],
                        (cu.TAG_LENGTH_ST26['INSDQualifier_value']
                         + len(cu.oneLetterCode(parsedFeature.translation))),
                        'INSDQualifier_value', '3-to-1 letter code'
                    ]
                    res.append(translationRow)

                # ====================== 222 ======================
                currentRow222 = self._getSt25St26Lengths(
                    222, currentSeqId, feat.location, parsedFeature.location,
                    'INSDFeature_location', cu.BLANK_PLACEHOLDER)
                res.append(currentRow222)

            # ====================== 223 ======================
            if parsedFeature.description != cu.BLANK_PLACEHOLDER:
                # do not add row if 223 missing!
                append_INSDFeature_quals('ST.26 mandatory element')
                createQualifier('note', cu.BLANK_PLACEHOLDER)
                createQualifierValue(223, feat.description,
                                     parsedFeature.description,
                                     cu.BLANK_PLACEHOLDER)

        # ====================== 400 ======================
        if parsedSequence.molType == 'PRT':
            parsedResidues = parsedSequence.residues_prt
            currentRow400 = [
                400, currentSeqId,
                cu.safeLength(seq.residues),
                cu.safeLength(parsedResidues),
                cu.TAG_LENGTH_ST26['INSDSeq_sequence'],
                (cu.TAG_LENGTH_ST26['INSDSeq_sequence']
                 + len(cu.oneLetterCode(parsedResidues))),
                'INSDSeq_sequence', '3-to-1 letter code'
            ]
        else:
            parsedResidues = parsedSequence.residues_nuc
            currentRow400 = self._getSt25St26Lengths(
                400, currentSeqId, seq.residues, parsedResidues,
                'INSDSeq_sequence', cu.BLANK_PLACEHOLDER)
        res.append(currentRow400)

    return res