Пример #1
0
 def getProteinGenbankDescription(self,directory,proteinID):
     """Build a description string for a protein from its GenBank file.

     Reads the first record of ``<directory>/<proteinID>.gbk`` and, for each
     feature of that record, appends the formatted first value of its
     "note", "function" and "product" qualifiers (in that order). The pieces
     are concatenated with no separator added by this method.

     Args:
         directory: Directory that contains the ``.gbk`` file.
         proteinID: File name (without the ``.gbk`` extension) to read.

     Returns:
         The concatenated text, or an empty string when no feature carries
         any of the three qualifiers.

     Raises:
         IOError/OSError: If the file cannot be opened.
         StopIteration: If the file yields no GenBank record.
     """
     fname = "%s/%s.gbk"%(directory,proteinID)
     # The context manager closes the handle that was previously leaked, and
     # the builtin next() works on Python 2.6+ and 3 (iterator.next() is
     # Python 2 only).
     with open(fname) as handle:
         seq_record = next(SeqIO.parse(handle, "genbank"))

     pieces = []
     for feature in seq_record.features:
         qualifiers = feature.qualifiers
         try:
             for key in ("note", "function", "product"):
                 if key in qualifiers:
                     pieces.append(text.formatText(qualifiers[key][0]))
         except KeyError:
             # Best effort, as before: keep what was collected so far for
             # this feature and move on to the next one.
             continue
     return "".join(pieces)
Пример #2
0
 def buildProteinTable(self):
     """Fill ``self.proteinDict`` with a description for every protein.

     For each path in ``self.genbank_files`` the first GenBank record is
     read. Every feature that has a "protein_id" qualifier gets an entry
     ``self.proteinDict[protein_id]`` whose value is the concatenation of the
     formatted first values of its "note", "function" and "product"
     qualifiers (in that order, no separator added here). Features without
     a "protein_id" are skipped.

     Raises:
         IOError/OSError: If a file cannot be opened.
         StopIteration: If a file yields no GenBank record.
     """
     for genbank_file in self.genbank_files:
         # Close each handle deterministically (previously leaked, one per
         # file) and use the builtin next(), which works on Python 2.6+ and
         # 3 (iterator.next() is Python 2 only).
         with open(genbank_file) as handle:
             seq_record = next(SeqIO.parse(handle, "genbank"))

         for feature in seq_record.features:
             qualifiers = feature.qualifiers
             try:
                 # A missing "protein_id" raises KeyError -> feature skipped.
                 proteinID = qualifiers["protein_id"][0]
                 note = "".join(
                     text.formatText(qualifiers[key][0])
                     for key in ("note", "function", "product")
                     if key in qualifiers
                 )
                 self.proteinDict[proteinID] = note
             except KeyError:
                 continue
Пример #3
0
def insertGenbankProteins(genbank_files,dbout):
    """Insert protein descriptions from GenBank files into a SQLite table.

    For each path in ``genbank_files`` the first GenBank record is read.
    Every feature that has a "protein_id" qualifier produces one row in the
    ``protein_text(protein_id, note)`` table, where ``note`` is the
    concatenation of the formatted first values of the feature's "note",
    "function" and "product" qualifiers (in that order). Features without a
    "protein_id" are skipped. Inserts are committed once per input file.

    Args:
        genbank_files: Iterable of GenBank file paths.
        dbout: Database path passed to ``sqlite3.connect``; the
            ``protein_text`` table is expected to exist already.

    Raises:
        IOError/OSError: If a file cannot be opened.
        StopIteration: If a file yields no GenBank record.
        sqlite3.Error: If the insert or commit fails.
    """
    db = sqlite3.connect(dbout)
    try:
        cursor = db.cursor()
        for genbank_file in genbank_files:
            # Close each handle deterministically (previously leaked) and
            # use the builtin next(), which works on Python 2.6+ and 3.
            with open(genbank_file) as handle:
                seq_record = next(SeqIO.parse(handle, "genbank"))

            for feature in seq_record.features:
                qualifiers = feature.qualifiers
                try:
                    # A missing "protein_id" raises KeyError -> row skipped.
                    proteinID = qualifiers["protein_id"][0]
                    note = "".join(
                        text.formatText(qualifiers[key][0])
                        for key in ("note", "function", "product")
                        if key in qualifiers
                    )
                    cursor.execute(
                        'INSERT INTO protein_text(protein_id,note) VALUES(?,?)',
                        (proteinID, note),
                    )
                except KeyError:
                    continue
            db.commit()
    finally:
        # Release the connection even when parsing or insertion fails.
        db.close()
Пример #4
0
 def filterWords(self, words):
     """Return the formatted form of every word longer than one character.

     Words of length 0 or 1 are dropped; each remaining word is passed
     through ``text.formatText`` and the results keep the input order.
     """
     kept = []
     for word in words:
         if len(word) <= 1:
             # Too short to be meaningful; skip it.
             continue
         kept.append(text.formatText(word))
     return kept