def t_write_format():
    """Compare parsed-and-rewritten GenBank records against their source.

    Every file named in ``write_format_files`` (looked up inside the
    ``GenBank`` directory) is read twice in lockstep: once through a
    ``RecordParser``, whose record is turned back into text with ``str()``,
    and once through an iterator created without a parser. Each pair is
    handed to ``do_comparison``.
    """
    record_parser = GenBank.RecordParser(debug_level=0)

    for filename in write_format_files:
        print("Testing GenBank writing for %s..." % os.path.basename(filename))

        path = os.path.join("GenBank", filename)
        # Context managers make sure both handles are closed even when the
        # parser or do_comparison raises part-way through a file.
        with open(path, "r") as cur_handle, open(path, "r") as compare_handle:
            iterator = GenBank.Iterator(cur_handle, record_parser)
            compare_iterator = GenBank.Iterator(compare_handle)

            while True:
                cur_record = next(iterator)
                compare_record = next(compare_iterator)

                # Either iterator handing back None ends this file.
                if cur_record is None or compare_record is None:
                    break

                print("\tTesting for %s" % cur_record.version)

                output_record = str(cur_record) + "\n"
                do_comparison(compare_record, output_record)
def write_format(file):
    """Write a GenBank record from a GenBank file and compare them.

    The file is read twice in lockstep: once through a ``RecordParser``
    (the record is converted back to text with ``str()``) and once through
    an iterator created without a parser. Each pair goes to
    ``do_comparison``; a failing comparison is reported on stdout and the
    loop carries on with the next record.

    Args:
        file: path to the GenBank file; a path containing ".gz" is opened
            with ``gzip``.
    """
    record_parser = GenBank.RecordParser(debug_level=2)

    print("Testing GenBank writing for %s..." % os.path.basename(file))

    # Be able to handle gzipped files. gzip.open() treats a bare "r" as
    # binary, so text mode is requested explicitly to match what the
    # built-in open() branch hands to the parser.
    is_gzipped = ".gz" in file
    open_handle = gzip.open if is_gzipped else open
    mode = "rt" if is_gzipped else "r"

    # Both handles are closed on every exit path, including errors that are
    # not AssertionError.
    with open_handle(file, mode) as cur_handle, \
            open_handle(file, mode) as compare_handle:
        iterator = GenBank.Iterator(cur_handle, record_parser)
        compare_iterator = GenBank.Iterator(compare_handle)

        while True:
            cur_record = next(iterator)
            compare_record = next(compare_iterator)

            # Either iterator handing back None ends the run.
            if cur_record is None or compare_record is None:
                break

            output_record = str(cur_record) + "\n"
            try:
                do_comparison(compare_record, output_record)
            except AssertionError as msg:
                # Only records that fail the comparison are named.
                print("\tTesting for %s" % cur_record.version)
                print(msg)
def write_format(file):
    """Round-trip every record of a GenBank file and report mismatches.

    The file is read twice in lockstep: once through a ``RecordParser``
    (the record is converted back to text with ``str()``) and once through
    an iterator created without a parser. Each pair goes to
    ``do_comparison``; a failing comparison is printed and the loop moves
    on to the next record.

    Args:
        file: path to the GenBank file; a path containing '.gz' is opened
            with ``gzip``.
    """
    record_parser = GenBank.RecordParser(debug_level=2)

    # Single-argument print(...) and "except ... as" behave the same on
    # Python 2.7 and Python 3, so the function no longer depends on
    # Python 2-only syntax.
    print("Testing GenBank writing for %s..." % os.path.basename(file))

    # be able to handle gzipped files
    # NOTE(review): on Python 3 gzip.open(..., "r") yields bytes; switch to
    # "rt" once Python 2 support is no longer needed.
    open_handle = gzip.open if '.gz' in file else open

    # The original never closed either handle; the with-block closes both
    # on every exit path.
    with open_handle(file, "r") as cur_handle, \
            open_handle(file, "r") as compare_handle:
        iterator = GenBank.Iterator(cur_handle, record_parser)
        compare_iterator = GenBank.Iterator(compare_handle)

        while True:
            # next() dispatches to .next() on Python 2 and __next__ on 3.
            cur_record = next(iterator)
            compare_record = next(compare_iterator)

            # Either iterator handing back None ends the run.
            if cur_record is None or compare_record is None:
                break

            output_record = str(cur_record) + "\n"
            try:
                do_comparison(compare_record, output_record)
            except AssertionError as msg:
                # Only records that fail the comparison are named.
                print("\tTesting for %s" % cur_record.version)
                print(msg)
def setUp(self):
    """Create an empty "biosql-test" sub-database and a cor6_6.gb iterator.

    Leaves the new sub-database in ``self.db`` and a ``FeatureParser``-backed
    GenBank iterator over ``GenBank/cor6_6.gb`` (relative to the current
    working directory) in ``self.iterator``.
    """
    # Create TESTDB before connecting to it.
    create_database()

    connection = BioSeqDatabase.open_database(
        driver=DBDRIVER,
        user=DBUSER,
        passwd=DBPASSWD,
        host=DBHOST,
        db=TESTDB,
    )

    namespace = "biosql-test"
    # Drop a sub-database of the same name if there is one; a KeyError
    # means there was nothing to remove.
    try:
        connection[namespace]
        connection.remove_database(namespace)
    except KeyError:
        pass
    self.db = connection.new_database(namespace)

    # The GenBank file whose records are going to be put into the database.
    # The handle is left open because the iterator is stored for later use.
    gb_path = os.path.join(os.getcwd(), "GenBank", "cor6_6.gb")
    self.iterator = GenBank.Iterator(open(gb_path, "r"), GenBank.FeatureParser())
def loadData(self, data, dbtype):
    """Load sequence records read from ``data`` into the database.

    Args:
        data: handle given to ``GenBank.Iterator``.
        dbtype: name of the input format; only ``"GenBank"`` is supported.

    Returns:
        ``""`` when the load was committed, otherwise the formatted
        traceback of the failure (the transaction is rolled back first).

    Raises:
        ValueError: if ``dbtype`` is not ``"GenBank"``.
    """
    if dbtype != "GenBank":
        # Raising a plain string is itself a TypeError on Python 2.6+/3,
        # so a real exception type carries the message instead. The
        # one-element tuple keeps %r correct if dbtype is itself a tuple.
        raise ValueError("Unknown dbtype: %r" % (dbtype,))

    # get the GenBank file we are going to put into it
    parser = GenBank.FeatureParser()
    iterator = GenBank.Iterator(data, parser)

    # finally put it in the database
    try:
        self.getDatabase().load(iterator)
    except Exception:
        # Report the failure to the caller as text instead of propagating;
        # KeyboardInterrupt/SystemExit are deliberately not swallowed.
        self.getBioSQLRoot().getDBServer().adaptor.conn.rollback()
        return traceback.format_exc()

    self.getBioSQLRoot().getDBServer().adaptor.conn.commit()
    return ""
def t_cleaning_features():
    """Test the ability to clean up feature values.

    Parses the first record of ``GenBank/arab1.gb`` with a ``FeatureParser``
    that uses ``utils.FeatureValueCleaner`` and asserts that the translation
    qualifier of the record's second feature contains neither spaces nor
    newlines.
    """
    gb_parser = GenBank.FeatureParser(
        feature_cleaner=utils.FeatureValueCleaner())

    # The with-block closes the file even when parsing or an assertion
    # fails; previously close() was only reached on success.
    with open(os.path.join("GenBank", "arab1.gb")) as handle:
        iterator = GenBank.Iterator(handle, gb_parser)
        first_record = next(iterator)

        # test for cleaning of translation
        translation_feature = first_record.features[1]
        test_trans = translation_feature.qualifiers["translation"][0]
        assert " " not in test_trans, \
            "Did not clean spaces out of the translation"
        assert "\012" not in test_trans, \
            "Did not clean newlines out of the translation"
def t_cleaning_features():
    """Test the ability to clean up feature values.

    Parses the first record of ``GenBank/arab1.gb`` with a ``FeatureParser``
    that uses ``utils.FeatureValueCleaner`` and asserts that the translation
    qualifier of the record's second feature contains neither spaces nor
    newlines.
    """
    parser = GenBank.FeatureParser(
        feature_cleaner=utils.FeatureValueCleaner())

    # The file handle used to be left open; the with-block closes it on
    # every exit path, including a failed assertion.
    with open(os.path.join("GenBank", "arab1.gb")) as handle:
        iterator = GenBank.Iterator(handle, parser)
        # next() dispatches to .next() on Python 2 and __next__ on Python 3.
        first_record = next(iterator)

        # test for cleaning of translation
        translation_feature = first_record.features[1]
        test_trans = translation_feature.qualifiers["translation"][0]
        assert " " not in test_trans, \
            "Did not clean spaces out of the translation"
        assert "\012" not in test_trans, \
            "Did not clean newlines out of the translation"
def load_database(gb_handle):
    """Load a GenBank file into a BioSQL database.

    This is useful for running tests against a newly created database.

    Args:
        gb_handle: handle given to ``GenBank.Iterator``; its records are
            loaded into a new "biosql-test" sub-database.
    """
    create_database()

    # now open a connection to load the database
    db_name = "biosql-test"
    server = BioSeqDatabase.open_database(driver=DBDRIVER,
                                          user=DBUSER, passwd=DBPASSWD,
                                          host=DBHOST, db=TESTDB)
    try:
        db = server.new_database(db_name)

        # get the GenBank file we are going to put into it
        parser = GenBank.FeatureParser()
        iterator = GenBank.Iterator(gb_handle, parser)

        # finally put it in the database
        db.load(iterator)
        server.adaptor.conn.commit()
    finally:
        # Close the connection even when creating, loading or committing
        # fails, so a failed load does not leave it open.
        server.adaptor.conn.close()
# files_to_parse += [os.path.join(os.getcwd(), 'GenBank', 'biojava_test.gb')] # test the parsers feature_parser = GenBank.FeatureParser(debug_level=0) record_parser = GenBank.RecordParser(debug_level=0) all_parsers = [feature_parser, record_parser] print("Testing parsers...") for parser in all_parsers: for filename in files_to_parse: if not os.path.isfile(filename): print("Missing test input file: %s" % filename) continue handle = open(filename, 'r') iterator = GenBank.Iterator(handle, parser) while True: with warnings.catch_warnings(): warnings.simplefilter("ignore", BiopythonParserWarning) # e.g. BiopythonParserWarning: Premature end of file in sequence data cur_record = next(iterator) if cur_record is None: break if isinstance(parser, GenBank.FeatureParser): print("***Record from %s with the FeatureParser" % filename.split(os.path.sep)[-1]) print("Seq: %r" % cur_record.seq) print("Id: %s" % cur_record.id)
import sys # GenBank from Bio import GenBank verbose = 0 if len(sys.argv) != 2: print("Usage ./find_parser_problems <GenBank file to parse>") sys.exit() feature_parser = GenBank.FeatureParser(debug_level=0) parser = GenBank.ErrorParser(feature_parser) handle = open(sys.argv[1], 'r') iterator = GenBank.Iterator(handle, parser, has_header=1) while 1: have_record = 0 while have_record == 0: try: cur_record = iterator.next() have_record = 1 except GenBank.ParserFailureError as msg: print "Parsing Problem:", msg sys.exit() if cur_record is None: break
#!/usr/bin/env python
"""Fetch GenBank records from NCBI Entrez and load them into a BioSQL database.

The optional first command-line argument names a file holding one accession
per line; it defaults to ``accession.lst``.
"""
from Bio import GenBank
from Bio import Entrez
from BioSQL import BioSeqDatabase
import sys

# Should read these from settings at some point
dbpath = 'biosql.sqlite3'
dbname = 'local_db'
Entrez.email = '*****@*****.**'

server = BioSeqDatabase.open_database(driver='sqlite3', db=dbpath)
db = server[dbname]
parser = GenBank.FeatureParser()


def loadgb(_id):
    """Fetch nucleotide record ``_id`` from Entrez and load it into ``db``.

    Returns whatever ``db.load`` returns.
    """
    handle = Entrez.efetch(db='nucleotide', id=_id, rettype='gb',
                           retmode='text')
    try:
        return db.load(GenBank.Iterator(handle, parser))
    finally:
        # Release the Entrez response once its records have been loaded.
        handle.close()


ACCESSIONS_FILE = 'accession.lst' if len(sys.argv) < 2 else sys.argv[1]

with open(ACCESSIONS_FILE) as accessions_handle:
    for line in accessions_handle:
        # Lines keep their trailing newline when iterating a file; strip it
        # so it is neither sent to Entrez nor printed, and skip blank lines.
        accession = line.strip()
        if not accession:
            continue
        print("Loading %s" % accession)
        loadgb(accession)

# NOTE(review): a single commit after all accessions are loaded - confirm
# that per-accession commits were not intended.
server.adaptor.commit()
"Leptocephalus sp. 'type II larva' (Smith, 1989)": "Leptocephalus sp. 'type II larva'", "Humphaplotropis culaishanensis": "Humphaplotropis culaishanensis (nomen nudum)", "Paraglypturus tonganus": "Paraglypturus tonganus (nomen nudum)", "Hoploplana elisabelloi": "Hoploplana elisabelloi (nomen nudum)", "Palpitomonas bilix Eukaryota.": "Palpitomonas bilix", "Eukaryota sp. BB2 Eukaryota.": "Eukaryota sp. BB2", "Ancoracysta twista Eukaryota.": "Ancoracysta twista" } # --- load a parser and iterator for our GenBank file gb_handle = gzip.open(sys.argv[1], "r") # -- a parser that will give you back SeqFeature objects feature_parser = GenBank.FeatureParser() iterator = GenBank.Iterator(gb_handle, feature_parser) # load taxonomy for taxids ncbi = NCBITaxa() # output using prefix out_1 = open("%s.cdna.fasta" % sys.argv[2], "w") out_2 = open("%s.codons.tab" % sys.argv[2], "w") strands = [] stop_codons = [] prot_ids = [] excluded = [] missing_id = [] # begin iterating through the file and getting GenBank records
def loadDB(catalog):
    """Load the non-plasmid records of each GenBank file into BioSQL.

    Prompts for a user name and password, connects to the "rnammer"
    database through psycopg2 and stores every record whose description
    does not mention "plasmid" in a sub-database named after an entry of
    its taxonomy annotation: index 2 when index 1 is "Proteobacteria",
    otherwise index 1. A sub-database that does not exist yet is created
    and committed before the record is loaded.

    Args:
        catalog: iterable of GenBank file paths (each is passed to open()).
    """
    from BioSQL import BioSeqDatabase

    # raw_input only exists on Python 2; fall back to input on Python 3.
    try:
        ask = raw_input
    except NameError:
        ask = input

    username = ask("Please enter user name: ")
    password = ask("and password: ")
    host = "dbpg-ifi-utv.uio.no"
    db_name = "rnammer"
    server = BioSeqDatabase.open_database(driver="psycopg2", user=username,
                                          passwd=password, host=host,
                                          db=db_name)

    # NOTE(review): the counter advances and the transaction is committed
    # once per input file - confirm this matches the intended granularity.
    for gi_rep, gbff in enumerate(catalog, 1):
        print(gi_rep)
        print(gbff)
        parser = GenBank.FeatureParser()

        # The with-block closes each GenBank file once its records are read.
        with open(gbff) as gb_handle:
            for x in GenBank.Iterator(gb_handle, parser):
                if re.search("plasmid", x.description, re.IGNORECASE):
                    continue

                print("Record name:")
                print(x.id)

                taxonomy = x.annotations["taxonomy"]
                print(taxonomy[1])
                if taxonomy[1] == "Proteobacteria":
                    # Proteobacteria are split one taxonomy level deeper.
                    print(taxonomy[2])
                    biodb_name = taxonomy[2]
                else:
                    biodb_name = taxonomy[1]

                # Get the sub-database, creating it on first use. A second
                # failed lookup now raises instead of retrying forever.
                try:
                    db = server[biodb_name]
                except KeyError:
                    server.new_database(biodb_name)
                    server.commit()
                    db = server[biodb_name]

                db.load([x])

        server.adaptor.commit()
# -*- coding: utf-8 -*-
"""
Created on Wed Aug 17 15:02:37 2016

@author: rgilmore

Load the records of "HTR1A_Ailuropoda melanoleuca.gbk" into a new "HTR1A"
sub-database of the SQLite BioSQL database "biosql.db".
"""
import sqlite3
from BioSQL import BioSeqDatabase
from Bio import GenBank
import os

server = BioSeqDatabase.open_database(driver="sqlite3", db="biosql.db")
db = server.new_database("HTR1A")

# Show what is in the current working directory.
dir_list1 = os.listdir()
print(dir_list1)

parser = GenBank.FeatureParser()
# The with-block closes the GenBank file as soon as its records have been
# loaded; it used to be opened inline and never closed.
with open("HTR1A_Ailuropoda melanoleuca.gbk") as gb_handle:
    iterator = GenBank.Iterator(gb_handle, parser)
    db.load(iterator)

db.adaptor.commit()
server.commit()
server.close()