def check_config(dbdriver, dbtype, dbhost, dbuser, dbpasswd, testdb):
    """Store the BioSQL test settings and check that a connection works.

    The six arguments are copied into the module-level globals DBDRIVER,
    DBTYPE, DBHOST, DBUSER, DBPASSWD and TESTDB.

    Raises MissingExternalDependencyError if the driver module cannot be
    imported, or if opening and closing a connection fails for any reason.
    """
    global DBDRIVER, DBTYPE, DBHOST, DBUSER, DBPASSWD, TESTDB
    global DBSCHEMA, SQL_FILE
    DBDRIVER = dbdriver
    DBTYPE = dbtype
    DBHOST = dbhost
    DBUSER = dbuser
    DBPASSWD = dbpasswd
    TESTDB = testdb

    # Check the database driver is installed:
    try:
        __import__(DBDRIVER)
    except ImportError:
        message = "Install %s if you want to use %s with BioSQL " % (DBDRIVER, DBTYPE)
        raise MissingExternalDependencyError(message)

    try:
        if DBDRIVER in ["sqlite3"]:
            # Only the driver and the database are passed for sqlite3.
            server = BioSeqDatabase.open_database(driver=DBDRIVER, db=TESTDB)
        else:
            server = BioSeqDatabase.open_database(driver=DBDRIVER,
                                                  user=DBUSER, passwd=DBPASSWD,
                                                  host=DBHOST)
        server.close()
        del server
    except Exception as e:
        # "except ... as" parses on Python 2.6+ and Python 3; the old
        # "except Exception, e" spelling is a SyntaxError on Python 3.
        message = "Connection failed, check settings if you plan to use BioSQL: %s" % str(e)
        raise MissingExternalDependencyError(message)
def check_config(dbdriver, dbtype, dbhost, dbuser, dbpasswd, testdb):
    """Record the BioSQL test settings and confirm they can be used.

    Copies the arguments into the module globals, checks that the driver
    can be imported (JDBC driver when SYSTEM is "Java"), opens and closes
    a trial connection, then sets DBSCHEMA and SQL_FILE and checks that
    the schema file exists.

    Raises MissingExternalDependencyError for incomplete settings, a
    missing driver, a failed connection or a missing schema file.
    """
    global DBDRIVER, DBTYPE, DBHOST, DBUSER, DBPASSWD, TESTDB, DBSCHEMA
    global SYSTEM, SQL_FILE
    DBDRIVER, DBTYPE, DBHOST = dbdriver, dbtype, dbhost
    DBUSER, DBPASSWD, TESTDB = dbuser, dbpasswd, testdb

    if not (DBDRIVER and DBTYPE and DBUSER):
        # No point going any further...
        raise MissingExternalDependencyError("Incomplete BioSQL test settings")

    # Check the database driver is installed:
    if SYSTEM == "Java":
        try:
            if DBDRIVER == "MySQLdb":
                import com.mysql.jdbc.Driver
            elif DBDRIVER in ("psycopg2", "pgdb"):
                import org.postgresql.Driver
        except ImportError:
            message = "Install the JDBC driver for %s to use BioSQL " % DBTYPE
            raise MissingExternalDependencyError(message)
    else:
        try:
            __import__(DBDRIVER)
        except ImportError:
            if DBDRIVER == "MySQLdb":
                message = "Install MySQLdb or mysqlclient if you want to use %s with BioSQL " % (DBTYPE)
            else:
                message = "Install %s if you want to use %s with BioSQL " % (DBDRIVER, DBTYPE)
            raise MissingExternalDependencyError(message)

    try:
        if DBDRIVER == "sqlite3":
            connect_args = dict(driver=DBDRIVER, db=TESTDB)
        else:
            connect_args = dict(driver=DBDRIVER, host=DBHOST,
                                user=DBUSER, passwd=DBPASSWD)
        server = BioSeqDatabase.open_database(**connect_args)
        server.close()
        del server
    except Exception as e:
        message = "Connection failed, check settings if you plan to use BioSQL: %s" % e
        raise MissingExternalDependencyError(message)

    DBSCHEMA = "biosqldb-" + DBTYPE + ".sql"
    SQL_FILE = os.path.join(os.getcwd(), "BioSQL", DBSCHEMA)
    if os.path.isfile(SQL_FILE):
        return
    message = "Missing SQL schema file: %s" % SQL_FILE
    raise MissingExternalDependencyError(message)
def _do_db_cleanup():
    """Cleanup everything from TESTDB.

    Relevant for MySQL and PostgreSQL.

    For the psycopg2/pgdb drivers this connects to TESTDB and drops
    everything owned by DBUSER. For other drivers it drops and re-creates
    TESTDB. The connection is closed on every path, including errors.
    """
    if DBDRIVER in ["psycopg2", "pgdb"]:
        # first open a connection the database
        # notice that postgres doesn't have createdb privileges, so
        # the TESTDB must exist
        server = BioSeqDatabase.open_database(driver=DBDRIVER, host=DBHOST,
                                              user=DBUSER, passwd=DBPASSWD,
                                              db=TESTDB)
        try:
            # The pgdb postgres driver does not support autocommit, so here we
            # commit the current transaction so that 'drop database' query will
            # be outside a transaction block
            server.adaptor.cursor.execute("COMMIT")
            # with Postgres, can get errors about database still being used.
            # Wait briefly to be sure previous tests are done with it.
            time.sleep(1)
            # drop anything in the database
            sql = r"DROP OWNED BY " + DBUSER
            server.adaptor.cursor.execute(sql, ())
        finally:
            # Previously the connection leaked if either statement raised.
            server.close()
    else:
        # first open a connection to create the database
        server = BioSeqDatabase.open_database(driver=DBDRIVER, host=DBHOST,
                                              user=DBUSER, passwd=DBPASSWD)
        try:
            # Auto-commit
            try:
                server.adaptor.autocommit()
            except AttributeError:
                pass
            # drop the database
            try:
                sql = r"DROP DATABASE " + TESTDB
                server.adaptor.cursor.execute(sql, ())
            except (server.module.OperationalError,
                    server.module.Error,
                    server.module.DatabaseError):
                # the database doesn't exist
                pass
            except (server.module.IntegrityError,
                    server.module.ProgrammingError) as e:
                # ditto--perhaps
                # NOTE(review): only reachable if the driver's classes do not
                # derive from its Error class; kept as in the original.
                if str(e).find('database "%s" does not exist' % TESTDB) == -1:
                    raise
            # create a new database
            sql = r"CREATE DATABASE " + TESTDB
            server.adaptor.execute(sql, ())
        finally:
            server.close()
def loop(self, filename, format):
    """Check records survive file -> BioSQL -> GenBank text -> records.

    Parses *filename* in the given *format*, loads the records into a new
    namespace in TESTDB, reads them back, writes them out as GenBank to an
    in-memory handle, re-parses that and compares with the originals.
    """
    # Close the input file deterministically instead of leaking the handle.
    # NOTE(review): "rU" is kept from the original; newer Python 3 releases
    # reject it, so switch to "r" if Python 2 support is not needed.
    with open(filename, "rU") as in_handle:
        original_records = list(SeqIO.parse(in_handle, format))
    # now open a connection to load the database
    server = BioSeqDatabase.open_database(driver=DBDRIVER,
                                          user=DBUSER, passwd=DBPASSWD,
                                          host=DBHOST, db=TESTDB)
    try:
        db_name = "test_loop_%s" % filename  # new namespace!
        db = server.new_database(db_name)
        count = db.load(original_records)
        self.assertEqual(count, len(original_records))
        server.commit()
        # Now read them back...
        biosql_records = [db.lookup(name=rec.name)
                          for rec in original_records]
        # And check they agree
        self.assertTrue(compare_records(original_records, biosql_records))
        # Now write to a handle...
        handle = StringIO()
        SeqIO.write(biosql_records, handle, "gb")
        # Now read them back...
        handle.seek(0)
        new_records = list(SeqIO.parse(handle, "gb"))
        # And check they still agree
        self.assertEqual(len(new_records), len(original_records))
        for old, new in zip(original_records, new_records):
            # TODO - remove this hack because we don't yet write these (yet):
            for key in ["comment", "references", "db_source"]:
                if key in old.annotations and key not in new.annotations:
                    del old.annotations[key]
            self.assertTrue(compare_record(old, new))
    finally:
        # Done - close the connection even when an assertion fails.
        server.close()
def create_database():
    """Create an empty BioSQL database.

    The code in this block connects without selecting a database and drops
    TESTDB, tolerating the case where it does not exist.

    NOTE(review): only the drop step is present here, and the connection
    is not closed in this block; confirm where the database is re-created
    and the connection released.
    """
    # first open a connection to create the database
    server = BioSeqDatabase.open_database(driver=DBDRIVER,
                                          user=DBUSER, passwd=DBPASSWD,
                                          host=DBHOST)
    # Auto-commit: postgresql cannot drop database in a transaction
    try:
        server.adaptor.autocommit()
    except AttributeError:
        pass
    # drop anything in the database
    try:
        # with Postgres, can get errors about database still being used and
        # not able to be dropped. Wait briefly to be sure previous tests are
        # done with it.
        import time
        time.sleep(1)
        sql = r"DROP DATABASE " + TESTDB
        server.adaptor.cursor.execute(sql, ())
    except server.module.OperationalError:
        # the database doesn't exist
        pass
    except (server.module.IntegrityError,
            server.module.ProgrammingError) as e:
        # "as" replaces the Python 2 only comma form (valid on 2.6+ and 3).
        # ditto--perhaps
        if str(e).find('database "%s" does not exist' % TESTDB) == -1:
            raise
def _do_db_create():
    """Do the actual work of database creation.

    Relevant for MySQL and PostgreSQL.

    The code in this block connects without selecting a database, leaves
    any open transaction (COMMIT for pgdb, autocommit otherwise) and drops
    TESTDB, ignoring database errors from the drop.

    NOTE(review): only the drop step is present here, and the connection
    is not closed in this block; confirm against the rest of the file.
    """
    # first open a connection to create the database
    server = BioSeqDatabase.open_database(driver=DBDRIVER,
                                          user=DBUSER, passwd=DBPASSWD,
                                          host=DBHOST)
    if DBDRIVER == "pgdb":
        # The pgdb postgres driver does not support autocommit, so here we
        # commit the current transaction so that 'drop database' query will
        # be outside a transaction block
        server.adaptor.cursor.execute("COMMIT")
    else:
        # Auto-commit: postgresql cannot drop database in a transaction
        try:
            server.adaptor.autocommit()
        except AttributeError:
            pass
    # drop anything in the database
    try:
        # with Postgres, can get errors about database still being used and
        # not able to be dropped. Wait briefly to be sure previous tests are
        # done with it.
        import time
        time.sleep(1)
        sql = r"DROP DATABASE " + TESTDB
        server.adaptor.cursor.execute(sql, ())
    except (server.module.OperationalError,
            server.module.Error,
            server.module.DatabaseError):
        # The exception was never used, so the Python 2 only ", e" binding
        # is dropped; this form parses on both Python 2 and 3.
        # the database doesn't exist
        pass
def load_multi_database(gb_filename_or_handle, gb_filename_or_handle2):
    """Load two GenBank files into a new BioSQL database as different subdatabases.

    This is useful for running tests against a newly created database.

    The first input goes into the "biosql-test" namespace and the second
    into "biosql-test2". Returns the sum of the two counts reported by
    the loads. The connection is closed even if loading fails.
    """
    # Local name on purpose: use whatever create_database() returned.
    TESTDB = create_database()
    # now open a connection to load the database
    db_name = "biosql-test"
    db_name2 = "biosql-test2"
    server = BioSeqDatabase.open_database(driver=DBDRIVER,
                                          user=DBUSER, passwd=DBPASSWD,
                                          host=DBHOST, db=TESTDB)
    try:
        db = server.new_database(db_name)
        # get the GenBank file we are going to put into it
        iterator = SeqIO.parse(gb_filename_or_handle, "gb")
        count = db.load(iterator)

        db = server.new_database(db_name2)
        # get the GenBank file we are going to put into it
        iterator = SeqIO.parse(gb_filename_or_handle2, "gb")
        # finally put it in the database
        count2 = db.load(iterator)
        server.commit()
    finally:
        # Previously the connection leaked when a load or the commit raised.
        server.close()
    return count + count2
def main(gbfile, length=10000):
    """Load the longer records of a GenBank file into a BioSQL namespace.

    Parses *gbfile*, keeps records whose length exceeds *length*, sorts
    them by name and loads them into the ``biodb_name`` namespace, which
    is removed and re-created first if it already exists. The transaction
    is rolled back and the error re-raised if anything fails.

    NOTE(review): ``biodb_name`` is not defined in this function;
    presumably it is a module-level name - confirm.
    """
    driver = "MySQLdb"
    user = "******"
    passwd = ""
    host = "localhost"
    dbname = "bioseqdb"

    # Single-argument print(...) behaves the same on Python 2 and 3.
    print("Parsing Genbank file sequence file....")
    with open(gbfile) as gb_handle:
        records = list(SeqIO.parse(gb_handle, "genbank"))

    print("Sorting by size and name.......")
    longrecords = [record for record in records if len(record) > length]
    longrecords.sort(key=lambda x: x.name)  # sort by name

    print("Writing to BioSQL database...")
    server = BioSeqDatabase.open_database(driver=driver, user=user,
                                          passwd=passwd, host=host, db=dbname)
    try:
        if biodb_name not in server.keys():
            server.new_database(biodb_name)
        else:
            server.remove_database(biodb_name)
            server.adaptor.commit()
            # Was "new_databse", which raised AttributeError on this path.
            server.new_database(biodb_name)
        db = server[biodb_name]
        db.load(longrecords)
        server.adaptor.commit()
    except BaseException:
        # Roll back on any failure, then propagate it unchanged.
        # Was "raide", a NameError that masked the real exception.
        server.adaptor.rollback()
        raise
def setUp(self):
    """Create TESTDB, open it and start from a fresh "biosql-test" namespace.

    Also sets Entrez.email and prepares an iterator over
    GenBank/cor6_6.gb on ``self.iterator``.
    """
    global DBDRIVER, DBTYPE, DBHOST, DBUSER, DBPASSWD, TESTDB, DBSCHEMA
    global SYSTEM, SQL_FILE

    Entrez.email = "*****@*****.**"
    # create TESTDB
    TESTDB = create_database()

    # load the database
    namespace = "biosql-test"
    connection = dict(driver=DBDRIVER, user=DBUSER, passwd=DBPASSWD,
                      host=DBHOST, db=TESTDB)
    self.server = BioSeqDatabase.open_database(**connection)

    # remove the database if it already exists
    try:
        self.server[namespace]
        self.server.remove_database(namespace)
    except KeyError:
        pass

    self.db = self.server.new_database(namespace)

    # get the GenBank file we are going to put into it
    self.iterator = SeqIO.parse("GenBank/cor6_6.gb", "gb")
def trans(self, filename, format):
    """Check records survive being copied between two BioSQL namespaces.

    Parses *filename* in the given *format*, loads the records into a
    first namespace, reads them back, loads those into a second namespace
    and checks both read-backs against the originals.
    """
    # Close the input file deterministically instead of leaking the handle.
    # NOTE(review): "rU" is kept from the original; newer Python 3 releases
    # reject it, so switch to "r" if Python 2 support is not needed.
    with open(filename, "rU") as in_handle:
        original_records = list(SeqIO.parse(in_handle, format))
    # now open a connection to load the database
    server = BioSeqDatabase.open_database(driver=DBDRIVER,
                                          user=DBUSER, passwd=DBPASSWD,
                                          host=DBHOST, db=TESTDB)
    try:
        db_name = "test_trans1_%s" % filename  # new namespace!
        db = server.new_database(db_name)
        count = db.load(original_records)
        self.assertEqual(count, len(original_records))
        server.commit()
        # Now read them back...
        biosql_records = [db.lookup(name=rec.name)
                          for rec in original_records]
        # And check they agree
        self.assertTrue(compare_records(original_records, biosql_records))
        # Now write to a second name space...
        db_name = "test_trans2_%s" % filename  # new namespace!
        db = server.new_database(db_name)
        count = db.load(biosql_records)
        self.assertEqual(count, len(original_records))
        # Now read them back again,
        biosql_records2 = [db.lookup(name=rec.name)
                           for rec in original_records]
        # And check they also agree
        self.assertTrue(compare_records(original_records, biosql_records2))
    finally:
        # Done - close the connection even when an assertion fails.
        server.close()
def test_add_from_gff_with_taxonomy(self):
    """Add in sequences from a gff + fasta file given taxonomy."""
    fixture_dir = os.path.join(os.path.dirname(__file__), 'test_files')
    gff = os.path.join(fixture_dir, 'GCF_000005845.2_ASM584v2_genomic.gff')
    fasta = os.path.join(fixture_dir, 'GCF_000005845.2_ASM584v2_genomic.fna')

    cli_args = self.common_params + ['-t', '-T', 511145, '-g', gff,
                                     '-f', fasta, '-D', 'test']
    result = CliRunner().invoke(cli.main, cli_args)
    self.assertEqual(result.exit_code, 0)

    server = BioSeqDatabase.open_database(driver=self.dbdriver,
                                          user=self.dbuser,
                                          passwd=self.dbpassword,
                                          host=self.dbhost,
                                          db=self.dbname)
    rows = server.adaptor.execute_and_fetchall("SELECT name FROM taxon_name where name_class = 'scientific name'")
    dbnames = {row[0] for row in rows}
    # Scientific names the test expects to find in taxon_name.
    names = {
        'cellular organisms',
        'Bacteria',
        'Proteobacteria',
        'Gammaproteobacteria',
        'Enterobacterales',
        'Enterobacteriaceae',
        'Escherichia',
        'Escherichia coli',
        'Escherichia coli K-12',
        'Escherichia coli str. K-12 substr. MG1655',
    }
    self.assertCountEqual(dbnames, names)
    server.close()
def main(args):
    """Set (or clear) the taxon of the bioentries named by the input.

    Entry names come from the FASTA file, else the GenBank file, else the
    lines of ``args.input``. With ``args.remove`` the taxon is set to
    NULL; otherwise it is the id returned by add_new_taxonomy().
    """
    server = BioSeqDatabase.open_database(driver=args.driver,
                                          db=args.database,
                                          user=args.user,
                                          host=args.host,
                                          passwd=args.password)
    if args.database_name not in server.keys():
        server.new_database(args.database_name)
    db = server[args.database_name]

    # Collect the display ids to update; the first source given wins.
    if args.fasta is not None:
        names = [rec.name for rec in SeqIO.parse(args.fasta, 'fasta')]
    elif args.genbank is not None:
        names = [rec.name for rec in SeqIO.parse(args.genbank, 'genbank')]
    elif args.input is not None:
        with open(args.input) as fp:
            names = [line.rstrip() for line in fp]
    else:
        names = []

    if args.remove:
        taxon_id = None
    else:
        taxon_id = add_new_taxonomy(server, args.new_taxons, args.taxid)

    update_sql = 'update bioentry set taxon_id = %s where bioentry_id = %s'
    for name in names:
        bioentry_id = db.adaptor.fetch_seqid_by_display_id(db.dbid, name)
        server.adaptor.execute(update_sql, (taxon_id, bioentry_id))
    server.commit()
def main(args):
    """Load sequences into a BioSQL namespace and optionally set their taxon.

    Loads from GFF + FASTA when both are given, otherwise from GenBank;
    a failed load is rolled back and re-raised. When ``args.new_taxons``
    is set, every record of the FASTA (or GenBank) input has its bioentry
    pointed at the taxon returned by add_new_taxonomy().
    """
    server = BioSeqDatabase.open_database(driver=args.driver,
                                          db=args.database,
                                          user=args.user,
                                          host=args.host,
                                          passwd=args.password)
    if args.database_name not in server.keys():
        server.new_database(args.database_name)
    db = server[args.database_name]

    have_gff_pair = args.gff is not None and args.fasta is not None
    try:
        if have_gff_pair:
            load_gff(db, args.gff, args.fasta, args.tax_lookup, args.taxid)
        elif args.genbank is not None:
            load_genbank(db, args.genbank, args.tax_lookup, args.taxid)
    except BaseException:
        # Undo the partial load, then let the error propagate.
        server.adaptor.rollback()
        raise

    if args.new_taxons:
        taxon_id = add_new_taxonomy(server, args.new_taxons, args.taxid)

        if args.fasta is not None:
            records = SeqIO.parse(args.fasta, 'fasta')
        elif args.genbank is not None:
            records = SeqIO.parse(args.genbank, 'genbank')

        update_sql = 'update bioentry set taxon_id = %s where bioentry_id = %s'
        for rec in records:
            bioentry_id = db.adaptor.fetch_seqid_by_display_id(db.dbid, rec.name)
            server.adaptor.execute(update_sql, (taxon_id, bioentry_id))

    server.commit()
def create_database():
    """Delete any existing BioSQL test database, then (re)create an empty BioSQL database.

    For sqlite3 the old TESTDB file is removed (best effort) and TESTDB is
    rebound to the name returned by temp_db_filename(); for other drivers
    _do_db_create() is called. The schema in SQL_FILE is then loaded and
    committed. The connection is closed whether or not that succeeds.
    """
    global TESTDB
    if DBDRIVER in ["sqlite3"]:
        if os.path.exists(TESTDB):
            try:
                os.remove(TESTDB)
            except OSError:
                # Only file-system errors are retried; a bare "except:"
                # here also swallowed KeyboardInterrupt and SystemExit.
                time.sleep(1)
                try:
                    os.remove(TESTDB)
                except OSError:
                    # Seen this with PyPy 2.1 (and older) on Windows -
                    # which suggests an open handle still exists?
                    print("Could not remove %r" % TESTDB)
        # Now pick a new filename - just in case there is a stale handle
        # (which might be happening under Windows...)
        TESTDB = temp_db_filename()
    else:
        _do_db_create()

    # now open a connection to load the database
    server = BioSeqDatabase.open_database(driver=DBDRIVER,
                                          user=DBUSER, passwd=DBPASSWD,
                                          host=DBHOST, db=TESTDB)
    try:
        server.load_database_sql(SQL_FILE)
        server.commit()
    finally:
        # Close exactly once on success and on failure (the old except
        # branch called close() a second time when close() itself failed).
        server.close()
def get_database(host="192.168.0.192", user="******", passwd="",
                 db="biosql_new", namespace="embl_rod"):
    """Perform a connection with the database.

    The defaults reproduce the values that used to be hard coded, so
    existing ``get_database()`` calls behave as before; callers (for
    example a command-line front end) can now override any of them.

    Parameters:
     - host, user, passwd, db - passed to BioSeqDatabase.open_database().
     - namespace - key looked up on the opened server.

    Returns ``server[namespace]``.
    """
    server = BioSeqDatabase.open_database(host=host, user=user,
                                          passwd=passwd, db=db)
    return server[namespace]
def main(args):
    """Print the qualifier/value CSV for the seqfeature ids listed in a file.

    Reads one id per line from ``args.input`` (trailing whitespace
    stripped) and hands them to print_feature_qv_csv() with the server.
    """
    server = BioSeqDatabase.open_database(driver=args.driver,
                                          db=args.database,
                                          user=args.user,
                                          host=args.host,
                                          passwd=args.password)
    with open(args.input) as fp:
        sfids = [line.rstrip() for line in fp]
    print_feature_qv_csv(server, sfids)
def setUp(self):
    """Pass GenBank/cor6_6.gb to load_database() and open "biosql-test"."""
    load_database(os.path.join(os.getcwd(), "GenBank", "cor6_6.gb"))

    # The connection globals are read after load_database() has returned,
    # as in the original statement order.
    connection = dict(driver=DBDRIVER, user=DBUSER, passwd=DBPASSWD,
                      host=DBHOST, db=TESTDB)
    self.server = BioSeqDatabase.open_database(**connection)
    self.db = self.server["biosql-test"]
def gbk_upload(self):
    """Upload the BioSQL database with genbank data.

    Works relative to ``self.path``: every entry there except
    'Databases' is treated as a tier, gets a fresh copy of the template
    database under Databases/<tier>.db, and every file in each of its
    gene sub-directories is loaded into a sub-database named after the
    gene. On any failure the load is rolled back, the sub-database is
    deleted and the error is re-raised.

    The working directory is changed while this runs.
    """
    loaded_total = 0
    os.chdir(self.path)
    print(os.getcwd())
    if not os.path.isdir(self.path + '/Databases'):
        os.mkdir('Databases')

    for tier in os.listdir(os.getcwd()):
        if tier == 'Databases':
            continue

        db_name = str(tier) + '.db'
        db_path = self.path + '/Databases/' + db_name
        # Always start from a fresh copy of the template database.
        if os.path.isfile(db_path):
            os.remove(db_path)
        print('Copying Template BioSQL Database... '
              'This may take a few minutes...')
        shutil.copy2(where.Templates + '/Template_BioSQL_DB.db', db_path)

        server = BioSeqDatabase.open_database(driver='sqlite3', db=db_path)
        os.chdir(tier)
        for gene in os.listdir(os.getcwd()):
            os.chdir(gene)
            sub_db_name = gene
            for gbk_name in os.listdir(os.getcwd()):
                try:
                    if sub_db_name not in server.keys():
                        server.new_database(sub_db_name)
                    db = server[sub_db_name]
                    count = db.load(SeqIO.parse(gbk_name, 'genbank'))
                    server.commit()
                    print('Server Commited %s' % sub_db_name)
                    print('%s database loaded with %s.' % (db.dbid, gbk_name))
                    print("That file contains %s genbank records." % str(count))
                    loaded_total = loaded_total + count
                    print('The total number of files loaded so far is %i.'
                          % loaded_total)
                except BaseException:
                    server.rollback()
                    # Drop the partly loaded sub-database; an error raised
                    # here propagates instead of the original one.
                    del server[sub_db_name]
                    server.commit()
                    raise
            os.chdir('..')
        os.chdir('..')
def setUp(self):
    """Connect to and load up the database."""
    load_database("GenBank/cor6_6.gb")

    # The connection globals are read after load_database() has returned,
    # as in the original statement order.
    connection = dict(driver=DBDRIVER, user=DBUSER, passwd=DBPASSWD,
                      host=DBHOST, db=TESTDB)
    self.server = BioSeqDatabase.open_database(**connection)
    self.db = self.server["biosql-test"]
def setUp(self):
    """Load a database and look up the X62281 entry."""
    load_database("GenBank/cor6_6.gb")

    # The connection globals are read after load_database() has returned,
    # as in the original statement order.
    connection = dict(driver=DBDRIVER, user=DBUSER, passwd=DBPASSWD,
                      host=DBHOST, db=TESTDB)
    server = BioSeqDatabase.open_database(**connection)
    self.server = server
    self.db = server["biosql-test"]
    self.item = self.db.lookup(accession="X62281")
def setUp(self):
    """Re-create the test database and add an empty "biosql-test" namespace."""
    # drop any old database and create a new one:
    create_database()

    # connect to new database (globals read after create_database()):
    connection = dict(driver=DBDRIVER, user=DBUSER, passwd=DBPASSWD,
                      host=DBHOST, db=TESTDB)
    self.server = BioSeqDatabase.open_database(**connection)

    # Create new namespace within new empty database:
    self.db = self.server.new_database("biosql-test")
def main(args):
    """Extract the features matching a qualifier/value pair.

    'feat-prot' extracts CDS features with translation; 'feat-nucl'
    extracts the features with the helper's defaults. Any other
    ``args.output_format`` does nothing.
    """
    server = BioSeqDatabase.open_database(driver=args.driver,
                                          db=args.database,
                                          user=args.user,
                                          host=args.host,
                                          passwd=args.password)
    seqfeature_ids = get_seqfeature_ids_from_qv(
        server, args.qualifier, args.value, args.database_name)

    output_format = args.output_format
    if output_format == 'feat-prot':
        extract_feature_sql(server, seqfeature_ids,
                            type=['CDS'], translate=True)
    elif output_format == 'feat-nucl':
        extract_feature_sql(server, seqfeature_ids)
def setUp(self):
    """Re-create the test database and add an empty "biosql-test" namespace."""
    # drop any old database and create a new one; the returned name is
    # kept in a local and used for the connection below:
    fresh_db = create_database()

    # connect to new database:
    connection = dict(driver=DBDRIVER, user=DBUSER, passwd=DBPASSWD,
                      host=DBHOST, db=fresh_db)
    self.server = BioSeqDatabase.open_database(**connection)

    # Create new namespace within new empty database:
    self.db = self.server.new_database("biosql-test")
def setUp(self):
    """Connect to the database."""
    namespace = "biosql-test-seqio"
    self.server = BioSeqDatabase.open_database(driver=DBDRIVER,
                                               user=DBUSER,
                                               passwd=DBPASSWD,
                                               host=DBHOST,
                                               db=TESTDB)
    # Create (and commit) the namespace the first time it is needed.
    if namespace not in self.server:
        self.db = self.server.new_database(namespace)
        self.server.commit()
    self.db = self.server[namespace]
def setUp(self):
    """Load a database and look up the X62281 entry."""
    load_database("GenBank/cor6_6.gb")

    # The connection globals are read after load_database() has returned,
    # as in the original statement order.
    connection = dict(driver=DBDRIVER, user=DBUSER, passwd=DBPASSWD,
                      host=DBHOST, db=TESTDB)
    server = BioSeqDatabase.open_database(**connection)
    self.server = server
    self.db = server["biosql-test"]
    self.item = self.db.lookup(accession="X62281")
def test_003_loader2(self):
    """Build pyGFE from pickled caches and type a known HLA-A sequence.

    The first record of known_A.fasta is typed against versions 3.20.0
    and 3.31.0; both results must be documented HLA-A*01:01:01:01.
    """
    start = time.time()
    graph = Graph(neo4jurl, user=neo4juser, password=neo4jpass, bolt=False)
    server = BioSeqDatabase.open_database(driver="pymysql",
                                          user=biosqluser,
                                          passwd=biosqlpass,
                                          host=biosqlhost,
                                          db=biosqldb)
    seqann = BioSeqAnn(server=server, verbose=False)

    def _unpickle(path):
        # Read one pickled cache file from the working directory.
        with open(path, 'rb') as handle:
            return pickle.load(handle)

    # Same load order as before.
    gfe_feats = _unpickle("gfe2feat.pickle")
    feats = _unpickle("unique_db-feats.pickle")
    cached_feats = _unpickle("feature-service.pickle")
    gfe2hla = _unpickle("gfe2hla.pickle")
    seq2hla = _unpickle("seq2hla.pickle")

    pygfe = pyGFE(graph=graph,
                  seqann=seqann,
                  load_features=False,
                  verbose=False,
                  features=feats,
                  seq2hla=seq2hla,
                  gfe2hla=gfe2hla,
                  gfe_feats=gfe_feats,
                  cached_features=cached_feats,
                  loci=["HLA-A"])
    self.assertIsInstance(pygfe, pyGFE)

    seqs = list(SeqIO.parse(self.data_dir + "/known_A.fasta", "fasta"))
    typing1 = pygfe.type_from_seq("HLA-A", str(seqs[0].seq), "3.20.0")
    typing2 = pygfe.type_from_seq("HLA-A", str(seqs[0].seq), "3.31.0")

    time_taken = time.time() - start
    print("TIME TAKEN: " + str(time_taken))

    self.assertEqual(typing2.hla, 'HLA-A*01:01:01:01')
    self.assertEqual(typing2.status, "documented")
    self.assertIsInstance(typing2, Typing)
    self.assertEqual(typing1.hla, 'HLA-A*01:01:01:01')
    self.assertEqual(typing1.status, "documented")
    self.assertIsInstance(typing1, Typing)
def setUp(self):
    """Create a fresh test database, its taxonomy and a TaxonTree over it."""
    # drop any old database and create a new one:
    params = connection_parameters(create=True)
    testdb, dbdriver, dbuser, dbpassword, dbhost = params

    # connect to new database:
    self.server = BioSeqDatabase.open_database(driver=dbdriver,
                                               user=dbuser,
                                               passwd=dbpassword,
                                               host=dbhost,
                                               db=testdb)
    self._create_taxonomy()
    self.taxon_tree = TaxonTree(self.server.adaptor)
    self.testdb = testdb
def setUp(self):
    """Pass GenBank/cor6_6.gb to load_database() and open "biosql-test"."""
    load_database(os.path.join(os.getcwd(), "GenBank", "cor6_6.gb"))

    # The connection globals are read after load_database() has returned,
    # as in the original statement order.
    connection = dict(driver=DBDRIVER, user=DBUSER, passwd=DBPASSWD,
                      host=DBHOST, db=TESTDB)
    self.server = BioSeqDatabase.open_database(**connection)
    self.db = self.server["biosql-test"]
def setUp(self):
    """Connect to and load up the database."""
    load_database("GenBank/cor6_6.gb")

    # The connection globals are read after load_database() has returned,
    # as in the original statement order.
    connection = dict(driver=DBDRIVER, user=DBUSER, passwd=DBPASSWD,
                      host=DBHOST, db=TESTDB)
    self.server = BioSeqDatabase.open_database(**connection)
    self.db = self.server["biosql-test"]
def check_config(dbdriver, dbtype, dbhost, dbuser, dbpasswd, testdb):
    """Store the BioSQL test settings and check that a connection works.

    The six arguments are copied into the module-level globals DBDRIVER,
    DBTYPE, DBHOST, DBUSER, DBPASSWD and TESTDB. When SYSTEM is "Java"
    the matching JDBC driver class is imported; otherwise the DBDRIVER
    module itself is imported.

    Raises MissingExternalDependencyError if the driver cannot be
    imported, or if opening and closing a connection fails for any reason.
    """
    global DBDRIVER, DBTYPE, DBHOST, DBUSER, DBPASSWD, TESTDB, DBSCHEMA
    global SYSTEM, SQL_FILE
    DBDRIVER = dbdriver
    DBTYPE = dbtype
    DBHOST = dbhost
    DBUSER = dbuser
    DBPASSWD = dbpasswd
    TESTDB = testdb

    # Check the database driver is installed:
    if SYSTEM == "Java":
        try:
            if DBDRIVER in ["MySQLdb"]:
                import com.mysql.jdbc.Driver
            elif DBDRIVER in ["psycopg2"]:
                import org.postgresql.Driver
        except ImportError:
            message = "Install the JDBC driver for %s to use BioSQL " % DBTYPE
            raise MissingExternalDependencyError(message)
    else:
        try:
            __import__(DBDRIVER)
        except ImportError:
            message = "Install %s if you want to use %s with BioSQL " % (
                DBDRIVER, DBTYPE)
            raise MissingExternalDependencyError(message)

    try:
        if DBDRIVER in ["sqlite3"]:
            # Only the driver and the database are passed for sqlite3.
            server = BioSeqDatabase.open_database(driver=DBDRIVER, db=TESTDB)
        else:
            server = BioSeqDatabase.open_database(driver=DBDRIVER,
                                                  user=DBUSER,
                                                  passwd=DBPASSWD,
                                                  host=DBHOST)
        server.close()
        del server
    except Exception as e:
        # "except ... as" parses on Python 2.6+ and Python 3; the old
        # "except Exception, e" spelling is a SyntaxError on Python 3.
        message = "Connection failed, check settings if you plan to use BioSQL: %s" % str(
            e)
        raise MissingExternalDependencyError(message)
def load_db(params):
    """Open the BioSQL server described by *params* and wrap it in a DB.

    Reads the password from the SQLPSW environment variable (KeyError if
    unset) and the "chlamdb.db_type" / "chlamdb.db_name" keys from
    *params*. "sqlite" selects the sqlite3 driver; anything else uses
    MySQLdb with utf8 settings.
    """
    sqlpsw = os.environ['SQLPSW']
    db_type = params["chlamdb.db_type"]
    db_name = params["chlamdb.db_name"]

    # Arguments shared by both branches.
    common = dict(user="******", passwd=sqlpsw, host="127.0.0.1")
    if db_type == "sqlite":
        server = BioSeqDatabase.open_database(driver="sqlite3",
                                              db=f"{db_name}",
                                              **common)
    else:
        server = BioSeqDatabase.open_database(driver="MySQLdb",
                                              db=db_name,
                                              charset='utf8',
                                              use_unicode=True,
                                              **common)
    return DB(server, db_name)
def get_gbk_file(self, accession, gene, organism, server_flag=None):
    """Search a GenBank database for a target accession number.

    This function searches through the given NCBI databases (created by
    uploading NCBI refseq .gbff files to a BioPython BioSQL database) and
    creates single GenBank files.  This function can be used after a
    blast or on its own.  If used on its own then the NCBI .db files must
    be manually moved to the proper directories.

    :param accession: Accession number of interest without the version.
    :param gene: Target gene of the accession number parameter.
    :param organism: Target organism of the accession number parameter.
    :param server_flag: (Default value = None) When True the databases are
        not searched at all; it is set to True internally once the file
        has been written.
    :return: None
    :raises FileNotFoundError: if no record was written after the search.
    """
    gene_path = self.raw_data / Path(gene) / Path('GENBANK')
    Path.mkdir(gene_path, parents=True, exist_ok=True)

    # Parse each database to find the proper GenBank record
    for FILE in self.db_files_list:
        db_file_path = self.ncbi_db_repo / Path(FILE)
        # Stop searching if the GenBank record has been created.
        if server_flag is True:
            break
        server = BioSeqDatabase.open_database(driver='sqlite3',
                                              db=str(db_file_path))
        try:
            # Parse the sub-databases
            for SUB_DB_NAME in server.keys():
                db = server[SUB_DB_NAME]
                try:
                    record = db.lookup(accession=accession)
                    gbk_file = '%s_%s.gbk' % (gene, organism)
                    gbk_file_path = gene_path / Path(gbk_file)
                    with open(gbk_file_path, 'w') as GB_file:
                        GB_file.write(record.format('genbank'))
                        # The old call passed 'created' as a format argument
                        # to a message with no placeholder, which makes a
                        # standard logging.Logger fail to format the record.
                        # NOTE(review): assumes genbanklog is a logging.Logger.
                        self.genbanklog.info('%s created', GB_file.name)
                    # Make sure we have the correct GenBank file.
                    self.gbk_quality_control(gbk_file_path, gene, organism)
                    # Stop searching if the GenBank record has been created.
                    server_flag = True
                    break
                except IndexError:
                    self.genbanklog.critical(
                        'Index Error in %s. Moving to the next database...'
                        % SUB_DB_NAME)
                    continue
        finally:
            # Each database file gets its own connection; release it
            # before moving on instead of leaving it open.
            server.close()

    # If the file has not been created after searching, then raise an error
    if server_flag is not True:
        self.genbanklog.critical(
            "The GenBank file was not created for %s (%s, %s)."
            % (accession, gene, organism))
        raise FileNotFoundError
def setUp(self):
    """Connect to the database."""
    namespace = "biosql-test-seqio"
    self.server = BioSeqDatabase.open_database(driver=DBDRIVER,
                                               user=DBUSER,
                                               passwd=DBPASSWD,
                                               host=DBHOST,
                                               db=TESTDB)
    # Create (and commit) the namespace the first time it is needed.
    if namespace not in self.server:
        self.db = self.server.new_database(namespace)
        self.server.commit()
    self.db = self.server[namespace]
def open(self, driver="pgdb", **kwargs):
    """Open a connection to a relational database.

    Remembers the driver name and keyword arguments, opens the BioSQL
    server with them and keeps its connection and cursor on ``self``.
    ``has_copy`` is set to True when the cursor offers ``copy_from``.
    """
    self.driver, self.dbargs = driver, kwargs
    self.server = BioSeqDatabase.open_database(driver=driver, **kwargs)
    adaptor = self.server.adaptor
    self.conn = adaptor.conn
    self.crs = adaptor.cursor
    if hasattr(self.crs, 'copy_from'):
        self.has_copy = True
def test_backwards_compatibility(self):
    """Check can re-use an old BioSQL SQLite3 database."""
    original_records = list(SeqIO.parse("GenBank/cor6_6.gb", "gb"))
    # now open a connection to load the database
    server = BioSeqDatabase.open_database(driver=DBDRIVER,
                                          db="BioSQL/cor6_6.db")
    old_db = server["OLD"]
    self.assertEqual(len(old_db), len(original_records))
    # Now read them back...
    biosql_records = []
    for rec in original_records:
        biosql_records.append(old_db.lookup(name=rec.name))
    # And check they agree
    self.assertTrue(compare_records(original_records, biosql_records))
def test_change_taxonomy(self):
    """Check that taxonomy can be properly changed."""
    infile = os.path.join(os.path.dirname(__file__), 'test_files',
                          'modify_header.txt')
    cli_args = self.common_params + ['-i', infile, '-T', '112040',
                                     '--key', 'accession']
    runner = CliRunner()
    result = runner.invoke(cli.main, cli_args)
    self.assertEqual(result.exit_code, 0)
    print(result.output)

    server = BioSeqDatabase.open_database(driver=self.dbdriver,
                                          user=self.dbuser,
                                          passwd=self.dbpassword,
                                          host=self.dbhost,
                                          db=self.dbname)
    rows = server.adaptor.execute_and_fetchall("select ncbi_taxon_id from taxon join bioentry using(taxon_id) where bioentry.accession = 'NC_000913'")
    # First column of the first row is the NCBI taxon id.
    first_row = rows[0]
    taxid = first_row[0]
    self.assertEqual(taxid, 112040)
def test_add_from_genbank(self):
    """Add in sequences from a Genbank file."""
    infile = os.path.join(os.path.dirname(__file__), 'test_files',
                          'GCF_000005845.2_ASM584v2_genomic.gbff')
    cli_args = self.common_params + ['-G', infile, '-D', 'test']
    result = CliRunner().invoke(cli.main, cli_args)
    self.assertEqual(result.exit_code, 0)

    server = BioSeqDatabase.open_database(driver=self.dbdriver,
                                          user=self.dbuser,
                                          passwd=self.dbpassword,
                                          host=self.dbhost,
                                          db=self.dbname)
    rows = server.adaptor.execute_and_fetchall("SELECT name FROM taxon_name where name_class = 'scientific name'")
    expected_rows = [('Escherichia coli str. K-12 substr. MG1655',)]
    self.assertEqual(rows, expected_rows)
    server.close()
def test_backwards_compatibility(self):
    """Check can re-use an old BioSQL SQLite3 database."""
    original_records = list(SeqIO.parse("GenBank/cor6_6.gb", "gb"))
    # now open a connection to load the database
    server = BioSeqDatabase.open_database(driver=DBDRIVER,
                                          db="BioSQL/cor6_6.db")
    old_db = server["OLD"]
    self.assertEqual(len(old_db), len(original_records))
    # Now read them back...
    biosql_records = []
    for rec in original_records:
        biosql_records.append(old_db.lookup(name=rec.name))
    # And check they agree
    self.assertTrue(compare_records(original_records, biosql_records))
def setUp(self):
    """Load a database and look up the X62281 entry."""
    gb_file = os.path.join(os.getcwd(), "GenBank", "cor6_6.gb")
    # "with" closes the handle even when load_database() raises; the
    # explicit close() was skipped on that path.
    with open(gb_file, "r") as gb_handle:
        load_database(gb_handle)

    self.server = BioSeqDatabase.open_database(driver=DBDRIVER,
                                               user=DBUSER,
                                               passwd=DBPASSWD,
                                               host=DBHOST,
                                               db=TESTDB)
    self.db = self.server["biosql-test"]
    self.item = self.db.lookup(accession="X62281")
def setUp(self):
    """Connect to and load up the database."""
    gb_file = os.path.join(os.getcwd(), "GenBank", "cor6_6.gb")
    # "with" closes the handle even when load_database() raises; the
    # explicit close() was skipped on that path.
    with open(gb_file, "r") as gb_handle:
        load_database(gb_handle)

    self.server = BioSeqDatabase.open_database(
        driver=DBDRIVER, user=DBUSER, passwd=DBPASSWD, host=DBHOST, db=TESTDB
    )
    self.db = self.server["biosql-test"]
def main(args):
    """Load an IMG directory into a BioSQL namespace.

    The namespace ``args.database_name`` is created if missing. The load
    is committed on success; on any failure it is rolled back and the
    error re-raised.
    """
    server = BioSeqDatabase.open_database(driver=args.driver,
                                          db=args.database,
                                          user=args.user,
                                          host=args.host,
                                          passwd=args.password)
    if args.database_name not in server.keys():
        server.new_database(args.database_name)
    db = server[args.database_name]

    adaptor_owner = server
    try:
        load_img(db, args.directory, args.tax_lookup, args.taxid)
        adaptor_owner.adaptor.commit()
    except BaseException:
        adaptor_owner.adaptor.rollback()
        raise
def test_005_insertionserv(self):
    """Annotate sequences with insertions and compare with the reference.

    For every expected entry the annotation must use "nt_search", be
    complete, reproduce the expected GFE, and differ from the reference
    features only where (and by as much as) the fixture says.
    """
    server = BioSeqDatabase.open_database(driver="pymysql", user=biosqluser,
                                          passwd=biosqlpass, host=biosqlhost,
                                          db=biosqldb, port=biosqlport)
    try:
        seqann = BioSeqAnn(server=server, verbose=False,
                           verbosity=verbosity, pid="004_insertion")
        input_seq = self.data_dir + '/insertion_seqs.fasta'
        # Parse the FASTA once; it used to be re-read on every iteration.
        input_records = list(SeqIO.parse(input_seq, "fasta"))

        for ex in self.expected['insertion']:
            i = int(ex['index'])
            locus = ex['locus']
            allele = ex['name']
            hla, loc = locus.split("-")
            in_seq = input_records[i]
            ann = seqann.annotate(in_seq, locus)
            self.assertEqual(ann.method, "nt_search")
            self.assertFalse(ann.missing)
            self.assertFalse(ann.blocks)
            self.assertIsInstance(ann, Annotation)
            self.assertTrue(ann.complete_annotation)
            self.assertGreater(len(ann.annotation.keys()), 1)
            db = seqann.refdata.server[seqann.refdata.dbversion + "_" + loc]
            expected = db.lookup(name=allele)
            self.assertEqual(ann.gfe, ex['gfe'])
            self.assertGreater(len(ann.structure), 1)
            for feat in ann.structure:
                self.assertIsInstance(feat, Feature)
            n_diffs = 0
            expected_seqs = get_features(expected)
            self.assertGreater(len(expected_seqs.keys()), 1)
            for feat in expected_seqs:
                if feat not in ann.annotation:
                    # Fails the test: every expected feature must be annotated.
                    self.assertEqual(feat, None)
                else:
                    if feat in ex['diff']:
                        n_diffs += 1
                        self.assertNotEqual(str(expected_seqs[feat]),
                                            str(ann.annotation[feat].seq))
                        diff_len = len(str(ann.annotation[feat].seq)) - \
                            len(str(expected_seqs[feat]))
                        self.assertEqual(diff_len, ex['lengths'][feat])
                    else:
                        self.assertEqual(str(expected_seqs[feat]),
                                         str(ann.annotation[feat].seq))
            self.assertEqual(n_diffs, len(ex['diff']))
    finally:
        # Close the connection even when an assertion fails.
        server.close()
def test_009_partialambigserv(self):
    """Annotate partial, ambiguous sequences and compare with the reference.

    For every expected entry the annotation must be complete, use the
    expected method, reproduce the expected GFE, omit the features listed
    as missing and match the reference except where a difference is listed.
    """
    server = BioSeqDatabase.open_database(driver="pymysql", user=biosqluser,
                                          passwd=biosqlpass, host=biosqlhost,
                                          db=biosqldb, port=biosqlport)
    try:
        seqann = BioSeqAnn(server=server, verbose=False,
                           verbosity=verbosity, pid="006_partialambig")
        input_seq = self.data_dir + '/partial_ambig.fasta'
        # Parse the FASTA once; it used to be re-read on every iteration.
        input_records = list(SeqIO.parse(input_seq, "fasta"))

        for ex in self.expected['partial_ambig']:
            i = int(ex['index'])
            locus = ex['locus']
            allele = ex['name']
            hla, loc = locus.split("-")
            print(str(i), allele)
            in_seq = input_records[i]
            ann = seqann.annotate(in_seq, locus)
            self.assertTrue(ann.complete_annotation)
            self.assertEqual(ann.method, ex['method'])
            self.assertFalse(ann.blocks)
            self.assertIsInstance(ann, Annotation)
            self.assertTrue(ann.complete_annotation)
            self.assertGreater(len(ann.annotation.keys()), 1)
            db = seqann.refdata.server[seqann.refdata.dbversion + "_" + loc]
            expected = db.lookup(name=allele)
            expected_seqs = get_features(expected)
            self.assertGreater(len(expected_seqs.keys()), 1)
            self.assertGreater(len(ann.annotation.keys()), 1)
            self.assertEqual(ann.gfe, ex['gfe'])

            self.assertGreater(len(ann.structure), 1)
            for feat in ann.structure:
                self.assertIsInstance(feat, Feature)

            # Make sure only mapped feats exist
            for mf in ex['missing_feats']:
                self.assertFalse(mf in ann.annotation)

            for feat in ex['feats']:
                if feat in ex['diff']:
                    self.assertNotEqual(str(expected_seqs[feat]),
                                        str(ann.annotation[feat].seq))
                else:
                    self.assertEqual(str(expected_seqs[feat]),
                                     str(ann.annotation[feat].seq))
    finally:
        # Close the connection even when an assertion fails.
        server.close()
def load_gb_dict_into_db(genbank_data):
    """Load the records of ``genbank_data`` into the BioSQL database.

    The connection is opened with the module-level ``db_driver``,
    ``db_user``, ``db_passwd``, ``db_host`` and ``db_name`` settings, and
    the records are loaded into the sub-database named by the module-level
    ``namespace``. Progress messages are printed before and after loading.

    Args:
        genbank_data: Mapping whose values are the records to load; only
            ``genbank_data.values()`` is used.

    Returns:
        An empty tuple (kept for backward compatibility).
    """
    print("Loading genbank entries into the database ...")
    server = BioSeqDatabase.open_database(driver=db_driver,
                                          user=db_user,
                                          passwd=db_passwd,
                                          host=db_host,
                                          db=db_name)
    # Release the connection whether or not loading succeeds.
    try:
        db = server[namespace]
        count = db.load(genbank_data.values())
        server.commit()
    finally:
        server.close()
    print("Loaded %i sequences" % count)
    return ()
def db(self, database):
    """Create FASTA files for every GenBank record in the database.

    Every record of every sub-database on the server is looked up and
    handed to ``self.write_fasta_file``.

    Args:
        database: Value passed as ``db`` to the ``sqlite3`` driver
            (NOTE(review): presumably the path of the SQLite file).

    Raises:
        Whatever the lookup or ``write_fasta_file`` raises; the error is
        propagated unchanged instead of being replaced by the ``TypeError``
        that ``raise ()`` used to produce.
    """
    server = BioSeqDatabase.open_database(driver="sqlite3", db=database)
    # Always release the connection, also when a record fails.
    try:
        for db_name in server.keys():
            sub_db = server[db_name]
            for item in sub_db.keys():
                record = sub_db.lookup(item)
                self.write_fasta_file(record)
    finally:
        server.close()
def test_add_from_gff(self):
    """Add in sequences from a gff + fasta file.

    Runs the command line entry point with ``-g``, ``-f`` and ``-D test``,
    expects exit code 0 and then checks that the ``taxon_name`` table holds
    no row with name class ``scientific name``.
    """
    test_files = os.path.join(os.path.dirname(__file__), 'test_files')
    gff = os.path.join(test_files, 'GCF_000005845.2_ASM584v2_genomic.gff')
    fasta = os.path.join(test_files, 'GCF_000005845.2_ASM584v2_genomic.fna')

    runner = CliRunner()
    result = runner.invoke(cli.main,
                           self.common_params + ['-g', gff, '-f', fasta, '-D', 'test'])
    self.assertEqual(result.exit_code, 0)

    server = BioSeqDatabase.open_database(driver=self.dbdriver,
                                          user=self.dbuser,
                                          passwd=self.dbpassword,
                                          host=self.dbhost,
                                          db=self.dbname)
    # Close the connection even if the assertion below fails.
    try:
        rows = server.adaptor.execute_and_fetchall(
            "SELECT name FROM taxon_name where name_class = 'scientific name'")
        self.assertEqual(rows, [])
    finally:
        server.close()
def setUp(self):
    """Connect to and load up the database.

    Loads ``GenBank/cor6_6.gb`` through ``load_database`` and then stores
    the open server as ``self.server`` and its ``"biosql-test"``
    sub-database as ``self.db``.
    """
    gb_file = "GenBank/cor6_6.gb"
    # The context manager closes the handle even if loading fails.
    with open(gb_file, "r") as gb_handle:
        load_database(gb_handle)

    self.server = BioSeqDatabase.open_database(driver=DBDRIVER,
                                               user=DBUSER,
                                               passwd=DBPASSWD,
                                               host=DBHOST,
                                               db=TESTDB)
    self.db = self.server["biosql-test"]
def test_002_server(self):
    """Check that ReferenceData built from a BioSQL server reports it.

    The instance must flag the server as available and must have neither
    ``seqref`` nor ``hlaref`` set.
    """
    server = BioSeqDatabase.open_database(driver="pymysql",
                                          user=biosqluser,
                                          passwd=biosqlpass,
                                          host=biosqlhost,
                                          db=biosqldb,
                                          port=biosqlport)
    # Close the connection even if one of the assertions fails.
    try:
        refdata = ReferenceData(server=server)
        self.assertIsInstance(refdata, ReferenceData)
        self.assertTrue(refdata.server_avail)
        self.assertFalse(refdata.seqref)
        self.assertFalse(refdata.hlaref)
    finally:
        server.close()
def __init__(self):
    """Set up the URL routes and open the BioSQL connection they share.

    One connection is opened here and handed to the ``/just_testing``
    handler through its initialisation dictionary.
    """
    # NOTE(review): the database credentials are hard-coded in the source;
    # consider loading them from configuration instead.
    connection_args = {
        "driver": "MySQLdb",
        "user": "******",
        "passwd": "FurtherFlowersVenus",
        "host": "localhost",
        "db": "bioseqdb",
    }
    biosql_server = BioSeqDatabase.open_database(**connection_args)

    routes = [
        (r"/", MainHandler),
        (r"/just_testing", Just_Testing, {'server': biosql_server}),
    ]
    app_settings = {"autoescape": None}
    tornado.web.Application.__init__(self, routes, **app_settings)
def create_database():
    """Delete any existing BioSQL test database, then (re)create an empty one.

    For the ``sqlite3`` driver the ``TESTDB`` file is removed if it exists;
    for every other driver ``_do_db_create()`` is called. Afterwards the
    schema in ``SQL_FILE`` is loaded into ``TESTDB`` and committed.
    """
    if DBDRIVER in ["sqlite3"]:
        if os.path.exists(TESTDB):
            os.remove(TESTDB)
    else:
        _do_db_create()

    # now open a connection to load the database
    server = BioSeqDatabase.open_database(driver=DBDRIVER,
                                          user=DBUSER,
                                          passwd=DBPASSWD,
                                          host=DBHOST,
                                          db=TESTDB)
    # Release the connection even if loading the schema fails.
    try:
        server.load_database_sql(SQL_FILE)
        server.commit()
    finally:
        server.close()
def test_003_dblist(self):
    """Check ReferenceData for every database version in ``self.dblist``.

    Each instance must report the requested ``dbversion``, flag the server
    as available and have neither ``seqref`` nor ``hlaref`` set.
    """
    server = BioSeqDatabase.open_database(driver="pymysql",
                                          user=biosqluser,
                                          passwd=biosqlpass,
                                          host=biosqlhost,
                                          db=biosqldb,
                                          port=biosqlport)
    # Close the connection even if one of the assertions fails.
    try:
        for dbversion in self.dblist:
            refdata = ReferenceData(server=server, dbversion=dbversion)
            self.assertEqual(refdata.dbversion, dbversion)
            self.assertTrue(refdata.server_avail)
            self.assertFalse(refdata.seqref)
            self.assertFalse(refdata.hlaref)
    finally:
        server.close()
def main(args):
    """Extract the features matching a qualifier/value pair.

    The seqfeature ids are looked up with ``get_seqfeature_ids_from_qv``
    and passed to ``extract_feature_sql``; for ``feat-prot`` output the
    call additionally requests translation. Any other output format
    extracts nothing.

    Args:
        args: Parsed command line options. Reads ``driver``, ``database``,
            ``user``, ``host``, ``password``, ``qualifier``, ``value``,
            ``database_name``, ``fuzzy``, ``feature_type`` and
            ``output_format``.
    """
    # Feature types used when the caller did not pass --feature_type.
    default_types = {
        'feat-prot': ['CDS'],
        'feat-nucl': ['CDS', 'rRNA', 'tRNA'],
    }

    server = BioSeqDatabase.open_database(driver=args.driver,
                                          db=args.database,
                                          user=args.user,
                                          host=args.host,
                                          passwd=args.password)
    # Release the connection whether or not the extraction succeeds.
    try:
        seqfeature_ids = get_seqfeature_ids_from_qv(server,
                                                    args.qualifier,
                                                    args.value,
                                                    args.database_name,
                                                    fuzzy=args.fuzzy)

        if args.feature_type is not None:
            types = args.feature_type
        else:
            types = default_types.get(args.output_format)

        if args.output_format == 'feat-prot':
            extract_feature_sql(server, seqfeature_ids, type=types,
                                translate=True)
        elif args.output_format == 'feat-nucl':
            extract_feature_sql(server, seqfeature_ids, type=types)
    finally:
        server.close()
def test_007_align(self):
    """Check the annotated alignments loaded by ReferenceData.

    For every entry of ``self.expected['align']`` the ``'Seq'`` parts of
    the allele's annotated alignment are concatenated and compared with
    ``ex['alignment']``.
    """
    # TODO: Add class II tests
    server = BioSeqDatabase.open_database(driver="pymysql",
                                          user=biosqluser,
                                          passwd=biosqlpass,
                                          host=biosqlhost,
                                          db=biosqldb,
                                          port=biosqlport)
    # Close the connection when the test ends, as the sibling tests do.
    try:
        refdata = ReferenceData(server=server, alignments=True)
        for ex in self.expected['align']:
            locus = ex['locus']
            # Only the part of the name before the first "_" is the allele.
            allele = ex['name'].split("_")[0]
            # The locus must be of the form "<prefix>-<locus name>".
            _, loc = locus.split("-")
            segments = refdata.annoated_alignments[loc][allele]
            align = "".join([segments[s]['Seq'] for s in segments.keys()])
            self.assertEqual(str(align), str(ex['alignment']))
    finally:
        server.close()
def test_backwards_compatibility(self):
    """Check can re-use an old BioSQL SQLite3 database.

    The records of ``GenBank/cor6_6.gb`` (with a molecule type of ``mRNA``
    rewritten to ``DNA``) are compared with the records stored in the
    ``OLD`` sub-database of ``BioSQL/cor6_6.db``.
    """
    original_records = []
    for record in SeqIO.parse("GenBank/cor6_6.gb", "gb"):
        if record.annotations["molecule_type"] == "mRNA":
            record.annotations["molecule_type"] = "DNA"
        original_records.append(record)

    # now open a connection to load the database
    server = BioSeqDatabase.open_database(driver=DBDRIVER,
                                          db="BioSQL/cor6_6.db")
    # Close the connection even if one of the checks fails.
    try:
        db = server["OLD"]
        self.assertEqual(len(db), len(original_records))
        # Now read them back...
        biosql_records = [db.lookup(name=rec.name) for rec in original_records]
        # And check they agree
        self.compare_records(original_records, biosql_records)
    finally:
        server.close()
def main(args):
    """Load a GFF + FASTA pair or a GenBank file into a BioSQL database.

    The sub-database ``args.database_name`` is created when the server does
    not have it yet. GFF + FASTA input takes precedence over GenBank
    input; when neither is given nothing is loaded or committed. A failed
    load is rolled back and the error re-raised.

    Args:
        args: Parsed command line options. Reads ``driver``, ``database``,
            ``user``, ``host``, ``password``, ``database_name``, ``gff``,
            ``fasta``, ``genbank``, ``tax_lookup`` and ``taxid``.
    """
    server = BioSeqDatabase.open_database(driver=args.driver,
                                          db=args.database,
                                          user=args.user,
                                          host=args.host,
                                          passwd=args.password)
    # Release the connection whether or not loading succeeds.
    try:
        if args.database_name not in server.keys():
            server.new_database(args.database_name)

        db = server[args.database_name]

        try:
            if args.gff is not None and args.fasta is not None:
                load_gff(db, args.gff, args.fasta, args.tax_lookup, args.taxid)
                server.adaptor.commit()
            elif args.genbank is not None:
                load_genbank(db, args.genbank, args.tax_lookup)
                server.adaptor.commit()
        except BaseException:
            # Undo the partial load on any failure, including interrupts,
            # then let the error propagate.
            server.adaptor.rollback()
            raise
    finally:
        server.close()
def test_backwards_compatibility(self):
    """Check can re-use an old BioSQL SQLite3 database.

    The records of ``GenBank/cor6_6.gb`` are compared with the records
    stored in the ``OLD`` sub-database of ``BioSQL/cor6_6.db``.
    """
    original_records = list(SeqIO.parse("GenBank/cor6_6.gb", "gb"))
    # now open a connection to load the database
    server = BioSeqDatabase.open_database(driver=DBDRIVER,
                                          db="BioSQL/cor6_6.db")
    # Close the connection even if one of the checks fails.
    try:
        db = server["OLD"]
        self.assertEqual(len(db), len(original_records))
        # Now read them back...
        biosql_records = [db.lookup(name=rec.name) for rec in original_records]
        # And check they agree
        # Note the old parser used to create BioSQL/cor6_6.db
        # did not record the molecule_type, so remove it here:
        for r in original_records:
            del r.annotations["molecule_type"]
        self.assertTrue(compare_records(original_records, biosql_records))
    finally:
        server.close()