def load(self, record_iterator, fetch_NCBI_taxonomy=False):
    """Store every SeqRecord from record_iterator in the BioSQL database.

    record_iterator may be a list of SeqRecord objects or any iterator
    yielding them (for example the result of Bio.SeqIO.parse()).

    fetch_NCBI_taxonomy is a boolean handed on to the database loader;
    it allows or prevents a connection to the NCBI taxonomy database
    (via Bio.Entrez) to fetch a detailed taxonomy for each SeqRecord.

    Example:
        from Bio import SeqIO
        count = db.load(SeqIO.parse(open(filename), format))

    Returns the number of records loaded.
    """
    loader = Loader.DatabaseLoader(
        self.adaptor, self.dbid, fetch_NCBI_taxonomy)
    store_record = loader.load_seqrecord

    total = 0
    for record in record_iterator:
        store_record(record)
        total += 1
    return total
def load(self, record_iterator, fetch_NCBI_taxonomy=False):
    """Load a set of SeqRecords into the BioSQL database.

    record_iterator is either a list of SeqRecord objects, or an
    Iterator object that returns SeqRecord objects (such as the
    output from the Bio.SeqIO.parse() function), which will be
    used to populate the database.

    fetch_NCBI_taxonomy is boolean flag allowing or preventing
    connection to the taxonomic database on the NCBI server
    (via Bio.Entrez) to fetch a detailed taxonomy for each
    SeqRecord.

    Example:
        from Bio import SeqIO
        count = db.load(SeqIO.parse(open(filename), format))

    Returns the number of records loaded.

    Raises the database driver's IntegrityError when the module-level
    flag _POSTGRES_RULES_PRESENT is true and a record with the same
    identifier, or the same accession and version, already exists in
    this sub-database. Records before the duplicate have already been
    passed to the loader at that point.
    """
    db_loader = Loader.DatabaseLoader(self.adaptor, self.dbid,
                                      fetch_NCBI_taxonomy)
    num_records = 0
    for cur_record in record_iterator:
        num_records += 1
        # Hack to work around BioSQL Bug 2839 - If using PostgreSQL and
        # the RULES are present check for a duplicate record before loading
        if _POSTGRES_RULES_PRESENT:
            # Recreate what the Loader's _load_bioentry_table will do:
            # "ACC.N" with an integer N is split into accession and
            # version, anything else is used whole with version 0.
            if cur_record.id.count(".") == 1:
                accession, version = cur_record.id.split(".")
                try:
                    version = int(version)
                except ValueError:
                    accession = cur_record.id
                    version = 0
            else:
                accession = cur_record.id
                version = 0
            gi = cur_record.annotations.get("gi", None)

            # Hand the values to the driver as bound parameters instead of
            # interpolating them into the SQL text: an identifier containing
            # a quote no longer breaks (or injects into) the statement, and
            # a missing GI is sent as NULL rather than the text 'None'.
            # NOTE(review): assumes self.adaptor.execute(sql, args) forwards
            # args to the DB-API cursor - confirm against the adaptor.
            sql = ("SELECT bioentry_id FROM bioentry WHERE (identifier "
                   "= %s AND biodatabase_id = %s) OR (accession = "
                   "%s AND version = %s AND biodatabase_id = %s)")
            self.adaptor.execute(
                sql, (gi, self.dbid, accession, version, self.dbid))
            if self.adaptor.cursor.fetchone():
                # Only the attribute lookup can raise AttributeError, so
                # keep the try limited to it.
                try:
                    integrity_error = self.adaptor.conn.IntegrityError
                except AttributeError:
                    # psycopg version 1 does not expose the exception
                    # class on the connection object.
                    import psycopg
                    raise psycopg.IntegrityError(
                        "Psycopg1: Duplicate record "
                        "detected: record has not been inserted")
                raise integrity_error(
                    "Duplicate record "
                    "detected: record has not been inserted")
        # End of hack
        db_loader.load_seqrecord(cur_record)
    return num_records