def migrate(db_path):
    """Upgrade the pan database at `db_path` from `current_version` to `next_version`.

    The 'layer_additional_data' and 'item_additional_data' tables are emptied
    and refilled with the output of `divide_stackedbar_to_multiple_entries`,
    with the re-inserted rows numbered sequentially from zero. The version
    stored in the database is then replaced with `next_version`.

    Raises ConfigError if `db_path` is None, or if the version stored in the
    database is not `current_version`.
    """
    if db_path is None:
        raise ConfigError("No database path is given.")

    # make sure someone is not being funny
    # NOTE(review): presumably raises when the file is not a pan db -- confirm in utils
    utils.is_pan_db(db_path)

    # make sure the version is accurate
    pan_db = db.DB(db_path, None, ignore_version=True)
    if str(pan_db.get_version()) != current_version:
        # release the connection before bailing out so the handle is not leaked
        pan_db.disconnect()
        raise ConfigError("Version of this pan database is not %s (hence, this script cannot really do anything)." % current_version)

    for table_name in ['layer_additional_data', 'item_additional_data']:
        new_table = divide_stackedbar_to_multiple_entries(pan_db.get_table_as_dict(table_name))

        # wipe the table, then put every entry back with a fresh sequential id
        # as its first column
        pan_db._exec("DELETE FROM '%s'" % table_name)
        for new_entry_id, entry_id in enumerate(new_table):
            pan_db.insert(table_name, (new_entry_id, *new_table[entry_id].values()))

    # set the version
    pan_db.remove_meta_key_value_pair('version')
    pan_db.set_version(next_version)

    # now bye for real!
    pan_db.disconnect()

    progress.end()
    run.info_single('Your pan db is now %s.' % next_version, nl_after=1, nl_before=1, mc='green')
def migrate(db_path):
    """Upgrade the pan database at `db_path` from `current_version` to `next_version`.

    Adds a text column named 'data_group' to the 'item_additional_data' and
    'layer_additional_data' tables, sets it to 'default' for every existing
    row, and then replaces the version stored in the database with
    `next_version`.

    Raises ConfigError if `db_path` is None, or if the version stored in the
    database is not `current_version`.
    """
    if db_path is None:
        raise ConfigError("No database path is given.")

    # make sure someone is not being funny
    # NOTE(review): presumably raises when the file is not a pan db -- confirm in utils
    utils.is_pan_db(db_path)

    # make sure the version is accurate
    pan_db = db.DB(db_path, None, ignore_version=True)
    if str(pan_db.get_version()) != current_version:
        # release the connection before bailing out so the handle is not leaked
        pan_db.disconnect()
        raise ConfigError("Version of this pan database is not %s (hence, this script cannot really do anything)." % current_version)

    pan_db._exec('ALTER TABLE "item_additional_data" ADD COLUMN "data_group" text')
    pan_db._exec('ALTER TABLE "layer_additional_data" ADD COLUMN "data_group" text')

    # the value is a single-quoted SQL string literal: in SQL, double quotes
    # denote identifiers, and SQLite only falls back to treating them as
    # strings as a legacy quirk that can be compiled out.
    pan_db._exec("""UPDATE "item_additional_data" SET "data_group" = 'default'""")
    pan_db._exec("""UPDATE "layer_additional_data" SET "data_group" = 'default'""")

    # set the version
    pan_db.remove_meta_key_value_pair('version')
    pan_db.set_version(next_version)

    # now bye for real!
    pan_db.disconnect()

    progress.end()
    run.info_single('Your pan db is now %s.' % next_version, nl_after=1, nl_before=1, mc='green')
def migrate(db_path):
    """Upgrade the pan database at `db_path` from `current_version` to `next_version`.

    The 'layer_additional_data' and 'item_additional_data' tables are emptied
    and refilled with the output of `divide_stackedbar_to_multiple_entries`;
    each entry is inserted as the tuple of its values, without any extra id
    column. The version stored in the database is then replaced with
    `next_version`.

    Raises ConfigError if `db_path` is None, or if the version stored in the
    database is not `current_version`.
    """
    if db_path is None:
        raise ConfigError("No database path is given.")

    # make sure someone is not being funny
    # NOTE(review): presumably raises when the file is not a pan db -- confirm in utils
    utils.is_pan_db(db_path)

    # make sure the version is accurate
    pan_db = db.DB(db_path, None, ignore_version=True)
    if str(pan_db.get_version()) != current_version:
        # release the connection before bailing out so the handle is not leaked
        pan_db.disconnect()
        raise ConfigError("Version of this pan database is not %s (hence, this script cannot really do anything)." % current_version)

    for table_name in ['layer_additional_data', 'item_additional_data']:
        new_table = divide_stackedbar_to_multiple_entries(pan_db.get_table_as_dict(table_name))

        # wipe the table, then put the re-shaped entries back
        pan_db._exec("DELETE FROM '%s'" % table_name)
        for entry_id in new_table:
            pan_db.insert(table_name, tuple(new_table[entry_id].values()))

    # set the version
    pan_db.remove_meta_key_value_pair('version')
    pan_db.set_version(next_version)

    # now bye for real!
    pan_db.disconnect()

    progress.end()
    run.info_single('Your pan db is now %s.' % next_version, nl_after=1, nl_before=1, mc='green')
def migrate(db_path):
    """Upgrade the pan database at `db_path` from `current_version` to `next_version`.

    Adds a text column named 'additional' to the 'item_orders' table, sets it
    to the string '{}' for every existing row, and then replaces the version
    stored in the database with `next_version`.

    Raises ConfigError if `db_path` is None, or if the version stored in the
    database is not `current_version`.
    """
    if db_path is None:
        raise ConfigError("No database path is given.")

    # make sure someone is not being funny
    # NOTE(review): presumably raises when the file is not a pan db -- confirm in utils
    utils.is_pan_db(db_path)

    # make sure the version is accurate
    pan_db = db.DB(db_path, None, ignore_version=True)
    if str(pan_db.get_version()) != current_version:
        # release the connection before bailing out so the handle is not leaked
        pan_db.disconnect()
        raise ConfigError("Version of this pan database is not %s (hence, this script cannot really do anything)." % current_version)

    pan_db._exec('ALTER TABLE "item_orders" ADD COLUMN "additional" text')

    # the value is a single-quoted SQL string literal: in SQL, double quotes
    # denote identifiers, and SQLite only falls back to treating them as
    # strings as a legacy quirk that can be compiled out.
    pan_db._exec("""UPDATE "item_orders" SET "additional" = '{}'""")

    # set the version
    pan_db.remove_meta_key_value_pair('version')
    pan_db.set_version(next_version)

    # now bye for real!
    pan_db.disconnect()

    progress.end()
    run.info_single('Your pan db is now %s.' % next_version, nl_after=1, nl_before=1, mc='green')
def migrate(db_path):
    """Upgrade the pan database at `db_path` from `current_version` to `next_version`.

    Sets the 'description' meta value to the placeholder text
    '_No description is found_' and then replaces the version stored in the
    database with `next_version`.

    Raises ConfigError if `db_path` is None, or if the version stored in the
    database is not `current_version`.
    """
    if db_path is None:
        raise ConfigError("No database path is given.")

    # make sure someone is not being funny
    # NOTE(review): presumably raises when the file is not a pan db -- confirm in utils
    utils.is_pan_db(db_path)

    # make sure the version is accurate
    pan_db = db.DB(db_path, None, ignore_version=True)
    if str(pan_db.get_version()) != current_version:
        # release the connection before bailing out so the handle is not leaked
        pan_db.disconnect()
        raise ConfigError("Version of this pan database is not %s (hence, this script cannot really do anything)." % current_version)

    progress.new("Trying to upgrade the pan database")
    progress.update('...')

    pan_db.set_meta_value('description', '_No description is found_')

    # set the version
    pan_db.remove_meta_key_value_pair('version')
    pan_db.set_version(next_version)

    # bye
    pan_db.disconnect()

    progress.end()
    run.info_single('Your pan db is now %s.' % next_version, nl_after=1, nl_before=1, mc='green')
def migrate(db_path):
    """Upgrade the pan database at `db_path` from `current_version` to `next_version`.

    Copies the meta value stored under 'gene_clusters_ordered' to the new key
    'items_ordered', removes the 'gene_clusters_ordered' and
    'gene_clusters_clustered' meta keys, and then replaces the version stored
    in the database with `next_version`.

    Raises ConfigError if `db_path` is None, or if the version stored in the
    database is not `current_version`.
    """
    if db_path is None:
        raise ConfigError("No database path is given.")

    # make sure someone is not being funny
    # NOTE(review): presumably raises when the file is not a pan db -- confirm in utils
    utils.is_pan_db(db_path)

    # make sure the version is accurate
    pan_db = db.DB(db_path, None, ignore_version=True)
    if str(pan_db.get_version()) != current_version:
        # release the connection before bailing out so the handle is not leaked
        pan_db.disconnect()
        raise ConfigError("Version of this pan database is not %s (hence, this script cannot really do anything)." % current_version)

    # gene_clusters_ordered -> items_ordered
    pan_db.set_meta_value('items_ordered', pan_db.get_meta_value('gene_clusters_ordered'))
    pan_db.remove_meta_key_value_pair('gene_clusters_ordered')
    pan_db.remove_meta_key_value_pair('gene_clusters_clustered')

    # set the version
    pan_db.remove_meta_key_value_pair('version')
    pan_db.set_version(next_version)

    # now bye for real!
    pan_db.disconnect()

    progress.end()
    run.info_single('Your pan db is now %s (lucky you).' % next_version, nl_after=1, nl_before=1, mc='green')
def migrate(db_path):
    """Upgrade the pan database at `db_path` from `current_version` to `next_version`.

    Calls `drop_entry_id_column_from_table` once for every table listed in
    the module-level `tables` mapping, then replaces the version stored in
    the database with `next_version`.

    Raises ConfigError if `db_path` is None, or if the version stored in the
    database is not `current_version`.
    """
    if db_path is None:
        raise ConfigError("No database path is given.")

    # make sure someone is not being funny
    # NOTE(review): presumably raises when the file is not a pan db -- confirm in utils
    utils.is_pan_db(db_path)

    # read the stored version and close the handle right away: the helper
    # below is handed the path of the database rather than this connection
    pan_db = db.DB(db_path, None, ignore_version=True)
    version_in_db = str(pan_db.get_version())
    pan_db.disconnect()

    if version_in_db != current_version:
        raise ConfigError("Version of this pan database is not %s (hence, this script cannot really do anything)." % current_version)

    # drop entry ids one by one
    for table_name, table_properties in tables.items():
        drop_entry_id_column_from_table(db_path, table_name, table_properties=table_properties)

    # set the version
    pan_db = db.DB(db_path, None, ignore_version=True)
    pan_db.remove_meta_key_value_pair('version')
    pan_db.set_version(next_version)
    pan_db.disconnect()

    progress.end()

    summary = ("Your pan db is now version %s. There were %d tables in it that needed upatin'. "
               "They're good now." % (next_version, len(tables)))
    run.info_single(summary, nl_after=1, nl_before=1, mc='green')
def migrate(db_path):
    """Upgrade the pan database at `db_path` from `current_version` to `next_version`.

    Sets the 'description' meta value to the placeholder text
    '_No description is found_' and then replaces the version stored in the
    database with `next_version`.

    Raises ConfigError if `db_path` is None, or if the version stored in the
    database is not `current_version`.
    """
    if db_path is None:
        raise ConfigError("No database path is given.")

    # make sure someone is not being funny
    # NOTE(review): presumably raises when the file is not a pan db -- confirm in utils
    utils.is_pan_db(db_path)

    # make sure the version is accurate
    pan_db = db.DB(db_path, None, ignore_version=True)
    if str(pan_db.get_version()) != current_version:
        # release the connection before bailing out so the handle is not leaked
        pan_db.disconnect()
        raise ConfigError("Version of this pan database is not %s (hence, this script cannot really do anything)." % current_version)

    progress.new("Trying to upgrade the pan database")
    progress.update('...')

    pan_db.set_meta_value('description', '_No description is found_')

    # set the version
    pan_db.remove_meta_key_value_pair('version')
    pan_db.set_version(next_version)

    # bye
    pan_db.disconnect()

    progress.end()
    run.info_single('Your pan db is now %s.' % next_version, nl_after=1, nl_before=1, mc='green')
def migrate(db_path):
    """Upgrade the pan database at `db_path` from `current_version` to `next_version`.

    Copies the meta value stored under 'gene_clusters_ordered' to the new key
    'items_ordered', removes the 'gene_clusters_ordered' and
    'gene_clusters_clustered' meta keys, and then replaces the version stored
    in the database with `next_version`.

    Raises ConfigError if `db_path` is None, or if the version stored in the
    database is not `current_version`.
    """
    if db_path is None:
        raise ConfigError("No database path is given.")

    # make sure someone is not being funny
    # NOTE(review): presumably raises when the file is not a pan db -- confirm in utils
    utils.is_pan_db(db_path)

    # make sure the version is accurate
    pan_db = db.DB(db_path, None, ignore_version=True)
    if str(pan_db.get_version()) != current_version:
        # release the connection before bailing out so the handle is not leaked
        pan_db.disconnect()
        raise ConfigError("Version of this pan database is not %s (hence, this script cannot really do anything)." % current_version)

    # gene_clusters_ordered -> items_ordered
    pan_db.set_meta_value('items_ordered', pan_db.get_meta_value('gene_clusters_ordered'))
    pan_db.remove_meta_key_value_pair('gene_clusters_ordered')
    pan_db.remove_meta_key_value_pair('gene_clusters_clustered')

    # set the version
    pan_db.remove_meta_key_value_pair('version')
    pan_db.set_version(next_version)

    # now bye for real!
    pan_db.disconnect()

    progress.end()
    run.info_single('Your pan db is now %s (lucky you).' % next_version, nl_after=1, nl_before=1, mc='green')
def __init__(self, db_path, run=run, progress=progress):
    """Bind this table object to the pan database at `db_path`.

    Stores the path, the `run` and `progress` objects on the instance,
    initializes the `Table` base with the pan database version taken from
    `anvio.__pan__version__`, and starts with an empty `entries` list.
    """
    self.db_path = db_path

    # NOTE(review): presumably raises when the file is not a pan db -- confirm in utils
    utils.is_pan_db(db_path)

    self.run, self.progress = run, progress

    Table.__init__(self, self.db_path, anvio.__pan__version__, self.run, self.progress)

    self.entries = []
def __init__(self, db_path, run=run, progress=progress):
    """Bind this table object to the pan database at `db_path`.

    Stores the path, the `run` and `progress` objects on the instance,
    initializes the `Table` base with the pan database version taken from
    `anvio.__pan__version__`, calls `set_next_available_id` for the pan gene
    clusters table, and starts with an empty `entries` list.
    """
    self.db_path = db_path

    # NOTE(review): presumably raises when the file is not a pan db -- confirm in utils
    utils.is_pan_db(db_path)

    self.run, self.progress = run, progress

    Table.__init__(self, self.db_path, anvio.__pan__version__, self.run, self.progress)

    gene_clusters_table = t.pan_gene_clusters_table_name
    self.set_next_available_id(gene_clusters_table)

    self.entries = []
def __init__(self, db_path, run=run, progress=progress):
    """Bind this table object to the pan database at `db_path`.

    Stores the path, the `run` and `progress` objects on the instance,
    initializes the `Table` base with the pan database version taken from
    `anvio.__pan__version__`, calls `set_next_available_id` for the pan gene
    clusters table, and starts with an empty `entries` list.
    """
    self.db_path = db_path

    # NOTE(review): presumably raises when the file is not a pan db -- confirm in utils
    utils.is_pan_db(db_path)

    self.run, self.progress = run, progress

    Table.__init__(self, self.db_path, anvio.__pan__version__, self.run, self.progress)

    gene_clusters_table = t.pan_gene_clusters_table_name
    self.set_next_available_id(gene_clusters_table)

    self.entries = []
def migrate(db_path):
    """Upgrade the pan database at `db_path` from `current_version` to `next_version`.

    Every item order and layer order whose type is 'newick' is parsed with
    `Tree(..., format=1)` and written back with `format=2`. The version
    stored in the database is then replaced with `next_version`.

    Raises ConfigError if `db_path` is None, or if the version stored in the
    database is not `current_version`.
    """
    if db_path is None:
        raise ConfigError("No database path is given.")

    # make sure someone is not being funny
    # NOTE(review): presumably raises when the file is not a pan db -- confirm in utils
    utils.is_pan_db(db_path)

    # make sure the version is accurate
    pan_db = db.DB(db_path, None, ignore_version=True)
    if str(pan_db.get_version()) != current_version:
        # release the connection before bailing out so the handle is not leaked
        pan_db.disconnect()
        raise ConfigError("Version of this pan database is not %s (hence, this script cannot really do anything)." % current_version)

    # migrate item orders
    item_orders = pan_db.get_table_as_dict(item_orders_table_name)
    for order_name in item_orders:
        if item_orders[order_name]['type'] == 'newick':
            newick = Tree(item_orders[order_name]['data'], format=1)
            newick = newick.write(format=2)

            # match the row by exact name: LIKE would treat any '_' or '%' in
            # the order name as a wildcard and ignore ASCII case, which could
            # overwrite the tree of a different order
            pan_db._exec("""UPDATE %s SET "data" = ? WHERE "name" = ?""" % item_orders_table_name, (newick, order_name))

    # migrate layer orders
    layer_orders = pan_db.get_table_as_dict(layer_orders_table_name)
    for order_name in layer_orders:
        if layer_orders[order_name]['data_type'] == 'newick':
            newick = Tree(layer_orders[order_name]['data_value'], format=1)
            newick = newick.write(format=2)

            # exact match for the same reason as above
            pan_db._exec("""UPDATE %s SET "data_value" = ? WHERE "data_key" = ?""" % layer_orders_table_name, (newick, order_name))

    # set the version
    pan_db.remove_meta_key_value_pair('version')
    pan_db.set_version(next_version)

    # now bye for real!
    pan_db.disconnect()

    progress.end()
    run.info_single('Your pan db is now %s.' % next_version, nl_after=1, nl_before=1, mc='green')
def migrate(db_path):
    """Upgrade the pan database at `db_path` from `current_version` to `next_version`.

    Steps, in order:
      * copy the 'maxbit' meta value to 'minbit' and drop 'maxbit' (best effort),
      * rebuild the 'self' and 'views' tables with every 'PC' / 'pc' substring
        replaced by 'gene_cluster',
      * rename the PC_frequencies, PC_presence_absence and protein_clusters
        tables, and rename the protein_cluster_id column to gene_cluster_id,
      * create the new item additional data table and set the new version,
      * carry the content of the old 'additional_data' table over through
        `TableForItemAdditionalData`, then drop the old table and the
        'additional_data_headers' meta key.

    Raises ConfigError if `db_path` is None, or if the version stored in the
    database is not `current_version`.
    """
    def _pc_to_gene_cluster(text):
        # same two-step replacement for keys, values, view names and targets
        return text.replace('PC', 'gene_cluster').replace('pc', 'gene_cluster')

    if db_path is None:
        raise ConfigError("No database path is given.")

    # make sure someone is not being funny
    # NOTE(review): presumably raises when the file is not a pan db -- confirm in utils
    utils.is_pan_db(db_path)

    # make sure the version is accurate
    pan_db = db.DB(db_path, None, ignore_version=True)
    if str(pan_db.get_version()) != current_version:
        # release the connection before bailing out so the handle is not leaked
        pan_db.disconnect()
        raise ConfigError("Version of this pan database is not %s (hence, this script cannot really do anything)." % current_version)

    # update keys. best effort on purpose, but only ordinary errors are
    # swallowed so Ctrl-C / SystemExit still get through
    for old_key, new_key in [('maxbit', 'minbit')]:
        try:
            pan_db.set_meta_value(new_key, pan_db.get_meta_value(old_key))
        except Exception:
            pass

    # remove stuff that are not irrelevant
    try:
        pan_db.remove_meta_key_value_pair('maxbit')
    except Exception:
        pass

    # learn additional_data_headers for later:
    additional_data_headers = pan_db.get_meta_value('additional_data_headers').split(',')

    # take care of the self table
    self_table = pan_db.get_table_as_list_of_tuples('self')
    pan_db.cursor.execute('ALTER TABLE self RENAME TO self_TEMP;')
    pan_db.cursor.execute('CREATE TABLE self (key text, value text);')

    for key, val in self_table:
        pan_db.set_meta_value(_pc_to_gene_cluster(key), _pc_to_gene_cluster(val))

    pan_db.cursor.execute('DROP TABLE self_TEMP;')

    # take care of the views table
    views_table = pan_db.get_table_as_list_of_tuples('views')
    pan_db.cursor.execute('ALTER TABLE views RENAME TO views_TEMP;')
    pan_db.cursor.execute('CREATE TABLE views (view_id str, target_table str);')

    values = [(_pc_to_gene_cluster(view), _pc_to_gene_cluster(target)) for view, target in views_table]
    pan_db.insert_many('views', values)
    pan_db.cursor.execute('DROP TABLE views_TEMP;')

    # rename tables
    pan_db._exec('ALTER TABLE PC_frequencies RENAME TO gene_cluster_frequencies;')
    pan_db._exec('ALTER TABLE PC_presence_absence RENAME TO gene_cluster_presence_absence;')
    pan_db._exec('ALTER TABLE protein_clusters RENAME TO gene_clusters;')

    # protein_cluster_id -> gene_cluster_id in table gene_clusters.
    pan_db.cursor.execute('ALTER TABLE gene_clusters RENAME TO gene_clusters_TEMP;')
    pan_db.cursor.execute('CREATE TABLE gene_clusters (entry_id numeric, gene_caller_id numeric, gene_cluster_id str, genome_name str, alignment_summary str);')
    pan_db.cursor.execute('INSERT INTO gene_clusters(entry_id, gene_caller_id, gene_cluster_id, genome_name, alignment_summary) SELECT entry_id, gene_caller_id, protein_cluster_id, genome_name, alignment_summary FROM gene_clusters_TEMP;')
    pan_db.cursor.execute('DROP TABLE gene_clusters_TEMP;')

    # commit
    try:
        pan_db._exec('COMMIT')
    except Exception:
        pass

    # we also added a totally new table to this version:
    pan_db.create_table(item_additional_data_table_name, item_additional_data_table_structure, item_additional_data_table_types)

    # set the version
    pan_db.remove_meta_key_value_pair('version')
    pan_db.set_version(next_version)

    # we have one more thing to do: getting rid of the 'additional_data' table without losing data, by carrying
    # its content into our new item_additional_data_table
    additional_data_table_dict = pan_db.get_table_as_dict('additional_data')

    # close the db temporarily
    pan_db.disconnect()

    # update the contents of the item_additional_data_table
    args = argparse.Namespace(pan_db=db_path, just_do_it=True, ignore_db_version=True)
    item_additional_data_table = TableForItemAdditionalData(args)
    item_additional_data_table.add(additional_data_headers, additional_data_table_dict)

    # open the database again to remove stuff
    pan_db = db.DB(db_path, None, ignore_version=True)
    pan_db.remove_meta_key_value_pair('additional_data_headers')
    pan_db._exec("DROP TABLE additional_data")

    # now bye for real!
    pan_db.disconnect()

    progress.end()
    run.info_single('Your pan db is now %s.' % next_version, nl_after=1, nl_before=1, mc='green')
def migrate(db_path):
    """Upgrade the pan database at `db_path` from `current_version` to `next_version`.

    Creates the item orders table (ignoring a failure to create it), copies
    every row of the old 'clusterings' table into it as a 'newick' order,
    copies a set of meta values to their new key names, removes the old
    table and old keys, and finally replaces the version stored in the
    database with `next_version`.

    Raises ConfigError if `db_path` is None, or if the version stored in the
    database is not `current_version`.
    """
    if db_path is None:
        raise ConfigError("No database path is given.")

    # make sure someone is not being funny
    # NOTE(review): presumably raises when the file is not a pan db -- confirm in utils
    utils.is_pan_db(db_path)

    # make sure the version is accurate
    pan_db = db.DB(db_path, None, ignore_version=True)
    if str(pan_db.get_version()) != current_version:
        # release the connection before bailing out so the handle is not leaked
        pan_db.disconnect()
        raise ConfigError("Version of this pan database is not %s (hence, this script cannot really do anything)." % current_version)

    progress.new("Trying to upgrade the pan database")
    progress.update('...')

    # the steps wrapped in try/except below are best effort on purpose, but
    # only ordinary errors are swallowed so Ctrl-C / SystemExit still get through
    try:
        pan_db.create_table(item_orders_table_name, item_orders_table_structure, item_orders_table_types)
    except Exception:
        pass

    clusterings = pan_db.get_table_as_dict('clusterings')

    # move clustering data into the new table
    for clustering in clusterings:
        newick = clusterings[clustering]['newick']
        pan_db._exec('''INSERT INTO %s VALUES (?,?,?)''' % item_orders_table_name, (clustering, 'newick', newick))

    # update keys
    for old_key, new_key in [('pc_min_occurrence', 'gene_cluster_min_occurrence'),
                             ('num_protein_clusters', 'num_gene_clusters'),
                             ('num_genes_in_protein_clusters', 'num_genes_in_gene_clusters'),
                             ('available_clusterings', 'available_item_orders'),
                             ('PCs_clustered', 'PCs_ordered'),
                             ('default_clustering', 'default_item_order')]:
        try:
            pan_db.set_meta_value(new_key, pan_db.get_meta_value(old_key))
        except Exception:
            pass

    # remove stuff that are not irrelevant
    try:
        pan_db._exec('DROP TABLE clusterings;')
        for obsolete_key in ('available_clusterings',
                             'PCs_clustered',
                             'default_clustering',
                             'num_protein_clusters',
                             'num_genes_in_protein_clusters',
                             'pc_min_occurrence'):
            pan_db.remove_meta_key_value_pair(obsolete_key)
    except Exception:
        pass

    # commit
    try:
        pan_db._exec('COMMIT')
    except Exception:
        pass

    # cleanup
    try:
        pan_db._exec('vacuum')
    except Exception:
        pass

    # set the version
    pan_db.remove_meta_key_value_pair('version')
    pan_db.set_version(next_version)

    # bye
    pan_db.disconnect()

    progress.end()
    run.info_single('Your pan db is now %s.' % next_version, nl_after=1, nl_before=1, mc='green')
def migrate(db_path):
    """Upgrade the pan database at `db_path` from `current_version` to `next_version`.

    Creates the item orders table described in `t` (ignoring a failure to
    create it), copies every row of the old 'clusterings' table into it as a
    'newick' order, copies a set of meta values to their new key names,
    removes the old table and old keys, and finally replaces the version
    stored in the database with `next_version`.

    Raises ConfigError if `db_path` is None, or if the version stored in the
    database is not `current_version`.
    """
    if db_path is None:
        raise ConfigError("No database path is given.")

    # make sure someone is not being funny
    # NOTE(review): presumably raises when the file is not a pan db -- confirm in utils
    utils.is_pan_db(db_path)

    # make sure the version is accurate
    pan_db = db.DB(db_path, None, ignore_version=True)
    if str(pan_db.get_version()) != current_version:
        # release the connection before bailing out so the handle is not leaked
        pan_db.disconnect()
        raise ConfigError("Version of this pan database is not %s (hence, this script cannot really do anything)." % current_version)

    progress.new("Trying to upgrade the pan database")
    progress.update('...')

    # the steps wrapped in try/except below are best effort on purpose, but
    # only ordinary errors are swallowed so Ctrl-C / SystemExit still get through
    try:
        pan_db.create_table(t.item_orders_table_name, t.item_orders_table_structure, t.item_orders_table_types)
    except Exception:
        pass

    clusterings = pan_db.get_table_as_dict('clusterings')

    # move clustering data into the new table
    for clustering in clusterings:
        newick = clusterings[clustering]['newick']
        pan_db._exec('''INSERT INTO %s VALUES (?,?,?)''' % t.item_orders_table_name, (clustering, 'newick', newick))

    # update keys
    for old_key, new_key in [('pc_min_occurrence', 'gene_cluster_min_occurrence'),
                             ('num_protein_clusters', 'num_gene_clusters'),
                             ('num_genes_in_protein_clusters', 'num_genes_in_gene_clusters'),
                             ('available_clusterings', 'available_item_orders'),
                             ('PCs_clustered', 'PCs_ordered'),
                             ('default_clustering', 'default_item_order')]:
        try:
            pan_db.set_meta_value(new_key, pan_db.get_meta_value(old_key))
        except Exception:
            pass

    # remove stuff that are not irrelevant
    try:
        pan_db._exec('DROP TABLE clusterings;')
        for obsolete_key in ('available_clusterings',
                             'PCs_clustered',
                             'default_clustering',
                             'num_protein_clusters',
                             'num_genes_in_protein_clusters',
                             'pc_min_occurrence'):
            pan_db.remove_meta_key_value_pair(obsolete_key)
    except Exception:
        pass

    # commit
    try:
        pan_db._exec('COMMIT')
    except Exception:
        pass

    # cleanup
    try:
        pan_db._exec('vacuum')
    except Exception:
        pass

    # set the version
    pan_db.remove_meta_key_value_pair('version')
    pan_db.set_version(next_version)

    # bye
    pan_db.disconnect()

    progress.end()
    run.info_single('Your pan db is now %s.' % next_version, nl_after=1, nl_before=1, mc='green')
def migrate(db_path):
    """Upgrade the pan database at `db_path` from `current_version` to `next_version`.

    Steps, in order:
      * copy the 'maxbit' meta value to 'minbit' and drop 'maxbit' (best effort),
      * rebuild the 'self' and 'views' tables with every 'PC' / 'pc' substring
        replaced by 'gene_cluster',
      * rename the PC_frequencies, PC_presence_absence and protein_clusters
        tables, and rename the protein_cluster_id column to gene_cluster_id,
      * create the new item additional data table and set the new version,
      * carry the content of the old 'additional_data' table over through
        `TableForItemAdditionalData`, then drop the old table and the
        'additional_data_headers' meta key.

    Raises ConfigError if `db_path` is None, or if the version stored in the
    database is not `current_version`.
    """
    def _pc_to_gene_cluster(text):
        # same two-step replacement for keys, values, view names and targets
        return text.replace('PC', 'gene_cluster').replace('pc', 'gene_cluster')

    if db_path is None:
        raise ConfigError("No database path is given.")

    # make sure someone is not being funny
    # NOTE(review): presumably raises when the file is not a pan db -- confirm in utils
    utils.is_pan_db(db_path)

    # make sure the version is accurate
    pan_db = db.DB(db_path, None, ignore_version=True)
    if str(pan_db.get_version()) != current_version:
        # release the connection before bailing out so the handle is not leaked
        pan_db.disconnect()
        raise ConfigError("Version of this pan database is not %s (hence, this script cannot really do anything)." % current_version)

    # update keys. best effort on purpose, but only ordinary errors are
    # swallowed so Ctrl-C / SystemExit still get through
    for old_key, new_key in [('maxbit', 'minbit')]:
        try:
            pan_db.set_meta_value(new_key, pan_db.get_meta_value(old_key))
        except Exception:
            pass

    # remove stuff that are not irrelevant
    try:
        pan_db.remove_meta_key_value_pair('maxbit')
    except Exception:
        pass

    # learn additional_data_headers for later:
    additional_data_headers = pan_db.get_meta_value('additional_data_headers').split(',')

    # take care of the self table
    self_table = pan_db.get_table_as_list_of_tuples('self')
    pan_db.cursor.execute('ALTER TABLE self RENAME TO self_TEMP;')
    pan_db.cursor.execute('CREATE TABLE self (key text, value text);')

    for key, val in self_table:
        pan_db.set_meta_value(_pc_to_gene_cluster(key), _pc_to_gene_cluster(val))

    pan_db.cursor.execute('DROP TABLE self_TEMP;')

    # take care of the views table
    views_table = pan_db.get_table_as_list_of_tuples('views')
    pan_db.cursor.execute('ALTER TABLE views RENAME TO views_TEMP;')
    pan_db.cursor.execute('CREATE TABLE views (view_id str, target_table str);')

    values = [(_pc_to_gene_cluster(view), _pc_to_gene_cluster(target)) for view, target in views_table]
    pan_db.insert_many('views', values)
    pan_db.cursor.execute('DROP TABLE views_TEMP;')

    # rename tables
    pan_db._exec('ALTER TABLE PC_frequencies RENAME TO gene_cluster_frequencies;')
    pan_db._exec('ALTER TABLE PC_presence_absence RENAME TO gene_cluster_presence_absence;')
    pan_db._exec('ALTER TABLE protein_clusters RENAME TO gene_clusters;')

    # protein_cluster_id -> gene_cluster_id in table gene_clusters.
    pan_db.cursor.execute('ALTER TABLE gene_clusters RENAME TO gene_clusters_TEMP;')
    pan_db.cursor.execute('CREATE TABLE gene_clusters (entry_id numeric, gene_caller_id numeric, gene_cluster_id str, genome_name str, alignment_summary str);')
    pan_db.cursor.execute('INSERT INTO gene_clusters(entry_id, gene_caller_id, gene_cluster_id, genome_name, alignment_summary) SELECT entry_id, gene_caller_id, protein_cluster_id, genome_name, alignment_summary FROM gene_clusters_TEMP;')
    pan_db.cursor.execute('DROP TABLE gene_clusters_TEMP;')

    # commit
    try:
        pan_db._exec('COMMIT')
    except Exception:
        pass

    # we also added a totally new table to this version:
    pan_db.create_table(item_additional_data_table_name, item_additional_data_table_structure, item_additional_data_table_types)

    # set the version
    pan_db.remove_meta_key_value_pair('version')
    pan_db.set_version(next_version)

    # we have one more thing to do: getting rid of the 'additional_data' table without losing data, by carrying
    # its content into our new item_additional_data_table
    additional_data_table_dict = pan_db.get_table_as_dict('additional_data')

    # close the db temporarily
    pan_db.disconnect()

    # update the contents of the item_additional_data_table
    args = argparse.Namespace(pan_db=db_path, just_do_it=True, ignore_db_version=True)
    item_additional_data_table = TableForItemAdditionalData(args)
    item_additional_data_table.add(additional_data_headers, additional_data_table_dict)

    # open the database again to remove stuff
    pan_db = db.DB(db_path, None, ignore_version=True)
    pan_db.remove_meta_key_value_pair('additional_data_headers')
    pan_db._exec("DROP TABLE additional_data")

    # now bye for real!
    pan_db.disconnect()

    progress.end()
    run.info_single('Your pan db is now %s.' % next_version, nl_after=1, nl_before=1, mc='green')