def create_vocabulary_table_if_not_exist(model, schema_name, table_name, comment):
    """Create a vocabulary table in ``schema_name`` unless one already exists.

    Args:
        model: object exposing ``schemas``, indexable by schema name.
        schema_name: name of the schema to look in / create the table in.
        table_name: name of the vocabulary table.
        comment: comment passed through to ``Table.define_vocabulary``.

    Returns:
        None. Prints a one-line notice when a table is created.
    """
    target_schema = model.schemas[schema_name]
    if table_name in target_schema.tables:
        # Already present: leave the existing table untouched.
        return

    vocab_def = Table.define_vocabulary(table_name, 'PDB:{RID}', comment=comment)
    target_schema.create_table(vocab_def)
    print('Created table {}:{}'.format(schema_name, table_name))
def create_species_table(self):
    """Create the species vocabulary table, then drop its ``Alternate_IDs`` column.

    The table is created in ``self.species_schema`` under the name
    ``self.species_table``, with a CURIE template built from
    ``self.curie_prefix`` and a key on ``Name``.
    """
    species_schema = self.model.schemas[self.species_schema]
    curie_template = '{prefix}:{{RID}}'.format(prefix=self.curie_prefix)
    species_def = Table.define_vocabulary(
        self.species_table,
        curie_template,
        key_defs=[Key.define(['Name'])],
        comment="Species",
    )
    species = species_schema.create_table(species_def)
    # This table is created without the Alternate_IDs column.
    species.columns["Alternate_IDs"].drop()
def create_gene_type_table(self):
    """Create the gene-type vocabulary table.

    The table is created in ``self.gene_type_schema`` under the name
    ``self.gene_type_table``, with a key on ``Name``, a CURIE template built
    from ``self.curie_prefix`` and a URI template built from ``self.host``.
    """
    gene_type_schema = self.model.schemas[self.gene_type_schema]
    curie_template = '{prefix}:{{RID}}'.format(prefix=self.curie_prefix)
    uri_template = 'https://{host}/id/{{RID}}'.format(host=self.host)
    gene_type_def = Table.define_vocabulary(
        self.gene_type_table,
        curie_template,
        key_defs=[Key.define(['Name'])],
        uri_template=uri_template,
        comment="Gene types",
    )
    gene_type_schema.create_table(gene_type_def)
def create_vocabulary_tables(self):
    """Define the demo vocabulary tables and hand each to ``try_create_table``.

    Every table lives in the ``self.VOCABULARY`` schema, uses the
    ``deriva-demo:{RID}`` CURIE template and is defined with
    ``provide_system=True``. All of them except ``Anatomy`` also get a key on
    ``Name``. Tables are processed in the order listed below.
    """
    vocab_schema = self.model.schemas[self.VOCABULARY]

    # (table name, table comment, whether to add a key on Name)
    vocabularies = (
        ("Species", "Species", True),
        ("Stage",
         "Developmental Stage (e.g., Theiler stage, Carnegie stage)", True),
        ("Sex", "Sex", True),
        ("Assay_Type", "Assay type (e.g., mRNA-Seq, scRNA-Seq, ISH", True),
        ("Anatomy", "Anatomical Region", False),
        ("Molecule_Type", "Type of molecule (e.g., DNA, RNA)", True),
        ("File_Type", "File type", True),
    )

    for vocab_name, vocab_comment, key_on_name in vocabularies:
        options = {"provide_system": True, "comment": vocab_comment}
        if key_on_name:
            # Built per table so each definition gets its own key object.
            options["key_defs"] = [Key.define(['Name'])]
        vocab_def = Table.define_vocabulary(
            vocab_name, "deriva-demo:{RID}", **options)
        self.try_create_table(vocab_schema, vocab_def)
def create_ontology_table(self):
    """Create the ontology vocabulary table and add its ``Ontology_Home`` column.

    The table is created in ``self.ontology_schema`` under the name
    ``self.ontology_table``, with a key on ``Name``, a CURIE template built
    from ``self.curie_prefix`` and a URI template built from ``self.host``.
    """
    ontology_schema = self.model.schemas[self.ontology_schema]
    curie_template = '{prefix}:{{RID}}'.format(prefix=self.curie_prefix)
    uri_template = 'https://{host}/id/{{RID}}'.format(host=self.host)
    ontology_def = Table.define_vocabulary(
        self.ontology_table,
        curie_template,
        key_defs=[Key.define(['Name'])],
        uri_template=uri_template,
        comment="Ontologies",
    )
    ontology = ontology_schema.create_table(ontology_def)

    # The home-page column is added after the table itself exists.
    home_page_col = Column.define(
        "Ontology_Home",
        builtin_types.text,
        nullok=True,
        comment="Home page for this ontology",
    )
    ontology.create_column(home_page_col)
def create_gene_table(self, extra_boolean_cols=None):
    """Create the gene vocabulary table in ``self.gene_schema``.

    On top of the standard vocabulary columns the table gets ``Gene_Type``,
    ``Species``, ``Chromosome``, ``Location`` and ``Source_Date`` columns,
    one boolean column per name in ``extra_boolean_cols``, and foreign keys
    from ``Gene_Type``, ``Species`` and ``Chromosome`` to the gene-type,
    species and chromosome tables respectively.

    Args:
        extra_boolean_cols: optional iterable of column names; a boolean
            column is added for each. ``None`` (the default) adds none.

    Returns:
        None.
    """
    schema = self.model.schemas[self.gene_schema]

    column_defs = [
        Column.define("Gene_Type", builtin_types.text, nullok=False),
        Column.define("Species", builtin_types.text, nullok=False),
        Column.define("Chromosome", builtin_types.text),
        Column.define("Location", builtin_types.text,
                      comment="Location on chromosome"),
        Column.define(
            "Source_Date", builtin_types.date,
            comment="Last-updated date reported by the gene data source"),
    ]
    # None instead of a shared mutable [] default; behaves the same for callers.
    column_defs.extend(
        Column.define(colname, builtin_types.boolean)
        for colname in (extra_boolean_cols or ()))

    fkey_defs = [
        # Same (schema, table, columns) call shape as the Species key below;
        # the original passed only the column list here.
        ForeignKey.define(["Gene_Type"],
                          self.gene_type_schema,
                          self.gene_type_table,
                          self.adjust_fkey_columns_case(
                              self.gene_type_schema,
                              self.gene_type_table,
                              ["ID"])),
        ForeignKey.define(["Species"],
                          self.species_schema,
                          self.species_table,
                          self.adjust_fkey_columns_case(
                              self.species_schema,
                              self.species_table,
                              ["ID"])),
        ForeignKey.define(["Chromosome"],
                          self.chromosome_schema,
                          self.chromosome_table,
                          ["RID"]),
    ]

    # Key definitions must be Key.define() results, as in the other
    # vocabulary builders in this file, not bare column-name lists.
    # NOTE(review): no "NCBI_GeneID" column is defined in this function;
    # confirm where that column is expected to come from.
    key_defs = [Key.define(["NCBI_GeneID"])]

    schema.create_table(
        Table.define_vocabulary(
            self.gene_table,
            '{prefix}:{{RID}}'.format(prefix=self.curie_prefix),
            # The original passed an undefined name here (NameError).
            column_defs=column_defs,
            key_defs=key_defs,
            fkey_defs=fkey_defs,
            comment="Genes"))
def replace_vocab_table(schema_name, old_table_name, new_table_name, replace_if_exists=False):
    """Replaces old vocab table with new and remaps all foreign keys from old to new.

    Relies on names defined outside this function: ``model``, ``catalog``,
    ``args`` (``dryrun``, ``altids``, ``curie_template``, ``max_update``),
    ``clean_term``, ``fkey_blacklist_pattern``, ``verbose`` and ``vverbose``.

    Steps, in order:
      1. If the new table already exists (and this is not a dry run), either
         delete it (``replace_if_exists``) or return without doing anything.
      2. Define the new vocabulary table with an extra ``dbxref`` column
         (plus ``alternate_ids`` when ``args.altids``) and create it.
      3. Read terms from the old table, pass each through ``clean_term`` and
         insert them into the new table.
      4. For each foreign key that references the old table's ``dbxref``
         column: delete it, rewrite the referring column values from old
         dbxref to new ``id``, and create an equivalent foreign key that
         points at the new table's ``id`` column.
      5. Drop the ``dbxref`` column from the new table.

    When ``args.dryrun`` is set, every create/delete/insert/update call in
    the steps above is skipped; reads and logging still happen.

    Args:
        schema_name: name of the schema containing both tables.
        old_table_name: name of the existing vocabulary table.
        new_table_name: name of the vocabulary table to create.
        replace_if_exists: if the new table already exists, delete and
            recreate it when True; return early when False.

    Returns:
        None.
    """
    schema = model.schemas[schema_name]

    # Drop new_vocab table if exists (optional)
    if not args.dryrun and new_table_name in schema.tables:
        if replace_if_exists:
            verbose("Found {tname}. Dropping...".format(tname=new_table_name))
            schema.tables[new_table_name].delete(catalog, schema)
        else:
            verbose("Found {tname}. Skipping...".format(tname=new_table_name))
            return

    # Define and create new vocab table
    # The dbxref column is temporary: it is used to build the old->new
    # mapping below and dropped at the end of this function.
    extra_cols = [
        Column.define('dbxref', builtin_types['text'], comment='Legacy database external reference (dbxref).')
    ]
    if args.altids:
        extra_cols = [
            Column.define('alternate_ids', builtin_types['text[]'], comment='Alternate identifiers for this term.')
        ] + extra_cols
    vocab_table_def = Table.define_vocabulary(
        new_table_name, args.curie_template, uri_template='https://www.facebase.org/id/{RID}', column_defs=extra_cols)
    if not args.dryrun:
        # NOTE: new_table is only bound on this branch; its only later use
        # (the final dbxref drop) is guarded by the same flag.
        new_table = schema.create_table(catalog, vocab_table_def)

    # Populate new vocab table
    datapaths = catalog.getPathBuilder()
    old_table_path = datapaths.schemas[schema_name].tables[old_table_name]
    # Keyword names become the keys of the fetched entities, so the old
    # 'definition' column is read back under the key 'description'.
    kwargs = {
        'name': old_table_path.column_definitions['name'],
        'description': old_table_path.column_definitions['definition'],
        'synonyms': old_table_path.column_definitions['synonyms'],
        'dbxref': old_table_path.column_definitions['dbxref']
    }
    if args.altids:
        # NOTE(review): fetched as 'alternate_dbxrefs' while the new column
        # is 'alternate_ids'; presumably clean_term does the renaming - verify.
        kwargs['alternate_dbxrefs'] = old_table_path.column_definitions[
            'alternate_dbxrefs']
    cleaned_terms = [
        clean_term(term) for term in old_table_path.entities(**kwargs)
    ]
    vverbose('Cleaned terms ready for insert into {tname}:'.format(
        tname=new_table_name))
    vverbose(list(cleaned_terms))

    # Create separate batches for insertion w/ defaults
    # Terms that already carry an id keep it; the others let 'id' be
    # defaulted on insert along with 'uri'.
    terms_w_ids = [
        term for term in cleaned_terms if term['id'] and len(term['id'])
    ]
    terms_w_no_ids = [
        term for term in cleaned_terms
        if not term['id'] or not len(term['id'])
    ]

    if not args.dryrun:
        new_table_path = datapaths.schemas[schema_name].tables[new_table_name]
        new_terms = list(new_table_path.insert(terms_w_ids, defaults=['uri']))
        new_terms += list(
            new_table_path.insert(terms_w_no_ids, defaults=['id', 'uri']))
        vverbose('New terms returned after insert into {tname}:'.format(
            tname=new_table_name))
        vverbose(list(new_terms))
    else:
        # This allows for best effort dryrun testing, though the local term CURIEs will be faked
        new_terms = cleaned_terms
        for term in new_terms:
            if not term['id']:
                # Fake id: the dbxref up to its last ':' (rindex raises
                # ValueError if there is none), upper-cased.
                term['id'] = term['dbxref'][:term['dbxref'].rindex(':')].upper(
                )

    # Create mapping of old dbxref to new id
    dbxref_to_id = {term['dbxref']: term['id'] for term in new_terms}

    # Find all references to old vocab table dbxref
    old_table = schema.tables[old_table_name]
    for fkey in old_table.referenced_by:
        # The blacklist is matched against the second element of the
        # foreign key's first name pair.
        if fkey_blacklist_pattern.match(fkey.names[0][1]):
            verbose('Skipping foreign key "{sname}:{cname}"'.format(
                sname=fkey.names[0][0], cname=fkey.names[0][1]))
            continue  # skip fkeys from vocab to vocab

        # referenced_columns and foreign_key_columns are indexed in
        # parallel: position i in one corresponds to position i in the other.
        for i in range(len(fkey.referenced_columns)):
            # Get referenced column
            refcol = fkey.referenced_columns[i]

            # See if it references the dbxref of the old vocab table, if not skip
            if (refcol['schema_name'] != schema_name
                    or refcol['table_name'] != old_table_name
                    or refcol['column_name'] != 'dbxref'):
                continue

            # Get the corresponding referring table and its fkey column
            fkeycol = fkey.foreign_key_columns[i]
            reftable = model.schemas[fkeycol['schema_name']].tables[
                fkeycol['table_name']]
            verbose(
                'Found reference to "dbxref" from "{sname}:{tname}:{cname}"'.
                format(sname=fkeycol['schema_name'],
                       tname=fkeycol['table_name'],
                       cname=fkeycol['column_name']))

            # Delete the fkey
            # Removed before the values are rewritten, since the new values
            # are ids rather than dbxrefs of the old table.
            if not args.dryrun:
                verbose('Deleting foreign key "{sname}:{cname}"'.format(
                    sname=fkey.names[0][0], cname=fkey.names[0][1]))
                fkey.delete(catalog, reftable)

            # Fix fkey column value
            verbose('Getting existing fkey column values')
            reftable_path = datapaths.schemas[fkeycol['schema_name']].tables[
                fkeycol['table_name']]
            entities = reftable_path.entities(
                reftable_path.RID,
                reftable_path.column_definitions[fkeycol['column_name']])

            # Map the old dbxref value to the new curie id value for the reference
            verbose('Remapping {count} fkey column values'.format(
                count=len(entities)))
            for entity in entities:
                # Falsy values are left alone; any other value must be a key
                # of dbxref_to_id or this lookup raises KeyError.
                if entity[fkeycol['column_name']]:
                    entity[fkeycol['column_name']] = dbxref_to_id[entity[
                        fkeycol['column_name']]]
            vverbose(list(entities))

            # Update referring table
            if not args.dryrun:
                verbose(
                    'Updating fkey column values, {max_up} at a time'.format(
                        max_up=args.max_update))
                # Send the updates in consecutive slices of at most
                # args.max_update entities each.
                slice_ct = 0
                slice_sz = args.max_update
                updated = []
                while (slice_ct * slice_sz) < len(entities):
                    data = entities[(slice_ct * slice_sz):((1 + slice_ct) *
                                                           slice_sz)]
                    reftable_path.update(data, targets=[fkeycol['column_name']])
                    updated.extend(data)
                    slice_ct += 1
                if len(updated) != len(entities):
                    print(
                        'WARNING: only updated {up_count} of {ent_count} entities!'
                        .format(up_count=len(updated),
                                ent_count=len(entities)))

            # Define new fkey
            verbose(
                'Defining and creating new foreign key reference to new vocab table'
            )
            # Mutates the in-memory fkey so the definition built below
            # references 'id' instead of 'dbxref' at position i.
            fkey.referenced_columns[i]['column_name'] = 'id'
            new_fkey = ForeignKey.define(
                [
                    fkey.foreign_key_columns[j]['column_name']
                    for j in range(len(fkey.foreign_key_columns))
                ],
                schema_name,
                new_table_name, [
                    fkey.referenced_columns[k]['column_name']
                    for k in range(len(fkey.referenced_columns))
                ],
                on_update=fkey.on_update or 'NO ACTION',
                on_delete=fkey.on_delete or 'NO ACTION',
                constraint_names=fkey.names or [],
                comment=fkey.comment or None,
                acls=fkey.acls or {},
                acl_bindings=fkey.acl_bindings or {},
                annotations=fkey.annotations or {})
            vverbose(new_fkey)
            if not args.dryrun:
                reftable.create_fkey(catalog, new_fkey)

    # The legacy dbxref column was only needed to build the mapping above.
    if not args.dryrun:
        verbose('Dropping "dbxref" column from new vocab table')
        dbxref = new_table.column_definitions['dbxref']
        dbxref.delete(catalog, new_table)