# NOTE(review): whitespace-mangled fragment — the original newlines/indentation
# were stripped, collapsing many statements (and mid-line `#` comments) onto this
# single line, and it begins mid-statement: the opening of the provenance-record
# list (presumably `provs = [{...`) lies outside this view. Not valid Python
# as-is; kept byte-identical — recover the original formatting from version
# control rather than guessing at the missing list entries.
# Visible content: the tail of a list of provenance dicts (disease / feature /
# xref / alias tables), a loop inserting each via dba.ins_provenance(), a
# load_human(...) call, then the start of the 'UniProt Mouse and Rat Proteins'
# dataset insert via dba.ins_dataset() and its assert.
# NOTE(review): the final assert message reads "Error inserting dataset See
# logfile ..." — missing a period after "dataset", unlike sibling messages.
'table_name': 'disease', 'where_clause': "dtype = 'UniProt'" }, { 'dataset_id': dataset_id, 'table_name': 'feature' }, { 'dataset_id': dataset_id, 'table_name': 'xref', 'where_clause': f"dataset_id = {dataset_id}" }, { 'dataset_id': dataset_id, 'table_name': 'alias', 'where_clause': f"dataset_id = {dataset_id}" }] for prov in provs: rv = dba.ins_provenance(prov) assert rv, f"Error inserting provenance. See logfile {logfile} for details." load_human(args, dba, dataset_id, eco_map, logger, logfile) # Mouse and Rat proteins # Dataset and Provenance # As for human, we need the dataset id for xrefs and aliases dataset_id = dba.ins_dataset({ 'name': 'UniProt Mouse and Rat Proteins', 'source': f"Mouse and Rat from UniProt XML file {UP_RODENT_FILE} from {UP_BASE_URL}", 'app': PROGRAM, 'app_version': __version__, 'url': 'https://www.uniprot.org' }) assert dataset_id, f"Error inserting dataset See logfile {logfile} for details."
# NOTE(review): whitespace-mangled fragment — begins mid-statement, inside the
# dict argument of a dba.ins_dataset(...) call whose opening (and the start of
# the 'source' value) is outside this view. Newlines/indentation were stripped,
# so mid-line `#` comments and the collapsed statements make this invalid
# Python as-is; kept byte-identical — restore formatting from version control.
# Visible content: the tail of an IDG-flags dataset insert, a `provs` list of
# two provenance dicts (target.idg, and target.fam where idg = 1), a
# commented-out third provenance entry, the insertion loop via
# dba.ins_provenance(), and the final elapsed-time print.
# NOTE(review): the dataset assert message reads "Error inserting dataset See
# logfile ..." — missing a period after "dataset".
f'IDG generated data in file {IDG_LIST_FILE}.', 'app': PROGRAM, 'app_version': __version__, 'comments': 'IDG Target Flags are archived on GitHub in repo https://github.com/druggablegenome/IDGTargets.', 'url': 'https://github.com/druggablegenome/IDGTargets' }) assert dataset_id, f"Error inserting dataset See logfile {logfile} for details." # Provenance provs = [{ 'dataset_id': dataset_id, 'table_name': 'target', 'column_name': 'idg' }, { 'dataset_id': dataset_id, 'table_name': 'target', 'column_name': 'fam', 'where_clause': 'idg = 1' }] #{'dataset_id': dataset_id, 'table_name': 'target', 'column_name': 'famext', 'where_clause': 'column_name == "fam"', 'where_clause': 'idg = 1'} for prov in provs: rv = dba.ins_provenance(prov) assert rv, f"Error inserting provenance. See logfile {logfile} for details." elapsed = time.time() - start_time print("\n{}: Done. Elapsed time: {}\n".format(PROGRAM, slmf.secs2str(elapsed)))
# Reconstructed from a whitespace-mangled fragment: the original newlines and
# indentation were stripped, collapsing these statements (including an if/else
# suite and mid-line `#` comments) onto one syntactically invalid line.
# Statement order and every runtime string are preserved byte-for-byte.
#
# Flow: report the result of NULLing target.tdl, delete the previous 'TDLs'
# dataset, recompute TDLs, record the new dataset and its provenance, then
# export the UniProt mapping file and copy it to the download directory.
#
# NOTE(review): `rv` is assigned before this fragment begins — presumably the
# result of a dba call that set target.tdl to NULL; confirm against the full
# file. The surrounding indentation level in the original file is unknown.
if type(rv) == int:
    # An int result is the affected-row count. The exact type() comparison is
    # kept (not isinstance) because bool subclasses int: a False failure
    # return must NOT take this branch.
    print(f"\nSet tdl to NULL for {rv} target rows")
else:
    print(f"Error setting target.tdl values to NULL. See logfile {logfile} for details.")
    exit(1)
rv = dba.del_dataset('TDLs')
if rv:
    print(f"Deleted previous 'TDLs' dataset")
else:
    print(f"Error deleting 'TDLs' dataset. See logfile {logfile} for details.")
    exit(1)
load_tdls(dba, logfile, logger)
# Dataset
dataset_id = dba.ins_dataset( {'name': 'TDLs', 'source': 'IDG-KMC generated data by Steve Mathias at UNM.', 'app': PROGRAM, 'app_version': __version__, 'comments': 'TDLs are calculated by the loading app from data in TCRD.'} )
assert dataset_id, f"Error inserting dataset. See logfile {logfile} for details."
# Provenance
rv = dba.ins_provenance({'dataset_id': dataset_id, 'table_name': 'target', 'column_name': 'tdl'})
assert rv, f"Error inserting provenance. See logfile {logfile} for details."
# Add version number to filename and archive mapping file to old_versions dir
# mmver is the major.minor prefix of the data version, e.g. '6.12.4' -> '6.12'.
mmver = '.'.join( dbi['data_ver'].split('.')[:2] )
outfn = OUTFILE_PAT.format(mmver)
export_uniprot_mapping(dba, outfn)
shutil.copy(outfn, '/usr/local/apache2/htdocs/tcrd/download/PharosTCRD_UniProt_Mapping.tsv')
print(f"Copied {outfn} to /usr/local/apache2/htdocs/tcrd/download/PharosTCRD_UniProt_Mapping.tsv")
elapsed = time.time() - start_time
print("\n{}: Done. Elapsed time: {}\n".format(PROGRAM, slmf.secs2str(elapsed)))
# Reconstructed from a whitespace-mangled fragment: the original newlines and
# indentation were stripped, collapsing these statements (and mid-line `#`
# comments) onto one syntactically invalid line. Statement order and every
# runtime string are preserved byte-for-byte.
#
# Flow: log/print the DB connection info, run the TIGA extlink loader, then
# record the 'ExtLinks' dataset and its provenance and print elapsed time.
#
# NOTE(review): `dba`, `args`, `logger` and `logfile` are bound before this
# fragment begins; the surrounding indentation level in the original file is
# unknown. The do_glygen call was already commented out in the original.
dbi = dba.get_dbinfo()
logger.info( "Connected to TCRD database {} (schema ver {}; data ver {})".format( args['--dbname'], dbi['schema_ver'], dbi['data_ver']))
if not args['--quiet']:
    print("Connected to TCRD database {} (schema ver {}; data ver {})". format(args['--dbname'], dbi['schema_ver'], dbi['data_ver']))
start_time = time.time()
#do_glygen(dba, logger, logfile)
do_tiga(dba, logger, logfile)
# Dataset
dataset_id = dba.ins_dataset({ 'name': 'ExtLinks', 'source': 'Tested links to target/protein info in external resources.', 'app': PROGRAM, 'app_version': __version__ })
assert dataset_id, f"Error inserting dataset. See logfile {logfile} for details."
# Provenance
rv = dba.ins_provenance({ 'dataset_id': dataset_id, 'table_name': 'extlink' })
assert rv, f"Error inserting provenance. See logfile {logfile} for details."
elapsed = time.time() - start_time
print("\n{}: Done. Elapsed time: {}\n".format(PROGRAM, slmf.secs2str(elapsed)))
# NOTE(review): whitespace-mangled fragment — begins mid-statement, inside the
# dict literal whose opening is outside this view (per the DBAdaptor(dba_params)
# call that follows, this is the tail of `dba_params`). Newlines/indentation
# were stripped, so the collapsed statements and mid-line `#` comments make
# this invalid Python as-is; kept byte-identical — restore formatting from
# version control.
# Visible content: DBAdaptor construction, connection log/print, the load(...)
# call, the 'DRGC Resources' dataset insert + drgc_resource provenance, and the
# elapsed-time print.
# NOTE(review): 'source': 'RSS APIs at ' appears to be missing its URL, and the
# dataset assert message is missing a period after "dataset" — confirm against
# the full file before fixing.
'logger_name': __name__ } dba = DBAdaptor(dba_params) dbi = dba.get_dbinfo() logger.info( "Connected to TCRD database {} (schema ver {}; data ver {})".format( args['--dbname'], dbi['schema_ver'], dbi['data_ver'])) if not args['--quiet']: print("Connected to TCRD database {} (schema ver {}; data ver {})". format(args['--dbname'], dbi['schema_ver'], dbi['data_ver'])) start_time = time.time() load(args, dba, logger, logfile) # Dataset dataset_id = dba.ins_dataset({ 'name': 'DRGC Resources', 'source': 'RSS APIs at ', 'app': PROGRAM, 'app_version': __version__ }) assert dataset_id, f"Error inserting dataset See logfile {logfile} for details." # Provenance rv = dba.ins_provenance({ 'dataset_id': dataset_id, 'table_name': 'drgc_resource' }) assert rv, f"Error inserting provenance. See logfile {logfile} for details." elapsed = time.time() - start_time print("\n{}: Done. Elapsed time: {}\n".format(PROGRAM, slmf.secs2str(elapsed)))
# NOTE(review): whitespace-mangled fragment — begins mid-statement: this
# `format(...)` is the continuation of a connected-to-database print whose
# start is outside this view. Newlines/indentation were stripped, so the
# collapsed statements and mid-line `#` comments make this invalid Python
# as-is; kept byte-identical — restore formatting from version control.
# Visible content: the load(...) call, the 'PubMed' (NCBI E-Utils) dataset
# insert, provenance rows for the pubmed and protein2pubmed tables, the
# elapsed-time print, and trailing commented-out IPython debugging notes.
# NOTE(review): the asserts here mix .format() and f-strings for identical
# messages, and the dataset assert is missing a period after "dataset" —
# candidates for cleanup once the file is un-mangled.
format(args['--dbname'], dbi['schema_ver'], dbi['data_ver'])) start_time = time.time() load(args, dba, logger, logfile) # Dataset dataset_id = dba.ins_dataset({ 'name': 'PubMed', 'source': 'NCBI E-Utils', 'app': PROGRAM, 'app_version': __version__, 'url': 'https://www.ncbi.nlm.nih.gov/pubmed' }) assert dataset_id, "Error inserting dataset See logfile {} for details.".format( logfile) # Provenance rv = dba.ins_provenance({'dataset_id': dataset_id, 'table_name': 'pubmed'}) assert rv, f"Error inserting provenance. See logfile {logfile} for details." rv = dba.ins_provenance({ 'dataset_id': dataset_id, 'table_name': 'protein2pubmed' }) assert rv, "Error inserting provenance. See logfile {} for details.".format( logfile) elapsed = time.time() - start_time print("\n{}: Done. Elapsed time: {}\n".format(PROGRAM, slmf.secs2str(elapsed))) # Use this to manually insert errors # In [26]: t = dba.get_target(18821, include_annotations=True) # In [27]: p = target['components']['protein'][0]