# coding: utf-8 import requests, json from IPython.display import display from datetime import datetime, timedelta import pandas as pd from bis2 import gc2 from bis import bis from bis import sgcn # Set up the actions/targets for this particular instance thisRun = {} thisRun["instance"] = "DataDistillery" thisRun["db"] = "BCB" thisRun["readAPI"] = gc2.sqlAPI(thisRun["instance"], thisRun["db"], False) thisRun["writeAPI"] = gc2.sqlAPI(thisRun["instance"], thisRun["db"], True) thisRun["commitToDB"] = True thisRun["totalRecordsToProcess"] = 1 thisRun["totalRecordsProcessed"] = 0 thisRun["dateCheck"] = None # Retrieve information from stored files on the SGCN base repository item sb_sgcnCollectionItem = requests.get( "https://www.sciencebase.gov/catalog/item/56d720ece4b015c306f442d5?format=json&fields=files" ).json() for file in sb_sgcnCollectionItem["files"]: if file["title"] == "Configuration:Taxonomic Group Mappings": tgMappings = pd.read_table(file["url"], sep=",", encoding="utf-8") elif file["title"] == "Original 2005 SWAP National List for reference": swap2005 = pd.read_table(file["url"])
elif file["title"] == "Original 2005 SWAP National List for reference": swap2005 = pd.read_table(file["url"]) tgDict = {} for index, row in tgMappings.iterrows(): providedName = str(row["ProvidedName"]) preferredName = str(row["PreferredName"]) tgDict[providedName] = preferredName # In[4]: # Set up the actions/targets for this particular instance thisRun = {} thisRun["instance"] = "DataDistillery" thisRun["db"] = "BCB" thisRun["baseURL"] = gc2.sqlAPI(thisRun["instance"], thisRun["db"]) thisRun["commitToDB"] = True thisRun["totalRecordsToProcess"] = 5000 thisRun["totalRecordsProcessed"] = 0 numberWithoutTIRData = 1 while numberWithoutTIRData == 1 and thisRun["totalRecordsProcessed"] < thisRun[ "totalRecordsToProcess"]: q_recordToSearch = "SELECT id, registration->>'scientificname' AS name_submitted, itis->>'nameWInd' AS name_itis, worms->>'valid_name' AS name_worms FROM tir.tir WHERE registration->>'source' = 'SGCN' AND sgcn->>'dateCached' IS NULL LIMIT 1" recordToSearch = requests.get(thisRun["baseURL"] + "&q=" + q_recordToSearch).json() numberWithoutTIRData = len(recordToSearch["features"])
# Note that this entire script needs to run in sequence. Because we are coming into this from the standpoint of external config files, it is much more efficient to use targeted SQL statements to update a whole set of records at a time in the TIR as opposed to looping every TIR record and pulling information from the config files. # In[12]: import requests from IPython.display import display import pandas as pd from bis2 import gc2 # In[16]: # Set up the actions/targets for this particular instance thisRun = {} thisRun["instance"] = "DataDistillery" thisRun["db"] = "BCB" thisRun["baseURL"] = gc2.sqlAPI(thisRun["instance"], thisRun["db"]) thisRun["commitToDB"] = True thisRun["resetSGCN"] = False # In[17]: # Reset SGCN annotation in TIR if thisRun["resetSGCN"]: print( requests.get( gc2.sqlAPI("DataDistillery", "BCB") + "&q=UPDATE tir.tir SET sgcn = NULL").json()) # In[18]: # Retrieve information from stored files on the SGCN base repository item
# In[1]: import requests, re, json from IPython.display import display from bis import worms from bis import bis from bis import tir from bis2 import gc2 # In[8]: # Set up the actions/targets for this particular instance thisRun = {} thisRun["instance"] = "DataDistillery" thisRun["db"] = "BCB" thisRun["baseURL"] = gc2.sqlAPI(thisRun["instance"], thisRun["db"]) thisRun["commitToDB"] = True thisRun["totalRecordsToProcess"] = 700 thisRun["totalRecordsProcessed"] = 0 thisRun[ "wormsNameService"] = "http://www.marinespecies.org/rest/AphiaRecordsByName/" thisRun[ "wormsIDService"] = "http://www.marinespecies.org/rest/AphiaRecordByAphiaID/" numberWithoutTIRData = 1 while numberWithoutTIRData == 1 and thisRun["totalRecordsProcessed"] < thisRun[ "totalRecordsToProcess"]: q_recordToSearch = "SELECT id, registration->>'source' AS source, registration->>'followTaxonomy' AS followtaxonomy, registration->>'taxonomicLookupProperty' AS taxonomiclookupproperty, registration->>'scientificname' AS scientificname, itis->>'nameWInd' AS nameWInd, itis->>'nameWOInd' AS nameWOInd FROM tir.tir WHERE worms IS NULL LIMIT 1" recordToSearch = requests.get( gc2.sqlAPI("DataDistillery", "BCB") + "&q=" + q_recordToSearch).json()
from bis import tir from bis import gap # In[ ]: sb = pysb.SbSession() username = input("Username: "******"instance"] = "DataDistillery" thisRun["db"] = "BCB" thisRun["baseURL"] = gc2.sqlAPI(thisRun["instance"], thisRun["db"]) _habitatMapCollectionID = "527d0a83e4b0850ea0518326" _gapSpecies = sb.get_child_ids(_habitatMapCollectionID) for item in _gapSpecies: print(item) checkTIR = requests.get( thisRun["baseURL"] + "&q=SELECT id FROM tir.tir WHERE registration->>'id' = '" + item + "'").json() if len(checkTIR["features"]) == 0: gapSpecies = gap.gapToTIR( sb.get_item(item, {'fields': 'identifiers,tags'})) display(gapSpecies) print(
import requests import json from datetime import datetime from IPython.display import display from bis import tir from bis import bis from bis2 import gc2 # In[2]: # Set up the actions/targets for this particular instance thisRun = {} thisRun["instance"] = "DataDistillery" thisRun["db"] = "BCB" thisRun["baseURL"] = gc2.sqlAPI(thisRun["instance"], thisRun["db"]) thisRun["commitToDB"] = True thisRun["totalRecordsToProcess"] = 1000 thisRun["totalRecordsProcessed"] = 0 numberWithoutTIRData = 1 while numberWithoutTIRData == 1 and thisRun["totalRecordsProcessed"] < thisRun[ "totalRecordsToProcess"]: q_recordToSearch = "SELECT scientificname_submitted scientificname FROM sgcn.sgcn WHERE scientificname_submitted NOT IN (SELECT registration->>'scientificname' AS scientificname FROM tir.tir WHERE registration->>'source' = 'SGCN') GROUP BY scientificname_submitted LIMIT 1" recordToSearch = requests.get(thisRun["baseURL"] + "&q=" + q_recordToSearch).json() numberWithoutTIRData = len(recordToSearch["features"])
# In[42]: import requests,datetime from IPython.display import display from bis import tir from bis2 import gc2 # ### Get data to process # # This script gathers all of the unique taxonomic names (scientificname_submitted) from the SGCN data and registers them with the Taxonomic Information Registry. The query checks to make sure the taxon name is not already registered in the TIR. # In[43]: q_sgcn = "SELECT scientificname_submitted scientificname, array_to_string(array_agg(DISTINCT CASE WHEN commonname_submitted <> '' THEN commonname_submitted ELSE NULL END),',') commonnames, array_to_string(array_agg(DISTINCT CASE WHEN sgcn_state <> '' THEN sgcn_state ELSE NULL END),',') sgcnstates, array_to_string(array_agg(DISTINCT CASE WHEN taxonomicgroup_submitted <> '' THEN taxonomicgroup_submitted ELSE NULL END),',') taxonomicgroups FROM sgcn.sgcn WHERE scientificname_submitted NOT IN ( SELECT registration->'scientificname' AS scientificname_submitted FROM tir.tir WHERE registration->'Source' = 'SGCN' ) GROUP BY scientificname_submitted" r_sgcn = requests.get(gc2.sqlAPI("DataDistillery","BCB")+"&q="+q_sgcn).json() # ### Iterate over the data and process to TIR # # This block iterates over the unique species returned, packages up the data for the registration, and inserts them into the TIR table. It requires the following: # # * TIR table set up in the appropriate GC2-basede data schema # * Registration field in the TIR table using the hstore data type # * gc2 module from the BIS2 package (connection info for the API) # * tir module from the BIS package (function to insert registration info) # In[44]: recordCount = 0