def download_GAP_range_CONUS2001v1(gap_id, toDir):
    """
    Download the GAP Range CONUS 2001 v1 archive for a species and unzip it.

    Searches ScienceBase for the species' range map item, asks
    sciencebasepy to download the item's files into ``toDir``, then
    extracts the first file listed on the item (expected to be the range
    zip archive) into ``toDir``.

    NOTE: the returned path does not include an extension so that the
    caller can append csv, shp, or xml as needed.

    :param gap_id: GAP species code of at least six characters; characters
        2-5 are upper-cased before searching.
    :param toDir: directory that receives the download and the extracted
        files.
    :return: path of the downloaded archive with a trailing ``.zip``
        removed.
    :raises IndexError: if ``gap_id`` is shorter than six characters or the
        search returns no items.
    """
    import os
    import zipfile

    import sciencebasepy

    # Connect
    sb = sciencebasepy.SbSession()

    # Search for gap range item in ScienceBase
    gap_id = gap_id[0] + gap_id[1:5].upper() + gap_id[5]
    item_search = '{0}_CONUS_2001v1 Range Map'.format(gap_id)
    items = sb.find_items_by_any_text(item_search)
    if not items['items']:
        # Same exception type the bare [0] lookup raised, but with context.
        raise IndexError(
            'No ScienceBase items found for "{0}"'.format(item_search))

    # Get a public item.  No need to log in.
    rng = items['items'][0]['id']
    item_json = sb.get_item(rng)
    sb.get_item_files(item_json, toDir)

    # Unzip.  Join the path instead of concatenating strings so that a
    # toDir without a trailing separator still points at the downloaded file.
    rng_zip = os.path.join(toDir, item_json['files'][0]['name'])
    with zipfile.ZipFile(rng_zip, 'r') as zip_ref:
        zip_ref.extractall(toDir)

    # Return path to range file without extension.  Only a trailing ".zip"
    # is stripped, so a ".zip" elsewhere in the path is left untouched.
    root, ext = os.path.splitext(rng_zip)
    return root if ext == '.zip' else rng_zip
def download_GAP_model_CONUS2001v1(gap_id, toDir):
    """
    Get GAP habitat models for a species from its ScienceBase JSON file.

    Searches ScienceBase for the species' habitat model item, asks
    sciencebasepy to download the item's files into ``toDir``, reads
    ``<gap_id>_CONUS_HabModel_2001v1.json`` and pulls out the ``-s6`` and
    ``-s3`` models (summer NE and SE, per the original author's note).

    :param gap_id: GAP species code of at least six characters; characters
        2-5 are upper-cased before searching.
    :param toDir: directory that receives the downloaded files.
    :return: list of two model dictionaries (``-s6`` first, then ``-s3``),
        or ``None`` after printing ``handmodel`` when either model is
        flagged ``ysnHandModel``.
    :raises IndexError: if ``gap_id`` is shorter than six characters or the
        search returns no items.
    """
    import json
    import os

    import sciencebasepy

    # Connect
    sb = sciencebasepy.SbSession()

    # Search for gap habitat model item in ScienceBase
    gap_id = gap_id[0] + gap_id[1:5].upper() + gap_id[5]
    item_search = '{0}_CONUS_HabModel_2001v1.json'.format(gap_id)
    items = sb.find_items_by_any_text(item_search)
    if not items['items']:
        # Same exception type the bare [0] lookup raised, but with context.
        raise IndexError(
            'No ScienceBase items found for "{0}"'.format(item_search))

    # Get a public item.  No need to log in.
    mod = items['items'][0]['id']
    item_json = sb.get_item(mod)
    sb.get_item_files(item_json, toDir)

    # Read in json file.  Join the path so a toDir without a trailing
    # separator works, and close the file handle deterministically.
    model_path = os.path.join(toDir, gap_id + "_CONUS_HabModel_2001v1.json")
    with open(model_path) as model_file:
        all_models = json.load(model_file)["models"]

    models = [all_models[gap_id + "-s6"], all_models[gap_id + "-s3"]]

    # Equality (not truthiness) is kept on purpose so that non-boolean
    # flag values are treated exactly as before.
    if any(model['ysnHandModel'] == True for model in models):  # noqa: E712
        print('handmodel')
        return None
    return models
def stinson2019():
    """Download all files from ScienceBase item "5d67eacae4b0c4f70cf15be3".

    The item's files are downloaded into ``data/raw/stinson2019``. Two of
    the CSV files are then loaded with pandas and pickled into
    ``data/processed/stinson2019``:

    * ``ACERnet_sap_2012_2017_ID.csv`` -> ``stinson2019_df`` (``Date`` and
      ``Year`` parsed as dates; column names lower-cased with ``.``
      replaced by ``_``)
    * ``ACERnet_LatLon.csv`` -> ``stinson2019_locations``

    ## Data website
    ## https://www.sciencebase.gov/catalog/item/5d67eacae4b0c4f70cf15be3

    ## Citation
    ## Stinson, K., Rapp, J., Ahmed, S., Lutz, D., Huish, R., Dufour, B.,
    ## and Morelli, T.L., 2019, Sap Quantity at Study Sites in the
    ## Northeast: U.S. Geological Survey data release,
    ## https://doi.org/10.5066/P9H65YCC.
    """
    sb = sciencebasepy.SbSession()

    raw_path = os.path.join("data", "raw", "stinson2019")
    processed_path = os.path.join("data", "processed", "stinson2019")
    # exist_ok avoids the check-then-create race of testing os.path.exists
    # before calling makedirs.
    for directory in (raw_path, processed_path):
        os.makedirs(directory, exist_ok=True)

    item_json = sb.get_item("5d67eacae4b0c4f70cf15be3")
    sb.get_item_files(item_json, raw_path)

    # Convert data csv files to dataframes and pickle
    df = pd.read_csv(
        os.path.join(raw_path, 'ACERnet_sap_2012_2017_ID.csv'),
        parse_dates=['Date', 'Year'],
    )
    df.columns = [name.lower().replace('.', '_') for name in df.columns]
    df.to_pickle(os.path.join(processed_path, 'stinson2019_df'))

    locations = pd.read_csv(os.path.join(raw_path, 'ACERnet_LatLon.csv'))
    locations.to_pickle(os.path.join(processed_path, 'stinson2019_locations'))
def _get_url(params: Dict[str, str]) -> list:
    """
    Call Science Base API with the argument params and return list of
    download URLs.

    Every page of search results is walked; each item is fetched in full
    and the ``uri`` of every web link whose ``type`` is ``download`` is
    collected.

    :param params: Science Base params object
    :return: List of download URLs (the original note describes them as
        HTTPS URLs for items on S3)
    """
    log = Logger('Download')
    log.info('Science base query: {}'.format(params))

    sb = sciencebasepy.SbSession()

    items = sb.find_items(params)

    log.info('{} Science base item(s) identified.'.format(items['total']))

    urls = []
    while items and 'items' in items:
        for item in items['items']:
            result = sb.get_item(item['id'])
            # Items without any web links may omit the key entirely (or
            # carry a null), so read it defensively instead of indexing.
            for weblink in result.get('webLinks') or []:
                if weblink.get('type') == 'download':
                    urls.append(weblink['uri'])
        # pylint reports sb.next as "not callable"; that is a false
        # positive and only an annoyance.
        items = sb.next(items)

    return urls
def get_zip_file_and_extract(self):
    """
    Fetch the item's JSON, then download and extract its zip file.

    The zip file entry is chosen by ``self.get_zip_file``; the download and
    the extraction are delegated to ``self.download_file`` and
    ``self.extract_zip_file`` (the original note says extraction happens in
    the current working directory). ``self.zip_file`` is set to the item id
    concatenated with the zip file's name before downloading.

    :return: None
    :raises Exception: if the zip file entry has no download URI
    """
    session = pysb.SbSession()
    item_json = session.get_item(self.item_id)

    zip_info = self.get_zip_file(item_json)
    uri = zip_info["downloadUri"]
    size = zip_info["size"]

    # Guard clause: nothing to download without a URI.
    if uri is None:
        raise Exception("No URI was found for zipfile download")

    self.zip_file = self.item_id + zip_info["name"]
    self.download_file(uri, size)
    self.extract_zip_file()
def download_GAP_range_CONUS2001v1(gap_id, toDir):
    """
    Download the GAP Range CONUS 2001 v1 archive and extract its CSV file.

    Searches ScienceBase for the species' range map item, locates
    ``<gap_id>_CONUS_Range_2001v1.zip`` in the item's file list, fetches the
    archive into memory, and extracts only the first (alphabetically) member
    whose name ends in ``csv`` into ``toDir``.

    :param gap_id: GAP species code of at least six characters; characters
        2-5 are upper-cased before searching.
    :param toDir: directory that receives the extracted CSV.
    :return: path of the extracted CSV file as returned by
        ``ZipFile.extract``.
    :raises IndexError: if ``gap_id`` is shorter than six characters, the
        search returns no items, or the archive contains no CSV member.
    :raises requests.HTTPError: if the archive download returns an HTTP
        error status.
    """
    import zipfile
    from io import BytesIO

    import requests
    import sciencebasepy

    # Connect
    sb = sciencebasepy.SbSession()

    # Search for gap range item in ScienceBase
    gap_id = gap_id[0] + gap_id[1:5].upper() + gap_id[5]
    item_search = '{0}_CONUS_2001v1 Range Map'.format(gap_id)
    items = sb.find_items_by_any_text(item_search)
    if not items['items']:
        # Same exception type the bare [0] lookup raised, but with context.
        raise IndexError(
            'No ScienceBase items found for "{0}"'.format(item_search))

    # Get a public item.  No need to log in.
    rngID = items['items'][0]['id']
    item_json = sb.get_item(rngID)
    flst = item_json['files']
    zname = '{0}_CONUS_Range_2001v1.zip'.format(gap_id)

    # Use the GetIndex function to find the zip file's index value in the
    # JSON item's files list dictionaries of name keys.  An equivalent
    # without the helper would be:
    #   next((i for i, d in enumerate(flst) if d["name"] == zname), None)
    zip_index = GetIndex(flst, 'name', zname)

    # Get the URL to the zip file containing the HUC CSV
    rngzipURL = flst[zip_index]['url']
    response = requests.get(rngzipURL)
    # Fail on HTTP errors here rather than letting an error page surface
    # later as a confusing BadZipFile.
    response.raise_for_status()

    # Get ONLY the HUC CSV file and extract it to the designated directory
    with zipfile.ZipFile(BytesIO(response.content)) as archive:
        csv_names = sorted(
            name for name in archive.namelist() if name.endswith('csv'))
        if not csv_names:
            raise IndexError('No CSV file found in {0}'.format(zname))
        csvFile = archive.extract(csv_names[0], toDir)

    # Return the extracted range CSV
    return csvFile
""" Change folder name to match XML title """ rename_dirs_from_xmls(parentdir) #%% Create SB page structure """ Create SB page structure: nested child pages following directory hierarchy Inputs: parent directory, landing page ID This one should overwrite the entire data release (excluding the landing page). """ # Check whether logged in. if not sb.is_logged_in(): print('Logging back in...') try: sb = pysb.SbSession(env=None).login(useremail,password) except NameError: sb = pysb.SbSession(env=None).loginc(useremail) # If there's no id_to_json.json file available, we need to create the subpage structure. if not update_subpages and not os.path.isfile(os.path.join(parentdir,'id_to_json.json')): print("id_to_json.json file is not in parent directory, so we will perform update_subpages routine.") update_subpages = True if update_subpages: dict_DIRtoID = setup_subparents(sb, parentdir, landing_id, imagefile) # Save dictionaries with open(os.path.join(parentdir,'dir_to_id.json'), 'w') as f: json.dump(dict_DIRtoID, f) else: # Import pre-created dictionaries if all SB pages exist with open(os.path.join(parentdir,'dir_to_id.json'), 'r') as f:
# =============================================================================
import sciencebasepy as sb
import getpass
import pandas as pd

# =============================================================================
# Parameters
# =============================================================================
# ScienceBase id of the parent page whose children are processed
page_url = "5ad77f06e4b0e2c2dd25e798"
username = "******"
# Prompt for the password so it is never stored in the script
password = getpass.getpass()

# =============================================================================
# login and get child ids
# =============================================================================
sb_session = sb.SbSession()
sb_session.login(username, password)

child_ids = sb_session.get_child_ids(page_url)

# =============================================================================
# Change json
# =============================================================================
data = []
for child_id in child_ids:
    try:
        child_json = sb_session.get_item(child_id)
    except Exception:
        # Best-effort: skip children that cannot be fetched.  Catching
        # Exception (not a bare except) keeps Ctrl-C and SystemExit working.
        print("---> skipping child id {0}".format(child_id))
        continue
import pandas as pd import json import sciencebasepy as sb import os import glob # Set paths to data projDir = "P:/Proj6/GAP-WVBA/" dataDir = projDir + "Data/" habitatDir = projDir + "Data/habmaps/" listDir = dataDir + 'Specieslists/WV_AtlasCodes.csv' resultsCSV = projDir + "Results/elevation_summary.csv" toDir = "C:/Temp/" #toDir = "T:/Temp/" # Connect sb = sb.SbSession() """ # Read in elevation.csv as a dataframe and save a copy in the archive, #run only once as spp list will be reread and duplicated each time inDF = pd.read_csv(resultsCSV, dtype={'GAP_code': 'string', 'common_name': 'string'}) timestamp = str(datetime.now(tz=None).strftime("%d%B%Y_%I%M%p")) archiveCSV = projDir + "/Results/Archive/elevation_" + timestamp + ".csv" newDF = inDF.copy(deep=True) sppList = pd.read_csv(listDir, dtype={'strUC': 'string', 'strCommonName': 'string'}) sppList.rename(columns = {'strUC':'GAP_code'}, inplace = True) sppList.rename(columns = {'strCommonName':'common_name'}, inplace = True) sppList.drop(['state_name', 'strScientificName_x','intHa', 'strScientificName_y', 'N_birds', 'N_points', 'Det_Rate'], axis='columns', index=None, columns=None,
usage() sys.exit(1) if username is None: username, password = get_user_pw_from_file('~/.pw') if username is None: print('No user specified') usage() sys.exit(2) if sbenv is None: print('No environment specified') usage() sys.exit(3) if istest: print('user='******'env=' + sbenv), print('items=' + str(item_ids)), print('undelete? ' + str(undelete)), print('children? ' + str(delete_only_children)), print('batchsize=' + str(batchsize)) if password: sb = pysb.SbSession(sbenv).login(username, password) else: sb = pysb.SbSession(sbenv).loginc(username) for item_id in item_ids: if undelete: undelete_items(item_ids) else: delete_items(item_ids, batchsize, delete_only_children)
CONUSArea = 8103534.7    # 12-Digit HUC CONUS total area in km2
nHUCs = 82717.0          # Number of 12-digit HUCS in CONUS
cntLC = 9000763993.0     # Cell count of CONUS landcover excluding 0s
cntLCnoW = 8501572144.0  # Cell count of CONUS landcover excluding 0s and water

# Make an empty master dataframe
dfMaster = pd.DataFrame()

# Connect to ScienceBase to pull down a species list
#
# This uses the ScienceBase item for species habitat maps and searches for a
# CSV file with species info in it. The habitat map item has a unique id
# (527d0a83e4b0850ea0518326). If this changes, the code will need to be
# re-written.
sb = sciencebasepy.SbSession()
habmapItem = sb.get_item("527d0a83e4b0850ea0518326")

# Make a regular expression variable for the hab map csv file name pattern
fnp = 'ScienceBaseHabMapCSV.+'

# Start from None so the check below is well-defined even when no file name
# matches or the download/parse fails (previously a NameError).
dfSppCSV = None
for file in habmapItem["files"]:
    # Search for the file name pattern in the hab map item files dictionary
    fnMatch = re.search(fnp, file['name'])
    if fnMatch is not None:
        try:
            dfSppCSV = pd.read_csv(StringIO(sb.get(file["url"])))
        except Exception:
            # Reached when fetching or parsing the matched file fails.
            print('!! Could not find a CSV file name match !!')

# Check to make sure the CSV file was returned
if dfSppCSV is not None:
    print('-' * 55)
def __init__(self) -> None:
    """Create a new ScienceBase session and keep it on ``self.sbpy``."""
    session = sciencebasepy.SbSession()
    self.sbpy = session