def gbif_serach(taxon_name):
    """
    Fetch occurrence coordinates for a taxon from the GBIF database.

    Searches a fixed set of WKT polygons covering Europe, pages through all
    occurrence records for each polygon, and extracts the decimal
    latitude/longitude pairs.

    :param taxon_name: Scientific name of tree species (e.g 'Fagus sylvatica')
    :return: numpy array of shape (n, 2) with [latitude, longitude] rows;
             records lacking a coordinate are filled with NaN
    :raises Exception: re-raised after logging when any stage fails
    """
    from numpy import nan, empty
    from pygbif import occurrences as occ
    from pygbif import species

    # Fixed WKT tiles covering Europe; searching tile-by-tile keeps each
    # GBIF request small enough to page through reliably.
    polys = ["POLYGON ((-13.9746093699999996 66.1882478999999933, -6.4746093699999996 66.1882478999999933, -6.4746093699999996 57.4422366399999973, -13.9746093699999996 57.4422366399999973, -13.9746093699999996 66.1882478999999933))",
             "POLYGON ((-6.4746093699999996 66.1882478999999933, 8.5253906300000004 66.1882478999999933, 8.5253906300000004 57.4422366399999973, -6.4746093699999996 57.4422366399999973, -6.4746093699999996 66.1882478999999933))",
             "POLYGON ((8.5253906300000004 66.1882478999999933, 23.5253906300000004 66.1882478999999933, 23.5253906300000004 57.4422366399999973, 8.5253906300000004 57.4422366399999973, 8.5253906300000004 66.1882478999999933))",
             "POLYGON ((23.5253906300000004 66.1882478999999933, 31.7285156300000004 66.1882478999999933, 31.7285156300000004 57.4422366399999973, 23.5253906300000004 57.4422366399999973, 23.5253906300000004 66.1882478999999933))",
             "POLYGON ((-13.9746093699999996 42.4422366399999973, -13.9746093699999996 57.4422366399999973, -6.4746093699999996 57.4422366399999973, -6.4746093699999996 42.4422366399999973, -13.9746093699999996 42.4422366399999973))",
             "POLYGON ((-6.4746093699999996 42.4422366399999973, -6.4746093699999996 57.4422366399999973, 8.5253906300000004 57.4422366399999973, 8.5253906300000004 42.4422366399999973, -6.4746093699999996 42.4422366399999973))",
             "POLYGON ((8.5253906300000004 42.4422366399999973, 8.5253906300000004 57.4422366399999973, 23.5253906300000004 57.4422366399999973, 23.5253906300000004 42.4422366399999973, 8.5253906300000004 42.4422366399999973))",
             "POLYGON ((31.7285156300000004 57.4422366399999973, 31.7285156300000004 42.4422366399999973, 23.5253906300000004 42.4422366399999973, 23.5253906300000004 57.4422366399999973, 31.7285156300000004 57.4422366399999973))",
             "POLYGON ((-6.4746093699999996 34.9422366399999973, -13.9746093699999996 34.9422366399999973, -13.9746093699999996 42.4422366399999973, -6.4746093699999996 42.4422366399999973, -6.4746093699999996 34.9422366399999973))",
             "POLYGON ((8.5253906300000004 34.9422366399999973, -6.4746093699999996 34.9422366399999973, -6.4746093699999996 42.4422366399999973, 8.5253906300000004 42.4422366399999973, 8.5253906300000004 34.9422366399999973))",
             "POLYGON ((23.5253906300000004 34.9422366399999973, 8.5253906300000004 34.9422366399999973, 8.5253906300000004 42.4422366399999973, 23.5253906300000004 42.4422366399999973, 23.5253906300000004 34.9422366399999973))",
             "POLYGON ((31.7285156300000004 42.4422366399999973, 31.7285156300000004 34.9422366399999973, 23.5253906300000004 34.9422366399999973, 23.5253906300000004 42.4422366399999973, 31.7285156300000004 42.4422366399999973))"]
    try:
        nm = species.name_backbone(taxon_name)['usageKey']
        logger.info('Taxon Key found: %s' % nm)
    except Exception as e:
        logger.error('Taxon Key defining failed %s' % (e))
        # Without a taxon key no search is possible; previously execution
        # fell through with 'nm' unbound and failed later with NameError.
        raise

    try:
        results = []
        for i, p in enumerate(polys):
            res = []
            x = occ.search(taxonKey=nm, geometry=p)
            res.append(x['results'])
            # GBIF pages results: keep requesting until endOfRecords is set,
            # using the number of records fetched so far as the offset.
            while not x['endOfRecords']:
                x = occ.search(taxonKey=nm, geometry=p,
                               offset=sum(len(page) for page in res))
                res.append(x['results'])
            results.append([w for z in res for w in z])
            logger.info('Polygon %s/%s done' % (i + 1, len(polys)))
        logger.info('***** GBIF data fetching done! ***** ')
    except Exception as e:
        logger.error('Coordinate fetching failed: %s' % (e))
        raise

    try:
        allres = [w for z in results for w in z]
        coords = [{k: v for k, v in w.items() if k.startswith('decimal')}
                  for w in allres]
        latlon = empty([len(coords), 2], dtype=float, order='C')
        for i, coord in enumerate(coords):
            # Some records lack coordinate fields; mark them NaN instead of
            # aborting the whole extraction with a KeyError.
            latlon[i][0] = coord.get('decimalLatitude', nan)
            latlon[i][1] = coord.get('decimalLongitude', nan)
        # BUGFIX: the original logged len(ll[:, 0]) but 'll' was never
        # defined, raising a NameError that the broad except swallowed.
        logger.info('read in PA coordinates for %s rows ' % len(latlon))
    except Exception as e:
        logger.error('failed search GBIF data %s' % (e))
        raise
    return latlon
def get_gbif_results(self):
    """Run the stored GBIF query, page through all results, and return the
    occurrence records indexed by their gbifID."""
    initial = occurrences.search(**self.gbif_query)
    records = list(initial['results'])
    # GBIF pages in blocks of 300; fetch further pages up to the 90000 cap.
    page_starts = range(300, min(initial['count'], 90000), 300)
    for start in page_starts:
        paged_query = dict(self.gbif_query, offset=start)
        records.extend(occurrences.search(**paged_query)['results'])
    return {record['gbifID']: record for record in records}
def find_species_occurrences(self, **kwargs):
    """
    Finds and loads species occurrence data into pandas DataFrame.

    Data comes from the GBIF database, based on name or gbif ID;
    occurrences.search(...) returns a list of json structures which we load
    into a pandas DataFrame for easier manipulation.

    :param kwargs: extra filters forwarded to occurrences.search
    :return: pandas DataFrame of occurrence records (may be empty)
    :raises ValueError: if the species name has no GBIF backbone match
    """
    try:
        # Resolve the species name to a GBIF backbone usage key first.
        species_result = species.name_backbone(name=self.name_species, verbose=False)
        if species_result['matchType'] == 'NONE':
            raise ValueError("No match for the species %s " % self.name_species)
        self.ID = species_result['usageKey']
        first_res = occurrences.search(taxonKey=self.ID, limit=100000, **kwargs)
    except AttributeError:
        # name not provided, assume at least ID is provided
        first_res = occurrences.search(taxonKey=self.ID, limit=100000, **kwargs)

    #TODO: more efficient way than copying...appending to the same dataframe?
    full_results = copy.copy(first_res)

    # results are paginated so we need a loop to fetch them all
    # NOTE(review): the offset advances by 300 per iteration while the loop's
    # request limit is 10000, and the kwargs filters are dropped here —
    # confirm this does not duplicate or mis-filter records.
    counter = 1
    while first_res['endOfRecords'] is False:
        first_res = occurrences.search(taxonKey=self.ID,
                                       offset=300 * counter, limit=10000)
        full_results['results'] = copy.copy(
            full_results['results']) + copy.copy(first_res['results'])
        counter += 1

    logger.info("Loading species ... ")
    logger.info("Number of occurrences: %s " % full_results['count'])
    logger.debug(full_results['count'] == len(
        full_results['results']))  # match?

    #TODO: do we want a special way of loading? say, suggesting data types in some columns?
    #TODO: should we reformat the dtypes of the columns? at least day/month/year we care?
    #data_cleaned[['day', 'month', 'year']] = data_cleaned[['day', 'month', 'year']].fillna(0.0).astype(int)

    self.data_full = pd.DataFrame(
        full_results['results'])  # load results in pandas dataframes
    if self.data_full.empty:
        logger.info("Could not retrieve any occurrences!")
    else:
        logger.info("Loaded species: %s " % self.data_full['species'].unique())
    return self.data_full
def get_gbif(taxon_name='Fagus sylvatica', bbox=[-10, -10, 10, 10]):
    """
    Fetch species occurrence data from the GBIF database, paging over a grid
    of polygons covering the given bounding box.

    :param taxon_name: Taxon name of the species to be searched,
                       default='Fagus sylvatica'
    :param bbox: extent of the geographical region to fetch data for, as
                 [xmin, ymin, xmax, ymax], e.g. bbox=[-180, -90, 180, 90].
                 Read-only: the default list is never mutated.
    :returns: list of GBIF occurrence records (dicts)
    :raises Exception: re-raised after logging when the GBIF search fails
    """
    from numpy import arange
    from pygbif import occurrences as occ
    from pygbif import species
    logger.info('libs loaded in get_gbif function')
    try:
        nm = species.name_backbone(taxon_name)['usageKey']
        logger.info('taxon name set')
        print('taxon name set')

        # Generate WKT polygons tiling the bbox with a 10-degree grid width;
        # querying tile-by-tile keeps each GBIF request pageable.
        polys = []
        gridlen = 10
        for x in arange(bbox[0], bbox[2], gridlen):
            for y in arange(bbox[1], bbox[3], gridlen):
                # BUGFIX: this was a Python 2 print statement, a SyntaxError
                # under Python 3 (which other parts of this file require).
                print('processing %s , %s' % (x, y))
                poly = "POLYGON ((%s %s,%s %s,%s %s,%s %s,%s %s))" % \
                    (x, y, x, y + gridlen, x + gridlen, y + gridlen,
                     x + gridlen, y, x, y)
                polys.append(poly)
        print(polys)
        logger.info('%s polygons created' % len(polys))

        gbifdic = []
        for p in polys:
            logger.info('processing polyon')
            res = []
            page = occ.search(taxonKey=nm, geometry=p)
            res.append(page['results'])
            # GBIF pages results: keep requesting until endOfRecords is set,
            # offsetting by the number of records fetched so far.
            while not page['endOfRecords']:
                page = occ.search(taxonKey=nm, geometry=p,
                                  offset=sum(len(chunk) for chunk in res))
                res.append(page['results'])
            gbifdic.append([w for z in res for w in z])
            logger.info('polyon fetched')
        results = [w for z in gbifdic for w in z]
    except Exception:
        msg = 'failed search GBIF data.'
        logger.exception(msg)
        raise
    return results
def gbif_serach(taxon_name):
    """
    Fetch GBIF occurrence coordinates for a taxon over a fixed set of
    European WKT polygons.

    :param taxon_name: scientific name (e.g. 'Fagus sylvatica')
    :return: numpy array of shape (n, 2) with [latitude, longitude] rows,
             zero-coordinate rows removed
    :raises Exception: re-raised after logging when the GBIF search fails
    """
    from numpy import nan, empty
    from pygbif import occurrences as occ
    from pygbif import species
    try:
        nm = species.name_backbone(taxon_name)['usageKey']
        ## a set of WKT polygons covering Europe
        polys = [
            "POLYGON ((-13.9746093699999996 66.1882478999999933, -6.4746093699999996 66.1882478999999933, -6.4746093699999996 57.4422366399999973, -13.9746093699999996 57.4422366399999973, -13.9746093699999996 66.1882478999999933))",
            "POLYGON ((-6.4746093699999996 66.1882478999999933, 8.5253906300000004 66.1882478999999933, 8.5253906300000004 57.4422366399999973, -6.4746093699999996 57.4422366399999973, -6.4746093699999996 66.1882478999999933))",
            "POLYGON ((8.5253906300000004 66.1882478999999933, 23.5253906300000004 66.1882478999999933, 23.5253906300000004 57.4422366399999973, 8.5253906300000004 57.4422366399999973, 8.5253906300000004 66.1882478999999933))",
            "POLYGON ((23.5253906300000004 66.1882478999999933, 31.7285156300000004 66.1882478999999933, 31.7285156300000004 57.4422366399999973, 23.5253906300000004 57.4422366399999973, 23.5253906300000004 66.1882478999999933))",
            "POLYGON ((-13.9746093699999996 42.4422366399999973, -13.9746093699999996 57.4422366399999973, -6.4746093699999996 57.4422366399999973, -6.4746093699999996 42.4422366399999973, -13.9746093699999996 42.4422366399999973))",
            "POLYGON ((-6.4746093699999996 42.4422366399999973, -6.4746093699999996 57.4422366399999973, 8.5253906300000004 57.4422366399999973, 8.5253906300000004 42.4422366399999973, -6.4746093699999996 42.4422366399999973))",
            "POLYGON ((8.5253906300000004 42.4422366399999973, 8.5253906300000004 57.4422366399999973, 23.5253906300000004 57.4422366399999973, 23.5253906300000004 42.4422366399999973, 8.5253906300000004 42.4422366399999973))",
            "POLYGON ((31.7285156300000004 57.4422366399999973, 31.7285156300000004 42.4422366399999973, 23.5253906300000004 42.4422366399999973, 23.5253906300000004 57.4422366399999973, 31.7285156300000004 57.4422366399999973))",
            "POLYGON ((-6.4746093699999996 34.9422366399999973, -13.9746093699999996 34.9422366399999973, -13.9746093699999996 42.4422366399999973, -6.4746093699999996 42.4422366399999973, -6.4746093699999996 34.9422366399999973))",
            "POLYGON ((8.5253906300000004 34.9422366399999973, -6.4746093699999996 34.9422366399999973, -6.4746093699999996 42.4422366399999973, 8.5253906300000004 42.4422366399999973, 8.5253906300000004 34.9422366399999973))",
            "POLYGON ((23.5253906300000004 34.9422366399999973, 8.5253906300000004 34.9422366399999973, 8.5253906300000004 42.4422366399999973, 23.5253906300000004 42.4422366399999973, 23.5253906300000004 34.9422366399999973))",
            "POLYGON ((31.7285156300000004 42.4422366399999973, 31.7285156300000004 34.9422366399999973, 23.5253906300000004 34.9422366399999973, 23.5253906300000004 42.4422366399999973, 31.7285156300000004 42.4422366399999973))"
        ]
        results = []
        for i in polys:
            res = []
            x = occ.search(taxonKey=nm, geometry=i)
            res.append(x['results'])
            # page through until endOfRecords, offsetting by records so far
            while not x['endOfRecords']:
                x = occ.search(taxonKey=nm, geometry=i,
                               offset=sum(len(page) for page in res))
                res.append(x['results'])
            results.append([w for z in res for w in z])
            logger.info('polyon fetched')

        allres = [w for z in results for w in z]
        coords = [{k: v for k, v in w.items() if k.startswith('decimal')}
                  for w in allres]
        latlon = empty([len(coords), 2], dtype=float, order='C')
        for i, coord in enumerate(coords):
            # BUGFIX: the original assigned the undefined names 'Latitude'
            # and 'Longitude', so every call raised NameError (swallowed by
            # the except below) and 'return ll' then failed too. Records
            # without coordinates default to 0 and are dropped just below.
            latlon[i][0] = coord.get('decimalLatitude', 0)
            latlon[i][1] = coord.get('decimalLongitude', 0)
        # drop rows where either coordinate is exactly 0 (missing data)
        nz = (latlon == 0).sum(1)
        ll = latlon[nz == 0, :]
        logger.info('read in PA coordinates for %s rows ' % len(ll[:, 0]))
    except Exception as e:
        logger.exception('failed search GBIF data %s' % (e))
        raise  # previously fell through to 'return ll' with 'll' unbound
    return ll
def get_occurences(sp):
    '''
    Receive the specific code of each species to extract the occurrences
    by country and year.

    Args:
        sp (int): the number code of each species
    Returns:
        Dictionary mapping year (as str) to the occurrence count in Spain
    '''
    # One single-key dict per year, holding only the record count.
    yearly_counts = []
    for year in range(1970, 2020):
        data = occ.search(taxonKey=sp, limit=300, country='ES', year=str(year))
        yearly_counts.append({str(year): data['count']})

    # Merge the single-key dictionaries into one, summing counts per year.
    totals = {}
    for entry in yearly_counts:
        for year_key, count in entry.items():
            totals[year_key] = totals.get(year_key, 0) + count
    return totals
def getOccurrences(plantsName):
    """For each plant name, query GBIF for South American occurrences and
    extract a fixed set of Darwin Core fields per record (missing fields are
    blanked with '')."""
    fields = [
        'family', 'phylum', 'order', 'genus', 'species', 'class',
        'recordedBy', 'decimalLatitude', 'decimalLongitude', 'eventDate',
        'country', 'stateProvince', 'locality'
    ]
    collected = {}
    for name in plantsName:
        # Update the progress label when a UI config is present.
        if config and config.l_plant:
            config.l_plant["text"] = name
        response = occurrences.search(
            scientificName=util.remove_author(name),
            continent='south_america')
        # print(json.dumps(response, indent=4))
        records = []
        if 'count' in response:
            for result in response['results']:
                records.append({
                    field: (result[field] if result and field in result else '')
                    for field in fields
                })
        collected[name] = records
    return collected
def GetGBIF(spp):
    '''
    Download all US GBIF occurrence records with coordinates for a species.

    NOTE: Not all species have all required fields in their set of records.
    To avoid an error when combining species data, start from a dataframe
    that has every required column, concatenate the downloaded records onto
    it (so missing fields still exist as columns), then thin back to the
    required columns in a fixed order. The result can then be appended to a
    master dataframe regardless of which fields each species actually had.

    :param spp: scientific name passed to occ.search
    :returns: pandas DataFrame restricted to the 16 required columns
    '''
    # Single source of truth for the required columns and their order.
    required = ['species', 'vernacularName',
                'decimalLatitude', 'decimalLongitude',
                'coordinateUncertaintyInMeters', 'geodeticDatum',
                'eventRemarks', 'locality', 'locationRemarks',
                'occurrenceRemarks', 'stateProvince', 'year', 'month',
                'basisOfRecord', 'taxonRank', 'taxonomicStatus']
    # Empty frame that guarantees every required column exists even when a
    # species' records lack some of them.
    df0 = pd.DataFrame({col: [] for col in required}, index=None)
    print('Working on the following species:', spp)
    # Accumulate result pages here.
    tablelst = []
    n = 0
    eor = False
    # The pyGBIF occurrences module returns only 300 records at a time, so
    # loop with an increasing offset until endOfRecords is True.
    while eor == False:
        recs = occ.search(scientificName=spp, hasCoordinate=True,
                          country='US', geoSpatialIssue=False,
                          offset=n)  # geoSpatialIssue=False
        # Not all responses include a 'count' entry.
        if 'count' in recs:
            cnt = recs['count']
            print(' This species has', cnt, 'records')
        else:
            # cnt = 0.9
            print(' This species has an UNKNOWN RECORD COUNT')
        eor = recs['endOfRecords']
        tablelst = tablelst + recs['results']
        n += 300
    # Make a dataframe out of the compiled lists.
    df = pd.DataFrame(data=tablelst)
    # BUGFIX: DataFrame.append was deprecated in pandas 1.4 and removed in
    # 2.0; pd.concat is the supported equivalent.
    dfAppended = pd.concat([df0, df], ignore_index=True, sort=False)
    # Thin to the required fields in the appropriate order.
    dfThinned = dfAppended[required]
    return dfThinned
def test_search_occurrenceID():
    "occurrences.search - diff occurrenceID"
    uuid = "a55e740b-55af-4029-9481-74e0e5049581"
    res = occurrences.search(occurrenceID=uuid)
    assert type(res).__name__ == 'dict'
    assert len(res) == 6
    assert res['results'][0]['occurrenceID'] == uuid
def GetGBIF(spp):
    '''Download all US GBIF occurrence records with coordinates for a
    species and return them as a single pandas DataFrame.

    :param spp: scientific name passed to occ.search
    :returns: pandas DataFrame of all fetched records (may be empty)
    '''
    # Empty frame the downloaded records are concatenated onto.
    df0 = pd.DataFrame()
    # Accumulate result pages here.
    tablelst = []
    n = 0
    eor = False
    # The pyGBIF occurrences module returns only 300 records at a time, so
    # loop with an increasing offset until endOfRecords is True.
    while eor == False:
        recs = occ.search(scientificName=spp, hasCoordinate=True,
                          country='US', geoSpatialIssue=False,
                          offset=n)  # geoSpatialIssue=False
        eor = recs['endOfRecords']
        tablelst = tablelst + recs['results']
        n += 300
    # Make a dataframe out of the compiled lists.
    df = pd.DataFrame(data=tablelst)
    # BUGFIX: DataFrame.append was deprecated in pandas 1.4 and removed in
    # 2.0; pd.concat is the supported equivalent.
    dfAppended = pd.concat([df0, df], ignore_index=True, sort=False)
    return dfAppended
def get_taxonomic_info(sp):
    '''
    Receive the specific code of each species to extract the taxonomic
    information.

    Args:
        sp (int): the number code of each species
    Returns:
        Dictionary with the taxonomic information
    '''
    # One page (up to 300 records) of Spanish occurrences from 2016 is used
    # as the source of the taxonomy fields.
    data = occ.search(taxonKey=sp, limit=300, country='ES', year='2016')
    taxonomic = data["results"]
    # NOTE(review): each iteration overwrites the previous values, so only
    # the LAST record's fields end up in the returned dict — confirm this is
    # intended. Also raises NameError if 'results' is empty and KeyError if
    # a record lacks one of these fields.
    for dictionary in taxonomic:
        species_ = dictionary["scientificName"]
        kingdom = dictionary["kingdom"]
        genus = dictionary['genus']
        family = dictionary['family']
        country = dictionary["country"]
        records = dictionary['basisOfRecord']
        pub = dictionary['publishingCountry']
    species_info = {
        'species': species_,
        'kingdom': kingdom,
        'Genus': genus,
        'Family': family,
        'country': country,
        'records': records,
        'Publishing_country': pub
    }
    return species_info
def gbif_serach(taxon_name):
    """
    Fetch GBIF occurrence coordinates for a taxon over a fixed set of
    European WKT polygons, returning only rows with usable coordinates.

    :param taxon_name: scientific name (e.g. 'Fagus sylvatica')
    :return: numpy array of shape (n, 2) with [latitude, longitude] rows,
             zero-coordinate rows removed
    :raises Exception: re-raised after logging when the GBIF search fails
    """
    from numpy import nan, empty
    from pygbif import occurrences as occ
    from pygbif import species
    try:
        nm = species.name_backbone(taxon_name)['usageKey']
        ## a set of WKT polygons covering Europe
        polys = ["POLYGON ((-13.9746093699999996 66.1882478999999933, -6.4746093699999996 66.1882478999999933, -6.4746093699999996 57.4422366399999973, -13.9746093699999996 57.4422366399999973, -13.9746093699999996 66.1882478999999933))",
                 "POLYGON ((-6.4746093699999996 66.1882478999999933, 8.5253906300000004 66.1882478999999933, 8.5253906300000004 57.4422366399999973, -6.4746093699999996 57.4422366399999973, -6.4746093699999996 66.1882478999999933))",
                 "POLYGON ((8.5253906300000004 66.1882478999999933, 23.5253906300000004 66.1882478999999933, 23.5253906300000004 57.4422366399999973, 8.5253906300000004 57.4422366399999973, 8.5253906300000004 66.1882478999999933))",
                 "POLYGON ((23.5253906300000004 66.1882478999999933, 31.7285156300000004 66.1882478999999933, 31.7285156300000004 57.4422366399999973, 23.5253906300000004 57.4422366399999973, 23.5253906300000004 66.1882478999999933))",
                 "POLYGON ((-13.9746093699999996 42.4422366399999973, -13.9746093699999996 57.4422366399999973, -6.4746093699999996 57.4422366399999973, -6.4746093699999996 42.4422366399999973, -13.9746093699999996 42.4422366399999973))",
                 "POLYGON ((-6.4746093699999996 42.4422366399999973, -6.4746093699999996 57.4422366399999973, 8.5253906300000004 57.4422366399999973, 8.5253906300000004 42.4422366399999973, -6.4746093699999996 42.4422366399999973))",
                 "POLYGON ((8.5253906300000004 42.4422366399999973, 8.5253906300000004 57.4422366399999973, 23.5253906300000004 57.4422366399999973, 23.5253906300000004 42.4422366399999973, 8.5253906300000004 42.4422366399999973))",
                 "POLYGON ((31.7285156300000004 57.4422366399999973, 31.7285156300000004 42.4422366399999973, 23.5253906300000004 42.4422366399999973, 23.5253906300000004 57.4422366399999973, 31.7285156300000004 57.4422366399999973))",
                 "POLYGON ((-6.4746093699999996 34.9422366399999973, -13.9746093699999996 34.9422366399999973, -13.9746093699999996 42.4422366399999973, -6.4746093699999996 42.4422366399999973, -6.4746093699999996 34.9422366399999973))",
                 "POLYGON ((8.5253906300000004 34.9422366399999973, -6.4746093699999996 34.9422366399999973, -6.4746093699999996 42.4422366399999973, 8.5253906300000004 42.4422366399999973, 8.5253906300000004 34.9422366399999973))",
                 "POLYGON ((23.5253906300000004 34.9422366399999973, 8.5253906300000004 34.9422366399999973, 8.5253906300000004 42.4422366399999973, 23.5253906300000004 42.4422366399999973, 23.5253906300000004 34.9422366399999973))",
                 "POLYGON ((31.7285156300000004 42.4422366399999973, 31.7285156300000004 34.9422366399999973, 23.5253906300000004 34.9422366399999973, 23.5253906300000004 42.4422366399999973, 31.7285156300000004 42.4422366399999973))"]
        results = []
        for i in polys:
            res = []
            x = occ.search(taxonKey=nm, geometry=i)
            res.append(x['results'])
            # page through until endOfRecords, offsetting by records so far
            while not x['endOfRecords']:
                x = occ.search(taxonKey=nm, geometry=i,
                               offset=sum(len(page) for page in res))
                res.append(x['results'])
            results.append([w for z in res for w in z])
            logger.info('polyon fetched')

        allres = [w for z in results for w in z]
        coords = [{k: v for k, v in w.items() if k.startswith('decimal')}
                  for w in allres]
        latlon = empty([len(coords), 2], dtype=float, order='C')
        for i, coord in enumerate(coords):
            # BUGFIX: the original assigned the undefined names 'Latitude'
            # and 'Longitude', so every call raised NameError (swallowed by
            # the except below) and 'return ll' then failed too. Records
            # without coordinates default to 0 and are dropped just below.
            latlon[i][0] = coord.get('decimalLatitude', 0)
            latlon[i][1] = coord.get('decimalLongitude', 0)
        # drop rows where either coordinate is exactly 0 (missing data)
        nz = (latlon == 0).sum(1)
        ll = latlon[nz == 0, :]
        logger.info('read in PA coordinates for %s rows ' % len(ll[:, 0]))
    except Exception as e:
        logger.exception('failed search GBIF data %s' % (e))
        raise  # previously fell through to 'return ll' with 'll' unbound
    return ll
def has_occurrences(self, taxid, geometry=None, country=None):
    """Return True when GBIF has at least one occurrence for *taxid*,
    optionally restricted to a WKT geometry and/or a country code."""
    response = occurrences.search(
        taxonKey=taxid,
        geometry=geometry,
        country=country,
        limit=1,
    )
    hits = len(response["results"])
    self.logger.debug(
        "Ask for {} occurrences(s), got {}".format(1, hits)
    )
    return hits > 0
def find_species_occurrences(self, **kwargs):
    """
    Finds and loads species occurrence data into pandas DataFrame.

    Data comes from the GBIF database, based on name or gbif ID;
    occurrences.search(...) returns a list of json structures which we load
    into a pandas DataFrame for easier manipulation.

    :param kwargs: extra filters forwarded to occurrences.search
    :return: pandas DataFrame of occurrence records (may be empty)
    :raises ValueError: if the species name has no GBIF backbone match
    """
    try:
        # Resolve the species name to a GBIF backbone usage key first.
        species_result = species.name_backbone(name=self.name_species, verbose=False)
        if species_result['matchType']=='NONE':
            raise ValueError("No match for the species %s " % self.name_species)
        self.ID = species_result['usageKey']
        first_res = occurrences.search(taxonKey=self.ID, limit=100000, **kwargs)
    except AttributeError:
        # name not provided, assume at least ID is provided
        first_res = occurrences.search(taxonKey=self.ID, limit=100000, **kwargs)

    #TODO: more efficient way than copying...appending to the same dataframe?
    full_results = copy.copy(first_res)

    # results are paginated so we need a loop to fetch them all
    # NOTE(review): the offset advances by 300 per iteration while the loop's
    # request limit is 10000, and the kwargs filters are dropped here —
    # confirm this does not duplicate or mis-filter records.
    counter = 1
    while first_res['endOfRecords'] is False:
        first_res = occurrences.search(taxonKey=self.ID, offset=300*counter, limit=10000)
        full_results['results'] = copy.copy(full_results['results']) + copy.copy(first_res['results'])
        counter+=1

    logger.info("Loading species ... ")
    logger.info("Number of occurrences: %s " % full_results['count'])
    logger.debug(full_results['count'] == len(full_results['results']))  # match?

    #TODO: do we want a special way of loading? say, suggesting data types in some columns?
    #TODO: should we reformat the dtypes of the columns? at least day/month/year we care?
    #data_cleaned[['day', 'month', 'year']] = data_cleaned[['day', 'month', 'year']].fillna(0.0).astype(int)

    self.data_full = pd.DataFrame(full_results['results'])  # load results in pandas dataframes
    if self.data_full.empty:
        logger.info("Could not retrieve any occurrences!")
    else:
        logger.info("Loaded species: %s " % self.data_full['species'].unique())
    return self.data_full
def _get_gbif_occs(self):
    """Download all GBIF occurrences for this profile's species within its
    bounding box, cache lat/lon lists on the instance, and write them to a
    CSV (Species,Longitude,Latitude) in the outputs directory."""
    # CSV target: outputs_dir/<Species_name>.csv
    self.occfile = os.path.join(
        self.outputs_dir,
        self.profile['spname'].replace(" ", "_") + ".csv")
    # get the gbif key for our species (unless one was supplied already)
    if not self.key:
        self.key = species.name_backbone(name=self.profile['spname'],
                                         rank='species')['usageKey']
    # make lists to fill
    self.lats = []
    self.lons = []
    # cycle through observations, filling lists of lat and lon;
    # decimalLatitude/decimalLongitude take "min,max" range strings
    curr_offset = 0
    end_records = False
    while not end_records:
        occ_records = occ.search(taxonKey=self.key,
                                 hasCoordinate=True,
                                 decimalLatitude=','.join([
                                     str(self.profile['ymin']),
                                     str(self.profile['ymax'])
                                 ]),
                                 decimalLongitude=','.join([
                                     str(self.profile['xmin']),
                                     str(self.profile['xmax'])
                                 ]),
                                 offset=curr_offset)
        end_records = occ_records['endOfRecords']
        # advance by the page size the server reports
        curr_offset += occ_records['limit']
        self.lons.extend(
            [i['decimalLongitude'] for i in occ_records['results']])
        self.lats.extend(
            [i['decimalLatitude'] for i in occ_records['results']])
    # prepare array to write to csv: one row per occurrence, the latitude
    # column carries a trailing newline so each CSV row is terminated
    csvarr = np.vstack([
        np.repeat(self.profile['spname'].replace(" ", "_"),
                  len(self.lons)),
        self.lons,
        [
            "{}{}".format(a_, b_)
            for a_, b_ in zip(self.lats, np.repeat('\n', len(self.lats)))
        ]
    ]).T
    # write occurrence data to csv
    with open(self.occfile, 'w') as f:
        f.write('Species,Longitude,Latitude\n')
        for line in csvarr:
            f.write(",".join(line))
    # make these easier to work with downstream
    self.lons = np.array(self.lons)
    self.lats = np.array(self.lats)
def searching(name, myDate):
    """Search GBIF occurrences matching *name* from *myDate* until today.

    Returns a list of numbered tuples:
    (index, key, country, scientificName, eventDate, recordedBy),
    where optional fields default to None when absent from a record.
    """
    # current date, formatted for the GBIF eventDate range filter
    today = datetime.datetime.now().strftime("%Y-%m-%d")
    # search the phrase from the given date until now
    response = occ.search(q=name, eventDate=(myDate + ',' + today))
    records = response['results']
    rows = []
    for record in records:
        rows.append((
            record['key'],            # 'key' is always present
            record.get('country'),
            record.get('scientificName'),
            record.get('eventDate'),
            record.get('recordedBy'),
        ))
    # number the rows starting from 1
    numbered = []
    for position, row in enumerate(rows, start=1):
        numbered.append((position, row[0], row[1], row[2], row[3], row[4]))
    return numbered
def get_coordinates(sp):
    '''
    Extracts the taxonomic information of each species and the coordinates
    for each one of them.

    Args:
        sp: taxon key of the species to download from the database
    Returns:
        DataFrame with 11 columns: long, lat, locality, year, month,
        kingdom, class, family, genus, species, common_name
    '''
    rows = []
    # One GBIF query per year, 2000-2020 inclusive, restricted to Spain.
    for year in range(2000, 2021):
        data = occ.search(taxonKey=sp, country='ES', year=str(year))
        for record in data["results"]:
            # Missing fields come back as None via dict.get.
            rows.append((
                record.get("decimalLongitude"),
                record.get("decimalLatitude"),
                record.get("locality"),
                record.get("year"),
                record.get("month"),
                record.get("kingdom"),
                record.get("class"),
                record.get("family"),
                record.get("genus"),
                record.get("scientificName"),
                record.get("vernacularName"),
            ))
    return pd.DataFrame(rows,
                        columns=[
                            'long', 'lat', "locality", "year", "month",
                            "kingdom", "class", "family", "genus",
                            "species", "common_name"
                        ])
def test_search_():
    "occurrences.search - diff taxonKey2"
    res = occurrences.search(taxonKey=2683264)
    assert type(res).__name__ == 'dict'
    assert len(res) == 6
    assert res['results'][0]['taxonKey'] == 2683264
# Standalone script fragment: look up the GBIF usage key for a taxon name
# and fetch up to 200000 georeferenced occurrence records.
# NOTE(review): the print below uses Python 2 statement syntax while other
# parts of this file need Python 3 — confirm the intended interpreter.
from pygbif import species, occurrences
from numpy import nan, empty

TName = "Fagus sylvatica"
key = species.name_backbone(name=TName, rank="species")["usageKey"]
n = occurrences.count(taxonKey=key, isGeoreferenced=True)
# GBIF caps a single search at 200000 records
if n > 200000:
    max = 200000
else:
    max = n
results = occurrences.search(taxonKey=key, limit=max)
print '(', key, ')', '-', format(n, ','), " ocurrence(s)"
# lonslats = []
# pre-allocated (max, 2) array for lat/lon pairs
latlon = empty([max, 2], dtype=float, order='C')
# NOTE(review): the loop body below is entirely commented out, so as shown
# this loop has no statement in its body; the real body presumably
# continues beyond this excerpt — confirm against the full source.
for i, x in enumerate(results["results"]):
    #try:
    #if x['continent'].find('_') != -1:
    #Continent = ' '.join(x['continent'].split('_')).title()
    #else:
    #Continent = x['continent'].capitalize()
    #except:
    #Continent = ""
    #try:
    #Country = x['country']
    #except:
    #Country = ""
    #try:
def search_species(entry):
    """Look up GBIF species suggestions for a common name, print a report
    per match (scientific name, vernacular names, Wikipedia summary, GBIF
    key/page, occurrence count, top observed countries), and return a dict
    mapping canonical name -> [vernacular name, summary].

    NOTE(review): the bare except clauses below swallow ALL errors and skip
    the current result — confirm this best-effort behaviour is intended.
    """
    # To be used when calling the method
    name_input = entry
    # User input for testing purposes
    # name_input = input("Enter a common species name: ")
    results = {}
    # Uses the pygbif method to find results
    suggest = species.name_suggest(q=name_input, rank='SPECIES', limit=25)
    # Pulls the results from the dataset in json format
    suggest_data = suggest['data']['results']
    # print(suggest_data)
    # Formats the data for pythonic purposes
    data = suggest_data
    # Reads data for each result
    for o in data:
        # Finds the gbif key
        key = o['key']
        # Uses pygbif to find number of occurrences of species key
        occurs = occurrences.count(taxonKey=key)
        # Searches occurrence data for the species key
        occur_search = occurrences.search(taxonKey=key)
        # print('occur search: ' + str(occur_search))
        # for country in countries:
        #     print(country)
        # print(occur_search)
        # Runs if species has occurred more than zero times
        if occurs > 0:
            try:
                # Tries to retrieve scientific name
                canon_name = o['canonicalName']
            except:
                continue
            # Vernacular name init
            vern_name = ''
            # Variable for list of vernacular names
            names = o['vernacularNames']
            # Summary init
            summary = ''
            try:
                # If match found
                print('Scientific name: ' + canon_name)
                print('Vernacular names: ')
                match_found = False
                # Reads from results in matched name
                for name in names:
                    # Variable for vernacular name
                    vern_name = name['vernacularName']
                    # Used if all languages want to be included
                    # print(name['vernacularName'])
                    language = (name['language'])
                    # Can be changed if user wants to select a specific language
                    if language == 'eng':
                        # Checks if vernacular name is matched with search input
                        if name_input in vern_name and match_found is False:
                            match_found = True
                            print(vern_name)
                # If no exact match is found it reads the first vernacular name
                if match_found is False:
                    name_store = []
                    for name in names:
                        vern_name = name['vernacularName']
                        language = (name['language'])
                        if vern_name not in name_store:
                            name_store.append(vern_name)
                            if language == 'eng':
                                print(vern_name)
                # Adds scientific and vernacular name to results dictionary
                results.setdefault(canon_name, []).append(vern_name)
                # print(wikipedia.search(canon_name))
                try:
                    # pulls the wiki page based on canonical name in url (usually works)
                    # desc = wikipedia.page(canon_name, auto_suggest=True)
                    # alternative wiki page including all sections
                    # page = wikipedia.WikipediaPage(canon_name)
                    # pulls the summary page for the species
                    summary = wikipedia.summary(canon_name, sentences=2)
                    # experimental for section pages
                    # sections = page.sections
                    # for section in sections:
                    #     print(section)
                    print(summary)
                    results.setdefault(canon_name, []).append(summary)
                    # print(desc.content)
                except:
                    print("No description found")
                print('GBIF Key: ' + str(key))
                print('GBIF Species Page: ' + 'http://www.gbif.org/species/' + str(key))
                print('Count: ' + str(occurs))
                # This reads results for occurences and finds countries the species
                # was observed in and how many occurrences there
                occur_data = occur_search['results']
                countries = {}
                for occur in occur_data:
                    try:
                        country = occur['country']
                        if country not in countries:
                            countries[country] = 1
                        else:
                            countries[country] += 1
                        # print(occur['country'])
                    except:
                        continue
                # for country in countries:
                #     print(country)
                # sort countries by observation count, descending
                sorted_countries = sorted(countries.items(),
                                          key=lambda x: x[1], reverse=True)
                if countries != {}:
                    print('Top 3 Countries Observed: ')
                    for country in sorted_countries[:3]:
                        print(str(country))
                print('\n')
                # return canon_name, vern_name, summary
            except:
                continue
    print(results)
    return results
def test_search_key1():
    "occurrences.search - diff taxonKey"
    res = occurrences.search(taxonKey=2431762)
    assert type(res).__name__ == "dict"
    assert len(res) == 6
    assert res["results"][0]["taxonKey"] == 2431762
sql_twi = """ SELECT coordinate FROM gbif_requests WHERE request_id = '{0}'""".format(config.gbif_req_id) coordinate = cursor2.execute(sql_twi).fetchone()[0] sql_twi = """ SELECT continent FROM gbif_requests WHERE request_id = '{0}'""".format(config.gbif_req_id) continent = cursor2.execute(sql_twi).fetchone()[0] if continent == "None": continent = None #################### REQUEST RECORDS ACCORDING TO REQUEST PARAMS # First, find out how many records there are that meet criteria occ_search = occurrences.search(gbif_id, year=years, month=months, decimelLatitude=latRange, decimelLongitude=lonRange, hasGeospatialIssue=geoIssue, hasCoordinate=coordinate, continent=continent) occ_count = occ_search['count'] print('\n{0} records exist with the request parameters'.format(occ_count)) # Get occurrences in batches, saving into master list alloccs = [] batches = range(0, occ_count, 300) for i in batches: occ_json = occurrences.search(gbif_id, limit=300, offset=i, year=years, month=months,
def test_search_():
    """occurrences.search - diff taxonKey2"""
    response = occurrences.search(taxonKey=1052909293)
    # Expect a plain dict; this particular key yields a five-entry response.
    assert type(response).__name__ == 'dict'
    assert len(response) == 5
def main():
    # Entry point of the GRASS GIS module: downloads GBIF occurrence records
    # for the requested taxa and writes them as points into GRASS vector
    # map(s) with a full Darwin-Core attribute table.
    # Relies on module-level names provided by the surrounding script:
    # options/flags (parsed GRASS parameters), grass, osr/ogr (GDAL),
    # Vector/VectorTopo/Point (pygrass), gdal_version, math,
    # set_output_encoding and the _() translation function.
    from dateutil.parser import parse
    try:
        from pygbif import occurrences
        from pygbif import species
    except ImportError:
        grass.fatal(
            _("Cannot import pygbif (https://github.com/sckott/pygbif)"
              " library."
              " Please install it (pip install pygbif)"
              " or ensure that it is on path"
              " (use PYTHONPATH variable)."))

    # Parse input options
    output = options["output"]
    mask = options["mask"]
    species_maps = flags["i"]
    no_region_limit = flags["r"]
    no_topo = flags["b"]
    print_species = flags["p"]
    print_species_table = flags["t"]
    print_species_shell = flags["g"]
    print_occ_number = flags["o"]
    allow_no_geom = flags["n"]
    hasGeoIssue = flags["s"]
    taxa_list = options["taxa"].split(",")
    institutionCode = options["institutioncode"]
    basisofrecord = options["basisofrecord"]
    recordedby = options["recordedby"].split(",")
    date_from = options["date_from"]
    date_to = options["date_to"]
    country = options["country"]
    continent = options["continent"]
    rank = options["rank"]

    # Define static variable
    # Initialize cat
    cat = 0
    # Number of occurrences to fetch in one request
    chunk_size = 300
    # lat/lon proj strings: accepted spellings of a WGS84 lat/lon LOCATION,
    # used below to decide whether reprojection is needed
    latlon_crs = [
        "+proj=longlat +no_defs +a=6378137 +rf=298.257223563 +towgs84=0.000,0.000,0.000",
        "+proj=longlat +no_defs +a=6378137 +rf=298.257223563 +towgs84=0,0,0,0,0,0,0",
        "+proj=longlat +no_defs +a=6378137 +rf=298.257223563 +towgs84=0.000,0.000,0.000 +type=crs",
    ]
    # List attributes available in Darwin Core
    # not all attributes are returned in each request
    # to avoid key errors when accessing the dictionary returned by pygbif
    # presence of DWC keys in the returned dictionary is checked using this list
    # The number of keys in this list has to be equal to the number of columns
    # in the attribute table and the attributes written for each occurrence
    dwc_keys = [
        "key", "taxonRank", "taxonKey", "taxonID", "scientificName",
        "species", "speciesKey", "genericName", "genus", "genusKey",
        "family", "familyKey", "order", "orderKey", "class", "classKey",
        "phylum", "phylumKey", "kingdom", "kingdomKey", "eventDate",
        "verbatimEventDate", "startDayOfYear", "endDayOfYear", "year",
        "month", "day", "occurrenceID", "occurrenceStatus",
        "occurrenceRemarks", "Habitat", "basisOfRecord", "preparations",
        "sex", "type", "locality", "verbatimLocality", "decimalLongitude",
        "decimalLatitude", "coordinateUncertaintyInMeters", "geodeticDatum",
        "higerGeography", "continent", "country", "countryCode",
        "stateProvince", "gbifID", "protocol", "identifier", "recordedBy",
        "identificationID", "identifiers", "dateIdentified", "modified",
        "institutionCode", "lastInterpreted", "lastParsed", "references",
        "relations", "catalogNumber", "occurrenceDetails", "datasetKey",
        "datasetName", "collectionCode", "rights", "rightsHolder",
        "license", "publishingOrgKey", "publishingCountry", "lastCrawled",
        "specificEpithet", "facts", "issues", "extensions", "language",
    ]
    # Define columns for attribute table: "cat" plus one g_* column per
    # Darwin-Core key above, in the same order as the attrs tuple below
    cols = [
        ("cat", "INTEGER PRIMARY KEY"), ("g_search", "varchar(100)"),
        ("g_key", "integer"), ("g_taxonrank", "varchar(50)"),
        ("g_taxonkey", "integer"), ("g_taxonid", "varchar(50)"),
        ("g_scientificname", "varchar(255)"), ("g_species", "varchar(255)"),
        ("g_specieskey", "integer"), ("g_genericname", "varchar(255)"),
        ("g_genus", "varchar(50)"), ("g_genuskey", "integer"),
        ("g_family", "varchar(50)"), ("g_familykey", "integer"),
        ("g_order", "varchar(50)"), ("g_orderkey", "integer"),
        ("g_class", "varchar(50)"), ("g_classkey", "integer"),
        ("g_phylum", "varchar(50)"), ("g_phylumkey", "integer"),
        ("g_kingdom", "varchar(50)"), ("g_kingdomkey", "integer"),
        ("g_eventdate", "text"), ("g_verbatimeventdate", "varchar(50)"),
        ("g_startDayOfYear", "integer"), ("g_endDayOfYear", "integer"),
        ("g_year", "integer"), ("g_month", "integer"), ("g_day", "integer"),
        ("g_occurrenceid", "varchar(255)"),
        ("g_occurrenceStatus", "varchar(50)"),
        ("g_occurrenceRemarks", "varchar(50)"), ("g_Habitat", "varchar(50)"),
        ("g_basisofrecord", "varchar(50)"), ("g_preparations", "varchar(50)"),
        ("g_sex", "varchar(50)"), ("g_type", "varchar(50)"),
        ("g_locality", "varchar(255)"), ("g_verbatimlocality", "varchar(255)"),
        ("g_decimallongitude", "double precision"),
        ("g_decimallatitude", "double precision"),
        ("g_coordinateUncertaintyInMeters", "double precision"),
        ("g_geodeticdatum", "varchar(50)"),
        ("g_higerGeography", "varchar(255)"), ("g_continent", "varchar(50)"),
        ("g_country", "varchar(50)"), ("g_countryCode", "varchar(50)"),
        ("g_stateProvince", "varchar(50)"), ("g_gbifid", "varchar(255)"),
        ("g_protocol", "varchar(255)"), ("g_identifier", "varchar(50)"),
        ("g_recordedby", "varchar(255)"),
        ("g_identificationid", "varchar(255)"), ("g_identifiers", "text"),
        ("g_dateidentified", "text"), ("g_modified", "text"),
        ("g_institutioncode", "varchar(50)"), ("g_lastinterpreted", "text"),
        ("g_lastparsed", "text"), ("g_references", "varchar(255)"),
        ("g_relations", "text"), ("g_catalognumber", "varchar(50)"),
        ("g_occurrencedetails", "text"), ("g_datasetkey", "varchar(50)"),
        ("g_datasetname", "varchar(255)"),
        ("g_collectioncode", "varchar(50)"), ("g_rights", "varchar(255)"),
        ("g_rightsholder", "varchar(255)"), ("g_license", "varchar(50)"),
        ("g_publishingorgkey", "varchar(50)"),
        ("g_publishingcountry", "varchar(50)"), ("g_lastcrawled", "text"),
        ("g_specificepithet", "varchar(50)"), ("g_facts", "text"),
        ("g_issues", "text"), ("g_extensions", "text"),
        ("g_language", "varchar(50)"),
    ]

    # maybe no longer required in Python3
    set_output_encoding()

    # Set temporal filter if requested by user
    # Initialize eventDate filter
    eventDate = None
    # Check if date from is compatible (ISO compliant)
    if date_from:
        try:
            parse(date_from)
        except:
            grass.fatal("Invalid invalid start date provided")

    if date_from and not date_to:
        eventDate = "{}".format(date_from)

    # Check if date to is compatible (ISO compliant)
    # NOTE(review): if date_to is given without date_from, parse(date_from)
    # below is called on an empty string and raises -- TODO confirm whether
    # the GRASS option parser already prevents that combination.
    if date_to:
        try:
            parse(date_to)
        except:
            grass.fatal("Invalid invalid end date provided")
        # Check if date to is after date_from
        if parse(date_from) < parse(date_to):
            eventDate = "{},{}".format(date_from, date_to)
        else:
            grass.fatal(
                "Invalid date range: End date has to be after start date!")

    # Set filter on basisOfRecord if requested by user
    if basisofrecord == "ALL":
        basisOfRecord = None
    else:
        basisOfRecord = basisofrecord

    # Allow also occurrences with spatial issues if requested by user
    hasGeospatialIssue = False
    if hasGeoIssue:
        hasGeospatialIssue = True

    # Allow also occurrences without coordinates if requested by user
    hasCoordinate = True
    if allow_no_geom:
        hasCoordinate = False

    # Set reprojection parameters
    # Set target projection of current LOCATION
    proj_info = grass.parse_command("g.proj", flags="g")
    target_crs = grass.read_command("g.proj", flags="fj").rstrip()
    target = osr.SpatialReference()

    # Prefer EPSG CRS definitions
    if proj_info["epsg"]:
        target.ImportFromEPSG(int(proj_info["epsg"]))
    else:
        target.ImportFromProj4(target_crs)

    # GDAL >= 3 swaps x and y axis, see: github.com/gdal/issues/1546
    if int(gdal_version[0]) >= 3:
        target.SetAxisMappingStrategy(osr.OAMS_TRADITIONAL_GIS_ORDER)

    if target_crs == "XY location (unprojected)":
        grass.fatal("Sorry, XY locations are not supported!")

    # Set source projection from GBIF (coordinates are always WGS84 lat/lon)
    source = osr.SpatialReference()
    source.ImportFromEPSG(4326)

    # GDAL >= 3 swaps x and y axis, see: github.com/gdal/issues/1546
    if int(gdal_version[0]) >= 3:
        source.SetAxisMappingStrategy(osr.OAMS_TRADITIONAL_GIS_ORDER)

    if target_crs not in latlon_crs:
        transform = osr.CoordinateTransformation(source, target)
        reverse_transform = osr.CoordinateTransformation(target, source)

    # Generate WKT polygon to use for spatial filtering if requested
    if mask:
        if len(mask.split("@")) == 2:
            m = VectorTopo(mask.split("@")[0], mapset=mask.split("@")[1])
        else:
            m = VectorTopo(mask)
        if not m.exist():
            grass.fatal("Could not find vector map <{}>".format(mask))
        m.open("r")
        if not m.is_open():
            grass.fatal("Could not open vector map <{}>".format(mask))

        # Use map Bbox as spatial filter if map contains <> 1 area
        if m.number_of("areas") == 1:
            region_pol = [area.to_wkt() for area in m.viter("areas")][0]
        else:
            bbox = (str(m.bbox()).replace("Bbox(", "").replace(
                " ", "").rstrip(")").split(","))
            region_pol = "POLYGON (({0} {1}, {0} {3}, {2} {3}, {2} {1}, {0} {1}))".format(
                bbox[2], bbox[0], bbox[3], bbox[1])
        m.close()
    else:
        # Do not limit import spatially if LOCATION is able to take global data
        if no_region_limit:
            if target_crs not in latlon_crs:
                grass.fatal("Import of data from outside the current region is"
                            "only supported in a WGS84 location!")
            region_pol = None
        else:
            # Limit import spatially to current region
            # if LOCATION is !NOT! able to take global data
            # to avoid pprojection ERRORS
            region = grass.parse_command("g.region", flags="g")
            region_pol = "POLYGON (({0} {1},{0} {3},{2} {3},{2} {1},{0} {1}))".format(
                region["e"], region["n"], region["w"], region["s"])

    # Do not reproject in latlon LOCATIONS
    if target_crs not in latlon_crs:
        pol = ogr.CreateGeometryFromWkt(region_pol)
        pol.Transform(reverse_transform)
        pol = pol.ExportToWkt()
    else:
        pol = region_pol

    # Create output map if not output maps for each species are requested
    if (not species_maps and not print_species and not print_species_shell
            and not print_occ_number and not print_species_table):
        mapname = output
        new = Vector(mapname)
        new.open("w", tab_name=mapname, tab_cols=cols)
        cat = 1

    # Import data for each species
    for s in taxa_list:
        # Get the taxon key if not the taxon key is provided as input
        try:
            key = int(s)
        except:
            try:
                species_match = species.name_backbone(s,
                                                      rank=rank,
                                                      strict=False,
                                                      verbose=True)
                key = species_match["usageKey"]
            except:
                grass.error(
                    "Data request for taxon {} failed. Are you online?".format(s))
                continue

        # Return matching taxon and alternatives and exit
        # NOTE(review): if a numeric key was given above, species_match is
        # never assigned and the print_* branches below would raise a
        # NameError -- TODO confirm intended usage.
        if print_species:
            print("Matching taxon for {} is:".format(s))
            print("{} {}".format(species_match["scientificName"],
                                 species_match["status"]))
            if "alternatives" in list(species_match.keys()):
                print("Alternative matches might be: {}".format(s))
                for m in species_match["alternatives"]:
                    print("{} {}".format(m["scientificName"], m["status"]))
            else:
                print("No alternatives found for the given taxon")
            continue
        if print_species_shell:
            print("match={}".format(species_match["scientificName"]))
            if "alternatives" in list(species_match.keys()):
                alternatives = []
                for m in species_match["alternatives"]:
                    alternatives.append(m["scientificName"])
                print("alternatives={}".format(",".join(alternatives)))
            continue
        if print_species_table:
            if "alternatives" in list(species_match.keys()):
                if len(species_match["alternatives"]) == 0:
                    print("{0}|{1}|{2}|".format(
                        s, key, species_match["scientificName"]))
                else:
                    alternatives = []
                    for m in species_match["alternatives"]:
                        alternatives.append(m["scientificName"])
                    print("{0}|{1}|{2}|{3}".format(
                        s, key, species_match["scientificName"],
                        ",".join(alternatives),
                    ))
            continue

        # Ask GBIF for the record count first (limit=1) before downloading
        try:
            returns_n = occurrences.search(
                taxonKey=key,
                hasGeospatialIssue=hasGeospatialIssue,
                hasCoordinate=hasCoordinate,
                institutionCode=institutionCode,
                basisOfRecord=basisOfRecord,
                recordedBy=recordedby,
                eventDate=eventDate,
                continent=continent,
                country=country,
                geometry=pol,
                limit=1,
            )["count"]
        except:
            grass.error(
                "Data request for taxon {} faild. Are you online?".format(s))
            returns_n = 0

        # Exit if search does not give a return
        # Print only number of returns for the given search and exit
        if print_occ_number:
            print("Found {0} occurrences for taxon {1}...".format(
                returns_n, s))
            continue
        elif returns_n <= 0:
            grass.warning(
                "No occurrences for current search for taxon {0}...".format(s))
            continue
        elif returns_n >= 200000:
            grass.warning(
                "Your search for {1} returns {0} records.\n"
                "Unfortunately, the GBIF search API is limited to 200,000 records per request.\n"
                "The download will be incomplete. Please consider to split up your search."
                .format(returns_n, s))

        # Get the number of chunks to download
        chunks = int(math.ceil(returns_n / float(chunk_size)))
        grass.verbose("Downloading {0} occurrences for taxon {1}...".format(
            returns_n, s))

        # Create a map for each species if requested using map name as suffix
        if species_maps:
            mapname = "{}_{}".format(s.replace(" ", "_"), output)
            new = Vector(mapname)
            new.open("w", tab_name=mapname, tab_cols=cols)
            cat = 0

        # Download the data from GBIF
        for c in range(chunks):
            # Define offset
            offset = c * chunk_size
            # Adjust chunk_size to the hard limit of 200,000 records in GBIF API
            # if necessary
            if offset + chunk_size >= 200000:
                chunk_size = 200000 - offset

            # Get the returns for the next chunk
            returns = occurrences.search(
                taxonKey=key,
                hasGeospatialIssue=hasGeospatialIssue,
                hasCoordinate=hasCoordinate,
                institutionCode=institutionCode,
                basisOfRecord=basisOfRecord,
                recordedBy=recordedby,
                eventDate=eventDate,
                continent=continent,
                country=country,
                geometry=pol,
                limit=chunk_size,
                offset=offset,
            )

            # Write the returned data to map and attribute table
            for res in returns["results"]:
                if target_crs not in latlon_crs:
                    point = ogr.CreateGeometryFromWkt("POINT ({} {})".format(
                        res["decimalLongitude"], res["decimalLatitude"]))
                    point.Transform(transform)
                    x = point.GetX()
                    y = point.GetY()
                else:
                    x = res["decimalLongitude"]
                    y = res["decimalLatitude"]
                point = Point(x, y)

                # Fill in any Darwin-Core keys missing from this record so
                # the attrs tuple below never raises a KeyError
                for k in dwc_keys:
                    if k not in list(res.keys()):
                        res.update({k: None})

                cat = cat + 1
                new.write(
                    point,
                    cat=cat,
                    attrs=(
                        "{}".format(s), res["key"], res["taxonRank"],
                        res["taxonKey"], res["taxonID"],
                        res["scientificName"], res["species"],
                        res["speciesKey"], res["genericName"], res["genus"],
                        res["genusKey"], res["family"], res["familyKey"],
                        res["order"], res["orderKey"], res["class"],
                        res["classKey"], res["phylum"], res["phylumKey"],
                        res["kingdom"], res["kingdomKey"],
                        "{}".format(res["eventDate"])
                        if res["eventDate"] else None,
                        "{}".format(res["verbatimEventDate"])
                        if res["verbatimEventDate"] else None,
                        res["startDayOfYear"], res["endDayOfYear"],
                        res["year"], res["month"], res["day"],
                        res["occurrenceID"], res["occurrenceStatus"],
                        res["occurrenceRemarks"], res["Habitat"],
                        res["basisOfRecord"], res["preparations"],
                        res["sex"], res["type"], res["locality"],
                        res["verbatimLocality"], res["decimalLongitude"],
                        res["decimalLatitude"],
                        res["coordinateUncertaintyInMeters"],
                        res["geodeticDatum"], res["higerGeography"],
                        res["continent"], res["country"],
                        res["countryCode"], res["stateProvince"],
                        res["gbifID"], res["protocol"], res["identifier"],
                        res["recordedBy"], res["identificationID"],
                        ",".join(res["identifiers"]),
                        "{}".format(res["dateIdentified"])
                        if res["dateIdentified"] else None,
                        "{}".format(res["modified"])
                        if res["modified"] else None,
                        res["institutionCode"],
                        "{}".format(res["lastInterpreted"])
                        if res["lastInterpreted"] else None,
                        "{}".format(res["lastParsed"])
                        if res["lastParsed"] else None,
                        res["references"], ",".join(res["relations"]),
                        res["catalogNumber"],
                        "{}".format(res["occurrenceDetails"])
                        if res["occurrenceDetails"] else None,
                        res["datasetKey"], res["datasetName"],
                        res["collectionCode"], res["rights"],
                        res["rightsHolder"], res["license"],
                        res["publishingOrgKey"], res["publishingCountry"],
                        "{}".format(res["lastCrawled"])
                        if res["lastCrawled"] else None,
                        res["specificEpithet"], ",".join(res["facts"]),
                        ",".join(res["issues"]),
                        ",".join(res["extensions"]), res["language"],
                    ),
                )
                # NOTE(review): cat is incremented a second time here, so
                # written categories advance by two per record -- TODO
                # confirm this is intentional.
                cat = cat + 1

        # Close the current map if a map for each species is requested
        if species_maps:
            new.table.conn.commit()
            new.close()
            if not no_topo:
                grass.run_command("v.build", map=mapname, option="build")
            # Write history to map
            grass.vector_history(mapname)

    # Close the output map if not a map for each species is requested
    if (not species_maps and not print_species and not print_species_shell
            and not print_occ_number and not print_species_table):
        new.table.conn.commit()
        new.close()
        if not no_topo:
            grass.run_command("v.build", map=mapname, option="build")
        # Write history to map
        grass.vector_history(mapname)
def test_search_():
    """occurrences.search - diff taxonKey2"""
    taxon_key = 2683264
    out = occurrences.search(taxonKey=taxon_key)
    # Six top-level keys in the response, first result matches the query key.
    assert type(out).__name__ == 'dict'
    assert len(out) == 6
    assert out['results'][0]['taxonKey'] == taxon_key
def get_gbif_occs(self, geometry=False, tol=0):
    """
    Query the gbif database for occurrence data.

    Pages through all GBIF occurrences for self.params['spname'] inside the
    configured lat/lon bounds (and optionally a polygon), de-duplicates and
    outlier-filters the coordinates, and writes them to a CSV file.

    :param geometry: if True, restrict the search to self.geometry (a
        shapely polygon, oriented counter-clockwise as GBIF requires),
        simplified with tolerance `tol`.
    :param tol: simplification tolerance passed to shapely's simplify().

    Side effects: sets self.occfile, self.key, self.lats, self.lons and
    writes the CSV to self.occfile.
    NOTE(review): relies on module-level `occ` (pygbif.occurrences),
    `species`, `shapely`, `pd`, `np`, `stats` and `os`, and on
    self.params/self.outdir/self.geometry set elsewhere -- confirm upstream.
    """
    # Create a file to store occurrence data.
    self.occfile = os.path.join(
        self.outdir,
        self.params['spname'].replace(" ", "_") + ".csv")

    # Get the usageKey for species of interest.
    self.key = species.name_backbone(name=self.params['spname'],
                                     rank='species')['usageKey']

    # Create latitude/longitude lists.
    self.lats = []
    self.lons = []

    # Build dicts for optional params; each is merged into the search call
    # below only when its enabling condition holds.
    # if self.params['basis'] == True:
    basis_params = dict(basisOfRecord=[
        'HUMAN_OBSERVATION', 'LIVING_SPECIMEN', 'FOSSIL_SPECIMEN'
    ], )
    # if self.params['continent'] is not None:
    continent_params = dict(continent=self.params['continent'])
    if geometry == True:
        geo_orient = shapely.geometry.polygon.orient(
            self.geometry['geometry'][0], 1.0)  # Counter-clockwise for GBIF.
        geometry_bounds = dict(geometry=str(geo_orient.simplify(tol)))
    else:
        # Placeholder dict; filtered out of the search kwargs below.
        geometry_bounds = dict(place='holder')
    # Lat/lon ranges as "min,max" strings, the format the GBIF API expects.
    search_bounds = dict(
        decimalLatitude=','.join(
            [str(self.params['ymin']), str(self.params['ymax'])]),
        decimalLongitude=','.join(
            [str(self.params['xmin']), str(self.params['xmax'])]),
    )

    # Run a while-loop to go through all observations.  By default, tries to
    # narrow to native range.  Don't pass lat/long bounds if none were
    # entered (the 'None' substring test catches unset min/max values).
    curr_offset = 0
    end_records = False
    while not end_records:
        occ_records = occ.search(
            taxonKey=self.key,
            hasCoordinate=True,
            # hasGeospatialIssue = False,
            **{
                k: v
                for k, v in basis_params.items()
                if self.params['basis'] == True
            },
            **{
                k: v
                for k, v in continent_params.items()
                if self.params['continent'] is not None
            },
            **{
                k: v
                for k, v in geometry_bounds.items() if geometry == True
            },
            **{k: v for k, v in search_bounds.items() if 'None' not in v},
            offset=curr_offset)
        end_records = occ_records['endOfRecords']
        curr_offset += occ_records['limit']

        # Add latitude/longitude results to lists.
        self.lats.extend(
            [i['decimalLatitude'] for i in occ_records['results']])
        self.lons.extend(
            [i['decimalLongitude'] for i in occ_records['results']])

        # Print a dot on each cycle to show progress.
        print(".", end="")

        # When end of data is reached: build pandas dataframe from lists and
        # remove duplicate data points.
        if occ_records['endOfRecords']:
            df = pd.DataFrame({
                'Latitude': self.lats,
                'Longitude': self.lons
            })
            df = df.drop_duplicates().reset_index()
            df = df.drop('index', axis=1)

            # Filter outliers (drop rows more than 3 std devs from the mean).
            df = df[(np.abs(stats.zscore(df)) < 3).all(axis=1)]

            # Reform the lists by subsetting the dataframe.
            self.lats = list(df['Latitude'])
            self.lons = list(df['Longitude'])

            # Print final number of records.
            print(f' Found {len(self.lats)} records.')

            # Build array to write to CSV file.  np.vstack layers arrays
            # vertically, where each layer is species-lat-lon.  np.repeat
            # copies the species names as many times as there are entries.
            # It also combines with zip() to put a newline char at the end
            # of each layer.
            csvarr = np.vstack([
                np.repeat(self.params['spname'].replace(" ", "_"),
                          len(self.lats)), self.lats,
                [
                    "{}{}".format(a_, b_) for a_, b_ in zip(
                        self.lons, np.repeat('\n', len(self.lats)))
                ]
            ]).T

            # Write array to CSV file.
            with open(self.occfile, 'w') as f:
                f.write('Species,Latitude,Longitude\n')
                for line in csvarr:
                    f.write(",".join(line))

            # Transform lists to arrays for downstream application.
            self.lats = np.array(self.lats)
            self.lons = np.array(self.lons)
def hogSearch(left, right, top, bottom):
    """
    Search GBIF for feral hog ('Sus scrofa') sightings near a bounding box.

    Expands the box in 0.01-degree steps until at least one record is found,
    then reports how long ago the most recent sighting was (preferring 2019
    records) and a photo URL from that sighting's media.

    :param left/right/top/bottom: bounding box edges in decimal degrees.
    :return: [warning_message, photo_url]
    NOTE(review): depends on module-level `occ` (pygbif.occurrences),
    `latStat` and `datetime` defined elsewhere in the file.
    """
    l = left
    r = right
    t = top
    b = bottom
    # latStat presumably converts the box edges into the "min,max" strings
    # GBIF expects for decimalLatitude/decimalLongitude -- TODO confirm.
    coords = latStat(l, r, t, b)
    hog = occ.search(decimalLatitude=coords[0],
                     decimalLongitude=coords[1],
                     scientificName='Sus scrofa')
    print(hog['count'])
    dist = 0
    # Grow the search box until at least one record shows up.
    # NOTE(review): dist grows by 0.7 per 0.01-degree step -- presumably an
    # approximate miles-per-step conversion; confirm the intended units.
    while hog['count'] == 0:
        dist += 0.7
        l -= .01
        r += .01
        t += .01
        b -= .01
        coords = latStat(l, r, t, b)
        print(coords[0], coords[1])
        hog = occ.search(decimalLatitude=coords[0],
                         decimalLongitude=coords[1],
                         scientificName='Sus scrofa')
    hogHistoric = []
    hog2019 = []
    nDic = hog['results']
    print(nDic)
    # Collect all event dates, keeping 2019 sightings separately.
    # NOTE(review): records lacking an "eventDate" key would raise here --
    # TODO confirm GBIF always returns it for this query.
    for sightings in nDic:
        si = sightings["eventDate"]
        hogHistoric.append(si)
        if '2019' in sightings["eventDate"]:
            hog2019.append(si)
    # Parse the date portion (YYYY-MM-DD) of each 2019 event date.
    pigs2019 = []
    for hd in hog2019:
        nd = hd[:10]
        dd = nd.split('-', 2)
        pigs2019.append(datetime(int(dd[0]), int(dd[1]), int(dd[2])))
    # Same for the full history.
    allpigs = []
    for allHog in hogHistoric:
        nd = allHog[:10]
        dd = nd.split('-', 2)
        allpigs.append(datetime(int(dd[0]), int(dd[1]), int(dd[2])))
    # Prefer the newest 2019 sighting; otherwise fall back to the newest
    # sighting of any year.  dayDif is days-ago rounded to 2 decimals
    # (86400 seconds per day); bigDay is that sighting's YYYY-MM-DD.
    if pigs2019 != []:
        dayDif = str(
            round(
                (abs(datetime.now() - max(pigs2019)).total_seconds()) / 86400,
                2))
        bigDay = str(max(pigs2019))[:10]
    else:
        dayDif = str(
            round((abs(datetime.now() - max(allpigs)).total_seconds()) / 86400,
                  2))
        bigDay = str(max(allpigs))[:10]
    print(dayDif)
    print(bigDay)
    str1 = "WARNING: WE FOUND " + str(
        len(hogHistoric)) + " HOG RECORDS IN YOUR AREA. "
    str2 = "MOST RECENT HOG WAS SEEN " + dayDif + " DAYS AGO. CHOOSE A MORE SPECIFIC LOCATION FOR BETTER RESULTS."
    # Find the media list attached to the most recent sighting.
    pigPic = []
    for ss in nDic:
        print(ss["eventDate"][:10])
        if ss["eventDate"][:10] == str(bigDay):
            pigPic = ss["media"]
            break
    # NOTE(review): if the matched sighting has no media entries, pigPic is
    # empty and this raises IndexError -- TODO confirm/handle upstream.
    str3 = pigPic[0]['identifier']
    if dist != 0:
        totS = "WARNING: A HOG WAS SPOTTED " + str(
            dist) + " MILES FROM YOU " + dayDif + " DAYS AGO. "
    else:
        totS = str1 + str2
    hogDic = [totS, str3]
    return hogDic
# Tally GBIF occurrence-record counts (US, georeferenced only) for each
# species in sppList and save the result to a CSV in workDir.
# NOTE(review): sppList, workDir, occ (pygbif.occurrences) and species
# (pygbif.species) are defined earlier in the script.
df0 = pd.DataFrame()
reclst = []
# (sic) 'ScienticName' typo kept: the CSV header is consumed downstream and
# renaming it here could break existing readers of SpeciesRecordCountGBIF.csv.
lstcols = ['ScienticName', 'nRecords']
print('+' * 60)
print('\n')
for spp in sppList:
    print('Working on the following species:', spp)
    # First use the species module to get the taxonKey for a species
    # scientific name.
    tkey = species.name_backbone(name=spp, rank='species')['usageKey']
    # Gather the occurrences dictionary using the appropriate criteria.
    # FIX: the keyword was misspelled 'geoSpatialIssue', which pygbif/GBIF
    # silently ignored; the correct parameter is hasGeospatialIssue.
    recs = occ.search(taxonKey=tkey,
                      hasCoordinate=True,
                      country='US',
                      hasGeospatialIssue=False)
    if 'count' in recs:
        cnt = recs['count']
        print('  This species has', cnt, 'records')
    else:
        # Sentinel value so the CSV still gets a row for this species.
        print('  This species has an UNKNOWN RECORD COUNT')
        cnt = -99
    reclst.append([spp, cnt])
    print('+' * 60)

# Make a dataframe out of the compiled lists and save as CSV.
# NOTE(review): assumes workDir ends with a path separator -- TODO confirm.
dfRecordCount = pd.DataFrame(data=reclst, columns=lstcols)
dfRecordCount.to_csv(workDir + "SpeciesRecordCountGBIF.csv")
# Strip each occurrence dict down to the keys of interest (keykeys, alloccs,
# cursor2, config, occurrences and the search parameters are defined earlier
# in the script).
alloccs2 = []
for x in alloccs:
    alloccs2.append(dict((y, x[y]) for y in x if y in keykeys))

################################################## FILTER MORE ###############################################################
# COORDINATE UNCERTAINTY -- read the user's filter setting from the DB.
sql_green = """SELECT has_coordinate_uncertainty FROM gbif_filters WHERE filter_id = '{0}';""".format(config.gbif_filter_id)
filt_coordUncertainty = cursor2.execute(sql_green).fetchone()[0]

# Keep only records that report a coordinate uncertainty when the filter is
# enabled; otherwise pass everything through.
# FIX: was two independent `if` tests (== 1 and == 0); any other stored value
# left alloccs3 undefined and crashed later with a NameError.
if filt_coordUncertainty == 1:
    alloccs3 = [x for x in alloccs2 if 'coordinateUncertaintyInMeters' in x]
else:
    alloccs3 = alloccs2

#___________________________________8
# Sanity check: re-run the search over all months and report the total count.
# FIX: the lat/lon keyword names were misspelled ("decimelLatitude"/
# "decimelLongitude"), so pygbif/GBIF silently ignored the bounding filters;
# corrected to decimalLatitude/decimalLongitude.
test_occs = occurrences.search(gbif_id,
                               year=years,
                               month='1,12',
                               decimalLatitude=latRange,
                               decimalLongitude=lonRange,
                               hasGeospatialIssue=geoIssue,
                               hasCoordinate=True,
                               continent=continent)
occ_count = test_occs['count']
print('{0} records exist'.format(occ_count))
def test_search():
    """occurrences.search - basic test"""
    res = occurrences.search(taxonKey=3329049)
    # Plain dict with six entries whose key set matches the expected `keyz`
    # list (order-independent comparison).
    assert type(res).__name__ == 'dict'
    assert len(res) == 6
    assert sorted(res.keys()) == sorted(keyz)
def test_search_():
    """occurrences.search - diff taxonKey"""
    record_key = 252408386
    record = occurrences.search(taxonKey=record_key)
    # This key resolves to a single-record response with 24 fields.
    assert type(record).__name__ == 'dict'
    assert len(record) == 24
    assert record['key'] == record_key
def gbif_serach(taxon_name):
    """
    API to GBIF database: fetch presence coordinates for a taxon.

    Queries the GBIF occurrence API over a fixed set of polygons covering
    Europe (splitting the request keeps each search within the API's paging
    limits) and collects the decimal latitude/longitude of every record.

    :param taxon_name: Scientific name of tree species (e.g 'Fagus sylvatica')
    :return: numpy array of shape (n, 2) with [latitude, longitude] rows;
             empty (0, 2) array if the GBIF lookup failed.
    """
    from numpy import empty

    # Defaults so the function can always return, even if a step below fails
    # (previously `latlon` could be unbound at the return statement).
    latlon = empty([0, 2], dtype=float, order='C')
    results = []

    try:
        from pygbif import occurrences as occ
        from pygbif import species

        polys = [
            "POLYGON ((-13.9746093699999996 66.1882478999999933, -6.4746093699999996 66.1882478999999933, -6.4746093699999996 57.4422366399999973, -13.9746093699999996 57.4422366399999973, -13.9746093699999996 66.1882478999999933))",
            "POLYGON ((-6.4746093699999996 66.1882478999999933, 8.5253906300000004 66.1882478999999933, 8.5253906300000004 57.4422366399999973, -6.4746093699999996 57.4422366399999973, -6.4746093699999996 66.1882478999999933))",
            "POLYGON ((8.5253906300000004 66.1882478999999933, 23.5253906300000004 66.1882478999999933, 23.5253906300000004 57.4422366399999973, 8.5253906300000004 57.4422366399999973, 8.5253906300000004 66.1882478999999933))",
            "POLYGON ((23.5253906300000004 66.1882478999999933, 31.7285156300000004 66.1882478999999933, 31.7285156300000004 57.4422366399999973, 23.5253906300000004 57.4422366399999973, 23.5253906300000004 66.1882478999999933))",
            "POLYGON ((-13.9746093699999996 42.4422366399999973, -13.9746093699999996 57.4422366399999973, -6.4746093699999996 57.4422366399999973, -6.4746093699999996 42.4422366399999973, -13.9746093699999996 42.4422366399999973))",
            "POLYGON ((-6.4746093699999996 42.4422366399999973, -6.4746093699999996 57.4422366399999973, 8.5253906300000004 57.4422366399999973, 8.5253906300000004 42.4422366399999973, -6.4746093699999996 42.4422366399999973))",
            "POLYGON ((8.5253906300000004 42.4422366399999973, 8.5253906300000004 57.4422366399999973, 23.5253906300000004 57.4422366399999973, 23.5253906300000004 42.4422366399999973, 8.5253906300000004 42.4422366399999973))",
            "POLYGON ((31.7285156300000004 57.4422366399999973, 31.7285156300000004 42.4422366399999973, 23.5253906300000004 42.4422366399999973, 23.5253906300000004 57.4422366399999973, 31.7285156300000004 57.4422366399999973))",
            "POLYGON ((-6.4746093699999996 34.9422366399999973, -13.9746093699999996 34.9422366399999973, -13.9746093699999996 42.4422366399999973, -6.4746093699999996 42.4422366399999973, -6.4746093699999996 34.9422366399999973))",
            "POLYGON ((8.5253906300000004 34.9422366399999973, -6.4746093699999996 34.9422366399999973, -6.4746093699999996 42.4422366399999973, 8.5253906300000004 42.4422366399999973, 8.5253906300000004 34.9422366399999973))",
            "POLYGON ((23.5253906300000004 34.9422366399999973, 8.5253906300000004 34.9422366399999973, 8.5253906300000004 42.4422366399999973, 23.5253906300000004 42.4422366399999973, 23.5253906300000004 34.9422366399999973))",
            "POLYGON ((31.7285156300000004 42.4422366399999973, 31.7285156300000004 34.9422366399999973, 23.5253906300000004 34.9422366399999973, 23.5253906300000004 42.4422366399999973, 31.7285156300000004 42.4422366399999973))"
        ]
        # Resolve the scientific name to a GBIF backbone usage key.
        nm = species.name_backbone(taxon_name)['usageKey']
        logger.info('Taxon Key found: %s' % nm)
    except Exception as e:
        logger.error('Taxon Key defining failed %s' % (e))

    try:
        for i, p in enumerate(polys):
            res = []
            x = occ.search(taxonKey=nm, geometry=p)
            res.append(x['results'])
            # Page through until GBIF reports the end of the record set,
            # using the number of records collected so far as the offset.
            while not x['endOfRecords']:
                x = occ.search(taxonKey=nm,
                               geometry=p,
                               offset=sum([len(chunk) for chunk in res]))
                res.append(x['results'])
            results.append([w for z in res for w in z])
            logger.info('Polygon %s/%s done' % (i + 1, len(polys)))
        logger.info('***** GBIF data fetching done! ***** ')
    except Exception as e:
        logger.error('Coordinate fetching failed: %s' % (e))

    try:
        # Flatten all polygon results and keep only the decimal* fields.
        allres = [w for z in results for w in z]
        coords = [{k: v for k, v in w.items() if k.startswith('decimal')}
                  for w in allres]
        latlon = empty([len(coords), 2], dtype=float, order='C')
        for i, coord in enumerate(coords):
            latlon[i][0] = coord['decimalLatitude']
            latlon[i][1] = coord['decimalLongitude']
        # FIX: was len(ll[:, 0]) -- `ll` was never defined, so this line
        # always raised a NameError that the broad except silently logged.
        logger.info('read in PA coordinates for %s rows ' % len(latlon[:, 0]))
    except Exception as e:
        logger.error('failed search GBIF data %s' % (e))

    return latlon
def test_search():
    """occurrences.search - basic test"""
    res = occurrences.search(taxonKey=3329049)
    assert 'dict' == res.__class__.__name__
    assert 5 == len(res)
    # FIX: the old assertion compared dict.keys() to a list, which is always
    # False on Python 3 (a dict_keys view never equals a list); compare the
    # key sets order-independently instead.
    assert sorted(res.keys()) == sorted(
        [u'count', u'endOfRecords', u'limit', u'results', u'offset'])
def test_search_identified_by_id():
    """occurrences.search - identifiedByID"""
    res = occurrences.search(identifiedByID=x, limit=3)
    # Standard six-key dict response; the first result's identifier list
    # must echo back the ID we searched for.
    assert type(res).__name__ == "dict"
    assert len(res) == 6
    first_identifier = res["results"][0]["identifiedByIDs"][0]
    assert first_identifier['value'] == x
from pygbif import species, occurrences from numpy import nan, empty TName = "Fagus sylvatica" key = species.name_backbone(name=TName, rank="species")["usageKey"] n = occurrences.count(taxonKey=key, isGeoreferenced=True) if n > 200000: max = 200000 else: max = n results = occurrences.search(taxonKey=key, limit=max) print '(', key, ')', '-', format(n, ','), " ocurrence(s)" # lonslats = [] latlon = empty([max,2], dtype=float, order='C') for i, x in enumerate(results["results"]): #try: #if x['continent'].find('_') != -1: #Continent = ' '.join(x['continent'].split('_')).title() #else: #Continent = x['continent'].capitalize() #except: #Continent = "" #try: #Country = x['country'] #except: #Country = "" #try:
def main():
    """Download GBIF occurrence records for a list of taxa into GRASS GIS
    vector map(s), with optional spatial/temporal/attribute filtering.

    Reads its configuration from the GRASS-parsed ``options``/``flags``
    globals; depends on module-level ``grass``, ``osr``, ``ogr``,
    ``VectorTopo``, ``Vector``, ``Point``, ``parse``, ``math``, ``os`` and
    ``set_output_encoding`` imported elsewhere in this file.

    NOTE(review): source formatting was mangled; indentation below was
    reconstructed from the control-flow logic -- verify against upstream.
    """
    # Import pygbif lazily so a missing dependency produces a clean,
    # user-facing fatal error instead of a traceback.
    try:
        from pygbif import occurrences
        from pygbif import species
    except ImportError:
        grass.fatal(_("Cannot import pygbif (https://github.com/sckott/pygbif)"
                      " library."
                      " Please install it (pip install pygbif)"
                      " or ensure that it is on path"
                      " (use PYTHONPATH variable)."))

    # Parse input options
    output = options['output']
    mask = options['mask']
    species_maps = flags['i']
    no_region_limit = flags['r']
    no_topo = flags['b']
    print_species = flags['p']
    print_species_table = flags['t']
    print_species_shell = flags['g']
    print_occ_number = flags['o']
    allow_no_geom = flags['n']
    hasGeoIssue = flags['s']
    taxa_list = options['taxa'].split(',')
    institutionCode = options['institutioncode']
    basisofrecord = options['basisofrecord']
    recordedby = options['recordedby'].split(',')
    date_from = options['date_from']
    date_to = options['date_to']
    country = options['country']
    continent = options['continent']
    rank = options['rank']

    # Define static variable
    # Initialize cat (feature category counter for the attribute table)
    cat = 0
    # Number of occurrences to fetch in one request
    chunk_size = 300
    # lat/lon proj strings recognized as "already WGS84" -- used to decide
    # whether coordinate reprojection is needed at all
    latlon_crs = ['+proj=longlat +no_defs +a=6378137 +rf=298.257223563 +towgs84=0.000,0.000,0.000',
                  '+proj=longlat +no_defs +a=6378137 +rf=298.257223563 +towgs84=0,0,0,0,0,0,0']

    # List attributes available in Darwin Core
    # not all attributes are returned in each request
    # to avoid key errors when accessing the dictionary returned by pygbif
    # presence of DWC keys in the returned dictionary is checked using this list
    # The number of keys in this list has to be equal to the number of columns
    # in the attribute table and the attributes written for each occurrence
    dwc_keys = ['key', 'taxonRank', 'taxonKey', 'taxonID', 'scientificName',
                'species', 'speciesKey', 'genericName', 'genus', 'genusKey',
                'family', 'familyKey', 'order', 'orderKey', 'class',
                'classKey', 'phylum', 'phylumKey', 'kingdom', 'kingdomKey',
                'eventDate', 'verbatimEventDate', 'startDayOfYear',
                'endDayOfYear', 'year', 'month', 'day', 'occurrenceID',
                'occurrenceStatus', 'occurrenceRemarks', 'Habitat',
                'basisOfRecord', 'preparations', 'sex', 'type', 'locality',
                'verbatimLocality', 'decimalLongitude', 'decimalLatitude',
                'geodeticDatum', 'higerGeography', 'continent', 'country',
                'countryCode', 'stateProvince', 'gbifID', 'protocol',
                'identifier', 'recordedBy', 'identificationID', 'identifiers',
                'dateIdentified', 'modified', 'institutionCode',
                'lastInterpreted', 'lastParsed', 'references', 'relations',
                'catalogNumber', 'occurrenceDetails', 'datasetKey',
                'datasetName', 'collectionCode', 'rights', 'rightsHolder',
                'license', 'publishingOrgKey', 'publishingCountry',
                'lastCrawled', 'specificEpithet', 'facts', 'issues',
                'extensions', 'language']

    # Deinfe columns for attribute table
    # (order must stay parallel to dwc_keys / the attrs tuple written below)
    cols = [('cat', 'INTEGER PRIMARY KEY'),
            ('g_search', 'varchar(100)'),
            ('g_key', 'integer'),
            ('g_taxonrank', 'varchar(50)'),
            ('g_taxonkey', 'integer'),
            ('g_taxonid', 'varchar(50)'),
            ('g_scientificname', 'varchar(255)'),
            ('g_species', 'varchar(255)'),
            ('g_specieskey', 'integer'),
            ('g_genericname', 'varchar(255)'),
            ('g_genus', 'varchar(50)'),
            ('g_genuskey', 'integer'),
            ('g_family', 'varchar(50)'),
            ('g_familykey', 'integer'),
            ('g_order', 'varchar(50)'),
            ('g_orderkey', 'integer'),
            ('g_class', 'varchar(50)'),
            ('g_classkey', 'integer'),
            ('g_phylum', 'varchar(50)'),
            ('g_phylumkey', 'integer'),
            ('g_kingdom', 'varchar(50)'),
            ('g_kingdomkey', 'integer'),
            ('g_eventdate', 'text'),
            ('g_verbatimeventdate', 'varchar(50)'),
            ('g_startDayOfYear', 'integer'),
            ('g_endDayOfYear', 'integer'),
            ('g_year', 'integer'),
            ('g_month', 'integer'),
            ('g_day', 'integer'),
            ('g_occurrenceid', 'varchar(255)'),
            ('g_occurrenceStatus', 'varchar(50)'),
            ('g_occurrenceRemarks', 'varchar(50)'),
            ('g_Habitat', 'varchar(50)'),
            ('g_basisofrecord', 'varchar(50)'),
            ('g_preparations', 'varchar(50)'),
            ('g_sex', 'varchar(50)'),
            ('g_type', 'varchar(50)'),
            ('g_locality', 'varchar(255)'),
            ('g_verbatimlocality', 'varchar(255)'),
            ('g_decimallongitude', 'double precision'),
            ('g_decimallatitude', 'double precision'),
            ('g_geodeticdatum', 'varchar(50)'),
            ('g_higerGeography', 'varchar(255)'),
            ('g_continent', 'varchar(50)'),
            ('g_country', 'varchar(50)'),
            ('g_countryCode', 'varchar(50)'),
            ('g_stateProvince', 'varchar(50)'),
            ('g_gbifid', 'varchar(255)'),
            ('g_protocol', 'varchar(255)'),
            ('g_identifier', 'varchar(50)'),
            ('g_recordedby', 'varchar(255)'),
            ('g_identificationid', 'varchar(255)'),
            ('g_identifiers', 'text'),
            ('g_dateidentified', 'text'),
            ('g_modified', 'text'),
            ('g_institutioncode', 'varchar(50)'),
            ('g_lastinterpreted', 'text'),
            ('g_lastparsed', 'text'),
            ('g_references', 'varchar(255)'),
            ('g_relations', 'text'),
            ('g_catalognumber', 'varchar(50)'),
            ('g_occurrencedetails', 'text'),
            ('g_datasetkey', 'varchar(50)'),
            ('g_datasetname', 'varchar(255)'),
            ('g_collectioncode', 'varchar(50)'),
            ('g_rights', 'varchar(255)'),
            ('g_rightsholder', 'varchar(255)'),
            ('g_license', 'varchar(50)'),
            ('g_publishingorgkey', 'varchar(50)'),
            ('g_publishingcountry', 'varchar(50)'),
            ('g_lastcrawled', 'text'),
            ('g_specificepithet', 'varchar(50)'),
            ('g_facts', 'text'),
            ('g_issues', 'text'),
            ('g_extensions', 'text'),
            ('g_language', 'varchar(50)')]

    set_output_encoding()

    # Set temporal filter if requested by user
    # Initialize eventDate filter
    eventDate = None
    # Check if date from is compatible (ISO compliant)
    if date_from:
        try:
            parse(date_from)
        except:
            grass.fatal("Invalid invalid start date provided")
        # Open-ended range: only a start date was given.
        if date_from and not date_to:
            eventDate = '{}'.format(date_from)
    # Check if date to is compatible (ISO compliant)
    if date_to:
        try:
            parse(date_to)
        except:
            grass.fatal("Invalid invalid end date provided")
        # Check if date to is after date_from
        if parse(date_from) < parse(date_to):
            eventDate = '{},{}'.format(date_from, date_to)
        else:
            grass.fatal("Invalid date range: End date has to be after start date!")

    # Set filter on basisOfRecord if requested by user
    if basisofrecord == 'ALL':
        basisOfRecord = None
    else:
        basisOfRecord = basisofrecord
    # Allow also occurrences with spatial issues if requested by user
    hasGeospatialIssue = False
    if hasGeoIssue:
        hasGeospatialIssue = True
    # Allow also occurrences without coordinates if requested by user
    hasCoordinate = True
    if allow_no_geom:
        hasCoordinate = False

    # Set reprojection parameters
    # Set target projection of current LOCATION
    target_crs = grass.read_command('g.proj', flags='fj').rstrip(os.linesep)
    target = osr.SpatialReference(target_crs)
    target.ImportFromProj4(target_crs)
    # NOTE(review): comparing an osr.SpatialReference object to a string
    # looks like it can never be true -- confirm intent upstream.
    if target == 'XY location (unprojected)':
        grass.fatal("Sorry, XY locations are not supported!")

    # Set source projection from GBIF (always WGS84 / EPSG:4326)
    source = osr.SpatialReference()
    source.ImportFromEPSG(4326)
    if target_crs not in latlon_crs:
        # Forward transform for imported points, reverse for the region mask.
        transform = osr.CoordinateTransformation(source, target)
        reverse_transform = osr.CoordinateTransformation(target, source)

    # Generate WKT polygon to use for spatial filtering if requested
    if mask:
        # Accept both "map" and "map@mapset" notation.
        if len(mask.split('@')) == 2:
            m = VectorTopo(mask.split('@')[0], mapset=mask.split('@')[1])
        else:
            m = VectorTopo(mask)
        if not m.exist():
            grass.fatal('Could not find vector map <{}>'.format(mask))
        m.open('r')
        if not m.is_open():
            grass.fatal('Could not open vector map <{}>'.format(mask))
        # Use map Bbox as spatial filter if map contains <> 1 area
        if m.number_of('areas') == 1:
            region_pol = [area.to_wkt() for area in m.viter("areas")][0]
        else:
            bbox = str(m.bbox()).replace('Bbox(', '').replace(' ', '').rstrip(')').split(',')
            region_pol = 'POLYGON(({0} {1}, {0} {3}, {2} {3}, {2} {1}, {0} {1}))'.format(bbox[2], bbox[0], bbox[3], bbox[1])
        m.close()
    else:
        # Do not limit import spatially if LOCATION is able to take global data
        if no_region_limit:
            if target_crs not in latlon_crs:
                grass.fatal('Import of data from outside the current region is'
                            'only supported in a WGS84 location!')
            region_pol = None
        else:
            # Limit import spatially to current region
            # if LOCATION is !NOT! able to take global data
            # to avoid pprojection ERRORS
            region = grass.parse_command('g.region', flags='g')
            region_pol = 'POLYGON(({0} {1}, {0} {3}, {2} {3}, {2} {1}, {0} {1}))'.format(region['e'], region['n'], region['w'], region['s'])

    # Do not reproject in latlon LOCATIONS
    if target_crs not in latlon_crs:
        # GBIF expects the filter geometry in WGS84, so project the region
        # polygon back to lat/lon before sending it with the request.
        pol = ogr.CreateGeometryFromWkt(region_pol)
        pol.Transform(reverse_transform)
        pol = pol.ExportToWkt()
    else:
        pol = region_pol

    # Create output map if not output maps for each species are requested
    if not species_maps and not print_species and not print_species_shell and not print_occ_number and not print_species_table:
        mapname = output
        new = Vector(mapname)
        new.open('w', tab_name=mapname, tab_cols=cols)
        cat = 1

    # Import data for each species
    for s in taxa_list:
        # Get the taxon key if not the taxon key is provided as input
        try:
            key = int(s)
        except:
            try:
                species_match = species.name_backbone(s, rank=rank,
                                                      strict=False,
                                                      verbose=True)
                key = species_match['usageKey']
            except:
                grass.error('Data request for taxon {} failed. Are you online?'.format(s))
                continue

        # Return matching taxon and alternatives and exit
        if print_species:
            print('Matching taxon for {} is:'.format(s))
            print('{} {}'.format(species_match['scientificName'], species_match['status']))
            if 'alternatives' in list(species_match.keys()):
                print('Alternative matches might be:'.format(s))
                for m in species_match['alternatives']:
                    print('{} {}'.format(m['scientificName'], m['status']))
            else:
                print('No alternatives found for the given taxon')
            continue
        if print_species_shell:
            # Shell-script style "key=value" output.
            print('match={}'.format(species_match['scientificName']))
            if 'alternatives' in list(species_match.keys()):
                alternatives = []
                for m in species_match['alternatives']:
                    alternatives.append(m['scientificName'])
                print('alternatives={}'.format(','.join(alternatives)))
            continue
        if print_species_table:
            # Pipe-separated table output: search|key|match|alternatives
            if 'alternatives' in list(species_match.keys()):
                if len(species_match['alternatives']) == 0:
                    print('{0}|{1}|{2}|'.format(s, key, species_match['scientificName']))
                else:
                    alternatives = []
                    for m in species_match['alternatives']:
                        alternatives.append(m['scientificName'])
                    print('{0}|{1}|{2}|{3}'.format(s, key,
                                                   species_match['scientificName'],
                                                   ','.join(alternatives)))
            continue

        # Probe with limit=1 just to learn the total record count.
        try:
            returns_n = occurrences.search(taxonKey=key,
                                           hasGeospatialIssue=hasGeospatialIssue,
                                           hasCoordinate=hasCoordinate,
                                           institutionCode=institutionCode,
                                           basisOfRecord=basisOfRecord,
                                           recordedBy=recordedby,
                                           eventDate=eventDate,
                                           continent=continent,
                                           country=country,
                                           geometry=pol,
                                           limit=1)['count']
        except:
            grass.error('Data request for taxon {} faild. Are you online?'.format(s))
            returns_n = 0

        # Exit if search does not give a return
        # Print only number of returns for the given search and exit
        if print_occ_number:
            grass.message('Found {0} occurrences for taxon {1}...'.format(returns_n, s))
            continue
        elif returns_n <= 0:
            grass.warning('No occurrences for current search for taxon {0}...'.format(s))
            continue
        elif returns_n >= 200000:
            # GBIF's search API hard-caps paging at 200,000 records.
            grass.warning('Your search for {1} returns {0} records.\n'
                          'Unfortunately, the GBIF search API is limited to 200,000 records per request.\n'
                          'The download will be incomplete. Please consider to split up your search.'.format(returns_n, s))

        # Get the number of chunks to download
        chunks = int(math.ceil(returns_n / float(chunk_size)))
        grass.verbose('Downloading {0} occurrences for taxon {1}...'.format(returns_n, s))

        # Create a map for each species if requested using map name as suffix
        if species_maps:
            mapname = '{}_{}'.format(s.replace(' ', '_'), output)
            new = Vector(mapname)
            new.open('w', tab_name=mapname, tab_cols=cols)
            cat = 0

        # Download the data from GBIF
        for c in range(chunks):
            # Define offset
            offset = c * chunk_size
            # Adjust chunk_size to the hard limit of 200,000 records in GBIF API
            # if necessary
            if offset + chunk_size >= 200000:
                chunk_size = 200000 - offset
            # Get the returns for the next chunk
            returns = occurrences.search(taxonKey=key,
                                         hasGeospatialIssue=hasGeospatialIssue,
                                         hasCoordinate=hasCoordinate,
                                         institutionCode=institutionCode,
                                         basisOfRecord=basisOfRecord,
                                         recordedBy=recordedby,
                                         eventDate=eventDate,
                                         continent=continent,
                                         country=country,
                                         geometry=pol,
                                         limit=chunk_size,
                                         offset=offset)

            # Write the returned data to map and attribute table
            for res in returns['results']:
                if target_crs not in latlon_crs:
                    # Reproject GBIF's WGS84 point into the LOCATION's CRS.
                    point = ogr.CreateGeometryFromWkt('POINT ({} {})'.format(res['decimalLongitude'], res['decimalLatitude']))
                    point.Transform(transform)
                    x = point.GetX()
                    y = point.GetY()
                else:
                    x = res['decimalLongitude']
                    y = res['decimalLatitude']
                point = Point(x, y)
                # Fill in any Darwin Core keys missing from this record so
                # the attrs tuple below never raises a KeyError.
                for k in dwc_keys:
                    if k not in list(res.keys()):
                        res.update({k: None})
                cat = cat + 1
                new.write(point, cat=cat, attrs=(
                    '{}'.format(s),
                    res['key'],
                    res['taxonRank'],
                    res['taxonKey'],
                    res['taxonID'],
                    res['scientificName'],
                    res['species'],
                    res['speciesKey'],
                    res['genericName'],
                    res['genus'],
                    res['genusKey'],
                    res['family'],
                    res['familyKey'],
                    res['order'],
                    res['orderKey'],
                    res['class'],
                    res['classKey'],
                    res['phylum'],
                    res['phylumKey'],
                    res['kingdom'],
                    res['kingdomKey'],
                    '{}'.format(res['eventDate']) if res['eventDate'] else None,
                    '{}'.format(res['verbatimEventDate']) if res['verbatimEventDate'] else None,
                    res['startDayOfYear'],
                    res['endDayOfYear'],
                    res['year'],
                    res['month'],
                    res['day'],
                    res['occurrenceID'],
                    res['occurrenceStatus'],
                    res['occurrenceRemarks'],
                    res['Habitat'],
                    res['basisOfRecord'],
                    res['preparations'],
                    res['sex'],
                    res['type'],
                    res['locality'],
                    res['verbatimLocality'],
                    res['decimalLongitude'],
                    res['decimalLatitude'],
                    res['geodeticDatum'],
                    res['higerGeography'],
                    res['continent'],
                    res['country'],
                    res['countryCode'],
                    res['stateProvince'],
                    res['gbifID'],
                    res['protocol'],
                    res['identifier'],
                    res['recordedBy'],
                    res['identificationID'],
                    ','.join(res['identifiers']),
                    '{}'.format(res['dateIdentified']) if res['dateIdentified'] else None,
                    '{}'.format(res['modified']) if res['modified'] else None,
                    res['institutionCode'],
                    '{}'.format(res['lastInterpreted']) if res['lastInterpreted'] else None,
                    '{}'.format(res['lastParsed']) if res['lastParsed'] else None,
                    res['references'],
                    ','.join(res['relations']),
                    res['catalogNumber'],
                    '{}'.format(res['occurrenceDetails']) if res['occurrenceDetails'] else None,
                    res['datasetKey'],
                    res['datasetName'],
                    res['collectionCode'],
                    res['rights'],
                    res['rightsHolder'],
                    res['license'],
                    res['publishingOrgKey'],
                    res['publishingCountry'],
                    '{}'.format(res['lastCrawled']) if res['lastCrawled'] else None,
                    res['specificEpithet'],
                    ','.join(res['facts']),
                    ','.join(res['issues']),
                    ','.join(res['extensions']),
                    res['language'],))
            # NOTE(review): this extra increment appears in the mangled
            # original after the per-record loop; its exact indentation level
            # could not be recovered -- verify against upstream.
            cat = cat + 1

        # Close the current map if a map for each species is requested
        if species_maps:
            new.table.conn.commit()
            new.close()
            if not no_topo:
                grass.run_command('v.build', map=mapname, option='build')

    # Close the output map if not a map for each species is requested
    if not species_maps and not print_species and not print_species_shell and not print_occ_number and not print_species_table:
        new.table.conn.commit()
        new.close()
        if not no_topo:
            grass.run_command('v.build', map=mapname, option='build')
"POLYGON ((-6.4746093699999996 66.1882478999999933, 8.5253906300000004 66.1882478999999933, 8.5253906300000004 57.4422366399999973, -6.4746093699999996 57.4422366399999973, -6.4746093699999996 66.1882478999999933))", "POLYGON ((8.5253906300000004 66.1882478999999933, 23.5253906300000004 66.1882478999999933, 23.5253906300000004 57.4422366399999973, 8.5253906300000004 57.4422366399999973, 8.5253906300000004 66.1882478999999933))", "POLYGON ((23.5253906300000004 66.1882478999999933, 31.7285156300000004 66.1882478999999933, 31.7285156300000004 57.4422366399999973, 23.5253906300000004 57.4422366399999973, 23.5253906300000004 66.1882478999999933))", "POLYGON ((-13.9746093699999996 42.4422366399999973, -13.9746093699999996 57.4422366399999973, -6.4746093699999996 57.4422366399999973, -6.4746093699999996 42.4422366399999973, -13.9746093699999996 42.4422366399999973))", "POLYGON ((-6.4746093699999996 42.4422366399999973, -6.4746093699999996 57.4422366399999973, 8.5253906300000004 57.4422366399999973, 8.5253906300000004 42.4422366399999973, -6.4746093699999996 42.4422366399999973))", "POLYGON ((8.5253906300000004 42.4422366399999973, 8.5253906300000004 57.4422366399999973, 23.5253906300000004 57.4422366399999973, 23.5253906300000004 42.4422366399999973, 8.5253906300000004 42.4422366399999973))", "POLYGON ((31.7285156300000004 57.4422366399999973, 31.7285156300000004 42.4422366399999973, 23.5253906300000004 42.4422366399999973, 23.5253906300000004 57.4422366399999973, 31.7285156300000004 57.4422366399999973))", "POLYGON ((-6.4746093699999996 34.9422366399999973, -13.9746093699999996 34.9422366399999973, -13.9746093699999996 42.4422366399999973, -6.4746093699999996 42.4422366399999973, -6.4746093699999996 34.9422366399999973))", "POLYGON ((8.5253906300000004 34.9422366399999973, -6.4746093699999996 34.9422366399999973, -6.4746093699999996 42.4422366399999973, 8.5253906300000004 42.4422366399999973, 8.5253906300000004 34.9422366399999973))", "POLYGON ((23.5253906300000004 
34.9422366399999973, 8.5253906300000004 34.9422366399999973, 8.5253906300000004 42.4422366399999973, 23.5253906300000004 42.4422366399999973, 23.5253906300000004 34.9422366399999973))", "POLYGON ((31.7285156300000004 42.4422366399999973, 31.7285156300000004 34.9422366399999973, 23.5253906300000004 34.9422366399999973, 23.5253906300000004 42.4422366399999973, 31.7285156300000004 42.4422366399999973))"] results = [] for i in polys: res = [] x = occ.search(taxonKey = nm, geometry = i) res.append(x['results']) while not x['endOfRecords']: x = occ.search(taxonKey = nm, geometry = i, offset = sum([ len(x) for x in res ])) res.append(x['results']) results.append([w for z in res for w in z]) print 'polygon done' allres = [w for z in results for w in z] coords = [ { k: v for k, v in w.items() if k.startswith('decimal') } for w in allres ] from numpy import empty latlon = empty([len(coords),2], dtype=float, order='C') for i , coord in enumerate(coords):
def test_search():
    """occurrences.search - basic test"""
    # ``keyz`` is the module-level list of expected response keys.
    payload = occurrences.search(taxonKey=3329049)
    assert payload.__class__.__name__ == 'dict'
    assert len(payload) == 6
    assert sorted(payload.keys()) == sorted(keyz)
# Tally GBIF occurrence-record counts per species for the configured list.
# NOTE(review): relies on module-level `config` (species list) and `occ`
# (pygbif occurrences module) imported elsewhere in this file; indentation
# below was reconstructed from the logic of the mangled original.
print("*" * 40)
sppList = ['Accipiter cooperii', 'Myodes gapperi']
# Make an empty list to append each species' records
reclst = []
# Make column names for the list
lstcols = ['SppName', 'nRecords']
n = 0  # running total of records across every species processed
# Loop over each species in the full species list in the config file
for spp in config.sciNames1590:
    print('Working on the following species:', spp)
    # NOTE(review): pygbif's documented keyword is `hasGeospatialIssue`;
    # `geoSpatialIssue` may be silently ignored -- confirm against pygbif docs.
    recs = occ.search(scientificName=spp, hasCoordinate=True, country='US',
                      geoSpatialIssue=False)
    # Not all species have a 'count' key in their occurrence record
    # dictionary (GBIF API quirk). When it is missing, record a 0.9
    # sentinel so missing counts remain distinguishable from a real 0.
    if 'count' in recs:
        cnt = recs['count']
        n = n + cnt
        print(' it has', cnt, 'records')
    else:
        print(' it has UNKNOWN NUMBER of records', )
        cnt = 0.9
    # Append to the record list
    reclst.append([spp, cnt])
print('\n TOTAL NUMBER OF RECORDS FOR THIS SPECIES LIST =', n)
"POLYGON ((8.5253906300000004 66.1882478999999933, 23.5253906300000004 66.1882478999999933, 23.5253906300000004 57.4422366399999973, 8.5253906300000004 57.4422366399999973, 8.5253906300000004 66.1882478999999933))", "POLYGON ((23.5253906300000004 66.1882478999999933, 31.7285156300000004 66.1882478999999933, 31.7285156300000004 57.4422366399999973, 23.5253906300000004 57.4422366399999973, 23.5253906300000004 66.1882478999999933))", "POLYGON ((-13.9746093699999996 42.4422366399999973, -13.9746093699999996 57.4422366399999973, -6.4746093699999996 57.4422366399999973, -6.4746093699999996 42.4422366399999973, -13.9746093699999996 42.4422366399999973))", "POLYGON ((-6.4746093699999996 42.4422366399999973, -6.4746093699999996 57.4422366399999973, 8.5253906300000004 57.4422366399999973, 8.5253906300000004 42.4422366399999973, -6.4746093699999996 42.4422366399999973))", "POLYGON ((8.5253906300000004 42.4422366399999973, 8.5253906300000004 57.4422366399999973, 23.5253906300000004 57.4422366399999973, 23.5253906300000004 42.4422366399999973, 8.5253906300000004 42.4422366399999973))", "POLYGON ((31.7285156300000004 57.4422366399999973, 31.7285156300000004 42.4422366399999973, 23.5253906300000004 42.4422366399999973, 23.5253906300000004 57.4422366399999973, 31.7285156300000004 57.4422366399999973))", "POLYGON ((-6.4746093699999996 34.9422366399999973, -13.9746093699999996 34.9422366399999973, -13.9746093699999996 42.4422366399999973, -6.4746093699999996 42.4422366399999973, -6.4746093699999996 34.9422366399999973))", "POLYGON ((8.5253906300000004 34.9422366399999973, -6.4746093699999996 34.9422366399999973, -6.4746093699999996 42.4422366399999973, 8.5253906300000004 42.4422366399999973, 8.5253906300000004 34.9422366399999973))", "POLYGON ((23.5253906300000004 34.9422366399999973, 8.5253906300000004 34.9422366399999973, 8.5253906300000004 42.4422366399999973, 23.5253906300000004 42.4422366399999973, 23.5253906300000004 34.9422366399999973))", "POLYGON ((31.7285156300000004 
42.4422366399999973, 31.7285156300000004 34.9422366399999973, 23.5253906300000004 34.9422366399999973, 23.5253906300000004 42.4422366399999973, 31.7285156300000004 42.4422366399999973))" ] results = [] for i in polys: res = [] x = occ.search(taxonKey=nm, geometry=i) res.append(x['results']) while not x['endOfRecords']: x = occ.search(taxonKey=nm, geometry=i, offset=sum([len(x) for x in res])) res.append(x['results']) results.append([w for z in res for w in z]) print 'polygon done' allres = [w for z in results for w in z] coords = [{k: v for k, v in w.items() if k.startswith('decimal')} for w in allres] from numpy import empty
# Initial Call printProgressBar(0) # Update Progress Bar for i, item in enumerate(iterable): yield item printProgressBar(i + 1) # Print New Line on Complete print() # main limitmb = 512 procs = 6 datasetsDir = './datasets/' indexDir = './index/' results = occ.search(dwca_extension="http://rs.tdwg.org/dwc/terms/ResourceRelationship", limit=0, facet="datasetKey", facetLimit=1000) ix = get_index(indexDir) for r in progressBar(results['facets'][0]['counts'], prefix="Progress", suffix="Complete"): datasetKey = r['name'] dwca_file = f'{datasetsDir}{datasetKey}.zip' if not os.path.isfile(dwca_file) : try: pass dataset = registry.datasets(uuid=datasetKey) dwca_endpoints = [e for e in dataset['endpoints'] if e['type'] == 'DWC_ARCHIVE'] if len(dwca_endpoints) > 0 : url = dwca_endpoints[0]['url'] req = requests.get(url, stream=True)
def test_search_key2():
    """occurrences.search - diff taxonKey2"""
    taxon_key = 2683264
    response = occurrences.search(taxonKey=taxon_key)
    assert response.__class__.__name__ == "dict"
    assert len(response) == 6
    # Every hit should be for the taxon we asked about; spot-check the first.
    assert response["results"][0]["taxonKey"] == taxon_key