def active_drifters(bbox=None, time_start=None, time_end=None):
    """Query the OSMC ERDDAP server for drifter records in a box and window.

    The ``gdp_interpolated_drifter`` tabledap dataset is constrained by
    time, longitude and latitude. ``variables`` is left unset on the
    ERDDAP object, so no explicit column subset is requested.

    :param bbox: ``[lon_min, lon_max, lat_min, lat_max]``; a falsy value
        selects ``[-100, -40, 18, 60]``.
    :param time_start: start of the window (any object with ``strftime``);
        a falsy value selects one day before ``time_end``.
    :param time_end: end of the window; a falsy value selects today's date.
    :return: the DataFrame returned by ``ERDDAP.to_pandas()``, or an empty
        DataFrame when that call raises ``ValueError``.
    """
    timestamp_fmt = '%Y-%m-%dT%H:%M:%SZ'

    if not bbox:
        bbox = [-100, -40, 18, 60]
    if not time_end:
        time_end = dt.date.today()
    if not time_start:
        time_start = time_end - dt.timedelta(days=1)

    window_start = time_start.strftime(timestamp_fmt)
    window_end = time_end.strftime(timestamp_fmt)

    client = ERDDAP(server='OSMC', protocol="tabledap")
    client.dataset_id = "gdp_interpolated_drifter"
    client.constraints = {
        "time>=": window_start,
        "time<=": window_end,
        'longitude>=': bbox[0],
        'longitude<=': bbox[1],
        'latitude>=': bbox[2],
        'latitude<=': bbox[3],
    }

    try:
        return client.to_pandas()
    except ValueError:
        return pd.DataFrame()
def load_data(self, year='2019'):
    """Download every dataset of this glider for one year.

    Walks ``self.df`` and, for each row whose ``'Dataset ID'`` contains
    both ``self.glider_id`` and ``year``, fetches the dataset from
    ``self.server_url`` as a DataFrame indexed by ``'time (UTC)'``.

    :param year: substring that must appear in the dataset ID.
    :return: ``self.dfs``, a dict mapping dataset ID to DataFrame. It is
        reset to an empty dict at the start of every call.
    """
    self.dfs = {}
    for _, row in self.df.iterrows():
        dataset_id = row['Dataset ID']
        if self.glider_id not in dataset_id or year not in dataset_id:
            continue

        print(dataset_id)
        try:
            e = ERDDAP(
                server=self.server_url,
                protocol='tabledap',
                response='csv',
            )
            e.dataset_id = dataset_id
            e.constraints = self.constraints
            e.variables = self.variables[dataset_id]
        except HTTPError:
            print('Failed to generate url {}'.format(dataset_id))
            continue

        # The download itself happens here, outside the try block above.
        self.dfs[dataset_id] = e.to_pandas(
            index_col='time (UTC)',
            parse_dates=True,
            skiprows=(1, )  # units information can be dropped.
        )
    return self.dfs
def active_argo_floats(bbox=None, time_start=None, time_end=None, floats=None):
    """
    Download Argo float records from the IFREMER ERDDAP ``ArgoFloats`` dataset.

    :param bbox: [lon_min, lon_max, lat_min, lat_max]; a falsy value selects
        [-100, -45, 5, 46]
    :param time_start: time to start looking for floats; a falsy value selects
        one day before time_end
    :param time_end: time to end looking for floats; a falsy value selects
        today's date
    :param floats: optional value used for the ``platform_number=`` constraint
    :return: DataFrame with rows containing NaN removed, or an empty DataFrame
        when the request raises HTTPError
    """
    if not bbox:
        bbox = [-100, -45, 5, 46]
    if not time_end:
        time_end = dt.date.today()
    if not time_start:
        time_start = time_end - dt.timedelta(days=1)

    query = {
        'time>=': str(time_start),
        'time<=': str(time_end),
        'longitude>=': bbox[0],
        'longitude<=': bbox[1],
        'latitude>=': bbox[2],
        'latitude<=': bbox[3],
    }
    if floats:
        query['platform_number='] = floats

    client = ERDDAP(
        server='IFREMER',
        protocol='tabledap',
        response='nc'
    )
    client.dataset_id = 'ArgoFloats'
    client.constraints = query
    client.variables = [
        'platform_number',
        'time',
        'pres',
        'longitude',
        'latitude',
        'temp',
        'psal',
    ]

    try:
        frame = client.to_pandas(
            parse_dates=['time (UTC)'],
            skiprows=(1,)  # units information can be dropped.
        )
        return frame.dropna()
    except HTTPError:
        return pd.DataFrame()
def get_erddap_dataset(server, ds_id, variables=None, constraints=None):
    """Fetch a tabledap dataset as an xarray object sorted by time.

    :param server: ERDDAP server URL or shortcut passed to ``ERDDAP``.
    :param ds_id: dataset ID to request.
    :param variables: optional list of variables; ignored when falsy.
    :param constraints: optional constraints mapping; ignored when falsy.
    :return: the result of ``ERDDAP.to_xarray()`` sorted by its ``time``
        variable.
    """
    client = ERDDAP(server=server, protocol='tabledap', response='nc')
    client.dataset_id = ds_id

    # Only override the client's attributes when the caller supplied a value.
    if constraints:
        client.constraints = constraints
    if variables:
        client.variables = variables

    dataset = client.to_xarray()
    return dataset.sortby(dataset.time)
def check_dataset_empty(url_erddap, dataset_id, date_ini, date_end, lon_lim, lat_lim):
    """Report whether a glider dataset has too few rows to be usable.

    The dataset is requested from ``url_erddap`` with time, latitude and
    longitude constraints and converted to a DataFrame.

    :param url_erddap: address of the ERDDAP server.
    :param dataset_id: dataset to query.
    :param date_ini: lower time bound of the query.
    :param date_end: upper time bound of the query.
    :param lon_lim: ``[lon_min, lon_max]``.
    :param lat_lim: ``[lat_min, lat_max]``.
    :return: ``True`` when the DataFrame has fewer than 4 rows, else ``False``.
    """
    from erddapy import ERDDAP

    query = {
        'time>=': date_ini,
        'time<=': date_end,
        'latitude>=': lat_lim[0],
        'latitude<=': lat_lim[1],
        'longitude>=': lon_lim[0],
        'longitude<=': lon_lim[1],
    }
    requested = [
        'depth', 'latitude', 'longitude', 'time', 'temperature', 'salinity'
    ]

    client = ERDDAP(
        server=url_erddap,
        protocol='tabledap',
        response='nc'
    )
    client.dataset_id = dataset_id
    client.constraints = query
    client.variables = requested

    # Fewer than 4 rows is treated as "no data".
    return len(client.to_pandas()) < 4
def get_erddap_dataset(ds_id, variables=None, constraints=None, filetype=None):
    """
    Returns a netcdf dataset for a specified dataset ID (or dataframe if dataset cannot be converted to xarray)

    :param ds_id: dataset ID e.g. ng314-20200806T2040
    :param variables: optional list of variables
    :param constraints: optional list of constraints
    :param filetype: optional filetype to return, 'nc' (default) or 'dataframe'
    :return: xarray dataset sorted by time for 'nc'; a DataFrame (NaN rows
        dropped) for 'dataframe' or when the xarray conversion raises
        TypeError; an empty list when the 'nc' request raises OSError
    :raises ValueError: if filetype is neither 'nc' nor 'dataframe'
    """
    filetype = filetype or 'nc'

    # Validate before touching the server. Previously an unknown filetype only
    # printed a message and then failed on ``return ds`` with
    # UnboundLocalError because ``ds`` was never assigned.
    if filetype not in ('nc', 'dataframe'):
        raise ValueError(
            'Unrecognized filetype: {}. Needs to be "nc" or "dataframe"'.
            format(filetype))

    e = ERDDAP(server='NGDAC', protocol='tabledap', response='nc')
    e.dataset_id = ds_id
    if constraints:
        e.constraints = constraints
    if variables:
        e.variables = variables

    if filetype == 'dataframe':
        return e.to_pandas().dropna()

    try:
        ds = e.to_xarray()
        ds = ds.sortby(ds.time)
    except OSError:
        print('No dataset available for specified constraints: {}'.format(
            ds_id))
        ds = []
    except TypeError:
        print('Cannot convert to xarray, providing dataframe: {}'.format(
            ds_id))
        ds = e.to_pandas().dropna()
    return ds
def load_data(self, year='2019'):
    """Collect the DataFrames of this glider's datasets for ``year``.

    A row of ``self.df`` is selected when its ``'Dataset ID'`` contains both
    ``self.glider_id`` and ``year``. Each selected dataset is requested from
    ``self.server_url`` and stored in ``self.dfs`` under its dataset ID.

    :param year: substring looked for in the dataset ID.
    :return: ``self.dfs`` (rebuilt from scratch on every call).
    """
    self.dfs = {}
    for _index, record in self.df.iterrows():
        name = record['Dataset ID']
        selected = (self.glider_id in name) and (year in name)
        if selected:
            print(name)
            try:
                client = ERDDAP(server=self.server_url,
                                protocol='tabledap',
                                response='csv',
                                )
                client.dataset_id = name
                client.constraints = self.constraints
                client.variables = self.variables[name]
            except HTTPError:
                print('Failed to generate url {}'.format(name))
                continue
            frame = client.to_pandas(
                index_col='time (UTC)',
                parse_dates=True,
                skiprows=(1,)  # units information can be dropped.
            )
            self.dfs.update({name: frame})
    return self.dfs
igrid = 1 #%% Reading bathymetry data ncbath = xr.open_dataset(bath_file) bath_lat = ncbath.variables['lat'][:] bath_lon = ncbath.variables['lon'][:] bath_elev = ncbath.variables['elevation'][:] #%% Looping through all gliders found for id in gliders: print('Reading ' + id ) e.dataset_id = id e.constraints = constraints e.variables = variables # chacking data frame is not empty df = e.to_pandas() if len(df.index) != 0 : # Converting glider data to data frame df = e.to_pandas( index_col='time (UTC)', parse_dates=True, skiprows=(1,) # units information can be dropped. ).dropna() # Coverting glider vectors into arrays timeg, ind = np.unique(df.index.values,return_index=True)
def read_glider_variables_erddap_server(url_erddap, dataset_id,
                                        lat_lim, lon_lim,
                                        variable_names=None, **kwargs):
    """
    Created on Tue Nov 3 11:26:05 2020

    @author: aristizabal

    This function reads glider variables from the IOOS and Rutgers
    erddap glider servers.

    Inputs:
    url_erddap: url address of erddap server
                Example: 'https://data.ioos.us/gliders/erddap'
    dataset_id: Example: 'ng231-20190901T0000'
    variable_names: list of variable names.
                Example:
                variable_names = ['depth', 'latitude', 'longitude', 'time',
                                  'temperature', 'salinity']
                The default value is variable_names=['time']
    lat_lim: latitude limits for the search.
             Example, lat_lim = [38.0,40.0]
    lon_lim: longitude limits for the search.
             Example, lon_lim = [-75.0,-72.0]
    date_ini: (keyword, optional) initial date of time window.
              Example: date_ini = '2018-08-02T00:00:00Z'
    date_end: (keyword, optional) final date of time window.
              Example: date_end = '2018-08-10T00:00:00Z'
              The time constraints are only applied when BOTH date_ini and
              date_end are given; the values are passed to ERDDAP unchanged.

    Outputs:
    df: Pandas data frame with all the variables requested as vectors
    """

    from erddapy import ERDDAP

    # A fresh list per call instead of a mutable default argument shared
    # between calls.
    if variable_names is None:
        variable_names = ['time']

    date_ini = kwargs.get('date_ini', None)
    date_end = kwargs.get('date_end', None)

    # Time window of interest (only when both ends are known), followed by
    # the bounding box. Key order matches the original request.
    constraints = {}
    if date_ini is not None and date_end is not None:
        constraints['time>='] = date_ini
        constraints['time<='] = date_end
    constraints.update({
        'latitude>=': lat_lim[0],
        'latitude<=': lat_lim[1],
        'longitude>=': lon_lim[0],
        'longitude<=': lon_lim[1],
    })

    e = ERDDAP(server=url_erddap, protocol='tabledap', response='nc')
    e.dataset_id = dataset_id
    e.constraints = constraints
    e.variables = variable_names

    # Converting glider data to data frame
    # Checking that data frame has data
    df = e.to_pandas()
    if len(df) > 3:
        # NOTE(review): this downloads the data a second time; kept because
        # the two calls differ in parse_dates.
        df = e.to_pandas(parse_dates=True)

    return df
def read_glider_data_erddap_server(url_erddap, dataset_id,
                                   lat_lim, lon_lim, scatter_plot, **kwargs):
    """
    Created on Tue Feb 5 10:05:37 2019

    @author: aristizabal

    This function reads glider data from the IOOS
    Data Assembly Center (DAC).

    Inputs:
    url_erddap: url address of erddap server
                Example: 'https://data.ioos.us/gliders/erddap'
    dataset_id: this id is retrieved from the glider DAC using the
                function "retrieve_glider_id_erddap_server".
                Example: 'ru30-20180705T1825'
    lat_lim: latitude limits for the search.
             Example, lat_lim = [38.0,40.0]
    lon_lim: longitude limits for the search.
             Example, lon_lim = [-75.0,-72.0]
    date_ini: (keyword, optional) initial date of time window.
              Example: date_ini = '2018-08-02T00:00:00Z'
    date_end: (keyword, optional) final date of time window.
              Example: date_end = '2018-08-10T00:00:00Z'
              The time constraints are only applied when BOTH date_ini and
              date_end are given; the values are passed to ERDDAP unchanged.
    scatter_plot: if equal to 'yes' then a scatter plot
                  of the glider transect is plotted

    Outputs:
    tempg: all the glider profiles of temperature within the user defined
           time window (one column per unique time stamp, NaN padded)
    saltg: all the glider profiles of salinity within the user defined time
           window (same layout as tempg)
    latg: latitude at each unique time stamp
    long: longitude at each unique time stamp
    timeg: unique time stamps within the user defined time window
    depthg: depth matrix for all profiles (same layout as tempg)

    When the request returns 3 rows or fewer, all six outputs are np.nan.
    """

    from erddapy import ERDDAP
    import matplotlib.pyplot as plt
    import matplotlib.dates as mdates
    import cmocean
    import numpy as np

    def _scatter_transect(timeg, depthg, varg, color_map, cbar_label):
        # Draw one time/depth scatter figure coloured by `varg`.
        timeg_matrix = np.tile(timeg.T, (depthg.shape[0], 1))
        ttg = np.ravel(timeg_matrix)
        dg_flat = np.ravel(depthg)
        teg = np.ravel(varg)

        kw = dict(c=teg, marker='*', edgecolor='none')

        fig, ax = plt.subplots(figsize=(10, 3))
        cs = ax.scatter(ttg, -dg_flat, cmap=color_map, **kw)
        ax.set_xlim(timeg[0], timeg[-1])
        ax.set_ylabel('Depth (m)', fontsize=14)
        cbar = plt.colorbar(cs)
        cbar.ax.set_ylabel(cbar_label, fontsize=14)
        ax.set_title(dataset_id, fontsize=16)
        xfmt = mdates.DateFormatter('%H:%Mh\n%d-%b')
        ax.xaxis.set_major_formatter(xfmt)
        plt.ylim([-np.nanmax(dg_flat), 0])

    date_ini = kwargs.get('date_ini', None)
    date_end = kwargs.get('date_end', None)

    # Time window of interest (only when both ends are known), followed by
    # the bounding box. Key order matches the original request.
    constraints = {}
    if date_ini is not None and date_end is not None:
        constraints['time>='] = date_ini
        constraints['time<='] = date_end
    constraints.update({
        'latitude>=': lat_lim[0],
        'latitude<=': lat_lim[1],
        'longitude>=': lon_lim[0],
        'longitude<=': lon_lim[1],
    })

    variables = [
        'depth', 'latitude', 'longitude', 'time', 'temperature', 'salinity'
    ]

    e = ERDDAP(server=url_erddap, protocol='tabledap', response='nc')
    e.dataset_id = dataset_id
    e.constraints = constraints
    e.variables = variables

    # Converting glider data to data frame
    # Checking that data frame has data
    df = e.to_pandas()
    if len(df) <= 3:
        return np.nan, np.nan, np.nan, np.nan, np.nan, np.nan

    df = e.to_pandas(
        index_col='time (UTC)',
        parse_dates=True,
        skiprows=(1, )  # units information can be dropped.
    ).dropna()

    # Converting glider vectors into arrays.
    # `ind` is the row at which each unique time stamp first appears; the
    # rows between two consecutive entries are treated as one profile.
    timeg, ind = np.unique(df.index.values, return_index=True)
    latg = df['latitude (degrees_north)'].values[ind]
    long = df['longitude (degrees_east)'].values[ind]
    dg = df['depth (m)'].values
    # Columns 3 and 4 are read as temperature and salinity (the request
    # order with 'time' moved to the index).
    vg1 = df[df.columns[3]].values
    vg2 = df[df.columns[4]].values

    # Profile boundaries: the start of every profile plus the end of data.
    bounds = np.hstack([ind, len(dg)])

    # Built-in int(): np.int was removed in NumPy 1.24.
    zn = int(np.max(np.diff(bounds)))

    shape = (zn, len(timeg))
    depthg = np.full(shape, np.nan)
    tempg = np.full(shape, np.nan)
    saltg = np.full(shape, np.nan)

    for i in range(len(ind)):
        start, stop = bounds[i], bounds[i + 1]
        depth_seg = dg[start:stop]
        temp_seg = vg1[start:stop]
        salt_seg = vg2[start:stop]
        # len() of the slice (not stop - start) so that an empty slice
        # writes nothing, exactly as before.
        depthg[0:len(depth_seg), i] = depth_seg
        tempg[0:len(temp_seg), i] = temp_seg
        saltg[0:len(salt_seg), i] = salt_seg

    # Scatter plot
    if scatter_plot == 'yes':
        _scatter_transect(timeg, depthg, tempg, cmocean.cm.thermal,
                          'Temperature ($^oC$)')
        _scatter_transect(timeg, depthg, saltg, cmocean.cm.haline,
                          'Salinity')

    return tempg, saltg, timeg, latg, long, depthg
def get_ndbc(bbox=None, time_start=None, time_end=None, buoy=None):
    """Fetch NDBC buoy station positions and times from ERDDAP.

    Queries the ``cwwcNDBCMet`` tabledap dataset on the ``CSWC`` server for
    the ``station``, ``latitude``, ``longitude`` and ``time`` variables.

    :param bbox: ``[lon_min, lon_max, lat_min, lat_max]``; a falsy value
        selects ``[-100, -45, 5, 46]``.
    :param time_start: start of the window (any object with ``strftime``);
        a falsy value selects one day before ``time_end``.
    :param time_end: end of the window; a falsy value selects today's date.
    :param buoy: optional value used for the ``station=`` constraint.
    :return: DataFrame with rows containing NaN removed, or an empty
        DataFrame when the request raises ``HTTPError``.
    """
    bbox = bbox or [-100, -45, 5, 46]
    time_end = time_end or dt.date.today()
    time_start = time_start or (time_end - dt.timedelta(days=1))

    time_formatter = '%Y-%m-%dT%H:%M:%SZ'

    constraints = {
        'time>=': time_start.strftime(time_formatter),
        'time<=': time_end.strftime(time_formatter),
        'longitude>=': bbox[0],
        'longitude<=': bbox[1],
        'latitude>=': bbox[2],
        'latitude<=': bbox[3],
    }
    if buoy:
        constraints['station='] = buoy

    e = ERDDAP(
        server='CSWC',
        protocol='tabledap',
        response='csv'
    )
    e.dataset_id = 'cwwcNDBCMet'
    e.constraints = constraints
    e.variables = [
        "station",
        "latitude",
        "longitude",
        "time"
    ]

    # Single download. The earlier version issued the identical request
    # twice; the first one sat outside the try block (so an HTTPError
    # escaped despite the fallback below) and only fed an unused
    # ``stations`` variable.
    try:
        df = e.to_pandas(
            parse_dates=['time (UTC)'],
            skiprows=(1,)  # units information can be dropped.
        ).dropna()
    except HTTPError:
        df = pd.DataFrame()

    return df
def active_gliders(bbox=None, time_start=None, time_end=None, glider_id=None):
    """Download data for every glider dataset found in a box and window.

    The NGDAC ERDDAP server is first searched for datasets matching the
    bounding box, time window and optional ``glider_id``; each dataset
    found is then downloaded and the results are concatenated.

    :param bbox: ``[lon_min, lon_max, lat_min, lat_max]``; a falsy value
        selects ``[-100, -40, 18, 60]``.
    :param time_start: start of the window (any object with ``strftime``);
        a falsy value selects one day before ``time_end``.
    :param time_end: end of the window; a falsy value selects today's date,
        evaluated at call time.
    :param glider_id: optional search string restricting the datasets.
    :return: DataFrame indexed by ``('dataset_id', 'time (UTC)')``, or an
        empty DataFrame when the search fails or no dataset yields data.
    """
    bbox = bbox or [-100, -40, 18, 60]
    # Resolved per call. A ``dt.date.today()`` default in the signature is
    # evaluated once at import time and goes stale in a long-running process.
    time_end = time_end or dt.date.today()
    time_start = time_start or (time_end - dt.timedelta(days=1))
    t0 = time_start.strftime('%Y-%m-%dT%H:%M:%SZ')
    t1 = time_end.strftime('%Y-%m-%dT%H:%M:%SZ')

    e = ERDDAP(server='NGDAC')

    # Search constraints
    kw = {
        'min_time': t0,
        'max_time': t1,
        'min_lon': bbox[0],
        'max_lon': bbox[1],
        'min_lat': bbox[2],
        'max_lat': bbox[3],
    }
    search_url = e.get_search_url(search_for=glider_id or None,
                                  response='csv', **kw)

    try:
        # Grab the results
        search = pd.read_csv(search_url)
    except Exception:
        # Best effort: return an empty dataframe if the search request
        # fails or has no results. Exception (not a bare except) so that
        # KeyboardInterrupt/SystemExit still propagate.
        return pd.DataFrame()

    # Extract the IDs
    gliders = search['Dataset ID'].values

    msg = 'Found {} Glider Datasets:\n\n{}'.format
    print(msg(len(gliders), '\n'.join(gliders)))

    # Setting constraints
    constraints = {
        'time>=': t0,
        'time<=': t1,
        'longitude>=': bbox[0],
        'longitude<=': bbox[1],
        'latitude>=': bbox[2],
        'latitude<=': bbox[3],
    }

    variables = [
        'depth',
        'latitude',
        'longitude',
        'time',
        'temperature',
        'salinity',
    ]

    e = ERDDAP(
        server='NGDAC',
        protocol='tabledap',
        response='nc'
    )

    glider_dfs = []

    for dataset_id in gliders:
        e.dataset_id = dataset_id
        e.constraints = constraints
        e.variables = variables

        # Datasets that cannot be downloaded or parsed are skipped.
        try:
            df = e.to_pandas(
                index_col='time (UTC)',
                parse_dates=True,
                skiprows=(1,)  # units information can be dropped.
            ).dropna()
        except Exception:
            continue

        df = df.reset_index()
        df['dataset_id'] = dataset_id
        df = df.set_index(['dataset_id', 'time (UTC)'])
        glider_dfs.append(df)

    try:
        ndf = pd.concat(glider_dfs)
    except ValueError:
        # pd.concat raises ValueError when there is nothing to concatenate.
        return pd.DataFrame()

    return ndf
def read_glider_data_erddap_Rutgers_server(url_erddap, dataset_id,
                                           lat_lim, lon_lim, scatter_plot, **kwargs):
    """
    Read glider data from an erddap server and split it into up/down casts.

    The record is cut wherever the depth series changes direction, and each
    resulting segment is stored, sorted by depth, in one column of the
    NaN-padded output matrices.

    Inputs:
    url_erddap: url address of erddap server
    dataset_id: dataset to read
    lat_lim: latitude limits for the search, [lat_min, lat_max]
    lon_lim: longitude limits for the search, [lon_min, lon_max]
    scatter_plot: if equal to 'yes' then scatter plots of temperature and
                  salinity along the transect are drawn
    date_ini: (keyword, optional) initial date of time window
    date_end: (keyword, optional) final date of time window
              The time constraints are only applied when BOTH date_ini and
              date_end are given; the values are passed to ERDDAP unchanged.

    Outputs (all matrices share the shape rows x segments):
    tempg: temperature of every sample
    saltg: salinity of every sample
    timeg: time of every sample as matplotlib date numbers
    latg: latitude of every sample
    long: longitude of every sample
    depthg: depth of every sample

    When the request returns no rows, all six outputs are np.nan.

    NOTE(review): latg and long were returned but never assigned (the
    function always ended in NameError). They are now filled per sample to
    match the layout of timeg - confirm this is the layout callers expect.
    """

    from erddapy import ERDDAP
    import matplotlib.pyplot as plt
    import matplotlib.dates as mdates
    import cmocean
    import numpy as np

    def _scatter_transect(timeg, depthg, varg, color_map, cbar_label):
        # Draw one time/depth scatter figure coloured by `varg`.
        ttg = np.ravel(timeg)
        dg_flat = np.ravel(depthg)
        teg = np.ravel(varg)

        kw = dict(c=teg, marker='*', edgecolor='none')

        fig, ax = plt.subplots(figsize=(10, 3))
        cs = ax.scatter(ttg, -dg_flat, cmap=color_map, **kw)
        ax.set_xlim(np.nanmin(ttg), np.nanmax(ttg))
        ax.set_ylabel('Depth (m)', fontsize=14)
        cbar = plt.colorbar(cs)
        cbar.ax.set_ylabel(cbar_label, fontsize=14)
        ax.set_title(dataset_id, fontsize=16)
        xfmt = mdates.DateFormatter('%H:%Mh\n%d-%b')
        ax.xaxis.set_major_formatter(xfmt)
        plt.ylim([-np.nanmax(dg_flat), 0])

    date_ini = kwargs.get('date_ini', None)
    date_end = kwargs.get('date_end', None)

    # Time window of interest (only when both ends are known), followed by
    # the bounding box. Key order matches the original request.
    constraints = {}
    if date_ini is not None and date_end is not None:
        constraints['time>='] = date_ini
        constraints['time<='] = date_end
    constraints.update({
        'latitude>=': lat_lim[0],
        'latitude<=': lat_lim[1],
        'longitude>=': lon_lim[0],
        'longitude<=': lon_lim[1],
    })

    variables = [
        'depth', 'latitude', 'longitude', 'time', 'temperature', 'salinity'
    ]

    e = ERDDAP(server=url_erddap, protocol='tabledap', response='nc')
    e.dataset_id = dataset_id
    e.constraints = constraints
    e.variables = variables

    # Converting glider data to data frame
    # Checking that data frame has data
    no_data = (np.nan, np.nan, np.nan, np.nan, np.nan, np.nan)
    df = e.to_pandas()
    if len(df) == 0:
        # Previously nothing was assigned on this path and the return
        # statement raised NameError.
        return no_data

    df = e.to_pandas(
        index_col='time (UTC)',
        parse_dates=True,
        skiprows=(1, )  # units information can be dropped.
    ).dropna()
    if len(df) == 0:
        return no_data

    dg = df['depth (m)'].values
    tg = df.index.values
    # Columns are read by position, following the request order with 'time'
    # moved to the index: depth, latitude, longitude, temperature, salinity.
    lat_all = df[df.columns[1]].values
    lon_all = df[df.columns[2]].values
    vg1 = df[df.columns[3]].values
    vg2 = df[df.columns[4]].values

    # Indices at which a run of decreasing (upcast) or increasing (downcast)
    # depth is interrupted; they delimit the segments.
    upcast = np.where(np.diff(dg) < 0)[0]
    oku = np.where(np.diff(upcast) > 1)[0]
    end_upcast = upcast[oku]

    downcast = np.where(np.diff(dg) > 0)[0]
    okd = np.where(np.diff(downcast) > 1)[0]
    end_downcast = downcast[okd]

    ind = np.hstack(
        [0, np.unique(np.hstack([end_upcast, end_downcast])),
         len(dg)])
    n_seg = len(ind)

    # Row range of every segment. `elif` matters: with two separate `if`
    # statements the first segment was written and then immediately
    # overwritten by the generic branch.
    segments = []
    for i in range(n_seg):
        if i == 0:
            segments.append(slice(ind[i], ind[i + 1] + 2))
        elif i < n_seg - 1:
            segments.append(slice(ind[i] + 1, ind[i + 1] + 2))
        else:
            segments.append(slice(ind[i] + 1, len(dg)))

    # A segment can hold up to diff(ind) + 2 samples, so size the matrices
    # from the segments themselves; max(diff(ind)) alone is too small and
    # made the assignments below fail. The old size is kept as a lower
    # bound so shapes are unchanged wherever the old sizing was sufficient.
    longest = max(len(dg[seg]) for seg in segments)
    zn = max(int(np.max(np.diff(ind))), longest)

    shape = (zn, n_seg)
    depthg = np.full(shape, np.nan)
    timeg = np.full(shape, np.nan)
    tempg = np.full(shape, np.nan)
    saltg = np.full(shape, np.nan)
    latg = np.full(shape, np.nan)
    long = np.full(shape, np.nan)

    for i, seg in enumerate(segments):
        # Sort every segment by depth so up- and downcasts share the same
        # orientation.
        indd = np.argsort(dg[seg])
        n = len(indd)
        if n == 0:
            continue
        depthg[0:n, i] = dg[seg][indd]
        timeg[0:n, i] = mdates.date2num(tg[seg][indd])
        tempg[0:n, i] = vg1[seg][indd]
        saltg[0:n, i] = vg2[seg][indd]
        latg[0:n, i] = lat_all[seg][indd]
        long[0:n, i] = lon_all[seg][indd]

    # Scatter plot
    if scatter_plot == 'yes':
        _scatter_transect(timeg, depthg, tempg, cmocean.cm.thermal,
                          'Temperature ($^oC$)')
        _scatter_transect(timeg, depthg, saltg, cmocean.cm.haline,
                          'Salinity')

    return tempg, saltg, timeg, latg, long, depthg
#https://coastwatch.pfeg.noaa.gov/erddap/info/wocecpr/index.html ### initializing the erddap class instance with the data server address and the connection protocol. e = ERDDAP( server='https://coastwatch.pfeg.noaa.gov/erddap', protocol='tabledap', ) ### specifying the data format of the response. e.response = 'csv' ### specifying the database name we need the data from. e.dataset_id = 'wocecpr' ### specifying the data constraints e.constraints = { 'time>=': '2000-01-15T01:24:00Z', 'time<=': '2010-01-17T13:39:00Z', 'latitude>=': 37.0, 'latitude<=': 43.43, 'longitude>=': 317.56, 'longitude<=': 322.87, } ###specifying the variables(columns name) to be retrived. e.variables = [ 'sample', 'latitude', 'longitude', 'life_stage', 'abundance', 'time', ] ### searching for the server link and doing the handshaking process. search_url = e.get_search_url(response='csv') ### receiving requested data and saving it into a dataframe
import matplotlib.pyplot as plt
from erddapy import ERDDAP
import pandas as pd
import seaborn as sns

# Default figure size for every seaborn/matplotlib figure in this script.
sns.set(rc={'figure.figsize': (11, 4)})

# ERDDAP client for the erddap.marine.ie server (tabledap protocol).
e = ERDDAP(
    server='https://erddap.marine.ie/erddap',
    protocol='tabledap',
)

e.dataset_id = 'IWBNetwork'

# Records of station 'M3' from 2015-06-28 onwards (no upper time bound).
e.constraints = {
    'time>=': '2015-06-28T00:00:00Z',
    'station_id=': 'M3'
}

# Variables to request; the commented-out names are not requested.
e.variables = [
    'time',
    'AtmosphericPressure',
    'WindDirection',
    'WindSpeed',
    'WaveHeight',
    'WavePeriod',
    'MeanWaveDirection',
    # 'Hmax',
    # 'AirTemperature',
    'SeaTemperature'
]
from erddapy import ERDDAP

# Path, pd and variable_name are defined outside this excerpt.
# Location of the local CSV copy: <current directory>/data/water_level_example.csv
path = Path().absolute()
fname = path.joinpath("data", "water_level_example.csv")

if fname.is_file():
    # A local copy exists: read it instead of contacting the server.
    data = pd.read_csv(fname, parse_dates=["time (UTC)"])
else:
    # No local copy: download the data from the ERDDAP server.
    e = ERDDAP(
        server="http://erddap.aoos.org/erddap/",
        protocol="tabledap"
    )
    e.dataset_id = "kotzebue-alaska-water-level"
    e.constraints = {
        "time>=": "2018-09-05T21:00:00Z",
        "time<=": "2019-07-10T19:00:00Z",
    }
    e.variables = [
        variable_name,
        "time",
        "z",
    ]
    data = e.to_pandas(
        index_col="time (UTC)",
        parse_dates=True,
    )
    # Integer view of the time index floor-divided by 1e9.
    # NOTE(review): presumably nanoseconds -> seconds since the epoch; confirm.
    data["timestamp"] = data.index.astype("int64") // 1e9
    # Save the download so later runs take the branch above.
    data.to_csv(fname)

data.head()
)
# The parenthesis above closes a call that starts outside this excerpt.
e.response = 'csv'
#e.dataset_id = drifter_year + '_Argos_Drifters_NRT'

#use this until we can get location quality back into older years
#currently it is only in erddap for 2020 and newer
#if int(drifter_years[0]) >= 2020:
e.variables = [
    'trajectory_id', 'strain', 'voltage', 'time', 'latitude', 'sst',
    'longitude', 'location_quality'
]
#else:
#    e.variables = ['trajectory_id','strain', 'voltage', 'time', 'latitude', 'sst',
#                   'longitude']

# Restrict the request to a single drifter.
e.constraints = {'trajectory_id=': argos_id}

# One dataset per year: download each and collect the frames by year.
df_years = {}
for year in drifter_years:
    e.dataset_id = year + '_Argos_Drifters_NRT'
    df = e.to_pandas(
        index_col='time (UTC)',
        parse_dates=True,
        skiprows=(1, )  # units information can be dropped.
    )
    # Keep only the first whitespace-separated word of every column name.
    df.columns = [x[1].split()[0] for x in enumerate(df.columns)]
    df_years[year] = df

# Stack the per-year frames into one.
df = pd.concat(df_years.values())

#get rid of timezone info
df = df.tz_localize(None)

# # names = ['trajectory_id','strain','voltage','datetime','latitude','sst','longitude']
# # df=pd.read_csv(filename, skiprows=1, header=0, names=names, parse_dates=[3])
def load_data_from_erddap(config, station_id=None, station_data=None):
    """Build MCF metadata from an ERDDAP server.

    With ``station_id=None`` the ``allDatasets`` table of the server is read
    and one copy of the MCF template is filled in per public dataset. With a
    ``station_id`` the ``info`` metadata of that dataset is read and its
    field descriptions and keywords are written into ``station_data``.

    :param config: mapping that provides
        ``config['static_data']['mcf_template']`` (path of the YAML
        template), ``config['dynamic_data']['erddap_server']`` and
        ``config['dynamic_data']['erddap_protocol']``.
    :param station_id: dataset ID to load, or ``None`` to load all datasets.
    :param station_data: MCF dict of the station, updated in place; required
        when ``station_id`` is given.
    :return: dict mapping dataset ID to MCF dict when ``station_id`` is
        ``None``; otherwise the updated ``station_data``.
    :raises ValueError: if ``station_id`` is given without ``station_data``.
    """
    # Fail before any file or network access. Without this check the
    # function failed much later with a TypeError on ``None``.
    if station_id is not None and station_data is None:
        raise ValueError(
            'station_data must be provided when station_id is given')

    # ``with`` closes the template file; it was previously left open.
    # NOTE(review): FullLoader is kept as-is; the template is assumed to be
    # a trusted local file - prefer yaml.safe_load if that is not the case.
    with open(config['static_data']['mcf_template'], 'r') as mcf_file:
        mcf_template = yaml.load(mcf_file, Loader=yaml.FullLoader)

    es = ERDDAP(
        server=config['dynamic_data']['erddap_server'],
        protocol=config['dynamic_data']['erddap_protocol'],
    )

    if station_id is None:
        # load all station data MCF skeleton
        stations = {}
        es.dataset_id = 'allDatasets'

        # filter out "log in" datasets as the vast majority of their
        # metadata is unavailable
        es.constraints = {'accessible=': 'public'}
        stations_df = es.to_pandas()

        # drop 'allDatasets' row
        stations_df.drop(labels=0, axis='index', inplace=True)
        print(stations_df)

        for _, row_series in stations_df.iterrows():
            dataset_id = row_series['datasetID']
            is_table = row_series['dataStructure'] == 'table'
            dataset_url = (row_series['tabledap'] if is_table
                           else row_series['griddap'])
            min_time = row_series['minTime (UTC)']

            # ensure each station has an independent copy of the MCF skeleton
            station = copy.deepcopy(mcf_template)
            stations[dataset_id] = station

            station['metadata']['identifier'] = dataset_id
            station['metadata']['dataseturi'] = dataset_url

            station['spatial']['datatype'] = 'textTable' if is_table else 'grid'
            station['spatial']['geomtype'] = row_series['cdm_data_type']
            station['spatial']['bbox'] = '%s,%s,%s,%s' % (
                row_series['minLongitude (degrees_east)'],
                row_series['minLatitude (degrees_north)'],
                row_series['maxLongitude (degrees_east)'],
                row_series['maxLatitude (degrees_north)'])

            station['identification']['title'] = row_series['title']
            station['identification']['dates']['creation'] = min_time
            station['identification']['temporal_begin'] = min_time
            station['identification']['temporal_end'] = row_series[
                'maxTime (UTC)']
            station['identification']['url'] = dataset_url
            station['identification']['abstract'] = row_series['summary']

            station['distribution']['erddap']['url'] = dataset_url
            station['distribution']['erddap']['name'] = row_series['title']

        print('Stations after ERDDAP call...')
        print(stations)

        return stations

    # load specific station data into MCF skeleton
    print('Loading ERDDAP metadata for station: %s' % (station_id))

    es.dataset_id = station_id

    metadata_url = es.get_download_url(dataset_id='%s/index' % (station_id),
                                       response='csv',
                                       protocol='info')
    metadata = pd.read_csv(filepath_or_buffer=metadata_url)
    print(metadata_url)
    print(metadata.head())

    # ERDDAP ISO XML provides a list of dataset field names (long & short),
    # data types & units of measurement, in case this becomes useful for the
    # CIOOS metadata standard we can extend the YAML skeleton to include
    # these and the template to export them.
    #
    # below most variable attributes from ERDDAP are extracted and pivoted to
    # describe the field; actual field data types are extracted separately
    # and merged into the pivoted dataframe for completeness
    is_global = metadata['Variable Name'] == 'NC_GLOBAL'
    is_variable_row = metadata['Row Type'] == 'variable'

    columns_pivot = metadata[~is_global & ~is_variable_row].pivot(
        index='Variable Name', columns='Attribute Name', values='Value')
    col_data_types = metadata[is_variable_row][['Variable Name', 'Data Type']]
    df_merge = pd.merge(columns_pivot, col_data_types, on='Variable Name')

    station_data['dataset'] = {}

    for _, field_series in df_merge.iterrows():
        field_name = field_series['Variable Name']
        station_data['dataset'][field_name] = {
            'long_name': field_series['long_name'],
            'data_type': field_series['Data Type'],
            'units': field_series['units'],
        }

    station_data['identification']['keywords']['default']['keywords'] = \
        metadata[is_global
                 & (metadata['Attribute Name'] == 'keywords')]['Value'].values

    return station_data
def GOFS_RTOFS_vs_Argo_floats(lon_forec_track, lat_forec_track, lon_forec_cone,
                              lat_forec_cone, lon_best_track, lat_best_track,
                              lon_lim, lat_lim, folder_fig):
    """Plot Argo float profiles against GOFS 3.1, RTOFS and Copernicus output.

    Produces one map of the Argo floats found inside the lon/lat box for the
    current time window, then, for every float, one temperature and one
    salinity figure comparing the float profile with the three models.
    Figures are written to disk; nothing is returned.

    :param lon_forec_track, lat_forec_track: forecast track coordinates,
        drawn on the map as a gold line.
    :param lon_forec_cone, lat_forec_cone: forecast cone coordinates, drawn
        in blue.
    :param lon_best_track, lat_best_track: best track coordinates, drawn as
        red dots.
    :param lon_lim: longitude bounds; element 0 is the lower and element 1
        (also used as the last element) the upper bound.
    :param lat_lim: latitude bounds, same layout as ``lon_lim``.
    :param folder_fig: string prefix for output figure paths; file names are
        appended by plain concatenation, so it must end with a separator.
    """
    #%% User input

    # GOFS3.1 output model location
    url_GOFS_ts = 'http://tds.hycom.org/thredds/dodsC/GLBy0.08/expt_93.0/ts3z'

    # RTOFS files
    folder_RTOFS = '/home/coolgroup/RTOFS/forecasts/domains/hurricanes/RTOFS_6hourly_North_Atlantic/'

    nc_files_RTOFS = [
        'rtofs_glo_3dz_f006_6hrly_hvr_US_east.nc',
        'rtofs_glo_3dz_f012_6hrly_hvr_US_east.nc',
        'rtofs_glo_3dz_f018_6hrly_hvr_US_east.nc',
        'rtofs_glo_3dz_f024_6hrly_hvr_US_east.nc',
    ]

    # COPERNICUS MARINE ENVIRONMENT MONITORING SERVICE (CMEMS)
    url_cmems = 'http://nrt.cmems-du.eu/motu-web/Motu'
    service_id = 'GLOBAL_ANALYSIS_FORECAST_PHY_001_024-TDS'
    product_id = 'global-analysis-forecast-phy-001-024'
    depth_min = '0.493'
    out_dir = '/home/aristizabal/crontab_jobs'

    # Bathymetry file
    bath_file = '/home/aristizabal/bathymetry_files/GEBCO_2014_2D_-100.0_0.0_-10.0_50.0.nc'

    # Argo floats
    url_Argo = 'http://www.ifremer.fr/erddap'

    #%%
    from matplotlib import pyplot as plt
    import numpy as np
    import xarray as xr
    import netCDF4
    from datetime import datetime, timedelta
    import cmocean
    import matplotlib.dates as mdates
    from erddapy import ERDDAP
    import pandas as pd
    import os
    import subprocess

    # SECURITY(review): credentials are hard-coded in source. They can now be
    # overridden through the environment; the literals are kept only as a
    # fallback so existing jobs keep working and should be removed/rotated.
    cmems_user = os.environ.get('CMEMS_USER', 'maristizabalvar')
    cmems_pwd = os.environ.get('CMEMS_PWD', 'MariaCMEMS2018')

    # Do not produce figures on screen
    plt.switch_backend('agg')

    # Increase fontsize of labels globally
    plt.rc('xtick', labelsize=14)
    plt.rc('ytick', labelsize=14)
    plt.rc('legend', fontsize=14)

    #%% Reading bathymetry data
    ncbath = xr.open_dataset(bath_file)
    bath_lat = ncbath.variables['lat'][:]
    bath_lon = ncbath.variables['lon'][:]
    bath_elev = ncbath.variables['elevation'][:]

    oklatbath = np.logical_and(bath_lat >= lat_lim[0], bath_lat <= lat_lim[-1])
    oklonbath = np.logical_and(bath_lon >= lon_lim[0], bath_lon <= lon_lim[-1])

    bath_latsub = bath_lat[oklatbath]
    bath_lonsub = bath_lon[oklonbath]
    bath_elevs = bath_elev[oklatbath, :]
    bath_elevsub = bath_elevs[:, oklonbath]

    #%% Get time bounds for current day
    ti = datetime.today() - timedelta(1) - timedelta(hours=6)
    tini = datetime(ti.year, ti.month, ti.day)
    te = ti + timedelta(2)
    tend = datetime(te.year, te.month, te.day)

    #%% Look for Argo datasets
    e = ERDDAP(server=url_Argo)

    kw = {
        'min_lon': lon_lim[0],
        'max_lon': lon_lim[1],
        'min_lat': lat_lim[0],
        'max_lat': lat_lim[1],
        'min_time': str(tini),
        'max_time': str(tend),
    }

    search_url = e.get_search_url(response='csv', **kw)

    # Grab the results
    search = pd.read_csv(search_url)

    # Extract the IDs
    dataset = search['Dataset ID'].values

    msg = 'Found {} Datasets:\n\n{}'.format
    print(msg(len(dataset), '\n'.join(dataset)))

    # Only the first dataset returned by the search is queried.
    dataset_type = dataset[0]

    constraints = {
        'time>=': str(tini),
        'time<=': str(tend),
        'latitude>=': lat_lim[0],
        'latitude<=': lat_lim[1],
        'longitude>=': lon_lim[0],
        'longitude<=': lon_lim[1],
    }

    variables = [
        'platform_number',
        'time',
        'pres',
        'longitude',
        'latitude',
        'temp',
        'psal',
    ]

    e = ERDDAP(server=url_Argo, protocol='tabledap', response='nc')

    e.dataset_id = dataset_type
    e.constraints = constraints
    e.variables = variables

    print(e.get_download_url())

    df = e.to_pandas(
        parse_dates=True,
        skiprows=(1, )  # units information can be dropped.
    ).dropna()

    argo_ids = np.asarray(df['platform_number'])
    argo_times = np.asarray(df['time (UTC)'])
    argo_press = np.asarray(df['pres (decibar)'])
    argo_lons = np.asarray(df['longitude (degrees_east)'])
    argo_lats = np.asarray(df['latitude (degrees_north)'])
    argo_temps = np.asarray(df['temp (degree_Celsius)'])
    argo_salts = np.asarray(df['psal (PSU)'])

    #%% GOFS 3.1
    # Best effort: if the remote dataset cannot be opened, the GOFS curves
    # are replaced by NaN so the remaining models are still plotted.
    try:
        GOFS_ts = xr.open_dataset(url_GOFS_ts, decode_times=False)

        lt_GOFS = np.asarray(GOFS_ts['lat'][:])
        ln_GOFS = np.asarray(GOFS_ts['lon'][:])
        tt = GOFS_ts['time']
        t_GOFS = netCDF4.num2date(tt[:], tt.units)
        depth_GOFS = np.asarray(GOFS_ts['depth'][:])
        gofs_available = True
    except Exception as err:
        print(err)
        GOFS_ts = np.nan
        depth_GOFS = np.nan
        gofs_available = False

    if gofs_available:
        # The model time axis does not depend on the float, so convert it to
        # matplotlib date numbers once (truncated to the hour) rather than
        # once per float.
        tstamp_GOFS = [
            mdates.date2num(datetime(t.year, t.month, t.day, t.hour))
            for t in t_GOFS
        ]

    #%% Map Argo floats
    lev = np.arange(-9000, 9100, 100)
    plt.figure()
    plt.contourf(bath_lonsub,
                 bath_latsub,
                 bath_elevsub,
                 lev,
                 cmap=cmocean.cm.topo)
    plt.plot(lon_forec_track, lat_forec_track, '.-', color='gold')
    plt.plot(lon_forec_cone, lat_forec_cone, '.-b', markersize=1)
    plt.plot(lon_best_track, lat_best_track, 'or', markersize=3)

    argo_idd = np.unique(argo_ids)
    for argo_id in argo_idd:
        okind = np.where(argo_ids == argo_id)[0]
        plt.plot(np.unique(argo_lons[okind]),
                 np.unique(argo_lats[okind]),
                 's',
                 color='darkorange',
                 markersize=5,
                 markeredgecolor='k')

    plt.title('Argo Floats ' + str(tini)[0:13] + '-' + str(tend)[0:13],
              fontsize=16)
    plt.axis('scaled')
    plt.xlim(lon_lim[0], lon_lim[1])
    plt.ylim(lat_lim[0], lat_lim[1])

    file = folder_fig + 'ARGO_lat_lon'
    plt.savefig(file, bbox_inches='tight', pad_inches=0.1)
    plt.close()  # release the figure; pyplot keeps it alive otherwise

    #%% Figure argo float vs GOFS and vs RTOFS
    for argo_id in argo_idd:
        print(argo_id)
        okind = np.where(argo_ids == argo_id)[0]
        argo_time = np.asarray([
            datetime.strptime(t, '%Y-%m-%dT%H:%M:%SZ')
            for t in argo_times[okind]
        ])

        argo_lon = argo_lons[okind]
        argo_lat = argo_lats[okind]
        argo_pres = argo_press[okind]
        argo_temp = argo_temps[okind]
        argo_salt = argo_salts[okind]

        # GOFS
        print('Retrieving variables from GOFS')
        if not gofs_available:
            temp_GOFS = np.nan
            salt_GOFS = np.nan
        else:
            # Nearest model time step to the first float observation.
            oktt_GOFS = int(
                np.round(
                    np.interp(mdates.date2num(argo_time[0]), tstamp_GOFS,
                              np.arange(len(tstamp_GOFS)))))
            oklat_GOFS = np.where(lt_GOFS >= argo_lat[0])[0][0]
            # NOTE(review): "+ 360" presumes a negative (western) float
            # longitude mapped onto a 0-360 model grid -- confirm before
            # using this outside the western hemisphere.
            oklon_GOFS = np.where(ln_GOFS >= argo_lon[0] + 360)[0][0]
            temp_GOFS = np.asarray(GOFS_ts['water_temp'][oktt_GOFS, :,
                                                         oklat_GOFS,
                                                         oklon_GOFS])
            salt_GOFS = np.asarray(GOFS_ts['salinity'][oktt_GOFS, :,
                                                       oklat_GOFS,
                                                       oklon_GOFS])

        # RTOFS
        # Time window: the calendar day of the first float observation.
        tini = datetime(argo_time[0].year, argo_time[0].month,
                        argo_time[0].day)
        tend = tini + timedelta(days=1)

        # Read RTOFS grid and time
        print('Retrieving coordinates from RTOFS')

        # Daily folders are named rtofs.YYYYMMDD.
        rtofs_dir = folder_RTOFS + tini.strftime('rtofs.%Y%m%d') + '/'

        with xr.open_dataset(rtofs_dir + nc_files_RTOFS[0]) as ncRTOFS:
            latRTOFS = np.asarray(ncRTOFS.Latitude[:])
            lonRTOFS = np.asarray(ncRTOFS.Longitude[:])
            depth_RTOFS = np.asarray(ncRTOFS.Depth[:])

        # First time stamp of every forecast file.
        tRTOFS = []
        for nc_name in nc_files_RTOFS:
            with xr.open_dataset(rtofs_dir + nc_name) as ncRTOFS:
                tRTOFS.append(np.asarray(ncRTOFS.MT[:])[0])

        tRTOFS = np.asarray(
            [mdates.num2date(mdates.date2num(t)) for t in tRTOFS])

        # First forecast file / grid cell at or past the float time/position.
        oktt_RTOFS = np.where(
            mdates.date2num(tRTOFS) >= mdates.date2num(argo_time[0]))[0][0]
        oklat_RTOFS = np.where(latRTOFS[:, 0] >= argo_lat[0])[0][0]
        oklon_RTOFS = np.where(lonRTOFS[0, :] >= argo_lon[0])[0][0]

        with xr.open_dataset(rtofs_dir +
                             nc_files_RTOFS[oktt_RTOFS]) as ncRTOFS:
            temp_RTOFS = np.asarray(
                ncRTOFS.variables['temperature'][0, :, oklat_RTOFS,
                                                 oklon_RTOFS])
            salt_RTOFS = np.asarray(
                ncRTOFS.variables['salinity'][0, :, oklat_RTOFS,
                                              oklon_RTOFS])

        # Downloading and reading Copernicus output.
        # The command is passed as an argument list (no shell), so values
        # need no quoting and cannot be reinterpreted by a shell.
        COP_file = out_dir + '/' + str(argo_id) + '.nc'
        motu_cmd = [
            'python', '-m', 'motuclient',
            '--motu', url_cmems,
            '--service-id', service_id,
            '--product-id', product_id,
            '--longitude-min', str(argo_lon[0] - 2 / 12),
            '--longitude-max', str(argo_lon[0] + 2 / 12),
            '--latitude-min', str(argo_lat[0] - 2 / 12),
            '--latitude-max', str(argo_lat[0] + 2 / 12),
            '--date-min', str(tini - timedelta(0.5)),
            '--date-max', str(tend + timedelta(0.5)),
            '--depth-min', depth_min,
            '--depth-max', str(np.nanmax(argo_pres) + 1000),
            '--variable', 'thetao',
            '--variable', 'so',
            '--out-dir', out_dir,
            '--out-name', str(argo_id) + '.nc',
            '--user', cmems_user,
            '--pwd', cmems_pwd,
        ]
        # The exit status is not checked; the presence of the output file
        # below decides whether Copernicus data is plotted.
        subprocess.run(motu_cmd, check=False)

        # Check if file was downloaded
        if os.path.isfile(COP_file):
            with xr.open_dataset(COP_file) as COP:
                latCOP = np.asarray(COP.latitude[:])
                lonCOP = np.asarray(COP.longitude[:])
                depth_COP = np.asarray(COP.depth[:])
                tCOP = np.asarray(
                    mdates.num2date(mdates.date2num(COP.time[:])))

                oktimeCOP = np.where(
                    mdates.date2num(tCOP) >= mdates.date2num(tini))[0][0]
                oklonCOP = np.where(lonCOP >= argo_lon[0])[0][0]
                oklatCOP = np.where(latCOP >= argo_lat[0])[0][0]

                temp_COP = np.asarray(
                    COP.variables['thetao'][oktimeCOP, :, oklatCOP, oklonCOP])
                salt_COP = np.asarray(
                    COP.variables['so'][oktimeCOP, :, oklatCOP, oklonCOP])
        else:
            # Download failed: plot nothing for Copernicus instead of
            # crashing on undefined data, mirroring the GOFS fallback.
            depth_COP = np.nan
            temp_COP = np.nan
            salt_COP = np.nan

        profile_where = '\n [lon,lat] = [' \
            + str(np.round(argo_lon[0], 3)) + ',' \
            + str(np.round(argo_lat[0], 3)) + ']'

        # Figure temp
        plt.figure(figsize=(5, 6))
        plt.plot(argo_temp,
                 -argo_pres,
                 '.-',
                 linewidth=2,
                 label='ARGO Float id ' + str(argo_id))
        plt.plot(temp_GOFS,
                 -depth_GOFS,
                 '.-',
                 linewidth=2,
                 label='GOFS 3.1',
                 color='red')
        plt.plot(temp_RTOFS,
                 -depth_RTOFS,
                 '.-',
                 linewidth=2,
                 label='RTOFS',
                 color='g')
        plt.plot(temp_COP,
                 -depth_COP,
                 '.-',
                 linewidth=2,
                 label='Copernicus',
                 color='darkorchid')
        plt.ylim([-1000, 0])
        plt.title('Temperature Profile on ' + str(argo_time[0])[0:13] +
                  profile_where,
                  fontsize=16)
        plt.ylabel('Depth (m)', fontsize=14)
        plt.xlabel('$^oC$', fontsize=14)
        plt.legend(loc='lower right', fontsize=14)

        file = folder_fig + 'ARGO_vs_GOFS_RTOFS_COP_temp_' + str(argo_id)
        plt.savefig(file, bbox_inches='tight', pad_inches=0.1)
        plt.close()

        # Figure salt
        plt.figure(figsize=(5, 6))
        plt.plot(argo_salt,
                 -argo_pres,
                 '.-',
                 linewidth=2,
                 label='ARGO Float id ' + str(argo_id))
        plt.plot(salt_GOFS,
                 -depth_GOFS,
                 '.-',
                 linewidth=2,
                 label='GOFS 3.1',
                 color='red')
        plt.plot(salt_RTOFS,
                 -depth_RTOFS,
                 '.-',
                 linewidth=2,
                 label='RTOFS',
                 color='g')
        plt.plot(salt_COP,
                 -depth_COP,
                 '.-',
                 linewidth=2,
                 label='Copernicus',
                 color='darkorchid')
        plt.ylim([-1000, 0])
        plt.title('Salinity Profile on ' + str(argo_time[0])[0:13] +
                  profile_where,
                  fontsize=16)
        plt.ylabel('Depth (m)', fontsize=14)
        plt.legend(loc='lower right', fontsize=14)

        file = folder_fig + 'ARGO_vs_GOFS_RTOFS_COP_salt_' + str(argo_id)
        plt.savefig(file, bbox_inches='tight', pad_inches=0.1)
        plt.close()
def get_coordinates(df, **kw):
    '''
    Collect distinct latitude/longitude pairs from up to 10 ERDDAP datasets.

    Rows of ``df`` are visited in random order. For each dataset a TableDAP
    ``csvp`` request for the distinct ``latitude``/``longitude`` values inside
    the query window is issued, the result is tagged with the dataset's ID,
    institution, download URL and ``cdm_data_type``, and appended to the
    output. Datasets whose request fails are reported and skipped.

    Example ERDDAP TableDAP URL:

    dataset_url = '%s/tabledap/%s.csvp?latitude,longitude,time&longitude>=-72.0&longitude<=-69&latitude>=38&latitude<=41&time>=1278720000.0&time<=1470787200.0&distinct()' % (all_datasets['server'].iloc[int(i)],all_datasets['Dataset ID'].iloc[int(i)])

    :param df: DataFrame with ``server``, ``Dataset ID`` and ``Institution``
        columns, one row per candidate dataset.
    :param kw: query window; ``min_time``, ``max_time``, ``min_lon``,
        ``max_lon``, ``min_lat`` and ``max_lat`` are read.
    :return: DataFrame of coordinates from all successfully queried
        datasets; empty if none succeeded.
    '''
    # Stop after this many successful datasets, or after all rows of df if
    # there are fewer.
    final_dataset_limit = min(10, df.shape[0])
    datasets_found = 0
    frames = []

    index_random = random.sample(range(0, df.shape[0]), df.shape[0])
    print("index_random: {}".format(index_random))

    for i in index_random:
        server_url = df['server'].iloc[int(i)]
        dataset_id = df['Dataset ID'].iloc[int(i)]
        institution = df['Institution'].iloc[int(i)]

        # Skip some difficult datasets for now (e.g. ROMS model output).
        if "ROMS" in dataset_id or "DOP" in dataset_id:
            continue

        e = ERDDAP(server=server_url, protocol='tabledap', response='csv')
        try:
            print("datasets_found: {}".format(datasets_found))

            # Constraints are set on the instance rather than passed to
            # get_download_url so the '>=' / '<=' suffixes ERDDAP's TableDAP
            # API expects can be attached to the keys (the search API uses a
            # different parameter signature). 'distinct' asks the server to
            # de-duplicate the returned rows.
            e.constraints = {
                "time>=": kw['min_time'],
                "time<=": kw['max_time'],
                "longitude>=": kw['min_lon'],
                "longitude<=": kw['max_lon'],
                "latitude>=": kw['min_lat'],
                "latitude<=": kw['max_lat'],
                "distinct": ()
            }

            url = e.get_download_url(
                response="csvp",
                dataset_id=dataset_id,
                variables=["latitude", "longitude"])
            print("Download URL: {}".format(url))

            coords = pd.read_csv(url)
            coords['dataset_count'] = i
            coords['dataset_download_url'] = url
            coords['Dataset ID'] = dataset_id
            coords['Institution'] = institution

            metadata_url = e.get_info_url(dataset_id=dataset_id,
                                          response='csv')
            metadata = pd.read_csv(metadata_url)
            coords['cdm_data_type'] = "".join(
                metadata.loc[metadata["Attribute Name"] == "cdm_data_type",
                             "Value"])

            print(coords.head())
            frames.append(coords)

            # Reaching this point means the dataset query succeeded; stop as
            # soon as the limit is hit so we never go over it.
            datasets_found += 1
            print("new dataset acquired; datasets_found: {}".format(
                datasets_found))
            if datasets_found == final_dataset_limit:
                break

        except Exception as ex:
            # Best effort: a failure (commonly an HTTP error when the dataset
            # has no features inside the query window) must not abort the
            # remaining datasets, but it is always reported so that
            # unexpected errors are not silently lost.
            print("Skipping dataset {}: {}: {}".format(
                dataset_id, type(ex).__name__, ex))

    # Concatenate once at the end instead of re-copying on every iteration.
    return pd.concat(frames) if frames else pd.DataFrame()
check = os.path.exists(path) if not check: os.makedirs(path) else: print(f'"{path}" already exists' ) # Need to put everything together into a dataframe data_availability = pd.DataFrame(columns=erd.variables) for i in range(len(days)-1): # Get the time constraints min_time = days[i] max_time = days[i+1] erd.constraints = { 'time>=': min_time, 'time<=': max_time } # Query the relevant data and put into a dataframe data_subset = get_erddap_data(0, erd) # Rename the columns to be consistent with variable names for col in data_subset.columns: data_subset.rename(columns={col: col.split()[0]}, inplace=True) # Get the fill_values fill_values = get_fill_values(erd) # Calculate the % available data for each variable in the day time period available = {} for var in erd.variables:
# Query the OSMC 30-day dataset once per drifting-buoy WMO code.
# The start-time constraint is the same for every buoy, so build it once.
query_start = "{}-{}-{}T{}:00:00Z".format(
    x.tm_year,
    str(x.tm_mon).zfill(2),
    str(x.tm_mday).zfill(2),
    str(x.tm_hour).zfill(2),
)

for i in range(len(wmo_mb)):
    # Index access is kept because the container type of wmo_mb is not
    # visible from this snippet.
    wmo = wmo_mb[i]
    print(wmo)

    e = ERDDAP(
        server='http://osmc.noaa.gov/erddap',
        protocol='tabledap',
    )
    e.response = 'csv'
    e.dataset_id = 'OSMC_30day'
    e.constraints = {
        'time>=': query_start,
        'longitude>=': -80.0,
        'longitude<=': 80.0,
        'platform_type=': "DRIFTING BUOYS (GENERIC)",
        'platform_code=': str(wmo),
    }
    e.variables = [
        'platform_code',
        'time',
        'latitude',
        'longitude',
        'sst',
        'slp',
    ]

    try:
        df = e.to_pandas()
    except Exception:
        # Best effort: report buoys without data and move on. Catching
        # Exception (not a bare except) keeps Ctrl-C / SystemExit working.
        # The message reads "There is no data for WMO <code>".
        # NOTE(review): on failure `df` still holds the previous buoy's data.
        print("Não há dados para o WMO "+str(wmo))
In this notebook, we will explore the statistics of the messages IOOS is releasing to GTS. The first step is to download the data. We will use an ERDDAP server that [hosts the CSV files](https://ferret.pmel.noaa.gov/generic/erddap/files/ioos_obs_counts/) with the ingest data. from datetime import date from erddapy import ERDDAP server = "http://osmc.noaa.gov/erddap" e = ERDDAP(server=server, protocol="tabledap") e.dataset_id = "ioos_obs_counts" e.variables = ["time", "locationID", "region", "sponsor", "met", "wave"] e.constraints = { "time>=": "2019-09", "time<": "2020-11", } df = e.to_pandas(parse_dates=True) df["locationID"] = df["locationID"].str.lower() df.tail() The table holds the ingest data for the time window requested above (from 2019-09 up to, but not including, 2020-11). We can now explore it by grouping the data by IOOS Regional Association (RA). groups = df.groupby("region") ax = groups.sum().plot(kind="bar", figsize=(11, 3.75)) ax.yaxis.get_major_formatter().set_scientific(False) ax.set_ylabel("# observations");