def __init__(self, path=None, xlim=None, ylim=None, tlim=None):
    """Initializes the NCEP Reanalysis gridded dataset.

    PARAMETERS
        path (text, optional) : Directory containing the '<var>.<year>.nc'
            data files. Defaults to a hard-coded local path.
        xlim, ylim (array like, optional) : Longitude and latitude limits
            used to crop the grid.
        tlim (array like, optional) : Accepted for interface symmetry with
            the other dataset classes; NOTE(review): not used anywhere in
            this initializer -- confirm intent.
    """
    # Initializes the variables to default values. The indices 'n', 'k',
    # 'j' and 'i' refer to the temporal, height, meridional and zonal
    # coordinates respectively. If one of these indexes is set to 'None',
    # then it is assumed infinite size, which is relevant for the 'time'
    # coordinate.
    self.attributes = dict()
    self.dimensions = dict(n=0, k=0, j=0, i=0)
    self.coordinates = dict(n=None, k=None, j=None, i=None)
    self.variables = dict()
    self.params = dict()
    self.data = dict()
    self.stencil_coeffs = dict()
    self.stencil_params = dict()
    self.alias = dict()
    # Sets global parameters for grid.
    if path == None:
        path = ('/home/sebastian/academia/data/ncep.reanalysis2/'
            'gaussian_grid')
    self.params['path'] = path
    self.params['var_list'] = []
    self.params['year_list'] = []
    # Generates list of files, tries to match them to the pattern and to
    # extract the time. To help understanding the naming convention and
    # pattern, see the following example:
    #   uwnd.2015.nc
    file_pattern = '(.*).([0-9]{4}).nc'
    flist = listdir(self.params['path'])
    flist, match = reglist(flist, file_pattern)
    self.params['file_list'] = flist
    # Gets list of variables from file match.
    _vars, _years = zip(*match)
    self.params['var_list'] = unique(_vars)
    self.params['year_list'] = unique(_years)
    # Loads data from first variable and loads longitude and latitude
    # data. We assume that all data is homogeneous throughout the
    # dataset. Then walks through each year and loads time vector.
    _var = self.params['var_list'][0]
    for _i, _year in enumerate(self.params['year_list']):
        fname = '{}.{}.nc'.format(_var, _year)
        # NOTE(review): the bare 'except' is relied upon to swallow the
        # NameError raised on the first iteration, when 'data' does not
        # exist yet; it also hides any genuine close() failure.
        try:
            data.close()
        except:
            pass
        data = self._open_file(fname)
        # Longitude and latitude are read only from the first file; the
        # time vector is accumulated across all years.
        if _i == 0:
            lon = data.variables['lon'].data
            lat = data.variables['lat'].data
            time = data.variables['time'].data
        else:
            time = hstack([time, data.variables['time'].data])
    # Time in dataset is given in `hours since 1800-1-1 00:00:0.0` and we
    # convert it to matplotlib's date format (days since 0001-01-01 UTC).
    if data.variables['time'].units == 'hours since 1800-1-1 00:00:0.0':
        self.params['t0'] = dates.date2num(dates.datetime.datetime(1800,
            1, 1, 0, 0))
        time = self.params['t0'] + time / 24.
    # If lon_0 is set, calculate how many indices have to be moved in
    # order for latitude array to start at lon_0.
    lon, lat, xlim, ylim, ii, jj = self.getLongitudeLatitudeLimits(lon,
        lat, xlim, ylim)
    self.params['xlim'], self.params['ylim'] = xlim, ylim
    self.params['lon_i'], self.params['lat_j'] = ii, jj
    # Initializes the grid attributes, dimensions, coordinates and
    # variables.
    self.name = 'ncep_reanalysis'
    self.description = ('NCEP Reanalysis project is analysis/forecast '
        'system to perform data assimilation using past data from 1979 '
        'owards.')
    self.attributes['institution'] = data.institution
    self.dimensions = dict(n=time.size, k=0, j=lat.size, i=lon.size)
    self.coordinates = dict(n='time', k='height', j='latitude',
        i='longitude')
    self.variables = dict(
        time = atlantis.data.Variable(),
        height = atlantis.data.get_standard_variable('height'),
        latitude = atlantis.data.get_standard_variable('latitude'),
        longitude = atlantis.data.get_standard_variable('longitude'),
        xm = atlantis.data.Variable(),
        ym = atlantis.data.Variable(),
    )
    #
    self.variables['time'].data = time
    self.variables['time'].canonical_units = 'days since 0001-01-01 UTC'
    #
    self.variables['height'].data = 0.
    self.variables['latitude'].data = lat
    self.variables['longitude'].data = lon
    # Zonal and meridional distances between grid points, in kilometres.
    self.variables['xm'].canonical_units = 'km'
    self.variables['xm'].description = 'Zonal distance.'
    self.variables['ym'].canonical_units = 'km'
    self.variables['ym'].description = 'Meridional distance.'
    self.variables['xm'].data, self.variables['ym'].data = (
        metergrid(self.variables['longitude'].data,
        self.variables['latitude'].data, units='km')
    )
    #
    data.close()
    # Walks through each variable file for the first year, reads their
    # attributes and adds to the dataset definition.
    self._message('\n')
    _year = self.params['year_list'][0]
    for _var in self.params['var_list']:
        fname = '{}.{}.nc'.format(_var, _year)
        data = self._open_file(fname)
        self._message('{}: '.format(_var))
        for _key in data.variables.keys():
            self._message('{} '.format(_key))
            if _key in ['time', 'time_bnds', 'level', 'level_bnds',
                    'lat', 'lon']:
                continue
            # Prefers a CF standard variable; falls back to a generic
            # Variable when the standard name lookup fails.
            # NOTE(review): the bare 'except' also swallows
            # KeyboardInterrupt/SystemExit.
            try:
                self.variables[_key] = atlantis.data.get_standard_variable(
                    data.variables[_key].standard_name,
                    units=data.variables[_key].units,
                    long_name=data.variables[_key].long_name,
                )
            except:
                self._message('* ')
                self.variables[_key] = atlantis.data.Variable(
                    units=data.variables[_key].units,
                    standard_name=data.variables[_key].standard_name,
                    long_name=data.variables[_key].long_name,
                    description=data.variables[_key].var_desc,
                )
            # Remembers which file prefix each variable key came from.
            self.alias[_key] = _var
        #
        self._message('\n')
        data.close()
    #
    return
def __init__(self, path=None, mask_file=None, xlim=None, ylim=None,
        tlim=None, useqd=False):
    """Initializes the AVISO merged MSLA gridded dataset.

    PARAMETERS
        path (text, optional) : Directory containing the per-dataset
            subdirectories ('h', 'uv', 'err') of compressed NetCDF files.
            Defaults to a hard-coded local path.
        mask_file (text, optional) : Path to a mask file; only stored in
            the parameters here.
        xlim, ylim (array like, optional) : Longitude and latitude limits
            used to crop the grid.
        tlim (array like, optional) : Start and end time limits; entries
            may be date strings or matplotlib date numbers.
        useqd (boolean, optional) : If true, only the quick-delivery sea
            surface height dataset ('h_qd') is read.
    """
    # Initializes the variables to default values. The indices 'n', 'k',
    # 'j' and 'i' refer to the temporal, height, meridional and zonal
    # coordinates respectively. If one of these indexes is set to 'None',
    # then it is assumed infinite size, which is relevant for the 'time'
    # coordinate.
    self.attributes = dict()
    self.dimensions = dict(n=0, k=0, j=0, i=0)
    self.coordinates = dict(n=None, k=None, j=None, i=None)
    self.variables = dict()
    self.params = dict()
    self.stencil_coeffs = dict()
    self.stencil_params = dict()
    # 'var_dict' maps NetCDF grid names (e.g. 'h_Grid_0001') to short
    # variable names, and 'var_tcid' maps short names back to
    # [dataset id, dataset var, grid] triplets.
    if useqd:
        self.params['datasets'] = [dict(id='h', var='h_qd')]
        self.params['var_dict'] = dict(h_Grid_0001 = 'h_qd')
        self.params['var_tcid'] = dict(h_qd=['h', 'h_qd', 'Grid_0001'])
    else:
        self.params['datasets'] = [dict(id='h', var='h'),
            dict(id='uv', var='uv'), dict(id='err', var='err')]
        self.params['var_dict'] = dict(
            h_Grid_0001 = 'h',
            uv_Grid_0001 = 'u',
            uv_Grid_0002 = 'v',
            err_Grid_0001 = 'err'
        )
        self.params['var_tcid'] = dict(
            h = ['h', 'h', 'Grid_0001'],
            u = ['uv', 'uv', 'Grid_0001'],
            v = ['uv', 'uv', 'Grid_0002'],
            err = ['err', 'err', 'Grid_0001']
        )
    # Creates an universally unique identifiers (UUID) for this instance
    self.params['uuid'] = str(uuid())
    # Sets global parameters for grid.
    if path == None:
        path = ('/home/sebastian/academia/data/aviso/msla/merged')
    self.params['path'] = path
    self.params['mask_file'] = mask_file
    self.params['missing_value'] = -9999.
    # Generates list of files, tries to match them to the pattern and to
    # extract the time. The pattern alternates over the dataset variable
    # names (e.g. 'h|uv|err').
    file_pattern = ('dt_ref_global_merged_msla_(%s)_(\d*)_(\d*)_(\d*)'
        '.nc.gz' % ('|'.join([item['var']
        for item in self.params['datasets']])))
    flist = listdir('%s/%s' % (self.params['path'],
        self.params['datasets'][0]['id']))
    flist.sort()
    flist, match = reglist(flist, file_pattern)
    # Convert dates to matplotlib format, i.e. days since 0001-01-01 UTC.
    # Each file is stamped at noon of its nominal day.
    time_list = array(dates.datestr2num(['%4s-%2s-%2s 12:00' %
        (item[1][:4], item[1][4:6], item[1][6:]) for item in match]))
    # If tlim are set, calculate the time limits of the dataset and
    # corresponding files. String entries are converted in place to
    # matplotlib date numbers.
    if tlim != None:
        for i, t in enumerate(tlim):
            if type(t) == str:
                tlim[i] = dates.datestr2num(t)
        #
        t_sel = flatnonzero(((time_list >= tlim[0]) &
            (time_list <= tlim[1])))
        time_list = time_list[t_sel]
    else:
        t_sel = range(len(time_list))
    # Keeps, per selected time step, the start/end/creation date stamps
    # extracted from the file name.
    fdict = [dict(start=match[n][1], end=match[n][2],
        creation=match[n][3]) for n in t_sel]
    self.params['file_list'] = fdict
    if len(flist) == 0:
        return
    # Reads first file in dataset to determine array geometry and
    # dimenstions (lon, lat)
    params = dict(path=self.params['path'],
        dataset=self.params['datasets'][0]['id'],
        datavar=self.params['datasets'][0]['var'],
        **self.params['file_list'][0])
    fname = self.create_filename(**params)
    data = self.read_file(fname)
    lat = data.variables['NbLatitudes'].data
    lon = data.variables['NbLongitudes'].data
    # If xlim and ylim are set, calculate how many indices have to be
    # moved in order for latitude array to start at xlim[0].
    lon, lat, xlim, ylim, ii, jj = self.getLongitudeLatitudeLimits(lon,
        lat, xlim, ylim)
    self.params['xlim'], self.params['ylim'] = xlim, ylim
    self.params['lon_i'], self.params['lat_j'] = ii, jj
    # Grid spacing; assumes uniformly spaced longitudes and latitudes.
    self.params['dlon'] = lon[1] - lon[0]
    self.params['dlat'] = lat[1] - lat[0]
    # Initializes the grid attributes, dimensions, coordinates and
    # variables, copying string attributes over from the NetCDF file.
    self.name = 'sea_level_anomaly_geostrophic_velocities'
    for attr, attr_value in vars(data).iteritems():
        if attr in ['mode', 'filename']:
            continue
        if type(attr_value) == str:
            if attr in ['name']:
                self.name = attr_value
            elif attr in ['description', 'summary', 'title']:
                self.description = attr_value
            else:
                self.attributes[attr.lower()] = attr_value
    self.dimensions = dict(n=time_list.size, k=1, j=lat.size, i=lon.size)
    self.coordinates = dict(n='time', k='height', j='latitude',
        i='longitude')
    #
    self.variables = dict(
        time = atlantis.data.variable(
            canonical_units='days since 0001-01-01 UTC',
            data=time_list,
        ),
        height = atlantis.data.get_standard_variable('height', data=[0.]),
        latitude = atlantis.data.get_standard_variable('latitude',
            data=lat),
        longitude = atlantis.data.get_standard_variable('longitude',
            data=lon),
        xm = atlantis.data.variable(
            canonical_units = 'km',
            description = 'Zonal distance.'
        ),
        ym = atlantis.data.variable(
            canonical_units = 'km',
            description = 'Meridional distance.'
        ),
    )
    # Zonal and meridional distances between grid points, in kilometres.
    self.variables['xm'].data, self.variables['ym'].data = (
        metergrid(self.variables['longitude'].data,
        self.variables['latitude'].data, unit='km')
    )
    # Walks through every dataset to read list of variables. The first
    # dataset file is already open from the geometry probe above.
    self.params['var_list'] = list()
    for i, dataset in enumerate(self.params['datasets']):
        if i > 0:
            params = dict(path=self.params['path'], dataset=dataset['id'],
                datavar=dataset['var'], **self.params['file_list'][0])
            fname = self.create_filename(**params)
            data = self.read_file(fname)
        # Walks through every variable in NetCDF file
        for var in data.variables.keys():
            if var in ['Grid_0001', 'Grid_0002']:
                nvar = self.params['var_dict']['{0}_{1}'.format(
                    dataset['id'], var)]
                attribs = dict(
                    missing_value = data.variables[var]._FillValue,
                    canonical_units = data.variables[var].units,
                    description = data.variables[var].long_name,
                    dataset = dataset,
                    variable = var
                )
                self.variables[nvar] = atlantis.data.variable(**attribs)
                self.params['var_list'].append(nvar)
        # Closes the data access and removes temporary NetCDF file
        self.close_file(data)
    return
def __init__(self, path=None, xlim=None, ylim=None, tlim=None):
    """Initializes the NCEP Reanalysis gridded dataset.

    PARAMETERS
        path (text, optional) : Directory containing the '<var>.<year>.nc'
            data files. Defaults to a hard-coded local path.
        xlim, ylim (array like, optional) : Longitude and latitude limits
            used to crop the grid.
        tlim (array like, optional) : Accepted for interface symmetry with
            the other dataset classes; NOTE(review): not used anywhere in
            this initializer -- confirm intent.
    """
    # Initializes the variables to default values. The indices 'n', 'k',
    # 'j' and 'i' refer to the temporal, height, meridional and zonal
    # coordinates respectively. If one of these indexes is set to 'None',
    # then it is assumed infinite size, which is relevant for the 'time'
    # coordinate.
    self.attributes = dict()
    self.dimensions = dict(n=0, k=0, j=0, i=0)
    self.coordinates = dict(n=None, k=None, j=None, i=None)
    self.variables = dict()
    self.params = dict()
    self.data = dict()
    self.stencil_coeffs = dict()
    self.stencil_params = dict()
    self.alias = dict()
    # Sets global parameters for grid.
    if path is None:
        path = ('/home/sebastian/academia/data/ncep.reanalysis2/'
                'gaussian_grid')
    self.params['path'] = path
    self.params['var_list'] = []
    self.params['year_list'] = []
    # Generates list of files, tries to match them to the pattern and to
    # extract the time. To help understanding the naming convention and
    # pattern, see the following example:
    #   uwnd.2015.nc
    file_pattern = '(.*).([0-9]{4}).nc'
    flist = listdir(self.params['path'])
    flist, match = reglist(flist, file_pattern)
    self.params['file_list'] = flist
    # Gets list of variables from file match.
    _vars, _years = zip(*match)
    self.params['var_list'] = unique(_vars)
    self.params['year_list'] = unique(_years)
    # Loads data from first variable and loads longitude and latitude
    # data. We assume that all data is homogeneous throughout the
    # dataset. Then walks through each year and loads time vector.
    _var = self.params['var_list'][0]
    # Explicit sentinel replaces the original bare 'try/except: pass'
    # around data.close(), which existed only to swallow the NameError of
    # the first iteration (and silently hid any real close() failure).
    data = None
    for _i, _year in enumerate(self.params['year_list']):
        fname = '{}.{}.nc'.format(_var, _year)
        if data is not None:
            data.close()
        data = self._open_file(fname)
        # Longitude and latitude are read only from the first file; the
        # time vector is accumulated across all years.
        if _i == 0:
            lon = data.variables['lon'].data
            lat = data.variables['lat'].data
            time = data.variables['time'].data
        else:
            time = hstack([time, data.variables['time'].data])
    # Time in dataset is given in `hours since 1800-1-1 00:00:0.0` and we
    # convert it to matplotlib's date format (days since 0001-01-01 UTC).
    if data.variables['time'].units == 'hours since 1800-1-1 00:00:0.0':
        self.params['t0'] = dates.date2num(
            dates.datetime.datetime(1800, 1, 1, 0, 0))
        time = self.params['t0'] + time / 24.
    # If lon_0 is set, calculate how many indices have to be moved in
    # order for latitude array to start at lon_0.
    lon, lat, xlim, ylim, ii, jj = self.getLongitudeLatitudeLimits(
        lon, lat, xlim, ylim)
    self.params['xlim'], self.params['ylim'] = xlim, ylim
    self.params['lon_i'], self.params['lat_j'] = ii, jj
    # Initializes the grid attributes, dimensions, coordinates and
    # variables.
    self.name = 'ncep_reanalysis'
    self.description = (
        'NCEP Reanalysis project is analysis/forecast '
        'system to perform data assimilation using past data from 1979 '
        'owards.')
    self.attributes['institution'] = data.institution
    self.dimensions = dict(n=time.size, k=0, j=lat.size, i=lon.size)
    self.coordinates = dict(n='time', k='height', j='latitude',
                            i='longitude')
    self.variables = dict(
        time=atlantis.data.Variable(),
        height=atlantis.data.get_standard_variable('height'),
        latitude=atlantis.data.get_standard_variable('latitude'),
        longitude=atlantis.data.get_standard_variable('longitude'),
        xm=atlantis.data.Variable(),
        ym=atlantis.data.Variable(),
    )
    #
    self.variables['time'].data = time
    self.variables['time'].canonical_units = 'days since 0001-01-01 UTC'
    #
    self.variables['height'].data = 0.
    self.variables['latitude'].data = lat
    self.variables['longitude'].data = lon
    # Zonal and meridional distances between grid points, in kilometres.
    self.variables['xm'].canonical_units = 'km'
    self.variables['xm'].description = 'Zonal distance.'
    self.variables['ym'].canonical_units = 'km'
    self.variables['ym'].description = 'Meridional distance.'
    self.variables['xm'].data, self.variables['ym'].data = (metergrid(
        self.variables['longitude'].data, self.variables['latitude'].data,
        units='km'))
    #
    data.close()
    # Walks through each variable file for the first year, reads their
    # attributes and adds to the dataset definition.
    self._message('\n')
    _year = self.params['year_list'][0]
    for _var in self.params['var_list']:
        fname = '{}.{}.nc'.format(_var, _year)
        data = self._open_file(fname)
        self._message('{}: '.format(_var))
        for _key in data.variables.keys():
            self._message('{} '.format(_key))
            if _key in [
                    'time', 'time_bnds', 'level', 'level_bnds', 'lat',
                    'lon'
            ]:
                continue
            # Prefers a CF standard variable; falls back to a generic
            # Variable when the lookup fails. The original bare 'except'
            # is narrowed to Exception so KeyboardInterrupt/SystemExit
            # are no longer swallowed.
            try:
                self.variables[_key] = atlantis.data.get_standard_variable(
                    data.variables[_key].standard_name,
                    units=data.variables[_key].units,
                    long_name=data.variables[_key].long_name,
                )
            except Exception:
                self._message('* ')
                self.variables[_key] = atlantis.data.Variable(
                    units=data.variables[_key].units,
                    standard_name=data.variables[_key].standard_name,
                    long_name=data.variables[_key].long_name,
                    description=data.variables[_key].var_desc,
                )
            # Remembers which file prefix each variable key came from.
            self.alias[_key] = _var
        #
        self._message('\n')
        data.close()
    #
    return
def __init__(self, path=None, mask_file=None, xlim=None, ylim=None):
    """Initializes the NCDC/NOAA SeaWinds daily wind stress dataset.

    PARAMETERS
        path (text, optional) : Directory containing the
            'tauxy<YYYYMMDD>.nc' data files. Defaults to a hard-coded
            local path.
        mask_file (text, optional) : Path to a mask file; only stored in
            the parameters here.
        xlim, ylim (array like, optional) : Longitude and latitude limits
            used to crop the grid.
    """
    # Initializes the variables to default values. The indices 'n', 'k',
    # 'j' and 'i' refer to the temporal, height, meridional and zonal
    # coordinates respectively. If one of these indexes is set to 'None',
    # then it is assumed infinite size, which is relevant for the 'time'
    # coordinate.
    self.attributes = dict()
    self.dimensions = dict(n=0, k=0, j=0, i=0)
    self.coordinates = dict(n=None, k=None, j=None, i=None)
    self.variables = dict()
    self.params = dict()
    self.stencil_coeffs = dict()
    self.stencil_params = dict()
    # Sets global parameters for grid.
    if path == None:
        path = ('/home/sebastian/academia/data/ncdc.noaa/seawinds/stress/'
            'daily')
    self.params['path'] = path
    self.params['mask_file'] = mask_file
    self.params['missing_value'] = -9999.
    # Generates list of files, tries to match them to the pattern and to
    # extract the time (the single capture group is the YYYYMMDD stamp).
    file_pattern = 'tauxy([0-9]{8}).nc'
    flist = listdir(self.params['path'])
    flist, match = reglist(flist, file_pattern)
    self.params['file_list'] = flist
    if len(flist) == 0:
        return
    # Convert dates to matplotlib format, i.e. days since 0001-01-01 UTC.
    time_list = array([dates.datestr2num(item) for item in match])
    # Reads first file in dataset to determine array geometry and
    # dimenstions (lon, lat)
    data = netcdf(
        '%s/%s' % (self.params['path'], self.params['file_list'][0]),
        'r')
    for var in data.variables.keys():
        if var in ['latitude', 'lat']:
            lat = data.variables[var].data
        elif var in ['longitude', 'lon']:
            lon = data.variables[var].data
    # If xlim and ylim are set, calculate how many indices have to be
    # moved in order for latitude array to start at xlim[0].
    # NOTE(review): '|' works here because both operands are booleans,
    # but logical 'or' is the conventional form.
    if (xlim != None) | (ylim != None):
        if xlim == None:
            xlim = (lon.min(), lon.max())
        if ylim == None:
            ylim = (lat.min(), lat.max())
        # Wraps longitudes around xlim[1] and selects the indices that
        # fall inside the requested window, keeping them sorted.
        LON = lon_n(lon, xlim[1])
        i = argsort(LON)
        selx = i[flatnonzero((LON[i] >= xlim[0]) & (LON[i] <= xlim[1]))]
        sely = flatnonzero((lat >= ylim[0]) & (lat <= ylim[1]))
        ii, jj = meshgrid(selx, sely)
        lon = LON[selx]
        lat = lat[sely]
        self.params['xlim'] = xlim
        self.params['ylim'] = ylim
        self.params['lon_i'] = ii
        self.params['lat_j'] = jj
    # Grid spacing; assumes uniformly spaced longitudes and latitudes.
    self.params['dlon'] = lon[1] - lon[0]
    self.params['dlat'] = lat[1] - lat[0]
    # Initializes the grid attributes, dimensions, coordinates and
    # variables, copying string attributes over from the NetCDF file.
    self.name = 'sea_surface_wind_stress'
    for attr, attr_value in vars(data).iteritems():
        if attr in ['mode', 'filename']:
            continue
        if type(attr_value) == str:
            if attr in ['name']:
                self.name = attr_value
            elif attr in ['description', 'summary']:
                self.description = attr_value
            else:
                self.attributes[attr.lower()] = attr_value
    self.dimensions = dict(n=time_list.size, k=1, j=lat.size, i=lon.size)
    self.coordinates = dict(n='time', k='height', j='latitude',
        i='longitude')
    #
    self.variables = dict(
        time = atlantis.data.variable(
            canonical_units='days since 0001-01-01 UTC',
            data=time_list,
        ),
        height = atlantis.data.get_standard_variable('height', data=[0.]),
        latitude = atlantis.data.get_standard_variable('latitude',
            data=lat),
        longitude = atlantis.data.get_standard_variable('longitude',
            data=lon),
        xm = atlantis.data.variable(
            canonical_units = 'km',
            description = 'Zonal distance.'
        ),
        ym = atlantis.data.variable(
            canonical_units = 'km',
            description = 'Meridional distance.'
        ),
    )
    # Zonal and meridional distances between grid points, in kilometres.
    self.variables['xm'].data, self.variables['ym'].data = (
        metergrid(self.variables['longitude'].data,
        self.variables['latitude'].data, unit='km')
    )
    # Registers the wind stress variables found in the file, translating
    # NetCDF attributes into atlantis variable attributes.
    self.params['var_list'] = list()
    for var in data.variables.keys():
        if var in ['tau', 'taux', 'tauy', 'tau_div', 'tau_curl']:
            attribs = dict()
            for attr, attr_value in vars(data.variables[var]).iteritems():
                if attr == '_FillValue':
                    attribs['missing_value'] = attr_value
                elif attr == 'data':
                    continue
                elif attr == 'long_name':
                    attribs['description'] = attr_value
                elif attr == 'units':
                    # Normalizes the units notation to CF style.
                    if attr_value == 'N/m**2':
                        a = 'N m-2'
                    else:
                        a = attr_value
                    attribs['canonical_units'] = a
                else:
                    attribs[attr] = attr_value
            self.variables[var] = atlantis.data.variable(**attribs)
            self.params['var_list'].append(var)
            # Falls back to the dataset-wide missing value when the file
            # does not define one.
            if self.variables[var].missing_value == None:
                self.variables[var].missing_value = (
                    self.params['missing_value'])
    #
    data.close()
    return
def make_index(self, profile=True):
    """Builds a time index of all data files for every mission.

    Scans the '<path>/<mission>/<year>/<dataset>' directory tree, matches
    file names against the expected pattern and extracts start/end times,
    cycle and orbit numbers from them.

    PARAMETERS
        profile (boolean, optional) : If true, writes progress messages
            to standard output.

    RETURNS
        time_mission (dict) : Per mission, arrays of central data times
            and of the matching relative file paths.
        time_dataset (dict) : Maps each central time to the list of
            (mission, dataset, fname, cycle, orbit) descriptors observed
            at that time.
    """
    t1 = time()
    if profile:
        s = '\rBuilding preliminary time array...'
        stdout.write(s)
        stdout.flush()
    time_mission = dict()
    time_dataset = dict()
    N = len(self.params['missions'])
    for i, mission in enumerate(self.params['missions']):
        t2 = time()
        tt1 = time()
        #
        mpath = '%s/%s' % (self.params['path'], mission)  # Mission path
        ylist = listdir(mpath)  # Year list in mission path
        Nyear = len(ylist)
        file_pattern = ('%s_%s_%s_%s_%s_(\d*)_(\d*)_(\d*)_(\d*)_(\d*)_'
            '(\d*).nc.gz') % ('GW', self.params['level'].upper(),
            self._labels[self.params['product']], self._labels[mission],
            self.params['delay'].upper())
        # Initializes time mission dictionary
        time_mission[mission] = dict(data=[], file=[])
        for j, yr in enumerate(ylist):
            tt2 = time()
            # Lists all the directories in year
            dlist = listdir('%s/%s' % (mpath, yr))
            for dset in dlist:
                # Lists all the data files in mission in a given year and
                # matches it with the file pattern.
                cur_path = '%s/%s/%s' % (mpath, yr, dset)
                flist = listdir(cur_path)
                flist.sort()
                flist, match = reglist(flist, file_pattern)
                # Convert data and product dates to matplotlib format,
                # i.e. days since 0001-01-01 UTC and appends to the
                # global mission and dataset time dictionaries.
                for k, item in enumerate(match):
                    datetime_start = dates.datestr2num(
                        '%4s-%2s-%2s %2s:%2s:%2s' % (item[0][0:4],
                        item[0][4:6], item[0][6:8], item[1][0:2],
                        item[1][2:4], item[1][4:6])
                    )
                    datetime_end = dates.datestr2num(
                        '%4s-%2s-%2s %2s:%2s:%2s' % (item[2][0:4],
                        item[2][4:6], item[2][6:8], item[3][0:2],
                        item[3][2:4], item[3][4:6])
                    )
                    # Central time of the measurement window.
                    time_data = (datetime_start + datetime_end) / 2.
                    cycle = int(item[4])
                    orbit = int(item[5])
                    time_mission[mission]['data'].append(time_data)
                    # File path relative to the mission directory.
                    fname = '%s/%s/%s' % (yr, dset, flist[k])
                    descriptor = (mission, dset, fname, cycle, orbit)
                    if time_data not in time_dataset.keys():
                        time_dataset[time_data] = [descriptor]
                    else:
                        time_dataset[time_data].append(descriptor)
                    #
                    time_mission[mission]['file'].append(fname)
            #
            # Profiling
            # NOTE(review): 't0' is never assigned in this method; unless
            # it is a module-level global, this raises a NameError when
            # profile is true -- confirm against the rest of the module.
            if profile:
                s = '\rBuilding preliminary time array for %s: %s ' % (
                    self._missions[mission],
                    profiler(Nyear, j+1, t0, tt1, tt2),
                )
                stdout.write(s)
                stdout.flush()
        # Converts the per-mission lists into arrays.
        time_mission[mission]['data'] = array(
            time_mission[mission]['data']
        )
        time_mission[mission]['file'] = array(
            time_mission[mission]['file']
        )
        # Profiling
        if profile:
            s = '\rBuilding preliminary time array... %s ' % (profiler(N,
                i+1, t0, t1, t2),)
            stdout.write(s)
            stdout.flush()
    #
    if profile:
        stdout.write('\n')
        stdout.flush()
    return time_mission, time_dataset
def make_index(self, profile=True):
    """Builds a time index of all data files for every mission.

    Scans the '<path>/<mission>/<year>/<dataset>' directory tree, matches
    file names against the expected pattern and extracts start/end times,
    cycle and orbit numbers from them.

    PARAMETERS
        profile (boolean, optional) : If true, writes progress messages
            to standard output.

    RETURNS
        time_mission (dict) : Per mission, arrays of central data times
            and of the matching relative file paths.
        time_dataset (dict) : Maps each central time to the list of
            (mission, dataset, fname, cycle, orbit) descriptors observed
            at that time.
    """
    t1 = time()
    if profile:
        s = '\rBuilding preliminary time array...'
        stdout.write(s)
        stdout.flush()
    time_mission = dict()
    time_dataset = dict()
    N = len(self.params['missions'])
    for i, mission in enumerate(self.params['missions']):
        t2 = time()
        tt1 = time()
        #
        mpath = '%s/%s' % (self.params['path'], mission)  # Mission path
        ylist = listdir(mpath)  # Year list in mission path
        Nyear = len(ylist)
        file_pattern = ('%s_%s_%s_%s_%s_(\d*)_(\d*)_(\d*)_(\d*)_(\d*)_'
                        '(\d*).nc.gz') % ('GW', self.params['level'].upper(
                        ), self._labels[self.params['product']],
                        self._labels[mission], self.params['delay'].upper())
        # Initializes time mission dictionary
        time_mission[mission] = dict(data=[], file=[])
        for j, yr in enumerate(ylist):
            tt2 = time()
            # Lists all the directories in year
            dlist = listdir('%s/%s' % (mpath, yr))
            for dset in dlist:
                # Lists all the data files in mission in a given year and
                # matches it with the file pattern.
                cur_path = '%s/%s/%s' % (mpath, yr, dset)
                flist = listdir(cur_path)
                flist.sort()
                flist, match = reglist(flist, file_pattern)
                # Convert data and product dates to matplotlib format,
                # i.e. days since 0001-01-01 UTC and appends to the
                # global mission and dataset time dictionaries.
                for k, item in enumerate(match):
                    datetime_start = dates.datestr2num(
                        '%4s-%2s-%2s %2s:%2s:%2s' %
                        (item[0][0:4], item[0][4:6], item[0][6:8],
                         item[1][0:2], item[1][2:4], item[1][4:6]))
                    datetime_end = dates.datestr2num(
                        '%4s-%2s-%2s %2s:%2s:%2s' %
                        (item[2][0:4], item[2][4:6], item[2][6:8],
                         item[3][0:2], item[3][2:4], item[3][4:6]))
                    # Central time of the measurement window.
                    time_data = (datetime_start + datetime_end) / 2.
                    cycle = int(item[4])
                    orbit = int(item[5])
                    time_mission[mission]['data'].append(time_data)
                    # File path relative to the mission directory.
                    fname = '%s/%s/%s' % (yr, dset, flist[k])
                    descriptor = (mission, dset, fname, cycle, orbit)
                    if time_data not in time_dataset.keys():
                        time_dataset[time_data] = [descriptor]
                    else:
                        time_dataset[time_data].append(descriptor)
                    #
                    time_mission[mission]['file'].append(fname)
            #
            # Profiling
            # NOTE(review): 't0' is never assigned in this method; unless
            # it is a module-level global, this raises a NameError when
            # profile is true -- confirm against the rest of the module.
            if profile:
                s = '\rBuilding preliminary time array for %s: %s ' % (
                    self._missions[mission],
                    profiler(Nyear, j + 1, t0, tt1, tt2),
                )
                stdout.write(s)
                stdout.flush()
        # Converts the per-mission lists into arrays.
        time_mission[mission]['data'] = array(
            time_mission[mission]['data'])
        time_mission[mission]['file'] = array(
            time_mission[mission]['file'])
        # Profiling
        if profile:
            s = '\rBuilding preliminary time array... %s ' % (profiler(
                N, i + 1, t0, t1, t2), )
            stdout.write(s)
            stdout.flush()
    #
    if profile:
        stdout.write('\n')
        stdout.flush()
    return time_mission, time_dataset
def __init__(self, path=None, mask_file=None, xlim=None, ylim=None):
    """Initializes the NCDC/NOAA SeaWinds daily wind stress dataset.

    PARAMETERS
        path (text, optional) : Directory containing the
            'tauxy<YYYYMMDD>.nc' data files. Defaults to a hard-coded
            local path.
        mask_file (text, optional) : Path to a mask file; only stored in
            the parameters here.
        xlim, ylim (array like, optional) : Longitude and latitude limits
            used to crop the grid.
    """
    # Initializes the variables to default values. The indices 'n', 'k',
    # 'j' and 'i' refer to the temporal, height, meridional and zonal
    # coordinates respectively. If one of these indexes is set to 'None',
    # then it is assumed infinite size, which is relevant for the 'time'
    # coordinate.
    self.attributes = dict()
    self.dimensions = dict(n=0, k=0, j=0, i=0)
    self.coordinates = dict(n=None, k=None, j=None, i=None)
    self.variables = dict()
    self.params = dict()
    self.stencil_coeffs = dict()
    self.stencil_params = dict()
    # Sets global parameters for grid.
    if path is None:
        path = ('/home/sebastian/academia/data/ncdc.noaa/seawinds/stress/'
                'daily')
    self.params['path'] = path
    self.params['mask_file'] = mask_file
    self.params['missing_value'] = -9999.
    # Generates list of files, tries to match them to the pattern and to
    # extract the time (the single capture group is the YYYYMMDD stamp).
    file_pattern = 'tauxy([0-9]{8}).nc'
    flist = listdir(self.params['path'])
    flist, match = reglist(flist, file_pattern)
    self.params['file_list'] = flist
    if len(flist) == 0:
        return
    # Convert dates to matplotlib format, i.e. days since 0001-01-01 UTC.
    time_list = array([dates.datestr2num(item) for item in match])
    # Reads first file in dataset to determine array geometry and
    # dimenstions (lon, lat)
    data = netcdf('%s/%s' % (self.params['path'],
                             self.params['file_list'][0]), 'r')
    for var in data.variables.keys():
        if var in ['latitude', 'lat']:
            lat = data.variables[var].data
        elif var in ['longitude', 'lon']:
            lon = data.variables[var].data
    # If xlim and ylim are set, calculate how many indices have to be
    # moved in order for latitude array to start at xlim[0]. The original
    # code compared against None with '==' / '!=' and combined the tests
    # with bitwise '|'; identity checks and logical 'or' are the correct
    # idiom (PEP 8) and behave identically here.
    if (xlim is not None) or (ylim is not None):
        if xlim is None:
            xlim = (lon.min(), lon.max())
        if ylim is None:
            ylim = (lat.min(), lat.max())
        # Wraps longitudes around xlim[1] and selects the indices that
        # fall inside the requested window, keeping them sorted.
        LON = lon_n(lon, xlim[1])
        i = argsort(LON)
        selx = i[flatnonzero((LON[i] >= xlim[0]) & (LON[i] <= xlim[1]))]
        sely = flatnonzero((lat >= ylim[0]) & (lat <= ylim[1]))
        ii, jj = meshgrid(selx, sely)
        lon = LON[selx]
        lat = lat[sely]
        self.params['xlim'] = xlim
        self.params['ylim'] = ylim
        self.params['lon_i'] = ii
        self.params['lat_j'] = jj
    # Grid spacing; assumes uniformly spaced longitudes and latitudes.
    self.params['dlon'] = lon[1] - lon[0]
    self.params['dlat'] = lat[1] - lat[0]
    # Initializes the grid attributes, dimensions, coordinates and
    # variables, copying string attributes over from the NetCDF file.
    self.name = 'sea_surface_wind_stress'
    for attr, attr_value in vars(data).iteritems():
        if attr in ['mode', 'filename']:
            continue
        if type(attr_value) == str:
            if attr in ['name']:
                self.name = attr_value
            elif attr in ['description', 'summary']:
                self.description = attr_value
            else:
                self.attributes[attr.lower()] = attr_value
    self.dimensions = dict(n=time_list.size, k=1, j=lat.size, i=lon.size)
    self.coordinates = dict(n='time', k='height', j='latitude',
                            i='longitude')
    #
    self.variables = dict(
        time=atlantis.data.variable(
            canonical_units='days since 0001-01-01 UTC',
            data=time_list,
        ),
        height=atlantis.data.get_standard_variable('height', data=[0.]),
        latitude=atlantis.data.get_standard_variable('latitude',
                                                     data=lat),
        longitude=atlantis.data.get_standard_variable('longitude',
                                                      data=lon),
        xm=atlantis.data.variable(canonical_units='km',
                                  description='Zonal distance.'),
        ym=atlantis.data.variable(canonical_units='km',
                                  description='Meridional distance.'),
    )
    # Zonal and meridional distances between grid points, in kilometres.
    self.variables['xm'].data, self.variables['ym'].data = (metergrid(
        self.variables['longitude'].data, self.variables['latitude'].data,
        unit='km'))
    # Registers the wind stress variables found in the file, translating
    # NetCDF attributes into atlantis variable attributes.
    self.params['var_list'] = list()
    for var in data.variables.keys():
        if var in ['tau', 'taux', 'tauy', 'tau_div', 'tau_curl']:
            attribs = dict()
            for attr, attr_value in vars(data.variables[var]).iteritems():
                if attr == '_FillValue':
                    attribs['missing_value'] = attr_value
                elif attr == 'data':
                    continue
                elif attr == 'long_name':
                    attribs['description'] = attr_value
                elif attr == 'units':
                    # Normalizes the units notation to CF style.
                    if attr_value == 'N/m**2':
                        a = 'N m-2'
                    else:
                        a = attr_value
                    attribs['canonical_units'] = a
                else:
                    attribs[attr] = attr_value
            self.variables[var] = atlantis.data.variable(**attribs)
            self.params['var_list'].append(var)
            # Falls back to the dataset-wide missing value when the file
            # does not define one.
            if self.variables[var].missing_value is None:
                self.variables[var].missing_value = (
                    self.params['missing_value'])
    #
    data.close()
    return
def __init__(self, path=None, mask_file=None, xlim=None, ylim=None,
             tlim=None, useqd=False):
    """Initializes the AVISO merged MSLA gridded dataset.

    PARAMETERS
        path (text, optional) : Directory containing the per-dataset
            subdirectories ('h', 'uv', 'err') of compressed NetCDF files.
            Defaults to a hard-coded local path.
        mask_file (text, optional) : Path to a mask file; only stored in
            the parameters here.
        xlim, ylim (array like, optional) : Longitude and latitude limits
            used to crop the grid.
        tlim (array like, optional) : Start and end time limits; entries
            may be date strings or matplotlib date numbers.
        useqd (boolean, optional) : If true, only the quick-delivery sea
            surface height dataset ('h_qd') is read.
    """
    # Initializes the variables to default values. The indices 'n', 'k',
    # 'j' and 'i' refer to the temporal, height, meridional and zonal
    # coordinates respectively. If one of these indexes is set to 'None',
    # then it is assumed infinite size, which is relevant for the 'time'
    # coordinate.
    self.attributes = dict()
    self.dimensions = dict(n=0, k=0, j=0, i=0)
    self.coordinates = dict(n=None, k=None, j=None, i=None)
    self.variables = dict()
    self.params = dict()
    self.stencil_coeffs = dict()
    self.stencil_params = dict()
    # 'var_dict' maps NetCDF grid names (e.g. 'h_Grid_0001') to short
    # variable names, and 'var_tcid' maps short names back to
    # [dataset id, dataset var, grid] triplets.
    if useqd:
        self.params['datasets'] = [dict(id='h', var='h_qd')]
        self.params['var_dict'] = dict(h_Grid_0001='h_qd')
        self.params['var_tcid'] = dict(h_qd=['h', 'h_qd', 'Grid_0001'])
    else:
        self.params['datasets'] = [
            dict(id='h', var='h'),
            dict(id='uv', var='uv'),
            dict(id='err', var='err')
        ]
        self.params['var_dict'] = dict(h_Grid_0001='h',
                                       uv_Grid_0001='u',
                                       uv_Grid_0002='v',
                                       err_Grid_0001='err')
        self.params['var_tcid'] = dict(h=['h', 'h', 'Grid_0001'],
                                       u=['uv', 'uv', 'Grid_0001'],
                                       v=['uv', 'uv', 'Grid_0002'],
                                       err=['err', 'err', 'Grid_0001'])
    # Creates an universally unique identifiers (UUID) for this instance
    self.params['uuid'] = str(uuid())
    # Sets global parameters for grid.
    if path == None:
        path = ('/home/sebastian/academia/data/aviso/msla/merged')
    self.params['path'] = path
    self.params['mask_file'] = mask_file
    self.params['missing_value'] = -9999.
    # Generates list of files, tries to match them to the pattern and to
    # extract the time. The pattern alternates over the dataset variable
    # names (e.g. 'h|uv|err').
    file_pattern = (
        'dt_ref_global_merged_msla_(%s)_(\d*)_(\d*)_(\d*)'
        '.nc.gz' %
        ('|'.join([item['var'] for item in self.params['datasets']])))
    flist = listdir(
        '%s/%s' % (self.params['path'], self.params['datasets'][0]['id']))
    flist.sort()
    flist, match = reglist(flist, file_pattern)
    # Convert dates to matplotlib format, i.e. days since 0001-01-01 UTC.
    # Each file is stamped at noon of its nominal day.
    time_list = array(
        dates.datestr2num([
            '%4s-%2s-%2s 12:00' % (item[1][:4], item[1][4:6], item[1][6:])
            for item in match
        ]))
    # If tlim are set, calculate the time limits of the dataset and
    # corresponding files. String entries are converted in place to
    # matplotlib date numbers.
    if tlim != None:
        for i, t in enumerate(tlim):
            if type(t) == str:
                tlim[i] = dates.datestr2num(t)
        #
        t_sel = flatnonzero(
            ((time_list >= tlim[0]) & (time_list <= tlim[1])))
        time_list = time_list[t_sel]
    else:
        t_sel = range(len(time_list))
    # Keeps, per selected time step, the start/end/creation date stamps
    # extracted from the file name.
    fdict = [
        dict(start=match[n][1], end=match[n][2], creation=match[n][3])
        for n in t_sel
    ]
    self.params['file_list'] = fdict
    if len(flist) == 0:
        return
    # Reads first file in dataset to determine array geometry and
    # dimenstions (lon, lat)
    params = dict(path=self.params['path'],
                  dataset=self.params['datasets'][0]['id'],
                  datavar=self.params['datasets'][0]['var'],
                  **self.params['file_list'][0])
    fname = self.create_filename(**params)
    data = self.read_file(fname)
    lat = data.variables['NbLatitudes'].data
    lon = data.variables['NbLongitudes'].data
    # If xlim and ylim are set, calculate how many indices have to be
    # moved in order for latitude array to start at xlim[0].
    lon, lat, xlim, ylim, ii, jj = self.getLongitudeLatitudeLimits(
        lon, lat, xlim, ylim)
    self.params['xlim'], self.params['ylim'] = xlim, ylim
    self.params['lon_i'], self.params['lat_j'] = ii, jj
    # Grid spacing; assumes uniformly spaced longitudes and latitudes.
    self.params['dlon'] = lon[1] - lon[0]
    self.params['dlat'] = lat[1] - lat[0]
    # Initializes the grid attributes, dimensions, coordinates and
    # variables, copying string attributes over from the NetCDF file.
    self.name = 'sea_level_anomaly_geostrophic_velocities'
    for attr, attr_value in vars(data).iteritems():
        if attr in ['mode', 'filename']:
            continue
        if type(attr_value) == str:
            if attr in ['name']:
                self.name = attr_value
            elif attr in ['description', 'summary', 'title']:
                self.description = attr_value
            else:
                self.attributes[attr.lower()] = attr_value
    self.dimensions = dict(n=time_list.size, k=1, j=lat.size, i=lon.size)
    self.coordinates = dict(n='time', k='height', j='latitude',
                            i='longitude')
    #
    self.variables = dict(
        time=atlantis.data.variable(
            canonical_units='days since 0001-01-01 UTC',
            data=time_list,
        ),
        height=atlantis.data.get_standard_variable('height', data=[0.]),
        latitude=atlantis.data.get_standard_variable('latitude',
                                                     data=lat),
        longitude=atlantis.data.get_standard_variable('longitude',
                                                      data=lon),
        xm=atlantis.data.variable(canonical_units='km',
                                  description='Zonal distance.'),
        ym=atlantis.data.variable(canonical_units='km',
                                  description='Meridional distance.'),
    )
    # Zonal and meridional distances between grid points, in kilometres.
    self.variables['xm'].data, self.variables['ym'].data = (metergrid(
        self.variables['longitude'].data, self.variables['latitude'].data,
        unit='km'))
    # Walks through every dataset to read list of variables. The first
    # dataset file is already open from the geometry probe above.
    self.params['var_list'] = list()
    for i, dataset in enumerate(self.params['datasets']):
        if i > 0:
            params = dict(path=self.params['path'],
                          dataset=dataset['id'],
                          datavar=dataset['var'],
                          **self.params['file_list'][0])
            fname = self.create_filename(**params)
            data = self.read_file(fname)
        # Walks through every variable in NetCDF file
        for var in data.variables.keys():
            if var in ['Grid_0001', 'Grid_0002']:
                nvar = self.params['var_dict']['{0}_{1}'.format(
                    dataset['id'], var)]
                attribs = dict(
                    missing_value=data.variables[var]._FillValue,
                    canonical_units=data.variables[var].units,
                    description=data.variables[var].long_name,
                    dataset=dataset,
                    variable=var)
                self.variables[nvar] = atlantis.data.variable(**attribs)
                self.params['var_list'].append(nvar)
        # Closes the data access and removes temporary NetCDF file
        self.close_file(data)
    return
def __init__(self, delay='dt', missions=None, zone='global', product='sla',
        variable='vxxc', path=None, profile=True):
    """Initializes the dataset class for reading along-track gridded
    sequential data from the SSALTO/DUACS products distributed by
    Aviso.

    PARAMETERS
        delay (text, optional) :
            Selects whether delayed time products (dt, default) or
            near-real time products are read.
        missions (text, array like, optional) :
            Determines the satellite missions to be selected (i. e.
            e1, e2, tp, tpn, g2, j1, j1n, j2, en, enn, c2, al). If set
            to None, all available missions are used.
        zone (text, optional) :
            Geographic coverage of the selected products,
                global -- Global geographic coverage;
                med -- Mediterranean;
                blacksea -- Black Sea;
                moz -- Mozambique;
                arctic -- Arctic;
                europe -- Europe.
        product (text, optional) :
            Variable to be read (sla -- sea level anomaly or
            adt -- absolute dynamic topography)
        variable (text, optional) :
            Either 'vfec' for validated, filtered, sub-sampled and
            LWE-corrected; or 'vxxc' for validated, non-filtered,
            non-sub-sampled and LWE-corrected data.
        path (text, optional) :
            Path to the dataset files.
        profile (boolean, optional) :
            If True (default), writes progress information to stdout
            while the preliminary time array is built.

    RAISES
        ValueError :
            If any input parameter is not found in the corresponding
            class lookup table (_delays, _missions, _zones, _products,
            _filterings).

    """
    t0 = time()
    # Checks all the input parameters for consistency.
    if delay not in self._delays:
        raise ValueError('Invalid delay parameter "%s".' % (delay))
    if missions is None:
        missions = list(self._missions.keys())
    elif isinstance(missions, str):
        if missions in self._missions:
            missions = [missions]
        else:
            raise ValueError('Invalid mission "%s".' % (missions))
    elif isinstance(missions, list):
        for item in missions:
            if item not in self._missions:
                raise ValueError('Invalid mission "%s".' % (item))
    else:
        raise ValueError('Invalid mission "%s".' % (missions))
    if zone not in self._zones:
        raise ValueError('Invalid geographic zone "%s".' % (zone))
    if product not in self._products:
        raise ValueError('Invalid product "%s".' % (product))
    if variable not in self._filterings:
        raise ValueError('Invalid variable "%s".' % (variable))

    # Initializes parameters and attributes in class variable.
    self.attributes = dict()
    self.dimensions = dict(n=0, k=0, j=0, i=0)
    self.coordinates = dict(n=None, k=None, j=None, i=None)
    self.variables = dict()
    self.params = dict(
        delay = delay,
        missions = missions,
        zone = zone,
        product = product,
        variable = variable
    )

    # Creates an universally unique identifier (UUID) for this instance.
    self.params['uuid'] = str(uuid())

    # Sets path and missing value parameters.
    if path is None:
        path = '%s/%s/%s/%s/%s' % ('/academia/data/raw/aviso',
            self._delays[delay], 'along-track',
            self._filterings[variable], product)
    self.params['path'] = path
    self.params['missing_value'] = -9999.

    # Determines the temporal range of the whole data set per mission.
    t1 = time()
    if profile:
        s = '\rBuilding preliminary time array...'
        stdout.write(s)
        stdout.flush()
    time_mission = dict()
    time_dataset = dict()
    N = len(self.params['missions'])
    for i, mission in enumerate(self.params['missions']):
        t2 = time()
        #
        mpath = '%s/%s' % (path, mission)  # Mission path
        ylist = listdir(mpath)  # Year list in mission path
        # Raw string: '\d' matches digits for the data and product
        # dates embedded in the file names.
        file_pattern = (r'%s_%s_%s_%s_%s_(\d*)_(\d*).nc.gz' %
            (delay, zone, mission, product, variable))
        time_mission[mission] = dict(data=[], product=[], file=[])
        for yr in ylist:
            # Lists all the data files in mission in a given year and
            # matches it with the file pattern.
            flist = listdir('%s/%s' % (mpath, yr))
            flist.sort()
            flist, match = reglist(flist, file_pattern)
            # Convert data and product dates to matplotlib format, i.e.
            # days since 0001-01-01 UTC and appends to the global
            # mission and dataset time dictionaries.
            for j, item in enumerate(match):
                time_data = dates.datestr2num('%4s-%2s-%2s 12:00' %
                    (item[0][:4], item[0][4:6], item[0][6:]))
                time_mission[mission]['data'].append(time_data)
                fname = '%s/%s' % (yr, flist[j])
                descriptor = (mission, fname)
                # Groups every mission file sharing the same data date.
                if time_data not in time_dataset:
                    time_dataset[time_data] = [descriptor]
                else:
                    time_dataset[time_data].append(descriptor)
                #
                time_product = dates.datestr2num('%4s-%2s-%2s 12:00' %
                    (item[1][:4], item[1][4:6], item[1][6:]))
                time_mission[mission]['product'].append(time_product)
                #
                time_mission[mission]['file'].append(fname)
        #
        time_mission[mission]['data'] = array(
            time_mission[mission]['data']
        )
        time_mission[mission]['product'] = array(
            time_mission[mission]['product']
        )
        time_mission[mission]['file'] = array(
            time_mission[mission]['file']
        )
        # Profiling
        if profile:
            s = '\rBuilding preliminary time array... %s ' % (
                profiler(N, i+1, t0, t1, t2),)
            stdout.write(s)
            stdout.flush()
    #
    if profile:
        stdout.write('\n')
        stdout.flush()
    #
    self.attributes['time_mission'] = time_mission
    self.attributes['time_dataset'] = time_dataset

    # Updates dimensions, coordinates and creates the grid variables.
    # NOTE(fix): a misplaced closing parenthesis previously passed
    # 'height', 'latitude' and 'longitude' as keyword arguments to the
    # 'time' variable constructor; they are separate dataset variables,
    # consistent with the other dataset classes in this module.
    self.dimensions['n'] = len(time_dataset)
    self.coordinates['n'] = 'time'
    self.variables['time'] = atlantis.data.variable(
        canonical_units = 'days since 0001-01-01 UTC',
        data = array(sorted(time_dataset.keys())),
    )
    self.variables['height'] = atlantis.data.get_standard_variable(
        'height', data=[0.])
    self.variables['latitude'] = atlantis.data.get_standard_variable(
        'latitude')
    self.variables['longitude'] = atlantis.data.get_standard_variable(
        'longitude')
    return None