def setUp(self):
    ''' create two test variables '''
    # create axis and variable instances (make *copies* of data and attributes!)
    x1 = np.random.randn(180)
    xax1 = Axis(name='X1-Axis', units='X Units', length=len(x1))
    var1 = Variable(axes=(xax1,), data=x1.copy(), atts=dict(name='blue', units='units'))
    self.var1 = var1; self.xax1 = xax1
    x2 = np.random.randn(180)
    xax2 = Axis(name='X2-Axis', units='X Units', length=len(x2))
    var2 = Variable(name='purple', units='units', axes=(xax2,), data=x2)
    self.var2 = var2; self.xax2 = xax2
    # actual normal distribution
    self.dist = 'norm'
    distvar = VarRV(name=self.dist, units='units', dist=self.dist, params=(0, 1))
    self.distVar = distvar
    # add to list
    self.vars = [var1, var2]
    self.axes = [xax1, xax2]
def setUp(self):
    ''' create two test variables '''
    # create axis and variable instances (make *copies* of data and attributes!)
    x1 = np.linspace(0, 10, 15); x2 = np.linspace(2, 8, 18)
    if self.ldatetime:
        start_datetime, end_datetime = pd.to_datetime('1981-05-01'), pd.to_datetime('1981-05-16')
        t1 = np.arange(start_datetime, end_datetime, dtype='datetime64[D]')
        xax1 = Axis(name='Time1-Axis', units='X Time', coord=t1)
        t2 = np.arange(start_datetime, end_datetime + np.timedelta64(3, 'D'), dtype='datetime64[D]')
        xax2 = Axis(name='Time2-Axis', units='X Time', coord=t2)
    else:
        xax1 = Axis(name='X1-Axis', units='X Units', coord=x1)
        xax2 = Axis(name='X2-Axis', units='X Units', coord=x2)
    var0 = Variable(axes=(xax1,), data=np.sin(x1), atts=dict(name='relative', units=''))
    var1 = Variable(axes=(xax1,), data=x1.copy(), atts=dict(name='blue', units='units'))
    self.var0 = var0; self.var1 = var1; self.xax1 = xax1
    var2 = Variable(name='purple', units='units', axes=(xax2,), data=(x2**2)/5.)
    self.var2 = var2; self.xax2 = xax2
    # create error variables with random noise
    noise1 = np.random.rand(len(xax1))*var1.data_array.std()/2.
    err1 = Variable(axes=(xax1,), data=noise1, atts=dict(name='blue_std', units='units'))
    noise2 = np.random.rand(len(xax2))*var2.data_array.std()/2.
    err2 = Variable(name='purple', units='units', axes=(xax2,), data=noise2)
    self.err1 = err1; self.err2 = err2
    # add to list
    self.vars = [var1, var2]
    self.errs = [err1, err2]
    self.axes = [xax1, xax2]
def timeAxis(start_date=None, end_date=None, sampling=None, date_range=None, time_axis=None,
             llastIncl=True, ntime=None, varatts=None):
    ''' figure out type and dimensions of time axis '''
    # check time input
    if date_range: start_date, end_date, sampling = date_range
    if start_date and end_date and sampling:
        start_year, start_month, start_day = convertDate(start_date)
        start_datetime = np.datetime64(dt.datetime(year=start_year, month=start_month, day=start_day), sampling)
        end_year, end_month, end_day = convertDate(end_date)
        end_datetime = np.datetime64(dt.datetime(year=end_year, month=end_month, day=end_day), sampling)
        if llastIncl: end_datetime += np.timedelta64(1, sampling)
        date_range = np.arange(start_datetime, end_datetime, dtype='datetime64[{}]'.format(sampling))
        assert date_range[0] == start_datetime, date_range[0]
        if ntime:
            if ntime > len(date_range): raise ArgumentError(date_range)
            else: date_range = date_range[0:ntime] # trim
        else:
            ntime = len(date_range)
    elif time_axis == 'datetime':
        raise ArgumentError('Insufficient time axis information!')
    # construct time axis
    atts = varatts['time']
    if time_axis.lower() == 'simple':
        time = Axis(atts=atts, coord=np.arange(1, ntime + 1))
    elif time_axis.lower() == 'datetime':
        if sampling.lower() == 'y' or sampling.lower() == '1y': units = 'year'
        elif sampling.lower() == 'm' or sampling.lower() == '1m': units = 'month'
        elif sampling.lower() == 'd' or sampling.lower() == '1d': units = 'day'
        elif sampling.lower() == 'h' or sampling.lower() == '1h': units = 'hour'
        else: units = sampling
        long_name = '{}s since {}'.format(units.title(), str(date_range[0])) # hope this makes sense...
        atts.update(long_name=long_name, units=units)
        time = Axis(atts=atts, coord=date_range)
    else:
        raise ArgumentError(time_axis)
    # return time axis
    return time
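# Illustrative usage sketch (not part of the original module): construct a daily 'datetime'
# axis for the first half of May 1981. This assumes that convertDate() accepts ISO date
# strings and that varatts supplies a 'time' attribute dictionary; both are assumptions.
def _example_timeAxis():
    example_varatts = dict(time=dict(name='time', units='day'))
    # with llastIncl=True the end date itself is included, i.e. 15 daily steps
    time = timeAxis(start_date='1981-05-01', end_date='1981-05-15', sampling='D',
                    time_axis='datetime', llastIncl=True, varatts=example_varatts)
    return time  # Axis with datetime64[D] coordinates and units 'day'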
def setUp(self):
    ''' create a 2D test variable '''
    # create axis and variable instances (make *copies* of data and attributes!)
    xax = Axis(name='X-Axis', units='X Units', coord=np.linspace(0, 10, 15))
    yax = Axis(name='Y-Axis', units='Y Units', coord=np.linspace(2, 8, 18))
    xx, yy = np.meshgrid(yax[:], xax[:]) # create mesh (transposed w.r.t. values)
    var0 = Variable(axes=(xax, yax), data=np.sin(xx)*np.cos(yy), atts=dict(name='Color', units='Color Units'))
    var1 = Variable(axes=(xax, yax), data=np.cos(xx)*np.sin(yy), atts=dict(name='Contour', units='Contour Units'))
    self.var0 = var0; self.var1 = var1; self.xax = xax; self.yax = yax
    # add to list
    self.axes = [xax, yax]
    self.vars = [var0, var1]
def loadNARR_LTM(name=dataset_name, varlist=None, grid=None, interval='monthly', varatts=None,
                 filelist=None, folder=ltmfolder):
    ''' Get a properly formatted dataset of daily or monthly NARR climatologies (LTM). '''
    if grid is None:
        # load from original time-series files
        if folder is None: folder = orig_ts_folder
        # prepare input
        if varatts is None: varatts = ltmvaratts.copy()
        if varlist is None: varlist = ltmvarlist
        if interval == 'monthly':
            pfx = '.mon.ltm.nc'; tlen = 12
        elif interval == 'daily':
            pfx = '.day.ltm.nc'; tlen = 365
        else:
            raise DatasetError("Selected interval '%s' is not supported!" % interval)
        # translate varlist
        if varlist and varatts: varlist = translateVarNames(varlist, varatts)
        # axes dictionary, primarily to override time axis
        axes = dict(time=Axis(name='time', units='day', coord=(1, tlen, tlen)), load=True)
        if filelist is None: # generate default filelist
            filelist = [special[var]+pfx if var in special else var+pfx
                        for var in varlist if var not in nofile]
        # load dataset
        dataset = DatasetNetCDF(name=name, folder=folder, filelist=filelist, varlist=varlist,
                                varatts=varatts, axes=axes, atts=projdict, multifile=False,
                                ncformat='NETCDF4_CLASSIC')
        # add projection
        projection = getProjFromDict(projdict, name='{0:s} Coordinate System'.format(name))
        dataset = addGDALtoDataset(dataset, projection=projection, geotransform=None, folder=grid_folder)
    else:
        # load from neatly formatted and regridded time-series files
        if folder is None: folder = avgfolder
        raise NotImplementedError("Need to implement loading neatly formatted and regridded time-series!")
    # return formatted dataset
    return dataset
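# Hypothetical usage sketch: load the monthly NARR climatology for a couple of variables on
# the native grid. The variable names and the availability of the default folders are
# assumptions for illustration, not verified against the actual NARR file layout.
def _example_loadNARR_LTM():
    dataset = loadNARR_LTM(varlist=['precip', 'T2'], interval='monthly')
    print(dataset)  # DatasetNetCDF with a 12-month climatological time axis
    return dataset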
def loadNARR_TS(name=dataset_name, grid=None, varlist=None, resolution=None, varatts=None,
                filelist=None, folder=None, lautoregrid=None):
    ''' Get a properly formatted NARR dataset with monthly mean time-series. '''
    if grid is None:
        # load from original time-series files
        if folder is None: folder = orig_ts_folder
        # translate varlist
        if varatts is None: varatts = tsvaratts.copy()
        if varlist is None: varlist = tsvarlist
        if varlist and varatts: varlist = translateVarNames(varlist, varatts)
        if filelist is None: # generate default filelist
            filelist = [orig_ts_file.format(special[var]) if var in special else orig_ts_file.format(var)
                        for var in varlist if var not in nofile and var in varatts]
        # load dataset
        dataset = DatasetNetCDF(name=name, folder=folder, filelist=filelist, varlist=varlist,
                                varatts=varatts, atts=projdict, multifile=False,
                                ncformat='NETCDF4_CLASSIC')
        # replace time axis with number of month since Jan 1979
        data = np.arange(0, len(dataset.time), 1, dtype='int16') # month since 1979 (Jan 1979 = 0)
        timeAxis = Axis(name='time', units='month', coord=data, atts=dict(long_name='Month since 1979-01'))
        dataset.replaceAxis(dataset.time, timeAxis, asNC=False, deepcopy=False)
        # add projection
        projection = getProjFromDict(projdict, name='{0:s} Coordinate System'.format(name))
        dataset = addGDALtoDataset(dataset, projection=projection, geotransform=None, gridfolder=grid_folder)
    else:
        # load from neatly formatted and regridded time-series files
        if folder is None: folder = avgfolder
        dataset = loadObservations(name=name, folder=folder, projection=None, resolution=None, grid=grid,
                                   period=None, varlist=varlist, varatts=varatts, filepattern=tsfile,
                                   filelist=filelist, lautoregrid=lautoregrid, mode='time-series')
    # return formatted dataset
    return dataset
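# Worked example of the 'month since 1979-01' convention used for the time axis above
# (an illustrative helper, not part of the loader): a record starting in January 1979 maps
# to index 0, so August 1985 corresponds to 12*(1985-1979) + (8-1) = 79.
def _example_month_index(year, month, epoch_year=1979):
    return 12*(year - epoch_year) + (month - 1)

assert _example_month_index(1979, 1) == 0
assert _example_month_index(1985, 8) == 79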
def setUp(self):
    ''' create two test variables '''
    # create axis and variable instances (make *copies* of data and attributes!)
    x1 = np.linspace(0, 10, 11)
    xax1 = Axis(name='X1-Axis', units='X Units', coord=x1)
    var0 = Variable(axes=(xax1,), data=np.sin(x1), atts=dict(name='relative', units=''))
    var1 = Variable(axes=(xax1,), data=x1.copy(), atts=dict(name='blue', units='units'))
    self.var0 = var0; self.var1 = var1; self.xax1 = xax1
    x2 = np.linspace(2, 8, 13)
    xax2 = Axis(name='X2-Axis', units='X Units', coord=x2)
    var2 = Variable(name='purple', units='units', axes=(xax2,), data=(x2**2)/5.)
    self.var2 = var2; self.xax2 = xax2
    # create error variables with random noise
    noise1 = np.random.rand(len(xax1))*var1.data_array.std()/2.
    err1 = Variable(axes=(xax1,), data=noise1, atts=dict(name='blue_std', units='units'))
    noise2 = np.random.rand(len(xax2))*var2.data_array.std()/2.
    err2 = Variable(name='purple', units='units', axes=(xax2,), data=noise2)
    self.err1 = err1; self.err2 = err2
    # add to list
    self.vars = [var1, var2]
    self.errs = [err1, err2]
    self.axes = [xax1, xax2]
def setUp(self):
    ''' create a reference and two test variables for Taylor plot '''
    self.thetamin = 0.; self.Rmin = 0.; self.thetamax = np.pi/2.; self.Rmax = 2.
    # create axis and variable instances (make *copies* of data and attributes!)
    self.x1 = np.linspace(0, 10, 11)
    self.xax1 = Axis(name='X1-Axis', units='X Units', coord=self.x1)
    self.data0 = np.sin(self.x1)
    self.var0 = Variable(axes=(self.xax1,), data=self.data0, atts=dict(name='Reference', units='units'))
    # create error variables with random noise
    self.data1 = self.data0 + ( np.random.rand(len(self.xax1))-0.5 )*0.5
    self.var1 = Variable(axes=(self.xax1,), data=self.data1, atts=dict(name='Blue', units='units'))
    self.data2 = self.data0 + ( np.random.rand(len(self.xax1))-0.5 )*1.5
    self.var2 = Variable(axes=(self.xax1,), data=self.data2, atts=dict(name='Red', units='units'))
    self.data3 = 1. + np.random.rand(len(self.xax1))*1.5
    self.var3 = Variable(axes=(self.xax1,), data=self.data3, atts=dict(name='Random', units='units'))
    # add to list
    self.vars = [self.var0, self.var1, self.var2, self.var3]
    self.data = [self.data0, self.data1, self.data2, self.data3]
    self.axes = [self.xax1,]
def setUp(self):
    ''' create two test variables '''
    # define plot ranges
    self.thetamin = 0.; self.Rmin = 0.; self.thetamax = 2*np.pi; self.Rmax = 2.
    # create theta axis and variable instances (values are radius values, I believe)
    theta1 = np.linspace(self.thetamin, self.thetamax, 361)
    thax1 = Axis(atts=dict(name='$\\theta$-Axis', units='Radians'), coord=theta1)
    var0 = Variable(axes=(thax1,), data=np.sin(theta1), atts=dict(name='Blue', units='units'))
    tmp = theta1.copy()*(self.Rmax-self.Rmin)/(self.thetamax-self.thetamin)
    var1 = Variable(axes=(thax1,), data=tmp, atts=dict(name='Red', units='units'))
    self.var0 = var0; self.var1 = var1; self.xax1 = theta1
    # create error variables with random noise
    noise0 = np.random.rand(len(thax1))*var0.data_array.std()/2.
    err0 = Variable(axes=(thax1,), data=noise0, atts=dict(name='Blue Noise', units='units'))
    noise1 = np.random.rand(len(thax1))*var1.data_array.std()/2.
    err1 = Variable(axes=(thax1,), data=noise1, atts=dict(name='Red Noise', units='units'))
    self.err1 = err1; self.err0 = err0
    # add to list
    self.vars = [var0, var1]
    self.errs = [err0, err1]
    self.axes = [thax1,]
def loadHGS_StnTS(station=None, varlist=None, varatts=None, folder=None, name=None, title=None,
                  start_date=None, end_date=None, run_period=15, period=None, lskipNaN=False,
                  lcheckComplete=True, basin=None, WSC_station=None, basin_list=None, filename=None,
                  prefix=None, scalefactors=None, **kwargs):
    ''' Get a properly formatted HGS dataset with monthly time-series at station locations; as in
        the hgsrun module, the capitalized kwargs can be used to construct folders and/or names '''
    if folder is None or ( filename is None and station is None ): raise ArgumentError
    # try to find meta data for gage station from WSC
    HGS_station = station
    if basin is not None and basin_list is not None:
        station_name = station
        station = getGageStation(basin=basin, station=station if WSC_station is None else WSC_station,
                                 basin_list=basin_list) # only works with registered basins
        if station_name is None: station_name = station.name # backup, in case we don't have an HGS station name
        metadata = station.getMetaData() # load station meta data
        if metadata is None: raise GageStationError(name)
    else:
        metadata = dict(); station = None; station_name = None
    # prepare name expansion arguments (all capitalized)
    expargs = dict(ROOT_FOLDER=root_folder, STATION=HGS_station, NAME=name, TITLE=title,
                   PREFIX=prefix, BASIN=basin, WSC_STATION=WSC_station)
    for key, value in metadata.items():
        if isinstance(value, basestring): expargs['WSC_'+key.upper()] = value # in particular, this includes WSC_ID
    if 'WSC_ID' in expargs:
        if expargs['WSC_ID'][0] == '0': expargs['WSC_ID0'] = expargs['WSC_ID'][1:]
        else: raise DatasetError('Expected leading zero in WSC station ID: {}'.format(expargs['WSC_ID']))
    # exparg preset keys will get overwritten if capitalized versions are defined
    for key, value in kwargs.items():
        KEY = key.upper() # we only use capitalized keywords; non-capitalized keywords are only used/converted
        if KEY == key or KEY not in kwargs: expargs[KEY] = value # if no capitalized version is defined
    # read folder and infer prefix, if necessary
    folder = folder.format(**expargs)
    if not os.path.exists(folder): raise IOError(folder)
    if expargs['PREFIX'] is None:
        with open('{}/{}'.format(folder, prefix_file), 'r') as pfx:
            expargs['PREFIX'] = prefix = ''.join(pfx.readlines()).strip()
    # now assemble file name for station timeseries
    filename = filename.format(**expargs)
    filepath = '{}/{}'.format(folder, filename)
    if not os.path.exists(filepath): raise IOError(filepath)
    if station_name is None:
        station_name = filename[filename.index('hydrograph.')+1:-4] if station is None else station
    # set meta data (and allow keyword expansion of name and title)
    metadata['problem'] = prefix
    metadata['station_name'] = metadata.get('long_name', station_name)
    if name is not None: name = name.format(**expargs) # name expansion with capitalized keyword arguments
    else: name = 'HGS_{:s}'.format(station_name)
    metadata['name'] = name; expargs['Name'] = name.title() # name in title format
    if title is None: title = '{{Name:s}} (HGS, {problem:s})'.format(**metadata)
    title = title.format(**expargs) # name expansion with capitalized keyword arguments
    metadata['long_name'] = metadata['title'] = title
    # now determine start date for date_parser
    if end_date is None:
        if start_date and run_period: end_date = start_date + run_period
        elif period: end_date = period[1]
        else: raise ArgumentError("Need to specify either 'start_date' & 'run_period' or 'period' to infer 'end_date'.")
    end_year, end_month, end_day = convertDate(end_date)
    if start_date is None:
        if end_date and run_period: start_date = end_date - run_period
        elif period: start_date = period[0]
        else: raise ArgumentError("Need to specify either 'end_date' & 'run_period' or 'period' to infer 'start_date'.")
    start_year, start_month, start_day = convertDate(start_date)
    if start_day != 1 or end_day != 1:
        raise NotImplementedError('Currently only monthly data is supported.')
    # import functools
    # date_parser = functools.partial(date_parser, year=start_year, month=start_month, day=start_day)
    # # now load data using pandas ascii reader
    # data_frame = pd.read_table(filepath, sep='\s+', header=2, dtype=np.float64, index_col=['time'],
    #                            date_parser=date_parser, names=ascii_varlist)
    # # resample to monthly data
    # data_frame = data_frame.resample(resampling).agg(np.mean)
    # data = data_frame[flowvar].values
    # parse header
    if varlist is None: varlist = variable_list[:] # default list
    with open(filepath, 'r') as f:
        line = f.readline(); lline = line.lower() # 1st line
        if not "hydrograph" in lline: raise GageStationError(line, filepath)
        # parse variables and determine columns
        line = f.readline(); lline = line.lower() # 2nd line
        if not "variables" in lline: raise GageStationError(line)
        variable_order = [v.strip('"').lower() for v in line[line.find('"'):].strip().split(',')]
    # figure out varlist and data columns
    if variable_order[0] == 'time': del variable_order[0] # only keep variables
    else: raise GageStationError(variable_order)
    variable_order = [hgs_variables[v] for v in variable_order] # replace HGS names with GeoPy names
    vardict = {v: i+1 for i, v in enumerate(variable_order)} # column mapping; +1 because time was removed
    variable_order = [v for v in variable_order if v in varlist or flow_to_flux[v] in varlist]
    usecols = tuple(vardict[v] for v in variable_order) # variable columns that need to be loaded (except time, which is col 0)
    assert 0 not in usecols, usecols
    # load data as tab separated values
    data = np.genfromtxt(filepath, dtype=np.float64, delimiter=None, skip_header=3, usecols=(0,)+usecols)
    assert data.shape[1] == len(usecols)+1, data.shape
    if lskipNaN:
        data = data[np.isnan(data).sum(axis=1) == 0, :]
    elif np.any( np.isnan(data) ):
        raise DataError("Missing values (NaN) encountered in hydrograph file; use 'lskipNaN' to ignore.\n('{:s}')".format(filepath))
    time_series = data[:, 0]; flow_data = data[:, 1:]
    assert flow_data.shape == (len(time_series), len(usecols)), flow_data.shape
    # original time deltas in seconds
    time_diff = time_series.copy(); time_diff[1:] = np.diff(time_series) # time period between time steps
    assert np.all( time_diff > 0 ), filepath
    time_diff = time_diff.reshape((len(time_diff), 1)) # reshape to make sure broadcasting works
    # integrate flow over time steps before resampling
    flow_data[1:, :] -= np.diff(flow_data, axis=0)/2. # get average flow between time steps
    flow_data *= time_diff # integrate flow in time interval by multiplying average flow with time period
    flow_data = np.cumsum(flow_data, axis=0) # integrate by summing up total flow per time interval
    # generate regular monthly time steps
    start_datetime = np.datetime64(dt.datetime(year=start_year, month=start_month, day=start_day), 'M')
    end_datetime = np.datetime64(dt.datetime(year=end_year, month=end_month, day=end_day), 'M')
    time_monthly = np.arange(start_datetime, end_datetime+np.timedelta64(1, 'M'), dtype='datetime64[M]')
    assert time_monthly[0] == start_datetime, time_monthly[0]
    assert time_monthly[-1] == end_datetime, time_monthly[-1]
    # convert monthly time series to regular array of seconds since start date
    time_monthly = ( time_monthly.astype('datetime64[s]') - start_datetime.astype('datetime64[s]') ) / np.timedelta64(1, 's')
    assert time_monthly[0] == 0, time_monthly[0]
    # interpolate integrated flow to new time axis
    #flow_data = np.interp(time_monthly, xp=time_series[:,0], fp=flow_data[:,0],).reshape((len(time_monthly),1))
    time_series = np.concatenate(([0], time_series), axis=0) # integrated flow at time zero must be zero...
    flow_data = np.concatenate(([[0,]*len(usecols)], flow_data), axis=0) # ... this is probably better than interpolation
    # N.B.: we are adding zeros here so we don't have to extrapolate to the left; on the right we just fill in NaN's
    if ( time_monthly[-1] - time_series[-1] ) > 3*86400. and lcheckComplete:
        warn("Data record ends more than 3 days before end of period: {} days".format((time_monthly[-1]-time_series[-1])/86400.))
    elif ( time_monthly[-1] - time_series[-1] ) > 5*86400.:
        if lcheckComplete:
            raise DataError("Data record ends more than 5 days before end of period: {} days".format((time_monthly[-1]-time_series[-1])/86400.))
        else:
            warn("Data record ends more than 5 days before end of period: {} days".format((time_monthly[-1]-time_series[-1])/86400.))
    flow_interp = si.interp1d(x=time_series, y=flow_data, kind='linear', axis=0, copy=False,
                              bounds_error=False, fill_value=np.NaN, assume_sorted=True)
    flow_data = flow_interp(time_monthly) # evaluate with call
    # compute monthly flow rate from interpolated integrated flow
    flow_data = np.diff(flow_data, axis=0) / np.diff(time_monthly, axis=0).reshape((len(time_monthly)-1, 1))
    flow_data *= 1000 # convert from m^3/s to kg/s
    # construct time axis
    start_time = 12*(start_year - 1979) + start_month - 1
    end_time = 12*(end_year - 1979) + end_month - 1
    time = Axis(name='time', units='month', atts=dict(long_name='Month since 1979-01'),
                coord=np.arange(start_time, end_time)) # not including the last, e.g. 1979-01 to 1980-01 is 12 months
    assert len(time_monthly) == end_time-start_time+1
    assert flow_data.shape == (len(time), len(variable_order)), (flow_data.shape, len(time), len(variable_order))
    # construct dataset
    dataset = Dataset(atts=metadata)
    dataset.station = station # add gage station object, if available (else None)
    for i, flowvar in enumerate(variable_order):
        data = flow_data[:, i]
        fluxvar = flow_to_flux[flowvar]
        if flowvar in varlist:
            flowatts = variable_attributes[flowvar]
            # convert variables and put into dataset (monthly time series)
            if flowatts['units'] != 'kg/s':
                raise VariableError("Hydrograph data is read as kg/s; flow variable does not match.\n{}".format(flowatts))
            dataset += Variable(data=data, axes=(time,), **flowatts)
        if fluxvar in varlist and 'shp_area' in metadata:
            # compute surface flux variable based on drainage area
            fluxatts = variable_attributes[fluxvar]
            if fluxatts['units'] == 'kg/s' and fluxatts['units'] != 'kg/m^2/s': raise VariableError(fluxatts)
            data = data / metadata['shp_area'] # need to make a copy
            dataset += Variable(data=data, axes=(time,), **fluxatts)
    # apply analysis period
    if period is not None: dataset = dataset(years=period)
    # adjust scalefactors, if necessary
    if scalefactors:
        if isinstance(scalefactors, dict):
            dataset = updateScalefactor(dataset, varlist=scalefactors, scalefactor=None)
        elif isNumber(scalefactors):
            scalelist = ('discharge', 'seepage', 'flow')
            dataset = updateScalefactor(dataset, varlist=scalelist, scalefactor=scalefactors)
        else:
            raise TypeError(scalefactors)
    # return completed dataset
    return dataset
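# Self-contained sketch of the resampling strategy used above (illustrative only, with made-up
# numbers): flow rates at irregular time steps are accumulated to cumulative volumes, the
# cumulative curve is interpolated to regular monthly boundaries, and differencing then
# recovers mean monthly flow rates.
def _example_monthly_resampling():
    import numpy as np
    from scipy import interpolate as si
    t = np.array([5., 12., 31., 45., 60.]) * 86400.   # irregular output times [s]
    q = np.array([1., 2., 1.5, 1., 2.5])               # flow rates [m^3/s]
    dt = np.diff(np.concatenate(([0.], t)))             # interval lengths [s]
    volume = np.cumsum(q * dt)                           # cumulative volume [m^3]
    t_month = np.array([0., 31., 59.]) * 86400.          # regular monthly boundaries [s]
    v_month = si.interp1d(np.concatenate(([0.], t)), np.concatenate(([0.], volume)),
                          bounds_error=False, fill_value=np.nan)(t_month)
    return np.diff(v_month) / np.diff(t_month)           # mean monthly flow rates [m^3/s]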
def loadEnKF_StnTS(folder=None, varlist='all', varatts=None, name='enkf', title='EnKF', basin=None,
                   start_date=None, end_date=None, sampling=None, period=None, date_range=None,
                   llastIncl=True, WSC_station=None, basin_list=None, filenames=None, prefix=None,
                   time_axis='datetime', scalefactors=None, metadata=None, lkgs=False, out_dir='out/',
                   yaml_file='../input_data/obs_meta.yaml', lYAML=True, nreal=None, ntime=None, **kwargs):
    ''' load EnKF ensemble data as formatted GeoPy Dataset '''
    out_folder = os.path.join(folder, 'out/') # default output folder
    if not os.path.exists(out_folder): raise IOError(out_folder)
    # default values
    if isinstance(varlist, str) and varlist == 'hydro': varlist = Hydro.varlist
    elif isinstance(varlist, str) and varlist == 'obs': varlist = Obs.varlist
    elif isinstance(varlist, str) and varlist == 'all': varlist = Hydro.varlist + Obs.varlist
    elif not isinstance(varlist, (tuple, list)): raise TypeError(varlist)
    if varatts is None: varatts = variable_attributes.copy()
    varmap = { varatt['name']: enkf_name for enkf_name, varatt in list(varatts.items()) }
    varlist = [varmap[var] for var in varlist]
    # load WSC station meta data
    pass
    # initialize Dataset
    dataset = Dataset(name=name, title=title if title else name.title(), atts=metadata)
    ensemble = None; time = None; observation = None
    # load observation/innovation data
    if any([var in Obs.atts for var in varlist]):
        # load data
        vardata = loadObs(varlist=[var for var in varlist if var in Obs.atts],
                          folder=out_folder, lpandas=False)
        ntime, nobs, nreal = list(vardata.values())[0].shape
        # create Axes
        if time is None:
            # figure out time axis
            time = timeAxis(start_date=start_date, end_date=end_date, sampling=sampling,
                            date_range=date_range, time_axis=time_axis, llastIncl=llastIncl,
                            ntime=ntime, varatts=varatts)
        elif len(time) != ntime: raise AxisError(time)
        if ensemble is None:
            # construct ensemble axis
            ensemble = Axis(atts=varatts['ensemble'], coord=np.arange(1, nreal + 1))
        elif len(ensemble) != nreal: raise AxisError(ensemble)
        if observation is None:
            # construct observation axis
            observation = Axis(atts=varatts['observation'], coord=np.arange(1, nobs + 1))
        elif len(observation) != nobs: raise AxisError(observation)
        # create variables
        for varname, data in list(vardata.items()):
            dataset += Variable(atts=varatts[varname], data=data, axes=(time, observation, ensemble))
        # load YAML data, if available
        if lYAML:
            # load YAML file
            yaml_path = os.path.join(out_folder, yaml_file)
            if not os.path.exists(yaml_path): raise IOError(yaml_path)
            with open(yaml_path, 'r') as yf:
                obs_meta = yaml.load(yf)
            if obs_meta is None: raise IOError(yaml_path) # not a YAML file?
            # create constant variables from observation meta data
            for cvar, cval in list(obs_meta[0].items()):
                if isinstance(cval, str): dtype, missing = np.string_, ''
                elif isinstance(cval, (np.integer, int)): dtype, missing = np.int_, 0
                elif isinstance(cval, (np.inexact, float)): dtype, missing = np.float_, np.NaN
                else: dtype = None # skip
                if dtype:
                    data = np.asarray([missing if obs[cvar] is None else obs[cvar] for obs in obs_meta], dtype=dtype)
                    if cvar in varatts: atts = varatts[cvar]
                    else: atts = dict(name=cvar, units='')
                    dataset += Variable(atts=atts, data=data, axes=(observation,))
    elif ntime is None:
        # try to infer time dimension from backup.info file
        backup_info = os.path.join(folder, 'backup.info')
        if os.path.exists(backup_info):
            with open(backup_info, 'r') as bf:
                ntime = int(bf.readline())
    # load discharge/hydrograph data
    if 'discharge' in varlist:
        data = loadHydro(folder=out_folder, nreal=nreal, ntime=ntime)
        ntime, nreal = data.shape
        if time is None:
            # figure out time axis
            time = timeAxis(start_date=start_date, end_date=end_date, sampling=sampling,
                            date_range=date_range, time_axis=time_axis, llastIncl=llastIncl,
                            ntime=ntime, varatts=varatts)
        elif len(time) != ntime: raise AxisError(time)
        if ensemble is None:
            # construct ensemble axis
            ensemble = Axis(atts=varatts['ensemble'], coord=np.arange(1, nreal + 1))
        elif len(ensemble) != nreal: raise AxisError(ensemble)
        atts = varatts['discharge']
        if lkgs:
            data *= 1000.
            if atts['units'] == 'm^3/s': atts['units'] = 'kg/s'
        dataset += Variable(atts=atts, data=data, axes=(time, ensemble))
    # return formatted Dataset
    if scalefactors is not None and scalefactors != 1: raise NotImplementedError
    return dataset
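# Hypothetical usage sketch for the EnKF loader above: the run folder, the 'out/' layout with
# hydrograph output, and the date range are assumptions for illustration only.
def _example_loadEnKF_StnTS():
    dataset = loadEnKF_StnTS(folder='/data/enkf_run', varlist='hydro',
                             start_date='2010-01-01', end_date='2010-12-31',
                             sampling='D', time_axis='datetime', lYAML=False)
    return dataset  # Dataset with discharge variables on (time, ensemble) axes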
def loadGPCC_TS(name=dataset_name, grid=None, varlist=None, resolution='25', varatts=None,
                filelist=None, folder=None, lautoregrid=None):
    ''' Get a properly formatted dataset with the monthly GPCC time-series. '''
    if grid is None:
        # load from original time-series files
        if folder is None: folder = orig_ts_folder
        # prepare input
        if resolution not in ('05', '10', '25'):
            raise DatasetError("Selected resolution '%s' is not available!" % resolution)
        # translate varlist
        if varatts is None: varatts = tsvaratts.copy()
        if varlist is None: varlist = varatts.keys()
        if varlist and varatts: varlist = translateVarNames(varlist, varatts)
        if filelist is None: # generate default filelist
            filelist = []
            if 'p' in varlist: filelist.append(orig_ts_file.format('precip', resolution))
            if 's' in varlist: filelist.append(orig_ts_file.format('statio', resolution))
        # load dataset
        dataset = DatasetNetCDF(name=name, folder=folder, filelist=filelist, varlist=varlist,
                                varatts=varatts, multifile=False, ncformat='NETCDF4_CLASSIC')
        # replace time axis with number of month since Jan 1979
        data = np.arange(0, len(dataset.time), 1, dtype='int16') + (1901 - 1979)*12 # month since 1979 (Jan 1979 = 0)
        timeAxis = Axis(name='time', units='month', coord=data, atts=dict(long_name='Month since 1979-01'))
        dataset.replaceAxis(dataset.time, timeAxis, asNC=False, deepcopy=False)
        # add GDAL info
        dataset = addGDALtoDataset(dataset, projection=None, geotransform=None)
        # N.B.: projection should be auto-detected as geographic
    else:
        # load from neatly formatted and regridded time-series files
        if folder is None: folder = avgfolder
        grid, resolution = checkGridRes(grid, resolution, period=None, lclim=False)
        dataset = loadObservations(name=name, folder=folder, projection=None, resolution=resolution,
                                   grid=grid, period=None, varlist=varlist, varatts=varatts,
                                   filepattern=tsfile, filelist=filelist, lautoregrid=lautoregrid,
                                   mode='time-series')
    # return formatted dataset
    return dataset
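# Hypothetical usage sketch: load the 0.5 degree GPCC precipitation time-series from the
# original NetCDF files; the variable key 'precip' and the default folder layout are
# assumptions based on the attribute dictionaries referenced above.
def _example_loadGPCC_TS():
    dataset = loadGPCC_TS(varlist=['precip'], resolution='05')
    return dataset  # monthly time-series with time axis in 'month since 1979-01'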
def loadCRU_TS(name=dataset_name, grid=None, varlist=None, resolution=None, varatts=None,
               filelist=None, folder=None, lautoregrid=None):
    ''' Get a properly formatted CRU dataset with monthly mean time-series. '''
    if grid is None:
        # load from original time-series files
        if folder is None: folder = orig_ts_folder
        # translate varlist
        if varatts is None: varatts = tsvaratts.copy()
        if varlist is None: varlist = varatts.keys()
        if varlist and varatts: varlist = translateVarNames(varlist, varatts)
        # assemble filelist
        if filelist is None: # generate default filelist
            filelist = [orig_ts_file.format(var) for var in varlist if var not in nofile]
        # load dataset
        dataset = DatasetNetCDF(name=name, folder=folder, filelist=filelist, varlist=varlist,
                                varatts=varatts, multifile=False, ncformat='NETCDF4_CLASSIC')
        # replace time axis with number of month since Jan 1979
        data = np.arange(0, len(dataset.time), 1, dtype='int16') + (1901 - 1979)*12 # month since 1979 (Jan 1979 = 0)
        timeAxis = Axis(name='time', units='month', coord=data, atts=dict(long_name='Month since 1979-01'))
        dataset.replaceAxis(dataset.time, timeAxis, asNC=False, deepcopy=False)
        # add projection
        dataset = addGDALtoDataset(dataset, projection=None, geotransform=None, gridfolder=grid_folder)
        # N.B.: projection should be auto-detected as geographic
    else:
        # load from neatly formatted and regridded time-series files
        if folder is None: folder = avgfolder
        dataset = loadObservations(name=name, folder=folder, projection=None, resolution=None, grid=grid,
                                   period=None, varlist=varlist, varatts=varatts, filepattern=tsfile,
                                   filelist=filelist, lautoregrid=lautoregrid, mode='time-series')
    # return formatted dataset
    return dataset
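# Hypothetical usage sketch: load the CRU time-series regridded to a named grid; the grid name
# 'arb2_d02' and the variable names are purely illustrative and depend on what is actually
# available in avgfolder.
def _example_loadCRU_TS():
    dataset = loadCRU_TS(varlist=['T2', 'precip'], grid='arb2_d02')
    return dataset  # regridded monthly time-series loaded via loadObservations()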
## convert from XLS files to netcdf
elif mode == 'convert_XLS':
    # imports
    from glob import glob
    from geodata.base import Dataset, Axis, Variable
    from geodata.netcdf import writeNetCDF
    # load list of well files and generate list of wells
    well_files = glob(os.path.join(data_folder, 'W*.xlsx'))
    well_files.sort()
    wells = [os.path.basename(name[:-5]) for name in well_files]
    print(wells)
    # dataset
    time_ax = Axis(coord=np.arange(12*(period[1]-period[0])) + 252, **varatts['time']) # origin: 1979-01
    well_ax = Axis(coord=np.arange(len(wells)) + 1, name='well', units='')
    dataset = Dataset(name=conservation_authority, title=conservation_authority + ' Observation Wells')
    # add meta data
    meta_dicts = [loadMetadata(well, conservation_authority=conservation_authority) for well in wells]
    for key in meta_dicts[0].keys():
        if key in varatts: atts = varatts[key]
        elif key.lower() in varatts: atts = varatts[key.lower()]
        else: atts = dict(name=key, units='')
        if atts['units']:
            data = np.asarray([wmd[key] for wmd in meta_dicts], dtype=np.float64)
def rasterDataset(name=None, title=None, vardefs=None, axdefs=None, atts=None, projection=None,
                  griddef=None, lgzip=None, lgdal=True, lmask=True, fillValue=None, lskipMissing=True,
                  lgeolocator=True, file_pattern=None, lfeedback=True, **kwargs):
    ''' function to load a set of variables that are stored in raster format in a systematic
        directory tree into a Dataset
        Variables and Axes are defined as follows:
          vardefs[varname] = dict(name=string, units=string, axes=tuple of strings, atts=dict,
                                  plot=dict, dtype=np.dtype, fillValue=value)
          axdefs[axname]   = dict(name=string, units=string, atts=dict, coord=array or list) or None
        The path to raster files is constructed as variable_pattern+axes_pattern, where axes_pattern
        is defined through the axes (as in rasterVariable), and variable_pattern takes the special
        keyword VAR, which is the variable key in vardefs. '''
    ## prepare input data and axes
    if griddef:
        xlon, ylat = griddef.xlon, griddef.ylat
        if projection is None: projection = griddef.projection
        elif projection != griddef.projection:
            raise ArgumentError("Conflicting projection and GridDef!")
        geotransform = griddef.geotransform
        isProjected = griddef.isProjected
    else:
        xlon = ylat = geotransform = None
        isProjected = False if projection is None else True
    # construct axes dict
    axes = dict()
    for axname, axdef in axdefs.items():
        if axdef is None:
            axes[axname] = None
        else:
            assert 'coord' in axdef, axdef
            assert ('name' in axdef and 'units' in axdef) or 'atts' in axdef, axdef
            ax = Axis(**axdef)
            axes[ax.name] = ax
    # check for map Axis
    if isProjected:
        if 'x' not in axes: axes['x'] = xlon
        if 'y' not in axes: axes['y'] = ylat
    else:
        if 'lon' not in axes: axes['lon'] = xlon
        if 'lat' not in axes: axes['lat'] = ylat
    ## load raster data into Variable objects
    varlist = []
    for varname, vardef in vardefs.items():
        # check definitions
        assert 'axes' in vardef and 'dtype' in vardef, vardef
        assert ('name' in vardef and 'units' in vardef) or 'atts' in vardef, vardef
        # determine relevant axes
        vardef = vardef.copy()
        axes_list = [None if ax is None else axes[ax] for ax in vardef.pop('axes')]
        # define path parameters (with varname)
        path_params = vardef.pop('path_params', None)
        path_params = dict() if path_params is None else path_params.copy()
        if 'VAR' not in path_params: path_params['VAR'] = varname # a special key
        # add kwargs and relevant axis indices
        relaxes = [ax.name for ax in axes_list if ax is not None] # relevant axes
        for key, value in kwargs.items():
            if key not in axes or key in relaxes: vardef[key] = value
        # create Variable object
        var = rasterVariable(projection=projection, griddef=griddef, file_pattern=file_pattern,
                             lgzip=lgzip, lgdal=lgdal, lmask=lmask, lskipMissing=lskipMissing,
                             axes=axes_list, path_params=path_params, lfeedback=lfeedback, **vardef)
        # N.B.: remaining vardef components: name, units, atts, plot, dtype, fillValue
        varlist.append(var)
        # check that map axes are correct
        for ax in var.xlon, var.ylat:
            if axes[ax.name] is None: axes[ax.name] = ax
            elif axes[ax.name] != ax:
                raise AxisError("{} axes are incompatible.".format(ax.name))
        if griddef is None: griddef = var.griddef
        elif griddef != var.griddef:
            raise AxisError("GridDefs are inconsistent.")
        if geotransform is None: geotransform = var.geotransform
        elif geotransform != var.geotransform:
            raise AxisError("Conflicting geotransform (from Variable) and GridDef!\n {} != {}".format(
                            var.geotransform, geotransform))
    ## create Dataset
    dataset = Dataset(name=name, title=title, varlist=varlist, axes=axes, atts=atts)
    # add GDAL functionality
    dataset = addGDALtoDataset(dataset, griddef=griddef, projection=projection, geotransform=geotransform,
                               gridfolder=None, lwrap360=None, geolocator=lgeolocator, lforce=False)
    # N.B.: for some reason we also need to pass the geotransform, otherwise it is recomputed internally
    #       and some consistency checks fail due to machine-precision differences
    # return GDAL-enabled Dataset
    return dataset
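# Hypothetical usage sketch for rasterDataset(): the definitions follow the structure documented
# in the docstring above, but the file pattern, axis values, and variable names are made up and
# would need to match an actual raster directory tree.
def _example_rasterDataset():
    axdefs = dict(month=dict(name='month', units='', coord=list(range(1, 13))),
                  lat=None, lon=None)  # map axes are inferred from the rasters
    vardefs = dict(rain=dict(name='precip', units='mm/month', axes=('month', 'lat', 'lon'),
                             dtype=np.float32, fillValue=-9999.))
    dataset = rasterDataset(name='example', title='Example Raster Data',
                            vardefs=vardefs, axdefs=axdefs, projection=None,
                            file_pattern='climate/{VAR:s}_{month:02d}.tif')
    return dataset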
def loadGageStation(basin=None, station=None, varlist=None, varatts=None, mode='climatology',
                    aggregation=None, filetype='monthly', folder=None, name=None, period=None,
                    basin_list=None, lcheck=True, lexpand=True, lfill=True, lflatten=True,
                    lkgs=True, scalefactors=None, title=None):
    ''' function to load hydrograph climatologies and timeseries for a given basin '''
    ## resolve input
    if mode == 'timeseries' and aggregation:
        raise ArgumentError('Timeseries does not support aggregation.')
    # get GageStation instance
    station = getGageStation(basin=basin, station=station, name=name, folder=folder,
                             river=None, basin_list=basin_list, lcheck=True)
    # variable attributes
    if varlist is None: varlist = variable_list
    elif not isinstance(varlist, (list, tuple)): raise TypeError
    varlist = list(varlist) # make copy of varlist to avoid interference
    if varatts is None:
        if aggregation is None: varatts = variable_attributes_kgs if lkgs else variable_attributes_mms
        else: varatts = agg_varatts_kgs if lkgs else agg_varatts_mms
    elif not isinstance(varatts, dict): raise TypeError
    ## read csv data
    # time series data and time coordinates
    lexpand = True; lfill = True
    if mode == 'climatology': lexpand = False; lfill = False; lflatten = False
    data, time = station.getTimeseriesData(units='kg/s' if lkgs else 'm^3/s', lcheck=True,
                                           lexpand=lexpand, lfill=lfill, period=period,
                                           lflatten=lflatten)
    # station meta data
    metadata = station.getMetaData(lcheck=True)
    den = metadata['shp_area'] if lkgs else ( metadata['shp_area'] / 1000. )
    ## create dataset for station
    dataset = Dataset(name='WSC', title=title or metadata['Station Name'], varlist=[], atts=metadata)
    if mode.lower() in ('timeseries', 'time-series'):
        time = time.flatten(); data = data.flatten() # just to make sure...
        # make time axis based on time coordinate from csv file
        timeAxis = Axis(name='time', units='month', coord=time, # time series centered at 1979-01
                        atts=dict(long_name='Month since 1979-01'))
        dataset += timeAxis
        # load mean discharge
        dataset += Variable(axes=[timeAxis], data=data, atts=varatts['discharge'])
        # load mean runoff
        doa = data / den
        dataset += Variable(axes=[timeAxis], data=doa, atts=varatts['runoff'])
    elif mode == 'climatology':
        # N.B.: this is primarily for backwards compatibility; it should not be used anymore...
        # make common time axis for climatology
        te = 12 # length of time axis: 12 months
        climAxis = Axis(name='time', units='month', length=12, coord=np.arange(1, te+1, 1)) # monthly climatology
        dataset.addAxis(climAxis, copy=False)
        # extract variables (min/max/mean are separate variables)
        # N.B.: this is mainly for backwards compatibility
        doa = data / den
        if aggregation is None or aggregation.lower() == 'mean':
            # load mean discharge
            tmpdata = nf.nanmean(data, axis=0)
            tmpvar = Variable(axes=[climAxis], data=tmpdata, atts=varatts['discharge'])
            dataset.addVariable(tmpvar, copy=False)
            # load mean runoff
            tmpdata = nf.nanmean(doa, axis=0)
            tmpvar = Variable(axes=[climAxis], data=tmpdata, atts=varatts['runoff'])
            dataset.addVariable(tmpvar, copy=False)
        if aggregation is None or aggregation.lower() == 'std':
            # load discharge standard deviation
            tmpdata = nf.nanstd(data, axis=0, ddof=1) # very few values means large uncertainty!
            tmpvar = Variable(axes=[climAxis], data=tmpdata, atts=varatts['discstd'])
            dataset.addVariable(tmpvar, copy=False)
            # load runoff standard deviation
            tmpdata = nf.nanstd(doa, axis=0, ddof=1)
            tmpvar = Variable(axes=[climAxis], data=tmpdata, atts=varatts['roff_std'])
            dataset.addVariable(tmpvar, copy=False)
        if aggregation is None or aggregation.lower() == 'sem':
            # load discharge standard error of the mean
            tmpdata = nf.nansem(data, axis=0, ddof=1) # very few values means large uncertainty!
            tmpvar = Variable(axes=[climAxis], data=tmpdata, atts=varatts['discsem'])
            dataset.addVariable(tmpvar, copy=False)
            # load runoff standard error of the mean
            tmpdata = nf.nansem(doa, axis=0, ddof=1)
            tmpvar = Variable(axes=[climAxis], data=tmpdata, atts=varatts['roff_sem'])
            dataset.addVariable(tmpvar, copy=False)
        if aggregation is None or aggregation.lower() == 'max':
            # load maximum discharge
            tmpdata = nf.nanmax(data, axis=0)
            tmpvar = Variable(axes=[climAxis], data=tmpdata, atts=varatts['discmax'])
            dataset.addVariable(tmpvar, copy=False)
            # load maximum runoff
            tmpdata = nf.nanmax(doa, axis=0)
            tmpvar = Variable(axes=[climAxis], data=tmpdata, atts=varatts['roff_max'])
            dataset.addVariable(tmpvar, copy=False)
        if aggregation is None or aggregation.lower() == 'min':
            # load minimum discharge
            tmpdata = nf.nanmin(data, axis=0)
            tmpvar = Variable(axes=[climAxis], data=tmpdata, atts=varatts['discmin'])
            dataset.addVariable(tmpvar, copy=False)
            # load minimum runoff
            tmpdata = nf.nanmin(doa, axis=0)
            tmpvar = Variable(axes=[climAxis], data=tmpdata, atts=varatts['roff_min'])
            dataset.addVariable(tmpvar, copy=False)
    else:
        raise NotImplementedError("Time axis mode '{}' is not supported.".format(mode))
    # adjust scalefactors, if necessary
    if scalefactors:
        if isinstance(scalefactors, dict):
            dataset = updateScalefactor(dataset, varlist=scalefactors, scalefactor=None)
        elif isNumber(scalefactors):
            scalelist = ('discharge', 'StdDisc', 'SEMDisc', 'MaxDisc', 'MinDisc')
            dataset = updateScalefactor(dataset, varlist=scalelist, scalefactor=scalefactors)
        else:
            raise TypeError(scalefactors)
    # return station dataset
    return dataset
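# Hypothetical usage sketch: load the monthly WSC discharge time-series for a gage station;
# the basin and station names are placeholders and must exist in the supplied basin_list.
def _example_loadGageStation(basin_list):
    dataset = loadGageStation(basin='GRW', station='Grand River_Brantford',
                              mode='timeseries', basin_list=basin_list, lkgs=True)
    return dataset  # Dataset with 'discharge' and 'runoff' on a monthly time axis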