def get_sst_filename(config):
    base_filename = config['sst_filename']
    sst_time = get_sst_time(config)
    sst_filename = sub_date(base_filename, init_time=sst_time)
    return sst_filename
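# --- Illustrative sketch (not part of the original modules) ---
# A minimal stand-in for the kind of substitution substitute.sub_date performs,
# assuming it expands initial-time tokens such as %iY/%im/%id/%iH (as suggested
# by the "%iY : 2015" comment in main() below). The function name and token set
# here are assumptions for illustration only.
def _example_sub_date(s, init_time):
    replacements = {'%iY': init_time.strftime('%Y'),
                    '%im': init_time.strftime('%m'),
                    '%id': init_time.strftime('%d'),
                    '%iH': init_time.strftime('%H')}
    for token, value in replacements.items():
        s = s.replace(token, value)
    return s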
def main():
    # merge command-line and file-specified arguments
    config = conf.config(__doc__, sys.argv[1:])

    logger = loghelper.create(LOGGER, log_level=config.get('log.level'),
                              log_fmt=config.get('log.format'))

    if config.get('log.file'):
        log_file = config['log.file']
        logger.addHandler(loghelper.file_handler(log_file,
                                                 config['log.level'],
                                                 config['log.format']))
        logger.debug('now logging to file')

    dry_run = config.get('dry-run')

    # either the start time is exactly specified, or else we calculate it
    if config.get('start'):
        init_time = config['start']
    else:
        init_time = shared.get_time(base_time=config.get('base-time'),
                                    delay=config.get('delay'),
                                    round=config.get('cycles'))

    if config.get('end'):
        end_init = config['end']
        init_interval = config['init_interval']
        init_times = list(rrule.rrule(freq=rrule.HOURLY,
                                      interval=init_interval,
                                      dtstart=init_time,
                                      until=end_init))
    else:
        init_times = [init_time]

    for init_time in init_times:
        # one-argument function to do initial-time substitution in strings
        expand = lambda s: substitute.sub_date(s, init_time=init_time) if type(s) == type("") else s

        # dictionary of replacements e.g. %iY : 2015
        date_replacements = substitute.date_replacements(init_time=init_time)

        source = expand(config['source'])
        target = expand(config['target'])

        assert _are_compatible(source, target)

        _recursive_replace(source, target, date_replacements)
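# --- Illustrative sketch (not part of the original modules) ---
# How the rrule call in main() expands a start/end pair into a list of initial
# times. The 6-hourly interval and the dates are assumptions for the example.
def _example_init_times():
    import datetime
    from dateutil import rrule
    start = datetime.datetime(2015, 1, 1, 0)
    end = datetime.datetime(2015, 1, 2, 0)
    # returns 2015-01-01 00:00, 06:00, 12:00, 18:00 and 2015-01-02 00:00
    return list(rrule.rrule(freq=rrule.HOURLY, interval=6,
                            dtstart=start, until=end))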
def get_bdy_filenames(grb_fmt, bdy_times):
    """Creates a list of boundary-condition filenames from a filename pattern
    and the boundary times. In general this will be called once per forecast,
    so there is only one init_time.

    Arguments:
        grb_fmt   -- filename pattern containing date placeholders
        bdy_times -- list of boundary-condition times; the first entry is the init time
    """

    logger = get_logger()
    logger.debug('*** GENERATING BOUNDARY CONDITION FILENAMES ***')

    filelist = [sub_date(grb_fmt, init_time=bdy_times[0], valid_time=b)
                for b in bdy_times]
    logger.debug(filelist)
    return filelist
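# --- Illustrative sketch (not part of the original modules) ---
# One way the bdy_times argument to get_bdy_filenames could be built: boundary
# valid times at a fixed interval from the initial time. The helper name and
# the 3-hourly default interval are assumptions for the example.
def _example_bdy_times(init_time, fcst_hours, bdy_interval=3):
    import datetime
    return [init_time + datetime.timedelta(hours=h)
            for h in range(0, fcst_hours + 1, bdy_interval)]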
def ncdump(config):

    logger = loghelper.get(LOGGER)

    # _listify ensures arguments are enclosed within a list
    # to simplify treatment in the following code
    files = nctools._listify(config['<files>'])
    vars = nctools._listify(config.get('vars'))
    global_atts = nctools._listify(config.get('global-atts'))
    var_atts = nctools._listify(config.get('var-atts'))
    coord_vars = nctools._listify(config.get('coords'))
    sort_by = nctools._listify(config.get('sort-by'))
    order_by = nctools._listify(config.get('order-by'))

    out = config.get('out')
    pivot = config.get('pivot')
    valid_time = config.get('valid-time')
    format = config.get('format')
    filter = config.get('filter')
    split_by = config.get('split-by')
    concat = config.get('concat')
    start = config.get('start')
    delay = config.get('delay')
    cycles = nctools._listify(config.get('cycles'))

    basetime = start if start else datetime.datetime.today()
    prior = _prior_time(basetime, delay=delay, hours=cycles)

    logger.debug("using %s as a start time" % prior)

    if files == []:
        logger.info("no files specified, finding using options")
        file_pattern = config.get('file-pattern')
        if not file_pattern:
            raise nctools.ConfigError('either supply files or specify file-pattern')
        expanded = substitute.sub_date(file_pattern, init_time=prior)
        files = glob.glob(expanded)

    if files == []:
        raise IOError("no files found")

    frame = nctools.melt(files, vars, global_atts, var_atts, coord_vars,
                         missing=MISSING_ATTS)

    if valid_time:
        logger.debug("adding valid time into frame")
        frame['valid_time'] = frame['reftime'] + frame['leadtime'] * datetime.timedelta(0, 60 * 60)

    if filter:
        frame = nctools.filter(frame, filter)

    if concat:
        nctools.concat(frame, concat, name='variable', inplace=True)

    if pivot:
        frame = pd.pivot_table(frame, index=['reftime', 'leadtime', 'location'],
                               columns='variable', values='value')
        frame.reset_index(inplace=True)

    if sort_by:
        frame.sort(sort_by, inplace=True)

    if order_by:
        frame = frame[order_by]

    if out:
        out = substitute.sub_date(out, init_time=prior)

    if split_by:
        gb = frame.groupby(split_by)
        for key, group in gb:
            if out:
                new_name = _merge_name(out, key)
                save(gb.get_group(key), new_name, config['format'],
                     float_format=config.get('float-format'))
            else:
                print gb.get_group(key).to_string()
                print '\n\n\n'
    elif out:
        save(frame, out, config['format'],
             float_format=config.get('float-format'))
    else:
        print frame.to_string()
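# --- Illustrative sketch (not part of the original modules) ---
# What the pivot step in ncdump does: turn the long "melted" frame (one row per
# variable/value pair) into one column per variable. Toy values only.
def _example_pivot():
    import pandas as pd
    long_frame = pd.DataFrame({'reftime': ['2015-01-01'] * 4,
                               'leadtime': [0, 0, 3, 3],
                               'location': ['park1'] * 4,
                               'variable': ['SPEED', 'DIRECTION'] * 2,
                               'value': [7.5, 270.0, 8.1, 265.0]})
    wide = pd.pivot_table(long_frame,
                          index=['reftime', 'leadtime', 'location'],
                          columns='variable', values='value')
    return wide.reset_index()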
def power(config):
    """Reads time series from a netCDF time-series file and adds power as a variable."""

    if __name__ == "__main__":
        logger = loghelper.create_logger(config)
    else:
        logger = loghelper.get_logger(config['log.name'])

    # listify ensures they are returned as a list, even if it is one file
    files = shared._listify(config['<files>'])

    # Number of samples to use should be in here
    # Whether to normalise power should be in here
    start = config.get('start')
    delay = config.get('delay')
    cycles = shared._listify(config.get('cycles'))
    pnorm = config.get('pnorm')
    pdist = config.get('pdist')
    sstd = config.get('sstd')
    dstd = config.get('dstd')
    pquants = config.get('pquants')
    quantiles = np.array(pquants)
    pcurve_dir = config.get('pcurve-dir')
    ts_dir = config.get('tseries-dir')
    out = config.get('out')
    metadata = config.get('metadata')

    basetime = start if start else datetime.datetime.today()
    prior = shared._prior_time(basetime, delay=delay, hours=cycles)

    logger.debug("using %s as a start time" % prior)

    if not files:
        logger.debug("no files specified, finding using options")
        file_pattern = config.get('file-pattern')
        if not file_pattern:
            raise ConfigError('either supply files or specify file-pattern')
        expanded = substitute.sub_date(file_pattern, init_time=prior)
        files = glob.glob(expanded)
        logger.debug(files)

    # if we get to this point and there are still no files, then we have a problem
    if not files:
        raise IOError("no files found")

    logger.debug("input files: ")
    logger.debug(files)
    for f in files:
        logger.debug("\t%s" % f)

    # number of samples to draw per timestep when building a power distribution
    if pdist:
        n = pdist

    #grid_id = config['grid_id']
    out_pattern = config.get('out')

    for tseries_file in files:

        dataset_in = Dataset(tseries_file, 'a')

        # Get dimensions
        dims = dataset_in.dimensions
        nreftime = len(dims['reftime'])
        ntime = len(dims['leadtime'])
        nloc = len(dims['location'])
        nheight = len(dims['height'])
        loc_str_len = len(dims['loc_str_length'])

        # Get coordinate variables
        reftime = dataset_in.variables['reftime']
        leadtime = dataset_in.variables['leadtime']
        validtime = nctools._valid_time(reftime, leadtime)
        refdt = num2date(reftime[:], reftime.units)

        power_file = substitute.sub_date(out, init_time=refdt[0])

        logger.info('Estimating power from time series: %s ' % tseries_file)
        logger.info('Writing power time series to: %s ' % power_file)

        location = [''.join(l.filled(' ')).strip() for l in dataset_in.variables['location']]
        height = dataset_in.variables['height']

        if power_file == tseries_file:
            dataset_out = dataset_in
        else:
            dataset_out = Dataset(power_file, 'w')

        # Get number of quantiles
        nq = len(quantiles)
        pdata = np.ma.zeros((ntime, nloc, nheight, nq + 1), np.float)  # mean will be 1st value

        use_locs = []
        # loop through locations and look for a power-curve file
        for l, loc in enumerate(location):

            pcurve_file = '%s/%s.csv' % (pcurve_dir, loc)

            # mask power data if no power curve found for this park
            if not os.path.exists(pcurve_file):
                #logger.debug("Power curve: %s not found, skipping" % pcurve_file)
                pdata[:, l, :, :] = np.ma.masked
                continue

            logger.info('Predicting power output for %s' % loc)

            # Open power curve
            use_locs.append(l)
            pcurve = from_file(pcurve_file)

            for h in range(nheight):
                speed = dataset_in.variables['SPEED'][0, :, l, h]
                direction = dataset_in.variables['DIRECTION'][0, :, l, h]
                #pwr = pcurve.power(speed, direction)

                # power_dist creates a distribution for each timestep based on
                # sampling n times from a normal distribution
                pdist = pcurve.power_dist(speed, direction, sstd=sstd, dstd=dstd,
                                          n=n, normalise=pnorm)
                pmean = np.mean(pdist, axis=1)
                pquants = scipy.stats.mstats.mquantiles(pdist, prob=quantiles / 100.0,
                                                        axis=1, alphap=0.5, betap=0.5)

                pdata[:, l, h, 0] = pmean
                pdata[:, l, h, 1:] = pquants[:, :]

            #logger.info('finished %s' % loc)

        use_inds = np.array(use_locs)

        if dataset_out != dataset_in:
            dataset_out.createDimension('reftime', None)
            dataset_out.createVariable('reftime', 'float', ('reftime',))
            dataset_out.variables['reftime'][:] = reftime[:]
            dataset_out.variables['reftime'].units = reftime.units
            dataset_out.variables['reftime'].calendar = reftime.calendar
            dataset_out.variables['reftime'].long_name = reftime.long_name
            dataset_out.variables['reftime'].standard_name = reftime.standard_name

            dataset_out.createDimension('leadtime', len(leadtime))
            dataset_out.createVariable('leadtime', 'int', ('leadtime',))
            dataset_out.variables['leadtime'][:] = leadtime[:]
            dataset_out.variables['leadtime'].units = leadtime.units
            dataset_out.variables['leadtime'].long_name = leadtime.long_name
            dataset_out.variables['leadtime'].standard_name = leadtime.standard_name

            dataset_out.createDimension('location', len(use_locs))
            dataset_out.createDimension('loc_str_length', loc_str_len)
            loc_data = np.array([list(l.ljust(loc_str_len, ' ')) for l in location])
            dataset_out.createVariable('location', 'c', ('location', 'loc_str_length'))
            dataset_out.variables['location'][:] = loc_data[use_inds, :]

            dataset_out.createDimension('height', nheight)
            dataset_out.createVariable('height', 'i', ('height',))
            dataset_out.variables['height'][:] = height[:]
            dataset_out.GRID_ID = dataset_in.GRID_ID
            dataset_out.DX = dataset_in.DX
            dataset_out.DY = dataset_in.DY

            try:
                dataset_out.variables['height'].units = height.units
            except Exception:
                logger.warn("height units missing")

            pdata = pdata[:, use_inds, :, :]
            for key in metadata.keys():
                key = key.upper()
                dataset_out.setncattr(key, dataset_in.getncattr(key))

        pavg = dataset_out.createVariable('POWER', 'f',
                                          ('reftime', 'leadtime', 'location', 'height'))
        pavg.units = 'kW'
        pavg.description = 'forecast power output'
        pavg[0, :, :, :] = pdata[:, :, :, 0]

        for q, qval in enumerate(quantiles):
            varname = 'POWER.P%02d' % qval
            var = dataset_out.createVariable(varname, 'f',
                                             ('reftime', 'leadtime', 'location', 'height'))
            if pnorm:
                var.units = 'ratio'
            else:
                var.units = 'kW'
            var.description = 'forecast power output'
            var[0, :, :, :] = pdata[:, :, :, q + 1]

        #logger.debug(dataset_out)
        dataset_in.close()
        if dataset_out != dataset_in:
            dataset_out.close()
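# --- Illustrative sketch (not part of the original modules) ---
# How the per-timestep power distribution in power() is reduced to a mean and a
# set of quantiles. The toy array shape and the P10/P50/P90 levels are
# assumptions for the example.
def _example_power_quantiles():
    import numpy as np
    import scipy.stats
    quantiles = np.array([10, 50, 90])          # percentiles, in percent
    pdist = np.random.rand(48, 100) * 2000.0    # 48 lead times x 100 samples (kW)
    pmean = np.mean(pdist, axis=1)
    pquants = scipy.stats.mstats.mquantiles(pdist, prob=quantiles / 100.0,
                                            axis=1, alphap=0.5, betap=0.5)
    return pmean, pquants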