def _pre_flight_table(table):
    """Check to ensure that the batch config csv table is valid.

    Parameters
    ----------
    table : pd.DataFrame
        Extracted batch config csv. Must have "job" index (1st column)
        and "set_tag" and "files" columns.
    """
    if table.index.name != 'job':
        msg = 'Batch CSV config must have "job" as the first column.'
        logger.error(msg)
        raise ConfigError(msg)

    if 'set_tag' not in table or 'files' not in table:
        msg = 'Batch CSV config must have "set_tag" and "files" columns'
        logger.error(msg)
        raise ConfigError(msg)

    if (len(table.set_tag.unique()) != len(table)
            or len(table.index.unique()) != len(table)):
        msg = ('Batch CSV config must have completely unique '
               '"set_tag" and "job" columns')
        logger.error(msg)
        raise ConfigError(msg)
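# A hedged usage sketch for _pre_flight_table: the "job" index and the
# "set_tag" column must each be fully unique, while "files" entries may
# repeat. All values below are hypothetical, for illustration only.
#
#   import pandas as pd
#   table = pd.DataFrame(
#       {'set_tag': ['set1', 'set2'],
#        'files': ['./config_gen.json', './config_gen.json']},
#       index=pd.Index(['job1', 'job2'], name='job'))
#   _pre_flight_table(table)  # valid: passes silently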
def source_files(self):
    """
    Returns
    -------
    source_files : list
        List of source files to collect from.
    """
    if self._source_files is not None:
        if isinstance(self._source_files, (list, tuple)):
            source_files = self._source_files
        elif self._source_files == "PIPELINE":
            source_files = Pipeline.parse_previous(self._dirout,
                                                   'multi-year',
                                                   target='fpath')
        else:
            raise ConfigError("source_files must be a list, tuple, "
                              "or 'PIPELINE'")
    else:
        if self._source_dir and self._source_prefix:
            source_files = []
            for file in os.listdir(self._source_dir):
                if (file.startswith(self._source_prefix)
                        and file.endswith('.h5')
                        and '_node' not in file):
                    source_files.append(os.path.join(self._source_dir,
                                                     file))
        else:
            raise ConfigError("source_files or both source_dir and "
                              "source_prefix must be provided")

    if not any(source_files):
        raise FileNotFoundError('Could not find any source files for '
                                'multi-year collection group: "{}"'
                                .format(self.name))

    return source_files
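# A hedged sketch of the two equivalent config styles handled above
# (paths hypothetical):
#
#   "source_files": ["./outputs/gen_2012.h5", "./outputs/gen_2013.h5"]
#
# or, to glob a directory by prefix:
#
#   "source_dir": "./outputs",
#   "source_prefix": "gen_"
#
# "source_files" may also be the string "PIPELINE" to pull file paths
# from the previous pipeline step.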
def _check_pipeline(self): """Check the pipeline config file in the batch config.""" if 'pipeline_config' not in self: raise ConfigError('Batch config needs "pipeline_config" arg!') if not os.path.exists(self['pipeline_config']): raise ConfigError('Could not find the pipeline config file: {}' .format(self['pipeline_config']))
def _check_points_config_mapping(self):
    """
    Check to ensure the project points (df) and SAM configs
    (sam_config_obj) are compatible. Update as necessary or break.
    """
    # Extract unique config references from the project_points DataFrame
    df_configs = self.df['config'].unique()
    sam_configs = self.sam_files

    # Check that at least as many SAM config .json files were provided
    # as are referenced in the project_points DataFrame
    if len(df_configs) > len(sam_configs):
        msg = ('Points references {} configs while only '
               '{} SAM configs were provided!'
               .format(len(df_configs), len(sam_configs)))
        logger.error(msg)
        raise ConfigError(msg)

    # If the project_points DataFrame was created from a list, config
    # will be None and needs to be added to _df from sam_configs
    if len(df_configs) == 1 and df_configs[0] is None:
        self._df['config'] = list(sam_configs.values())[0]
        df_configs = self.df['config'].unique()

    # Check whether config references in the project_points DataFrame
    # are valid file paths; if not, compare with SAM configs and update
    # as needed
    configs = {}
    for config in df_configs:
        if os.path.isfile(config):
            configs[config] = config
        elif config in sam_configs:
            configs[config] = sam_configs[config]
        else:
            msg = ('{} does not map to a valid configuration file'
                   .format(config))
            logger.error(msg)
            raise ConfigError(msg)

    # If configs has any keys that are not in sam_configs then something
    # really weird happened, so raise an error.
    if any(set(configs) - set(sam_configs)):
        msg = ('A wild config has appeared! Requested config keys for '
               'ProjectPoints are {} and previous config keys are {}'
               .format(list(configs.keys()), list(sam_configs.keys())))
        logger.error(msg)
        raise ConfigError(msg)
def parse_res_files(self):
    """Get a list of the resource files with years filled in.

    Returns
    -------
    res_files : list
        List of config-specified resource files. Resource files with {}
        formatting will be filled with the specified year(s). This
        return value is a list with len=1 for a single year run.
    """
    if self._res_files is None:
        # get base filename, may have {} for year format
        fname = self.resource_file
        if '{}' in fname:
            # need to make list of res files for each year
            self._res_files = [fname.format(year)
                               for year in self.years]
        else:
            # only one resource file requested, still put it in a list
            self._res_files = [fname]

    if len(self._res_files) != len(self.years):
        raise ConfigError('The number of resource files does not match '
                          'the number of analysis years!'
                          '\n\tResource files: \n\t\t{}'
                          '\n\tYears: \n\t\t{}'
                          .format(self._res_files, self.years))

    return self._res_files
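# Example of the {} year expansion above (path hypothetical): a config
# "resource_file" of "/nsrdb/nsrdb_{}.h5" with years [2012, 2013] yields
# ['/nsrdb/nsrdb_2012.h5', '/nsrdb/nsrdb_2013.h5'].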
def make_fout(name, year):
    """Make an appropriate file output from name and year.

    Parameters
    ----------
    name : str
        Job name.
    year : int | str
        Analysis year.

    Returns
    -------
    fout : str
        .h5 output file based on name and year.
    """
    try:
        match = parse_year(name)
    except RuntimeError:
        match = False

    # if a year is in the name, it must match the analysis year
    if match and year:
        if str(year) != str(match):
            raise ConfigError('Tried to submit gen job for {}, but '
                              'found a different year in the base job '
                              'name: "{}". Please remove the year from '
                              'the job name.'.format(year, name))

    # if the year isn't in the name, add it before setting the file
    # output
    if year:
        fout = '{}{}.h5'.format(name,
                                '_{}'.format(year) if not match else '')
    else:
        fout = '{}.h5'.format(name)

    return fout
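# Hedged usage sketch for make_fout (job names hypothetical; assumes
# parse_year finds a 4-digit year in the name):
#   make_fout('my_run', 2012)       -> 'my_run_2012.h5'
#   make_fout('my_run_2012', 2012)  -> 'my_run_2012.h5'
#   make_fout('my_run_2013', 2012)  -> raises ConfigError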
def execution_control(self):
    """Get the execution control object.

    Returns
    -------
    _ec : BaseExecutionConfig | SlurmConfig
        reV execution config object specific to the execution_control
        option.
    """
    if self._ec is None:
        ec = self['execution_control']
        # static map of available execution options and their classes
        ec_config_types = {
            'local': BaseExecutionConfig,
            'slurm': SlurmConfig,
            'eagle': SlurmConfig,
        }
        if 'option' in ec:
            try:
                # Try setting the attribute to the appropriate exec
                # option
                self._ec = ec_config_types[ec['option'].lower()](ec)
            except KeyError:
                # Option not found
                raise ConfigError('Execution control option not '
                                  'recognized: "{}". Available options '
                                  'are: {}.'
                                  .format(ec['option'].lower(),
                                          list(ec_config_types.keys())))
        else:
            # option not specified, default to a base execution (local)
            warn('Execution control option not specified. '
                 'Defaulting to a local run.')
            self._ec = BaseExecutionConfig(ec)

    return self._ec
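# A hedged sketch of the "execution_control" block this property parses.
# Only "option" is inspected here; the other keys are hypothetical
# examples of what the resulting config class might consume:
#
#   "execution_control": {
#       "option": "slurm",
#       "allocation": "rev",
#       "walltime": 4.0
#   }
#
# Omitting "option" warns and falls back to a local BaseExecutionConfig.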
def _check_pipeline(self): """Check pipeline steps input. ConfigError if bad input.""" if 'pipeline' not in self: raise ConfigError('Could not find required key "pipeline" in the ' 'pipeline config.') if not isinstance(self.pipeline_steps, list): raise ConfigError('Config arg "pipeline" must be a list of ' '(command, f_config) pairs, but received "{}".' .format(type(self.pipeline_steps))) for di in self.pipeline_steps: for f_config in di.values(): if not os.path.exists(f_config): raise ConfigError('Pipeline step depends on non-existent ' 'file: {}'.format(f_config))
def _sc_preflight(self):
    """Perform pre-flight checks on the SC config inputs"""
    missing = []
    for req in self.REQUIREMENTS:
        if self.get(req, None) is None:
            missing.append(req)

    if any(missing):
        raise ConfigError('Supply Curve config missing the following '
                          'keys: {}'.format(missing))
def _preflight(self):
    """Run a preflight check on the config."""
    if 'project_control' in self:
        msg = ('config "project_control" block is no longer used. All '
               'project control keys should be placed at the top '
               'config level.')
        logger.error(msg)
        raise ConfigError(msg)

    missing = []
    for req in self.REQUIREMENTS:
        if req not in self:
            missing.append(req)

    if any(missing):
        e = ('{} missing the following keys: {}'
             .format(self.__class__.__name__, missing))
        logger.error(e)
        raise ConfigError(e)
def _sc_agg_preflight(self):
    """Perform pre-flight checks on the SC agg config inputs"""
    with h5py.File(self.excl_fpath, mode='r') as f:
        dsets = list(f)

    if self.tm_dset not in dsets and self.res_fpath is None:
        raise ConfigError('Techmap dataset "{}" not found in '
                          'exclusions file, resource file input '
                          '"res_fpath" is required to create the '
                          'techmap file.'.format(self.tm_dset))
def _pre_flight_fp(config):
    """Check to see that a valid config filepath was input.

    Parameters
    ----------
    config : str
        File path to config csv (str).
    """
    if not isinstance(config, str):
        msg = ('Batch config can only take a str filepath, '
               'but received a "{}".'.format(type(config)))
        logger.error(msg)
        raise ConfigError(msg)

    if not config.endswith('.csv'):
        msg = ('BatchCsv config needs a csv filepath but received: {}'
               .format(config))
        logger.error(msg)
        raise ConfigError(msg)
def _check_keys(self):
    """Check on config keys to ensure they match available properties"""
    for key, value in self.items():
        if isinstance(value, str) and key not in self._keys:
            msg = ('{} is not a valid config entry for {}! Must be one '
                   'of:\n{}'.format(key, self.__class__.__name__,
                                    self._keys))
            logger.error(msg)
            raise ConfigError(msg)
def parse_cf_files(self):
    """Get the capacity factor files (reV generation output data).

    Returns
    -------
    cf_files : list
        Target paths for capacity factor files (reV generation output
        data) for input to reV LCOE calculation.
    """
    if self._cf_files is None:
        # get base filename, may have {} for year format
        fname = self.cf_file
        if '{}' in fname:
            # need to make a list of cf files for each year
            self._cf_files = [fname.format(year)
                              for year in self.years]
        elif 'PIPELINE' in fname:
            self._cf_files = Pipeline.parse_previous(super().dirout,
                                                     'econ',
                                                     target='fpath')
        else:
            # only one cf file requested, still put it in a list
            self._cf_files = [fname]

        self.check_files(self._cf_files)

        # check year/cf_file matching if not a pipeline input
        if 'PIPELINE' not in fname:
            if len(self._cf_files) != len(self.years):
                raise ConfigError('The number of cf files does not '
                                  'match the number of analysis years!'
                                  '\n\tCF files: \n\t\t{}'
                                  '\n\tYears: \n\t\t{}'
                                  .format(self._cf_files, self.years))

            for year in self.years:
                if str(year) not in str(self._cf_files):
                    raise ConfigError('Could not find year {} in cf '
                                      'files: {}'
                                      .format(year, self._cf_files))

    return self._cf_files
def status_file(self):
    """Get status file path.

    Returns
    -------
    _status_file : str
        reV status file path.
    """
    if self._dirout is None:
        raise ConfigError('Pipeline has not yet been initialized.')

    return os.path.join(self._dirout,
                        '{}_status.json'.format(self.name))
def _analysis_config_preflight(self):
    """Check for required config blocks"""
    if 'directories' not in self:
        w = ('reV config does not have "directories" block, '
             'default directories being used.')
        logger.warning(w)
        warn(w, ConfigWarning)

    if 'execution_control' not in self:
        e = 'reV config must have "execution_control" block!'
        logger.error(e)
        raise ConfigError(e)
def __init__(self, config):
    """
    Parameters
    ----------
    config : str
        File path to config json (str).
    """
    if not isinstance(config, str):
        raise ConfigError('Batch config can only take a str filepath, '
                          'but received a "{}".'.format(type(config)))

    super().__init__(config)
    self._pre_flight()
def _pre_flight_fp(config):
    """Check to see that a valid config filepath was input.

    Parameters
    ----------
    config : str
        File path to config json or csv (str).
    """
    if not isinstance(config, str):
        msg = ('Batch config can only take a str filepath, '
               'but received a "{}".'.format(type(config)))
        logger.error(msg)
        raise ConfigError(msg)

    if not config.endswith('.json') and not config.endswith('.csv'):
        msg = ('Batch config needs to be .json or .csv but received: '
               '{}'.format(config))
        logger.error(msg)
        raise ConfigError(msg)

    if not os.path.exists(config):
        msg = 'Batch config does not exist: {}'.format(config)
        logger.error(msg)
        raise FileNotFoundError(msg)
def _parse_dirout(self):
    """Parse pipeline steps for a common dirout and unique job names."""
    dirouts = []
    names = []
    for di in self.pipeline_steps:
        for f_config in di.values():
            config = AnalysisConfig(f_config, check_keys=False)
            dirouts.append(config.dirout)
            if 'name' in config:
                names.append(config.name)

    if len(set(dirouts)) != 1:
        raise ConfigError('Pipeline steps must have a common output '
                          'directory but received {} different '
                          'directories.'.format(len(set(dirouts))))
    else:
        self._dirout = dirouts[0]

    if len(set(names)) != len(names):
        raise ConfigError('Pipeline steps must have unique job names '
                          'but received {} duplicate names.'
                          .format(len(names) - len(set(names))))
def check_overwrite_keys(self, primary_key, *overwrite_keys):
    """Check for overwrite keys and raise a ConfigError if present.

    Parameters
    ----------
    primary_key : str
        Primary key that overwrites overwrite_keys, used for the error
        message.
    overwrite_keys : str
        Key(s) to overwrite.
    """
    overwrite = []
    for key in overwrite_keys:
        if key in self:
            overwrite.append(key)

    if overwrite:
        msg = ('A value for "{}" was provided which overwrites the '
               'following key(s): "{}", please remove them from the '
               'config'.format(primary_key, ', '.join(overwrite)))
        logger.error(msg)
        raise ConfigError(msg)
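# Hedged usage sketch (key names hypothetical): raise if the config
# also provides keys that the primary key would silently overwrite.
#   self.check_overwrite_keys('curtailment', 'curtailment_fpath')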
def index(self, gid):
    """Get the index location (iloc not loc) for a resource gid found
    in the project points.

    Parameters
    ----------
    gid : int
        Resource GID found in the project points gid column.

    Returns
    -------
    ind : int
        Row index of gid in the project points dataframe.
    """
    if gid not in self._df['gid'].values:
        e = ('Requested resource gid {} is not present in the project '
             'points dataframe. Cannot return row index.'.format(gid))
        logger.error(e)
        raise ConfigError(e)

    ind = np.where(self._df['gid'] == gid)[0][0]

    return ind
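# Usage sketch (hypothetical gid; assumes the project points dataframe
# has a 'gid' column):
#   self.index(33)  # -> integer row position (iloc) of gid 33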
def _parse_sam_config(sam_config):
    """
    Create SAM files dictionary.

    Parameters
    ----------
    sam_config : dict | str | list | SAMConfig
        SAM input configuration ID(s) and file path(s). Keys are the
        SAM config ID(s) and the top level value is the SAM path. Can
        also be a single config file str. If it's a list, it is mapped
        to the sorted list of unique configs requested by the points
        csv. Can also be a pre-loaded SAMConfig object.

    Returns
    -------
    _sam_config_obj : reV.config.sam_config.SAMConfig
        SAM configuration object.
    """
    if isinstance(sam_config, SAMConfig):
        return sam_config

    if isinstance(sam_config, dict):
        config_dict = sam_config
    elif isinstance(sam_config, str):
        config_dict = {sam_config: sam_config}
    else:
        raise ValueError('Cannot parse SAM configs from {}'
                         .format(type(sam_config)))

    for key, value in config_dict.items():
        if not os.path.isfile(value):
            raise ConfigError('Invalid SAM config {}: {} does not '
                              'exist'.format(key, value))

    return SAMConfig(config_dict)
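# Hedged usage sketch for _parse_sam_config (file paths hypothetical):
#   _parse_sam_config('./sam/pv.json')
#       -> SAMConfig({'./sam/pv.json': './sam/pv.json'})
#   _parse_sam_config({'pv_1': './sam/pv.json'})
#       -> SAMConfig({'pv_1': './sam/pv.json'})
# Either way, each mapped file must exist or a ConfigError is raised.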
def get_file(fname):
    """Read the config file.

    Parameters
    ----------
    fname : str
        Full path + filename. Must be a .json file.

    Returns
    -------
    config : dict
        Config data.
    """
    logger.debug('Getting "{}"'.format(fname))
    if os.path.exists(fname) and fname.endswith('.json'):
        config = safe_json_load(fname)
    elif not os.path.exists(fname):
        raise FileNotFoundError('Configuration file does not exist: '
                                '"{}"'.format(fname))
    else:
        raise ConfigError('Unknown error getting configuration file: '
                          '"{}"'.format(fname))

    return config
def _check_sets(self):
    """Check the batch sets for required inputs and valid files."""
    if 'sets' not in self:
        raise ConfigError('Batch config needs "sets" arg!')

    if not isinstance(self['sets'], list):
        raise ConfigError('Batch config needs "sets" arg to be a '
                          'list!')

    for s in self['sets']:
        if not isinstance(s, dict):
            raise ConfigError('Batch sets must be dictionaries.')
        if 'args' not in s:
            raise ConfigError('All batch sets must have "args" key.')
        if 'files' not in s:
            raise ConfigError('All batch sets must have "files" key.')

        for fpath in s['files']:
            if not os.path.exists(fpath):
                raise ConfigError('Could not find file to modify in '
                                  'batch jobs: {}'.format(fpath))
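# A hedged sketch of a valid "sets" entry (arg names and paths are
# hypothetical; only "args" and "files" are required by the check
# above):
#
#   "sets": [
#       {
#           "args": {"resource_file": ["./res_a.h5", "./res_b.h5"]},
#           "files": ["./config_gen.json"]
#       }
#   ]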
def from_config(ctx, config_file, verbose):
    """Run reV gen from a config file."""
    name = ctx.obj['NAME']
    verbose = any([verbose, ctx.obj['VERBOSE']])

    # Instantiate the config object
    config = GenConfig(config_file)

    # take name from config if not default
    if config.name.lower() != 'rev':
        name = config.name
        ctx.obj['NAME'] = name

    # Enforce verbosity if logging level is specified in the config
    if config.log_level == logging.DEBUG:
        verbose = True

    # make output directory if it does not exist
    if not os.path.exists(config.dirout):
        os.makedirs(config.dirout)

    # initialize loggers
    init_mult(name, config.logdir,
              modules=[__name__, 'reV.generation.generation',
                       'reV.config', 'reV.utilities', 'reV.SAM',
                       'rex.utilities'],
              verbose=verbose)

    # Initial log statements
    logger.info('Running reV Generation from config file: "{}"'
                .format(config_file))
    logger.info('Target output directory: "{}"'.format(config.dirout))
    logger.info('Target logging directory: "{}"'.format(config.logdir))
    logger.info('The following project points were specified: "{}"'
                .format(config.get('project_points', None)))
    logger.info('The following SAM configs are available to this '
                'run:\n{}'
                .format(pprint.pformat(config.get('sam_files', None),
                                       indent=4)))
    logger.debug('The full configuration input is as follows:\n{}'
                 .format(pprint.pformat(config, indent=4)))

    # set config objects to be passed through invoke to direct methods
    ctx.obj['TECH'] = config.technology
    ctx.obj['POINTS'] = config.project_points
    ctx.obj['SAM_FILES'] = config.sam_files
    ctx.obj['DIROUT'] = config.dirout
    ctx.obj['LOGDIR'] = config.logdir
    ctx.obj['OUTPUT_REQUEST'] = config.output_request
    ctx.obj['TIMEOUT'] = config.timeout
    ctx.obj['SITES_PER_WORKER'] = \
        config.execution_control.sites_per_worker
    ctx.obj['MAX_WORKERS'] = config.execution_control.max_workers
    ctx.obj['MEM_UTIL_LIM'] = \
        config.execution_control.memory_utilization_limit

    # get downscale request and raise exception if not NSRDB
    ctx.obj['DOWNSCALE'] = config.downscale
    if (config.downscale is not None
            and 'pv' not in config.technology.lower()):
        raise ConfigError('User requested downscaling for a non-solar '
                          'technology. reV does not have this '
                          'capability at the current time. Please '
                          'contact a developer for more information on '
                          'this feature.')

    ctx.obj['CURTAILMENT'] = None
    if config.curtailment is not None:
        # pass through the curtailment file, not the curtailment object
        ctx.obj['CURTAILMENT'] = config['curtailment']

    for i, year in enumerate(config.years):
        submit_from_config(ctx, name, year, config, i, verbose=verbose)