Пример #1
0
    def _pre_flight_table(table):
        """Validate the table parsed from a batch config csv.

        Parameters
        ----------
        table : pd.DataFrame
            Batch config csv contents. The index must be named "job" and
            the columns must include "set_tag" and "files".

        Raises
        ------
        ConfigError
            If the index is not named "job", if either required column is
            absent, or if the "set_tag" values or the index values are not
            all unique.
        """
        msg = None

        if table.index.name != 'job':
            msg = 'Batch CSV config must have "job" as the first column.'

        elif 'set_tag' not in table or 'files' not in table:
            msg = 'Batch CSV config must have "set_tag" and "files" columns'

        else:
            n_rows = len(table)
            tags_unique = len(table['set_tag'].unique()) == n_rows
            # the index is only inspected when the tags are already unique
            if not tags_unique or len(table.index.unique()) != n_rows:
                msg = ('Batch CSV config must have completely '
                       'unique "set_tag" and "job" columns')

        if msg is not None:
            logger.error(msg)
            raise ConfigError(msg)
Пример #2
0
    def source_files(self):
        """Resolve the source files for this multi-year collection group.

        Returns
        -------
        source_files : list
            Source files to collect from. An explicit list/tuple input is
            returned as-is, the "PIPELINE" keyword is resolved through
            Pipeline.parse_previous, and otherwise the source directory is
            scanned for .h5 files that start with the source prefix and do
            not contain "_node" in their name.

        Raises
        ------
        ConfigError
            If the explicit input is of an unsupported form, or if no
            explicit input is given and the source directory or prefix
            is missing.
        FileNotFoundError
            If no source files could be resolved.
        """
        explicit = self._source_files

        if explicit is None:
            if not (self._source_dir and self._source_prefix):
                raise ConfigError("source_files or both source_dir and "
                                  "source_prefix must be provided")

            source_files = [
                os.path.join(self._source_dir, fname)
                for fname in os.listdir(self._source_dir)
                if fname.startswith(self._source_prefix)
                and fname.endswith('.h5')
                and '_node' not in fname]

        elif isinstance(explicit, (list, tuple)):
            source_files = explicit

        elif explicit == "PIPELINE":
            source_files = Pipeline.parse_previous(self._dirout,
                                                   'multi-year',
                                                   target='fpath')
        else:
            raise ConfigError("source_files must be a list, tuple, "
                              "or 'PIPELINE'")

        if any(source_files):
            return source_files

        raise FileNotFoundError('Could not find any source files for '
                                'multi-year collection group: "{}"'
                                .format(self.name))
Пример #3
0
    def _check_pipeline(self):
        """Verify that the batch config references an existing pipeline
        config file.

        Raises
        ------
        ConfigError
            If the "pipeline_config" key is absent or its value is not an
            existing path.
        """
        has_pipeline = 'pipeline_config' in self
        if not has_pipeline:
            raise ConfigError('Batch config needs "pipeline_config" arg!')

        fp_pipeline = self['pipeline_config']
        if os.path.exists(fp_pipeline):
            return

        raise ConfigError('Could not find the pipeline config file: {}'
                          .format(self['pipeline_config']))
Пример #4
0
    def _check_points_config_mapping(self):
        """
        Confirm that the config references in the project points dataframe
        can be mapped onto the available SAM configs, filling in the
        reference when the dataframe carries a single None placeholder.

        Raises
        ------
        ConfigError
            If the points reference more configs than were provided, if a
            reference is neither an existing file nor a SAM config key, or
            if a resolved reference is not among the SAM config keys.
        """
        # unique config references requested by the project points
        df_configs = self.df['config'].unique()
        sam_configs = self.sam_files

        # there cannot be more references than available SAM configs
        n_refs, n_sam = len(df_configs), len(sam_configs)
        if n_refs > n_sam:
            msg = ('Points references {} configs while only '
                   '{} SAM configs were provided!'.format(n_refs, n_sam))
            logger.error(msg)
            raise ConfigError(msg)

        # a lone None reference is replaced by the first SAM config value
        # and the unique references are then re-extracted
        if len(df_configs) == 1 and df_configs[0] is None:
            self._df['config'] = list(sam_configs.values())[0]
            df_configs = self.df['config'].unique()

        # each reference must be an existing file path or a SAM config key
        configs = {}
        for ref in df_configs:
            if os.path.isfile(ref):
                configs[ref] = ref
                continue

            if ref not in sam_configs:
                msg = ('{} does not map to a valid configuration file'.format(
                    ref))
                logger.error(msg)
                raise ConfigError(msg)

            configs[ref] = sam_configs[ref]

        # resolved keys that are unknown to the SAM configs are an error
        unexpected = set(configs) - set(sam_configs)
        if any(unexpected):
            msg = (
                'A wild config has appeared! Requested config keys for '
                'ProjectPoints are {} and previous config keys are {}'.format(
                    list(configs.keys()), list(sam_configs.keys())))
            logger.error(msg)
            raise ConfigError(msg)
Пример #5
0
    def parse_res_files(self):
        """Build (once) and return the resource file list, one per year.

        Returns
        -------
        res_files : list
            Config-specified resource files. A resource file containing {}
            is formatted with each analysis year; otherwise the single
            file name is wrapped in a list.

        Raises
        ------
        ConfigError
            If the number of resource files differs from the number of
            analysis years.
        """
        if self._res_files is None:
            # base file name, which may hold a {} placeholder for the year
            template = self.resource_file
            if '{}' not in template:
                # single file request is still returned as a list
                self._res_files = [template]
            else:
                self._res_files = [template.format(yr) for yr in self.years]

        if len(self._res_files) == len(self.years):
            return self._res_files

        raise ConfigError('The number of resource files does not match '
                          'the number of analysis years!'
                          '\n\tResource files: \n\t\t{}'
                          '\n\tYears: \n\t\t{}'.format(
                              self._res_files, self.years))
Пример #6
0
def make_fout(name, year):
    """Build the .h5 output file name for a job name and analysis year.

    Parameters
    ----------
    name : str
        Job name.
    year : int | str
        Analysis year. A falsy value means no year is appended.

    Returns
    -------
    fout : str
        .h5 output file name. The year is appended as "_{year}" only when
        parse_year does not find a year in the job name.

    Raises
    ------
    ConfigError
        If the job name already holds a year that differs from the
        requested year.
    """
    try:
        name_year = parse_year(name)
    except RuntimeError:
        # parse_year could not extract a year from the name
        name_year = False

    if not year:
        return '{}.h5'.format(name)

    if name_year and str(year) != str(name_year):
        raise ConfigError(
            'Tried to submit gen job for {}, but found a '
            'different year in the base job name: "{}". '
            'Please remove the year from the job name.'.format(year, name))

    # only tag the year onto the name when it is not already in there
    suffix = '' if name_year else '_{}'.format(year)
    return '{}{}.h5'.format(name, suffix)
Пример #7
0
    def execution_control(self):
        """Get the execution control object.

        The object is built from the "execution_control" config block on
        first access and cached in ``self._ec`` for later calls.

        Returns
        -------
        _ec : BaseExecutionConfig | SlurmConfig
            reV execution config object specific to the execution_control
            option. The "slurm" and "eagle" options both map to
            SlurmConfig, "local" maps to BaseExecutionConfig.

        Raises
        ------
        ConfigError
            If an "option" entry is given but is not a recognized option.
        """
        if self._ec is not None:
            return self._ec

        ec = self['execution_control']
        # static map of avail execution options with corresponding classes
        ec_config_types = {
            'local': BaseExecutionConfig,
            'slurm': SlurmConfig,
            'eagle': SlurmConfig,
        }

        if 'option' not in ec:
            # option not specified, default to a base execution (local)
            warn('Execution control option not specified. '
                 'Defaulting to a local run.')
            self._ec = BaseExecutionConfig(ec)
            return self._ec

        option = ec['option'].lower()
        try:
            # only the lookup is guarded so that a KeyError raised while
            # constructing the config object is not misreported as an
            # unrecognized option
            ec_class = ec_config_types[option]
        except KeyError as exc:
            raise ConfigError('Execution control option not '
                              'recognized: "{}". '
                              'Available options are: {}.'.format(
                                  option,
                                  list(ec_config_types.keys()))) from exc

        self._ec = ec_class(ec)

        return self._ec
Пример #8
0
    def _check_pipeline(self):
        """Validate the pipeline steps input.

        Raises
        ------
        ConfigError
            If the "pipeline" key is absent, if the pipeline steps are not
            a list, or if any step references a file that does not exist.
        """
        if 'pipeline' not in self:
            raise ConfigError('Could not find required key "pipeline" in the '
                              'pipeline config.')

        steps = self.pipeline_steps
        if not isinstance(steps, list):
            raise ConfigError('Config arg "pipeline" must be a list of '
                              '(command, f_config) pairs, but received "{}".'
                              .format(type(steps)))

        # lazily walk every config file path referenced by every step
        step_files = (fp for step in steps for fp in step.values())
        for f_config in step_files:
            if os.path.exists(f_config):
                continue

            raise ConfigError('Pipeline step depends on non-existent '
                              'file: {}'.format(f_config))
Пример #9
0
 def _sc_preflight(self):
     """Check that every required supply curve config key has a value.

     Raises
     ------
     ConfigError
         If any key in REQUIREMENTS is absent or set to None.
     """
     missing = [req for req in self.REQUIREMENTS
                if self.get(req, None) is None]

     if not any(missing):
         return

     raise ConfigError('Supply Curve config missing the following '
                       'keys: {}'.format(missing))
Пример #10
0
    def _preflight(self):
        """Run a preflight check on the config.

        Raises
        ------
        ConfigError
            If the retired "project_control" block is present, or if any
            key listed in REQUIREMENTS is absent from the config.
        """
        if 'project_control' in self:
            msg = ('config "project_control" block is no '
                   'longer used. All project control keys should be placed at '
                   'the top config level.')
            logger.error(msg)
            raise ConfigError(msg)

        missing = [req for req in self.REQUIREMENTS if req not in self]
        if not any(missing):
            return

        e = ('{} missing the following keys: {}'.format(
            self.__class__.__name__, missing))
        logger.error(e)
        raise ConfigError(e)
Пример #11
0
    def _sc_agg_preflight(self):
        """Check that the techmap dataset can be found or created.

        The exclusions file is opened read-only to list its top-level
        names.

        Raises
        ------
        ConfigError
            If the techmap dataset name is not in the exclusions file and
            no resource file path ("res_fpath") was provided.
        """
        with h5py.File(self.excl_fpath, mode='r') as excl:
            dsets = list(excl)

        techmap_missing = self.tm_dset not in dsets
        if techmap_missing and self.res_fpath is None:
            msg = ('Techmap dataset "{}" not found in exclusions '
                   'file, resource file input "res_fpath" is '
                   'required to create the techmap file.'
                   .format(self.tm_dset))
            raise ConfigError(msg)
Пример #12
0
    def _pre_flight_fp(config):
        """Check that the batch config input is a csv file path string.

        Parameters
        ----------
        config : str
            File path to config csv (str).

        Raises
        ------
        ConfigError
            If the input is not a string or does not end with ".csv".
        """
        msg = None

        if not isinstance(config, str):
            msg = ('Batch config can only take a str filepath, '
                   'but received a "{}".'.format(type(config)))

        elif not config.endswith('.csv'):
            msg = ('BatchCsv config needs a csv filepath but received: {}'
                   .format(config))

        if msg is not None:
            logger.error(msg)
            raise ConfigError(msg)
Пример #13
0
 def _check_keys(self):
     """
     Check that every config key holding a string value is one of the
     available keys in self._keys.

     Raises
     ------
     ConfigError
         On the first key with a string value that is not in self._keys.
     """
     for key, value in self.items():
         # only string-valued entries are validated against the key list
         if not isinstance(value, str) or key in self._keys:
             continue

         msg = ('{} is not a valid config entry for {}! Must be one of:'
                '\n{}'.format(key, self.__class__.__name__, self._keys))
         logger.error(msg)
         raise ConfigError(msg)
Пример #14
0
    def parse_cf_files(self):
        """Build (once) and return the capacity factor file list.

        Returns
        -------
        cf_files : list
            Target paths for capacity factor files (reV generation output
            data) for input to reV LCOE calculation.

        Raises
        ------
        ConfigError
            For a non-pipeline input, if the number of cf files differs
            from the number of analysis years or if a year does not appear
            in the string form of the cf file list.
        """
        if self._cf_files is not None:
            return self._cf_files

        # base file name, which may hold a {} placeholder for the year
        fname = self.cf_file
        from_pipeline = 'PIPELINE' in fname

        if '{}' in fname:
            # one cf file per analysis year
            self._cf_files = [fname.format(yr) for yr in self.years]
        elif from_pipeline:
            self._cf_files = Pipeline.parse_previous(super().dirout,
                                                     'econ',
                                                     target='fpath')
        else:
            # single file request is still stored as a list
            self._cf_files = [fname]

        self.check_files(self._cf_files)

        # year/cf_file matching is only checked for non-pipeline input
        if not from_pipeline:
            if len(self._cf_files) != len(self.years):
                raise ConfigError('The number of cf files does not match '
                                  'the number of analysis years!'
                                  '\n\tCF files: \n\t\t{}'
                                  '\n\tYears: \n\t\t{}'.format(
                                      self._cf_files, self.years))

            for yr in self.years:
                if str(yr) in str(self._cf_files):
                    continue

                raise ConfigError('Could not find year {} in cf '
                                  'files: {}'.format(
                                      yr, self._cf_files))

        return self._cf_files
Пример #15
0
    def status_file(self):
        """Get the status file path.

        Returns
        -------
        _status_file : str
            Path "{name}_status.json" joined onto the output directory.

        Raises
        ------
        ConfigError
            If the output directory has not been set yet.
        """
        if self._dirout is not None:
            fname = '{}_status.json'.format(self.name)
            return os.path.join(self._dirout, fname)

        raise ConfigError('Pipeline has not yet been initialized.')
Пример #16
0
    def _analysis_config_preflight(self):
        """Check for the expected config blocks.

        A missing "directories" block only produces a logged warning and a
        ConfigWarning, while a missing "execution_control" block is an
        error.

        Raises
        ------
        ConfigError
            If the config has no "execution_control" block.
        """
        if 'directories' not in self:
            w = ('reV config does not have "directories" block, '
                 'default directories being used.')
            logger.warning(w)
            warn(w, ConfigWarning)

        if 'execution_control' in self:
            return

        e = 'reV config must have "execution_control" block!'
        logger.error(e)
        raise ConfigError(e)
Пример #17
0
    def __init__(self, config):
        """
        Parameters
        ----------
        config : str
            File path to config json (str).

        Raises
        ------
        ConfigError
            If the input is not a string.
        """
        config_is_path = isinstance(config, str)
        if not config_is_path:
            msg = ('Batch config can only take a str filepath, '
                   'but received a "{}".'.format(type(config)))
            raise ConfigError(msg)

        # parent initialization first, then this class's pre-flight checks
        super().__init__(config)
        self._pre_flight()
Пример #18
0
    def _pre_flight_fp(config):
        """Check that the batch config input is an existing .json or .csv
        file path string.

        Parameters
        ----------
        config : str
            File path to config json or csv (str).

        Raises
        ------
        ConfigError
            If the input is not a string or has neither a ".json" nor a
            ".csv" ending.
        FileNotFoundError
            If the path does not exist.
        """
        if not isinstance(config, str):
            msg = ('Batch config can only take a str filepath, '
                   'but received a "{}".'.format(type(config)))
            logger.error(msg)
            raise ConfigError(msg)

        valid_extension = config.endswith(('.json', '.csv'))
        if not valid_extension:
            msg = ('Batch config needs to be .json or .csv but received: {}'
                   .format(config))
            logger.error(msg)
            raise ConfigError(msg)

        if os.path.exists(config):
            return

        msg = 'Batch config does not exist: {}'.format(config)
        logger.error(msg)
        raise FileNotFoundError(msg)
Пример #19
0
    def _parse_dirout(self):
        """Parse pipeline steps for common dirout and unique job names.

        Every config file referenced by the pipeline steps is loaded as an
        AnalysisConfig (with key checking disabled) to collect its output
        directory and, when present, its job name. The common output
        directory is stored in ``self._dirout``.

        Raises
        ------
        ConfigError
            If the steps do not share exactly one output directory, or if
            two or more steps share a job name.
        """
        dirouts = []
        names = []
        for di in self.pipeline_steps:
            for f_config in di.values():
                config = AnalysisConfig(f_config, check_keys=False)
                dirouts.append(config.dirout)

                # the job name is optional in a step config
                if 'name' in config:
                    names.append(config.name)

        unique_dirouts = set(dirouts)
        if len(unique_dirouts) != 1:
            raise ConfigError('Pipeline steps must have a common output '
                              'directory but received {} different '
                              'directories.'.format(len(unique_dirouts)))

        self._dirout = dirouts[0]

        n_duplicates = len(names) - len(set(names))
        if n_duplicates:
            raise ConfigError('Pipeline steps must have unique job names '
                              'but received {} duplicate names.'
                              .format(n_duplicates))
Пример #20
0
    def check_overwrite_keys(self, primary_key, *overwrite_keys):
        """
        Raise a ConfigError if any of the overwrite keys is present in the
        config.

        Parameters
        ----------
        primary_key : str
            Primary key that overwrites overwrite_keys, used for error message
        overwrite_keys : str
            Key(s) that would be overwritten by the primary key

        Raises
        ------
        ConfigError
            If one or more of the overwrite keys is in the config.
        """
        overwrite = [key for key in overwrite_keys if key in self]
        if not overwrite:
            return

        found = ', '.join(overwrite)
        msg = ('A value for "{}" was provided which overwrites the '
               ' following key: "{}", please remove them from the config'.
               format(primary_key, found))
        logger.error(msg)
        raise ConfigError(msg)
Пример #21
0
    def index(self, gid):
        """Get the positional row index (iloc, not loc) of a resource gid
        in the project points.

        Parameters
        ----------
        gid : int
            Resource GID found in the project points gid column.

        Returns
        -------
        ind : int
            Position of the first row whose gid equals the requested gid.

        Raises
        ------
        ConfigError
            If the gid is not in the project points gid column.
        """
        if gid not in self._df['gid'].values:
            e = ('Requested resource gid {} is not present in the project '
                 'points dataframe. Cannot return row index.'.format(gid))
            logger.error(e)
            raise ConfigError(e)

        is_match = self._df['gid'] == gid
        # np.where returns a 1-tuple of positions for the 1D mask
        row_positions = np.where(is_match)[0]

        return row_positions[0]
Пример #22
0
    def _parse_sam_config(sam_config):
        """
        Create the SAM configuration object from the user input.

        Parameters
        ----------
        sam_config : dict | str | SAMConfig
            SAM input configuration ID(s) and file path(s). For a dict,
            keys are the SAM config ID(s) and values are the SAM file
            paths. A single config file str is used as both ID and path.
            A pre loaded SAMConfig object is returned unchanged.

        Returns
        -------
        _sam_config_obj : reV.config.sam_config.SAMConfig
            SAM configuration object.

        Raises
        ------
        ValueError
            If the input is not a SAMConfig, dict, or str (this includes
            list input).
        ConfigError
            If any of the config file paths is not an existing file.
        """
        if isinstance(sam_config, SAMConfig):
            return sam_config

        if isinstance(sam_config, str):
            config_dict = {sam_config: sam_config}
        elif isinstance(sam_config, dict):
            config_dict = sam_config
        else:
            raise ValueError('Cannot parse SAM configs from {}'.format(
                type(sam_config)))

        for key, value in config_dict.items():
            if os.path.isfile(value):
                continue

            raise ConfigError('Invalid SAM config {}: {} does not '
                              'exist'.format(key, value))

        return SAMConfig(config_dict)
Пример #23
0
    def get_file(fname):
        """Load the contents of a config file.

        Parameters
        ----------
        fname : str
            Full path + filename. Must be a .json file.

        Returns
        -------
        config : dict
            Config data as loaded by safe_json_load.

        Raises
        ------
        FileNotFoundError
            If the path does not exist.
        ConfigError
            If the path exists but does not end with ".json".
        """
        logger.debug('Getting "{}"'.format(fname))

        if not os.path.exists(fname):
            raise FileNotFoundError(
                'Configuration file does not exist: "{}"'.format(fname))

        if not fname.endswith('.json'):
            raise ConfigError(
                'Unknown error getting configuration file: "{}"'.format(fname))

        return safe_json_load(fname)
Пример #24
0
    def _check_sets(self):
        """Check the batch sets for required inputs and valid files.

        Raises
        ------
        ConfigError
            If "sets" is absent or not a list, if any set is not a dict or
            lacks the "args" or "files" key, or if any listed file does
            not exist.
        """
        if 'sets' not in self:
            raise ConfigError('Batch config needs "sets" arg!')

        batch_sets = self['sets']
        if not isinstance(batch_sets, list):
            raise ConfigError('Batch config needs "sets" arg to be a list!')

        # required keys per set, checked in this order
        required = (('args', 'All batch sets must have "args" key.'),
                    ('files', 'All batch sets must have "files" key.'))

        for batch_set in batch_sets:
            if not isinstance(batch_set, dict):
                raise ConfigError('Batch sets must be dictionaries.')

            for key, err in required:
                if key not in batch_set:
                    raise ConfigError(err)

            for fpath in batch_set['files']:
                if os.path.exists(fpath):
                    continue

                raise ConfigError('Could not find file to modify in batch '
                                  'jobs: {}'.format(fpath))
Пример #25
0
def from_config(ctx, config_file, verbose):
    """Run reV gen from a config file.

    Loads the generation config, creates the output directory, initializes
    the loggers, copies the run settings into ``ctx.obj`` and submits one
    job per analysis year through submit_from_config.

    Parameters
    ----------
    ctx : object
        Context object whose ``obj`` mapping already holds the "NAME" and
        "VERBOSE" entries and receives the run settings.
    config_file : str
        Generation config file path handed to GenConfig.
    verbose : bool
        Flag to turn on verbose logging. Also enabled by the context
        "VERBOSE" entry or a DEBUG log level in the config.

    Raises
    ------
    ConfigError
        If downscaling is requested for a technology whose name does not
        contain "pv".
    """
    name = ctx.obj['NAME']
    verbose = any([verbose, ctx.obj['VERBOSE']])

    # Instantiate the config object
    config = GenConfig(config_file)

    # take name from config if not default
    if config.name.lower() != 'rev':
        name = config.name
        ctx.obj['NAME'] = name

    # Enforce verbosity if logging level is specified in the config
    if config.log_level == logging.DEBUG:
        verbose = True

    # make output directory if it does not exist. exist_ok avoids the race
    # between checking for and creating the directory when another process
    # creates it at the same time.
    os.makedirs(config.dirout, exist_ok=True)

    # initialize loggers.
    init_mult(name, config.logdir,
              modules=[__name__, 'reV.generation.generation',
                       'reV.config', 'reV.utilities', 'reV.SAM',
                       'rex.utilities'],
              verbose=verbose)

    # Initial log statements
    logger.info('Running reV Generation from config file: "{}"'
                .format(config_file))
    logger.info('Target output directory: "{}"'.format(config.dirout))
    logger.info('Target logging directory: "{}"'.format(config.logdir))
    logger.info('The following project points were specified: "{}"'
                .format(config.get('project_points', None)))
    logger.info('The following SAM configs are available to this run:\n{}'
                .format(pprint.pformat(config.get('sam_files', None),
                                       indent=4)))
    logger.debug('The full configuration input is as follows:\n{}'
                 .format(pprint.pformat(config, indent=4)))

    # set config objects to be passed through invoke to direct methods
    ctx.obj['TECH'] = config.technology
    ctx.obj['POINTS'] = config.project_points
    ctx.obj['SAM_FILES'] = config.sam_files
    ctx.obj['DIROUT'] = config.dirout
    ctx.obj['LOGDIR'] = config.logdir
    ctx.obj['OUTPUT_REQUEST'] = config.output_request
    ctx.obj['TIMEOUT'] = config.timeout
    ctx.obj['SITES_PER_WORKER'] = config.execution_control.sites_per_worker
    ctx.obj['MAX_WORKERS'] = config.execution_control.max_workers
    # NOTE(review): the attribute is spelled "mememory_..." here; confirm
    # against the execution config class before renaming it.
    ctx.obj['MEM_UTIL_LIM'] = \
        config.execution_control.mememory_utilization_limit

    # get downscale request and raise exception if the technology is not pv
    ctx.obj['DOWNSCALE'] = config.downscale
    if config.downscale is not None and 'pv' not in config.technology.lower():
        raise ConfigError('User requested downscaling for a non-solar '
                          'technology. reV does not have this capability at '
                          'the current time. Please contact a developer for '
                          'more information on this feature.')

    ctx.obj['CURTAILMENT'] = None
    if config.curtailment is not None:
        # pass through the curtailment file, not the curtailment object
        ctx.obj['CURTAILMENT'] = config['curtailment']

    # one submission per analysis year, tagged with its position in the list
    for i, year in enumerate(config.years):
        submit_from_config(ctx, name, year, config, i, verbose=verbose)