def _collect_offshore(self, fpath_out): """Collect aggregated offshore data to initialized file. Parameters ---------- fpath_out : str Output filepath. """ if any(self.offshore_gids): offshore_bool = np.isin(self.meta_out['gid'].values, self.offshore_gids) offshore_locs = np.where(offshore_bool)[0] offshore_slice = slice(offshore_locs.min(), offshore_locs.max() + 1) with Outputs(self._gen_fpath, mode='r') as source: dsets = [ d for d in source.datasets if d not in ('meta', 'time_index') ] with Outputs(fpath_out, mode='a') as out: shapes = {d: out.get_dset_properties(d)[0] for d in dsets} for dset in dsets: logger.info( 'Writing offshore output data for "{}".'.format(dset)) if len(shapes[dset]) == 1: out[dset, offshore_slice] = self.out[dset] else: out[dset, :, offshore_slice] = self.out[dset]
def combine_meta(self):
    """
    Load meta data from the source .h5 files and write the combined meta
    to the output .h5 file. If the output file already has a 'meta'
    dataset it is only passed through the meta check.
    """
    with Outputs(self._h5_out, mode='a') as h5_out:
        if 'meta' in h5_out.datasets:
            self._check_meta(h5_out.meta)
            return

        # global and meta attributes are taken from the first source file
        with Outputs(self.h5_files[0], mode='r') as h5_src:
            global_attrs = h5_src.get_attrs()
            meta_attrs = h5_src.get_attrs('meta')

        for name, val in global_attrs.items():
            h5_out._h5.attrs[name] = val

        combined = pd.concat(
            [DatasetCollector.parse_meta(fpath) for fpath in self.h5_files],
            axis=0)
        combined = self._check_meta(combined)

        logger.info('Writing meta data with shape {}'.format(
            combined.shape))
        h5_out._set_meta('meta', combined, attrs=meta_attrs)
def _parse_lcoe_inputs(site_df, cf_file, year):
    """Parse for non-site-specific LCOE inputs.

    Note that ``site_df`` is modified in place: 'annual_energy' and
    'capacity_factor' columns filled with NaN are added when missing.

    Parameters
    ----------
    site_df : pd.DataFrame
        Dataframe of site-specific input variables. Row index corresponds
        to site number/gid (via df.loc not df.iloc), column labels are the
        variable keys that will be passed forward as SAM parameters.
    cf_file : str
        reV generation capacity factor output file with path.
    year : int | str | None
        reV generation year to calculate econ for. The datasets searched
        for, in order, are cf_mean, cf_mean-{year}, cf_mean_{year}, cf.

    Returns
    -------
    site_gids : list
        List of all site gid values from the cf_file.
    calc_aey : bool
        Flag to require calculation of the annual energy yield before
        running LCOE.
    cf_arr : np.ndarray
        Array of cf_mean values for all sites in the cf_file for the
        given year.

    Raises
    ------
    KeyError
        If none of the candidate capacity factor datasets is in cf_file.
    """
    # the cf_file meta gids are used as indexing tools
    with Outputs(cf_file) as cfh:
        site_gids = list(cfh.meta['gid'])

    # annual energy yield must be calculated if it was not input
    calc_aey = 'annual_energy' not in site_df
    if calc_aey:
        site_df.loc[:, 'annual_energy'] = np.nan

    # capacity factor must be present in the site-specific data
    if 'capacity_factor' not in site_df:
        site_df.loc[:, 'capacity_factor'] = np.nan

    # candidate dataset names in order of preference
    candidates = ('cf_mean',
                  'cf_mean-{}'.format(year),
                  'cf_mean_{}'.format(year),
                  'cf')

    with Outputs(cf_file) as cfh:
        for name in candidates:
            if name in cfh.datasets:
                cf_arr = cfh[name]
                break
        else:
            raise KeyError('Could not find cf_mean values for LCOE. '
                           'Available datasets: {}'.format(cfh.datasets))

    return site_gids, calc_aey, cf_arr
def _init_h5(self, mode='w'): """Initialize the single h5 output file with all output requests. Parameters ---------- mode : str Mode to instantiate h5py.File instance """ if self._fpath is not None: if 'w' in mode: logger.info('Initializing full output file: "{}" with mode: {}' .format(self._fpath, mode)) elif 'a' in mode: logger.info('Appending data to output file: "{}" with mode: {}' .format(self._fpath, mode)) attrs = {d: {} for d in self.output_request} chunks = {} dtypes = {} shapes = {} # flag to write time index if profiles are being output write_ti = False for dset in self.output_request: tmp = 'other' if dset in self.OUT_ATTRS: tmp = dset attrs[dset]['units'] = self.OUT_ATTRS[tmp].get('units', 'unknown') attrs[dset]['scale_factor'] = \ self.OUT_ATTRS[tmp].get('scale_factor', 1) chunks[dset] = self.OUT_ATTRS[tmp].get('chunks', None) dtypes[dset] = self.OUT_ATTRS[tmp].get('dtype', 'float32') shapes[dset] = self._get_data_shape(dset, len(self.meta)) if len(shapes[dset]) > 1: write_ti = True # only write time index if profiles were found in output request if write_ti: ti = self.time_index else: ti = None Outputs.init_h5(self._fpath, self.output_request, shapes, attrs, chunks, dtypes, self.meta, time_index=ti, configs=self.sam_metas, run_attrs=self.run_attrs, mode=mode)
def save_agg_to_h5(self, out_fpath, aggregation):
    """
    Save aggregated data to disc in .h5 format

    Dataset attributes, dtypes and chunks are copied from the source
    .h5 file. The source time index is written only if a 2D aggregated
    dataset has the same first dimension as the source file.

    Parameters
    ----------
    out_fpath : str
        Output .h5 file path
    aggregation : dict
        Aggregated values for each aggregation dataset, plus a 'meta'
        entry holding the meta data
    """
    agg_out = aggregation.copy()
    meta = agg_out.pop('meta')

    # best-effort conversion of meta columns to numeric dtypes
    for col in meta.columns:
        try:
            meta[col] = pd.to_numeric(meta[col])
        except (ValueError, TypeError):
            pass

    dsets = list(agg_out)
    shapes, attrs, chunks, dtypes = {}, {}, {}, {}
    time_index = None

    with Resource(self._h5_fpath) as src:
        for name, arr in agg_out.items():
            shape = arr.shape
            shapes[name] = shape

            matches_time_index = (len(shape) == 2
                                  and 'time_index' in src
                                  and shape[0] == src.shape[0])
            if matches_time_index and time_index is None:
                time_index = src.time_index

            attrs[name] = src.get_attrs(dset=name)
            _, dtypes[name], chunks[name] = src.get_dset_properties(name)

    Outputs.init_h5(out_fpath, dsets, shapes, attrs, chunks, dtypes,
                    meta, time_index=time_index)

    with Outputs(out_fpath, mode='a') as out:
        for name, arr in agg_out.items():
            out[name] = arr
def _collect(self):
    """Simple & robust serial collection optimized for low memory usage.

    Each source file is opened in turn and its gid chunks are collected
    one at a time into the output file. Memory usage is logged after each
    source file.
    """
    with Outputs(self._h5_file, mode='a') as f_out:
        for source_fpath in self._source_files:
            with Outputs(source_fpath, mode='r') as f_source:
                all_gids, gid_chunks = \
                    self._get_source_gid_chunks(f_source)

                for chunk_gids in gid_chunks:
                    self._collect_chunk(all_gids, chunk_gids, f_out,
                                        f_source, source_fpath)

            log_mem(logger, log_level='DEBUG')
def test_bad_shape():
    """Negative test: adding data with a bad shape must raise."""
    with tempfile.TemporaryDirectory() as td:
        fp = os.path.join(td, 'outputs.h5')

        with Outputs(fp, 'w') as f:
            f.meta = meta
            f.time_index = time_index

        # a 1D and a 2D array, both expected to be rejected
        bad_arrays = (np.ones(10), np.ones((10, 10)))
        for bad_data in bad_arrays:
            with pytest.raises(HandlerValueError):
                Outputs.add_dataset(fp, 'dset3', bad_data, None, float)
def get_r1_profiles(year=2012):
    """Get the first 100 reV 1.0 ri pv generation profiles.

    The stored 'cf_profile' values are divided by 10000 before being
    returned.
    """
    fname = 'pv_{}_0.h5'.format(year)
    rev1 = os.path.join(TESTDATADIR, 'ri_pv', 'profile_outputs', fname)

    with Outputs(rev1) as cf:
        profiles = cf['cf_profile'][...]

    return profiles / 10000
def test_fout(year):
    """Run econ LCOE with write to disk and compare against rev1."""
    cf_file = os.path.join(
        TESTDATADIR, 'gen_out/gen_ri_pv_{}_x000.h5'.format(year))
    sam_files = os.path.join(
        TESTDATADIR, 'SAM/i_lcoe_naris_pv_1axis_inv13.json')
    r1f = os.path.join(
        TESTDATADIR, 'ri_pv/scalar_outputs/project_outputs.h5')

    dirout = os.path.join(TESTDATADIR, 'lcoe_out')
    fout = 'lcoe_out_{}.h5'.format(year)
    fpath = os.path.join(dirout, fout)

    Econ.reV_run(points=slice(0, 100), sam_files=sam_files,
                 cf_file=cf_file, year=year, output_request='lcoe_fcr',
                 max_workers=1, sites_per_worker=25, points_range=None,
                 fout=fout, dirout=dirout)

    with Outputs(fpath) as rev2:
        lcoe = rev2['lcoe_fcr']

    # the rev1 baseline stores one row per year
    row = {'2012': 0, '2013': 1}[str(year)]
    with h5py.File(r1f, mode='r') as rev1:
        r1_lcoe = rev1['pv']['lcoefcr'][row, 0:100] * 1000

    is_close = np.allclose(lcoe, r1_lcoe, rtol=RTOL, atol=ATOL)

    # the output file is removed before asserting
    if PURGE_OUT:
        os.remove(fpath)

    assert is_close
def time_index(self):
    """Get the source time index.

    The time index is read from the generation file on first access and
    cached on the instance afterwards.
    """
    if self._time_index is not None:
        return self._time_index

    with Outputs(self._gen_fpath, mode='r') as source:
        self._time_index = source.time_index

    return self._time_index
def _init_offshore_out_arrays(self):
    """Get a dictionary of initialized output arrays for offshore outputs.

    Returns
    -------
    out_arrays : dict
        Dictionary of float32 output arrays filled with zeros for offshore
        data. Has keys for all datasets present in gen_fpath except
        'time_index' and 'meta'. The site dimension of every array is the
        length of meta_out_offshore.
    """
    out_arrays = {}

    with Outputs(self._gen_fpath, mode='r') as source:
        for name in source.datasets:
            if name in ('time_index', 'meta'):
                continue

            src_shape = source.get_dset_properties(name)[0]
            n_sites = len(self.meta_out_offshore)

            # 1D datasets have only a site axis; otherwise the first
            # source dimension is kept ahead of the site axis
            if len(src_shape) == 1:
                arr_shape = (n_sites, )
            else:
                arr_shape = (src_shape[0], n_sites)

            logger.debug('Initializing offshore output data array for '
                         '"{}" with shape {}.'.format(name, arr_shape))
            out_arrays[name] = np.zeros(arr_shape, dtype=np.float32)

    return out_arrays
def meta(self):
    """Get meta data from the source capacity factors file.

    The meta data is loaded on first access and cached. With a cf_file it
    is the cf_file meta restricted to the project sites; without a cf_file
    it is a dataframe holding only the project site gids.

    Returns
    -------
    _meta : pd.DataFrame
        Meta data from capacity factor outputs file.
    """
    if self._meta is None and self.cf_file is not None:
        with Outputs(self.cf_file) as cfh:
            # read the meta once rather than once per reference
            cf_meta = cfh.meta

        # only take meta that belongs to this project's site list
        site_mask = cf_meta['gid'].isin(self.points_control.sites)
        self._meta = cf_meta[site_mask]

        # warn for any offshore site; the previous "> 1" threshold let a
        # single offshore site through without a warning
        if 'offshore' in self._meta and self._meta['offshore'].sum() > 0:
            w = ('Found offshore sites in econ meta data. '
                 'This functionality has been deprecated. '
                 'Please run the reV offshore module to '
                 'calculate offshore wind lcoe.')
            warn(w, OffshoreWindInputWarning)
            logger.warning(w)

    elif self._meta is None and self.cf_file is None:
        self._meta = pd.DataFrame({'gid': self.points_control.sites})

    return self._meta
def is_profile(source_files, dset):
    """
    Check dataset in source files to see if it is a profile.

    Only the first source file is inspected.

    Parameters
    ----------
    source_files : list
        List of .h5 files to collect datasets from
    dset : str
        Dataset to collect

    Returns
    -------
    is_profile : bool
        True if profile (2D dataset), False if not.

    Raises
    ------
    KeyError
        If dset is not in the first source file.
    """
    first_file = source_files[0]
    with Outputs(first_file) as f:
        if dset not in f.datasets:
            raise KeyError(
                'Dataset "{}" not found in source file: "{}"'.format(
                    dset, first_file))

        shape, _, _ = f.get_dset_properties(dset)

    n_dims = len(shape)
    return n_dims == 2
def test_my_collection(dset, group):
    """
    Collect the desired dset and check that all expected multi-year
    datasets are present in the output file.

    Parameters
    ----------
    dset : str
        dset to collect from H5_Files
    group : str | NoneType
        group to collect datasets into
    """
    my_out = os.path.join(TEMP_DIR, "{}-MY.h5".format(dset))

    my_dsets = ['meta']
    my_dsets.extend('{}-{}'.format(dset, year) for year in YEARS)

    if 'profile' in dset:
        MultiYear.collect_profiles(my_out, H5_FILES, dset, group=group)
        my_dsets.extend("time_index-{}".format(year) for year in YEARS)
    else:
        MultiYear.collect_means(my_out, H5_FILES, dset, group=group)
        my_dsets.extend("{}-{}".format(dset, val)
                        for val in ['means', 'stdev'])

    if group is not None:
        my_dsets = ['{}/{}'.format(group, ds) for ds in my_dsets]

    with Outputs(my_out, mode='r') as f:
        out_dsets = f.datasets

    # A plain membership check replaces the deprecated np.in1d and lets
    # the failure message name exactly which datasets are missing.
    missing = [ds for ds in my_dsets if ds not in out_dsets]
    msg = "Missing datasets after collection: {}".format(missing)
    assert not missing, msg

    if PURGE_OUT:
        os.remove(my_out)
def flush(self):
    """Flush the output data in self.out attribute to disk in .h5 format.

    The data to be flushed is read from the instance attribute
    "self._out" and written to the file at "self._fpath". Data is not
    flushed if _fpath is not a string or if ._out is empty.
    """
    nothing_to_write = not (isinstance(self._fpath, str) and self._out)
    if nothing_to_write:
        return

    logger.info('Flushing outputs to disk, target file: "{}"'
                .format(self._fpath))

    # site index range (end inclusive) that this chunk of outputs covers
    first, last = self.out_chunk[0], self.out_chunk[1]
    islice = slice(first, last + 1)

    # append mode adds the results to the existing output file
    with Outputs(self._fpath, mode='a') as f:
        for dset, arr in self._out.items():
            if len(arr.shape) == 1:
                # array of scalars: index along the site axis only
                key = (dset, islice)
            else:
                # 2D array of profiles: all rows, site columns
                key = (dset, slice(None), islice)

            f[key] = arr

    logger.debug('Flushed output successfully to disk.')
def _copy_dset(self, source_h5, dset, meta=None): """ Copy dset_in from source_h5 to multiyear .h5 Parameters ---------- source_h5 : str Path to source .h5 file to copy data from dset : str Dataset to copy meta : pandas.DataFrame If provided confirm that source meta matches given meta """ dset_out = self._create_dset_name(source_h5, dset) if dset_out not in self.datasets: logger.debug("- Collecting {} from {}".format( dset, os.path.basename(source_h5))) with Outputs(source_h5, unscale=False, mode='r') as f_in: if meta is not None: cols = ['latitude', 'longitude'] source_meta = f_in.meta if not meta[cols].equals(source_meta[cols]): raise HandlerRuntimeError('Coordinates do not match') _, ds_dtype, ds_chunks = f_in.get_dset_properties(dset) ds_attrs = f_in.get_attrs(dset=dset) ds_data = f_in[dset] self._create_dset(dset_out, ds_data.shape, ds_dtype, chunks=ds_chunks, attrs=ds_attrs, data=ds_data)
def collect(self, source_files, dset, profiles=False, pass_through=False):
    """
    Collect dataset dset from given list of h5 files

    The meta data of the first source file is copied to the output if it
    is not present yet, and is used to check every source file.

    Parameters
    ----------
    source_files : list
        List of .h5 files to collect datasets from
        NOTE: .h5 file names must indicate the year the data pertains to
    dset : str
        Dataset to collect
    profiles : bool
        Boolean flag to indicate if profiles are being collected
        If True also collect time_index
    pass_through : bool
        Flag to just pass through dataset without name modifications
        (no differences between years, no means or stdevs)
    """
    with Outputs(source_files[0], mode='r') as f_in:
        raw_meta = f_in.h5['meta'][...]

    if 'meta' not in self.datasets:
        logger.debug("Copying meta")
        self._create_dset('meta', raw_meta.shape, raw_meta.dtype,
                          data=raw_meta)

    meta_df = pd.DataFrame(raw_meta)
    for source_fpath in source_files:
        if profiles:
            self._copy_time_index(source_fpath)

        self._copy_dset(source_fpath, dset, meta=meta_df,
                        pass_through=pass_through)
def _collect_onshore(self, fpath_out):
    """Collect non-aggregated onshore data to initialized file.

    When there are no onshore gids, a DatasetCollector is instantiated
    for every dataset with the offshore gids instead, which the log
    message describes as initializing the offshore dataset.

    Parameters
    ----------
    fpath_out : str
        Output filepath.
    """
    with Outputs(self._gen_fpath, mode='r') as source:
        dsets = [d for d in source.datasets
                 if d not in ('meta', 'time_index')]

    # Check for presence by length: any() evaluates the truthiness of each
    # gid, so a lone onshore gid of 0 would wrongly be treated as "none".
    if len(self.onshore_gids) > 0:
        for dset in dsets:
            logger.debug('Collecting onshore data for "{}"'.format(dset))
            DatasetCollector.collect_dset(fpath_out, [self._gen_fpath],
                                          self.onshore_gids, dset)
    else:
        logger.debug('No onshore data in source file to collect.')
        for dset in dsets:
            logger.debug(
                'Initializing offshore dataset "{}".'.format(dset))
            DatasetCollector(fpath_out, [self._gen_fpath],
                             self.offshore_gids, dset)
def _parse_cf_meta(gen_fpath):
    """Parse cf meta dataframe and get masks for onshore/offshore points.

    Parameters
    ----------
    gen_fpath : str
        Full filepath to reV gen h5 output file.

    Returns
    -------
    meta : pd.DataFrame
        Full meta data from gen_fpath with "offshore" column.
    onshore_mask : pd.Series
        Boolean series indicating where onshore sites are
        (offshore flag equal to 0).
    offshore_mask : pd.Series
        Boolean series indicating where offshore sites are
        (offshore flag equal to 1).

    Raises
    ------
    KeyError
        If the meta data has no "offshore" column.
    """
    with Outputs(gen_fpath, mode='r') as out:
        meta = out.meta

    if 'offshore' not in meta:
        e = ('Offshore module cannot run without "offshore" flag in meta '
             'data of gen_fpath: {}'.format(gen_fpath))
        logger.error(e)
        raise KeyError(e)

    offshore_flag = meta['offshore']
    return meta, offshore_flag == 0, offshore_flag == 1
def test_append_data(year):
    """Run econ LCOE with append to a copy of the gen file, then check
    the appended data against rev1 and the untouched original data."""
    original_file = os.path.join(
        TESTDATADIR, 'gen_out/gen_ri_pv_{}_x000.h5'.format(year))
    cf_file = os.path.join(
        TESTDATADIR, 'gen_out/copy_gen_ri_pv_{}_x000.h5'.format(year))
    # work on a copy so the original test data is never modified
    shutil.copy(original_file, cf_file)

    sam_files = os.path.join(
        TESTDATADIR, 'SAM/i_lcoe_naris_pv_1axis_inv13.json')
    r1f = os.path.join(
        TESTDATADIR, 'ri_pv/scalar_outputs/project_outputs.h5')

    Econ.reV_run(points=slice(0, 100), sam_files=sam_files,
                 cf_file=cf_file, cf_year=year, output_request='lcoe_fcr',
                 max_workers=1, sites_per_worker=25, points_range=None,
                 append=True)

    with Outputs(cf_file) as appended:
        new_dsets = appended.dsets
        cf_profile = appended['cf_profile']
        lcoe = appended['lcoe_fcr']
        meta = appended.meta
        ti = appended.time_index

    with Outputs(original_file) as original:
        og_dsets = original.dsets
        og_profiles = original['cf_profile']
        og_meta = original.meta
        og_ti = original.time_index

    # the rev1 baseline stores one row per year
    row = {'2012': 0, '2013': 1}[str(year)]
    with h5py.File(r1f, mode='r') as rev1:
        r1_lcoe = rev1['pv']['lcoefcr'][row, 0:100] * 1000

    # the copied file is removed before asserting
    if PURGE_OUT:
        os.remove(cf_file)

    assert np.allclose(lcoe, r1_lcoe, rtol=RTOL, atol=ATOL)
    assert np.allclose(cf_profile, og_profiles)
    assert_frame_equal(meta, og_meta)
    assert all(ti == og_ti)
    assert all([name in new_dsets for name in og_dsets])
def time_index(self):
    """Get the generation resource time index data.

    The time index is read from the cf_file on first access if the file
    has a 'time_index' dataset. The value stays None when no cf_file is
    set or the dataset is missing.
    """
    needs_load = self._time_index is None and self.cf_file is not None
    if not needs_load:
        return self._time_index

    with Outputs(self.cf_file) as cfh:
        if 'time_index' in cfh.datasets:
            self._time_index = cfh.time_index

    return self._time_index
def combine_time_index(self):
    """
    Copy the time_index of the first source .h5 file, with its
    attributes, to the output .h5 file. A CollectionWarning is issued and
    nothing is written if the source file has no time_index.
    """
    with Outputs(self.h5_files[0], mode='r') as src:
        if 'time_index' not in src.datasets:
            warn("'time_index' was not processed as it is not "
                 "present in .h5 files to be combined.",
                 CollectionWarning)
            return

        time_index = src.time_index
        attrs = src.get_attrs('time_index')

    if time_index is None:
        return

    with Outputs(self._h5_out, mode='a') as dst:
        dst._set_time_index('time_index', time_index, attrs=attrs)
def _pre_collect(self):
    """Run a pre-collection check and get relevant dset attrs.

    The output dataset is created in the output file if it does not
    exist yet.

    Returns
    -------
    attrs : dict
        Dictionary of dataset attributes for the dataset being collected.
    axis : int
        Axis size (1 is 1D array, 2 is 2D array)
    site_mem_req : float
        Memory requirement in bytes to collect a single site from one
        source file.

    Raises
    ------
    CollectionRuntimeError
        If a 2D dataset is collected before 'time_index' is in the output
        file, or if the dataset is neither 1D nor 2D.
    """
    with Outputs(self._source_files[0], mode='r') as f:
        shape, dtype, chunks = f.get_dset_properties(self._dset_in)
        attrs = f.get_attrs(self._dset_in)

    # Take the dimensionality from the shape already retrieved instead of
    # indexing the dataset, which loads all of its data into memory.
    axis = len(shape)

    with Outputs(self._h5_file, mode='a') as f:
        if axis == 1:
            dset_shape = (len(f), )
        elif axis == 2:
            if 'time_index' in f.datasets:
                dset_shape = f.shape
            else:
                m = ("'time_index' must be combined "
                     "before profiles can be "
                     "combined.")
                logger.error(m)
                raise CollectionRuntimeError(m)
        else:
            m = ('Cannot collect dset "{}" with '
                 'axis {}'.format(self._dset_in, axis))
            logger.error(m)
            raise CollectionRuntimeError(m)

        if self._dset_out not in f.datasets:
            f._create_dset(self._dset_out, dset_shape, dtype,
                           chunks=chunks, attrs=attrs)

    site_mem_req = self._get_site_mem_req(shape, dtype)

    return attrs, axis, site_mem_req
def add_dataset(cls, h5_file, h5_dir, dset_name, dset_out=None,
                file_prefix=None, mem_util_lim=0.7):
    """
    Collect and add dataset to h5_file from h5_dir

    The meta data already in h5_file is used as the points to collect.

    Parameters
    ----------
    h5_file : str
        Path to .h5 file into which data will be collected
    h5_dir : str
        Root directory containing .h5 files to combine
    dset_name : str
        Dataset to be collected. If source shape is 2D, time index will
        be collected.
    dset_out : str
        Dataset to collect means into
    file_prefix : str
        .h5 file prefix, if None collect all files on h5_dir
    mem_util_lim : float
        Memory utilization limit (fractional). This sets how many sites
        will be collected at a time.
    """
    # file pattern is only used for the log message
    pattern = "*.h5" if file_prefix is None \
        else "{}*.h5".format(file_prefix)

    logger.info(
        'Collecting "{}" from {} files in {} and adding to {}'.format(
            dset_name, pattern, h5_dir, h5_file))
    t_start = time.time()

    with Outputs(h5_file, mode='r') as f:
        points = f.meta

    clt = cls(h5_file, h5_dir, points, file_prefix=file_prefix)

    is_multi_dim = len(clt.get_dset_shape(dset_name)) > 1
    if is_multi_dim:
        clt.combine_time_index()
        logger.debug("\t- 'time_index' collected")

    DatasetCollector.collect_dset(clt._h5_out, clt.h5_files, clt.gids,
                                  dset_name, dset_out=dset_out,
                                  mem_util_lim=mem_util_lim)
    logger.debug("\t- Collection of '{}' complete".format(dset_name))

    minutes = (time.time() - t_start) / 60
    logger.info('{} collected'.format(dset_name))
    logger.debug('\t- Collection took {:.4f} minutes'.format(minutes))
def test_gen_from_config(runner, tech):  # noqa: C901
    """Run generation from a config via the CLI, read the CF profiles
    written to disk and compare against rev1.

    Parameters
    ----------
    runner : object
        CLI test runner providing ``invoke``.
    tech : str
        Technology to test, 'pv' or 'wind'.
    """
    job_name = 'config_test_{}'.format(tech)

    # an unsupported tech now fails here with a KeyError naming it,
    # instead of an unbound local variable further down
    fconfig = {'pv': 'local_pv.json', 'wind': 'local_wind.json'}[tech]

    config = os.path.join(TESTDATADIR,
                          'config/{}'.format(fconfig)).replace('\\', '/')
    config_obj = GenConfig(config)

    cli_result = runner.invoke(main, ['-n', job_name, '-c', config,
                                      'generation'])

    # Format the traceback only on failure. print_exception() returns
    # None and prints unconditionally, so it cannot build the message.
    failed = cli_result.exit_code != 0
    tb_text = ''
    if failed and cli_result.exc_info:
        tb_text = ''.join(traceback.format_exception(*cli_result.exc_info))
    assert not failed, 'Failed with error {}'.format(tb_text)

    # get reV 2.0 generation profiles from disk
    rev2_profiles = None
    flist = os.listdir(config_obj.dirout)
    for fname in flist:
        if job_name in fname and fname.endswith('.h5'):
            with Outputs(os.path.join(config_obj.dirout, fname), 'r') as cf:

                msg = 'cf_profile not written to disk'
                assert 'cf_profile' in cf.datasets, msg
                rev2_profiles = cf['cf_profile']

                msg = 'monthly_energy not written to disk'
                assert 'monthly_energy' in cf.datasets, msg
                monthly = cf['monthly_energy']
                assert monthly.shape == (12, 10)

            # only the first matching output file is checked
            break

    if rev2_profiles is None:
        msg = ('reV gen from config failed for "{}"! Could not find '
               'output file in flist: {}'.format(tech, flist))
        raise RuntimeError(msg)

    # get reV 1.0 generation profiles
    rev1_profiles = get_r1_profiles(year=config_obj.years[0], tech=tech)
    rev1_profiles = rev1_profiles[:, config_obj.parse_points_control().sites]

    profiles_match = np.allclose(rev1_profiles, rev2_profiles,
                                 rtol=RTOL, atol=ATOL)

    LOGGERS.clear()
    if profiles_match and PURGE_OUT:
        # remove output files if test passes.
        for fname in os.listdir(config_obj.dirout):
            os.remove(os.path.join(config_obj.dirout, fname))

    msg = ('reV generation from config input failed for "{}" module!'.format(
        tech))
    assert profiles_match is True, msg
def _init_fout(self, fpath_out):
    """
    Initialize the offshore aggregated output file with the output meta
    data and time index, reusing the attributes of the source file's
    'meta' and 'time_index' datasets.

    Parameters
    ----------
    fpath_out : str
        Output filepath.
    """
    logger.debug('Initializing offshore output file: {}'.format(fpath_out))

    with Outputs(self._gen_fpath, mode='r') as source:
        src_attrs = {name: source.get_attrs(dset=name)
                     for name in ('meta', 'time_index')}

    # mode='w' starts the output file from scratch
    with Outputs(fpath_out, mode='w') as out:
        out._set_meta('meta', self.meta_out, attrs=src_attrs['meta'])
        out._set_time_index('time_index', self.time_index,
                            attrs=src_attrs['time_index'])
def _purge_chunks(self):
    """Remove the chunked files (after collection). Will not delete files
    if any datasets were not collected.

    The datasets of the first chunked file are compared against the
    datasets of the collected output file.
    """
    with Outputs(self._h5_out, mode='r') as collected:
        dsets_collected = collected.datasets

    with Outputs(self.h5_files[0], mode='r') as chunk:
        dsets_source = chunk.datasets

    missing = [name for name in dsets_source
               if name not in dsets_collected]

    if not any(missing):
        for fpath in self.h5_files:
            os.remove(fpath)
        return

    w = ('Not purging chunked output files. These dsets '
         'have not been collected: {}'.format(missing))
    warn(w, CollectionWarning)
    logger.warning(w)
def _get_gen_profile(site, site_df, cf_file, cf_year, inputs):
    """Get the single-site generation time series and add to inputs dict.

    The generation profile is the site's capacity factor profile
    multiplied by the system capacity.

    Parameters
    ----------
    site : int
        Site gid.
    site_df : pd.DataFrame
        Dataframe of site-specific input variables. Row index corresponds
        to site number/gid (via df.loc not df.iloc), column labels are the
        variable keys that will be passed forward as SAM parameters.
    cf_file : str
        reV generation capacity factor output file with path.
    cf_year : int | str | None
        reV generation year to calculate econ for. The datasets searched
        for, in order, are cf_profile, cf_profile-{cf_year} and
        cf_profile_{cf_year}.
    inputs : dict
        Dictionary of SAM input parameters.

    Returns
    -------
    inputs : dict
        Dictionary of SAM input parameters with the generation profile
        added under the 'gen' key.

    Raises
    ------
    KeyError
        If none of the candidate cf_profile datasets is in cf_file.
    """
    sys_cap = Economic._parse_sys_cap(site, inputs, site_df)

    # candidate dataset names in order of preference
    candidates = ('cf_profile',
                  'cf_profile-{}'.format(cf_year),
                  'cf_profile_{}'.format(cf_year))

    with Outputs(cf_file) as cfh:
        # column position of the requested site in the cf_file
        isite = list(cfh.meta['gid']).index(site)

        for name in candidates:
            if name in cfh.datasets:
                gen = cfh[name, :, isite] * sys_cap
                break
        else:
            raise KeyError('Could not find cf_profile values for '
                           'SingleOwner. Available datasets: {}'.format(
                               cfh.datasets))

    inputs['gen'] = gen

    return inputs
def test_append_multi_node(node):
    """Test econ multi node with append flag ON using a real reV run from
    8/17/2020"""
    gen_dir = os.path.join(TESTDATADIR, 'gen_out')
    original_file = os.path.join(
        gen_dir, 'pv_atb20_gen_1998_node0{}.h5'.format(node))
    cf_file = os.path.join(
        gen_dir, 'copy_pv_atb20_gen_1998_node0{}.h5'.format(node))
    # work on a copy so the original test data is never modified
    shutil.copy(original_file, cf_file)

    sam_files = {'default': os.path.join(
        TESTDATADIR, 'SAM/pv_tracking_atb2020.json')}
    points = os.path.join(
        TESTDATADIR,
        'config/nsrdb_projpoints_atb2020_capcostmults_subset.csv')
    site_data = os.path.join(
        TESTDATADIR,
        'config/nsrdb_sitedata_atb2020_capcostmults_subset.csv')

    econ = Econ.reV_run(points=points, sam_files=sam_files,
                        cf_file=cf_file, year=1998,
                        output_request=('lcoe_fcr', 'capital_cost'),
                        max_workers=1, sites_per_worker=25,
                        points_range=None, append=True,
                        site_data=site_data)

    with Outputs(original_file) as baseline:
        data_baseline = baseline['lcoe_fcr']

    with Outputs(cf_file) as appended:
        meta = appended.meta
        data_test = appended['lcoe_fcr']
        test_cap_cost = appended['capital_cost']

    # the copied file is removed before asserting
    if PURGE_OUT:
        os.remove(cf_file)

    assert np.allclose(data_baseline, data_test)

    site_df = pd.read_csv(site_data)
    in_meta = site_df.gid.isin(meta.gid)
    sd_cap_cost = site_df.loc[in_meta, 'capital_cost']
    assert np.allclose(test_cap_cost, sd_cap_cost)
    assert np.allclose(econ.out['capital_cost'], sd_cap_cost)
def _get_farm_gen_data(gen_fpath, meta,
                       ignore=('meta', 'time_index', 'lcoe_fcr')):
    """Get the aggregated generation data for a single wind farm.

    Parameters
    ----------
    gen_fpath : str
        Full filepath to reV gen h5 output file.
    meta : pd.DataFrame
        Offshore resource meta data for resource pixels belonging to the
        single wind farm. The meta index should correspond to the gids in
        the gen_fpath.
    ignore : list | tuple
        List of datasets to ignore and not retrieve.

    Returns
    -------
    gen_data : dict
        Dictionary of all available generation datasets. Keys are reV gen
        output dataset names, values are spatial averages - scalar
        resource data (cf_mean) gets averaged to one offshore farm value
        (float), profiles (cf_profile) gets averaged to one offshore farm
        profile (1D arrays).

    Raises
    ------
    KeyError
        If cf_mean is not among the datasets to retrieve.
    """
    with Outputs(gen_fpath, mode='r', unscale=True) as out:
        dsets = [name for name in out.datasets if name not in ignore]

        if 'cf_mean' not in dsets:
            m = ('Offshore wind data aggregation needs cf_mean but reV '
                 'gen output file only had: {}'.format(out.datasets))
            logger.error(m)
            raise KeyError(m)

        farm_locs = meta.index.values
        gen_data = {}
        for name in dsets:
            n_dims = len(out.get_dset_properties(name)[0])
            if n_dims == 1:
                # scalar data: average over the farm's sites
                gen_data[name] = out[name, farm_locs].mean()
            else:
                # profiles: average across the site axis
                gen_data[name] = out[name, :, farm_locs].mean(axis=1)

    return gen_data