Exemplo n.º 1
0
    def _check_dset_shape(self, dset_data):
        """
        Check to ensure that dataset array is of the proper shape

        Parameters
        ----------
        dset_data : ndarray
            Dataset data array
        """
        dset_shape = dset_data.shape
        if len(dset_shape) == 1:
            shape = len(self)
            if shape:
                shape = (shape,)
                if dset_shape != shape:
                    raise HandlerValueError("data is not of the proper shape:"
                                            " {}".format(shape))
            else:
                raise HandlerRuntimeError("'meta' has not been loaded")
        else:
            shape = self.shape
            if shape:
                if dset_shape != shape:
                    raise HandlerValueError("data is not of the proper shape:"
                                            " {}".format(shape))
            else:
                raise HandlerRuntimeError("'meta' and 'time_index' have not "
                                          "been loaded")
Exemplo n.º 2
0
    def _compute_stdev(self, dset_out, means=None):
        """
        Compute multi-year standard deviation for given dataset

        Parameters
        ----------
        dset_out : str
            Multi-year stdev dataset name
        means : ndarray
            Array of pre-computed means

        Returns
        -------
        my_stdev : ndarray
            Array of multi-year standard deviations
        """
        if means is None:
            means = self._compute_means("{}-means".format(dset_out))

        source_dsets = self._get_source_dsets(dset_out)

        my_stdev = np.zeros(means.shape, dtype='float32')
        for ds in source_dsets:
            if self.h5[ds].shape == my_stdev.shape:
                my_stdev += (self[ds] - means)**2
            else:
                raise HandlerRuntimeError("{} shape {} should be {}"
                                          .format(ds, self.h5[ds].shape,
                                                  my_stdev.shape))

        my_stdev = np.sqrt(my_stdev / len(source_dsets))
        self._update_dset(dset_out, my_stdev)

        return my_stdev
Exemplo n.º 3
0
    def _compute_means(self, dset_out):
        """
        Compute the multi-year mean for the given dataset.

        Parameters
        ----------
        dset_out : str
            Multi-year means dataset name

        Returns
        -------
        my_means : ndarray
            Array of multi-year means
        """
        dsets = self._get_source_dsets(dset_out)
        logger.debug('\t- Computing {} from {}'.format(dset_out, dsets))

        my_means = np.zeros(len(self), dtype='float32')
        for name in dsets:
            src_shape = self.h5[name].shape
            if src_shape != my_means.shape:
                raise HandlerRuntimeError("{} shape {} should be {}"
                                          .format(name, src_shape,
                                                  my_means.shape))

            # running sum over the per-year source datasets
            my_means += self[name]

        my_means /= len(dsets)
        self._update_dset(dset_out, my_means)

        return my_means
Exemplo n.º 4
0
    def _check_data_dtype(data, dtype, scale_factor=1):
        """
        Check data dtype and scale if needed

        Parameters
        ----------
        data : ndarray
            Data to be written to disc
        dtype : str
            dtype of data on disc
        scale_factor : int
            Scale factor to scale data to integer (if needed)

        Returns
        -------
        data : ndarray
            Data ready for writing to disc:
            - Scaled and converted to dtype
        """
        if not np.issubdtype(data.dtype, np.dtype(dtype)):
            if scale_factor == 1:
                raise HandlerRuntimeError("A scale_factor is needed to"
                                          "scale data to {}.".format(dtype))

            # apply scale factor and dtype
            data = np.multiply(data, scale_factor)
            if np.issubdtype(dtype, np.integer):
                data = np.round(data)

            data = data.astype(dtype)

        return data
Exemplo n.º 5
0
    def _copy_dset(self, source_h5, dset, meta=None):
        """
        Copy dset_in from source_h5 to multiyear .h5

        Parameters
        ----------
        source_h5 : str
            Path to source .h5 file to copy data from
        dset : str
            Dataset to copy
        meta : pandas.DataFrame
            If provided confirm that source meta matches given meta
        """
        dset_out = self._create_dset_name(source_h5, dset)
        if dset_out not in self.datasets:
            logger.debug("- Collecting {} from {}".format(
                dset, os.path.basename(source_h5)))
            with Outputs(source_h5, unscale=False, mode='r') as f_in:
                if meta is not None:
                    cols = ['latitude', 'longitude']
                    source_meta = f_in.meta
                    if not meta[cols].equals(source_meta[cols]):
                        raise HandlerRuntimeError('Coordinates do not match')

                _, ds_dtype, ds_chunks = f_in.get_dset_properties(dset)
                ds_attrs = f_in.get_attrs(dset=dset)
                ds_data = f_in[dset]

            self._create_dset(dset_out,
                              ds_data.shape,
                              ds_dtype,
                              chunks=ds_chunks,
                              attrs=ds_attrs,
                              data=ds_data)
Exemplo n.º 6
0
    def available_capacity(self, gid):
        """
        Get available capacity for given line

        Parameters
        ----------
        gid : int
            Unique id of feature of interest

        Returns
        -------
        avail_cap : float
            Available capacity = capacity * available fraction
            default = 10%
        """
        feature = self[gid]

        # prefer a pre-computed available capacity; otherwise derive it
        # from a substation's connected lines
        if 'avail_cap' in feature:
            return feature['avail_cap']

        if 'lines' in feature:
            return self._substation_capacity(feature['lines'])

        msg = ('Could not parse available capacity from feature: {}'.
               format(feature))
        logger.error(msg)
        raise HandlerRuntimeError(msg)
Exemplo n.º 7
0
    def _create_dset(self, ds_name, shape, dtype, chunks=None, attrs=None,
                     data=None, replace=True):
        """
        Initialize dataset

        Parameters
        ----------
        ds_name : str
            Dataset name
        shape : tuple
            Dataset shape
        dtype : str
            Dataset numpy dtype
        chunks : tuple
            Dataset chunk size
        attrs : dict
            Dataset attributes
        data : ndarray
            Dataset data array
        replace : bool
            If previous dataset exists with the same name, it will be replaced.
        """
        if self.writable:
            if ds_name in self.datasets and replace:
                del self.h5[ds_name]

            elif ds_name in self.datasets:
                old_shape, old_dtype, _ = self.get_dset_properties(ds_name)
                if old_shape != shape or old_dtype != dtype:
                    e = ('Trying to create dataset "{}", but already exists '
                         'with mismatched shape and dtype. New shape/dtype '
                         'is {}/{}, previous shape/dtype is {}/{}'
                         .format(ds_name, shape, dtype, old_shape, old_dtype))
                    logger.error(e)
                    raise HandlerRuntimeError(e)

            if ds_name not in self.datasets:
                chunks = self._check_chunks(chunks, data=data)
                ds = self.h5.create_dataset(ds_name, shape=shape, dtype=dtype,
                                            chunks=chunks)

            if attrs is not None:
                for key, value in attrs.items():
                    ds.attrs[key] = value

            if data is not None:
                ds[...] = data
Exemplo n.º 8
0
    def _check_group(self, group):
        """
        Ensure group is in .h5 file

        Parameters
        ----------
        group : str
            Group of interest
        """
        if group is not None:
            if group not in self._h5:
                try:
                    if self.writable:
                        self._h5.create_group(group)
                except Exception as ex:
                    msg = ('Cannot create group {}: {}'
                           .format(group, ex))
                    raise HandlerRuntimeError(msg)

        return group
Exemplo n.º 9
0
    def _set_ds_array(self, ds_name, arr, ds_slice):
        """
        Write ds to disk

        Parameters
        ----------
        ds_name : str
            Dataset name
        arr : ndarray
            Dataset data array
        ds_slice : tuple
            Dataset slicing that corresponds to arr
        """
        if ds_name not in self.datasets:
            raise HandlerRuntimeError('{} must be initialized!'.format(ds_name))

        disc_dtype = self.h5[ds_name].dtype
        scale = self.get_scale(ds_name)
        region = parse_slice(ds_slice)
        # scale/convert the incoming array to the on-disk dtype before write
        self.h5[ds_name][region] = self._check_data_dtype(arr, disc_dtype,
                                                          scale)
Exemplo n.º 10
0
    def _copy_dset(self, source_h5, dset, meta=None, pass_through=False):
        """
        Copy dset_in from source_h5 to multiyear .h5

        Parameters
        ----------
        source_h5 : str
            Path to source .h5 file to copy data from
        dset : str
            Dataset to copy
        meta : pandas.DataFrame
            If provided confirm that source meta matches given meta
        pass_through : bool
            Flag to just pass through dataset without name modifications
            (no differences between years, no means or stdevs)
        """
        if pass_through:
            dset_out = dset
        else:
            dset_out = self._create_dset_name(source_h5, dset)

        if dset_out not in self.datasets:
            logger.debug("- Collecting {} from {}"
                         .format(dset, os.path.basename(source_h5)))
            with Outputs(source_h5, unscale=False, mode='r') as f_in:
                if meta is not None:
                    cols = get_lat_lon_cols(meta)
                    source_meta = f_in.meta
                    if not meta[cols].equals(source_meta[cols]):
                        raise HandlerRuntimeError('Coordinates do not match')

                _, ds_dtype, ds_chunks = f_in.get_dset_properties(dset)
                ds_attrs = f_in.get_attrs(dset=dset)
                ds_data = f_in[dset]

            self._create_dset(dset_out, ds_data.shape, ds_dtype,
                              chunks=ds_chunks, attrs=ds_attrs, data=ds_data)