Example #1
def _find_profiles_with_undetected_melting(bits: list) -> np.ndarray:
    drizzle_and_falling = _find_drizzle_and_falling(*bits[:3])
    transition = ma.diff(drizzle_and_falling, axis=1)
    is_transition = ma.any(transition, axis=1)
    is_melting_layer = ma.any(bits[3], axis=1)
    is_undetected_melting = is_transition & ~is_melting_layer
    is_undetected_melting[is_undetected_melting == 0] = ma.masked
    return is_undetected_melting.astype(int)
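The closing lines use a numpy.ma idiom worth calling out: assigning `ma.masked` through a boolean index masks those elements in place, so only the flagged profiles survive. A minimal, self-contained sketch of the same trick (values are illustrative):

import numpy.ma as ma

# Start from a plain boolean vector, then mask out the False entries so
# downstream code only sees the flagged positions.
flags = ma.array([True, False, True, False])
flags[flags == 0] = ma.masked   # the False entries become masked
print(flags.astype(int))        # [1 -- 1 --]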
Example #2
def rebin_1d(
    x_in: np.ndarray,
    array: Union[np.ndarray, ma.MaskedArray],
    x_new: np.ndarray,
    statistic: str = "mean",
) -> ma.MaskedArray:
    """Rebins 1D array.

    Args:
        x_in: 1-D array with shape (n,).
        array: 1-D input data with shape (m,).
        x_new: 1-D target vector (center points) with shape (N,).
        statistic: Statistic to be calculated. Possible statistics are 'mean', 'std'.
            Default is 'mean'.

    Returns:
        Rebinned data with shape (N,).

    """
    edges = binvec(x_new)
    result = np.zeros(len(x_new))
    array_screened = ma.masked_invalid(
        array, copy=True)  # data may contain nan-values
    mask = ~array_screened.mask  # pylint: disable=E1101
    if ma.any(array_screened[mask]):
        result, _, _ = stats.binned_statistic(x_in[mask],
                                              array_screened[mask],
                                              statistic=statistic,
                                              bins=edges)
    result[~np.isfinite(result)] = 0
    return ma.masked_equal(result, 0)
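A hedged usage sketch for rebin_1d. The snippet omits its imports and the binvec helper, so the binvec below is an assumed stand-in that builds N+1 edges around N evenly spaced center points:

import numpy as np

def binvec(centers):
    # Hypothetical stand-in for the undefined binvec() helper.
    step = centers[1] - centers[0]
    return np.concatenate(([centers[0] - step / 2], centers + step / 2))

x_in = np.linspace(0.0, 10.0, 101)
data = np.sin(x_in)
data[40:60] = np.nan                 # rebin_1d masks these via masked_invalid
x_new = np.linspace(0.5, 9.5, 10)    # target bin centers
# With rebin_1d from the example in scope (it also needs numpy.ma as ma and
# scipy.stats as stats imported):
# rebinned = rebin_1d(x_in, data, x_new)   # masked array of 10 bin means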
Example #3
def pad_masked(m, *args, **kwargs):
    """
    Pad a masked array

    :param m: the array to pad. Cannot contain unmasked NaNs, since NaNs are used as placeholders for the mask.
    :type m: :class:`numpy.ma.masked_array`

    Remaining arguments are the same as :func:`numpy.pad`. But internally it replaces masked values with NaNs so several
    of the pad methods (maximum, minimum, mean, etc.) will always end up padding with NaNs.

    :return: the padded masked array.
    :rtype: :class:`numpy.ma.masked_array`
    """
    if ma.any(np.isnan(m)):
        raise ValueError(
            'm contains unmasked NaNs, it will not work with pad_masked')
    elif not np.issubdtype(m.dtype, np.floating):
        raise NotImplementedError('Not set up to handle non-float arrays')

    # I want to use NaN because I'm afraid of how fill values will interact with some of the methods of extending
    # the array
    a = m.filled(np.nan)
    a = np.pad(a, *args, **kwargs)
    mprime = ma.masked_where(np.isnan(a), a)
    mprime.fill_value = m.fill_value

    return mprime
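A short usage sketch, assuming pad_masked above (and its numpy imports) is in scope: pad a float masked array by one element per side with the edge value; the masked slot survives the round trip through NaN.

import numpy.ma as ma

m = ma.array([1.0, 2.0, 3.0], mask=[False, True, False])
padded = pad_masked(m, 1, mode='edge')   # forwards args to np.pad
print(padded)    # [1.0 1.0 -- 3.0 3.0]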
Example #4
def generate_temporal_neighboring_regions(in_file,
                                          search_path,
                                          out_folder,
                                          variable,
                                          mode,
                                          n_hist=None,
                                          n_cohort=None):
    fh_in = Dataset(in_file, "r")

    in_doy = datetime.strptime(in_file.split("/")[-1][:-3], "%Y%m%d").date()
    in_mask = ma.getmaskarray(fh_in.variables[variable][:])
    out_folder = get_out_path(out_folder)

    temp_candidates = [
        nc_file for nc_file in os.listdir(search_path)
        if nc_file.endswith(".nc")
    ]
    temp_candidates = sorted(
        temp_candidates,
        key=lambda x: datetime.strptime(x[:-3], '%Y%m%d'))[::-1]
    for nc_file in temp_candidates:
        nc_doy = datetime.strptime(nc_file[:-3], "%Y%m%d").date()
        doy_diff = (in_doy - nc_doy).days
        if (mode == "window" and (0 <= doy_diff <= n_hist)) \
                or (mode == "most_recent" and doy_diff > 0) \
                or (mode == "cohort" and (doy_diff // 12 == (n_cohort - 1))):
            fh_doy = Dataset(os.path.join(search_path, nc_file), "r")

            doy_mask = ma.mask_or(
                ma.getmaskarray(fh_doy.variables[variable][:]), in_mask)

            if ma.any(~doy_mask):
                print(in_file, "--", nc_file, doy_diff)
                fh_out = Dataset(os.path.join(out_folder, nc_file), "w")

                for name, dim in fh_doy.dimensions.items():
                    fh_out.createDimension(name, len(dim))

                for v_name, varin in fh_doy.variables.items():
                    if v_name == 'lat' or v_name == 'lon':
                        outVar = fh_out.createVariable(v_name, varin.datatype,
                                                       varin.dimensions)
                        outVar.setncatts(
                            {k: varin.getncattr(k)
                             for k in varin.ncattrs()})
                        outVar[:] = varin[:]
                    else:
                        outVar = fh_out.createVariable(v_name, varin.datatype,
                                                       varin.dimensions)
                        outVar.setncatts(
                            {k: varin.getncattr(k)
                             for k in varin.ncattrs()})
                        outVar[:] = ma.array(varin[:], mask=doy_mask)

                fh_out.close()
                if mode == "most_recent":
                    fh_doy.close()
                    break
            fh_doy.close()
    fh_in.close()
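The gating test `ma.any(~doy_mask)` checks that at least one cell is valid in both grids before an output file is written. A self-contained sketch of that mask combination:

import numpy as np
import numpy.ma as ma

# OR the two mask arrays together, then ask whether any cell remains
# valid in *both* grids.
a = ma.array([[1, 2], [3, 4]], mask=[[True, False], [False, True]])
b = ma.array([[5, 6], [7, 8]], mask=[[True, True], [False, False]])
combined = ma.mask_or(ma.getmaskarray(a), ma.getmaskarray(b))
if ma.any(~combined):
    print("overlap at", np.argwhere(~combined))   # cell (1, 0)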
Example #5
def rebin_1d(x_in, data, x_new, statistic='mean'):
    """Rebins 1D array.

    Args:
        x_in (ndarray): 1-D array with shape (n,).
        data (MaskedArray): 1-D input data with shape (m,).
        x_new (ndarray): 1-D target vector (center points) with shape (N,).
        statistic (str, optional): Statistic to be calculated. Possible
            statistics are 'mean', 'std'. Default is 'mean'.

    Returns:
        MaskedArray: Rebinned data with shape (N,).

    """
    edges = binvec(x_new)
    datai = np.zeros(len(x_new))
    data = ma.masked_invalid(data)  # data may contain nan-values
    mask = ~data.mask  # pylint: disable=E1101
    if ma.any(data[mask]):
        datai, _, _ = stats.binned_statistic(x_in[mask],
                                             data[mask],
                                             statistic=statistic,
                                             bins=edges)
    datai[~np.isfinite(datai)] = 0
    return ma.masked_equal(datai, 0)
Example #6
    def format_and_clean_data_main(self):
        """
        Main function to format and clean data based on choices by the user.
        """
        # Check if over missing_bound percent or missing_bound number of values are missing
        too_many_missing = self.has_too_many_missing(self.init_perc_remove)
        if ma.any(too_many_missing):
            idx, = ma.where(too_many_missing)
            self.xs[idx] = ma.mask_rows(self.xs[idx])

        # Check array to see if it is filled with values or empty
        if ma.all(self.check_for_all()):
            return self.xs

        # Clean outliers
        self.clean_outliers()

        # Take average of neighbor values to fill up to a given missing value gap length
        self.clean_gaps_w_linspace(fill_gap_length=self.max_gap_length)
        if ma.all(ma.count_masked(self.xs[:, :-self.keep_n_values], axis=1)[np.newaxis, :] == 0):
            return self.xs # if no masked values remain in values before recent ones

        # Remove values if they start the array and are then followed by too many masked values
        start_idx = self.find_new_starting_value()

        # If there are over x% blank values left in the original data after above changes,
        # check to see if x% of the blanks fall after the new start year
        too_many_missing = self.has_too_many_missing(self.second_perc_remove) # boolean array
        if ma.any(too_many_missing):
            n_masked = np.array([ma.count_masked(self.xs[i,s_idx:])
                                 for i, s_idx in enumerate(start_idx)]) / self.N > self.perc_remove_after_start_idx
            if ma.any(n_masked):
                idx, = ma.where(n_masked)
                self.xs[idx] = ma.mask_rows(self.xs[idx])

        # To fill in remaining values, run linear regression on non-zero values
        self.clean_gaps_w_lin_regress(start_idx)

        # If linear regression left negative or zero values, then use linear space to fill in middle gaps
        if ma.any(ma.masked_less_equal(self.xs, 0.)):
            self.clean_gaps_w_linspace()
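A simplified sketch of the row-dropping step at the top of this method, with direct masked assignment standing in for ma.mask_rows and an assumed 50% missing-value threshold:

import numpy as np
import numpy.ma as ma

# Count masked cells per row and fully mask rows missing more than half
# their values.
xs = ma.masked_invalid([[1.0, np.nan, np.nan], [4.0, 5.0, 6.0]])
too_many_missing = ma.count_masked(xs, axis=1) / xs.shape[1] > 0.5
if ma.any(too_many_missing):
    idx, = np.where(too_many_missing)
    xs[idx] = ma.masked
print(xs)    # row 0 fully masked, row 1 intact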
Example #7
def rebin_2d(
    x_in: np.ndarray,
    array: ma.MaskedArray,
    x_new: np.ndarray,
    statistic: str = "mean",
    n_min: int = 1,
) -> Tuple[ma.MaskedArray, list]:
    """Rebins 2-D data in one dimension.

    Args:
        x_in: 1-D array with shape (n,).
        array: 2-D input data with shape (n, m).
        x_new: 1-D target vector (center points) with shape (N,).
        statistic: Statistic to be calculated. Possible statistics are 'mean', 'std'.
            Default is 'mean'.
        n_min: Minimum number of points to have good statistics in a bin. Default is 1.

    Returns:
        tuple: Rebinned data with shape (N, m) and indices of bins without enough data.

    Notes:
        0-values are masked in the returned array.

    """
    edges = binvec(x_new)
    result = np.zeros((len(x_new), array.shape[1]))
    array_screened = ma.masked_invalid(
        array, copy=True)  # data may contain nan-values
    for ind, values in enumerate(array_screened.T):
        mask = ~values.mask
        if ma.any(values[mask]):
            result[:, ind], _, _ = stats.binned_statistic(x_in[mask],
                                                          values[mask],
                                                          statistic=statistic,
                                                          bins=edges)
    result[~np.isfinite(result)] = 0
    masked_result = ma.masked_equal(result, 0)

    # Fill bins with not enough profiles
    empty_indices = []
    for ind in range(len(edges) - 1):
        is_data = np.where((x_in > edges[ind]) & (x_in <= edges[ind + 1]))[0]
        if len(is_data) < n_min:
            masked_result[ind, :] = ma.masked
            empty_indices.append(ind)
    if len(empty_indices) > 0:
        logging.info(f"No radar data in {len(empty_indices)} bins")

    return masked_result, empty_indices
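A self-contained sketch of the occupancy check at the end; np.histogram counts points per bin in one call where the explicit np.where loop tests each bin separately (with a slightly different boundary convention: the loop is right-inclusive, histogram is left-inclusive):

import numpy as np

x_in = np.array([0.1, 0.2, 1.4, 2.6, 2.7, 2.8])
edges = np.array([0.0, 1.0, 2.0, 3.0])
counts, _ = np.histogram(x_in, bins=edges)
empty_indices = np.where(counts < 2)[0]   # n_min = 2
print(counts, empty_indices)              # [2 1 3] [1]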
Example #8
def rebin_2d(x_in: np.ndarray,
             array: ma.MaskedArray,
             x_new: np.ndarray,
             statistic: Optional[str] = 'mean',
             n_min: Optional[int] = 1) -> ma.MaskedArray:
    """Rebins 2-D data in one dimension.

    Args:
        x_in: 1-D array with shape (n,).
        array: 2-D input data with shape (n, m).
        x_new: 1-D target vector (center points) with shape (N,).
        statistic: Statistic to be calculated. Possible statistics are 'mean', 'std'.
            Default is 'mean'.
        n_min: Minimum number of points to have good statistics in a bin. Default is 1.

    Returns:
        Rebinned data with shape (N, m).

    Notes:
        0-values are masked in the returned array.

    """
    edges = binvec(x_new)
    result = np.zeros((len(x_new), array.shape[1]))
    array_screened = ma.masked_invalid(
        array, copy=True)  # data may contain nan-values
    for ind, values in enumerate(array_screened.T):
        mask = ~values.mask
        if ma.any(values[mask]):
            result[:, ind], _, bin_no = stats.binned_statistic(
                x_in[mask], values[mask], statistic=statistic, bins=edges)
            if n_min > 1:
                unique, counts = np.unique(bin_no, return_counts=True)
                result[unique[counts < n_min] - 1, ind] = 0

    result[~np.isfinite(result)] = 0
    return ma.masked_equal(result, 0)
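A self-contained sketch of the n_min filter, showing what binned_statistic's third return value contains and how the unique/counts pair turns it into per-bin occupancy:

import numpy as np
from scipy import stats

# binned_statistic returns, per input point, the 1-based bin it landed in;
# np.unique with return_counts then gives occupancy, and under-populated
# bins can be zeroed out.
x = np.array([0.1, 0.2, 1.4, 2.6, 2.7, 2.8])
vals = np.ones_like(x)
_, _, bin_no = stats.binned_statistic(x, vals, statistic='mean',
                                      bins=[0.0, 1.0, 2.0, 3.0])
unique, counts = np.unique(bin_no, return_counts=True)
print(unique[counts < 2] - 1)   # [1]: only bin 1 (0-based) is under-populated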
Example #9
def rebin_2d(x_in, data, x_new, statistic='mean', n_min=1):
    """Rebins 2-D data in one dimension.

    Args:
        x_in (ndarray): 1-D array with shape (n,).
        data (MaskedArray): 2-D input data with shape (n, m).
        x_new (ndarray): 1-D target vector (center points)
            with shape (N,).
        statistic (str, optional): Statistic to be calculated. Possible
            statistics are 'mean', 'std'. Default is 'mean'.
        n_min (int): Minimum number of points to have good statistics in a bin.
            Default is 1.

    Returns:
        MaskedArray: Rebinned data with shape (N, m).

    Notes: 0-values are masked in the returned array.

    """
    edges = binvec(x_new)
    datai = np.zeros((len(x_new), data.shape[1]))
    data = ma.masked_invalid(data)  # data may contain nan-values
    for ind, values in enumerate(data.T):
        mask = ~values.mask
        if ma.any(values[mask]):
            datai[:, ind], _, bin_no = stats.binned_statistic(
                x_in[mask], values[mask], statistic=statistic, bins=edges)
            if n_min > 1:
                unique, counts = np.unique(bin_no, return_counts=True)
                datai[unique[counts < n_min] - 1, ind] = 0

    datai[~np.isfinite(datai)] = 0
    return ma.masked_equal(datai, 0)
Example #10
    def __fit__(self, rating, row=True):
        if isinstance(rating, ma.MaskedArray):
            self._rating = rating
        else:
            self._rating = ma.masked_equal(rating, 0)

        self._mean = ma.mean(self._rating, axis=1, keepdims=True)
        self._mean_center_rating = self._rating - self._mean
        self._rating_filled = self._rating.filled(0)

        if row:
            self._sim = person(mean_center_rating=self._mean_center_rating)
        else:
            self._rating = self._rating.T
            self._mean_center_rating = self._mean_center_rating.T
            self._sim = person(mean_center_rating=self._mean_center_rating)

        self._sim[np.diag_indices(self._sim.shape[0])] = -999

        self._skip_columns = np.where(self._rating.count(axis=0) == 0)[0]

        # params
        self._neighborhood = np.argsort(self._sim,
                                        axis=1)[:, -self.config.topk:]
        self._neighborhood_idx = ([
            int(i / self._neighborhood.shape[1])
            for i in range(self._neighborhood.size)
        ], self._neighborhood.flatten())

        if row:
            self._m, self._n = rating.shape
        else:
            self._n, self._m = rating.shape

        if "_weight" not in self.__dict__:
            self._weight = np.random.randn(self._m, self.config.topk)
        if "_m_bias" not in self.__dict__:
            self._m_bias = np.random.randn(self._m)
        if "_n_bias" not in self.__dict__:
            self._n_bias = np.random.randn(self._n)

        assert self._weight.shape[1] == self.config.topk
        assert self._m_bias.shape[0] == self._m
        assert self._n_bias.shape[0] == self._n

        start = time.perf_counter()
        step = self._epoch * self._n

        for epoch in range(self._epoch, self.config.epochs):
            self._epoch = epoch

            for j in range(self._n):

                if j in self._skip_columns:
                    continue

                # forward
                step += 1
                _hat_rating, mid_data = self.__forward__(j)
                _loss = self.__loss__(self._rating[:, j], _hat_rating)

                logger.debug(
                    "[{:4d} step in {:4d} epoch\ttime:{:.2f}s] {}'s loss:{:.2f}"
                    .format(step, self._epoch,
                            time.perf_counter() - start, j, _loss))

                # backward
                _g_m_bias, _g_ngb_m_bias, _g_ngb_n_bias, _g_weight = self.__backward__(
                    _hat_rating, self._rating[:, j], mid_data[0], mid_data[1])

                if not ma.any(_g_m_bias):
                    continue

                for i, g in zip(self._neighborhood.flat, _g_ngb_m_bias.flat):
                    if g is not ma.masked:
                        _g_m_bias[i] += g

                # check gradient
                if self.config.check_gradient:
                    logger.debug("check gradient")
                    self.__check_gradient__(j, _g_weight, _g_m_bias,
                                            _g_ngb_n_bias)

                logger.debug(
                    "[gradient] max(m_bias): {}\tmax(n_bias): {}\tmax(weight): {}"
                    .format(ma.max(ma.abs(_g_m_bias)),
                            ma.max(ma.abs(_g_ngb_n_bias)),
                            ma.max(ma.abs(_g_weight))))

                # update gradient
                self._m_bias -= (self.config.lr / self._m * _g_m_bias
                                 + self.config.wdecay * self._m_bias)
                self._n_bias[j] -= (self.config.lr / self._m * _g_ngb_n_bias
                                    + self.config.wdecay * self._n_bias[j])
                self._weight -= (self.config.lr / self._m * _g_weight
                                 + self.config.wdecay * self._weight)

            logger.debug(
                "[{:4d} epoch\ttime:{:.2f}s] epoch loss:{:.2f}".format(
                    epoch,
                    time.perf_counter() - start,
                    self.__loss__(self._rating, self.__predict__())))
            if epoch % self.config.save_per_epochs == 0:
                self.save()

        if self._epoch % self.config.save_per_epochs != 0:
            self.save()
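A self-contained sketch of the top-k neighborhood selection near the top of __fit__ (illustrative similarity values):

import numpy as np

# argsort each row of the similarity matrix and keep the top-k most similar
# columns; the diagonal is pre-set to -999 so an item never selects itself.
sim = np.array([[-999.0, 0.9, 0.2],
                [0.9, -999.0, 0.5],
                [0.2, 0.5, -999.0]])
topk = 1
neighborhood = np.argsort(sim, axis=1)[:, -topk:]
print(neighborhood.ravel())   # [1 0 1]: nearest neighbor of each row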
Example #11
        targets = get_image_data(lista, 'magp3', 'merrp3', refcat)
        
    color_to_use = lsc.sites.chosecolor(targets['filter'], True)
    colors_to_calculate = set(sum(color_to_use.values(), []))

    # copy average zero points & color terms from the standards to the science images
    if args.exzp:
        with open(args.exzp) as f:
            lista2 = f.read().splitlines()
        standards = get_image_data(lista2)
        standards = standards.group_by(['dayobs', 'shortname', 'instrument', 'filter', 'zcol1', 'zcol2'])
        targets[['zcol1', 'z1', 'dz1', 'c1', 'dc1', 'zcol2', 'z2', 'dz2', 'c2', 'dc2']].mask = True
        for group in standards.groups:
            matches_in_targets = ((targets['dayobs'] == group['dayobs'][0]) & (targets['shortname'] == group['shortname'][0])
                                   & (targets['instrument'] == group['instrument'][0]) & (targets['filter'] == group['filter'][0]))
            if not np.any(matches_in_targets):
                continue
            targets['zcol1'][matches_in_targets] = group['zcol1'][0]
            targets['zcol2'][matches_in_targets] = group['zcol2'][0]
            targets['z1'][matches_in_targets], targets['dz1'][matches_in_targets] = average_in_flux(group['z1'], group['dz1'])
            targets['z2'][matches_in_targets], targets['dz2'][matches_in_targets] = average_in_flux(group['z2'], group['dz2'])
            if np.all(group['dc1']):
                dc1 = np.sum(group['dc1']**-2)**-0.5
                targets['c1'][matches_in_targets] = np.sum(group['c1'] * group['dc1']**-2) * dc1**2
                targets['dc1'][matches_in_targets] = dc1
            else:
                targets['c1'][matches_in_targets] = np.mean(group['c1'])
                targets['dc1'] = 0.
            if np.all(group['dc2']):
                dc2 = np.sum(group['dc2']**-2)**-0.5
                targets['c2'][matches_in_targets] = np.sum(group['c2'] * group['dc2']**-2) * dc2**2
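The dc1/c1 arithmetic above (the snippet cuts off mid-block) is an inverse-variance weighted mean: each value is weighted by 1/sigma**2 and the combined uncertainty is (sum of the weights)**-0.5. A self-contained sketch with illustrative numbers:

import numpy as np

c = np.array([0.10, 0.12, 0.08])   # color terms (illustrative values)
dc = np.array([0.02, 0.04, 0.02])  # their uncertainties
dc_comb = np.sum(dc**-2) ** -0.5
c_comb = np.sum(c * dc**-2) * dc_comb**2
print(c_comb, dc_comb)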
Example #12
def avg_var_missing(var, years, hist_dict, ave_info, file_dict, ave_type,
                    fillValue, timer, depend, fyr):

    '''
    Computes the average of a variable that contains missing values

    @param var         The name of the variable that is being averaged.

    @param years       A list of the years that are in this average

    @param hist_dict   A dictionary that holds file references for all years/months. 

    @param ave_info    A dictionary of the type of average that is to be done.
                       Includes:  type, months_to_average, fn, and weights
                       (weights are not used in this function/average)
    
    @param file_dict   A dictionary which holds file pointers to the input files that
                       are needed by this average calculation.

    @param ave_type    The average type key that indicates which type of average will be done.

    @param fillValue   The value that indicates missing values within the data.

    @param timer       The timer class used for time bookkeeping.

    @param depend      Boolean variable to indicate if this average will be computed from previously calculated files.

    @param fyr         The first year of the average series

    @return var_Ave    The averaged results for this variable across the designated time frame.
    '''
    # If the variable contains missing values, build a mask accumulator that
    # counts how many masked values to exclude from the sum and the divisor.
    count = 0
    first = True
    fetch_time = 0

    first_mask = True
    for yr in years:
        for m in ave_info['months_to_average']:
            timer.start("Variable fetch time")
            # Check if doing a winter average and get the correct year to pull
            if ((ave_type == 'djf' and depend == False) or ave_type == 'next_jan'
                    or ave_type == 'next_feb' or ave_type == 'prev_dec'):
                pull_year = climFileIO.which_winter_year(hist_dict, m, yr,fyr)
            else:
                pull_year = yr
            var_val = rover.fetch_slice(hist_dict,pull_year,m,var,file_dict)
            timer.stop("Variable fetch time")
            var_filled = var_val.filled(fill_value=0) # zero out the masked grid points
            # Get and add mask values to the mask accumulator
            if (first_mask):
                if (MA.any(MA.getmask(var_val))):
                    mask_sum = (MA.getmask(var_val)).astype(int)
                    first_mask = False
            else:
                if (MA.any(MA.getmask(var_val))):
                    mask_sum = mask_sum + (MA.getmask(var_val)).astype(int)
            # Add to the variable value accumulator using the filled, zeroed-out values.
            if (first):
                var_sum = var_filled
                first = False
            else:
                var_sum = var_filled + var_sum
            count+=1
    # Create an inverse of the mask to divide by
    if (first_mask == True):
        inv = count
    else:
        inv = (count - mask_sum)
    # Divide by mask to get average
    np.seterr(divide='ignore', invalid='ignore')
    var_Ave = var_sum / inv
    # Replace any NaN values with the fill value. NaNs will occur if there is a
    # missing value for that array element in all slices that are averaged
    # (i.e. land in ocean files).
    if var_Ave.shape:
        var_Ave[np.isnan(var_Ave)] = fillValue
    else:
        print(var, var_Ave)

    return var_Ave
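A compact, self-contained sketch of the accumulator pattern above (np.errstate is used here as a scoped alternative to the global np.seterr call):

import numpy as np
import numpy.ma as ma

# Sum zero-filled slices, accumulate how many slices were masked at each
# cell, and divide by the per-cell count of valid samples.
slices = [ma.array([1.0, 2.0, 3.0], mask=[False, True, False]),
          ma.array([3.0, 4.0, 5.0], mask=[False, False, True])]
var_sum = sum(s.filled(0) for s in slices)
mask_sum = sum(ma.getmaskarray(s).astype(int) for s in slices)
with np.errstate(divide='ignore', invalid='ignore'):
    ave = var_sum / (len(slices) - mask_sum)
print(ave)   # [2. 4. 3.]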
Example #13
     lista2 = f.read().splitlines()
 standards = get_image_data(lista2)
 standards = standards.group_by(
     ['dayobs', 'shortname', 'instrument', 'filter', 'zcol1', 'zcol2'])
 for icol in [
         'zcol1', 'z1', 'dz1', 'c1', 'dc1', 'zcol2', 'z2', 'dz2', 'c2',
         'dc2'
 ]:
     targets[icol].mask = True
 for group in standards.groups:
     matches_in_targets = (
         (targets['dayobs'] == group['dayobs'][0]) &
         (targets['shortname'] == group['shortname'][0])
         & (targets['instrument'] == group['instrument'][0]) &
         (targets['filter'] == group['filter'][0]))
     if not np.any(matches_in_targets):
         continue
     targets['zcol1'][matches_in_targets] = group['zcol1'][0]
     targets['zcol2'][matches_in_targets] = group['zcol2'][0]
     targets['z1'][matches_in_targets], targets['dz1'][
         matches_in_targets] = average_in_flux(group['z1'],
                                               group['dz1'])
     targets['z2'][matches_in_targets], targets['dz2'][
         matches_in_targets] = average_in_flux(group['z2'],
                                               group['dz2'])
     if np.all(group['dc1']):
         dc1 = np.sum(group['dc1']**-2)**-0.5
         targets['c1'][matches_in_targets] = np.sum(
             group['c1'] * group['dc1']**-2) * dc1**2
         targets['dc1'][matches_in_targets] = dc1
     else:
Example #14
 def combine_masks(self, *masks):
     mask = [ma.any(m) for m in zip(*masks)]
     return mask
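A self-contained sketch of what combine_masks computes, with a vectorized numpy equivalent for comparison:

import numpy as np
import numpy.ma as ma

# zip(*masks) walks the masks position by position and ma.any reports
# whether any entry is set, i.e. an elementwise OR across masks.
m1 = [True, False, False]
m2 = [False, False, True]
print([ma.any(m) for m in zip(m1, m2)])        # [True, False, True]
print(np.logical_or.reduce([m1, m2], axis=0))  # [ True False  True]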
Example #15
def weighted_avg_var_missing(var, years, hist_dict, ave_info, file_dict,
                             ave_type, fillValue, timer, depend, fyr):

    '''
    Computes the average of a variable that contains missing values

    @param var         The name of the variable that is being averaged.

    @param years       A list of the years that are in this average

    @param hist_dict   A dictionary that holds file references for all years/months. 

    @param ave_info    A dictionary of the type of average that is to be done.
                       Includes:  type, months_to_average, fn, and weights
                       (the weights are applied to each month in this average)
    
    @param file_dict   A dictionary which holds file pointers to the input files that
                       are needed by this average calculation.

    @param ave_type    The average type key that indicates which type of average will be done.

    @param fillValue   The value that indicates missing values within the data.

    @param timer       The timer class used for time bookkeeping.

    @param depend      Boolean variable to indicate if this average will be computed from previously calculated files.

    @param fyr         The first year of the average series

    @return var_Ave    The averaged results for this variable across the designated time frame.
    '''
    # If the variable contains missing values, build a mask accumulator that
    # counts how many masked values to exclude from the sum and the divisor.
    count = 0
    first = True
    fetch_time = 0

    first_mask = True
    d_in_m = [31,28,31,30,31,30,31,31,30,31,30,31]
    for yr in years:
        i = 0
        for m in ave_info['months_to_average']:
            timer.start("Variable fetch time")
            # Check if doing a winter average and get the correct year to pull
            if ((ave_type == 'djf' and depend == False) or ave_type == 'next_jan'
                    or ave_type == 'next_feb' or ave_type == 'prev_dec'):
                pull_year = climFileIO.which_winter_year(hist_dict, m, yr,fyr)
            else:
                pull_year = yr
            var_val = rover.fetch_slice(hist_dict,pull_year,m,var,file_dict)
            timer.stop("Variable fetch time")
            
            if (hasattr(var_val, 'filled')):
                var_filled = var_val.filled(fill_value=0) # zero out the masked grid points
            else:
                var_filled = np.ones(var_val.shape)
            # Get and add mask values to the mask accumulator
            if (first_mask):
                if (MA.any(MA.getmask(var_val))):
                    mask_sum = (MA.getmask(var_val)).astype(int)
                    first_mask = False
            else:
                if (MA.any(MA.getmask(var_val))):
                    mask_sum = mask_sum + (MA.getmask(var_val)).astype(int)
            if (first):
                if (ave_type == 'ya'):
                    var_sum = (var_val*d_in_m[m])
                else:
                    var_sum = (var_val*ave_info['weights'][i])
                first = False
            else:
                if (ave_type == 'ya'):
                    var_sum = (var_val*d_in_m[m]) + var_sum
                else:
                    var_sum = (var_val*ave_info['weights'][i]) + var_sum
            i+=1
        count+=1
    # Since the weights are only for 1 year, divide by total number of years
    if (ave_type == 'ya'):
        var_Ave = var_sum * (1/365.)
    else:
        var_Ave = np.divide(var_sum,count)
    # If any values are 0, then replace the var_Ave value with the fill value
    if (first_mask != True):
        var_Ave[mask_sum > 0] = fillValue

    return var_Ave
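A self-contained sketch of the 'ya' (yearly average) branch above: each monthly slice is weighted by its number of days and the yearly total is divided by 365 (stub data):

import numpy as np

d_in_m = np.array([31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31])
monthly = np.ones(12)                        # a constant stub field per month
year_avg = np.sum(monthly * d_in_m) / 365.0
print(year_avg)                              # 1.0 for a constant field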
Example #16
def avg_var_missing(var, years, hist_dict, ave_info, file_dict, ave_type,
                    fillValue, timer, depend, fyr):

    '''
    Computes the average of a variable that contains missing values

    @param var         The name of the variable that is being averaged.

    @param years       A list of the years that are in this average

    @param hist_dict   A dictionary that holds file references for all years/months. 

    @param ave_info    A dictionary of the type of average that is to be done.
                       Includes:  type, months_to_average, fn, and weights
                       (weights are not used in this function/average)
    
    @param file_dict   A dictionary which holds file pointers to the input files that
                       are needed by this average calculation.

    @param ave_type    The average type key that indicates which type of average will be done.

    @param fillValue   The value that indicates missing values within the data.

    @param timer       The timer class used for time bookkeeping.

    @param depend      Boolean variable to indicate if this average will be computed from previously calculated files.

    @param fyr         The first year of the average series

    @return var_Ave    The averaged results for this variable across the designated time frame.
    '''
    # If the variable contains missing values, build a mask accumulator that
    # counts how many masked values to exclude from the sum and the divisor.
    count = 0
    first = True
    fetch_time = 0

    first_mask = True
    for yr in years:
        for m in ave_info['months_to_average']:
            timer.start("Variable fetch time")
            # Check if doing a winter average and get the correct year to pull
            if ((ave_type == 'djf' and depend == False) or ave_type == 'next_jan'
                    or ave_type == 'next_feb' or ave_type == 'prev_dec'):
                pull_year = climFileIO.which_winter_year(hist_dict, m, yr,fyr)
            else:
                pull_year = yr
            var_val = rover.fetch_slice(hist_dict,pull_year,m,var,file_dict)
            timer.stop("Variable fetch time")
            if (hasattr(var_val, 'filled')):
                var_filled = var_val.filled(fill_value=0) # zero out the masked grid points
            else:
                var_filled = np.ones(var_val.shape)
            # Get and add mask values to the mask accumulator
            if (first_mask):
                if (MA.any(MA.getmask(var_val))):
                    mask_sum = (MA.getmask(var_val)).astype(int)
                    first_mask = False
            else:
                if (MA.any(MA.getmask(var_val))):
                    mask_sum = mask_sum + (MA.getmask(var_val)).astype(int)
            # Add to the variable value accumulator using the filled, zeroed-out values.
            if (first):
                var_sum = var_filled
                first = False
            else:
                var_sum = var_filled + var_sum
            count+=1
    # Create an inverse of the mask to divide by
    if (first_mask == True):
        inv = count
    else:
        inv = (count - mask_sum)
    # Divide by mask to get average
    np.seterr(divide='ignore', invalid='ignore')
    var_Ave = var_sum / inv
    # Replace any NaN values with the fill value. NaNs will occur if there is a
    # missing value for that array element in all slices that are averaged
    # (i.e. land in ocean files).
    if var_Ave.shape:
        var_Ave[np.isnan(var_Ave)] = fillValue
    else:
        print(var, var_Ave)

    return var_Ave
Example #17
def weighted_avg_var_missing(var, years, hist_dict, ave_info, file_dict,
                             ave_type, fillValue, timer, depend, fyr):

    '''
    Computes the average of a variable that contains missing values

    @param var         The name of the variable that is being averaged.

    @param years       A list of the years that are in this average

    @param hist_dict   A dictionary that holds file references for all years/months. 

    @param ave_info    A dictionary of the type of average that is to be done.
                       Includes:  type, months_to_average, fn, and weights
                       (the weights are applied to each month in this average)
    
    @param file_dict   A dictionary which holds file pointers to the input files that
                       are needed by this average calculation.

    @param ave_type    The average type key that indicates which type of average will be done.

    @param fillValue   The value that indicates missing values within the data.

    @param timer       The timer class used for time bookkeeping.

    @param depend      Boolean variable to indicate if this average will be computed from previously calculated files.

    @param fyr         The first year of the average series

    @return var_Ave    The averaged results for this variable across the designated time frame.
    '''
    # If the variable contains missing values, build a mask accumulator that
    # counts how many masked values to exclude from the sum and the divisor.
    count = 0
    first = True
    fetch_time = 0

    first_mask = True
    d_in_m = [31,28,31,30,31,30,31,31,30,31,30,31]
    for yr in years:
        i = 0
        for m in ave_info['months_to_average']:
            timer.start("Variable fetch time")
            # Check if doing a winter average and get the correct year to pull
            if ((ave_type == 'djf' and depend == False) or ave_type == 'next_jan'
                    or ave_type == 'next_feb' or ave_type == 'prev_dec'):
                pull_year = climFileIO.which_winter_year(hist_dict, m, yr,fyr)
            else:
                pull_year = yr
            var_val = rover.fetch_slice(hist_dict,pull_year,m,var,file_dict)
            timer.stop("Variable fetch time") 
            var_filled = var_val.filled(fill_value=0) # zero out the masked grid points
            # Get and add mask values to the mask accumulator
            if (first_mask):
                if (MA.any(MA.getmask(var_val))):
                    mask_sum = (MA.getmask(var_val)).astype(int)
                    first_mask = False
            else:
                if (MA.any(MA.getmask(var_val))):
                    mask_sum = mask_sum + (MA.getmask(var_val)).astype(int)
            if (first):
                if (ave_type == 'ya'):
                    var_sum = (var_val*d_in_m[m])
                else:
                    var_sum = (var_val*ave_info['weights'][i])
                first = False
            else:
                if (ave_type == 'ya'):
                    var_sum = (var_val*d_in_m[m]) + var_sum
                else:
                    var_sum = (var_val*ave_info['weights'][i]) + var_sum
            i+=1
        count+=1
    # Since the weights are only for 1 year, divide by total number of years
    if (ave_type == 'ya'):
        var_Ave = var_sum * (1/365.)
    else:
        var_Ave = np.divide(var_sum,count)
    # If any values are 0, then replace the var_Ave value with the fill value
    if (first_mask != True):
        var_Ave[mask_sum > 0] = fillValue

    return var_Ave