def _find_profiles_with_undetected_melting(bits: list) -> np.ndarray:
    """Flag profiles that show a class transition but no melting layer.

    Args:
        bits: Sequence of at least four 2-D arrays. The first three are
            forwarded to ``_find_drizzle_and_falling``; the fourth is reduced
            along axis 1 to decide whether a melting layer was detected
            (presumably the melting-layer bit -- confirm against caller).

    Returns:
        Integer masked array with one element per profile. Elements equal to
        1 mark profiles where the drizzle/falling field changes along axis 1
        while ``bits[3]`` has no truthy value; all other elements are masked.
    """
    hydrometeor_field = _find_drizzle_and_falling(*bits[:3])
    # A non-zero difference along axis 1 means the field changes in the profile.
    has_transition = ma.any(ma.diff(hydrometeor_field, axis=1), axis=1)
    has_melting_layer = ma.any(bits[3], axis=1)
    undetected = has_transition & ~has_melting_layer
    # Only the positive hits stay visible; everything else is masked out.
    undetected[undetected == 0] = ma.masked
    return undetected.astype(int)
def rebin_1d(
    x_in: np.ndarray,
    array: Union[np.ndarray, ma.MaskedArray],
    x_new: np.ndarray,
    statistic: str = "mean",
) -> ma.MaskedArray:
    """Rebin a 1-D array onto a new grid.

    Args:
        x_in: 1-D array with shape (n,).
        array: 1-D input data with shape (m,). Non-finite values are
            screened out before binning.
        x_new: 1-D target vector (center points) with shape (N,).
        statistic: Statistic passed to ``scipy.stats.binned_statistic``,
            e.g. 'mean' or 'std'. Default is 'mean'.

    Returns:
        Rebinned data with shape (N,). Bins whose value is zero or
        non-finite are masked.
    """
    bin_edges = binvec(x_new)
    screened = ma.masked_invalid(array, copy=True)  # data may contain nan-values
    valid = ~screened.mask  # pylint: disable=E1101
    if ma.any(screened[valid]):
        binned = stats.binned_statistic(
            x_in[valid], screened[valid], statistic=statistic, bins=bin_edges
        )[0]
    else:
        # Nothing non-zero to bin: every target bin ends up masked below.
        binned = np.zeros(len(x_new))
    binned[~np.isfinite(binned)] = 0
    return ma.masked_equal(binned, 0)
def pad_masked(m, *args, **kwargs):
    """
    Pad a masked array

    :param m: the array to pad. Must have a floating-point dtype and cannot contain unmasked NaNs, since NaNs are
     used as placeholders for the mask.
    :type m: :class:`numpy.ma.masked_array`

    Remaining arguments are the same as :func:`numpy.pad`. But internally it replaces masked values with NaNs so
    several of the pad methods (maximum, minimum, mean, etc.) will always end up padding with NaNs.

    :return: the padded masked array, carrying the same fill value as ``m``.
    :rtype: :class:`numpy.ma.masked_array`

    :raises ValueError: if ``m`` contains unmasked NaNs.
    :raises NotImplementedError: if ``m`` does not have a floating-point dtype.
    """
    if ma.any(np.isnan(m)):
        raise ValueError(
            'm contains unmasked NaNs, it will not work with pad_masked')
    elif not np.issubdtype(m.dtype, np.floating):
        # ``np.floating`` replaces the ``np.float`` alias, which no longer
        # exists in current NumPy; it accepts every float width.
        raise NotImplementedError('Not set up to handle non-float arrays')

    # NaN is used as the mask placeholder because fill values could interact
    # badly with some of the padding modes.
    filled = np.pad(m.filled(np.nan), *args, **kwargs)
    padded = ma.masked_where(np.isnan(filled), filled)
    padded.fill_value = m.fill_value
    return padded
def generate_temporal_neighboring_regions(in_file,
                                          search_path,
                                          out_folder,
                                          variable,
                                          mode,
                                          n_hist=None,
                                          n_cohort=None):
    """Write masked copies of temporally neighbouring NetCDF files.

    File names are expected to be ``YYYYMMDD.nc``. Every ``.nc`` file in
    ``search_path`` is visited from the newest date to the oldest. A file is
    selected according to ``mode`` and its day offset from ``in_file``
    (``doy_diff = in_date - file_date``):

    * ``"window"``: ``0 <= doy_diff <= n_hist``
    * ``"most_recent"``: ``doy_diff > 0``; the search stops after the first
      file that is written
    * ``"cohort"``: ``doy_diff // 12 == n_cohort - 1``

    For a selected file the mask of ``variable`` is OR-ed with the mask of the
    same variable in ``in_file``. If any cell remains unmasked, the file is
    copied to the output folder with that combined mask applied to every
    variable except ``lat`` and ``lon``.

    Args:
        in_file: Path of the reference NetCDF file.
        search_path: Directory scanned for candidate files.
        out_folder: Output location, resolved through ``get_out_path``.
        variable: Name of the variable whose mask drives the selection.
        mode: One of ``"window"``, ``"most_recent"`` or ``"cohort"``.
        n_hist: Window length in days; needed for ``"window"`` mode.
        n_cohort: 1-based cohort number; needed for ``"cohort"`` mode.
    """
    fh_in = Dataset(in_file, "r")
    # try/finally guarantees that every handle is released even when reading
    # or writing fails part-way through.
    try:
        in_doy = datetime.strptime(in_file.split("/")[-1][:-3],
                                   "%Y%m%d").date()
        in_mask = ma.getmaskarray(fh_in.variables[variable][:])
        out_folder = get_out_path(out_folder)

        candidates = [
            nc_file for nc_file in os.listdir(search_path)
            if nc_file.endswith(".nc")
        ]
        # Newest first, so that "most_recent" meets the closest earlier date first.
        candidates = sorted(
            candidates,
            key=lambda name: datetime.strptime(name[:-3], '%Y%m%d'))[::-1]

        for nc_file in candidates:
            nc_doy = datetime.strptime(nc_file[:-3], "%Y%m%d").date()
            doy_diff = (in_doy - nc_doy).days
            selected = (
                (mode == "window" and (0 <= doy_diff <= n_hist))
                or (mode == "most_recent" and doy_diff > 0)
                or (mode == "cohort" and (doy_diff // 12 == (n_cohort - 1))))
            if not selected:
                continue

            fh_doy = Dataset(os.path.join(search_path, nc_file), "r")
            try:
                doy_mask = ma.mask_or(
                    ma.getmaskarray(fh_doy.variables[variable][:]), in_mask)
                if not ma.any(~doy_mask):
                    # No cell is valid in both files: nothing worth writing.
                    continue

                print(in_file, "--", nc_file, doy_diff)
                fh_out = Dataset(os.path.join(out_folder, nc_file), "w")
                try:
                    for name, dim in fh_doy.dimensions.items():
                        fh_out.createDimension(name, len(dim))
                    for v_name, varin in fh_doy.variables.items():
                        out_var = fh_out.createVariable(
                            v_name, varin.datatype, varin.dimensions)
                        out_var.setncatts(
                            {k: varin.getncattr(k) for k in varin.ncattrs()})
                        if v_name in ('lat', 'lon'):
                            # Coordinate variables are copied untouched.
                            out_var[:] = varin[:]
                        else:
                            out_var[:] = ma.array(varin[:], mask=doy_mask)
                finally:
                    fh_out.close()
            finally:
                fh_doy.close()

            # Only reached when a file has actually been written.
            if mode == "most_recent":
                break
    finally:
        fh_in.close()
def rebin_1d(x_in, data, x_new, statistic='mean'):
    """Rebin a 1-D array onto a new grid.

    Args:
        x_in (ndarray): 1-D array with shape (n,).
        data (MaskedArray): 1-D input data with shape (m,). Non-finite
            values are screened out before binning.
        x_new (ndarray): 1-D target vector (center points) with shape (N,).
        statistic (str, optional): Statistic passed to
            ``scipy.stats.binned_statistic``, e.g. 'mean' or 'std'.
            Default is 'mean'.

    Returns:
        MaskedArray: Rebinned data with shape (N,). Bins whose value is zero
        or non-finite are masked.

    """
    bin_edges = binvec(x_new)
    screened = ma.masked_invalid(data)  # data may contain nan-values
    valid = ~screened.mask  # pylint: disable=E1101
    if ma.any(screened[valid]):
        rebinned = stats.binned_statistic(x_in[valid],
                                          screened[valid],
                                          statistic=statistic,
                                          bins=bin_edges)[0]
    else:
        # Nothing non-zero to bin: every target bin ends up masked below.
        rebinned = np.zeros(len(x_new))
    rebinned[~np.isfinite(rebinned)] = 0
    return ma.masked_equal(rebinned, 0)
def format_and_clean_data_main(self):
    """
    Run the cleaning pipeline on ``self.xs`` according to the user's settings.

    The steps are applied in a fixed order, and ``self.xs`` is returned early
    when ``check_for_all`` is true everywhere or when no masked values remain
    outside the last ``keep_n_values`` columns.
    """
    # Rows with too many missing values (initial threshold) are masked entirely
    over_limit = self.has_too_many_missing(self.init_perc_remove)
    if ma.any(over_limit):
        rows, = ma.where(over_limit)
        self.xs[rows] = ma.mask_rows(self.xs[rows])

    # Stop here when the array is completely filled or completely empty
    if ma.all(self.check_for_all()):
        return self.xs

    self.clean_outliers()

    # Fill gaps up to the configured length from their neighbouring values
    self.clean_gaps_w_linspace(fill_gap_length=self.max_gap_length)

    # Nothing left to do if only the most recent values may still be masked
    masked_per_row = ma.count_masked(self.xs[:, :-self.keep_n_values], axis=1)
    if ma.all(masked_per_row[np.newaxis, :] == 0):
        return self.xs

    # New starting index per row, skipping leading values followed by long gaps
    start_idx = self.find_new_starting_value()

    # Second threshold: mask rows whose share of masked values after the new
    # start index is still too large
    over_limit = self.has_too_many_missing(self.second_perc_remove)  # boolean array
    if ma.any(over_limit):
        masked_after_start = np.array([
            ma.count_masked(self.xs[row, first:])
            for row, first in enumerate(start_idx)
        ])
        exceeds = masked_after_start / self.N > self.perc_remove_after_start_idx
        if ma.any(exceeds):
            rows, = ma.where(exceeds)
            self.xs[rows] = ma.mask_rows(self.xs[rows])

    # Remaining gaps are filled by linear regression on the available values
    self.clean_gaps_w_lin_regress(start_idx)

    # NOTE(review): this is true when any value > 0 survives the masking, not
    # when a value <= 0 exists -- confirm the intended condition.
    # NOTE(review): this last path returns None while the early exits return
    # self.xs -- confirm whether callers rely on the return value.
    if ma.any(ma.masked_less_equal(self.xs, 0.)):
        self.clean_gaps_w_linspace()
def rebin_2d(
    x_in: np.ndarray,
    array: ma.MaskedArray,
    x_new: np.ndarray,
    statistic: str = "mean",
    n_min: int = 1,
) -> Tuple[ma.MaskedArray, list]:
    """Rebin 2-D data along its first dimension.

    Args:
        x_in: 1-D array with shape (n,).
        array: 2-D input data with shape (n, m).
        x_new: 1-D target vector (center points) with shape (N,).
        statistic: Statistic passed to ``scipy.stats.binned_statistic``,
            e.g. 'mean' or 'std'. Default is 'mean'.
        n_min: Minimum number of ``x_in`` points required inside a bin.
            Default is 1.

    Returns:
        tuple: Rebinned data with shape (N, m) and the indices of the bins
        that hold fewer than ``n_min`` points.

    Notes:
        0-values are masked in the returned array.

    """
    bin_edges = binvec(x_new)
    binned = np.zeros((len(x_new), array.shape[1]))
    screened = ma.masked_invalid(array, copy=True)  # data may contain nan-values
    for col, column in enumerate(screened.T):
        valid = ~column.mask
        if not ma.any(column[valid]):
            continue
        binned[:, col] = stats.binned_statistic(
            x_in[valid], column[valid], statistic=statistic, bins=bin_edges
        )[0]
    binned[~np.isfinite(binned)] = 0
    masked_result = ma.masked_equal(binned, 0)

    # Mask whole bins that do not contain enough profiles
    empty_indices = []
    for ind, (lower, upper) in enumerate(zip(bin_edges[:-1], bin_edges[1:])):
        in_bin = np.where((x_in > lower) & (x_in <= upper))[0]
        if len(in_bin) < n_min:
            masked_result[ind, :] = ma.masked
            empty_indices.append(ind)
    if empty_indices:
        logging.info(f"No radar data in {len(empty_indices)} bins")

    return masked_result, empty_indices
def rebin_2d(x_in: np.ndarray,
             array: ma.MaskedArray,
             x_new: np.ndarray,
             statistic: Optional[str] = 'mean',
             n_min: Optional[int] = 1) -> ma.MaskedArray:
    """Rebins 2-D data in one dimension.

    Args:
        x_in: 1-D array with shape (n,).
        array: 2-D input data with shape (n, m).
        x_new: 1-D target vector (center points) with shape (N,).
        statistic: Statistic to be calculated. Possible statistics are 'mean', 'std'.
            Default is 'mean'.
        n_min: Minimum number of points to have good statistics in a bin. Default is 1.
            ``None`` is treated like 1 (no screening).

    Returns:
        Rebinned data with shape (N, m).

    Notes:
        0-values are masked in the returned array.

    """
    edges = binvec(x_new)
    result = np.zeros((len(x_new), array.shape[1]))
    array_screened = ma.masked_invalid(array, copy=True)  # data may contain nan-values
    n_bins = result.shape[0]
    screen_sparse_bins = n_min is not None and n_min > 1
    for ind, values in enumerate(array_screened.T):
        mask = ~values.mask
        if not ma.any(values[mask]):
            continue
        result[:, ind], _, bin_no = stats.binned_statistic(x_in[mask],
                                                           values[mask],
                                                           statistic=statistic,
                                                           bins=edges)
        if screen_sparse_bins:
            unique, counts = np.unique(bin_no, return_counts=True)
            sparse = unique[counts < n_min]
            # binned_statistic numbers real bins 1..n_bins and reports samples
            # outside the edges with other numbers. Those must be dropped, or
            # "- 1" would wrap to the last bin or index past the array.
            sparse = sparse[(sparse >= 1) & (sparse <= n_bins)]
            result[sparse - 1, ind] = 0
    result[~np.isfinite(result)] = 0
    return ma.masked_equal(result, 0)
def rebin_2d(x_in, data, x_new, statistic='mean', n_min=1):
    """Rebins 2-D data in one dimension.

    Args:
        x_in (ndarray): 1-D array with shape (n,).
        data (MaskedArray): 2-D input data with shape (n, m).
        x_new (ndarray): 1-D target vector (center points) with shape (N,).
        statistic (str, optional): Statistic to be calculated. Possible
            statistics are 'mean', 'std'. Default is 'mean'.
        n_min (int): Minimum number of points to have good statistics in a bin.
            Default is 1.

    Returns:
        MaskedArray: Rebinned data with shape (N, m).

    Notes:
        0-values are masked in the returned array.

    """
    edges = binvec(x_new)
    datai = np.zeros((len(x_new), data.shape[1]))
    data = ma.masked_invalid(data)  # data may contain nan-values
    n_bins = datai.shape[0]
    for ind, values in enumerate(data.T):
        mask = ~values.mask
        if not ma.any(values[mask]):
            continue
        datai[:, ind], _, bin_no = stats.binned_statistic(x_in[mask],
                                                          values[mask],
                                                          statistic=statistic,
                                                          bins=edges)
        if n_min > 1:
            unique, counts = np.unique(bin_no, return_counts=True)
            sparse = unique[counts < n_min]
            # binned_statistic numbers real bins 1..n_bins and reports samples
            # outside the edges with other numbers. Those must be dropped, or
            # "- 1" would wrap to the last bin or index past the array.
            sparse = sparse[(sparse >= 1) & (sparse <= n_bins)]
            datai[sparse - 1, ind] = 0
    datai[~np.isfinite(datai)] = 0
    return ma.masked_equal(datai, 0)
def __fit__(self, rating, row=True):
    """Fit the neighbourhood weights and biases to a rating matrix.

    The method stores the (masked) rating matrix, computes a similarity matrix
    through ``person`` (presumably Pearson correlation -- confirm), keeps the
    ``self.config.topk`` most similar entries per row as neighbourhood, and
    then updates ``_m_bias``, ``_n_bias`` and ``_weight`` column by column for
    every epoch from ``self._epoch`` up to ``self.config.epochs``.

    Args:
        rating: 2-D rating matrix. A ``MaskedArray`` is used as is; for any
            other input the entries equal to 0 are masked.
        row: When False, the rating matrix and its mean-centred version are
            transposed before the similarity is computed, and the roles of
            ``_m`` and ``_n`` are swapped.

    Side effects:
        Sets many attributes on ``self``, writes debug log messages and calls
        ``self.save()`` every ``config.save_per_epochs`` epochs, plus once
        after the loop when the last epoch was not a saving epoch.
    """
    if isinstance(rating, ma.MaskedArray):
        self._rating = rating
    else:
        # zero entries are treated as "no rating"
        self._rating = ma.masked_equal(rating, 0)
    # the row means are taken before any transposition below
    self._mean = ma.mean(self._rating, axis=1, keepdims=True)
    self._mean_center_rating = self._rating - self._mean
    self._rating_filled = self._rating.filled(0)
    if row:
        self._sim = person(mean_center_rating=self._mean_center_rating)
    else:
        self._rating = self._rating.T
        self._mean_center_rating = self._mean_center_rating.T
        self._sim = person(mean_center_rating=self._mean_center_rating)
    # the diagonal gets a very low value; presumably so that an entry is never
    # picked as its own neighbour by the top-k selection below
    self._sim[np.diag_indices(self._sim.shape[0])] = -999
    # columns without a single unmasked rating are skipped during training
    self._skip_columns = np.where(self._rating.count(axis=0) == 0)[0]
    # params
    # argsort is ascending, so the last topk columns hold the largest similarities
    self._neighborhood = np.argsort(self._sim, axis=1)[:, -self.config.topk:]
    # (row indices, neighbour indices) pair addressing every neighbourhood entry
    self._neighborhood_idx = ([
        int(i / self._neighborhood.shape[1])
        for i in range(self._neighborhood.size)
    ], self._neighborhood.flatten())
    if row:
        self._m, self._n = rating.shape
    else:
        self._n, self._m = rating.shape
    # parameters already present on the instance are kept (e.g. when training
    # continues from an earlier state); otherwise they start from random values
    if "_weight" not in self.__dict__:
        self._weight = np.random.randn(self._m, self.config.topk)
    if "_m_bias" not in self.__dict__:
        self._m_bias = np.random.randn(self._m)
    if "_n_bias" not in self.__dict__:
        self._n_bias = np.random.randn(self._n)
    assert self._weight.shape[1] == self.config.topk
    assert self._m_bias.shape[0] == self._m
    assert self._n_bias.shape[0] == self._n
    start = time.perf_counter()
    # step counter used for logging only
    step = self._epoch * self._n
    for epoch in range(self._epoch, self.config.epochs):
        self._epoch = epoch
        for j in range(self._n):
            if j in self._skip_columns:
                continue
            # forward
            step += 1
            _hat_rating, mid_data = self.__forward__(j)
            _loss = self.__loss__(self._rating[:, j], _hat_rating)
            logger.debug(
                "[{:4d} step in {:4d} epoch\ttime:{:.2f}s] {}'s loss:{:.2f}"
                .format(step, self._epoch,
                        time.perf_counter() - start, j, _loss))
            # backward
            _g_m_bias, _g_ngb_m_bias, _g_ngb_n_bias, _g_weight = self.__backward__(
                _hat_rating, self._rating[:, j], mid_data[0], mid_data[1])
            # no update when the bias gradient has no non-zero unmasked entry
            if not ma.any(_g_m_bias):
                continue
            # add the unmasked neighbour gradients onto the bias gradient of
            # the neighbour they belong to
            for i, g in zip(self._neighborhood.flat,
                            _g_ngb_m_bias.flat):
                if g is not ma.masked:
                    _g_m_bias[i] += g
            # check gradient
            if self.config.check_gradient:
                logger.debug("check gradient")
                self.__check_gradient__(j, _g_weight, _g_m_bias,
                                        _g_ngb_n_bias)
            logger.debug(
                "[gradient] max(m_bias): {}\tmax(n_bias): {}\tmax(weight):{}"
                .format(ma.max(ma.abs(_g_m_bias)), ma.abs(_g_ngb_n_bias),
                        ma.max(ma.abs(_g_weight))))
            # update gradient
            # each step is lr / m times the gradient plus a weight-decay term
            self._m_bias -= self.config.lr / self._m * _g_m_bias + self.config.wdecay * self._m_bias
            self._n_bias[
                j] -= self.config.lr / self._m * _g_ngb_n_bias + self.config.wdecay * self._n_bias[
                    j]
            self._weight -= self.config.lr / self._m * _g_weight + self.config.wdecay * self._weight
        logger.debug(
            "[{:4d} epoch\ttime:{:.2f}s] epoch loss:{:.2f}".format(
                epoch,
                time.perf_counter() - start,
                self.__loss__(self._rating, self.__predict__())))
        if epoch % self.config.save_per_epochs == 0:
            self.save()
    # make sure the final state is saved when the last epoch was not a saving epoch
    if self._epoch % self.config.save_per_epochs != 0:
        self.save()
targets = get_image_data(lista, 'magp3', 'merrp3', refcat)
color_to_use = lsc.sites.chosecolor(targets['filter'], True)
colors_to_calculate = set(sum(color_to_use.values(), []))

# copy average zero points & color terms from the standards to the science images
if args.exzp:
    with open(args.exzp) as f:
        lista2 = f.read().splitlines()
    standards = get_image_data(lista2)
    standards = standards.group_by(['dayobs', 'shortname', 'instrument', 'filter', 'zcol1', 'zcol2'])
    # Mask each column on its own: a multi-column selection may be a new table,
    # in which case masking it would leave `targets` unchanged.
    for icol in ['zcol1', 'z1', 'dz1', 'c1', 'dc1', 'zcol2', 'z2', 'dz2', 'c2', 'dc2']:
        targets[icol].mask = True
    for group in standards.groups:
        # science images taken on the same night with the same telescope, instrument and filter
        matches_in_targets = ((targets['dayobs'] == group['dayobs'][0])
                              & (targets['shortname'] == group['shortname'][0])
                              & (targets['instrument'] == group['instrument'][0])
                              & (targets['filter'] == group['filter'][0]))
        if not np.any(matches_in_targets):
            continue
        targets['zcol1'][matches_in_targets] = group['zcol1'][0]
        targets['zcol2'][matches_in_targets] = group['zcol2'][0]
        targets['z1'][matches_in_targets], targets['dz1'][matches_in_targets] = average_in_flux(group['z1'], group['dz1'])
        targets['z2'][matches_in_targets], targets['dz2'][matches_in_targets] = average_in_flux(group['z2'], group['dz2'])
        if np.all(group['dc1']):
            # inverse-variance weighted mean of the color term
            dc1 = np.sum(group['dc1']**-2)**-0.5
            targets['c1'][matches_in_targets] = np.sum(group['c1'] * group['dc1']**-2) * dc1**2
            targets['dc1'][matches_in_targets] = dc1
        else:
            # at least one uncertainty is zero: fall back to the plain mean
            targets['c1'][matches_in_targets] = np.mean(group['c1'])
            # only the matched rows are reset; assigning to the whole column
            # would overwrite the values stored for earlier groups
            targets['dc1'][matches_in_targets] = 0.
        if np.all(group['dc2']):
            dc2 = np.sum(group['dc2']**-2)**-0.5
            targets['c2'][matches_in_targets] = np.sum(group['c2'] * group['dc2']**-2) * dc2**2
def avg_var_missing(var,years,hist_dict,ave_info,file_dict,ave_type,fillValue,timer,depend,fyr): ''' Computes the average of a variable that contains missing values @param var The name of the variable that is being averaged. @param years A list of the years that are in this average @param hist_dict A dictionary that holds file references for all years/months. @param ave_info A dictionary of the type of average that is to be done. Includes: type, months_to_average, fn, and weights (weights are not used in this function/average) @param file_dict A dictionary which holds file pointers to the input files that are needed by this average calculation. @param ave_type The average type key that indicated which type of average will be done. @param fillValue The value that indicates missing values within the data. @param timer The timer class used for time bookkeeping. @param depend Boolean variable to indicate if this average will be computed from previously calculated files. @param fyr The first year of average series @return var_Ave The averaged results for this variable across the designated time frame. 
''' # if variable contains missing values, create a mask accumulator that will count how many masked values not to add & divide count = 0 first = True fetch_time = 0 first_mask = True for yr in years: for m in ave_info['months_to_average']: timer.start("Variable fetch time") # Check if doing a winter average and get the correct year to pull if ((ave_type == 'djf' and depend == False) or ave_type == 'next_jan' or ave_type == 'next_feb' or ave_type == 'prev_dec'): pull_year = climFileIO.which_winter_year(hist_dict, m, yr,fyr) else: pull_year = yr var_val = rover.fetch_slice(hist_dict,pull_year,m,var,file_dict) timer.stop("Variable fetch time") var_filled = var_val.filled(fill_value=0) # zero out the masked grid points # Get and add mask values to the mask accumulator if (first_mask): if (MA.any(MA.getmask(var_val))): mask_sum = (MA.getmask(var_val)).astype(int) first_mask = False else: if (MA.any(MA.getmask(var_val))): mask_sum = mask_sum + (MA.getmask(var_val)).astype(int) # Add the variable value accumulator using the filled, zeroed about values. if (first): var_sum = var_filled first = False else: var_sum = var_filled + var_sum count+=1 # Create an inverserse of the mask to divide by if (first_mask == True): inv = count else: inv = (count - mask_sum) # Divide by mask to get average np.seterr(divide='ignore', invalid='ignore') var_Ave = var_sum / inv # Replace any nan values with the fill value. Nans will occur if there is a # missing value for that array element in all slices that are averaged (ie. land in ocean files). if var_Ave.shape: var_Ave[np.isnan(var_Ave)]=fillValue else: print var,var_Ave return var_Ave
lista2 = f.read().splitlines() standards = get_image_data(lista2) standards = standards.group_by( ['dayobs', 'shortname', 'instrument', 'filter', 'zcol1', 'zcol2']) for icol in [ 'zcol1', 'z1', 'dz1', 'c1', 'dc1', 'zcol2', 'z2', 'dz2', 'c2', 'dc2' ]: targets[icol].mask = True for group in standards.groups: matches_in_targets = ( (targets['dayobs'] == group['dayobs'][0]) & (targets['shortname'] == group['shortname'][0]) & (targets['instrument'] == group['instrument'][0]) & (targets['filter'] == group['filter'][0])) if not np.any(matches_in_targets): continue targets['zcol1'][matches_in_targets] = group['zcol1'][0] targets['zcol2'][matches_in_targets] = group['zcol2'][0] targets['z1'][matches_in_targets], targets['dz1'][ matches_in_targets] = average_in_flux(group['z1'], group['dz1']) targets['z2'][matches_in_targets], targets['dz2'][ matches_in_targets] = average_in_flux(group['z2'], group['dz2']) if np.all(group['dc1']): dc1 = np.sum(group['dc1']**-2)**-0.5 targets['c1'][matches_in_targets] = np.sum( group['c1'] * group['dc1']**-2) * dc1**2 targets['dc1'][matches_in_targets] = dc1 else:
def combine_masks(self, *masks):
    """Combine several masks position by position.

    The masks are walked in lockstep (stopping at the shortest one) and each
    output entry is ``ma.any`` over the entries found at that position.

    Returns a list with one entry per position; an empty list when no masks
    are given.
    """
    combined = []
    for entries in zip(*masks):
        combined.append(ma.any(entries))
    return combined
def weighted_avg_var_missing(var,years,hist_dict,ave_info,file_dict,ave_type,fillValue,timer,depend,fyr):

    '''
    Computes the weighted average of a variable that contains missing values

    Every monthly slice is multiplied by a weight and summed. For the 'ya'
    average type the weight is the number of days in the month and the sum is
    divided by 365; for every other type the weight comes from
    ave_info['weights'] and the sum is divided by the number of years.
    Grid points that are masked in at least one slice receive the fill value.

    @param var         The name of the variable that is being averaged.

    @param years       A list of the years that are in this average

    @param hist_dict   A dictionary that holds file references for all years/months.

    @param ave_info    A dictionary of the type of average that is to be done.
                       Includes:  type, months_to_average, fn, and weights
                       (weights are used for every average type except 'ya')

    @param file_dict   A dictionary which holds file pointers to the input files that
                       are needed by this average calculation.

    @param ave_type    The average type key that indicated which type of average will be done.

    @param fillValue   The value that indicates missing values within the data.

    @param timer       The timer class used for time bookkeeping.

    @param depend      Boolean variable to indicate if this average will be computed from previously calculated files.

    @param fyr         The first year of average series

    @return var_Ave    The averaged results for this variable across the designated time frame.
    '''
    count = 0           # number of years processed
    first = True
    first_mask = True
    d_in_m = [31,28,31,30,31,30,31,31,30,31,30,31]

    for yr in years:
        i = 0
        for m in ave_info['months_to_average']:
            timer.start("Variable fetch time")
            # Check if doing a winter average and get the correct year to pull
            if ((ave_type == 'djf' and depend == False) or ave_type == 'next_jan'
                      or ave_type == 'next_feb' or ave_type == 'prev_dec'):
                pull_year = climFileIO.which_winter_year(hist_dict, m, yr,fyr)
            else:
                pull_year = yr
            var_val = rover.fetch_slice(hist_dict,pull_year,m,var,file_dict)
            timer.stop("Variable fetch time")

            # Count, per grid point, in how many slices the value was masked
            slice_mask = MA.getmask(var_val)
            if (MA.any(slice_mask)):
                if (first_mask):
                    mask_sum = slice_mask.astype(int)
                    first_mask = False
                else:
                    mask_sum = mask_sum + slice_mask.astype(int)

            # NOTE(review): d_in_m is indexed with m directly, which assumes
            # 0-based month numbers -- confirm against the caller.
            if (ave_type == 'ya'):
                weight = d_in_m[m]
            else:
                weight = ave_info['weights'][i]

            if (first):
                var_sum = (var_val*weight)
                first = False
            else:
                var_sum = (var_val*weight) + var_sum
            i+=1
        count+=1

    # Since the weights are only for 1 year, divide by total number of years
    if (ave_type == 'ya'):
        var_Ave = var_sum * (1/365.)
    else:
        var_Ave = np.divide(var_sum,count)

    # Grid points that were masked in any slice get the fill value
    if (first_mask != True):
        var_Ave[mask_sum>0]=fillValue

    return var_Ave
def avg_var_missing(var,years,hist_dict,ave_info,file_dict,ave_type,fillValue,timer,depend,fyr): ''' Computes the average of a variable that contains missing values @param var The name of the variable that is being averaged. @param years A list of the years that are in this average @param hist_dict A dictionary that holds file references for all years/months. @param ave_info A dictionary of the type of average that is to be done. Includes: type, months_to_average, fn, and weights (weights are not used in this function/average) @param file_dict A dictionary which holds file pointers to the input files that are needed by this average calculation. @param ave_type The average type key that indicated which type of average will be done. @param fillValue The value that indicates missing values within the data. @param timer The timer class used for time bookkeeping. @param depend Boolean variable to indicate if this average will be computed from previously calculated files. @param fyr The first year of average series @return var_Ave The averaged results for this variable across the designated time frame. 
''' # if variable contains missing values, create a mask accumulator that will count how many masked values not to add & divide count = 0 first = True fetch_time = 0 first_mask = True for yr in years: for m in ave_info['months_to_average']: timer.start("Variable fetch time") # Check if doing a winter average and get the correct year to pull if ((ave_type == 'djf' and depend == False) or ave_type == 'next_jan' or ave_type == 'next_feb' or ave_type == 'prev_dec'): pull_year = climFileIO.which_winter_year(hist_dict, m, yr,fyr) else: pull_year = yr var_val = rover.fetch_slice(hist_dict,pull_year,m,var,file_dict) timer.stop("Variable fetch time") if (hasattr(var_val, 'filled')): var_filled = var_val.filled(fill_value=0) # zero out the masked grid points else: var_filled = np.ones(var_val.shape) # Get and add mask values to the mask accumulator if (first_mask): if (MA.any(MA.getmask(var_val))): mask_sum = (MA.getmask(var_val)).astype(int) first_mask = False else: if (MA.any(MA.getmask(var_val))): mask_sum = mask_sum + (MA.getmask(var_val)).astype(int) # Add the variable value accumulator using the filled, zeroed about values. if (first): var_sum = var_filled first = False else: var_sum = var_filled + var_sum count+=1 # Create an inverserse of the mask to divide by if (first_mask == True): inv = count else: inv = (count - mask_sum) # Divide by mask to get average np.seterr(divide='ignore', invalid='ignore') var_Ave = var_sum / inv # Replace any nan values with the fill value. Nans will occur if there is a # missing value for that array element in all slices that are averaged (ie. land in ocean files). if var_Ave.shape: var_Ave[np.isnan(var_Ave)]=fillValue else: print var,var_Ave return var_Ave
def weighted_avg_var_missing(var,years,hist_dict,ave_info,file_dict,ave_type,fillValue,timer,depend,fyr):

    '''
    Computes the weighted average of a variable that contains missing values

    Every monthly slice is multiplied by a weight and summed. For the 'ya'
    average type the weight is the number of days in the month and the sum is
    divided by 365; for every other type the weight comes from
    ave_info['weights'] and the sum is divided by the number of years.
    Grid points that are masked in at least one slice receive the fill value.
    Slices without a mask (plain arrays) are accepted as well.

    @param var         The name of the variable that is being averaged.

    @param years       A list of the years that are in this average

    @param hist_dict   A dictionary that holds file references for all years/months.

    @param ave_info    A dictionary of the type of average that is to be done.
                       Includes:  type, months_to_average, fn, and weights
                       (weights are used for every average type except 'ya')

    @param file_dict   A dictionary which holds file pointers to the input files that
                       are needed by this average calculation.

    @param ave_type    The average type key that indicated which type of average will be done.

    @param fillValue   The value that indicates missing values within the data.

    @param timer       The timer class used for time bookkeeping.

    @param depend      Boolean variable to indicate if this average will be computed from previously calculated files.

    @param fyr         The first year of average series

    @return var_Ave    The averaged results for this variable across the designated time frame.
    '''
    count = 0           # number of years processed
    first = True
    first_mask = True
    d_in_m = [31,28,31,30,31,30,31,31,30,31,30,31]

    for yr in years:
        i = 0
        for m in ave_info['months_to_average']:
            timer.start("Variable fetch time")
            # Check if doing a winter average and get the correct year to pull
            if ((ave_type == 'djf' and depend == False) or ave_type == 'next_jan'
                      or ave_type == 'next_feb' or ave_type == 'prev_dec'):
                pull_year = climFileIO.which_winter_year(hist_dict, m, yr,fyr)
            else:
                pull_year = yr
            var_val = rover.fetch_slice(hist_dict,pull_year,m,var,file_dict)
            timer.stop("Variable fetch time")

            # Count, per grid point, in how many slices the value was masked.
            # MA.getmask also works for plain arrays, so no filled copy of the
            # slice is needed here.
            slice_mask = MA.getmask(var_val)
            if (MA.any(slice_mask)):
                if (first_mask):
                    mask_sum = slice_mask.astype(int)
                    first_mask = False
                else:
                    mask_sum = mask_sum + slice_mask.astype(int)

            # NOTE(review): d_in_m is indexed with m directly, which assumes
            # 0-based month numbers -- confirm against the caller.
            if (ave_type == 'ya'):
                weight = d_in_m[m]
            else:
                weight = ave_info['weights'][i]

            if (first):
                var_sum = (var_val*weight)
                first = False
            else:
                var_sum = (var_val*weight) + var_sum
            i+=1
        count+=1

    # Since the weights are only for 1 year, divide by total number of years
    if (ave_type == 'ya'):
        var_Ave = var_sum * (1/365.)
    else:
        var_Ave = np.divide(var_sum,count)

    # Grid points that were masked in any slice get the fill value
    if (first_mask != True):
        var_Ave[mask_sum>0]=fillValue

    return var_Ave