def test_sum_to_scale():
    """
    Test for the compute.sum_to_scale() function.

    The expected arrays below encode the contract being checked: the first
    (scale - 1) elements of the result are NaN, every later element is the
    sum of the current value and the (scale - 1) values preceding it, and
    any window that includes a NaN input is expected to be NaN.
    """

    # test an input array with no missing values
    values = np.array([3.0, 4, 6, 2, 1, 3, 5, 8, 5])
    computed_values = compute.sum_to_scale(values, 3)
    expected_values = np.array([np.nan, np.nan, 13, 12, 9, 6, 9, 16, 18])
    np.testing.assert_allclose(
        computed_values,
        expected_values,
        err_msg="Sliding sums not computed as expected",
    )
    computed_values = compute.sum_to_scale(values, 4)
    expected_values = np.array(
        [np.nan, np.nan, np.nan, 15, 13, 12, 11, 17, 21]
    )
    np.testing.assert_allclose(
        computed_values,
        expected_values,
        err_msg="Sliding sums not computed as expected",
    )

    # test an input array with missing values on the end
    values = np.array([3, 4, 6, 2, 1, 3, 5, 8, 5, np.nan, np.nan, np.nan])
    computed_values = compute.sum_to_scale(values, 3)
    expected_values = np.array(
        [np.nan, np.nan, 13, 12, 9, 6, 9, 16, 18, np.nan, np.nan, np.nan]
    )
    np.testing.assert_allclose(
        computed_values,
        expected_values,
        err_msg="Sliding sums not computed as expected when "
                "missing values appended to end of input array",
    )

    # test an input array with missing values within the array
    values = np.array([3, 4, 6, 2, 1, 3, 5, np.nan, 8, 5, 6])
    computed_values = compute.sum_to_scale(values, 3)
    expected_values = np.array(
        [np.nan, np.nan, 13, 12, 9, 6, 9, np.nan, np.nan, np.nan, 19]
    )
    np.testing.assert_allclose(
        computed_values,
        expected_values,
        err_msg="Sliding sums not computed as expected when "
                "missing values are present within the input array",
    )

    # exact-equality checks at several scales against hand-computed sums
    test_values = np.array([1.0, 5, 7, 2, 3, 4, 9, 6, 3, 8])
    sum_by2 = np.array([np.nan, 6, 12, 9, 5, 7, 13, 15, 9, 11])
    sum_by4 = np.array([np.nan, np.nan, np.nan, 15, 17, 16, 18, 22, 22, 26])
    sum_by6 = np.array(
        [np.nan, np.nan, np.nan, np.nan, np.nan, 22, 30, 31, 27, 33]
    )
    np.testing.assert_equal(
        compute.sum_to_scale(test_values, 2),
        sum_by2,
        err_msg="Sliding sums not computed as expected",
    )
    np.testing.assert_equal(
        compute.sum_to_scale(test_values, 4),
        sum_by4,
        err_msg="Sliding sums not computed as expected",
    )
    np.testing.assert_equal(
        compute.sum_to_scale(test_values, 6),
        sum_by6,
        err_msg="Sliding sums not computed as expected",
    )
def test_sum_to_scale(self):
    """
    Test for the compute.sum_to_scale() function.

    Each case compares the sliding 3-step sums against a hand-written
    expectation in which the first two elements are NaN and any window
    that includes a NaN input is NaN.
    """

    # test an input array with no missing values
    values = np.array([3, 4, 6, 2, 1, 3, 5, 8, 5])
    computed_values = compute.sum_to_scale(values, 3)
    expected_values = np.array([np.nan, np.nan, 13, 12, 9, 6, 9, 16, 18])
    np.testing.assert_allclose(
        computed_values,
        expected_values,
        err_msg='Sliding sums not computed as expected',
    )

    # test an input array with missing values on the end
    values = np.array([3, 4, 6, 2, 1, 3, 5, 8, 5, np.nan, np.nan, np.nan])
    computed_values = compute.sum_to_scale(values, 3)
    expected_values = np.array(
        [np.nan, np.nan, 13, 12, 9, 6, 9, 16, 18, np.nan, np.nan, np.nan]
    )
    np.testing.assert_allclose(
        computed_values,
        expected_values,
        err_msg='Sliding sums not computed as expected when '
                'missing values appended to end of input array',
    )

    # test an input array with missing values within the array
    values = np.array([3, 4, 6, 2, 1, 3, 5, np.nan, 8, 5, 6])
    computed_values = compute.sum_to_scale(values, 3)
    expected_values = np.array(
        [np.nan, np.nan, 13, 12, 9, 6, 9, np.nan, np.nan, np.nan, 19]
    )
    np.testing.assert_allclose(
        computed_values,
        expected_values,
        err_msg='Sliding sums not computed as expected when '
                'missing values are present within the input array',
    )
def spi(values: np.ndarray,
        scale: int,
        distribution,
        data_start_year: int,
        calibration_year_initial: int,
        calibration_year_final: int,
        periodicity):
    """
    Compute the Standardized Precipitation Index (SPI).

    The input is summed over a sliding window of ``scale`` time steps,
    reshaped to one row per year, fitted/transformed with the requested
    distribution, clipped to the valid index range and returned as a 1-D
    array truncated to the input's length.

    :param values: 1-D (or 2-D, which is flattened) numpy array of
        precipitation values, in any units, first value assumed to correspond
        to January of the initial year if the periodicity is monthly, or
        January 1st of the initial year if daily
    :param scale: number of time steps over which the values should be
        summed before the index is computed
    :param distribution: distribution type used for the fitting/transform
        step, either ``Distribution.gamma`` or ``Distribution.pearson``
    :param data_start_year: the initial year of the input precipitation
        dataset
    :param calibration_year_initial: initial year of the calibration period
    :param calibration_year_final: final year of the calibration period
    :param periodicity: ``compute.Periodicity.monthly`` (values are reshaped
        to 12 columns per year) or ``compute.Periodicity.daily`` (values are
        reshaped to 366 columns per year, i.e. every year is treated as a
        leap year)
    :return: SPI values at the specified time step scale; if the input is
        entirely masked or entirely NaN the (flattened) input is returned
        unchanged
    :rtype: 1-D numpy.ndarray of the same length as the input array
    :raises ValueError: if the input is not 1-D or 2-D, or if the
        periodicity or distribution argument is not supported
    """

    # only 1-D input is processed directly; 2-D is flattened, anything
    # else is rejected
    shape = values.shape
    rank = len(shape)
    if rank == 2:
        values = values.flatten()
    elif rank != 1:
        message = (
            f"Invalid shape of input array: {shape} -- "
            "only 1-D and 2-D arrays are supported"
        )
        _logger.error(message)
        raise ValueError(message)

    # nothing can be computed from an array of all missing values,
    # so hand the same array back to the caller
    if np.ma.is_masked(values) and values.mask.all():
        return values
    if np.all(np.isnan(values)):
        return values

    # the result is truncated to this length before being returned
    input_length = values.size

    # sliding sums over the requested number of time steps
    scaled = compute.sum_to_scale(values, scale)

    # one row per year: 12 columns for monthly, 366 for daily
    if periodicity is compute.Periodicity.monthly:
        steps_per_year = 12
    elif periodicity is compute.Periodicity.daily:
        steps_per_year = 366
    else:
        raise ValueError("Invalid periodicity argument: %s" % periodicity)
    scaled = utils.reshape_to_2d(scaled, steps_per_year)

    # pick the fitting/transform routine for the requested distribution
    if distribution is Distribution.gamma:
        transform = compute.transform_fitted_gamma
    elif distribution is Distribution.pearson:
        transform = compute.transform_fitted_pearson
    else:
        message = f"Unsupported distribution argument: {distribution}"
        _logger.error(message)
        raise ValueError(message)

    fitted = transform(
        scaled,
        data_start_year,
        calibration_year_initial,
        calibration_year_final,
        periodicity,
    )

    # limit to the valid index range and go back to 1-D
    clipped = np.clip(fitted, _FITTED_INDEX_VALID_MIN, _FITTED_INDEX_VALID_MAX)
    flattened = clipped.flatten()

    # drop any padding added by the reshape
    return flattened[:input_length]
def percentage_of_normal(values: np.ndarray,
                         scale: int,
                         data_start_year: int,
                         calibration_start_year: int,
                         calibration_end_year: int,
                         periodicity):
    """
    Find the percent of normal values (average of each calendar month or
    day over a specified calibration period of years) for a specified time
    steps scale.

    The normal precipitation for each calendar time step is computed for the
    specified time steps scale, and then each time step's scaled value is
    compared against the corresponding calendar time step's average to
    determine the percentage of normal. The period that defines the normal is
    described by the calibration start and end years arguments.

    :param values: 1-D numpy array of precipitation values, any length,
        initial value assumed to be January of the data start year (January
        1st of the start year if daily periodicity)
    :param scale: integer number of time steps over which the values are
        summed (e.g. 3-months, 6-months, etc.), must be >= 1
    :param data_start_year: the initial year of the input values array
    :param calibration_start_year: the initial year of the calibration period
        over which the normal average for each calendar time step is computed
    :param calibration_end_year: the final year of the calibration period
        over which the normal average for each calendar time step is computed
    :param periodicity: ``compute.Periodicity.monthly`` (12 time steps per
        year) or ``compute.Periodicity.daily`` (366 time steps per year, as
        if each year were a leap year)
    :return: ratio of each scaled value to the calibration-period average of
        its calendar time step; NaN wherever that average is not greater
        than zero. If the input is entirely masked it is returned unchanged.
    :rtype: numpy.ndarray of type float
    :raises ValueError: if the scale or periodicity is invalid, if the
        calibration period starts before the data, or if the calibration
        period is longer than the data
    """

    # validate the scale argument
    if (scale is None) or (scale < 1):
        message = "Invalid scale argument: '{0}'".format(scale)
        _logger.error(message)
        raise ValueError(message)

    # number of time steps in a year: 12 calendar months if monthly,
    # 366 days (all years treated as leap years) if daily
    if periodicity is compute.Periodicity.monthly:
        steps_per_year = 12
    elif periodicity is compute.Periodicity.daily:
        steps_per_year = 366
    else:
        message = "Invalid periodicity argument: '{0}'".format(periodicity)
        _logger.error(message)
        raise ValueError(message)

    # bypass processing if all values are masked
    if np.ma.is_masked(values) and values.mask.all():
        return values

    # make sure we've been provided with sane calibration limits
    calibration_years = calibration_end_year - calibration_start_year + 1
    if data_start_year > calibration_start_year:
        message = ("Invalid start year arguments (data and/or "
                   "calibration): calibration start year is before "
                   "the data start year")
        _logger.error(message)
        raise ValueError(message)
    # the calibration length is measured in time steps of the actual
    # periodicity (12 or 366 per year), not always in months
    if (calibration_years * steps_per_year) > values.size:
        message = ("Invalid calibration period specified: total "
                   "calibration years exceeds the actual "
                   "number of years of data")
        _logger.error(message)
        raise ValueError(message)

    # get an array containing a sliding sum on the specified time step
    # scale -- i.e. if the scale is 3 then the first two elements will be
    # NaN, since we need 3 elements to get a sum, and then from the third
    # element to the end the values will equal the sum of the corresponding
    # time step plus the values of the two previous time steps
    scale_sums = compute.sum_to_scale(values, scale)

    # extract the time steps over which we'll compute the normal
    # average for each time step of the year
    calibration_start_index = \
        (calibration_start_year - data_start_year) * steps_per_year
    calibration_end_index = \
        calibration_start_index + (calibration_years * steps_per_year)
    calibration_period_sums = \
        scale_sums[calibration_start_index:calibration_end_index]

    # for each calendar time step, average the scale sums of that calendar
    # time step over the calibration period (i.e. average all January sums,
    # then all February sums, etc.), ignoring NaNs
    averages = np.full((steps_per_year,), np.nan)
    for i in range(steps_per_year):
        averages[i] = np.nanmean(calibration_period_sums[i::steps_per_year])

    # TODO replace the below loop with a vectorized implementation
    # for each time step of the scale_sums array find its corresponding
    # percentage of the scale average for its respective calendar time step
    percentages_of_normal = np.full(scale_sums.shape, np.nan)
    for i in range(scale_sums.size):

        # skip zero, negative and NaN divisors, leaving NaN in the result
        divisor = averages[i % steps_per_year]
        if divisor > 0.0:
            percentages_of_normal[i] = scale_sums[i] / divisor

    return percentages_of_normal
def spei(precips_mm: np.ndarray,
         pet_mm: np.ndarray,
         scale: int,
         distribution,
         periodicity,
         data_start_year: int,
         calibration_year_initial: int,
         calibration_year_final: int):
    """
    Compute the Standardized Precipitation Evapotranspiration Index (SPEI).

    PET is subtracted from precipitation (plus a constant offset of 1000.0),
    the differences are summed over a sliding window of ``scale`` time steps,
    then fitted/transformed with the requested distribution, clipped to the
    valid index range and returned as a 1-D array truncated to the input's
    size.

    :param precips_mm: an array of total precipitation values, in
        millimeters, must have the same size as the input PET array
    :param pet_mm: an array of PET values, in millimeters, must have the
        same size as the input precipitation array
    :param scale: the number of time steps over which the values should be
        summed before computing the index
    :param distribution: distribution type used for the fitting/transform
        step, either ``Distribution.gamma`` or ``Distribution.pearson``
    :param periodicity: the periodicity of the time series represented by
        the input data, passed through to the fitting/transform function
    :param data_start_year: the initial year of the input datasets (assumes
        that the two inputs cover the same period)
    :param calibration_year_initial: initial year of the calibration period
    :param calibration_year_final: final year of the calibration period
    :return: an array of SPEI values; if the precipitation input is entirely
        masked or entirely NaN it is returned unchanged
    :rtype: 1-D numpy.ndarray with as many elements as the precipitation
        input
    :raises ValueError: if the two inputs differ in size or the distribution
        argument is not supported
    """

    # nothing can be computed from all missing values, so hand the same
    # array back to the caller
    if np.ma.is_masked(precips_mm) and precips_mm.mask.all():
        return precips_mm
    if np.all(np.isnan(precips_mm)):
        return precips_mm

    # the two inputs must line up element for element
    if precips_mm.size != pet_mm.size:
        message = "Incompatible precipitation and PET arrays"
        _logger.error(message)
        raise ValueError(message)

    # the result is truncated to this length before being returned
    input_length = precips_mm.size

    # P - PET, shifted by a constant offset to ensure that all values
    # are positive
    flat_precips = precips_mm.flatten()
    flat_pet = pet_mm.flatten()
    water_balance = (flat_precips - flat_pet) + 1000.0

    # sliding sums over the requested number of time steps
    scaled = compute.sum_to_scale(water_balance, scale)

    # pick the fitting/transform routine for the requested distribution
    if distribution is Distribution.gamma:
        transform = compute.transform_fitted_gamma
    elif distribution is Distribution.pearson:
        transform = compute.transform_fitted_pearson
    else:
        message = f"Unsupported distribution argument: {distribution}"
        _logger.error(message)
        raise ValueError(message)

    fitted = transform(
        scaled,
        data_start_year,
        calibration_year_initial,
        calibration_year_final,
        periodicity,
    )

    # limit to the valid index range and go back to 1-D
    clipped = np.clip(fitted, _FITTED_INDEX_VALID_MIN, _FITTED_INDEX_VALID_MAX)
    flattened = clipped.flatten()

    # return an array of the original size
    return flattened[:input_length]
def spei(scale,
         distribution,
         periodicity,
         data_start_year,
         calibration_year_initial,
         calibration_year_final,
         precips_mm,
         pet_mm=None,
         temps_celsius=None,
         latitude_degrees=None):
    """
    Compute SPEI fitted to the specified distribution.

    PET values are subtracted from the precipitation values to come up with
    an array of (P - PET) values, which is then scaled to the specified
    months scale and finally fitted/transformed to SPEI values corresponding
    to the input precipitation time series.

    If an input array of temperature values is provided then PET values are
    computed internally using the input temperature array, data start year,
    and latitude value (all three of which are required in combination). In
    this case an input array of PET values should not be specified and if so
    will result in an error being raised indicating invalid arguments.

    If an input array of PET values is provided then neither an input array
    of temperature values nor a latitude should be specified, and if so will
    result in an error being raised indicating invalid arguments.

    :param scale: the number of months over which the values should be
        scaled before computing the indicator
    :param distribution: distribution type to be used for the internal
        fitting/transform computation, either ``Distribution.gamma`` or
        ``Distribution.pearson_type3``
    :param periodicity: the periodicity of the time series represented by
        the input data; must be 'monthly' when temperature is the input,
        otherwise it is passed through to the fitting/transform function
    :param data_start_year: the initial year of the input datasets (assumes
        that the two inputs cover the same period)
    :param calibration_year_initial: initial year of the calibration period
    :param calibration_year_final: final year of the calibration period
    :param precips_mm: an array of monthly total precipitation values, in
        millimeters, must be of the same size as the input temperature or
        PET array
    :param pet_mm: an array of monthly PET values, in millimeters, must be
        of the same size as the input precipitation array, must be
        unspecified or None if using an array of temperature values as input
    :param temps_celsius: an array of monthly average temperature values, in
        degrees Celsius, must be of the same size as the input precipitation
        array, must be unspecified or None if using an array of PET values
        as input
    :param latitude_degrees: the latitude of the location, in degrees north,
        must be unspecified or None if using an array of PET values as an
        input, and must be specified if using an array of temperatures as
        input
    :return: an array of SPEI values; if the precipitation input is entirely
        masked or entirely NaN it is returned unchanged
    :rtype: 1-D numpy.ndarray with as many elements as the precipitation
        input
    :raises ValueError: if the argument combination is invalid, the input
        arrays differ in size, or the distribution is not supported
    """

    # if we're passed all missing values then we can't compute anything,
    # return the same array of missing values
    if np.ma.is_masked(precips_mm) and precips_mm.mask.all():
        return precips_mm
    elif np.all(np.isnan(precips_mm)):
        return precips_mm

    # validate the function's argument combinations
    if temps_celsius is not None:

        # since we have temperature then it's expected that we'll compute
        # PET internally, so we shouldn't have PET as an input
        if pet_mm is not None:
            message = 'Incompatible arguments: either temperature or PET arrays can be specified as arguments, but not both'
            _logger.error(message)
            raise ValueError(message)

        # we'll need both the latitude and data start year
        # in order to compute PET
        elif (latitude_degrees is None) or (data_start_year is None):
            message = 'Missing arguments: since temperature is provided as an input then both latitude ' + \
                      'and the data start year must also be specified, and one or both is not'
            _logger.error(message)
            raise ValueError(message)

        # validate that the two input arrays are compatible
        elif precips_mm.size != temps_celsius.size:
            message = 'Incompatible precipitation and temperature arrays'
            _logger.error(message)
            raise ValueError(message)

        elif periodicity != 'monthly':
            # our PET currently uses a monthly version of Thornthwaite's
            # equation and is therefore only valid for monthly data
            message = 'Unsupported periodicity: \'{0}\' '.format(periodicity) + \
                      '-- only monthly time series is supported when providing temperature and latitude inputs'
            _logger.error(message)
            raise ValueError(message)

        # compute PET
        pet_mm = pet(temps_celsius, latitude_degrees, data_start_year)

    elif pet_mm is not None:

        # make sure there's no confusion by not allowing a user
        # to specify unnecessary parameters
        if latitude_degrees is not None:
            message = 'Invalid argument: since PET is provided as an input then latitude must be absent'
            _logger.error(message)
            raise ValueError(message)

        # validate that the two input arrays are compatible
        elif precips_mm.size != pet_mm.size:
            message = 'Incompatible precipitation and PET arrays'
            _logger.error(message)
            raise ValueError(message)

    else:
        message = 'Neither temperature nor PET array was specified, one or the other is required for SPEI'
        _logger.error(message)
        raise ValueError(message)

    # subtract the PET from precipitation, adding an offset
    # to ensure that all values are positive
    p_minus_pet = (precips_mm.flatten() - pet_mm.flatten()) + 1000.0

    # remember the original length of the input array, in order
    # to facilitate returning an array of the same size
    original_length = precips_mm.size

    # get a sliding sums array, with each element's value
    # scaled by the specified number of time steps
    scaled_values = compute.sum_to_scale(p_minus_pet, scale)

    if distribution is Distribution.gamma:

        # fit the scaled values to a gamma distribution
        # and transform to corresponding normalized sigmas
        transformed_fitted_values = \
            compute.transform_fitted_gamma(scaled_values,
                                           data_start_year,
                                           calibration_year_initial,
                                           calibration_year_final,
                                           periodicity)

    elif distribution is Distribution.pearson_type3:

        # fit the scaled values to a Pearson Type III distribution
        # and transform to corresponding normalized sigmas
        transformed_fitted_values = \
            compute.transform_fitted_pearson(scaled_values,
                                             data_start_year,
                                             calibration_year_initial,
                                             calibration_year_final,
                                             periodicity)

    else:
        # without this branch an unsupported distribution would surface
        # as an UnboundLocalError on the clip call below
        message = 'Unsupported distribution argument: {0}'.format(distribution)
        _logger.error(message)
        raise ValueError(message)

    # clip values to within the valid range,
    # reshape the array back to 1-D
    clipped_values = np.clip(transformed_fitted_values,
                             _FITTED_INDEX_VALID_MIN,
                             _FITTED_INDEX_VALID_MAX).flatten()

    # return the original size array
    return clipped_values[0:original_length]
def spi(
        values: np.ndarray,
        scale: int,
        distribution: Distribution,
        data_start_year: int,
        calibration_year_initial: int,
        calibration_year_final: int,
        periodicity: compute.Periodicity,
        fitting_params: Dict = None,
) -> np.ndarray:
    """
    Computes SPI (Standardized Precipitation Index).

    :param values: 1-D numpy array of precipitation values, in any units,
        first value assumed to correspond to January of the initial year if
        the periodicity is monthly, or January 1st of the initial year if
        daily; a 2-D array is flattened before processing
    :param scale: number of time steps over which the values should be
        scaled before the index is computed
    :param distribution: distribution type to be used for the internal
        fitting/transform computation
    :param data_start_year: the initial year of the input precipitation
        dataset
    :param calibration_year_initial: initial year of the calibration period
    :param calibration_year_final: final year of the calibration period
    :param periodicity: the periodicity of the time series represented by
        the input data, ``compute.Periodicity.monthly`` (values reshaped to
        12 columns per year) or ``compute.Periodicity.daily`` (values
        reshaped to 366 columns per year, as if each year were a leap year)
    :param fitting_params: optional dictionary of pre-computed distribution
        fitting parameters, if the distribution is gamma then this dict
        must contain two entries keyed as "alpha" and "beta", and if the
        distribution is Pearson then this dict must contain four entries
        keyed as "prob_zero", "loc", "scale", and "skew"
    :return: SPI values fitted to the specified distribution at the
        specified time step scale, unitless; if the input is entirely masked
        or entirely NaN the (flattened) input is returned unchanged
    :rtype: 1-D numpy.ndarray of floats of the same length as the input
        array of precipitation values
    :raises ValueError: if the input is not 1-D or 2-D, or if the
        periodicity or distribution argument is not supported
    """

    # we expect to operate upon a 1-D array, so if we've been passed a 2-D
    # array then we flatten it, otherwise raise an error
    shape = values.shape
    if len(shape) == 2:
        values = values.flatten()
    elif len(shape) != 1:
        message = "Invalid shape of input array: {shape}".format(shape=shape) + \
                  " -- only 1-D and 2-D arrays are supported"
        _logger.error(message)
        raise ValueError(message)

    # if we're passed all missing values then we can't compute
    # anything, so we return the same array of missing values
    if (np.ma.is_masked(values) and values.mask.all()) \
            or np.all(np.isnan(values)):
        return values

    # clip any negative values to zero; an element-wise test is used because
    # np.amin() propagates NaN, which would hide negatives whenever the
    # array contains any missing values
    if np.any(values < 0.0):
        _logger.warning(
            "Input contains negative values -- all negatives clipped to zero")
        values = np.clip(values, a_min=0.0, a_max=None)

    # remember the original length of the array, in order to facilitate
    # returning an array of the same size
    original_length = values.size

    # get a sliding sums array, with each time step's value scaled
    # by the specified number of time steps
    values = compute.sum_to_scale(values, scale)

    # reshape precipitation values to (years, 12) for monthly,
    # or to (years, 366) for daily
    if periodicity is compute.Periodicity.monthly:
        values = utils.reshape_to_2d(values, 12)
    elif periodicity is compute.Periodicity.daily:
        values = utils.reshape_to_2d(values, 366)
    else:
        raise ValueError("Invalid periodicity argument: %s" % periodicity)

    if distribution is Distribution.gamma:

        # get (optional) fitting parameters if provided
        if fitting_params is not None:
            alphas = fitting_params["alpha"]
            betas = fitting_params["beta"]
        else:
            alphas = None
            betas = None

        # fit the scaled values to a gamma distribution
        # and transform to corresponding normalized sigmas
        values = compute.transform_fitted_gamma(
            values,
            data_start_year,
            calibration_year_initial,
            calibration_year_final,
            periodicity,
            alphas,
            betas,
        )

    elif distribution is Distribution.pearson:

        # get (optional) fitting parameters if provided
        if fitting_params is not None:
            probabilities_of_zero = fitting_params["prob_zero"]
            locs = fitting_params["loc"]
            scales = fitting_params["scale"]
            skews = fitting_params["skew"]
        else:
            probabilities_of_zero = None
            locs = None
            scales = None
            skews = None

        # fit the scaled values to a Pearson Type III distribution
        # and transform to corresponding normalized sigmas
        values = compute.transform_fitted_pearson(
            values,
            data_start_year,
            calibration_year_initial,
            calibration_year_final,
            periodicity,
            probabilities_of_zero,
            locs,
            scales,
            skews,
        )

    else:
        message = "Unsupported distribution argument: " + \
                  "{dist}".format(dist=distribution)
        _logger.error(message)
        raise ValueError(message)

    # clip values to within the valid range, reshape the array back to 1-D
    values = np.clip(values,
                     _FITTED_INDEX_VALID_MIN,
                     _FITTED_INDEX_VALID_MAX).flatten()

    # return the original size array
    return values[0:original_length]
def spei(
        precips_mm: np.ndarray,
        pet_mm: np.ndarray,
        scale: int,
        distribution: Distribution,
        periodicity: compute.Periodicity,
        data_start_year: int,
        calibration_year_initial: int,
        calibration_year_final: int,
        fitting_params: dict = None,
) -> np.ndarray:
    """
    Compute SPEI fitted to the specified distribution.

    PET values are subtracted from the precipitation values to come up with
    an array of (P - PET) values, which is then scaled to the specified
    months scale and finally fitted/transformed to SPEI values corresponding
    to the input precipitation time series.

    :param precips_mm: an array of monthly total precipitation values,
        in millimeters, must be of the same size as the input PET array
    :param pet_mm: an array of monthly PET values, in millimeters,
        must be of the same size as the input precipitation array
    :param scale: the number of months over which the values should be
        scaled before computing the indicator
    :param distribution: distribution type to be used for the internal
        fitting/transform computation
    :param periodicity: the periodicity of the time series represented by
        the input data, passed through to the fitting/transform function
    :param data_start_year: the initial year of the input datasets (assumes
        that the two inputs cover the same period)
    :param calibration_year_initial: initial year of the calibration period
    :param calibration_year_final: final year of the calibration period
    :param fitting_params: optional dictionary of pre-computed distribution
        fitting parameters, if the distribution is gamma then this dict
        must contain two entries keyed as "alphas" and "betas", and if the
        distribution is Pearson then this dict must contain four entries
        keyed as "probabilities_of_zero", "locs", "scales", and "skews"
    :return: an array of SPEI values; if the precipitation input is entirely
        masked or entirely NaN it is returned unchanged
    :rtype: 1-D numpy.ndarray with as many elements as the precipitation
        input
    :raises ValueError: if the two inputs differ in size or the distribution
        argument is not supported
    """

    # if we're passed all missing values then we can't compute anything,
    # so we return the same array of missing values
    if (np.ma.is_masked(precips_mm) and precips_mm.mask.all()) \
            or np.all(np.isnan(precips_mm)):
        return precips_mm

    # validate that the two input arrays are compatible
    if precips_mm.size != pet_mm.size:
        message = "Incompatible precipitation and PET arrays"
        _logger.error(message)
        raise ValueError(message)

    # clip any negative values to zero; an element-wise test is used because
    # np.amin() propagates NaN, which would hide negatives whenever the
    # array contains any missing values
    if np.any(precips_mm < 0.0):
        _logger.warning(
            "Input contains negative values -- all negatives clipped to zero")
        precips_mm = np.clip(precips_mm, a_min=0.0, a_max=None)

    # subtract the PET from precipitation, adding an offset
    # to ensure that all values are positive
    p_minus_pet = (precips_mm.flatten() - pet_mm.flatten()) + 1000.0

    # remember the original length of the input array, in order to
    # facilitate returning an array of the same size
    original_length = precips_mm.size

    # get a sliding sums array, with each element's value
    # scaled by the specified number of time steps
    scaled_values = compute.sum_to_scale(p_minus_pet, scale)

    if distribution is Distribution.gamma:

        # get (optional) fitting parameters if provided
        if fitting_params is not None:
            alphas = fitting_params["alphas"]
            betas = fitting_params["betas"]
        else:
            alphas = None
            betas = None

        # fit the scaled values to a gamma distribution and
        # transform to corresponding normalized sigmas
        transformed_fitted_values = \
            compute.transform_fitted_gamma(
                scaled_values,
                data_start_year,
                calibration_year_initial,
                calibration_year_final,
                periodicity,
                alphas,
                betas,
            )

    elif distribution is Distribution.pearson:

        # get (optional) fitting parameters if provided
        if fitting_params is not None:
            probabilities_of_zero = fitting_params["probabilities_of_zero"]
            locs = fitting_params["locs"]
            scales = fitting_params["scales"]
            skews = fitting_params["skews"]
        else:
            probabilities_of_zero = None
            locs = None
            scales = None
            skews = None

        # fit the scaled values to a Pearson Type III distribution
        # and transform to corresponding normalized sigmas
        transformed_fitted_values = \
            compute.transform_fitted_pearson(
                scaled_values,
                data_start_year,
                calibration_year_initial,
                calibration_year_final,
                periodicity,
                probabilities_of_zero,
                locs,
                scales,
                skews,
            )

    else:
        message = "Unsupported distribution argument: " + \
                  "{dist}".format(dist=distribution)
        _logger.error(message)
        raise ValueError(message)

    # clip values to within the valid range, reshape the array back to 1-D
    values = \
        np.clip(transformed_fitted_values,
                _FITTED_INDEX_VALID_MIN,
                _FITTED_INDEX_VALID_MAX).flatten()

    # return the original size array
    return values[0:original_length]
def spi_pearson(precips,
                scale,
                data_start_year,
                calibration_year_initial,
                calibration_year_final,
                time_series_type):
    """
    Compute SPI using a fitting to the Pearson Type III distribution.

    The input is summed over a sliding window of ``scale`` time steps,
    reshaped to one row per year, fitted/transformed, clipped to the valid
    index range and returned as a 1-D array truncated to the input's length.

    :param precips: 1-D numpy array of precipitation values, in any units,
        first value assumed to correspond to January of the initial year if
        the time series type is monthly, or January 1st of the initial year
        if daily
    :param scale: number of time steps over which the values should be
        summed before the index is computed
    :param data_start_year: the initial year of the input precipitation
        dataset
    :param calibration_year_initial: initial year of the calibration period
    :param calibration_year_final: final year of the calibration period
    :param time_series_type: 'monthly' (values are reshaped to 12 columns
        per year) or 'daily' (values are reshaped to 366 columns per year,
        i.e. every year is represented as a leap year)
    :return: SPI values fitted to the Pearson Type III distribution at the
        specified time scale
    :rtype: 1-D numpy.ndarray of the same length as the input array
    :raises ValueError: if the time series type is neither 'monthly' nor
        'daily'
    """

    # the result is truncated to this length before being returned
    input_length = precips.size

    # sliding sums over the requested number of time steps
    sliding_sums = compute.sum_to_scale(precips, scale)

    # one row per year: 12 columns for monthly, 366 for daily
    if time_series_type == 'monthly':
        steps_per_year = 12
    elif time_series_type == 'daily':
        steps_per_year = 366
    else:
        raise ValueError('Invalid time series type argument: %s' % time_series_type)
    sliding_sums = utils.reshape_to_2d(sliding_sums, steps_per_year)

    # fit to a Pearson Type III distribution and transform the values
    # to corresponding normalized sigmas
    fitted = compute.transform_fitted_pearson(
        sliding_sums,
        data_start_year,
        calibration_year_initial,
        calibration_year_final,
        time_series_type,
    )

    # limit to the valid index range and go back to 1-D
    clipped = np.clip(fitted, _FITTED_INDEX_VALID_MIN, _FITTED_INDEX_VALID_MAX)
    flattened = clipped.flatten()

    # return an array of the original size
    return flattened[:input_length]