示例#1
0
    def test_count_zeros_and_non_missings(self):
        """
        Test for the utils.count_zeros_and_non_missings() function.

        The same 14 values (two zeros and two NaNs) are supplied as a 1-D
        array, as a flat list, and as a nested list; every form is expected
        to report 2 zeros and 12 non-missing values. A list mixing numbers
        with a string is expected to raise a TypeError.
        """

        # vanilla use case
        # (np.nan is used instead of the np.NaN alias, which NumPy 2.0 removed)
        values_list = [
            3, 4, 0, 2, 3.1, 5, np.nan, 8, 5, 6, 0.0, np.nan, 5.6, 2
        ]
        values = np.array(values_list)
        zeros, non_missings = utils.count_zeros_and_non_missings(values)
        self.assertEqual(zeros, 2, 'Failed to correctly count zero values')
        self.assertEqual(non_missings, 12,
                         'Failed to correctly count non-missing values')

        # test with a flat list
        values = values_list
        zeros, non_missings = utils.count_zeros_and_non_missings(values)
        self.assertEqual(zeros, 2, 'Failed to correctly count zero values')
        self.assertEqual(non_missings, 12,
                         'Failed to correctly count non-missing values')

        # test with a nested (two-row) list holding the same values
        values = [[3, 4, 0, 2, 3.1, 5, np.nan], [8, 5, 6, 0.0, np.nan, 5.6, 2]]
        zeros, non_missings = utils.count_zeros_and_non_missings(values)
        self.assertEqual(zeros, 2, 'Failed to correctly count zero values')
        self.assertEqual(non_missings, 12,
                         'Failed to correctly count non-missing values')

        # using a list that can't be converted into a numeric array
        # should result in a TypeError
        values = [1, 2, 3, 0, 'abcxyz']
        np.testing.assert_raises(TypeError, utils.count_zeros_and_non_missings,
                                 values)
示例#2
0
def test_count_zeros_and_non_missings():
    """
    Test for the utils.count_zeros_and_non_missings() function.

    The same 14 values (two zeros and two NaNs) are supplied as a 1-D array,
    as a flat list, and as a nested list; every form is expected to report
    2 zeros and 12 non-missing values. A list mixing numbers with a string
    is expected to raise a TypeError.

    Failures are reported by raising AssertionError explicitly rather than
    through ``assert`` statements.
    """

    # vanilla use case
    # (np.nan is used instead of the np.NaN alias, which NumPy 2.0 removed)
    values_list = [3, 4, 0, 2, 3.1, 5, np.nan, 8, 5, 6, 0.0, np.nan, 5.6, 2]
    values = np.array(values_list)
    zeros, non_missings = utils.count_zeros_and_non_missings(values)
    if zeros != 2:
        raise AssertionError("Failed to correctly count zero values")
    if non_missings != 12:
        raise AssertionError("Failed to correctly count non-missing values")

    # test with a flat list
    values = values_list
    zeros, non_missings = utils.count_zeros_and_non_missings(values)
    if zeros != 2:
        raise AssertionError("Failed to correctly count zero values")
    if non_missings != 12:
        raise AssertionError("Failed to correctly count non-missing values")

    # test with a nested (two-row) list holding the same values
    values = [[3, 4, 0, 2, 3.1, 5, np.nan], [8, 5, 6, 0.0, np.nan, 5.6, 2]]
    zeros, non_missings = utils.count_zeros_and_non_missings(values)
    if zeros != 2:
        raise AssertionError("Failed to correctly count zero values")
    if non_missings != 12:
        raise AssertionError("Failed to correctly count non-missing values")

    # using a list that can't be converted into a numeric array
    # should result in a TypeError
    values = [1, 2, 3, 0, "abcxyz"]
    np.testing.assert_raises(TypeError, utils.count_zeros_and_non_missings,
                             values)
示例#3
0
def _probability_of_zero(values: np.ndarray) -> np.ndarray:
    """
    This function computes the probability of zero for each calendar time
    step of an array of values.

    :param values: 2-D array of values, with each row representing a year
        containing either 12 values corresponding to the calendar months of
        that year, or 366 values corresponding to the days of the year
        (with Feb. 29th being an average of the Feb. 28th and Mar. 1st values for
        non-leap years) and assuming that the first value of the array is
        January of the initial year for an input array of monthly values or
        Jan. 1st of initial year for an input array daily values
    :return: a 1-D array of probability of zero values, with shape (12,) for
        monthly or (366,) for daily; a time step holds NaN unless it has at
        least one zero value and at least one non-missing value
    :raises ValueError: if the array is not 2-D, or if its second dimension
        is neither 12 nor 366
    """

    # validate that the values array has shape:
    # (years, 12) for monthly or (years, 366) for daily
    if (len(values.shape) != 2) or (values.shape[1] not in (12, 366)):
        message = "Invalid shape of input data array: {shape}".format(
            shape=values.shape)
        _logger.error(message)
        raise ValueError(message)

    # the number of time steps per year (12 for monthly, 366 for daily)
    time_steps_per_year = values.shape[1]

    # the values we'll compute and return
    probabilities_of_zero = np.zeros((time_steps_per_year, ))

    # compute the probability of zero for each calendar time step
    # TODO vectorize the below loop? create a @numba.vectorize() ufunc
    #  for application over the second axis
    for time_step_index in range(time_steps_per_year):

        # get the values for the current calendar time step (all years)
        time_step_values = values[:, time_step_index]

        # count the number of zeros and valid (non-missing/non-NaN) values
        number_of_zeros, number_of_non_missing = \
            utils.count_zeros_and_non_missings(time_step_values)

        # calculate the probability of zero for the calendar time step
        if (number_of_zeros > 0) and (number_of_non_missing > 0):

            probabilities_of_zero[time_step_index] = \
                number_of_zeros / number_of_non_missing

        else:
            # fill with NaN (np.nan rather than the np.NaN alias,
            # which was removed in NumPy 2.0)
            # NOTE(review): a time step with valid data but no zeros also
            # lands here and gets NaN rather than 0.0 -- confirm intended
            probabilities_of_zero[time_step_index] = np.nan

    return probabilities_of_zero
示例#4
0
def _pearson3_fitting_values(values: np.ndarray) -> np.ndarray:
    """
    This function computes the probability of zero and Pearson Type III
    distribution parameters corresponding to an array of values.

    :param values: 2-D array of values, with each row representing a year
        containing either 12 values corresponding to the calendar months of
        that year, or 366 values corresponding to the days of the year
        (with Feb. 29th being an average of the Feb. 28th and Mar. 1st values for
        non-leap years) and assuming that the first value of the array is
        January of the initial year for an input array of monthly values or
        Jan. 1st of initial year for an input array daily values
    :return: a 2-D array of fitting values for the Pearson Type III
        distribution, with shape (4, 12) for monthly or (4, 366) for daily

        returned_array[0] == probability of zero for each of the calendar time steps
        returned_array[1] == the first Pearson Type III distribution parameter
            (loc) for each of the calendar time steps
        returned_array[2] == the second Pearson Type III distribution parameter
            (scale) for each of the calendar time steps
        returned_array[3] == the third Pearson Type III distribution parameter
            (skew) for each of the calendar time steps

        A calendar time step with fewer than four values that are both
        non-missing and non-zero is not fitted and keeps zeros in all
        four rows.
    :raises ValueError: if the array is not 2-D, or if its second dimension
        is neither 12 nor 366
    """

    # validate that the values array has shape:
    # (years, 12) for monthly or (years, 366) for daily
    if (len(values.shape) != 2) or (values.shape[1] not in (12, 366)):
        message = "Invalid shape of input data array: {shape}".format(
            shape=values.shape)
        _logger.error(message)
        raise ValueError(message)

    time_steps_per_year = values.shape[1]

    # the values we'll compute and return
    fitting_values = np.zeros((4, time_steps_per_year))

    # compute the probability of zero and Pearson
    # parameters for each calendar time step
    # TODO vectorize the below loop? create a @numba.vectorize() ufunc
    #  for application over the second axis
    for time_step_index in range(time_steps_per_year):

        # get the values for the current calendar time step (all years)
        time_step_values = values[:, time_step_index]

        # count the number of zeros and valid (non-missing/non-NaN) values
        number_of_zeros, number_of_non_missing = \
            utils.count_zeros_and_non_missings(time_step_values)

        # we need at least four values that are both non-missing
        # (i.e. non-NaN) and non-zero in order to fit this time step;
        # otherwise leave its fitting values at zero and move on to the
        # next time step (returning here would leave every remaining
        # time step unfitted, even those with sufficient data)
        if (number_of_non_missing - number_of_zeros) < 4:
            continue

        # calculate the probability of zero for the calendar time step
        probability_of_zero = 0.0
        if number_of_zeros > 0:
            probability_of_zero = number_of_zeros / number_of_non_missing

        # get the Pearson Type III parameters for this time step's values
        # NOTE(review): NaN values are passed to lmoments.fit() unfiltered;
        # a previously disabled step stripped them first
        # (time_step_values[~np.isnan(time_step_values)]) -- confirm that
        # lmoments.fit() copes with NaNs
        params = lmoments.fit(time_step_values)
        fitting_values[0, time_step_index] = probability_of_zero
        fitting_values[1, time_step_index] = params["loc"]
        fitting_values[2, time_step_index] = params["scale"]
        fitting_values[3, time_step_index] = params["skew"]

    return fitting_values
示例#5
0
def _pearson3_fitting_values(values):
    """
    This function computes the probability of zero and Pearson Type III distribution parameters
    corresponding to an array of values.

    :param values: 2-D array of values, with each row representing a year containing either 12 values corresponding
                   to the calendar months of that year, or 366 values corresponding to the days of the year
                   (with Feb. 29th being an average of the Feb. 28th and Mar. 1st values for non-leap years)
                   and assuming that the first value of the array is January of the initial year for an input array
                   of monthly values or Jan. 1st of initial year for an input array daily values
    :return: a 2-D array of fitting values for the Pearson Type III distribution, with shape (4, 12) for monthly
             or (4, 366) for daily
             returned_array[0] == probability of zero for each of the calendar time steps
             returned_array[1] == the first Pearson Type III distribution parameter for each of the calendar time steps
             returned_array[2] == the second Pearson Type III distribution parameter for each of the calendar time steps
             returned_array[3] == the third Pearson Type III distribution parameter for each of the calendar time steps
             A calendar time step keeps zeros in all four rows when it has fewer than four values that are both
             non-missing and non-zero, or when its estimated L-moments are rejected as invalid.
    :raises ValueError: if the array is not 2-D, or if its second dimension is neither 12 nor 366
    """

    # validate that the values array has shape: (years, 12) for monthly or (years, 366) for daily
    if (len(values.shape) != 2) or (values.shape[1] not in (12, 366)):
        message = 'Invalid shape of input data array: {0}'.format(values.shape)
        _logger.error(message)
        raise ValueError(message)

    time_steps_per_year = values.shape[1]

    # the values we'll compute and return
    fitting_values = np.zeros((4, time_steps_per_year))

    # compute the probability of zero and Pearson parameters for each calendar time step
    # TODO vectorize the below loop? create a @numba.vectorize() ufunc for application over the second axis of the values
    for time_step_index in range(time_steps_per_year):

        # get the values for the current calendar time step (all years)
        time_step_values = values[:, time_step_index]

        # count the number of zeros and valid (non-missing/non-NaN) values
        number_of_zeros, number_of_non_missing = utils.count_zeros_and_non_missings(
            time_step_values)

        # we need at least four values that are both non-missing (i.e. non-NaN)
        # and non-zero in order to fit this time step; otherwise leave its fitting
        # values at zero and move on to the next time step (returning here would
        # leave every remaining time step unfitted, even those with sufficient data)
        if (number_of_non_missing - number_of_zeros) < 4:
            continue

        # calculate the probability of zero for the calendar time step
        probability_of_zero = 0.0
        if number_of_zeros > 0:
            probability_of_zero = number_of_zeros / number_of_non_missing

        # estimate the L-moments of the time step's values
        estimated_lmoments = _estimate_lmoments(time_step_values)

        # if we have valid L-moments then we can proceed, otherwise
        # the fitting values for the time step will be all zeros
        if (estimated_lmoments[1] > 0.0) and (abs(estimated_lmoments[2]) < 1.0):

            # get the Pearson Type III parameters for the time step, based on the L-moments
            parameters = _estimate_pearson3_parameters(estimated_lmoments)

            fitting_values[0, time_step_index] = probability_of_zero
            fitting_values[1, time_step_index] = parameters[0]
            fitting_values[2, time_step_index] = parameters[1]
            fitting_values[3, time_step_index] = parameters[2]

        # FIXME/TODO invalid L-moments are silently swallowed above (the time step
        #  defaults to zeros without a warning); find a better way to handle this,
        #  check whether the lmoments3 module gives similar results, and compare
        #  with how the NCSU SPI code (Cumbie et al?) handles the same case

    return fitting_values
示例#6
0
def pearson_parameters(
    values: np.ndarray,
    data_start_year: int,
    calibration_start_year: int,
    calibration_end_year: int,
    periodicity: Periodicity,
) -> (np.ndarray, np.ndarray, np.ndarray, np.ndarray):
    """
    Fit Pearson Type III distribution parameters, and compute the
    probability of zero, for every calendar time step of a dataset, using
    only the years that fall within a calibration period.

    :param values: array of values which is reshaped, via
        utils.reshape_to_2d(), so that each row represents a year holding
        either 12 monthly values or 366 daily values; the first value is
        assumed to be January (monthly) or Jan. 1st (daily) of the
        initial year
    :param data_start_year: year corresponding to the first row of the
        reshaped values
    :param calibration_start_year: first year of the calibration period
    :param calibration_end_year: last year of the calibration period; its
        row is included in the calibration slice
    :param periodicity: Periodicity.monthly or Periodicity.daily
    :return: four 1-D arrays, each with shape (12,) for monthly or (366,)
        for daily, in this order:

        1. probability of zero
        2. first Pearson Type III distribution parameter (loc)
        3. second Pearson Type III distribution parameter (scale)
        4. third Pearson Type III distribution parameter (skew)

        A calendar time step with fewer than four calibration values that
        are both non-missing and non-zero keeps zeros in all four arrays.
    :raises ValueError: if the periodicity is neither monthly nor daily,
        or if the reshaped array is not 2-D with 12 or 366 columns
    """

    # choose the row length matching the periodicity
    if periodicity is Periodicity.monthly:
        columns = 12
    elif periodicity is Periodicity.daily:
        columns = 366
    else:
        raise ValueError("Invalid periodicity argument: %s" % periodicity)

    # one row per year: (years, 12) for monthly, (years, 366) for daily
    values = utils.reshape_to_2d(values, columns)

    # sanity check the shape of the reshaped array
    shape = values.shape
    if len(shape) != 2 or shape[1] not in (12, 366):
        message = "Invalid shape of input data array: {shape}".format(
            shape=shape)
        _logger.error(message)
        raise ValueError(message)
    step_count = shape[1]

    # the end year of the data, derived from the number of rows (years)
    data_end_year = data_start_year + shape[0]

    # when the calibration period reaches outside of the data's period of
    # record, calibrate over the full period of record instead
    outside_of_record = (
        calibration_start_year < data_start_year
        or calibration_end_year > data_end_year
    )
    if outside_of_record:
        calibration_start_year, calibration_end_year = \
            data_start_year, data_end_year

    # rows (years) of the calibration period; the end year is inclusive
    first_row = calibration_start_year - data_start_year
    stop_row = (calibration_end_year - data_start_year) + 1
    calibration_values = values[first_row:stop_row, :]

    # the output arrays, all initially zero
    probabilities_of_zero = np.zeros((step_count, ))
    locs = np.zeros((step_count, ))
    scales = np.zeros((step_count, ))
    skews = np.zeros((step_count, ))

    # fit each calendar time step independently
    # TODO vectorize the below loop? create a @numba.vectorize() ufunc
    #  for application over the second axis
    for step in range(step_count):

        # this calendar time step's values across the calibration years
        step_values = calibration_values[:, step]

        # tally the zeros and the valid (non-missing/non-NaN) values
        zero_count, valid_count = \
            utils.count_zeros_and_non_missings(step_values)
        usable_count = valid_count - zero_count

        # fewer than four non-missing, non-zero values: no fit is
        # possible, so this time step's outputs are left at zero
        if usable_count < 4:
            continue

        # fraction of the valid values that are zero
        zero_probability = \
            zero_count / valid_count if zero_count > 0 else 0.0

        # fit only when there are more than three usable values
        if usable_count > 3:
            fitted = lmoments.fit(step_values)
            probabilities_of_zero[step] = zero_probability
            locs[step] = fitted["loc"]
            scales[step] = fitted["scale"]
            skews[step] = fitted["skew"]

    return probabilities_of_zero, locs, scales, skews