Exemplo n.º 1
0
def cdf_match(src,
              ref,
              min_val=None,
              max_val=None,
              nbins=100,
              minobs=None,
              **kwargs):
    """
    Scale ``src`` so that its CDF matches the CDF of ``ref``.

    Percentile values of both datasets are evaluated at ``nbins`` evenly
    spaced levels between 0 and 100 (optionally adjusted through
    ``utils.resize_percentiles`` when ``minobs`` is given), passed through
    ``utils.unique_percentiles_interpolate`` and finally handed to
    ``gen_cdf_match`` with ``k=5`` (described as 5th order spline
    interpolation).

    NOTE(review): earlier documentation warned that non-unique percentiles
    can cause several measurements to be scaled to one point, or NaN values
    in the output array. The percentile values are routed through
    ``utils.unique_percentiles_interpolate`` here -- confirm whether that
    warning still applies.

    Parameters
    ----------
    src: numpy.array
        input dataset which will be scaled
    ref: numpy.array
        dataset that src is scaled to
    min_val: float, optional
        Minimum allowed value, forwarded to gen_cdf_match
        (output data is capped at this value)
    max_val: float, optional
        Maximum allowed value, forwarded to gen_cdf_match
        (output data is capped at this value)
    nbins: int, optional
        Number of evenly spaced percentile levels between 0 and 100 used
        for the estimation of the CDF
    minobs : int, optional
        Minimum desired number of observations in a bin. When not None the
        percentile levels are replaced by the result of
        ``utils.resize_percentiles(src, percentiles, minobs)``.
    ** kwargs: dict
        keywords to be passed onto the gen_cdf_match() function

    Returns
    -------
    CDF matched values: numpy.array
        dataset src with CDF as ref
    """
    levels = np.linspace(0, 100, nbins)
    if minobs is not None:
        levels = utils.resize_percentiles(src, levels, minobs)

    def _unique_percentile_values(samples):
        # percentile values of `samples` at the shared levels, made unique
        raw_values = np.array(np.percentile(samples, levels))
        return utils.unique_percentiles_interpolate(raw_values,
                                                    percentiles=levels)

    src_values = _unique_percentile_values(src)
    ref_values = _unique_percentile_values(ref)

    return gen_cdf_match(src,
                         src_values,
                         ref_values,
                         ref=ref,
                         min_val=min_val,
                         max_val=max_val,
                         k=5,
                         **kwargs)
Exemplo n.º 2
0
    def calc_parameters(self, data):
        """
        Compute the per-column percentile values used for CDF matching.

        For every column of ``data`` the values at ``self.percentiles`` are
        computed with ``np.percentile`` and then passed through
        ``unique_percentiles_interpolate``.

        Parameters
        ----------
        data: pandas.DataFrame
            temporally matched dataset

        Returns
        -------
        parameters: dictionary
            keys -> Names of columns in the input data frame
            values -> output of unique_percentiles_interpolate for that
            column (numpy.ndarrays with the percentiles)
        """
        return {
            name: unique_percentiles_interpolate(
                np.percentile(data[name].values, self.percentiles),
                percentiles=self.percentiles)
            for name in data.columns
        }
Exemplo n.º 3
0
    def calc_parameters(self, data):
        """
        Derive the CDF matching parameters for each column of ``data``.

        Parameters
        ----------
        data: pandas.DataFrame
            temporally matched dataset

        Returns
        -------
        parameters: dictionary
            keys -> Names of columns in the input data frame
            values -> numpy.ndarrays with the percentiles, i.e. the values
            of the column at ``self.percentiles`` after they went through
            ``unique_percentiles_interpolate``
        """
        per_column = {}

        for col_name in data.columns:
            # raw percentile values of this column at the configured levels
            raw_values = np.percentile(data[col_name].values,
                                       self.percentiles)
            per_column[col_name] = unique_percentiles_interpolate(
                raw_values, percentiles=self.percentiles)

        return per_column
Exemplo n.º 4
0
def cdf_match(src, ref, min_val=None, max_val=None, nbins=100):
    """
    Scale ``src`` so that its CDF matches the CDF of ``ref``.

    Percentile values of both datasets are evaluated at ``nbins`` evenly
    spaced levels between 0 and 100, passed through
    ``unique_percentiles_interpolate`` and then handed to ``gen_cdf_match``
    with ``k=5`` (described as 5th order spline interpolation).

    NOTE(review): earlier documentation warned that non-unique percentiles
    can cause several measurements to be scaled to one point, or NaN values
    in the output array. The percentile values are routed through
    ``unique_percentiles_interpolate`` here -- confirm whether that warning
    still applies.

    Parameters
    ----------
    src: numpy.array
        input dataset which will be scaled
    ref: numpy.array
        dataset that src is scaled to
    min_val: float, optional
        Minimum allowed value, forwarded to gen_cdf_match
        (output data is capped at this value)
    max_val: float, optional
        Maximum allowed value, forwarded to gen_cdf_match
        (output data is capped at this value)
    nbins: int, optional
        Number of evenly spaced percentile levels between 0 and 100 used
        for the estimation of the CDF

    Returns
    -------
    CDF matched values: numpy.array
        dataset src with CDF as ref
    """
    levels = np.linspace(0, 100, nbins)

    def _unique_percentile_values(samples):
        # percentile values of `samples` at the shared levels, made unique
        raw_values = np.array(np.percentile(samples, levels))
        return unique_percentiles_interpolate(raw_values, percentiles=levels)

    src_values = _unique_percentile_values(src)
    ref_values = _unique_percentile_values(ref)

    return gen_cdf_match(
        src,
        src_values,
        ref_values,
        min_val=min_val,
        max_val=max_val,
        k=5,
    )
Exemplo n.º 5
0
def cdf_match(src, ref,
              min_val=None, max_val=None,
              nbins=100):
    """
    Match the CDF of ``src`` to the CDF of ``ref``.

    Both datasets are summarised by their percentile values at ``nbins``
    evenly spaced levels between 0 and 100. These values go through
    ``unique_percentiles_interpolate`` before ``gen_cdf_match`` is called
    with ``k=5`` (described as 5th order spline interpolation).

    NOTE(review): earlier documentation warned that non-unique percentiles
    can cause several measurements to be scaled to one point, or NaN values
    in the output array. The percentile values are routed through
    ``unique_percentiles_interpolate`` here -- confirm whether that warning
    still applies.

    Parameters
    ----------
    src: numpy.array
        input dataset which will be scaled
    ref: numpy.array
        dataset that src is scaled to
    min_val: float, optional
        Minimum allowed value, forwarded to gen_cdf_match
        (output data is capped at this value)
    max_val: float, optional
        Maximum allowed value, forwarded to gen_cdf_match
        (output data is capped at this value)
    nbins: int, optional
        Number of evenly spaced percentile levels between 0 and 100 used
        for the estimation of the CDF

    Returns
    -------
    CDF matched values: numpy.array
        dataset src with CDF as ref
    """
    levels = np.linspace(0, 100, nbins)

    # The generator is consumed in order: src is processed first, then ref.
    src_values, ref_values = (
        unique_percentiles_interpolate(
            np.array(np.percentile(samples, levels)),
            percentiles=levels)
        for samples in (src, ref)
    )

    return gen_cdf_match(src, src_values, ref_values,
                         min_val=min_val, max_val=max_val, k=5)
Exemplo n.º 6
0
def test_unique_percentile_interpolation():
    """
    Check the values returned by unique_percentiles_interpolate for the
    percentile values of an array that contains many repeated entries.
    """
    samples = np.array([1, 1, 1, 2, 2, 2, 5, 5, 6, 10, 10, 10, 10])
    levels = [0, 5, 10, 20, 30, 40, 50, 60, 70, 80, 90, 95, 100]
    expected = [
        1., 1.025, 1.05, 1.1, 2., 3.5, 5., 5.3, 8.4, 8.93333333, 9.46666667,
        9.73333333, 10.
    ]

    raw_values = ml_percentile(samples, levels)
    unique_values = unique_percentiles_interpolate(raw_values,
                                                   percentiles=levels)

    # the interpolation must not change the number of percentile values
    assert len(raw_values) == len(unique_values)
    nptest.assert_almost_equal(unique_values, expected)
Exemplo n.º 7
0
def test_unique_percentile_interpolation():
    """
    Verify that unique_percentiles_interpolate keeps the number of
    percentile values and returns the expected numbers for an input array
    with many repeated entries.
    """
    expected_values = [
        1., 1.025, 1.05, 1.1,
        2., 3.5, 5., 5.3,
        8.4, 8.93333333, 9.46666667, 9.73333333,
        10.,
    ]
    perc_levels = [0, 5, 10, 20, 30, 40, 50, 60, 70, 80, 90, 95, 100]
    data = np.array([1, 1, 1, 2, 2, 2, 5, 5, 6, 10, 10, 10, 10])

    perc_values = ml_percentile(data, perc_levels)
    result = unique_percentiles_interpolate(perc_values,
                                            percentiles=perc_levels)

    # one output value per input percentile value
    assert len(perc_values) == len(result)

    nptest.assert_almost_equal(result, expected_values)