示例#1
0
def test_moving_average_min_observations():
    """
    Test moving average filter with datetimeindex.
    """
    test_jd = pd.date_range(start='2000-01-01', periods=12, freq='D')
    test_data = np.array(
        [1, 2, 3, 4, np.nan, np.nan, 8, 9, 10, np.nan, np.nan, np.nan],
        dtype=np.double)

    ser = pd.Series(test_data, index=test_jd)

    filtered = filtering.moving_average(ser,
                                        window_size=5,
                                        fillna=True,
                                        min_obs=3)

    np.testing.assert_allclose(
        filtered.values,
        [2., 2.5, 2.5, 3.0, 5.0, 7.0, 9.0, 9.0, 9.0, np.nan, np.nan, np.nan])

    filtered = filtering.moving_average(ser,
                                        window_size=5,
                                        fillna=True,
                                        min_obs=4)

    np.testing.assert_allclose(filtered.values, [
        np.nan, 2.5, 2.5, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan,
        np.nan, np.nan, np.nan
    ])
示例#2
0
def calc_climatology(Ser,
                     moving_avg_orig=5,
                     moving_avg_clim=30,
                     median=False,
                     timespan=None):
    '''
    Calculates the climatology of a data set

    Parameters
    ----------
    Ser : pandas.Series (index must be a DateTimeIndex or julian date)

    moving_avg_orig : float, optional
        The size of the moving_average window [days] that will be applied on the
        input Series (gap filling, short-term rainfall correction)
        Default: 5

    moving_avg_clim : float, optional
        The size of the moving_average window [days] that will be applied on the
        calculated climatology (long-term event correction)
        Default: 35

    median : boolean, optional
        if set to True, the climatology will be based on the median conditions

    timespan : [timespan_from, timespan_to], datetime.datetime(y,m,d), optional
        Set this to calculate the climatology based on a subset of the input
        Series

    Returns
    -------
    climatology : pandas.Series
        Series containing the calculated climatology
    '''

    if timespan is not None:
        Ser = Ser.truncate(before=timespan[0], after=timespan[1])

    Ser = moving_average(Ser, window_size=moving_avg_orig)

    Ser = pd.DataFrame(Ser)

    if type(Ser.index) == pd.DatetimeIndex:

        doys = doy(Ser.index.month, Ser.index.day)

    else:
        year, month, day = julian2date(Ser.index.values)[0:3]
        doys = doy(month, day)


    Ser['doy'] = doys


    if median:
        clim = Ser.groupby('doy').median()
    else:
        clim = Ser.groupby('doy').mean()

    return moving_average(pd.Series(clim.values.flatten(), index=clim.index.values), window_size=moving_avg_clim)
示例#3
0
def calc_anomaly(Ser,
                 window_size=35,
                 climatology=None):
    '''
    Calculates the anomaly of a time series (Pandas series).
    Both, climatology based, or moving-average based anomalies can be
    calculated

    Parameters
    ----------
    Ser : pandas.Series (index must be a DateTimeIndex)

    window_size : float, optional
        The window-size [days] of the moving-average window to calculate the
        anomaly reference (only used if climatology is not provided)
        Default: 35 (days)

    climatology : pandas.Series (index: 1-366), optional
        if provided, anomalies will be based on the climatology

    timespann : [timespan_from, timespan_to], datetime.datetime(y,m,d), optional
        If set, only a subset

    Returns
    -------
    anomaly : pandas.Series
        Series containing the calculated anomalies
    '''

    if climatology is not None:

        if type(Ser.index) == pd.DatetimeIndex:

            doys = doy(Ser.index.month, Ser.index.day)

        else:
            year, month, day = julian2date(Ser.index.values)[0:3]
            doys = doy(month, day)

        df = pd.DataFrame()
        df['absolute'] = Ser
        df['doy'] = doys

        clim = pd.DataFrame(climatology, columns=['climatology'])

        df = df.join(clim, on='doy', how='left')

        anomaly = df['absolute'] - df['climatology']
        anomaly.index = df.index


    else:
        reference = moving_average(Ser, window_size=window_size)
        anomaly = Ser - reference

    return anomaly
示例#4
0
def test_moving_average_size_1():
    """
    Test moving average filter with input size 1.
    """
    test_jd = np.arange(1, dtype=np.double)
    test_data = np.array([1], dtype=np.double)

    ser = pd.Series(test_data, index=test_jd)

    filtered = filtering.moving_average(ser, window_size=5)

    np.testing.assert_allclose(filtered.values, [1.])
示例#5
0
def test_moving_average_size_1():
    """
    Test moving average filter with input size 1.
    """
    test_jd = np.arange(1, dtype=np.double)
    test_data = np.array(
        [1], dtype=np.double)

    ser = pd.Series(test_data, index=test_jd)

    filtered = filtering.moving_average(ser, window_size=5)

    np.testing.assert_allclose(filtered.values, [1.])
示例#6
0
def test_moving_average_dt_index():
    """
    Test moving average filter with datetimeindex.
    """
    test_jd = pd.date_range(start='2000-01-01', periods=10, freq='D')
    test_data = np.array(
        [1, 2, 3, 4, -999999.0, 6, 7, 8, 9, np.nan], dtype=np.double)

    ser = pd.Series(test_data, index=test_jd)

    filtered = filtering.moving_average(ser, window_size=5)

    np.testing.assert_allclose(filtered.values, [2., 2.5, 2.5, 3.75, np.nan, 6.25,
                                                 7.5, 7.5, 8., np.nan])
示例#7
0
def test_moving_average():
    """
    Test moving average filter.
    """
    test_jd = np.arange(10, dtype=np.double)
    test_data = np.array(
        [1, 2, 3, 4, -999999.0, 6, 7, 8, 9, np.nan], dtype=np.double)

    ser = pd.Series(test_data, index=test_jd)

    filtered = filtering.moving_average(ser, window_size=5)

    np.testing.assert_allclose(filtered.values, [2., 2.5, 2.5, 3.75, np.nan, 6.25,
                                                 7.5, 7.5, 8., np.nan])
示例#8
0
def mask_vod():

    dir_in = r'D:\data_sets\AMSR2\timeseries_w_vod'
    dir_out = r'D:\data_sets\AMSR2\timeseries'

    files = os.listdir(dir_in)

    for fname in files:
        df = pd.read_csv(os.path.join(dir_in, fname), index_col=0)

        df.index = pd.to_datetime(df['vod'].index)
        df['vod_ma'] = moving_average(df['vod'], window_size=35)
        Ser = df[df['vod_ma'] <= 0.6]['sm']

        if len(Ser) > 10:
            Ser.to_csv(os.path.join(dir_out, fname))
示例#9
0
def calc_climatology(Ser,
                     moving_avg_orig=5,
                     moving_avg_clim=30,
                     median=False,
                     timespan=None,
                     fill=np.nan,
                     wraparound=False):
    '''
    Calculates the climatology of a data set.

    Parameters
    ----------
    Ser : pandas.Series (index must be a DateTimeIndex or julian date)

    moving_avg_orig : float, optional
        The size of the moving_average window [days] that will be applied on the
        input Series (gap filling, short-term rainfall correction)
        Default: 5

    moving_avg_clim : float, optional
        The size of the moving_average window [days] that will be applied on the
        calculated climatology (long-term event correction)
        Default: 35

    median : boolean, optional
        if set to True, the climatology will be based on the median conditions

    timespan : [timespan_from, timespan_to], datetime.datetime(y,m,d), optional
        Set this to calculate the climatology based on a subset of the input
        Series

    fill : float or int, optional
        Fill value to use for days on which no climatology exists

    wraparound : boolean, optional
        If set then the climatology is wrapped around at the edges before
        doing the second running average (long-term event correction)

    Returns
    -------
    climatology : pandas.Series
        Series containing the calculated climatology
        Always has 366 values behaving like a leap year
    '''

    if timespan is not None:
        Ser = Ser.truncate(before=timespan[0], after=timespan[1])

    Ser = moving_average(Ser, window_size=moving_avg_orig)

    Ser = pd.DataFrame(Ser)

    if type(Ser.index) == pd.DatetimeIndex:

        doys = doy(Ser.index.month, Ser.index.day)

    else:
        year, month, day = julian2date(Ser.index.values)[0:3]
        doys = doy(month, day)

    Ser['doy'] = doys

    if median:
        clim = Ser.groupby('doy').median()
    else:
        clim = Ser.groupby('doy').mean()

    clim_ser = pd.Series(clim.values.flatten(),
                         index=clim.index.values)

    if wraparound:
        index_old = clim_ser.index.copy()
        left_mirror = clim_ser.iloc[-moving_avg_clim:]
        right_mirror = clim_ser.iloc[:moving_avg_clim]
        # Shift index to start at 366 - index at -moving_avg_clim
        # to run over a whole year while keeping gaps the same size
        right_mirror.index = right_mirror.index + 366 * 2
        clim_ser.index = clim_ser.index + 366
        clim_ser = pd.concat([left_mirror,
                              clim_ser,
                              right_mirror])

        clim_ser = moving_average(clim_ser, window_size=moving_avg_clim)
        clim_ser = clim_ser.iloc[moving_avg_clim:-moving_avg_clim]
        clim_ser.index = index_old
    else:
        clim_ser = moving_average(clim_ser, window_size=moving_avg_clim)

    clim_ser = clim_ser.reindex(np.arange(366) + 1)
    clim_ser = clim_ser.fillna(fill)
    return clim_ser
示例#10
0
def calc_anomaly(Ser,
                 window_size=35,
                 climatology=None,
                 respect_leap_years=True,
                 return_clim=False):
    '''
    Calculates the anomaly of a time series (Pandas series).
    Both, climatology based, or moving-average based anomalies can be
    calculated

    Parameters
    ----------
    Ser : pandas.Series (index must be a DateTimeIndex)

    window_size : float, optional
        The window-size [days] of the moving-average window to calculate the
        anomaly reference (only used if climatology is not provided)
        Default: 35 (days)

    climatology : pandas.Series (index: 1-366), optional
        if provided, anomalies will be based on the climatology

    timespan : [timespan_from, timespan_to], datetime.datetime(y,m,d), optional
        If set, only a subset

    respect_leap_years : boolean, optional
        If set then leap years will be respected during matching of the climatology
        to the time series

    return_clim : boolean, optional
        if set to true the return argument will be a DataFrame which
        also contains the climatology time series.
        Only has an effect if climatology is used.

    Returns
    -------
    anomaly : pandas.Series
        Series containing the calculated anomalies
    '''

    if climatology is not None:

        if type(Ser.index) == pd.DatetimeIndex:

            year, month, day = (np.asarray(Ser.index.year),
                                np.asarray(Ser.index.month),
                                np.asarray(Ser.index.day))

        else:
            year, month, day = julian2date(Ser.index.values)[0:3]

        if respect_leap_years:
            doys = doy(month, day, year)
        else:
            doys = doy(month, day)

        df = pd.DataFrame()
        df['absolute'] = Ser
        df['doy'] = doys

        clim = pd.DataFrame({'climatology': climatology})

        df = df.join(clim, on='doy', how='left')

        anomaly = df['absolute'] - df['climatology']
        anomaly.index = df.index

        if return_clim:
            anomaly = pd.DataFrame({'anomaly': anomaly})
            anomaly['climatology'] = df['climatology']

    else:
        reference = moving_average(Ser, window_size=window_size)
        anomaly = Ser - reference

    return anomaly
示例#11
0
def calc_climatology(Ser,
                     moving_avg_orig=5,
                     moving_avg_clim=30,
                     median=False,
                     timespan=None,
                     fill=np.nan,
                     wraparound=False,
                     respect_leap_years=False,
                     interpolate_leapday=False,
                     fillna=True,
                     min_obs_orig=1,
                     min_obs_clim=1):
    '''
    Calculates the climatology of a data set.

    Parameters
    ----------
    Ser : pandas.Series (index must be a DateTimeIndex or julian date)

    moving_avg_orig : float, optional
        The size of the moving_average window [days] that will be applied on the
        input Series (gap filling, short-term rainfall correction)
        Default: 5

    moving_avg_clim : float, optional
        The size of the moving_average window [days] that will be applied on the
        calculated climatology (long-term event correction)
        Default: 35

    median : boolean, optional
        if set to True, the climatology will be based on the median conditions

    timespan : [timespan_from, timespan_to], datetime.datetime(y,m,d), optional
        Set this to calculate the climatology based on a subset of the input
        Series

    fill : float or int, optional
        Fill value to use for days on which no climatology exists

    wraparound : boolean, optional
        If set then the climatology is wrapped around at the edges before
        doing the second running average (long-term event correction)

    respect_leap_years : boolean, optional
        If set then leap years will be respected during the calculation of 
        the climatology
        Default: False

    fillna: boolean, optional
        If set, then the moving average used for the calculation of the
        climatology will be filled at the nan-values

    min_obs_orig: int
        Minimum observations required to give a valid output in the first
        moving average applied on the input series

    min_obs_clim: int
        Minimum observations required to give a valid output in the second
        moving average applied on the calculated climatology

    Returns
    -------
    climatology : pandas.Series
        Series containing the calculated climatology
        Always has 366 values behaving like a leap year
    '''

    if timespan is not None:
        Ser = Ser.truncate(before=timespan[0], after=timespan[1])

    Ser = moving_average(Ser, window_size=moving_avg_orig, fillna=fillna, min_obs=min_obs_orig)

    Ser = pd.DataFrame(Ser)

    if type(Ser.index) == pd.DatetimeIndex:
        year, month, day = (np.asarray(Ser.index.year),
                            np.asarray(Ser.index.month),
                            np.asarray(Ser.index.day))
    else:
        year, month, day = julian2date(Ser.index.values)[0:3]




    if respect_leap_years:
        doys = doy(month, day, year)
    else:
        doys = doy(month, day)


    Ser['doy'] = doys

    if median:
        clim = Ser.groupby('doy').median()
    else:
        clim = Ser.groupby('doy').mean()

    clim_ser = pd.Series(clim.values.flatten(),
                         index=clim.index.values)

    if interpolate_leapday and not respect_leap_years:
        clim_ser[60] = np.mean((clim_ser[59], clim_ser[61]))
    elif interpolate_leapday and respect_leap_years:
        clim_ser[366] = np.mean((clim_ser[365], clim_ser[1]))

    if wraparound:
        index_old = clim_ser.index.copy()
        left_mirror = clim_ser.iloc[-moving_avg_clim:]
        right_mirror = clim_ser.iloc[:moving_avg_clim]
        # Shift index to start at 366 - index at -moving_avg_clim
        # to run over a whole year while keeping gaps the same size
        right_mirror.index = right_mirror.index + 366 * 2
        clim_ser.index = clim_ser.index + 366
        clim_ser = pd.concat([left_mirror,
                              clim_ser,
                              right_mirror])

        clim_ser = moving_average(clim_ser, window_size=moving_avg_clim, fillna=fillna, min_obs=min_obs_clim)
        clim_ser = clim_ser.iloc[moving_avg_clim:-moving_avg_clim]
        clim_ser.index = index_old
    else:
        clim_ser = moving_average(clim_ser, window_size=moving_avg_clim, fillna=fillna, min_obs=min_obs_clim)

    clim_ser = clim_ser.reindex(np.arange(366) + 1)
    clim_ser = clim_ser.fillna(fill)
    return clim_ser
示例#12
0
def calc_anomaly(Ser,
                 window_size=35,
                 climatology=None,
                 respect_leap_years=True,
                 return_clim=False):
    '''
    Calculates the anomaly of a time series (Pandas series).
    Both, climatology based, or moving-average based anomalies can be
    calculated

    Parameters
    ----------
    Ser : pandas.Series (index must be a DateTimeIndex)

    window_size : float, optional
        The window-size [days] of the moving-average window to calculate the
        anomaly reference (only used if climatology is not provided)
        Default: 35 (days)

    climatology : pandas.Series (index: 1-366), optional
        if provided, anomalies will be based on the climatology

    timespan : [timespan_from, timespan_to], datetime.datetime(y,m,d), optional
        If set, only a subset

    respect_leap_years : boolean, optional
        If set then leap years will be respected during matching of the climatology
        to the time series

    return_clim : boolean, optional
        if set to true the return argument will be a DataFrame which
        also contains the climatology time series.
        Only has an effect if climatology is used.

    Returns
    -------
    anomaly : pandas.Series
        Series containing the calculated anomalies
    '''

    if climatology is not None:

        if type(Ser.index) == pd.DatetimeIndex:

            year, month, day = (np.asarray(Ser.index.year),
                                np.asarray(Ser.index.month),
                                np.asarray(Ser.index.day))

        else:
            year, month, day = julian2date(Ser.index.values)[0:3]

        if respect_leap_years:
            doys = doy(month, day, year)
        else:
            doys = doy(month, day)

        df = pd.DataFrame()
        df['absolute'] = Ser
        df['doy'] = doys

        clim = pd.DataFrame({'climatology': climatology})

        df = df.join(clim, on='doy', how='left')

        anomaly = df['absolute'] - df['climatology']
        anomaly.index = df.index

        if return_clim:
            anomaly = pd.DataFrame({'anomaly': anomaly})
            anomaly['climatology'] = df['climatology']

    else:
        reference = moving_average(Ser, window_size=window_size)
        anomaly = Ser - reference

    return anomaly
示例#13
0
def calc_climatology(Ser,
                     moving_avg_orig=5,
                     moving_avg_clim=35,
                     moving_avg_month_clim=3,
                     median=False,
                     timespan=None,
                     fill=np.nan,
                     wraparound=True,
                     respect_leap_years=False,
                     interpolate_leapday=False,
                     fillna=True,
                     min_obs_orig=1,
                     min_obs_clim=1,
                     unit="day"):
    """
    Calculates the climatology of a data set.

    Parameters
    ----------
    Ser : pandas.Series (index must be a DateTimeIndex or julian date)

    moving_avg_orig : float, optional
        The size of the moving_average window [days] that will be applied on the
        input Series (gap filling, short-term rainfall correction)
        Default: 5

    moving_avg_clim : float, optional
        The size of the moving_average window in days that will be applied on the
        calculated climatology (long-term event correction)
        Default: 35

    moving_avg_month_clim: : float, optional
        Same as for 'moving_avg_clim', but applied to monthly climatologies. In case
        unit='month', this value overrides 'moving_avg_clim'
        Default: 3

    median : boolean, optional
        if set to True, the climatology will be based on the median conditions

    timespan : [timespan_from, timespan_to], datetime.datetime(y,m,d), optional
        Set this to calculate the climatology based on a subset of the input
        Series

    fill : float or int, optional
        Fill value to use for days on which no climatology exists

    wraparound : boolean, optional
        If set then the climatology is wrapped around at the edges before
        doing the second running average (long-term event correction)

    respect_leap_years : boolean, optional
        If set then leap years will be respected during the calculation of
        the climatology. Only valid with 'unit' value set to 'day'.
        Default: False

    interpolate_leapday: boolean, optional
        <description>. Only valid with 'unit' value set to 'day'.
        Default: False

    fillna: boolean, optional
        If set, then the moving average used for the calculation of the
        climatology will be filled at the nan-values

    min_obs_orig: int
        Minimum observations required to give a valid output in the first
        moving average applied on the input series

    min_obs_clim: int
        Minimum observations required to give a valid output in the second
        moving average applied on the calculated climatology

    unit: str, optional
        Unit of the year to apply the climatology calculation to. Currently,
        supported options are 'day', 'month'.
        Default: 'day'

    Returns
    -------
    climatology : pandas.Series
        Series containing the calculated climatology
        Always has 366 values behaving like a leap year
    """
    if unit != "day":
        respect_leap_years, interpolate_leapday = False, False

    if unit == "month":
        moving_avg_clim = moving_avg_month_clim

    if timespan is not None:
        Ser = Ser.truncate(before=timespan[0], after=timespan[1])

    Ser = moving_average(Ser,
                         window_size=moving_avg_orig,
                         fillna=fillna,
                         min_obs=min_obs_orig)

    Ser = pd.DataFrame(Ser)

    if type(Ser.index) == pd.DatetimeIndex:
        year, month, day = (np.asarray(Ser.index.year),
                            np.asarray(Ser.index.month),
                            np.asarray(Ser.index.day))
    else:
        year, month, day = julian2date(Ser.index.values)[0:3]

    # provide indices for the selected unit
    indices, n_idx = _index_units(year,
                                  month,
                                  day,
                                  unit=unit,
                                  respect_leap_years=respect_leap_years)
    Ser['unit'] = indices

    if median:
        clim = Ser.groupby('unit').median()
    else:
        clim = Ser.groupby('unit').mean()

    clim_ser = pd.Series(clim.values.flatten(), index=clim.index.values)

    clim_ser = clim_ser.reindex(np.arange(n_idx) + 1)

    if wraparound:
        index_old = clim_ser.index.copy()
        left_mirror = clim_ser.iloc[-moving_avg_clim:]
        right_mirror = clim_ser.iloc[:moving_avg_clim]
        # Shift index to start at n_idx - index at -moving_avg_clim
        # to run over a whole year while keeping gaps the same size
        right_mirror.index = right_mirror.index + n_idx * 2
        clim_ser.index = clim_ser.index + n_idx
        clim_ser = pd.concat([left_mirror, clim_ser, right_mirror])

        clim_ser = moving_average(clim_ser,
                                  window_size=moving_avg_clim,
                                  fillna=fillna,
                                  min_obs=min_obs_clim)
        clim_ser = clim_ser.iloc[moving_avg_clim:-moving_avg_clim]
        clim_ser.index = index_old
    else:
        clim_ser = moving_average(clim_ser,
                                  window_size=moving_avg_clim,
                                  fillna=fillna,
                                  min_obs=min_obs_clim)

    # keep hardcoding as it's only for doys
    if interpolate_leapday and not respect_leap_years:
        clim_ser[60] = np.mean((clim_ser[59], clim_ser[61]))
    elif interpolate_leapday and respect_leap_years:
        clim_ser[366] = np.mean((clim_ser[365], clim_ser[1]))

    clim_ser = clim_ser.fillna(fill)

    return clim_ser