示例#1
0
def mpdist(ts, ts_b, w, threshold=0.05, n_jobs=1):
    """
    Compute the MPDist between the two time series ts and ts_b.

    The AB and BA matrix profiles are concatenated, non-finite entries are
    dropped, and the k-th smallest remaining value is returned, where
    k = ceil(threshold * (len(ts) + len(ts_b))). For more details refer to
    the paper:

    Matrix Profile XII: MPdist: A Novel Time Series Distance Measure to Allow
    Data Mining in More Challenging Scenarios. Shaghayegh Gharghabi,
    Shima Imani, Anthony Bagnall, Amirali Darvishzadeh, Eamonn Keogh. ICDM 2018

    Parameters
    ----------
    ts : array_like
        The first time series.
    ts_b : array_like
        The time series to compare against.
    w : int
        The window size.
    threshold : float, Default 0.05
        The percentile at which the distance is taken. The default of 0.05
        is based on empirical research results from the paper. Generally,
        you should not change this unless you know what you are doing!
        This value must be a float greater than 0 and less than 1.
    n_jobs : int, Default = 1
        Number of cpu cores to use.

    Returns
    -------
    float : mpdist
        The MPDist, or np.inf when the joined profile has no finite values.

    Raises
    ------
    ValueError
        If ts or ts_b is not one dimensional.
        If threshold is not a float strictly between 0 and 1.

    """
    ts = core.to_np_array(ts).astype('d')
    ts_b = core.to_np_array(ts_b).astype('d')
    n_jobs = core.valid_n_jobs(n_jobs)

    # Validate both series in the same order as they are passed in.
    dimension_checks = (
        (ts, 'ts must be one dimensional!'),
        (ts_b, 'ts_b must be one dimensional!'),
    )
    for series, message in dimension_checks:
        if not core.is_one_dimensional(series):
            raise ValueError(message)

    threshold_ok = isinstance(threshold, float) and not (
        threshold <= 0 or threshold >= 1
    )
    if not threshold_ok:
        raise ValueError(
            'threshold must be a float greater than 0 and less than 1'
        )

    # Only the two distance profiles are needed; the index outputs are unused.
    profile_ab, _, profile_ba, _ = cympx_ab_parallel(ts, ts_b, w, 0, n_jobs)

    joined = np.append(profile_ab, profile_ba)
    finite_sorted = np.sort(joined[~core.nan_inf_indices(joined)])

    if len(finite_sorted) == 0:
        return np.inf

    # The rank is derived from the raw series lengths, so clamp it to the
    # last available finite value.
    rank = int(np.ceil(threshold * (len(ts) + len(ts_b)))) - 1
    return finite_sorted[min(len(finite_sorted) - 1, rank)]
示例#2
0
def mpdist(ts, ts_b, w, n_jobs=1, threshold=0.05):
    """
    Compute the MPDist between the two time series ts and ts_b.

    The AB and BA matrix profiles are concatenated, non-finite entries are
    dropped, and the k-th smallest remaining value is returned, where
    k = ceil(threshold * (len(ts) + len(ts_b))). For more details refer to
    the paper:

    Matrix Profile XII: MPdist: A Novel Time Series Distance Measure to Allow
    Data Mining in More Challenging Scenarios. Shaghayegh Gharghabi,
    Shima Imani, Anthony Bagnall, Amirali Darvishzadeh, Eamonn Keogh. ICDM 2018

    Parameters
    ----------
    ts : array_like
        The first time series.
    ts_b : array_like
        The time series to compare against.
    w : int
        The window size.
    n_jobs : int, Default = 1
        Number of cpu cores to use.
    threshold : float, Default 0.05
        The percentile at which the distance is taken. It is placed last so
        existing positional calls keep working. The default of 0.05 matches
        the value that used to be hard-coded. This value must be a float
        greater than 0 and less than 1.

    Returns
    -------
    float :
        The MPDist, or np.inf when the joined profile has no finite values.

    Raises
    ------
    ValueError
        If ts or ts_b is not one dimensional.
        If threshold is not a float strictly between 0 and 1.
    """
    ts = core.to_np_array(ts).astype('d')
    ts_b = core.to_np_array(ts_b).astype('d')
    n_jobs = core.valid_n_jobs(n_jobs)

    if not core.is_one_dimensional(ts):
        raise ValueError('ts must be one dimensional!')

    if not core.is_one_dimensional(ts_b):
        raise ValueError('ts_b must be one dimensional!')

    if not isinstance(threshold, float) or threshold <= 0 or threshold >= 1:
        raise ValueError(
            'threshold must be a float greater than 0 and less than 1'
        )

    # Only the two distance profiles are needed; the index outputs are unused.
    mp, _, mpb, _ = cympx_ab_parallel(ts, ts_b, w, 0, n_jobs)

    mp_abba = np.append(mp, mpb)
    data_len = len(ts) + len(ts_b)
    abba_sorted = np.sort(mp_abba[~core.nan_inf_indices(mp_abba)])

    if len(abba_sorted) == 0:
        return np.inf

    # The rank is derived from the raw series lengths, so clamp it to the
    # last available finite value.
    upper_idx = int(np.ceil(threshold * data_len)) - 1
    return abba_sorted[min(len(abba_sorted) - 1, upper_idx)]
示例#3
0
def make_clipping_av(ts, window):
    """
    Build an annotation vector that down-weights clipped subsequences.

    A sample counts as clipped when it equals the global maximum or the
    global minimum of ts. For each subsequence the clipped samples are
    counted; the counts are shifted so the smallest is 0, scaled by the
    largest shifted count and inverted, so the most clipped subsequences
    receive 0 and the least clipped receive 1. If every subsequence has the
    same count, a vector of zeros is returned.

    Parameters
    ----------
    ts : array_like
        The time series.
    window : int
        The specific window size used to compute the MatrixProfile.

    Returns
    -------
    np.array : av
        An annotation vector of length len(ts) - window + 1.

    Raises
    ------
    ValueError
        If ts is not a list or np.array.
        If ts is not one-dimensional.
        If window is not an integer.

    """
    try:
        ts = core.to_np_array(ts)
    except ValueError:
        raise ValueError('make_clipping_av expects ts to be array-like')

    if not core.is_one_dimensional(ts):
        raise ValueError('make_clipping_av expects ts to be one-dimensional')

    if not isinstance(window, int):
        raise ValueError('make_clipping_av expects window to be an integer')

    clip_counts = np.zeros(len(ts) - window + 1)

    ceiling, floor = np.max(ts), np.min(ts)
    for start in range(len(clip_counts)):
        # Number of samples in this subsequence sitting on either extreme.
        clip_counts[start] = sum(
            1
            for offset in range(window)
            if ts[start + offset] == ceiling or ts[start + offset] == floor
        )

    # Shift so the least clipped subsequence has a count of zero.
    clip_counts -= np.min(clip_counts)

    peak = np.max(clip_counts)
    if peak == 0:
        return np.zeros(len(clip_counts))

    return 1 - clip_counts / peak
示例#4
0
def make_complexity_av(ts, window):
    """
    Build an annotation vector from the complexity estimate of the signal.

    The complexity of a subsequence is the square root of the sum of its
    squared first differences. The per-subsequence estimates are min-max
    scaled, so the least complex subsequence maps to 0 and the most complex
    maps to 1. If all subsequences share the same complexity, a vector of
    zeros is returned.

    Parameters
    ----------
    ts : array_like
        The time series.
    window : int
        The specific window size used to compute the MatrixProfile.

    Returns
    -------
    np.array : av
        An annotation vector of length len(ts) - window + 1.

    Raises
    ------
    ValueError
        If ts is not a list or np.array.
        If ts is not one-dimensional.
        If window is not an integer.

    """
    try:
        ts = core.to_np_array(ts)
    except ValueError:
        raise ValueError('make_complexity_av expects ts to be array-like')

    if not core.is_one_dimensional(ts):
        raise ValueError('make_complexity_av expects ts to be one-dimensional')

    if not isinstance(window, int):
        raise ValueError('make_complexity_av expects window to be an integer')

    av = np.zeros(len(ts) - window + 1)

    for i in range(len(av)):
        av[i] = np.sqrt(np.sum(np.diff(ts[i:i + window]) ** 2))

    min_val, max_val = np.min(av), np.max(av)

    # Divide by the range rather than the raw maximum: dividing the shifted
    # values by the unshifted maximum would never reach 1 unless the minimum
    # happened to be 0.
    span = max_val - min_val
    if span == 0:
        return np.zeros(len(av))

    return (av - min_val) / span
示例#5
0
def make_meanstd_av(ts, window):
    """
    Build a binary annotation vector from the moving standard deviation.

    A position is set to 1 when its moving standard deviation is strictly
    less than the mean of all moving standard deviations, and to 0
    otherwise.

    Parameters
    ----------
    ts : array_like
        The time series.
    window : int
        The specific window size used to compute the MatrixProfile.

    Returns
    -------
    np.array : av
        An annotation vector of length len(ts) - window + 1.

    Raises
    ------
    ValueError
        If ts is not a list or np.array.
        If ts is not one-dimensional.
        If window is not an integer.

    """
    try:
        ts = core.to_np_array(ts)
    except ValueError:
        raise ValueError('make_meanstd_av expects ts to be array-like')

    if not core.is_one_dimensional(ts):
        raise ValueError('make_meanstd_av expects ts to be one-dimensional')

    if not isinstance(window, int):
        raise ValueError('make_meanstd_av expects window to be an integer')

    flags = np.zeros(len(ts) - window + 1)

    sigma = core.moving_std(ts, window)
    cutoff = np.mean(sigma)

    # Only the leading len(flags) deviations take part in the comparison.
    quiet = np.asarray(sigma[:len(flags)]) < cutoff
    flags[quiet] = 1

    return flags
示例#6
0
def make_default_av(ts, window):
    """
    Build the neutral annotation vector, which is filled with 1s
    (should not change the matrix profile).

    Parameters
    ----------
    ts : array_like
        The time series.
    window : int
        The specific window size used to compute the MatrixProfile.

    Returns
    -------
    np.array : av
        An annotation vector of ones with length len(ts) - window + 1.

    Raises
    ------
    ValueError
        If ts is not a list or np.array.
        If ts is not one-dimensional.
        If window is not an integer.

    """
    try:
        ts = core.to_np_array(ts)
    except ValueError:
        raise ValueError('make_default_av expects ts to be array-like')

    one_dimensional = core.is_one_dimensional(ts)
    if not one_dimensional:
        raise ValueError('make_default_av expects ts to be one-dimensional')

    if not isinstance(window, int):
        raise ValueError('make_default_av expects window to be an integer')

    # One entry per subsequence of the given window size.
    n_subsequences = len(ts) - window + 1
    return np.ones(n_subsequences)
示例#7
0
def test_is_one_dimensional_valid():
    """A flat four-element array is reported as one dimensional."""
    flat = np.array([1, 2, 3, 4])
    outcome = core.is_one_dimensional(flat)
    assert outcome == True
示例#8
0
def test_is_one_dimensional_invalid():
    """A 4x3 two-dimensional array is not reported as one dimensional."""
    rows = [[1, 2, 3], [2, 3, 4], [3, 4, 5], [4, 5, 6]]
    grid = np.array(rows)
    outcome = core.is_one_dimensional(grid)
    assert outcome == False
示例#9
0
def statistics(ts, window_size):
    """
    Compute global and moving statistics for the provided 1D time
    series. The statistics computed include the min, max, mean, std. and
    median over the window specified and globally.

    Parameters
    ----------
    ts : array_like
        The time series.
    window_size : int
        The size of the window to compute moving statistics over.

    Returns
    -------
    dict :
    {
        ts: the original time series,
        min: the global minimum,
        max: the global maximum,
        mean: the global mean,
        std: the global standard deviation,
        median: the global median,
        moving_min: the moving minimum,
        moving_max: the moving maximum,
        moving_mean: the moving mean,
        moving_std: the moving standard deviation,
        moving_median: the moving median,
        window_size: the window size provided,
        class: Statistics
    }

    Raises
    ------
    ValueError
        If ts is not a list or np.array.
        If ts is not 1D.
        If window_size is not an int.
        If window_size > len(ts).
        If window_size < 3.
    """
    if not core.is_array_like(ts):
        raise ValueError('ts must be array like')

    if not core.is_one_dimensional(ts):
        raise ValueError('The time series must be 1D')

    if not isinstance(window_size, int):
        raise ValueError('Expecting int for window_size')

    if window_size > len(ts):
        raise ValueError('Window size cannot be greater than len(ts)')

    if window_size < 3:
        raise ValueError('Window size cannot be less than 3')

    rolling_mean, rolling_std = core.moving_avg_std(ts, window_size)
    # The moving min, max and median are taken along axis 1 of the windows.
    windows = core.rolling_window(ts, window_size)

    result = {'ts': ts}

    # Statistics over the whole series.
    result['min'] = np.min(ts)
    result['max'] = np.max(ts)
    result['mean'] = np.mean(ts)
    result['std'] = np.std(ts)
    result['median'] = np.median(ts)

    # Statistics per window.
    result['moving_min'] = np.min(windows, axis=1)
    result['moving_max'] = np.max(windows, axis=1)
    result['moving_mean'] = rolling_mean
    result['moving_std'] = rolling_std
    result['moving_median'] = np.median(windows, axis=1)

    result['window_size'] = window_size
    result['class'] = 'Statistics'

    return result