示例#1
0
def mpdist(ts, ts_b, w, threshold=0.05, n_jobs=1):
    """
    Computes the MPDist between the two series ts and ts_b. For more details
    refer to the paper:

    Matrix Profile XII: MPdist: A Novel Time Series Distance Measure to Allow
    Data Mining in More Challenging Scenarios. Shaghayegh Gharghabi,
    Shima Imani, Anthony Bagnall, Amirali Darvishzadeh, Eamonn Keogh. ICDM 2018

    Parameters
    ----------
    ts : array_like
        The first time series.
    ts_b : array_like
        The time series to compare against.
    w : int
        The window size.
    threshold : float, Default 0.05
        The percentile in which the distance is taken from. It is applied to
        the combined length of ts and ts_b to pick a rank in the sorted,
        finite profile values. By default it is set to 0.05 based on
        empirical research results from the paper. Generally, you should not
        change this unless you know what you are doing! This value must be a
        float greater than 0 and less than 1.
    n_jobs : int, Default = 1
        Number of cpu cores to use.

    Returns
    -------
    float : mpdist
        The MPDist. ``np.inf`` is returned when the joined profiles hold no
        finite values.

    Raises
    ------
    ValueError
        When ts or ts_b is not one dimensional, or when threshold is not a
        float, is <= 0, or is >= 1.

    """
    ts = core.to_np_array(ts).astype('d')
    ts_b = core.to_np_array(ts_b).astype('d')
    n_jobs = core.valid_n_jobs(n_jobs)

    # Both inputs must be flat series; ts is checked before ts_b.
    for series, message in ((ts, 'ts must be one dimensional!'),
                            (ts_b, 'ts_b must be one dimensional!')):
        if not core.is_one_dimensional(series):
            raise ValueError(message)

    threshold_ok = (isinstance(threshold, float)
                    and not (threshold <= 0 or threshold >= 1))
    if not threshold_ok:
        raise ValueError(
            'threshold must be a float greater than 0 and less than 1')

    # Only the two distance profiles are needed; the index outputs are
    # discarded.
    profile_ab, _, profile_ba, _ = cympx_ab_parallel(ts, ts_b, w, 0, n_jobs)

    joined = np.concatenate((np.ravel(profile_ab), np.ravel(profile_ba)))
    finite_mask = ~core.nan_inf_indices(joined)
    ranked = np.sort(joined[finite_mask])

    if len(ranked) == 0:
        return np.inf

    # The rank is derived from the raw series lengths, so clamp it to the
    # last available finite value.
    total_len = len(ts) + len(ts_b)
    cutoff = int(np.ceil(threshold * total_len)) - 1
    return ranked[min(len(ranked) - 1, cutoff)]
示例#2
0
def mpdist(ts, ts_b, w, n_jobs=1, threshold=0.05):
    """
    Computes the MPDist between the two series ts and ts_b. For more details
    refer to the paper:

    Matrix Profile XII: MPdist: A Novel Time Series Distance Measure to Allow
    Data Mining in More Challenging Scenarios. Shaghayegh Gharghabi,
    Shima Imani, Anthony Bagnall, Amirali Darvishzadeh, Eamonn Keogh. ICDM 2018

    Parameters
    ----------
    ts : array_like
        The first time series.
    ts_b : array_like
        The time series to compare against.
    w : int
        The window size.
    n_jobs : int, Default = 1
        Number of cpu cores to use.
    threshold : float, Default 0.05
        The percentile in which the distance is taken from. It is applied to
        the combined length of ts and ts_b to pick a rank in the sorted,
        finite profile values. The default of 0.05 reproduces the previously
        hard-coded behaviour. This value must be a float greater than 0 and
        less than 1 (NaN is rejected).

    Returns
    -------
    float :
        The MPDist. ``np.inf`` is returned when the joined profiles hold no
        finite values.

    Raises
    ------
    ValueError
        When threshold is not a float strictly between 0 and 1, or when ts
        or ts_b is not one dimensional.
    """
    # Validate the cheap scalar argument first so bad input fails fast,
    # before any array conversion or profile computation takes place. The
    # chained comparison also rejects NaN, which would otherwise surface
    # later as an obscure int-conversion error.
    if not isinstance(threshold, float) or not 0 < threshold < 1:
        raise ValueError('threshold must be a float greater than 0 and less'
                         ' than 1')

    ts = core.to_np_array(ts).astype('d')
    ts_b = core.to_np_array(ts_b).astype('d')
    n_jobs = core.valid_n_jobs(n_jobs)

    if not core.is_one_dimensional(ts):
        raise ValueError('ts must be one dimensional!')

    if not core.is_one_dimensional(ts_b):
        raise ValueError('ts_b must be one dimensional!')

    # Only the two distance profiles are needed; the index outputs are
    # discarded.
    mp, _, mpb, _ = cympx_ab_parallel(ts, ts_b, w, 0, n_jobs)

    mp_abba = np.append(mp, mpb)
    data_len = len(ts) + len(ts_b)
    abba_sorted = np.sort(mp_abba[~core.nan_inf_indices(mp_abba)])

    distance = np.inf
    if len(abba_sorted) > 0:
        # The rank comes from the raw series lengths, so clamp it to the
        # last available finite value.
        upper_idx = int(np.ceil(threshold * data_len)) - 1
        idx = min(len(abba_sorted) - 1, upper_idx)
        distance = abba_sorted[idx]

    return distance
示例#3
0
def mpx(ts, w, query=None, cross_correlation=False, n_jobs=1):
    """
    The MPX algorithm computes the matrix profile without using the FFT.

    Parameters
    ----------
    ts : array_like
        The time series to compute the matrix profile for.
    w : int
        The window size.
    query : array_like
        Optionally a query series. When it is array like, a join between ts
        and query is computed instead of a self-join.
    cross_correlation : bool, Default=False
        Determine if cross_correlation distance should be returned. It defaults
        to Euclidean Distance.
    n_jobs : int, Default = 1
        Number of cpu cores to use.

    Returns
    -------
    A dict of key data points computed.
    {
        'mp': The matrix profile,
        'pi': The matrix profile 1NN indices,
        'rmp': The right matrix profile (always None here),
        'rpi': The right matrix profile 1NN indices (always None here),
        'lmp': The left matrix profile (always None here),
        'lpi': The left matrix profile 1NN indices (always None here),
        'metric': The distance metric computed for the mp,
        'w': The window size used to compute the matrix profile,
        'ez': The exclusion zone reported, floor(w / 4),
        'join': Flag indicating if a similarity join was computed,
        'sample_pct': Percentage of samples used in computing the MP (1),
        'data': {
            'ts': Time series data,
            'query': Query data if supplied
        }
        'class': "MatrixProfile"
        'algorithm': "mpx"
    }
    """
    ts = core.to_np_array(ts).astype('d')
    n_jobs = core.valid_n_jobs(n_jobs)

    if not core.is_array_like(query):
        # Self-join: no usable query was supplied.
        is_join = False
        mp, mpi = cympx_parallel(ts, w, int(cross_correlation), n_jobs)
    else:
        # AB-join: only the profile and indices for ts are kept.
        is_join = True
        query = core.to_np_array(query).astype('d')
        mp, mpi, _, _ = cympx_ab_parallel(
            ts, query, w, int(cross_correlation), n_jobs)

    distance_metric = 'cross_correlation' if cross_correlation else 'euclidean'

    profile = {
        'mp': np.asarray(mp),
        'pi': np.asarray(mpi),
        'rmp': None,
        'rpi': None,
        'lmp': None,
        'lpi': None,
        'metric': distance_metric,
        'w': w,
        'ez': int(np.floor(w / 4)),
        'join': is_join,
        'sample_pct': 1,
        'data': {
            'ts': ts,
            'query': query
        },
        'class': 'MatrixProfile',
        'algorithm': 'mpx'
    }
    return profile
示例#4
0
def mpx(ts, w, query=None, cross_correlation=False, n_jobs=1):
    """
    The MPX algorithm computes the matrix profile without using the FFT.

    Parameters
    ----------
    ts : array_like
        The time series to compute the matrix profile for. Its dtype, as
        reported by ``core.get_dtype``, selects the computation path: unicode
        string data without a query is handled by ``mpx_single_char``.
    w : int
        The window size.
    query : array_like
        Optionally a query series. It is cast to the same dtype as ts.
    cross_correlation : bool, Default=False
        Determine if cross_correlation distance should be returned. It defaults
        to Euclidean Distance.
    n_jobs : int, Default = 1
        Number of cpu cores to use.

    Returns
    -------
    dict : profile
        A MatrixProfile data structure.

        >>> {
        >>>     'mp': The matrix profile,
        >>>     'pi': The matrix profile 1NN indices,
        >>>     'rmp': The right matrix profile (always None here),
        >>>     'rpi': The right matrix profile 1NN indices (always None here),
        >>>     'lmp': The left matrix profile (always None here),
        >>>     'lpi': The left matrix profile 1NN indices (always None here),
        >>>     'metric': 'hamming' for unicode data, otherwise
        >>>               'cross_correlation' or 'euclidean',
        >>>     'w': The window size used to compute the matrix profile,
        >>>     'ez': The exclusion zone reported,
        >>>     'join': Flag indicating if a similarity join was computed,
        >>>     'sample_pct': Percentage of samples used in computing the MP,
        >>>     'data': {
        >>>         'ts': Time series data,
        >>>         'query': Query data if supplied
        >>>     }
        >>>     'class': "MatrixProfile"
        >>>     'algorithm': "mpx"
        >>> }

    """
    dtype = core.get_dtype(ts)
    ts = core.to_np_array(ts).astype(dtype)
    n_jobs = core.valid_n_jobs(n_jobs)

    # Unicode string input is reported with the hamming metric.
    is_text = np.issubdtype(dtype, 'U')
    is_join = False

    if core.is_array_like(query):
        # AB-join: only the profile and indices for ts are kept.
        # NOTE(review): this path is taken for unicode data as well, while
        # the metric below is still reported as 'hamming' - confirm intended.
        is_join = True
        query = core.to_np_array(query).astype(dtype)
        mp, mpi, _, _ = cympx_ab_parallel(
            ts, query, w, int(cross_correlation), n_jobs)
    elif is_text:
        mp, mpi = mpx_single_char(ts, w)
    else:
        mp, mpi = cympx_parallel(ts, w, int(cross_correlation), n_jobs)

    if is_text:
        distance_metric = 'hamming'
    elif cross_correlation:
        distance_metric = 'cross_correlation'
    else:
        distance_metric = 'euclidean'

    profile = {
        'mp': np.asarray(mp),
        'pi': np.asarray(mpi),
        'rmp': None,
        'rpi': None,
        'lmp': None,
        'lpi': None,
        'metric': distance_metric,
        'w': w,
        # NOTE(review): a non-zero zone is reported only for joins; verify
        # this is not inverted relative to what the kernels actually apply.
        'ez': int(np.ceil(w / 4.0)) if is_join else 0,
        'join': is_join,
        'sample_pct': 1,
        'data': {
            'ts': ts,
            'query': query
        },
        'class': 'MatrixProfile',
        'algorithm': 'mpx'
    }
    return profile