Пример #1
0
def mpdist(ts, ts_b, w, threshold=0.05, n_jobs=1):
    """
    Compute the MPDist distance between the time series ts and ts_b.

    The two distance profiles returned by ``cympx_ab_parallel`` are
    concatenated, NaN/inf entries are discarded, the remainder is sorted in
    ascending order and the value at position
    ``ceil(threshold * (len(ts) + len(ts_b))) - 1`` (clamped to the last
    valid position) is returned.

    Reference: Matrix Profile XII: MPdist: A Novel Time Series Distance
    Measure to Allow Data Mining in More Challenging Scenarios. Shaghayegh
    Gharghabi, Shima Imani, Anthony Bagnall, Amirali Darvishzadeh,
    Eamonn Keogh. ICDM 2018.

    Parameters
    ----------
    ts : array_like
        The first time series.
    ts_b : array_like
        The time series to compare against.
    w : int
        The window size.
    threshold : float, Default 0.05
        Fraction of the combined series length that selects which sorted
        distance is returned. Must be a float strictly between 0 and 1.
        The default follows the empirical recommendation of the paper and
        should rarely need changing.
    n_jobs : int, Default = 1
        Number of cpu cores to use.

    Returns
    -------
    float : mpdist
        The MPDist, or ``np.inf`` when no finite distance is available.

    Raises
    ------
    ValueError
        If ts or ts_b is not one dimensional.
        If threshold is not a float greater than 0 and less than 1.

    """
    ts = core.to_np_array(ts).astype('d')
    ts_b = core.to_np_array(ts_b).astype('d')
    n_jobs = core.valid_n_jobs(n_jobs)

    checks = (
        (ts, 'ts must be one dimensional!'),
        (ts_b, 'ts_b must be one dimensional!'),
    )
    for series, message in checks:
        if not core.is_one_dimensional(series):
            raise ValueError(message)

    is_float = isinstance(threshold, float)
    if not is_float or threshold <= 0 or threshold >= 1:
        raise ValueError(
            'threshold must be a float greater than 0 and less than 1')

    # Only the two distance profiles are needed; the index outputs are
    # ignored. The literal 0 is the flag mpx passes as int(cross_correlation)
    # - presumably selecting Euclidean distance; confirm against cympx.
    profile_ab, _, profile_ba, _ = cympx_ab_parallel(ts, ts_b, w, 0, n_jobs)

    joined = np.append(profile_ab, profile_ba)
    finite_sorted = np.sort(joined[~core.nan_inf_indices(joined)])

    # Nothing finite to pick from.
    if len(finite_sorted) == 0:
        return np.inf

    # Zero-based position of the requested percentile, clamped so it never
    # runs past the end of the available distances.
    wanted = int(np.ceil(threshold * (len(ts) + len(ts_b)))) - 1
    return finite_sorted[min(len(finite_sorted) - 1, wanted)]
Пример #2
0
def mpdist(ts, ts_b, w, n_jobs=1, threshold=0.05):
    """
    Computes the MPDist between the two series ts and ts_b. For more details
    refer to the paper:

    Matrix Profile XII: MPdist: A Novel Time Series Distance Measure to Allow
    Data Mining in More Challenging Scenarios. Shaghayegh Gharghabi,
    Shima Imani, Anthony Bagnall, Amirali Darvishzadeh, Eamonn Keogh. ICDM 2018

    The two distance profiles returned by ``cympx_ab_parallel`` are
    concatenated, NaN/inf entries are removed, the remainder is sorted and
    the value at position ``ceil(threshold * (len(ts) + len(ts_b))) - 1``
    (clamped to the last valid position) is returned.

    Parameters
    ----------
    ts : array_like
        The time series to compute the matrix profile for.
    ts_b : array_like
        The time series to compare against.
    w : int
        The window size.
    n_jobs : int, Default = 1
        Number of cpu cores to use.
    threshold : float, Default 0.05
        The percentile in which the distance is taken from. It was
        previously hard-coded to 0.05; the default keeps that behaviour.
        This value must be a float greater than 0 and less than 1. It is
        placed last so existing positional callers are unaffected.

    Returns
    -------
    float :
        The MPDist, or ``np.inf`` when no finite distance is available.

    Raises
    ------
    ValueError
        If ts or ts_b is not one dimensional.
        If threshold is not a float greater than 0 and less than 1.
    """
    ts = core.to_np_array(ts).astype('d')
    ts_b = core.to_np_array(ts_b).astype('d')
    n_jobs = core.valid_n_jobs(n_jobs)

    if not core.is_one_dimensional(ts):
        raise ValueError('ts must be one dimensional!')

    if not core.is_one_dimensional(ts_b):
        raise ValueError('ts_b must be one dimensional!')

    if not isinstance(threshold, float) or threshold <= 0 or threshold >= 1:
        raise ValueError('threshold must be a float greater than 0 and less'
                         ' than 1')

    # Only the distance profiles are used; the index outputs are discarded.
    mp, _, mpb, _ = cympx_ab_parallel(ts, ts_b, w, 0, n_jobs)

    mp_abba = np.append(mp, mpb)
    data_len = len(ts) + len(ts_b)
    abba_sorted = np.sort(mp_abba[~core.nan_inf_indices(mp_abba)])

    distance = np.inf
    if len(abba_sorted) > 0:
        # Zero-based position of the requested percentile, clamped so that
        # it never exceeds the number of finite distances available.
        upper_idx = int(np.ceil(threshold * data_len)) - 1
        idx = min(len(abba_sorted) - 1, upper_idx)
        distance = abba_sorted[idx]

    return distance
Пример #3
0
def mpx(ts, w, query=None, cross_correlation=False, n_jobs=1):
    """
    Compute a matrix profile with the MPX algorithm (no FFT involved).

    When ``query`` is array-like a join between ``ts`` and ``query`` is
    computed through ``cympx_ab_parallel``; otherwise ``cympx_parallel`` is
    run on ``ts`` alone.

    Parameters
    ----------
    ts : array_like
        The time series to compute the matrix profile for.
    w : int
        The window size.
    query : array_like
        Optionally a query series.
    cross_correlation : bool, Default=False
        Determine if cross_correlation distance should be returned. It
        defaults to Euclidean Distance.
    n_jobs : int, Default = 1
        Number of cpu cores to use.

    Returns
    -------
    dict
        A dict of key data points computed, with the keys:

        'mp': The matrix profile,
        'pi': The matrix profile 1NN indices,
        'rmp': Always None for this algorithm,
        'rpi': Always None for this algorithm,
        'lmp': Always None for this algorithm,
        'lpi': Always None for this algorithm,
        'metric': 'euclidean' or 'cross_correlation',
        'w': The window size used to compute the matrix profile,
        'ez': The exclusion zone, reported as floor(w / 4),
        'join': Flag indicating if a similarity join was computed,
        'sample_pct': Always 1,
        'data': {'ts': Time series data, 'query': Query data if supplied},
        'class': "MatrixProfile",
        'algorithm': "mpx"
    """
    ts = core.to_np_array(ts).astype('d')
    n_jobs = core.valid_n_jobs(n_jobs)

    is_join = bool(core.is_array_like(query))
    if is_join:
        query = core.to_np_array(query).astype('d')
        # The second pair of outputs of the AB routine is not reported.
        mp, mpi, _, _ = cympx_ab_parallel(
            ts, query, w, int(cross_correlation), n_jobs)
    else:
        mp, mpi = cympx_parallel(ts, w, int(cross_correlation), n_jobs)

    metric_name = 'cross_correlation' if cross_correlation else 'euclidean'

    # NOTE(review): 'ez' is reported as floor(w / 4) for joins as well -
    # confirm whether a join should report an exclusion zone of 0.
    profile = {
        'mp': np.asarray(mp),
        'pi': np.asarray(mpi),
        'rmp': None,
        'rpi': None,
        'lmp': None,
        'lpi': None,
        'metric': metric_name,
        'w': w,
        'ez': int(np.floor(w / 4)),
        'join': is_join,
        'sample_pct': 1,
        'data': {
            'ts': ts,
            'query': query
        },
        'class': 'MatrixProfile',
        'algorithm': 'mpx'
    }
    return profile
Пример #4
0
def analyze(ts, query=None, windows=None, sample_pct=1.0, threshold=0.98, n_jobs=1):
    """
    Dispatch to the analysis workflow that matches the given parameters.

    Workflow selection:

    * ``windows`` is None, or is array-like with more than one entry:
      ``analyze_pmp`` is called (pan matrix profile workflow).
    * ``windows`` is an int, or array-like with exactly one entry, and
      ``sample_pct >= 1``: ``analyze_mp_exact`` is called.
    * same single window and ``0 < sample_pct < 1``:
      ``analyze_mp_approximate`` is called.

    According to the library's description, the single-window workflows
    compute the matrix profile, the top 3 motifs and the top 3 discords and
    plot them, while the PMP workflow first computes an upper window when
    none is supplied and then computes the PMP, motifs, discords and plots.

    Parameters
    ----------
    ts : array_like
        The time series to analyze.
    query : array_like, Optional
        The query to analyze. Note that when computing the PMP the query is
        ignored!
    windows : int or array_like, Optional
        The window(s) to compute the MatrixProfile. Note that it may be an
        int for a single matrix profile computation or an array of ints for
        computing the pan matrix profile.
    sample_pct : float, default = 1
        A float between 0 and 1 representing how many samples to compute for
        the MP or PMP. When it is 1, the exact algorithm is used.
    threshold : float, Default 0.98
        The correlation coefficient used as the threshold. It should be
        between 0 and 1. This is used to compute the upper window size when
        no window(s) is given.
    n_jobs : int, Default = 1
        Number of cpu cores to use.

    Returns
    -------
    tuple : (profile, figures)
        The appropriate PMP or MP profile object and associated figures.

    Raises
    ------
    RuntimeError
        If no workflow matches, e.g. a single window combined with a
        sample_pct that is not greater than 0, or an empty window list.
    """
    # determine proper number of jobs
    n_jobs = core.valid_n_jobs(n_jobs)

    # Classify the window argument. All flags are evaluated up front so that
    # invalid inputs fail in the same place regardless of the chosen branch.
    window_count = len(windows) if core.is_array_like(windows) else None
    wants_pmp = windows is None or \
        (window_count is not None and window_count > 1)
    one_window = isinstance(windows, int) or window_count == 1
    exact = sample_pct >= 1
    approximate = sample_pct > 0 and sample_pct < 1

    # PMP covers both "no window" and "several windows".
    if wants_pmp:
        return analyze_pmp(ts, query, sample_pct, threshold,
                           windows=windows, n_jobs=n_jobs)

    if one_window and exact:
        return analyze_mp_exact(ts, query, windows, n_jobs=n_jobs)

    if one_window and approximate:
        return analyze_mp_approximate(ts, query, windows, sample_pct,
                                      n_jobs=n_jobs)

    raise RuntimeError('Param combination resulted in an uknown operation')
Пример #5
0
def stomp(ts, window_size, query=None, n_jobs=1):
    """
    Compute the matrix profile of a one dimensional time series with STOMP.

    The query positions are split into batches by
    ``core.generate_batch_jobs``. A single batch is computed in-process;
    several batches are mapped over the pool returned by ``core.mp_pool()``
    and merged by keeping the element-wise minimum distance.

    Parameters
    ----------
    ts : array_like
        The time series to compute the matrix profile for.
    window_size: int
        The size of the window to compute the matrix profile over.
    query : array_like
        Optionally, a query can be provided to perform a similarity join.
    n_jobs : int, Default = 1
        Number of cpu cores to use.

    Returns
    -------
    dict : profile
        A MatrixProfile data structure with the keys:

        'mp': The matrix profile,
        'pi': The matrix profile 1NN indices,
        'rmp': The right matrix profile,
        'rpi': The right matrix profile 1NN indices,
        'lmp': The left matrix profile,
        'lpi': The left matrix profile 1NN indices,
        'metric': The distance metric computed for the mp ('euclidean'),
        'w': The window size used to compute the matrix profile,
        'ez': The exclusion zone used (0 for a similarity join),
        'join': Flag indicating if a similarity join was computed,
        'sample_pct': Percentage of samples used in computing the MP (1),
        'data': {'ts': Time series data, 'query': Query data},
        'class': "MatrixProfile",
        'algorithm': "stomp"

    Raises
    ------
    ValueError
        If window_size < 4.
        If window_size > query length / 2.
        The conversion helpers in ``core`` may raise for unsupported or
        non one-dimensional inputs (not visible here).

    """
    is_join = core.is_similarity_join(ts, query)
    if not is_join:
        # self-join: the series is compared against itself
        query = ts

    # data conversion to np.array
    ts = core.to_np_array(ts)
    query = core.to_np_array(query)

    if window_size < 4:
        raise ValueError("window size must be at least 4.")

    if window_size > len(query) / 2:
        raise ValueError(
            "Time series is too short relative to desired window size")

    # n_jobs is only normalized when something other than 1 was requested
    if n_jobs != 1:
        n_jobs = core.valid_n_jobs(n_jobs)

    # common sizes
    profile_length = core.get_profile_length(ts, query, window_size)
    data_length = len(ts)
    num_queries = len(query) - window_size + 1

    # a similarity join does not use an exclusion zone
    exclusion_zone = 0 if is_join else int(np.ceil(window_size / 2.0))

    # skip locations are derived from the raw ts before nan/inf are cleaned
    skip_locs = core.find_skip_locations(ts, profile_length, window_size)
    ts = core.clean_nan_inf(ts)
    query = core.clean_nan_inf(query)

    # accumulators for the merged result
    matrix_profile = np.full(profile_length, np.inf)
    profile_index = np.full(profile_length, 0)

    # left/right profiles only exist when no similarity join happens
    if is_join:
        left_matrix_profile = None
        right_matrix_profile = None
        left_profile_index = None
        right_profile_index = None
    else:
        left_matrix_profile = np.copy(matrix_profile)
        right_matrix_profile = np.copy(matrix_profile)
        left_profile_index = np.copy(profile_index)
        right_profile_index = np.copy(profile_index)

    # statistics shared by every batch
    data_mu, data_sig = core.moving_avg_std(ts, window_size)
    first_product = core.fft_convolve(ts, query[0:window_size])

    # one argument tuple per batch of query positions
    args = [
        (start, end, ts, query, window_size, data_length, profile_length,
         exclusion_zone, is_join, data_mu, data_sig, first_product,
         skip_locs)
        for start, end in core.generate_batch_jobs(num_queries, n_jobs)
    ]

    if n_jobs == 1 or len(args) == 1:
        # single batch - no parallel environment needed
        results = [_batch_compute(args[0])]
    else:
        with core.mp_pool()(n_jobs) as pool:
            results = pool.map(_batch_compute, args)

    if len(results) == 1:
        # a lone batch already is the final answer
        only = results[0]
        matrix_profile = only['mp']
        profile_index = only['pi']
        left_matrix_profile = only['lmp']
        left_profile_index = only['lpi']
        right_matrix_profile = only['rmp']
        right_profile_index = only['rpi']
    else:
        # merge batches by keeping the smaller distance at each position
        for batch in results:
            better = batch['mp'] < matrix_profile
            matrix_profile[better] = batch['mp'][better]
            profile_index[better] = batch['pi'][better]

            if not is_join:
                better = batch['lmp'] < left_matrix_profile
                left_matrix_profile[better] = batch['lmp'][better]
                left_profile_index[better] = batch['lpi'][better]

                better = batch['rmp'] < right_matrix_profile
                right_matrix_profile[better] = batch['rmp'][better]
                right_profile_index[better] = batch['rpi'][better]

    return {
        'mp': matrix_profile,
        'pi': profile_index,
        'rmp': right_matrix_profile,
        'rpi': right_profile_index,
        'lmp': left_matrix_profile,
        'lpi': left_profile_index,
        'metric': 'euclidean',
        'w': window_size,
        'ez': exclusion_zone,
        'join': is_join,
        'sample_pct': 1,
        'data': {
            'ts': ts,
            'query': query
        },
        'class': "MatrixProfile",
        'algorithm': "stomp"
    }
Пример #6
0
def mpx(ts, w, query=None, cross_correlation=False, n_jobs=1):
    """
    The MPX algorithm computes the matrix profile without using the FFT.

    The element type of ``ts`` is taken from ``core.get_dtype``. When it is
    a unicode string type and no query is given, the profile is computed by
    ``mpx_single_char`` and the metric is reported as 'hamming'; otherwise
    the numeric routines ``cympx_parallel`` (self-join) or
    ``cympx_ab_parallel`` (join against ``query``) are used.

    Parameters
    ----------
    ts : array_like
        The time series to compute the matrix profile for.
    w : int
        The window size.
    query : array_like
        Optionally a query series.
    cross_correlation : bool, Default=False
        Determine if cross_correlation distance should be returned. It
        defaults to Euclidean Distance.
    n_jobs : int, Default = 1
        Number of cpu cores to use.

    Returns
    -------
    dict : profile
        A MatrixProfile data structure.

        >>> {
        >>>     'mp': The matrix profile,
        >>>     'pi': The matrix profile 1NN indices,
        >>>     'rmp': The right matrix profile (always None),
        >>>     'rpi': The right matrix profile 1NN indices (always None),
        >>>     'lmp': The left matrix profile (always None),
        >>>     'lpi': The left matrix profile 1NN indices (always None),
        >>>     'metric': The distance metric computed for the mp,
        >>>     'w': The window size used to compute the matrix profile,
        >>>     'ez': The exclusion zone used (0 for a similarity join),
        >>>     'join': Flag indicating if a similarity join was computed,
        >>>     'sample_pct': Percentage of samples used in computing the MP,
        >>>     'data': {
        >>>         'ts': Time series data,
        >>>         'query': Query data if supplied
        >>>     }
        >>>     'class': "MatrixProfile"
        >>>     'algorithm': "mpx"
        >>> }

    """
    dtype = core.get_dtype(ts)
    ts = core.to_np_array(ts).astype(dtype)
    n_jobs = core.valid_n_jobs(n_jobs)

    # Unicode (text) input is handled by a dedicated single-character routine.
    is_text = np.issubdtype(dtype, 'U')
    is_join = False

    if core.is_array_like(query):
        # NOTE(review): a join on text data still goes through
        # cympx_ab_parallel while the metric below is reported as 'hamming'
        # - confirm that this combination is actually supported.
        query = core.to_np_array(query).astype(dtype)
        is_join = True
        mp, mpi, _, _ = cympx_ab_parallel(ts, query, w,
                                          int(cross_correlation), n_jobs)
    elif is_text:
        mp, mpi = mpx_single_char(ts, w)
    else:
        mp, mpi = cympx_parallel(ts, w, int(cross_correlation), n_jobs)

    mp = np.asarray(mp)
    mpi = np.asarray(mpi)

    if is_text:
        distance_metric = 'hamming'
    elif cross_correlation:
        distance_metric = 'cross_correlation'
    else:
        distance_metric = 'euclidean'

    # An exclusion zone only applies to a self-join, where it suppresses
    # trivial matches; a join against a separate query uses none. The
    # original conditional had these two cases swapped.
    exclusion_zone = 0 if is_join else int(np.ceil(w / 4.0))

    return {
        'mp': mp,
        'pi': mpi,
        'rmp': None,
        'rpi': None,
        'lmp': None,
        'lpi': None,
        'metric': distance_metric,
        'w': w,
        'ez': exclusion_zone,
        'join': is_join,
        'sample_pct': 1,
        'data': {
            'ts': ts,
            'query': query
        },
        'class': 'MatrixProfile',
        'algorithm': 'mpx'
    }
Пример #7
0
def mstomp(ts, window_size, return_dimension=False, n_jobs=1):
    """
    Compute the multidimensional matrix profile with mSTAMP (stomp based).

    The query positions are split into batches by
    ``core.generate_batch_jobs``. A single batch is computed in-process;
    several batches are mapped over the pool returned by ``core.mp_pool()``
    and merged by keeping the element-wise minimum distance.

    Parameters
    ----------
    ts : array_like, shape (n_dim, seq_len)
        The multidimensional time series to compute the multidimensional
        matrix profile for. A one dimensional input is treated as a single
        dimension.
    window_size: int
        The size of the window to compute the matrix profile over.
    return_dimension : bool
        If True, also return the matrix profile dimension. It takes
        O(d^2 n) to store and O(d^2 n^2) to compute. Requesting it forces
        the computation to a single job. (default is False)
    n_jobs : int, Default = 1
        Number of cpu cores to use.

    Returns
    -------
    dict : profile
        A MatrixProfile data structure with the keys:

        'mp': The matrix profile,
        'pi': The matrix profile 1NN indices,
        'pd': The matrix profile dimension data,
        'rmp': The right matrix profile,
        'rpi': The right matrix profile 1NN indices,
        'lmp': The left matrix profile,
        'lpi': The left matrix profile 1NN indices,
        'metric': The distance metric computed for the mp ('euclidean'),
        'w': The window size used to compute the matrix profile,
        'ez': The exclusion zone used,
        'sample_pct': Percentage of samples used in computing the MP (1),
        'data': {'ts': Time series data, 'query': Query data},
        'class': "MatrixProfile",
        'algorithm': "stomp_based_mstamp"

    Raises
    ------
    ValueError
        If window_size < 4.
        If window_size > time series length / 2.
        The conversion helpers in ``core`` may raise for unsupported inputs
        (not visible here).

    """
    # the series is always compared against itself
    raw_series = ts
    ts = core.to_np_array(raw_series)
    query = core.to_np_array(raw_series)

    if window_size < 4:
        raise ValueError("window size must be at least 4.")

    # promote a single series to shape (1, seq_len)
    if ts.ndim == 1:
        ts = np.expand_dims(ts, axis=0)
        query = np.expand_dims(query, axis=0)

    if window_size > query.shape[1] / 2:
        raise ValueError(
            "Time series is too short relative to desired window size")

    # n_jobs is only normalized when something other than 1 was requested
    if n_jobs != 1:
        n_jobs = core.valid_n_jobs(n_jobs)

    # common sizes
    profile_length = core.get_profile_length(ts, query, window_size)
    num_dim, data_length = ts.shape[0], ts.shape[1]
    num_queries = query.shape[1] - window_size + 1
    exclusion_zone = int(np.ceil(window_size / 2.0))

    # skip locations are derived from the raw ts before nan/inf are cleaned
    skip_locs = core.find_multid_skip_locations(ts, profile_length,
                                                window_size)
    ts = core.clean_nan_inf(ts)
    query = core.clean_nan_inf(query)

    # accumulators for the merged result
    grid = (num_dim, profile_length)
    matrix_profile = np.full(grid, np.inf)
    profile_index = np.full(grid, 0)
    left_matrix_profile = np.copy(matrix_profile)
    right_matrix_profile = np.copy(matrix_profile)
    left_profile_index = np.copy(profile_index)
    right_profile_index = np.copy(profile_index)

    # the dimension output is not merged across batches, so asking for it
    # restricts the computation to one job
    profile_dimension = []
    if return_dimension:
        n_jobs = 1
        profile_dimension = [
            np.empty((dim + 1, profile_length), dtype=int)
            for dim in range(num_dim)
        ]

    # per-dimension statistics shared by every batch
    data_mu = np.empty(grid)
    data_sig = np.empty(grid)
    first_product = np.empty(grid)
    for dim in range(num_dim):
        series = ts[dim, :]
        data_mu[dim, :], data_sig[dim, :] = core.moving_avg_std(
            series, window_size)
        first_product[dim, :] = core.fft_convolve(
            series, query[dim, 0:window_size])

    # one argument tuple per batch of query positions
    args = [
        (num_dim, start, end, ts, query, window_size, data_length,
         profile_length, exclusion_zone, data_mu, data_sig, first_product,
         skip_locs, profile_dimension, return_dimension)
        for start, end in core.generate_batch_jobs(num_queries, n_jobs)
    ]

    if n_jobs == 1 or len(args) == 1:
        # single batch - no parallel environment needed
        results = [_batch_compute(args[0])]
    else:
        with core.mp_pool()(n_jobs) as pool:
            results = pool.map(_batch_compute, args)

    if len(results) == 1:
        # a lone batch already is the final answer
        only = results[0]
        matrix_profile = only['mp']
        profile_index = only['pi']
        profile_dimension = only['pd']
        left_matrix_profile = only['lmp']
        left_profile_index = only['lpi']
        right_matrix_profile = only['rmp']
        right_profile_index = only['rpi']
    else:
        # merge batches by keeping the smaller distance at each position
        for batch in results:
            better = batch['mp'] < matrix_profile
            matrix_profile[better] = batch['mp'][better]
            profile_index[better] = batch['pi'][better]

            better = batch['lmp'] < left_matrix_profile
            left_matrix_profile[better] = batch['lmp'][better]
            left_profile_index[better] = batch['lpi'][better]

            better = batch['rmp'] < right_matrix_profile
            right_matrix_profile[better] = batch['rmp'][better]
            right_profile_index[better] = batch['rpi'][better]

    return {
        'mp': matrix_profile,
        'pi': profile_index,
        'pd': profile_dimension,
        'rmp': right_matrix_profile,
        'rpi': right_profile_index,
        'lmp': left_matrix_profile,
        'lpi': left_profile_index,
        'metric': 'euclidean',
        'w': window_size,
        'ez': exclusion_zone,
        'sample_pct': 1,
        'data': {
            'ts': ts,
            'query': query
        },
        'class': "MatrixProfile",
        'algorithm': "stomp_based_mstamp"
    }
Пример #8
0
def analyze(ts,
            query=None,
            windows=None,
            sample_pct=1.0,
            threshold=0.98,
            n_jobs=1,
            preprocessing_kwargs=None):
    """
    Optionally preprocess the series, then dispatch to the matching workflow.

    Workflow selection:

    * ``windows`` is None, or is array-like with more than one entry:
      ``analyze_pmp`` is called (pan matrix profile workflow).
    * ``windows`` is an int, or array-like with exactly one entry, and
      ``sample_pct >= 1``: ``analyze_mp_exact`` is called.
    * same single window and ``0 < sample_pct < 1``:
      ``analyze_mp_approximate`` is called.

    According to the library's description, the single-window workflows
    compute the matrix profile, the top 3 motifs and the top 3 discords and
    plot them, while the PMP workflow first computes an upper window when
    none is supplied and then computes the PMP, motifs, discords and plots.

    Parameters
    ----------
    ts : array_like
        The time series to analyze.
    query : array_like, Optional
        The query to analyze. Note that when computing the PMP the query is
        ignored!
    windows : int or array_like, Optional
        The window(s) to compute the MatrixProfile. Note that it may be an
        int for a single matrix profile computation or an array of ints for
        computing the pan matrix profile.
    sample_pct : float, default = 1
        A float between 0 and 1 representing how many samples to compute for
        the MP or PMP. When it is 1, the exact algorithm is used.
    threshold : float, Default 0.98
        The correlation coefficient used as the threshold. It should be
        between 0 and 1. This is used to compute the upper window size when
        no window(s) is given.
    n_jobs : int, Default = 1
        Number of cpu cores to use.
    preprocessing_kwargs : dict, default = None
        Options for the preprocess function. The value is first passed
        through ``validate_preprocess_kwargs``; when the validated result is
        truthy, the following entries are read from it and forwarded to
        ``preprocess`` as keyword arguments:

        >>> {
        >>>     'window': The window size to compute the
        >>>               mean/median/minimum/maximum value,
        >>>     'impute_method': A string indicating the data imputation
        >>>               method, which should be 'mean', 'median', 'min'
        >>>               or 'max',
        >>>     'impute_direction': A string indicating the data imputation
        >>>               direction, which should be 'forward', 'fwd', 'f',
        >>>               'backward', 'bwd', 'b'. If the direction is
        >>>               forward, previous data is used for imputation; if
        >>>               it is backward, subsequent data is used.,
        >>>     'add_noise': A boolean value indicating whether noise needs
        >>>               to be added into the time series
        >>> }

        NOTE(review): the key names accepted on input are defined by
        ``validate_preprocess_kwargs``, which is not visible here -
        presumably they match the names above; verify.

        To disable the preprocessing procedure, set preprocessing_kwargs to
        None/False/""/{}.

    Returns
    -------
    tuple : (profile, figures)
        The appropriate PMP or MP profile object and associated figures.

    Raises
    ------
    RuntimeError
        If no workflow matches, e.g. a single window combined with a
        sample_pct that is not greater than 0, or an empty window list.

    """
    # preprocess the time series when validated options are present
    options = validate_preprocess_kwargs(preprocessing_kwargs)
    if options:
        forwarded = ('window', 'impute_method', 'impute_direction',
                     'add_noise')
        ts = preprocess(ts, **{key: options[key] for key in forwarded})

    # determine proper number of jobs
    n_jobs = core.valid_n_jobs(n_jobs)

    # Classify the window argument. All flags are evaluated up front so that
    # invalid inputs fail in the same place regardless of the chosen branch.
    window_count = len(windows) if core.is_array_like(windows) else None
    wants_pmp = windows is None or \
        (window_count is not None and window_count > 1)
    one_window = isinstance(windows, int) or window_count == 1
    exact = sample_pct >= 1
    approximate = sample_pct > 0 and sample_pct < 1

    # PMP covers both "no window" and "several windows".
    if wants_pmp:
        return analyze_pmp(ts, query, sample_pct, threshold,
                           windows=windows, n_jobs=n_jobs)

    if one_window and exact:
        return analyze_mp_exact(ts, query, windows, n_jobs=n_jobs)

    if one_window and approximate:
        return analyze_mp_approximate(ts, query, windows, sample_pct,
                                      n_jobs=n_jobs)

    raise RuntimeError('Param combination resulted in an uknown operation')