Пример #1
0
def test_pearson_to_euclidean_2d():
    """Check core.pearson_to_euclidean on a 2D input with one window per row.

    Both rows hold the same Pearson values and use the same window size (4),
    so both rows of the result are expected to be identical. The expected
    numbers are consistent with sqrt(2 * w * (1 - r)) for w = 4.
    """
    pearson_row = [0.23, 0.5, 0.34, 0.67, 0.88]
    euclidean_row = [2.48193473, 2, 2.29782506, 1.62480768, 0.9797959]

    a = np.array([pearson_row, pearson_row])
    w = [4, 4]
    desired = np.array([euclidean_row, euclidean_row])
    actual = core.pearson_to_euclidean(a, w)

    # numpy's signature is (actual, desired); passing them in that order
    # keeps the ACTUAL/DESIRED labels of a failure report correct.
    np.testing.assert_almost_equal(actual, desired)
Пример #2
0
 def resize(mp, pi, n):
     """Pad the enclosing scope's profile out to length ``n``.

     The Pearson values in ``profile['mp']`` are converted to Euclidean
     distances using ``window_size`` and padded with ``inf``; the indices in
     ``profile['pi']`` are padded with ``nan``. Both ``profile`` and
     ``window_size`` come from the enclosing scope. The ``mp`` and ``pi``
     arguments are accepted but never read.

     Returns
     -------
     tuple : (mp, pi)
         The padded distance and index arrays.
     """
     euclidean = core.pearson_to_euclidean(profile['mp'], window_size)

     # number of trailing slots needed to reach the requested length
     missing = n - euclidean.shape[0]

     padded_mp = np.append(euclidean, np.full(missing, np.inf))
     padded_pi = np.append(profile['pi'], np.full(missing, np.nan))

     return (padded_mp, padded_pi)
Пример #3
0
    def resize(mp, pi, n):
        """Pad a matrix profile and its index out to length ``n``.

        When the enclosing scope's ``ts`` is not a unicode-string array, the
        distances are taken from ``profile['mp']`` and converted from Pearson
        to Euclidean using ``window_size``; otherwise the ``mp`` argument is
        used as given. Distances are padded with ``inf``. The index always
        comes from ``profile['pi']`` (the ``pi`` argument is never read) and
        is padded with ``nan``.

        Returns
        -------
        tuple : (mp, pi)
            The padded distance and index arrays.
        """
        # string data keeps the distances it was given; everything else is
        # converted from pearson to euclidean
        if np.issubdtype(ts.dtype, 'U'):
            distances = mp
        else:
            distances = core.pearson_to_euclidean(profile['mp'], window_size)

        # number of trailing slots needed to reach the requested length
        missing = n - distances.shape[0]

        padded_mp = np.append(distances, np.full(missing, np.inf))
        padded_pi = np.append(profile['pi'], np.full(missing, np.nan))

        return (padded_mp, padded_pi)
Пример #4
0
def pmp_top_k_discords(profile, exclusion_zone=None, k=3):
    """
    Computes the top K discords for the given Pan-MatrixProfile. The return
    value is an array of (row, col) indices.

    Notes
    -----
    This algorithm is written to work with Euclidean distance. If the
    profile's 'metric' entry is 'pearson', the PMP is first converted to
    Euclidean with core.pearson_to_euclidean.

    Parameters
    ----------
    profile : dict
        Data structure from a PMP algorithm. The 'pmp', 'windows' and
        (optionally) 'metric' keys are read.
    exclusion_zone : int, Default window / 2
        The zone to exclude around the found discords to reduce trivial
        findings. When None, half of the window size of the row in which
        each discord was found (rounded down) is used for that discord.
    k : int
        Maximum number of discords to find.

    Returns
    -------
    dict : profile
        The object passed in, with a 'discords' key added. It holds an
        array of indices with one row per discord: the first column is the
        row index and the second column is the column index of the
        submitted PMP. Fewer than k discords are returned when no finite
        values remain.

    Raises
    ------
    ValueError
        If profile is not a PMP data structure.

    """
    if not core.is_pmp_obj(profile):
        raise ValueError('Expecting PMP data structure!')

    # this function requires euclidean distance
    # convert if the metric is pearson
    metric = profile.get('metric', None)
    pmp = profile.get('pmp', None)
    windows = profile.get('windows', None)

    if metric == 'pearson':
        tmp = core.pearson_to_euclidean(pmp, windows)
    else:
        tmp = np.copy(pmp).astype('d')

    # replace nan and infs with -infinity so that argmax never selects
    # them and so that -infinity can serve as the "nothing left" marker
    tmp[core.nan_inf_indices(tmp)] = -np.inf

    # iterate finding the max value k times or until negative
    # infinity is obtained
    found = []

    for _ in range(k):
        max_idx = np.unravel_index(np.argmax(tmp), tmp.shape)

        if tmp[max_idx] == -np.inf:
            break

        found.append(max_idx)

        row = max_idx[0]
        col = max_idx[1]

        # Resolve the zone per discord without overwriting the argument:
        # each row may use a different window, so the default must be
        # recomputed from the window of the row that was just hit.
        if exclusion_zone is None:
            ez = int(np.floor(windows[row] / 2))
        else:
            ez = exclusion_zone

        # apply exclusion zone: the half-open range [col - ez, col + ez)
        # clipped to the row
        n = tmp[row].shape[0]
        ez_start = max(0, col - ez)
        ez_stop = min(n, col + ez)
        tmp[row, ez_start:ez_stop] = -np.inf

        # A zone of 0 yields an empty slice; always retire the found cell
        # itself so the same discord cannot be reported more than once.
        tmp[row, col] = -np.inf

    profile['discords'] = np.array(found)

    return profile
Пример #5
0
def pmp_top_k_motifs(profile,
                     exclusion_zone=None,
                     k=3,
                     max_neighbors=10,
                     radius=3):
    """
    Find the top K number of motifs (patterns) given a pan matrix profile. By
    default the algorithm will find up to 3 motifs (k) and up to 10 of their
    neighbors with a radius of 3 * min_dist.

    Parameters
    ----------
    profile : dict
        The output from one of the pan matrix profile algorithms. The
        'data' -> 'ts', 'pmp', 'pmpi' and 'windows' entries are read.
    exclusion_zone : int, Default = None
        Desired number of values to exclude on both sides of the motif. This
        avoids trivial matches. When None, half of the window size of the
        row in which the motif was found (rounded down) is used for that
        motif. An explicit 0 is passed through to
        core.apply_exclusion_zone unchanged instead of being replaced by
        the default.
    k : int, Default = 3
        Desired number of motifs to find.
    max_neighbors : int, Default = 10
        The maximum number of neighbors to include for a given motif.
    radius : int, Default = 3
        The radius is used to associate a neighbor by checking if the
        neighbor's distance is less than or equal to dist * radius

    Returns
    -------
    The original input obj with the addition of the "motifs" key. The motifs
    key consists of the following structure.

    A list of dicts containing motif indices and their corresponding neighbor
    indices. Note that each index is a (row, col) index corresponding to the
    pan matrix profile.

    [
        {
            'motifs': [first_index, second_index],
            'neighbors': [index, index, index ...max_neighbors]
        }
    ]

    Raises
    ------
    ValueError
        If profile is not a PMP data structure.
    """
    if not core.is_pmp_obj(profile):
        raise ValueError('Expecting PMP data structure!')

    data = profile.get('data', None)
    ts = data.get('ts', None)
    data_len = len(ts)

    pmp = profile.get('pmp', None)
    profile_len = pmp.shape[1]
    pmpi = profile.get('pmpi', None)
    windows = profile.get('windows', None)

    # make sure we are working with Euclidean distances
    if core.is_pearson_array(pmp):
        tmp = core.pearson_to_euclidean(pmp, windows)
    else:
        tmp = np.copy(pmp).astype('d')

    # replace nan and infs with infinity so argmin never selects them
    tmp[core.nan_inf_indices(tmp)] = np.inf

    motifs = []
    for _ in range(k):
        min_idx = np.unravel_index(np.argmin(tmp), tmp.shape)
        min_dist = tmp[min_idx]

        # nothing else to find...
        if core.is_nan_inf(min_dist):
            break

        # create the motif pair
        min_row_idx = min_idx[0]
        min_col_idx = min_idx[1]

        # motif pairs are respective to the column of the matching row
        match_idx = pmpi[min_row_idx][min_col_idx]
        first_idx = np.min([min_col_idx, match_idx])
        second_idx = np.max([min_col_idx, match_idx])

        # compute distance profile for first appearance
        window_size = windows[min_row_idx]
        query = ts[first_idx:first_idx + window_size]
        distance_profile = mass2(ts, query)

        # extend the distance profile to be as long as the pmp row
        infs = np.full(profile_len - len(distance_profile), np.inf)
        distance_profile = np.append(distance_profile, infs)

        # exclude already picked motifs and neighbors. The picks are
        # recorded as infinity in the working copy (tmp); the submitted
        # pmp is never modified, so it cannot be used for this mask.
        mask = core.nan_inf_indices(tmp[min_row_idx])
        distance_profile[mask] = np.inf

        # Resolve the zone per motif without overwriting the argument:
        # each row may use a different window, so the default must be
        # recomputed from the window of the row that was just hit. Only
        # None selects the default; an explicit 0 is honored.
        if exclusion_zone is None:
            ez = int(np.floor(window_size / 2))
        else:
            ez = exclusion_zone

        # apply exclusion zone for motif pair, both to the distance profile
        # used for the neighbor search and to the working pmp row
        for j in (first_idx, second_idx):
            distance_profile = core.apply_exclusion_zone(
                ez, False, window_size, data_len, j, distance_profile)
            tmp[min_row_idx] = core.apply_exclusion_zone(
                ez, False, window_size, data_len, j, tmp[min_row_idx])

        # find up to max_neighbors
        neighbors = []
        for _ in range(max_neighbors):
            neighbor_idx = np.argmin(distance_profile)
            neighbor_dist = np.real(distance_profile[neighbor_idx])

            # written as a negated >= so that a NaN distance also counts
            # as being outside of the radius
            not_in_radius = not ((radius * min_dist) >= neighbor_dist)

            # no more neighbors exist based on radius
            if core.is_nan_inf(neighbor_dist) or not_in_radius:
                break

            # add neighbor and apply exclusion zone
            neighbors.append((min_row_idx, neighbor_idx))
            distance_profile = core.apply_exclusion_zone(
                ez, False, window_size, data_len, neighbor_idx,
                distance_profile)
            tmp[min_row_idx] = core.apply_exclusion_zone(
                ez, False, window_size, data_len, neighbor_idx,
                tmp[min_row_idx])

        # add the motifs and neighbors
        # note that they are (row, col) indices
        motifs.append({
            'motifs': [(min_row_idx, first_idx), (min_row_idx, second_idx)],
            'neighbors': neighbors,
        })

    profile['motifs'] = motifs

    return profile