def pick_mp(profile, window):
    """
    Utility function that extracts a MatrixProfile from a Pan-MatrixProfile
    placing it into the MatrixProfile data structure.

    Parameters
    ----------
    profile : dict
        A Pan-MatrixProfile data structure.
    window : int
        The specific window size used to compute the desired MatrixProfile.
        NumPy integer scalars are accepted as well.

    Returns
    -------
    dict : profile
        A MatrixProfile data structure.

    Raises
    ------
    ValueError
        If profile is not a Pan-MatrixProfile data structure.
        If window is not an integer.
    RuntimeError
        If desired MatrixProfile is not found based on window.

    """
    # NOTE(review): the messages below say 'pluck_mp' although this function
    # is named pick_mp; the text is left untouched because callers or tests
    # may match on it - confirm which name is intended.
    if not core.is_pmp_obj(profile):
        raise ValueError('pluck_mp expects profile as a PMP data structure!')

    if not isinstance(window, (int, np.integer)):
        raise ValueError('pluck_mp expects window to be an int!')

    mp_profile = empty_mp()

    # find the window index; coerce to an array so that a plain list of
    # windows is compared element-wise instead of as a whole object
    windows = np.asarray(profile.get('windows'))
    matches = np.argwhere(windows == window)

    if len(matches) < 1:
        raise RuntimeError(
            'Unable to find window {} in the provided PMP!'.format(window))

    window_index = matches.flatten()[0]
    window = int(windows[window_index])

    # only the first n - window + 1 entries of the selected row are kept
    mp = profile['pmp'][window_index]
    n = len(mp)
    stop = n - window + 1

    mp_profile['mp'] = mp[0:stop]
    mp_profile['pi'] = profile['pmpi'][window_index][0:stop]
    mp_profile['metric'] = profile['metric']
    mp_profile['data']['ts'] = profile['data']['ts']
    mp_profile['join'] = False
    mp_profile['w'] = window
    mp_profile['ez'] = int(np.floor(window / 4))
    mp_profile['algorithm'] = 'mpx'

    return mp_profile
def visualize(profile):
    """
    Build every plot that applies to the provided data structure.

    Several plots may be produced for one input. A MatrixProfile always gets
    its matrix profile plot, plus one plot for each of the 'cmp', 'av',
    'motifs' and 'discords' entries that is present and non-empty. A
    Pan-MatrixProfile gets its PMP plot plus motif and discord plots under
    the same condition. A Statistics structure gets its stats plot.

    Parameters
    ----------
    profile : dict_like
        A MatrixProfile, Pan-MatrixProfile or Statistics data structure.

    Returns
    -------
    list : figures
        A list of matplotlib figures.

    Raises
    ------
    ValueError
        If profile is not one of the supported data structures.

    """
    if not is_visualizable(profile):
        raise ValueError(
            'MatrixProfile, Pan-MatrixProfile or Statistics data structure expected!'
        )

    def _has_entries(key):
        # an optional plot is drawn only for a present, non-empty entry
        return key in profile and len(profile[key]) > 0

    figures = []

    # plot MP
    if core.is_mp_obj(profile):
        figures = __combine(figures, plot_mp(profile))

        mp_extras = (
            ('cmp', plot_cmp_mp),
            ('av', plot_av_mp),
            ('motifs', plot_motifs_mp),
            ('discords', plot_discords_mp),
        )
        for key, plotter in mp_extras:
            if _has_entries(key):
                figures = __combine(figures, plotter(profile))

    # plot PMP
    if core.is_pmp_obj(profile):
        figures = __combine(figures, plot_pmp(profile))

        pmp_extras = (
            ('motifs', plot_motifs_pmp),
            ('discords', plot_discords_pmp),
        )
        for key, plotter in pmp_extras:
            if _has_entries(key):
                figures = __combine(figures, plotter(profile))

    # plot stats
    if core.is_stats_obj(profile):
        figures = __combine(figures, plot_stats(profile))

    return figures
def from_json(profile):
    """
    Converts a JSON formatted string into a profile data structure.

    Parameters
    ----------
    profile : str
        The profile as a JSON formatted string. An object exposing a
        ``read`` method (for example an open file) is also accepted and is
        decoded with ``json.load``.

    Returns
    -------
    profile : dict_like
        A MatrixProfile or Pan-MatrixProfile data structure.

    Raises
    ------
    ValueError
        If the input is not valid JSON (``json.JSONDecodeError`` is a
        ``ValueError`` subclass) or the decoded object is neither a
        MatrixProfile nor a Pan-MatrixProfile.

    """
    # json.load only works on file-like objects; the documented input is a
    # string, which has to go through json.loads
    if hasattr(profile, 'read'):
        dct = json.load(profile)
    else:
        dct = json.loads(profile)

    # handle pmp and convert to appropriate types
    if core.is_pmp_obj(dct):
        dct['pmp'] = np.array(dct['pmp'], dtype='float64')
        dct['pmpi'] = np.array(dct['pmpi'], dtype=int)
        dct['data']['ts'] = np.array(dct['data']['ts'], dtype='float64')
        dct['windows'] = np.array(dct['windows'], dtype=int)

    # handle mp
    elif core.is_mp_obj(dct):
        dct['mp'] = np.array(dct['mp'], dtype='float64')
        dct['pi'] = np.array(dct['pi'], dtype=int)

        # left/right profiles are optional; a pair is converted only when
        # both the distances and the indices were serialized as lists
        for dist_key, index_key in (('lmp', 'lpi'), ('rmp', 'rpi')):
            both_lists = (isinstance(dct.get(dist_key), list)
                          and isinstance(dct.get(index_key), list))
            if both_lists:
                dct[dist_key] = np.array(dct[dist_key], dtype='float64')
                dct[index_key] = np.array(dct[index_key], dtype=int)

        dct['data']['ts'] = np.array(dct['data']['ts'], dtype='float64')

        # the query is optional as well
        if isinstance(dct['data'].get('query'), list):
            dct['data']['query'] = np.array(dct['data']['query'],
                                            dtype='float64')
    else:
        raise ValueError('File is not of type profile!')

    return dct
def is_visualizable(obj):
    """
    Helper function that tells whether the passed in object can be
    visualized, based on its data structure.

    Parameters
    ----------
    obj : Object
        The object to test.

    Returns
    -------
    The result of the first core check that is truthy, tried in the order
    MatrixProfile, Pan-MatrixProfile, Statistics; otherwise the (falsy)
    result of the Statistics check.

    """
    verdict = core.is_mp_obj(obj)
    if not verdict:
        verdict = core.is_pmp_obj(obj)
    if not verdict:
        verdict = core.is_stats_obj(obj)

    return verdict
def get_windows(profile):
    """
    Utility function that returns the window(s) of a profile structure in a
    sequence.

    Parameters
    ----------
    profile : dict
        The MatrixProfile or PMP profile.

    Returns
    -------
    list :
        For a MatrixProfile, a one element list holding its 'w' value. For a
        Pan-MatrixProfile, the stored 'windows' value as-is. An empty list
        for anything else.

    """
    if core.is_mp_obj(profile):
        return [profile.get('w')]

    if core.is_pmp_obj(profile):
        return profile.get('windows')

    return []
def pmp_top_k_discords(profile, exclusion_zone=None, k=3):
    """
    Computes the top K discords for the given Pan-MatrixProfile. The return
    values is a list of row by col indices.

    Notes
    -----
    This algorithm is written to work with Euclidean distance. If you submit
    a PMP of Pearson metrics, then it is first converted to Euclidean.

    Parameters
    ----------
    profile : dict
        Data structure from a PMP algorithm.
    exclusion_zone : int, Default window / 2
        The zone to exclude around the found discords to reduce trivial
        findings. When None, every discord uses half of the window that
        belongs to the row it was found in.
    k : int
        Maximum number of discords to find.

    Returns
    -------
    dict : profile
        A 2D array of indices. The first column corresponds to the row index
        and the second column corresponds to the column index of the
        submitted PMP. It is placed back on the original object passed in as
        'discords' key.

    Raises
    ------
    ValueError
        If profile is not a Pan-MatrixProfile data structure.

    """
    if not core.is_pmp_obj(profile):
        raise ValueError('Expecting PMP data structure!')

    # this function requires euclidean distance
    # convert if the metric is pearson
    metric = profile.get('metric', None)
    pmp = profile.get('pmp', None)
    windows = profile.get('windows', None)

    if metric == 'pearson':
        tmp = core.pearson_to_euclidean(pmp, windows)
    else:
        tmp = np.copy(pmp).astype('d')

    # replace nan and infs with -infinity
    # for whatever reason numpy argmax finds infinity as max so
    # this is a way to get around it by converting to -infinity
    tmp[core.nan_inf_indices(tmp)] = -np.inf

    # iterate finding the max value k times or until negative
    # infinity is obtained
    found = []

    for _ in range(k):
        max_idx = np.unravel_index(np.argmax(tmp), tmp.shape)

        if tmp[max_idx] == -np.inf:
            break

        found.append(max_idx)
        row, col = max_idx

        # the default exclusion zone is 1/2 of the window size used to
        # compute this specific row; it is resolved per discord and never
        # written back to the argument, otherwise the first row's window
        # would leak into rows computed with a different window
        if exclusion_zone is None:
            zone = int(np.floor(windows[row] / 2))
        else:
            zone = exclusion_zone

        n = tmp[row].shape[0]
        ez_start = np.max([0, col - zone])
        ez_stop = np.min([n, col + zone])
        tmp[row][ez_start:ez_stop] = -np.inf

        # a zero sized zone masks nothing; always retire the selected cell
        # so the same discord cannot be reported again
        tmp[row, col] = -np.inf

    profile['discords'] = np.array(found)

    return profile
def test_is_pmp_obj():
    # (candidate, expected result of core.is_pmp_obj)
    cases = (
        ({'class': 'PMP'}, True),
        ('s', False),
        ({}, False),
    )

    for candidate, expected in cases:
        assert (expected == core.is_pmp_obj(candidate))
def pmp_top_k_motifs(profile, exclusion_zone=None, k=3, max_neighbors=10,
                     radius=3):
    """
    Find the top K number of motifs (patterns) given a pan matrix profile. By
    default the algorithm will find up to 3 motifs (k) and up to 10 of their
    neighbors with a radius of 3 * min_dist.

    Parameters
    ----------
    profile : dict
        The output from one of the pan matrix profile algorithms.
    exclusion_zone : int, Default to algorithm ez
        Desired number of values to exclude on both sides of the motif. This
        avoids trivial matches. When None it defaults, per motif, to half of
        the window size of the row the motif was found in. Setting the
        exclusion zone to 0 makes it not apply.
    k : int, Default = 3
        Desired number of motifs to find.
    max_neighbors : int, Default = 10
        The maximum number of neighbors to include for a given motif.
    radius : int, Default = 3
        The radius is used to associate a neighbor by checking if the
        neighbor's distance is less than or equal to dist * radius

    Returns
    -------
    The original input obj with the addition of the "motifs" key. The motifs
    key consists of the following structure.

    A list of dicts containing motif indices and their corresponding
    neighbor indices. Note that each index is a (row, col) index
    corresponding to the pan matrix profile.

    [
        {
            'motifs': [first_index, second_index],
            'neighbors': [index, index, index ...max_neighbors]
        }
    ]

    Raises
    ------
    ValueError
        If profile is not a Pan-MatrixProfile data structure.

    """
    if not core.is_pmp_obj(profile):
        raise ValueError('Expecting PMP data structure!')

    data = profile.get('data', None)
    ts = data.get('ts', None)
    data_len = len(ts)

    pmp = profile.get('pmp', None)
    profile_len = pmp.shape[1]
    pmpi = profile.get('pmpi', None)
    windows = profile.get('windows', None)

    # make sure we are working with Euclidean distances
    if core.is_pearson_array(pmp):
        tmp = core.pearson_to_euclidean(pmp, windows)
    else:
        tmp = np.copy(pmp).astype('d')

    # replace nan and infs with infinity
    tmp[core.nan_inf_indices(tmp)] = np.inf

    motifs = []
    for _ in range(k):
        min_idx = np.unravel_index(np.argmin(tmp), tmp.shape)
        min_dist = tmp[min_idx]

        # nothing else to find...
        if core.is_nan_inf(min_dist):
            break

        # create the motif pair
        min_row_idx, min_col_idx = min_idx

        # motif pairs are respective to the column of the matching row
        partner_idx = pmpi[min_row_idx][min_col_idx]
        first_idx = np.min([min_col_idx, partner_idx])
        second_idx = np.max([min_col_idx, partner_idx])

        # compute distance profile for first appearance
        window_size = windows[min_row_idx]
        query = ts[first_idx:first_idx + window_size]
        distance_profile = mass2(ts, query)

        # extend the distance profile to be as long as the original
        infs = np.full(profile_len - len(distance_profile), np.inf)
        distance_profile = np.append(distance_profile, infs)

        # exclude already picked motifs and neighbors; the working copy is
        # the array that carries the exclusions applied so far (the input
        # pmp is never modified)
        mask = core.nan_inf_indices(tmp[min_row_idx])
        distance_profile[mask] = np.inf

        # resolve the exclusion zone for this motif without overwriting the
        # argument: None means "half of this row's window", 0 is honoured
        if exclusion_zone is None:
            zone = int(np.floor(window_size / 2))
        else:
            zone = exclusion_zone

        # apply exclusion zone for motif pair
        for j in (first_idx, second_idx):
            distance_profile = core.apply_exclusion_zone(
                zone, False, window_size, data_len, j, distance_profile)

            tmp[min_row_idx] = core.apply_exclusion_zone(
                zone, False, window_size, data_len, j, tmp[min_row_idx])

            # always retire the selected cells so a zero sized zone cannot
            # hand back the same motif again
            distance_profile[j] = np.inf
            tmp[min_row_idx, j] = np.inf

        # find up to max_neighbors
        neighbors = []
        for _ in range(max_neighbors):
            neighbor_idx = np.argmin(distance_profile)
            neighbor_dist = np.real(distance_profile[neighbor_idx])
            not_in_radius = not ((radius * min_dist) >= neighbor_dist)

            # no more neighbors exist based on radius
            if core.is_nan_inf(neighbor_dist) or not_in_radius:
                break

            # add neighbor and apply exclusion zone
            neighbors.append((min_row_idx, neighbor_idx))

            distance_profile = core.apply_exclusion_zone(
                zone, False, window_size, data_len, neighbor_idx,
                distance_profile)

            tmp[min_row_idx] = core.apply_exclusion_zone(
                zone, False, window_size, data_len, neighbor_idx,
                tmp[min_row_idx])

            # same progress guarantee as for the motif pair
            distance_profile[neighbor_idx] = np.inf
            tmp[min_row_idx, neighbor_idx] = np.inf

        # add the motifs and neighbors
        # note that they are (row, col) indices
        motifs.append({
            'motifs': [(min_row_idx, first_idx), (min_row_idx, second_idx)],
            'neighbors': neighbors
        })

    profile['motifs'] = motifs

    return profile
def profile_to_proto(profile):
    """
    Utility function that converts a MatrixProfile or PMP profile data
    structure into the MPFOutput protobuf message object.

    Parameters
    ----------
    profile : dict
        The profile to convert.

    Returns
    -------
    MPFOutput :
        The MPFOutput protobuf message object.

    Raises
    ------
    ValueError
        If profile is neither a MatrixProfile nor a Pan-MatrixProfile. The
        check runs after the shared attributes have been copied.

    """
    def _store(target, rows, cols, values):
        # copy one (rows, cols, data) triple onto a matrix message
        target.rows = rows
        target.cols = cols
        target.data.extend(values)

    output = MPFOutput()

    # add higher level attributes that work for PMP and MP
    output.klass = profile.get('class')
    output.algorithm = profile.get('algorithm')
    output.metric = profile.get('metric')
    output.sample_pct = profile.get('sample_pct')

    # time series and query live under the 'data' entry
    payload = profile.get('data')
    ts = payload.get('ts')
    query = payload.get('query')

    # add time series data
    rows, cols, values = get_matrix_attributes(ts)
    _store(output.ts, rows, cols, values)

    # add query data; it is only written when it has a usable shape
    rows, cols, values = get_matrix_attributes(query)
    if rows and cols and core.is_array_like(values):
        _store(output.query, rows, cols, values)

    # add window(s)
    output.windows.extend(get_windows(profile))

    # add motifs
    motifs = profile.get('motifs')
    if motifs is not None:
        for entry in motifs:
            output.motifs.append(get_proto_motif(entry))

    # add discords
    discords = profile.get('discords')
    if discords is not None:
        for entry in discords:
            output.discords.append(get_proto_discord(entry))

    # add cmp and av, each written whenever the key holds a value
    for key in ('cmp', 'av'):
        matrix = profile.get(key)
        if matrix is not None:
            rows, cols, values = get_matrix_attributes(matrix)
            _store(getattr(output, key), rows, cols, values)

    # add av_type
    av_type = profile.get('av_type')
    if av_type is not None and len(av_type) > 0:
        output.av_type = av_type

    # add the matrix profile specific attributes
    if core.is_mp_obj(profile):
        output.mp.ez = profile.get('ez')
        output.mp.join = profile.get('join')

        # mp and pi are always written
        for key in ('mp', 'pi'):
            rows, cols, values = get_matrix_attributes(profile.get(key))
            _store(getattr(output.mp, key), rows, cols, values)

        # left/right profiles are written only when they have a usable shape
        for key in ('lmp', 'lpi', 'rmp', 'rpi'):
            rows, cols, values = get_matrix_attributes(profile.get(key))
            if rows and cols and core.is_array_like(values):
                _store(getattr(output.mp, key), rows, cols, values)

    # add the pan matrix profile specific attributes
    elif core.is_pmp_obj(profile):
        for key in ('pmp', 'pmpi'):
            rows, cols, values = get_matrix_attributes(profile.get(key))
            _store(getattr(output.pmp, key), rows, cols, values)

    else:
        raise ValueError('Expecting Pan-MatrixProfile or MatrixProfile!')

    return output