def mp_top_k_discords(profile, exclusion_zone=None, k=3):
    """
    Find the top K discords (anomalies) in a matrix profile.

    Discords are picked from the largest matrix profile value downwards.
    After each pick, the entries surrounding it are masked out so that
    locations immediately next to an already reported discord are not
    reported again.

    Parameters
    ----------
    profile : dict
        The MatrixProfile data structure.
    exclusion_zone : int, Default mp algorithm ez
        Desired number of values to exclude on both sides of the anomaly.
        When omitted, the profile's 'ez' entry is used. If the profile has
        no 'ez' entry either, no exclusion zone is applied.
    k : int
        Desired number of discords to find. Values <= 0 yield no discords.

    Returns
    -------
    dict : profile
        The original input profile with an additional "discords" key
        containing a np.ndarray of discord indices (dtype int), ordered
        from the largest matrix profile value downwards. Fewer than k
        indices are returned when the profile runs out of finite values.

    Raises
    ------
    ValueError
        If profile is not a MatrixProfile data structure.
    """
    if not core.is_mp_obj(profile):
        raise ValueError('Expecting MP data structure!')

    # Work on a float copy so the masking below never alters profile['mp'].
    tmp = np.copy(profile.get('mp', None)).astype('d')
    n = len(tmp)

    # TODO: this is based on STOMP standards when this motif finding algorithm
    # originally came out. Should we default this to 4.0 instead? That seems
    # to be the common value now per new research.
    if exclusion_zone is None:
        exclusion_zone = profile.get('ez', None)

    # A profile without an 'ez' entry would otherwise make the
    # `exclusion_zone > 0` comparison below raise a TypeError.
    if exclusion_zone is None:
        exclusion_zone = 0

    found = []

    if k > 0:
        # argsort is ascending and places NaN last, so the reversed view
        # starts with any NaN entries followed by the largest distances.
        for idx in np.argsort(tmp)[::-1]:
            # Skip masked (inf) entries as well as NaN distances; neither
            # represents a usable discord.
            if not np.isfinite(tmp[idx]):
                continue

            found.append(idx)

            if len(found) >= k:
                break

            # apply exclusion zone
            # NOTE(review): the slice end is exclusive, so the right-hand
            # side masks one entry fewer than the left-hand side. Kept as
            # is to avoid changing which discords are reported.
            if exclusion_zone > 0:
                exclusion_zone_start = max(0, idx - exclusion_zone)
                exclusion_zone_end = min(n, idx + exclusion_zone)
                tmp[exclusion_zone_start:exclusion_zone_end] = np.inf

    profile['discords'] = np.array(found, dtype='int')

    return profile
def test_is_mp_obj():
    """
    core.is_mp_obj must accept a dict whose 'class' is 'MatrixProfile' and
    reject a plain string as well as an empty dict.
    """
    expectations = (
        ({'class': 'MatrixProfile'}, True),
        ('s', False),
        ({}, False),
    )

    for candidate, expected in expectations:
        assert (expected == core.is_mp_obj(candidate))
def _fill_proto_matrix(target, matrix, optional=False):
    """
    Copy the attributes of a matrix onto a protobuf matrix message.

    Parameters
    ----------
    target : protobuf message
        Message exposing ``rows``, ``cols`` and a repeated ``data`` field.
    matrix : object
        Value handed to ``get_matrix_attributes``.
    optional : bool, Default = False
        When True, the target is left untouched unless
        ``get_matrix_attributes`` yields truthy rows and cols together with
        array-like data. When False, the values are assigned
        unconditionally.
    """
    rows, cols, data = get_matrix_attributes(matrix)

    if optional and not (rows and cols and core.is_array_like(data)):
        return

    target.rows = rows
    target.cols = cols
    target.data.extend(data)


def profile_to_proto(profile):
    """
    Utility function that takes a MatrixProfile or PMP profile data structure
    and converts it to the MPFOutput protobuf message object.

    Parameters
    ----------
    profile : dict
        The profile to convert.

    Returns
    -------
    MPFOutput :
        The MPFOutput protobuf message object.

    Raises
    ------
    ValueError
        If profile is neither a MatrixProfile nor a Pan-MatrixProfile. The
        check happens after the shared attributes have been copied.
    """
    output = MPFOutput()

    # add higher level attributes that work for PMP and MP
    output.klass = profile.get('class')
    output.algorithm = profile.get('algorithm')
    output.metric = profile.get('metric')
    output.sample_pct = profile.get('sample_pct')

    # add time series data (required) and query data (only when present)
    series = profile.get('data')
    _fill_proto_matrix(output.ts, series.get('ts'))
    _fill_proto_matrix(output.query, series.get('query'), optional=True)

    # add window(s)
    output.windows.extend(get_windows(profile))

    # add motifs
    motifs = profile.get('motifs')
    if motifs is not None:
        for motif in motifs:
            output.motifs.append(get_proto_motif(motif))

    # add discords
    discords = profile.get('discords')
    if discords is not None:
        for discord in discords:
            output.discords.append(get_proto_discord(discord))

    # add cmp
    corrected_mp = profile.get('cmp')
    if corrected_mp is not None:
        _fill_proto_matrix(output.cmp, corrected_mp)

    # add av
    av = profile.get('av')
    if av is not None:
        _fill_proto_matrix(output.av, av)

    # add av_type
    av_type = profile.get('av_type')
    if av_type is not None and len(av_type) > 0:
        output.av_type = av_type

    # add the matrix profile specific attributes
    if core.is_mp_obj(profile):
        output.mp.ez = profile.get('ez')
        output.mp.join = profile.get('join')

        # mp and pi are always written
        _fill_proto_matrix(output.mp.mp, profile.get('mp'))
        _fill_proto_matrix(output.mp.pi, profile.get('pi'))

        # left/right profiles and indices are written only when available
        for key in ('lmp', 'lpi', 'rmp', 'rpi'):
            _fill_proto_matrix(
                getattr(output.mp, key), profile.get(key), optional=True)

    # add the pan matrix profile specific attributes
    elif core.is_pmp_obj(profile):
        _fill_proto_matrix(output.pmp.pmp, profile.get('pmp'))
        _fill_proto_matrix(output.pmp.pmpi, profile.get('pmpi'))

    else:
        raise ValueError('Expecting Pan-MatrixProfile or MatrixProfile!')

    return output
def mp_top_k_motifs(profile, exclusion_zone=None, k=3, max_neighbors=10,
                    radius=3):
    """
    Find the top K number of motifs (patterns) given a matrix profile. By
    default the algorithm will find up to 3 motifs (k) and up to 10 of their
    neighbors with a radius of 3 * min_dist.

    Parameters
    ----------
    profile : dict
        The output from one of the matrix profile algorithms. It must carry
        the original time series under profile['data']['ts'].
    exclusion_zone : int, Default to algorithm ez
        Desired number of values to exclude on both sides of the motif. This
        avoids trivial matches. When omitted, the profile's 'ez' entry is
        used. Setting the exclusion zone to 0 makes it not apply.
    k : int, Default = 3
        Desired number of motifs to find.
    max_neighbors : int, Default = 10
        The maximum number of neighbors to include for a given motif.
    radius : int, Default = 3
        The radius is used to associate a neighbor by checking if the
        neighbor's distance is less than or equal to dist * radius

    Returns
    -------
    The original input obj with the addition of the "motifs" key. The motifs
    key consists of the following structure.

    A list of dicts containing motif indices and their corresponding
    neighbor indices.

    [
        {
            'motifs': [first_index, second_index],
            'neighbors': [index, index, index ...max_neighbors]
        }
    ]

    Raises
    ------
    ValueError
        If profile is not a MatrixProfile data structure.
        If the profile does not contain the time series in data['ts'].
    """
    if not core.is_mp_obj(profile):
        raise ValueError('Expecting MP data structure!')

    window_size = profile['w']

    # The time series is needed to compute distance profiles below. Fail
    # early with a clear message instead of hitting an unbound local later.
    data = profile.get('data', None)
    ts = data.get('ts', None) if data else None
    if ts is None:
        raise ValueError(
            'Expecting the profile to contain the time series in data["ts"]!')
    data_len = len(ts)

    motifs = []
    # copy so that masking picked motifs never alters profile['mp']
    mp = np.copy(profile['mp'])
    mpi = profile['pi']

    # TODO: this is based on STOMP standards when this motif finding algorithm
    # originally came out. Should we default this to 4.0 instead? That seems
    # to be the common value now per new research.
    if exclusion_zone is None:
        exclusion_zone = profile.get('ez', None)

    for _ in range(k):
        min_idx = np.argmin(mp)
        min_dist = mp[min_idx]

        # we no longer have any motifs to find as all values are nan/inf
        if core.is_nan_inf(min_dist):
            break

        # create a motif pair corresponding to the first appearance and
        # second appearance
        first_idx = np.min([min_idx, mpi[min_idx]])
        second_idx = np.max([min_idx, mpi[min_idx]])

        # compute distance profile using mass2 for first appearance
        query = ts[first_idx:first_idx + window_size]
        distance_profile = mass2(ts, query)

        # exclude already picked motifs and neighbors
        mask = core.nan_inf_indices(mp)
        distance_profile[mask] = np.inf

        # apply exclusion zone for motif pair
        for j in (first_idx, second_idx):
            distance_profile = core.apply_exclusion_zone(
                exclusion_zone, False, window_size, data_len, j,
                distance_profile)
            mp = core.apply_exclusion_zone(
                exclusion_zone, False, window_size, data_len, j, mp)

        # find up to max_neighbors
        neighbors = []
        for _ in range(max_neighbors):
            neighbor_idx = np.argmin(distance_profile)
            neighbor_dist = distance_profile[neighbor_idx]
            # written as a negated >= so that a NaN distance also counts as
            # being outside of the radius
            not_in_radius = not ((radius * min_dist) >= neighbor_dist)

            # no more neighbors exist based on radius
            if core.is_nan_inf(neighbor_dist) or not_in_radius:
                break

            # add neighbor and apply exclusion zone
            neighbors.append(neighbor_idx)
            distance_profile = core.apply_exclusion_zone(
                exclusion_zone, False, window_size, data_len, neighbor_idx,
                distance_profile)
            mp = core.apply_exclusion_zone(
                exclusion_zone, False, window_size, data_len, neighbor_idx,
                mp)

        # add motifs and neighbors to results
        motifs.append({
            'motifs': [first_idx, second_idx],
            'neighbors': neighbors
        })

    profile['motifs'] = motifs

    return profile
def apply_av(profile, av="default", custom_av=None):
    """
    Compute an annotation vector for a MatrixProfile data structure and use
    it to derive a corrected matrix profile.

    The input profile is updated in place with the keys 'cmp' (corrected
    matrix profile), 'av' (annotation vector) and 'av_type'.

    Parameters
    ----------
    profile : dict
        A MatrixProfile structure.
    av : str, Default = "default"
        The type of annotation vector to apply. One of "default",
        "complexity", "meanstd", "clipping" or "custom".
    custom_av : array_like, Default = None
        Custom annotation vector (will only be applied if av is "custom").

    Returns
    -------
    dict : profile
        A MatrixProfile data structure with a calculated annotation vector
        and a corrected matrix profile.

    Raises
    ------
    ValueError
        If profile is not a MatrixProfile data structure.
        If custom_av parameter is not array-like when using a custom av.
        If av parameter is invalid.
        If lengths of annotation vector and matrix profile are different.
        If values in annotation vector are outside [0.0, 1.0].
    """
    if not core.is_mp_obj(profile):
        raise ValueError('apply_av expects profile as an MP data structure')

    if av == "custom":
        try:
            annotation = core.to_np_array(custom_av)
        except ValueError:
            raise ValueError('apply_av expects custom_av to be array-like')
    else:
        # pick the builder for the requested built-in annotation vector
        if av == "default":
            builder = make_default_av
        elif av == "complexity":
            builder = make_complexity_av
        elif av == "meanstd":
            builder = make_meanstd_av
        elif av == "clipping":
            builder = make_clipping_av
        else:
            raise ValueError("av parameter is invalid")

        annotation = builder(profile['data']['ts'], profile['w'])

    matrix_profile = profile['mp']

    if len(annotation) != len(matrix_profile):
        raise ValueError("Lengths of annotation vector and mp are different")

    out_of_range = (annotation < 0.0) | (annotation > 1.0)
    if out_of_range.any():
        raise ValueError("Annotation vector values must be between 0 and 1")

    # entries with a low annotation value are pushed up by as much as the
    # largest matrix profile value
    largest = np.max(matrix_profile)
    penalty = np.ones(len(annotation)) - annotation

    profile['cmp'] = matrix_profile + penalty * largest
    profile['av'] = annotation
    profile['av_type'] = av

    return profile