def test_early_abandoned():
    """Check early_abandoned_dist against known Euclidean distances.

    With an infinite cut-off the full distance is expected.  With a cut-off
    of 0.1, which is below the distance between the two test vectors, the
    call is expected to return NaN.
    """
    tolerance = 0.0000001

    # Distance between (1, 1) and (2, 2) is sqrt(2).
    diagonal = distance.early_abandoned_dist(
        np.array([1., 1.]), np.array([2., 2.]), np.inf)
    assert pytest.approx(np.sqrt(2), tolerance) == diagonal

    a = np.array([0.5, 0.8, 0.9])
    b = np.array([-0.15, 0.38, 0.92])

    # No cut-off: the complete distance is returned.
    complete = distance.early_abandoned_dist(a, b, np.inf)
    assert pytest.approx(0.7741447, tolerance) == complete

    # Cut-off smaller than the real distance: NaN is returned instead.
    abandoned = distance.early_abandoned_dist(a, b, 0.1)
    assert np.isnan(abandoned)
def find_best_discord_brute_force(series, win_size, global_registry,
                                  z_threshold=0.01):
    """Find the best discord by brute force using early-abandoned distances.

    Every start index still unvisited in a clone of ``global_registry`` is
    taken as a candidate.  For each candidate the smallest distance to any
    other z-normalized window whose start index differs by more than
    ``win_size`` is computed; the candidate with the largest such
    nearest-neighbour distance wins.

    Args:
        series: Sequence to search; windows are ``series[i:i + win_size]``.
        win_size: Length of each window.
        global_registry: Object providing ``clone()``.  The clone must
            provide ``get_next_unvisited()`` (NaN signals exhaustion) and
            ``mark_visited(idx)``.  Only the clone is marked here.
        z_threshold: Forwarded unchanged to ``znorm``.

    Returns:
        Start index of the best discord, or -1 when no candidate had a
        finite nearest-neighbour distance.
    """
    best_so_far_distance = -1.0
    best_so_far_index = -1

    outer_registry = global_registry.clone()
    outer_idx = outer_registry.get_next_unvisited()

    # get_next_unvisited() yields NaN once nothing is left to visit.
    while not np.isnan(outer_idx):
        outer_registry.mark_visited(outer_idx)

        candidate_seq = znorm(series[outer_idx:(outer_idx + win_size)],
                              z_threshold)

        # Smallest distance from the candidate to any admissible window.
        nn_distance = np.inf

        inner_registry = VisitRegistry(len(series) - win_size)
        inner_idx = inner_registry.get_next_unvisited()

        while not np.isnan(inner_idx):
            inner_registry.mark_visited(inner_idx)

            # Only compare windows whose starts are more than win_size apart.
            if abs(inner_idx - outer_idx) > win_size:
                curr_seq = znorm(series[inner_idx:(inner_idx + win_size)],
                                 z_threshold)
                # nn_distance doubles as the cut-off; a NaN result is
                # skipped (presumably it means the computation was
                # abandoned -- confirm in early_abandoned_dist).
                dist = early_abandoned_dist(candidate_seq, curr_seq,
                                            nn_distance)
                if (not np.isnan(dist)) and (dist < nn_distance):
                    nn_distance = dist

            inner_idx = inner_registry.get_next_unvisited()

        # The original used `~(np.inf == nnDistance)`.  On a Python bool
        # `~` is bitwise (~True == -2, which is truthy), so a candidate
        # without any admissible neighbour was accepted with an infinite
        # distance.  Compare explicitly instead.
        if nn_distance != np.inf and nn_distance > best_so_far_distance:
            best_so_far_distance = nn_distance
            best_so_far_index = outer_idx

        outer_idx = outer_registry.get_next_unvisited()

    return best_so_far_index
def find_best_discord_brute_force(series, win_size, global_registry,
                                  z_threshold=0.01):
    """Find the best discord by brute force using early-abandoned distances.

    Every start index still unvisited in a clone of ``global_registry`` is
    taken as a candidate.  For each candidate the smallest distance to any
    other z-normalized window whose start index differs by more than
    ``win_size`` is computed; the candidate with the largest such
    nearest-neighbour distance wins (a max-of-min search).

    Args:
        series: Sequence to search; windows are ``series[i:i + win_size]``.
        win_size: Length of each window.
        global_registry: Object providing ``clone()``.  The clone must
            provide ``get_next_unvisited()`` (NaN signals exhaustion) and
            ``mark_visited(idx)``.  Only the clone is marked here.
        z_threshold: Forwarded unchanged to ``znorm``.

    Returns:
        Tuple ``(best_so_far_index, best_so_far_distance)``: the start index
        of the best discord and its nearest-neighbour distance, or
        ``(-1, -1.0)`` when no candidate had a finite nearest-neighbour
        distance.
    """
    best_so_far_distance = -1.0
    best_so_far_index = -1

    outer_registry = global_registry.clone()

    # Fetch an index that has not been visited yet (presumably picked at
    # random by the registry -- confirm in VisitRegistry).
    outer_idx = outer_registry.get_next_unvisited()

    # Keep going while the registry still hands out a real index (not NaN).
    while not np.isnan(outer_idx):
        outer_registry.mark_visited(outer_idx)

        # Z-normalize the candidate window, which covers indices
        # outer_idx .. outer_idx + win_size - 1.
        candidate_seq = znorm(series[outer_idx:(outer_idx + win_size)],
                              z_threshold)

        # Smallest distance from the candidate to any window whose start
        # index is far enough away, i.e. to its most similar neighbour.
        nn_distance = np.inf

        # NOTE(review): the original author asked why this is not
        # len(series) - win_size + 1; if the registry covers indices
        # 0..capacity-1 the last window start is never compared -- confirm
        # against VisitRegistry before changing.
        inner_registry = VisitRegistry(len(series) - win_size)
        inner_idx = inner_registry.get_next_unvisited()

        # Walk over all start indices and find the nearest admissible
        # neighbour of candidate_seq.
        while not np.isnan(inner_idx):
            inner_registry.mark_visited(inner_idx)

            # Starts must differ by more than win_size, so the two windows
            # neither overlap nor touch.
            if abs(inner_idx - outer_idx) > win_size:
                curr_seq = znorm(series[inner_idx:(inner_idx + win_size)],
                                 z_threshold)
                # Distance between the two normalized windows, with
                # nn_distance as the cut-off.  A NaN result is skipped
                # (presumably it means the computation was abandoned --
                # confirm in early_abandoned_dist).
                dist = early_abandoned_dist(candidate_seq, curr_seq,
                                            nn_distance)
                # Shrink nn_distance towards the true nearest neighbour.
                if (not np.isnan(dist)) and (dist < nn_distance):
                    nn_distance = dist

            inner_idx = inner_registry.get_next_unvisited()

        # best_so_far_distance: max over candidates of the min distance,
        #   i.e. the distance between the least similar window and its
        #   nearest neighbour.
        # best_so_far_index: start index of that window, the one least
        #   similar to every other window in the series.
        #
        # The original used `~(np.inf == nnDistance)`.  On a Python bool
        # `~` is bitwise (~True == -2, which is truthy), so a candidate
        # without any admissible neighbour was accepted with an infinite
        # distance.  Compare explicitly instead.
        if nn_distance != np.inf and nn_distance > best_so_far_distance:
            best_so_far_distance = nn_distance
            best_so_far_index = outer_idx

        outer_idx = outer_registry.get_next_unvisited()

    return (best_so_far_index, best_so_far_distance)