def multi_knee(get_knee: typing.Callable, points: np.ndarray, t1: float = 0.99, t2: int = 3) -> np.ndarray:
    """
    Wrapper that converts a single knee point detection method into a multi knee point detector.

    It uses recursion on the left and right parts of the curve after detecting the current knee.

    Args:
        get_knee (typing.Callable): method that returns a single knee point
        points (np.ndarray): numpy array with the points (x, y)
        t1 (float): the coefficient of determination used as a threshold (default 0.99)
        t2 (int): the minimum number of points used as a threshold (default 3)

    Returns:
        np.ndarray: knee points on the curve
    """
    stack = [(0, len(points))]
    knees = []

    while stack:
        left, right = stack.pop()
        pt = points[left:right]
        if len(pt) > t2:
            coef = linear_fit_points(pt)
            if linear_r2_points(pt, coef) < t1:
                rv = get_knee(pt)
                if rv is not None:
                    idx = rv + left
                    knees.append(idx)
                    stack.append((left, idx + 1))
                    stack.append((idx + 1, right))
    knees.sort()
    return np.array(knees)
def multi_knee(get_knee: typing.Callable, points: np.ndarray, t1: float = 0.01, t2: int = 3,
               cost: lf.Linear_Metrics = lf.Linear_Metrics.rmspe) -> np.ndarray:
    """
    Wrapper that converts a single knee point detection method into a multi knee point detector.

    It uses recursion on the left and right parts of the curve after detecting the current knee.

    Args:
        get_knee (typing.Callable): method that returns a single knee point
        points (np.ndarray): numpy array with the points (x, y)
        t1 (float): the cost threshold used to decide whether a segment is curved (default 0.01)
        t2 (int): the minimum number of points used as a threshold (default 3)
        cost (lf.Linear_Metrics): the cost method used to evaluate a point set (default: lf.Linear_Metrics.rmspe)

    Returns:
        np.ndarray: knee points on the curve
    """
    stack = [(0, len(points))]
    knees = []

    while stack:
        left, right = stack.pop()
        pt = points[left:right]
        if len(pt) > t2:
            if len(pt) <= 2:
                if cost is lf.Linear_Metrics.rmspe:
                    r = 0.0
                else:
                    r = 1.0
            else:
                coef = lf.linear_fit_points(pt)
                if cost is lf.Linear_Metrics.rmspe:
                    r = lf.rmspe_points(pt, coef)
                else:
                    r = lf.linear_r2_points(pt, coef)

            curved = r >= t1 if cost is lf.Linear_Metrics.rmspe else r < t1

            if curved:
                rv = get_knee(pt)
                if rv is not None:
                    idx = rv + left
                    knees.append(idx)
                    stack.append((left, idx + 1))
                    stack.append((idx + 1, right))
    knees.sort()
    return np.array(knees)
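# Example (illustrative sketch, not part of the library): how multi_knee wraps a
# single-knee detector. The helper max_distance_knee below is hypothetical; it returns
# the index (relative to the slice it receives) of the point farthest from the chord,
# or None when the slice is effectively a straight line, which is the contract
# multi_knee expects from get_knee.
def _example_multi_knee() -> np.ndarray:
    def max_distance_knee(pt: np.ndarray):
        d = lf.shortest_distance_points(pt, pt[0], pt[-1])
        idx = int(np.argmax(d))
        return idx if d[idx] > 0 else None

    x = np.linspace(0.0, 10.0, 200)
    # an L-shaped drop followed by a second L-shaped drop (roughly two knees)
    y = np.where(x < 5.0, np.exp(-2.0 * x) + 1.0, np.exp(-2.0 * (x - 5.0)))
    points = np.stack((x, y), axis=1)
    return multi_knee(max_distance_knee, points)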
def compute_cost_sequence(points: np.ndarray, reduced, cost: lf.Linear_Metrics = lf.Linear_Metrics.rpd,
                          distance: RDP_Distance = RDP_Distance.shortest):
    # sort indexes
    reduced.sort()

    # select the distance metric to be used
    if distance is RDP_Distance.shortest:
        distance_points = lf.shortest_distance_points
    elif distance is RDP_Distance.perpendicular:
        distance_points = lf.perpendicular_distance_points
    else:
        distance_points = lf.shortest_distance_points

    left = 0
    costs = []
    for right in reduced:
        pt = points[left:right]
        # NOTE: the remainder of this loop is an assumed reconstruction (the original
        # body was truncated): evaluate the selected cost metric on the linear fit of
        # each segment of the reduced curve.
        if len(pt) <= 2:
            r = 1.0 if cost is lf.Linear_Metrics.r2 else 0.0
        else:
            coef = lf.linear_fit_points(pt)
            if cost is lf.Linear_Metrics.r2:
                r = lf.linear_r2_points(pt, coef)
            elif cost is lf.Linear_Metrics.rmspe:
                r = lf.rmspe_points(pt, coef)
            elif cost is lf.Linear_Metrics.rmsle:
                r = lf.rmsle_points(pt, coef)
            else:
                r = lf.rpd_points(pt, coef)
        costs.append(r)
        left = right
    return np.array(costs)
def auto_knees(points: np.ndarray, t: float = 1.0, sensitivity: float = 1.0,
               p: PeakDetection = PeakDetection.Kneedle) -> np.ndarray:
    """Returns the indexes of the knee points based on the Kneedle method.

    This implementation uses a heuristic to automatically define the direction and
    rotation of the concavity. Furthermore, it supports three different methods to
    select the relevant knees:
    1. Kneedle     : classical algorithm
    2. Significant : significant knee peak detection
    3. ZScore      : significant knee peak detection based on zscore

    Args:
        points (np.ndarray): numpy array with the points (x, y)
        t (float): tau of the side window used to smooth the curve
        sensitivity (float): controls the sensitivity of the peak detection
        p (PeakDetection): selects the peak detection method

    Returns:
        np.ndarray: the indexes of the knee points
    """
    _, m = lf.linear_fit_points(points)

    if m > 0.0:
        cd = Direction.Increasing
    else:
        cd = Direction.Decreasing

    knees_1 = knees(points, sensitivity, t, cd, Concavity.Counterclockwise, p)
    knees_2 = knees(points, sensitivity, t, cd, Concavity.Clockwise, p)

    knees_idx = np.concatenate((knees_1, knees_2))
    # np.concatenate generates a float array when one of the inputs is empty
    # (see https://github.com/numpy/numpy/issues/8878)
    knees_idx = knees_idx.astype(int)
    knees_idx = np.unique(knees_idx)
    knees_idx.sort()

    return knees_idx
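# Example (illustrative sketch): auto_knees only needs the points; the direction is
# inferred from the slope of a global linear fit and both concavity rotations are
# searched. PeakDetection.Kneedle is the classical selector listed in the docstring.
def _example_auto_knees() -> np.ndarray:
    x = np.linspace(1.0, 10.0, 100)
    points = np.stack((x, 1.0 / x), axis=1)
    return auto_knees(points, t=1.0, sensitivity=1.0, p=PeakDetection.Kneedle)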
def rdp(points: np.ndarray, r: float = 0.9) -> tuple:
    """
    Ramer–Douglas–Peucker (RDP) algorithm.

    An algorithm that decimates a curve composed of line segments into a similar curve
    with fewer points. This version uses the coefficient of determination to decide
    whether to keep or remove a line segment.

    Args:
        points (np.ndarray): numpy array with the points (x, y)
        r (float): the coefficient of determination threshold (default 0.9)

    Returns:
        tuple: the reduced space, the points that were removed
    """
    if len(points) <= 2:
        determination = 1.0
    else:
        coef = lf.linear_fit_points(points)
        determination = lf.linear_r2_points(points, coef)

    if determination < r:
        d = perpendicular_distance_points(points, points[0], points[-1])
        index = np.argmax(d)

        left, left_points = rdp(points[0:index + 1], r)
        right, right_points = rdp(points[index:len(points)], r)

        points_removed = np.concatenate((left_points, right_points), axis=0)
        return np.concatenate((left[0:len(left) - 1], right)), points_removed
    else:
        rv = np.empty([2, 2])
        rv[0] = points[0]
        rv[1] = points[-1]
        points_removed = np.array([[points[0][0], len(points) - 2.0]])
        return rv, points_removed
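# Example (illustrative sketch, assuming this r2-based definition of rdp is the one in
# scope; a cost-based variant with the same name appears further below and returns
# indexes instead): this variant returns the surviving (x, y) points themselves, plus a
# log of how many points were dropped on each straight stretch.
def _example_rdp_r2() -> tuple:
    x = np.linspace(0.0, 10.0, 200)
    y = np.where(x < 5.0, 10.0 - x, 5.0 - 0.1 * (x - 5.0))  # two straight segments
    points = np.stack((x, y), axis=1)
    reduced, removed = rdp(points, r=0.99)
    return reduced, removed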
def auto_knee(points: np.ndarray, t: float = 1.0) -> int:
    """Returns the index of the knee point based on the Kneedle method.

    This implementation uses a heuristic to automatically define the direction and
    rotation of the concavity.

    Args:
        points (np.ndarray): numpy array with the points (x, y)
        t (float): tau of the side window used to smooth the curve

    Returns:
        int: the index of the knee point
    """
    b, m = lf.linear_fit_points(points)

    if m > 0.0:
        cd = Direction.Increasing
    else:
        cd = Direction.Decreasing

    y = points[:, 1]
    yhat = np.empty(len(points))
    for i in range(0, len(points)):
        yhat[i] = points[i][0] * m + b

    vote = np.sum(y - yhat)

    if cd is Direction.Increasing and vote > 0:
        cc = Concavity.Clockwise
    elif cd is Direction.Increasing and vote <= 0:
        cc = Concavity.Counterclockwise
    elif cd is Direction.Decreasing and vote > 0:
        cc = Concavity.Clockwise
    else:
        cc = Concavity.Counterclockwise

    return single_knee(points, t, cd, cc)
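# Example (illustrative sketch): auto_knee needs only the points and the smoothing tau;
# the direction comes from the slope of the global linear fit and the concavity from
# the sign of the summed residuals (the "vote" above).
def _example_auto_knee() -> int:
    x = np.linspace(1.0, 10.0, 100)
    points = np.stack((x, 1.0 / x), axis=1)  # convex, decreasing curve
    return auto_knee(points, t=1.0)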
def test_r2_two(self):
    points = np.array([[0.0, 1.0], [1.0, 5.0]])
    coef = lf.linear_fit_points(points)
    result = lf.linear_r2_points(points, coef)
    desired = 1.0
    self.assertEqual(result, desired)
def filter_clusters(points: np.ndarray, knees: np.ndarray,
                    clustering: typing.Callable[[np.ndarray, float], np.ndarray],
                    t: float = 0.01,
                    method: kr.ClusterRanking = kr.ClusterRanking.linear) -> np.ndarray:
    """
    Filter the knee points based on clustering.

    For each cluster a single point is selected based on the ranking.
    The ranking is computed based on the slope and the improvement (on the y axis).

    Args:
        points (np.ndarray): numpy array with the points (x, y)
        knees (np.ndarray): knees indexes
        clustering (typing.Callable[[np.ndarray, float], np.ndarray]): the clustering function
        t (float): the threshold for merging (in percentage, default 0.01)
        method (ranking.ClusterRanking): represents the direction of the ranking within a cluster (default ranking.ClusterRanking.linear)

    Returns:
        np.ndarray: the filtered knees
    """
    if method is kr.ClusterRanking.hull:
        hull = ch.graham_scan_lower(points)
        logger.info(f'hull {len(hull)}')

    x = points[:, 0]
    y = points[:, 1]

    if len(knees) <= 1:
        return knees
    else:
        knee_points = points[knees]
        clusters = clustering(knee_points, t)
        max_cluster = clusters.max()
        filtered_knees = []

        for i in range(0, max_cluster + 1):
            current_cluster = knees[clusters == i]
            #logger.info(f'Cluster {i} with {len(current_cluster)} elements')

            if len(current_cluster) > 1:
                if method is kr.ClusterRanking.hull:
                    # select the hull points that exist within the cluster
                    a, b = current_cluster[[0, -1]]
                    #logger.info(f'Bounds [{a}, {b}]')
                    idx = (hull >= a) * (hull <= b)
                    hull_within_cluster = hull[idx]
                    #logger.info(f'Hull (W\\C) {hull_within_cluster} ({len(hull_within_cluster)})')

                    # only consider clusters with at least a single hull point
                    rankings = np.zeros(len(current_cluster))
                    if len(hull_within_cluster) > 1:
                        length = x[b + 1] - x[a - 1]
                        for cluster_idx in range(len(current_cluster)):
                            j = current_cluster[cluster_idx]
                            if j in hull_within_cluster:
                                length_l = (x[j] - x[a - 1]) / length
                                length_r = (x[b + 1] - x[j]) / length

                                left = points[a - 1:j + 1]
                                right = points[j:b + 2]

                                coef_l = lf.linear_fit_points(left)
                                coef_r = lf.linear_fit_points(right)
                                #r_l = lf.linear_residuals(x[a-1:j+1], y[a-1:j+1], coef_l)
                                #r_r = lf.linear_residuals(x[j:b+2], y[j:b+2], coef_r)
                                #r_l = lf.rmse_points(left, coef_l)
                                #r_r = lf.rmse_points(right, coef_r)
                                r_l = np.sum(lf.shortest_distance_points(left, left[0], left[-1]))
                                r_r = np.sum(lf.shortest_distance_points(right, right[0], right[-1]))

                                current_error = r_l * length_l + r_r * length_r
                                rankings[cluster_idx] = current_error
                            else:
                                rankings[cluster_idx] = -1.0

                        # replace all -1 with maximum distance
                        #logger.info(f'CHR {rankings}')
                        rankings[rankings < 0] = np.amax(rankings)
                        rankings = kr.distance_to_similarity(rankings)
                        #logger.info(f'CHRF {rankings}')
                    elif len(hull_within_cluster) == 1:
                        for cluster_idx in range(len(current_cluster)):
                            j = current_cluster[cluster_idx]
                            if j in hull_within_cluster:
                                rankings[cluster_idx] = 1.0
                    else:
                        rankings = None
                else:
                    rankings = kr.smooth_ranking(points, current_cluster, method)

                # Compute relative ranking
                if rankings is None:
                    best_knee = None
                else:
                    rankings = kr.rank(rankings)
                    #logger.info(f'Rankings {rankings}')
                    # Min Max normalization
                    #rankings = (rankings - np.min(rankings))/np.ptp(rankings)
                    idx = np.argmax(rankings)
                    best_knee = knees[clusters == i][idx]
            else:
                if method is kr.ClusterRanking.hull:
                    knee = knees[clusters == i][0]
                    if knee in hull:
                        best_knee = knee
                    else:
                        best_knee = None
                else:
                    best_knee = knees[clusters == i][0]

            if best_knee is not None:
                filtered_knees.append(best_knee)

        return np.array(filtered_knees)
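# Example (illustrative sketch): filter_clusters expects a clustering callable that maps
# the (x, y) knee points and a threshold to integer cluster labels (0..k). The
# threshold_clustering helper below is hypothetical and only for illustration; the
# library's own clustering module would normally be passed instead.
def _example_filter_clusters(points: np.ndarray, knees: np.ndarray) -> np.ndarray:
    def threshold_clustering(knee_points: np.ndarray, t: float) -> np.ndarray:
        # group consecutive knees whose x gap is within t (a fraction of the x range)
        labels = np.zeros(len(knee_points), dtype=int)
        x_range = np.ptp(points[:, 0])
        for i in range(1, len(knee_points)):
            gap = knee_points[i, 0] - knee_points[i - 1, 0]
            labels[i] = labels[i - 1] if gap <= t * x_range else labels[i - 1] + 1
        return labels

    return filter_clusters(points, knees, threshold_clustering,
                           t=0.01, method=kr.ClusterRanking.linear)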
def rdp(points: np.ndarray, t: float = 0.01, cost: lf.Linear_Metrics = lf.Linear_Metrics.rpd,
        distance: RDP_Distance = RDP_Distance.shortest) -> tuple:
    """
    Ramer–Douglas–Peucker (RDP) algorithm.

    An algorithm that decimates a curve composed of line segments into a similar curve
    with fewer points. This version uses different cost functions to decide whether to
    keep or remove a line segment.

    Args:
        points (np.ndarray): numpy array with the points (x, y)
        t (float): the cost threshold used to decide if a segment is curved (default 0.01)
        cost (lf.Linear_Metrics): the cost method used to evaluate a point set (default: lf.Linear_Metrics.rpd)
        distance (RDP_Distance): the distance metric used to decide the split point (default: RDP_Distance.shortest)

    Returns:
        tuple: the indexes of the reduced space, the points that were removed
    """
    stack = [(0, len(points))]

    reduced = []
    removed = []

    # select the distance metric to be used
    if distance is RDP_Distance.shortest:
        distance_points = lf.shortest_distance_points
    elif distance is RDP_Distance.perpendicular:
        distance_points = lf.perpendicular_distance_points
    else:
        distance_points = lf.shortest_distance_points

    while stack:
        left, right = stack.pop()
        pt = points[left:right]

        if len(pt) <= 2:
            if cost is lf.Linear_Metrics.r2:
                r = 1.0
            else:
                r = 0.0
        else:
            coef = lf.linear_fit_points(pt)
            if cost is lf.Linear_Metrics.r2:
                r = lf.linear_r2_points(pt, coef)
            elif cost is lf.Linear_Metrics.rmspe:
                r = lf.rmspe_points(pt, coef)
            elif cost is lf.Linear_Metrics.rmsle:
                r = lf.rmsle_points(pt, coef)
            else:
                r = lf.rpd_points(pt, coef)

        curved = r < t if cost is lf.Linear_Metrics.r2 else r >= t

        if curved:
            d = distance_points(pt, pt[0], pt[-1])
            index = np.argmax(d)
            stack.append((left + index, left + len(pt)))
            stack.append((left, left + index + 1))
        else:
            reduced.append(left)
            removed.append([left, len(pt) - 2.0])

    reduced.append(len(points) - 1)
    return np.array(reduced), np.array(removed)
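# Example (illustrative sketch): this cost-based rdp returns indexes into the original
# array, so the simplified curve is recovered with a plain fancy index; the second
# array records, for each kept left endpoint, how many interior points were discarded.
def _example_rdp_cost() -> tuple:
    x = np.linspace(0.0, 10.0, 500)
    points = np.stack((x, np.exp(-x)), axis=1)
    reduced, removed = rdp(points, t=0.01, cost=lf.Linear_Metrics.rpd,
                           distance=RDP_Distance.shortest)
    simplified = points[reduced]
    return simplified, removed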
def grdp(points: np.ndarray, t: float = 0.01, cost: lf.Linear_Metrics = lf.Linear_Metrics.rpd,
         distance: RDP_Distance = RDP_Distance.shortest) -> np.ndarray:
    """
    Global version of the Ramer–Douglas–Peucker (RDP) algorithm.

    Instead of deciding each segment locally, it repeatedly splits the segment with the
    largest distance error and stops once the cost of the whole reduced curve falls
    below the threshold.

    Args:
        points (np.ndarray): numpy array with the points (x, y)
        t (float): the global cost threshold used to decide if the curve is still curved (default 0.01)
        cost (lf.Linear_Metrics): the cost method used to evaluate a point set (default: lf.Linear_Metrics.rpd)
        distance (RDP_Distance): the distance metric used to decide the split point (default: RDP_Distance.shortest)

    Returns:
        np.ndarray: the indexes of the reduced space
    """
    # select the distance metric to be used
    if distance is RDP_Distance.shortest:
        distance_points = lf.shortest_distance_points
    elif distance is RDP_Distance.perpendicular:
        distance_points = lf.perpendicular_distance_points
    else:
        distance_points = lf.shortest_distance_points

    def global_cost(indexes: list) -> float:
        # NOTE: assumed reconstruction of the global cost (the original computation was
        # missing): the average of the selected cost metric over the linear fit of each
        # segment of the current reduced curve.
        indexes = sorted(indexes)
        costs = []
        for a, b in zip(indexes, indexes[1:]):
            pt = points[a:b + 1]
            if len(pt) <= 2:
                costs.append(1.0 if cost is lf.Linear_Metrics.r2 else 0.0)
                continue
            coef = lf.linear_fit_points(pt)
            if cost is lf.Linear_Metrics.r2:
                costs.append(lf.linear_r2_points(pt, coef))
            elif cost is lf.Linear_Metrics.rmspe:
                costs.append(lf.rmspe_points(pt, coef))
            elif cost is lf.Linear_Metrics.rmsle:
                costs.append(lf.rmsle_points(pt, coef))
            else:
                costs.append(lf.rpd_points(pt, coef))
        return float(np.mean(costs))

    # the stack keeps (cost, left, right) segments sorted by cost, worst segment on top
    stack = [(0.0, 0, len(points))]
    # the first and last points are always part of the reduced curve
    reduced = [0, len(points) - 1]

    r = global_cost(reduced)
    curved = r < t if cost is lf.Linear_Metrics.r2 else r >= t

    while curved and stack:
        _, left, right = stack.pop()
        pt = points[left:right]

        if len(pt) <= 2:
            # a two-point segment cannot be split any further
            continue

        d = distance_points(pt, pt[0], pt[-1])
        index = np.argmax(d)
        if d[index] == 0.0:
            # the segment is already a straight line; nothing to split
            continue

        # add the relevant point to the reduced set
        reduced.append(left + index)

        # compute the cost of the left and right parts
        left_cost = np.max(distance_points(pt[0:index + 1], pt[0], pt[index]))
        right_cost = np.max(distance_points(pt[index:len(pt)], pt[index], pt[-1]))

        # add the two sub-segments to the stack, sorted by cost (worst on top)
        stack.append((right_cost, left + index, left + len(pt)))
        stack.append((left_cost, left, left + index + 1))
        stack.sort(key=lambda s: s[0])

        # compute the cost of the current solution
        r = global_cost(reduced)
        curved = r < t if cost is lf.Linear_Metrics.r2 else r >= t

    # sort indexes (and drop any duplicates from degenerate splits)
    return np.unique(reduced)
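# Example (illustrative sketch): grdp is called like rdp above, but it keeps splitting
# the worst segment until the single global cost threshold is met (under the
# reconstructed global-cost criterion sketched above) and returns only the indexes of
# the kept points.
def _example_grdp() -> np.ndarray:
    x = np.linspace(0.0, 10.0, 500)
    points = np.stack((x, np.exp(-x)), axis=1)
    reduced = grdp(points, t=0.01, cost=lf.Linear_Metrics.rpd)
    return points[reduced]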