def ransac_line(pts_homo, num_iter=24, kernel_size=4, threshold=5):
    assert pts_homo.ndim == 2 and pts_homo.shape[1] == 3, \
        f"[ransac_line]: pts_homo has wrong shape {pts_homo.shape}!"
    num_pts = pts_homo.shape[0]

    # check number of points
    if num_pts < 2:
        print(f"[ransac_line]: not enough points {num_pts} < 2!")
        return None, 0, None, None

    # check kernel size
    if kernel_size > num_pts:
        print(f"[ransac_line]: kernel size {kernel_size} > number of points "
              f"{num_pts}! Set kernel size = {num_pts}!")
        kernel_size = num_pts
    elif kernel_size < 2:
        print(f"[ransac_line]: kernel size {kernel_size} < minimal "
              f"requirement 2! Set kernel size = 2!")
        kernel_size = 2

    ## sample randomly or go through all possible combinations
    is_sampled = True
    if num_iter >= nCr(num_pts, kernel_size):
        # the number of iterations is more than the number of combinations:
        # enumerate all combinations instead of sampling
        cb = list(combinations(pts_homo, kernel_size))
        num_iter = len(cb)
        is_sampled = False

    ## recorder
    best_kernel_pts = None   # points used to estimate the line
    best_correctness = 0
    best_line = None         # line with highest correctness
    best_idx_fit = None      # points supporting the best line

    ## RANSAC
    for i in range(num_iter):
        # sample/generate kernel
        if is_sampled:
            kernel_pts = pts_homo[np.random.choice(
                num_pts, kernel_size, replace=False), :]
        else:
            kernel_pts = np.vstack(cb[i])
        # fit line: normal parameters (a, b, c)
        line = fit_line(kernel_pts)
        # evaluate line
        correctness, idx_fit = evaluate_line(line, pts_homo, threshold)
        # update best records
        if correctness > best_correctness:
            best_kernel_pts = kernel_pts
            best_correctness = correctness
            best_line = line
            best_idx_fit = idx_fit

    # re-estimate the best line from all of its support points (SVD);
    # skip if no candidate line found any support
    if best_idx_fit is not None:
        best_line = fit_line(pts_homo[best_idx_fit, :])

    return best_line, best_correctness, best_idx_fit, best_kernel_pts
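# ---------------------------------------------------------------------------
# Hedged sketches of the helpers ransac_line depends on (nCr, fit_line,
# evaluate_line). Their real implementations live elsewhere in the repo;
# the versions below are assumptions inferred from how ransac_line calls
# them, included only to make the expected interfaces concrete.
import math

import numpy as np


def nCr_sketch(n, r):
    """Assumed behavior of nCr: the binomial coefficient C(n, r)."""
    return math.comb(n, r)


def fit_line_sketch(pts_homo):
    """Assumed fit_line: least-squares line (a, b, c) through homogeneous
    points via SVD; the line is the right singular vector associated with
    the smallest singular value (the approximate null space)."""
    _, _, vt = np.linalg.svd(pts_homo)
    return vt[-1]


def evaluate_line_sketch(line, pts_homo, threshold):
    """Assumed evaluate_line: count points whose perpendicular distance to
    the line is below threshold; return (correctness, idx_fit)."""
    dist = np.abs(pts_homo @ line) / np.linalg.norm(line[:2])
    idx_fit = np.where(dist < threshold)[0]
    return len(idx_fit), idx_fit
# ---------------------------------------------------------------------------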
def get_true_cluster_eps(cluster, models, weights, nodes, fog_graph,
                         param='weight', normalize=False):
    # maximum pairwise L2 distance between the weighted, flattened
    # parameters of the models in a cluster; each model is pulled from its
    # remote node, compared, and sent back
    tuple_norms = []
    num_nodes = len(cluster)
    for i in range(num_nodes):
        for j in range(i + 1, num_nodes):
            node_i, node_j = cluster[i], cluster[j]
            m_i, m_j = models[node_i].get(), models[node_j].get()
            w_i = torch.cat([
                val.flatten()
                for name, val in m_i.state_dict().items() if param in name
            ]) * weights[node_i]
            w_j = torch.cat([
                val.flatten()
                for name, val in m_j.state_dict().items() if param in name
            ]) * weights[node_j]
            tuple_norms.append(torch.norm(w_i - w_j).item())
            models[node_i] = m_i.copy().send(nodes[node_i])
            models[node_j] = m_j.copy().send(nodes[node_j])

    assert len(tuple_norms) == nCr(num_nodes, 2)

    return max(tuple_norms)
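# A local, PySyft-free sketch of the same computation. In the function
# above, models[node].get() pulls a remote model to the local worker and
# .copy().send(nodes[node]) returns a copy to its node (PySyft
# pointer-tensor API); the quantity itself is just the max pairwise L2
# norm between weighted, flattened parameter vectors. `max_pairwise_eps`
# is a hypothetical name, not part of the original code.
import torch


def max_pairwise_eps(state_dicts, weights, param='weight'):
    flat = [
        torch.cat([v.flatten() for k, v in sd.items() if param in k]) * w
        for sd, w in zip(state_dicts, weights)
    ]
    return max(
        torch.norm(flat[i] - flat[j]).item()
        for i in range(len(flat)) for j in range(i + 1, len(flat)))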
def generate_lines(pts,
                   num_lines,
                   dist_thresh=1,
                   correctness_thresh=4,
                   kernel_size=2,
                   faster=True,
                   keep_trace=True):
    num_pts = pts.shape[0]
    if pts.shape == (num_pts, 2):
        pts_homo = np.hstack((pts, np.ones((num_pts, 1))))
    elif pts.shape == (num_pts, 3):
        pts_homo = pts.copy()
    else:
        raise ValueError(
            f"[generate_lines]: 'pts' has wrong shape: {pts.shape}!")

    # fit lines by RANSAC
    lines = list()      # list of ndarray(3, )
    terminals = list()  # list of ndarray(2, 2)
    pts_target = pts_homo.copy()
    for i in tqdm(range(num_lines),
                  desc="Generating Line",
                  leave=keep_trace,
                  position=0):
        num_target = pts_target.shape[0]
        # check number of points
        if num_target < 2:
            continue
        if faster:
            # adaptive iteration budget: N = log(1 - p) / log(1 - w**s)
            p = 0.999
            s = kernel_size
            w = 1. / (num_lines - i)**2  # assumed probability of inliers
            max_iter = int(np.log2(1 - p) / np.log2(1 - w**s + 1e-8))
        else:
            max_iter = nCr(num_target, kernel_size)
        if max_iter < 1:
            max_iter = 1
        line, correctness, idx_fit, _ = ransac_line(
            pts_target,
            num_iter=max_iter,
            kernel_size=kernel_size,
            threshold=dist_thresh)  # threshold: point-to-line distance upper bound
        # filter out low-correctness lines: at least 'correctness_thresh'
        # points must fit the line
        if correctness < correctness_thresh:
            continue
        # no more lines fit
        if idx_fit is None:
            break
        pts_fit = pts_target[idx_fit, :-1]  # ?x2
        # store line and terminal points
        terminals.append(get_terminals(pts_fit))
        lines.append(line)
        # remove points already fitted with lines
        pts_target = np.delete(pts_target, idx_fit, axis=0)

    return lines, terminals
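# Quick sanity check of the adaptive iteration budget used above. It is the
# standard RANSAC bound N = log(1 - p) / log(1 - w**s) for drawing at least
# one all-inlier sample of size s with confidence p, with the inlier ratio w
# replaced by the heuristic guess 1 / (lines remaining)**2.
# `ransac_budget` is a hypothetical helper, not part of the original code.
def ransac_budget(num_lines, i, kernel_size=2, p=0.999):
    w = 1. / (num_lines - i)**2  # heuristic inlier ratio
    n = int(np.log2(1 - p) / np.log2(1 - w**kernel_size + 1e-8))
    return max(n, 1)

# The budget is large while most points belong to other lines and collapses
# once only one line remains:
#   ransac_budget(8, 0)  -> tens of thousands of iterations (w = 1/64)
#   ransac_budget(8, 7)  -> 1 (w = 1; the max(n, 1) guard kicks in)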
def ransac_line_paral(pts, num_iter=24, kernel_size=2, threshold=0.5):
    # create homogeneous coordinates of points
    assert pts.ndim == 2, \
        f"[ransac_line_paral]: pts has invalid dimension {pts.ndim}!"
    pts_homo = homogenize_ncoord(pts, ncoord=2)
    num_pts = pts_homo.shape[0]

    # check number of points
    if num_pts < 2:
        print(f"[ransac_line_paral]: not enough points {num_pts} < 2!")
        return None, 0, None, None

    # check kernel size
    if kernel_size > num_pts:
        print(f"[ransac_line_paral]: kernel size {kernel_size} > number of "
              f"points {num_pts}! Set kernel size = {num_pts}!")
        kernel_size = num_pts
    elif kernel_size < 2:
        print(f"[ransac_line_paral]: kernel size {kernel_size} < minimal "
              f"requirement 2! Set kernel size = 2!")
        kernel_size = 2

    # enumerate all combinations if there are fewer of them than iterations;
    # otherwise sample kernels randomly
    if num_iter >= nCr(num_pts, kernel_size):
        cb = list(combinations(pts_homo, kernel_size))
    else:
        cb = list()
        for it in range(num_iter):
            cb.append(pts_homo[np.random.choice(
                num_pts, kernel_size, replace=False), :])

    # evaluate candidate lines in parallel
    pool = Pool(cpu_count())
    tmp_func = partial(estimate_line,
                       pts_homo=pts_homo,
                       threshold=threshold,
                       return_idx=False)
    correctness_list = pool.map(tmp_func, cb)
    assert len(correctness_list) == len(cb), (
        f"[ransac_line_paral]: Number of combinations {len(cb)} != "
        f"Number of correctness {len(correctness_list)}!")
    pool.close()

    # find the line with the highest correctness
    cb_idx = correctness_list.index(max(correctness_list))
    best_idx_fit = estimate_line(cb[cb_idx], pts_homo, threshold)
    best_line = fit_line(pts_homo[best_idx_fit, :])

    # check consistency
    best_correctness = correctness_list[cb_idx]
    assert len(best_idx_fit) == best_correctness, (
        f"[ransac_line_paral]: Correctness {best_correctness} != "
        f"Number of support points {len(best_idx_fit)}!")

    return best_line, best_correctness, best_idx_fit, np.vstack(cb[cb_idx])
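# A hedged guess at the estimate_line helper used above, inferred from its
# two call sites: with return_idx=False it returns the inlier count
# ("correctness") of the line fitted to kernel_pts; with the default it
# returns the inlier indices instead. The real implementation lives
# elsewhere in the repo; this sketch reuses the fit_line/evaluate_line
# sketches given after ransac_line.
def estimate_line_sketch(kernel_pts, pts_homo, threshold, return_idx=True):
    line = fit_line_sketch(np.vstack(kernel_pts))
    correctness, idx_fit = evaluate_line_sketch(line, pts_homo, threshold)
    return idx_fit if return_idx else correctness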
def generate_lines_paral(pts,
                         num_lines,
                         dist_thresh=1,
                         correctness_thresh=8,
                         kernel_size=2,
                         faster=True,
                         keep_trace=True):
    # create homogeneous coordinates of points
    assert pts.ndim == 2, \
        f"[generate_lines_paral]: pts has invalid dimension {pts.ndim}!"
    pts_homo = homogenize_ncoord(pts, ncoord=2)

    # estimate lines
    lines = list()      # list of ndarray(3, )
    terminals = list()  # list of ndarray(2, 2)
    # candidate points (homogeneous) for estimating ONE line
    pts_target = pts_homo.copy()
    for i in tqdm(range(num_lines),
                  desc="Generating Line",
                  leave=keep_trace,
                  position=0):
        num_targets = pts_target.shape[0]
        # check number of points
        if num_targets < 2:
            continue
        if faster:
            # adaptive iteration budget: N = log(1 - p) / log(1 - w**s)
            p = 0.999
            s = kernel_size
            w = 1. / (num_lines - i)**2  # assumed probability of inliers
            max_iter = int(np.log2(1 - p) / np.log2(1 - w**s + 1e-8))
        else:
            max_iter = nCr(num_targets, kernel_size)
        if max_iter < 1:
            max_iter = 1
        line, correctness, idx_fit, _ = ransac_line_paral(
            pts_target,
            num_iter=max_iter,
            kernel_size=kernel_size,
            threshold=dist_thresh)
        # drop lines with low correctness
        if correctness < correctness_thresh:
            continue
        # no more lines can be estimated
        if idx_fit is None:
            break
        # calculate line terminal pairs from the support points
        pts_fit = pts_target[idx_fit, :-1]  # ?x2
        terminals.append(get_terminals(pts_fit))
        # store the line
        lines.append(line)
        # remove points already fitted with lines
        pts_target = np.delete(pts_target, idx_fit, axis=0)

    return lines, terminals
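# Hypothetical end-to-end usage on synthetic data: two noisy segments plus
# uniform clutter. The __main__ guard matters here because
# ransac_line_paral spawns a multiprocessing Pool, which re-imports the
# module in worker processes on some platforms.
if __name__ == '__main__':
    rng = np.random.default_rng(0)
    t = rng.uniform(0, 100, size=(50, 1))
    seg_a = np.hstack((t, 0.5 * t + 3 + rng.normal(0, 0.3, (50, 1))))
    seg_b = np.hstack((t, -t + 80 + rng.normal(0, 0.3, (50, 1))))
    clutter = rng.uniform(0, 100, size=(30, 2))
    pts = np.vstack((seg_a, seg_b, clutter))

    lines, terminals = generate_lines_paral(pts, num_lines=2,
                                            dist_thresh=1,
                                            correctness_thresh=8)
    for line, term in zip(lines, terminals):
        print(line, term)  # (a, b, c) coefficients and 2x2 terminal points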
def create_distance_model(digit_count: int,
                          distance_func: Callable = None,
                          model_name: str = None,
                          enc_dist: bool = False,
                          is_edit_distance: bool = False) -> Callable:
    """
    The distance model is a model such that the probability of observing O,
    given that A is the true value, grows as O becomes more different from A
    and shrinks as the two become more similar.

    Intuition: this model is based on the intuition that whenever someone
    shuffles their lock, they will try as much as possible to shy away from
    their true code. This, however, may not be exactly true and could be an
    overgeneralization. For example, one may always enter the same exact
    code whenever they leave their bike.

    Note: we automatically store all distance models and check whether a
    model is already stored before creating a new one.

    :param digit_count: number of digits in the code; the sample space has
        10**digit_count codes
    :param distance_func: a function that takes in observation, actual, and
        digit_count and outputs how far the observation is from the actual
        value (or vice versa)
    :param model_name: name under which the model is stored and loaded
    :param enc_dist: a boolean that, if True, increases the probability of
        larger distances. The choice of how to increase the probability was
        made somewhat arbitrarily; see below.
    :param is_edit_distance: a boolean that, if True, means we're using an
        edit distance function. This matters because edit distances are
        symmetric, so we don't need a mapping for each possible pair of
        observation and actual values.
    """
    # try to load the model; if that fails, create it and then save it
    try:
        distance_model_map = Models.load_model(model_name)
        assert distance_func is not None, \
            'we only store for non-null distance function'

        def prob_observation_given_actual(obs: int, actual: int) -> float:
            return distance_model_map[actual][distance_func(
                obs, actual, digit_count)]

        return prob_observation_given_actual
    except Exception:
        print('* failed to load model *')

    sample_space_size = 10**digit_count

    # if no distance function is given, use the digit_distance function
    if distance_func is None:
        distance_func = Models.digit_distance
        # create the mapping to use every time a digit needs to be replaced:
        # mapping[i] is the fraction of codes at digit distance i
        mapping, multiplier = {}, 1
        for i in range(digit_count + 1):
            mapping[i] = utils.nCr(digit_count,
                                   i) * multiplier / sample_space_size
            multiplier *= 9

        def prob_observation_given_actual(obs: int, actual: int) -> float:
            return mapping[distance_func(obs, actual, digit_count)]

        # no need to store in this case because it's efficient enough
        return prob_observation_given_actual

    # if we're using an edit distance function, we can create the mapping
    # more efficiently than by mapping every pairwise combination of
    # observation and actual values
    if is_edit_distance:
        # create the mapping to use every time a digit needs to be replaced
        mapping: dict = {}
        encouraging_distance: dict = {}
        actual = 0
        for observation in range(10**digit_count):
            distance = distance_func(observation, actual, digit_count)
            # the encouraging distance was designed so that it doesn't
            # increase too fast until it starts reaching really high values
            # (around 16)
            if distance not in encouraging_distance:
                if enc_dist:
                    encouraging_distance[distance] = 0
                else:
                    encouraging_distance[distance] = int(
                        distance**1.7 + (2.1**distance / 1000))
            mapping[distance] = mapping.get(
                distance, 0) + 1 + encouraging_distance[distance]

        def prob_observation_given_actual(obs: int, actual: int) -> float:
            return mapping[distance_func(obs, actual, digit_count)]

        # no need to store in this case because it's efficient enough
        return prob_observation_given_actual

    # get the distance between all pairs for each potential actual value,
    # then create a distribution to use for each.
    # WARNING: this is really inefficient and will take a very long time.
    # It runs in O(n^2), and n is usually in {10000, 100000}, so we
    # amortize the cost by storing the model with the pickle package.
    prob_observation_given_actual_map: dict = {}
    # map distances to their encouraging distance to avoid repeated
    # computations
    encouraging_distance = {}
    for actual in range(sample_space_size):
        mapping = {}
        for observation in range(sample_space_size):
            distance = distance_func(observation, actual, digit_count)
            # the encouraging distance was designed so that it doesn't
            # increase too fast until it starts reaching really high values
            # (around 16)
            if distance not in encouraging_distance:
                if enc_dist:
                    encouraging_distance[distance] = 0
                else:
                    encouraging_distance[distance] = int(
                        distance**1.7 + (2.1**distance / 1000))
            mapping[distance] = mapping.get(
                distance, 0) + 1 + encouraging_distance[distance]
        # create a distribution to normalize the mapping (keys are distances)
        temp_dist = Distribution(mapping)
        prob_observation_given_actual_map[actual] = {
            dist: temp_dist[dist]
            for dist in temp_dist
        }
        print('finished conditional of %d' % actual)

    def prob_observation_given_actual(obs: int, actual: int) -> float:
        distance = distance_func(obs, actual, digit_count)
        return prob_observation_given_actual_map[actual][distance]

    # store the model
    Models.store_model(prob_observation_given_actual_map, model_name)
    return prob_observation_given_actual
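# Hypothetical usage of the default branch (distance_func=None). Assuming
# Models.digit_distance counts differing digit positions (a Hamming-style
# distance over the digits), the returned value is the fraction of codes at
# that distance from the actual code: nCr(digit_count, d) * 9**d / 10**digit_count.
prob = create_distance_model(digit_count=4)
print(prob(1234, 1234))  # distance 0 -> 1/10000, the least likely observation
print(prob(1230, 1234))  # distance 1 -> 36/10000
print(prob(5678, 1234))  # distance 4 -> 6561/10000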