예제 #1
0
def ransac_line(pts_homo, num_iter=24, kernel_size=4, threshold=5):
    """Fit a single line to homogeneous 2D points with RANSAC.

    Each iteration fits a candidate line to ``kernel_size`` points with
    ``fit_line`` and scores it against all points with ``evaluate_line``.
    The candidate with the highest score wins, and the returned line is
    re-fitted on the points that supported that candidate.

    Args:
        pts_homo: 2-D array with exactly three columns, one point per row.
        num_iter: Number of random kernels to try. When it is at least the
            number of possible kernels, every combination is tried exactly
            once instead of sampling.
        kernel_size: Number of points per kernel; clamped into
            ``[2, number of points]``.
        threshold: Forwarded to ``evaluate_line`` (presumably the maximum
            point-to-line distance for a point to count as support).

    Returns:
        A tuple ``(line, correctness, idx_fit, kernel_pts)``: the re-fitted
        line, the best score, the support indices reported by
        ``evaluate_line`` and the kernel points of the winning candidate.
        ``(None, 0, None, None)`` is returned when there are fewer than two
        points or when no candidate scored above zero.
    """
    assert pts_homo.ndim == 2 and pts_homo.shape[
        1] == 3, f"[ransac_line]: pts_homo has wrong shape {pts_homo.shape}!"
    num_pts = pts_homo.shape[0]
    # check number of points
    if num_pts < 2:
        print(f"[ransac_line]: not enough points {num_pts} < 2!")
        return None, 0, None, None
    # check kernel size
    if kernel_size > num_pts:
        print(
            f"[ransac_line]: kernel size {kernel_size} > number of points {num_pts}! Set kernel size = {num_pts}!"
        )
        kernel_size = num_pts
    elif kernel_size < 2:
        print(
            f"[ransac_line]: kernel size {kernel_size} < minimal requirement 2! Set kernel size = 2!"
        )
        kernel_size = 2
    # sample randomly, or walk through every combination when the requested
    # number of iterations already covers all of them
    is_sampled = True
    if num_iter >= nCr(num_pts, kernel_size):
        cb = list(combinations(pts_homo, kernel_size))
        num_iter = len(cb)
        is_sampled = False
    # best-so-far records
    best_kernel_pts = None  # points used to estimate the best candidate
    best_correctness = 0
    best_idx_fit = None  # points supporting the best candidate
    # RANSAC
    for i in range(num_iter):
        # sample/generate kernel
        if is_sampled:
            kernel_pts = pts_homo[np.random.choice(
                num_pts, kernel_size, replace=False), :]
        else:
            kernel_pts = np.vstack(cb[i])
        # fit and score the candidate line
        line = fit_line(kernel_pts)
        correctness, idx_fit = evaluate_line(line, pts_homo, threshold)
        # strict '>' keeps the earliest candidate on ties
        if correctness > best_correctness:
            best_kernel_pts = kernel_pts
            best_correctness = correctness
            best_idx_fit = idx_fit
    # No candidate gathered any support: indexing with None would silently
    # add an axis (pts_homo[None, :]) and feed a 3-D array to fit_line.
    if best_idx_fit is None:
        return None, 0, None, None
    # re-estimate the line from all supporting points
    best_line = fit_line(pts_homo[best_idx_fit, :])
    return best_line, best_correctness, best_idx_fit, best_kernel_pts
예제 #2
0
def get_true_cluster_eps(cluster,
                         models,
                         weights,
                         nodes,
                         fog_graph,
                         param='weight',
                         normalize=False):
    """Return the largest pairwise distance between weighted model vectors.

    For every unordered pair of nodes in ``cluster`` the two models are
    fetched with ``.get()``, their selected parameters are flattened into one
    vector scaled by the node's entry in ``weights``, and the norm of the
    difference is recorded. Each fetched model is then copied and sent back
    to its node (``nodes[...]``), replacing the entry in ``models``.

    Args:
        cluster: Sequence of node identifiers used as keys into ``models``,
            ``weights`` and ``nodes``.
        models: Mapping from node to an object exposing ``.get()``
            (presumably a remote model pointer -- confirm against caller).
            Entries are reassigned in place.
        weights: Mapping from node to the scale applied to its vector.
        nodes: Mapping from node to the target passed to ``.send()``.
        fog_graph: Not used by this function.
        param: Only ``state_dict`` entries whose key contains this substring
            contribute to the vector.
        normalize: Not used by this function.

    Returns:
        The maximum of the recorded pairwise norms.
    """

    def _weighted_vector(model, node):
        # One flat tensor from every matching state_dict entry, scaled by
        # the node's weight.
        pieces = [
            tensor.flatten()
            for key, tensor in model.state_dict().items() if param in key
        ]
        return torch.cat(pieces) * weights[node]

    size = len(cluster)
    pair_gaps = []
    for a in range(size):
        for b in range(a + 1, size):
            first, second = cluster[a], cluster[b]
            model_a = models[first].get()
            model_b = models[second].get()
            vec_a = _weighted_vector(model_a, first)
            vec_b = _weighted_vector(model_b, second)
            pair_gaps.append(torch.norm(vec_a - vec_b).item())
            # put copies of the fetched models back on their nodes
            models[first] = model_a.copy().send(nodes[first])
            models[second] = model_b.copy().send(nodes[second])

    # sanity check: exactly one entry per unordered pair
    assert len(pair_gaps) == nCr(size, 2)

    return max(pair_gaps)
예제 #3
0
def generate_lines(pts,
                   num_lines,
                   dist_thresh=1,
                   correctness_thresh=4,
                   kernel_size=2,
                   faster=True,
                   keep_trace=True):
    """Extract up to ``num_lines`` lines from a point set, one at a time.

    Each round runs ``ransac_line`` on the points that are still unassigned,
    keeps the line if its correctness reaches ``correctness_thresh`` and
    removes the supporting points before the next round.

    Args:
        pts: Array of shape ``(N, 2)`` (a column of ones is appended) or
            ``(N, 3)`` (used as homogeneous coordinates as-is).
        num_lines: Maximum number of rounds / lines.
        dist_thresh: Forwarded to ``ransac_line`` as ``threshold``.
        correctness_thresh: Minimum correctness for a line to be kept.
        kernel_size: Number of points per RANSAC kernel.
        faster: If True, the iteration budget comes from the standard RANSAC
            trial-count formula; otherwise all combinations are requested.
        keep_trace: Forwarded to ``tqdm`` as ``leave``.

    Returns:
        ``(lines, terminals)``: the kept lines and, for each of them, the
        result of ``get_terminals`` on its supporting points.

    Raises:
        ValueError: If ``pts`` is neither ``(N, 2)`` nor ``(N, 3)``.
    """
    num_pts = pts.shape[0]
    if pts.shape == (num_pts, 2):
        pts_homo = np.hstack((pts, np.ones((num_pts, 1))))
    elif pts.shape == (num_pts, 3):
        pts_homo = pts.copy()
    else:
        # Raise instead of print + exit(): exit() terminated the whole
        # interpreter with a success status from inside a library function.
        raise ValueError(
            f"[generate_lines]: 'pts' has wrong shape: {pts.shape}!")

    # fit line by ransac
    lines = []
    terminals = []
    # pts_homo is already a fresh array; np.delete below never mutates it
    pts_target = pts_homo
    for i in tqdm(range(num_lines),
                  desc="Generating Line",
                  leave=keep_trace,
                  position=0):
        num_target = pts_target.shape[0]
        # points only ever get removed, so later rounds cannot succeed either
        if num_target < 2:
            break
        if faster:
            # RANSAC trial count N = log(1 - p) / log(1 - w**s).
            # The previous "+ 1e-8" inside the log pushed the argument to
            # >= 1 once w**s <= 1e-8, giving a single iteration or an
            # overflow on int(inf); log1p stays accurate for tiny w**s.
            confidence = 0.999
            inlier_ratio = 1. / (num_lines - i)**2  # probability of inliers
            all_inlier_prob = inlier_ratio**kernel_size
            if all_inlier_prob >= 1.:
                max_iter = 1  # every kernel is expected to be all inliers
            else:
                log_miss = np.log1p(-all_inlier_prob)
                if log_miss < 0:
                    max_iter = int(
                        np.ceil(np.log(1. - confidence) / log_miss))
                else:
                    # all_inlier_prob underflowed to 0: try everything
                    max_iter = nCr(num_target, kernel_size)
        else:
            max_iter = nCr(num_target, kernel_size)
        if max_iter < 1:
            max_iter = 1
        line, correctness, idx_fit, _ = ransac_line(
            pts_target,
            num_iter=max_iter,
            kernel_size=kernel_size,
            threshold=dist_thresh)
        # drop lines supported by fewer than 'correctness_thresh' points
        if correctness < correctness_thresh:
            continue
        if idx_fit is None:  # no more lines fit
            break
        pts_fit = pts_target[idx_fit, :-1]  # drop the homogeneous column
        terminals.append(get_terminals(pts_fit))
        lines.append(line)
        # remove points already fitted with lines
        pts_target = np.delete(pts_target, idx_fit, axis=0)
    return lines, terminals
예제 #4
0
def ransac_line_paral(pts, num_iter=24, kernel_size=2, threshold=0.5):
    """Fit a single line with RANSAC, scoring the kernels in parallel.

    All kernels are generated up front, scored with ``estimate_line`` in a
    process pool, and the line is re-fitted on the support of the best one.

    Args:
        pts: 2-D array of points; converted with
            ``homogenize_ncoord(pts, ncoord=2)``.
        num_iter: Number of random kernels. When it is at least the number
            of possible kernels, every combination is used instead.
        kernel_size: Number of points per kernel; clamped into
            ``[2, number of points]``.
        threshold: Forwarded to ``estimate_line``.

    Returns:
        ``(line, correctness, idx_fit, kernel_pts)``, or
        ``(None, 0, None, None)`` when there are fewer than two points or no
        kernel could be generated (``num_iter`` below 1).
    """
    # create homogeneous coordinates of points
    assert pts.ndim == 2, f"[estimate_line_paral]: pts has invalid dimension {pts.ndim}!"
    pts_homo = homogenize_ncoord(pts, ncoord=2)
    num_pts = pts_homo.shape[0]
    # check number of points
    if num_pts < 2:
        print(f"[estimate_line_paral]: not enough points {num_pts} < 2!")
        return None, 0, None, None
    # check kernel size
    if kernel_size > num_pts:
        print(
            f"[estimate_line_paral]: kernel size {kernel_size} > number of points {num_pts}! Set kernel size = {num_pts}!"
        )
        kernel_size = num_pts
    elif kernel_size < 2:
        print(
            f"[estimate_line_paral]: kernel size {kernel_size} < minimal requirement 2! Set kernel size = 2!"
        )
        kernel_size = 2
    # enumerate every combination when the budget covers them all,
    # otherwise draw 'num_iter' random kernels
    if num_iter >= nCr(num_pts, kernel_size):
        cb = list(combinations(pts_homo, kernel_size))
    else:
        cb = [
            pts_homo[np.random.choice(num_pts, kernel_size, replace=False), :]
            for _ in range(num_iter)
        ]
    # nothing to score: max() below would fail on an empty list
    if not cb:
        return None, 0, None, None
    # score all kernels in parallel
    tmp_func = partial(estimate_line,
                       pts_homo=pts_homo,
                       threshold=threshold,
                       return_idx=False)
    # the context manager releases the workers even if map() raises
    with Pool(cpu_count()) as pool:
        correctness_list = pool.map(tmp_func, cb)
    assert len(correctness_list) == len(
        cb
    ), f"[estimate_line_paral]: Number of combinations {len(cb)} != Number of correctness {len(correctness_list)}!"
    # index() returns the first maximum, so ties go to the earliest kernel
    best_correctness = max(correctness_list)
    cb_idx = correctness_list.index(best_correctness)
    # re-run the winner (without return_idx=False) to obtain its support
    best_idx_fit = estimate_line(cb[cb_idx], pts_homo, threshold)
    best_line = fit_line(pts_homo[best_idx_fit, :])
    # check consistency
    assert len(
        best_idx_fit
    ) == best_correctness, f"[estimate_line_paral]: Correctness {best_correctness} != Number of support points {len(best_idx_fit)}!"

    return best_line, best_correctness, best_idx_fit, np.vstack(cb[cb_idx])
예제 #5
0
def generate_lines_paral(pts,
                         num_lines,
                         dist_thresh=1,
                         correctness_thresh=8,
                         kernel_size=2,
                         faster=True,
                         keep_trace=True):
    """Extract up to ``num_lines`` lines using the parallel RANSAC fitter.

    Each round runs ``ransac_line_paral`` on the points that are still
    unassigned, keeps the line if its correctness reaches
    ``correctness_thresh`` and removes the supporting points afterwards.

    Args:
        pts: 2-D array of points; converted with
            ``homogenize_ncoord(pts, ncoord=2)``.
        num_lines: Maximum number of rounds / lines.
        dist_thresh: Forwarded to ``ransac_line_paral`` as ``threshold``.
        correctness_thresh: Minimum correctness for a line to be kept.
        kernel_size: Number of points per RANSAC kernel.
        faster: If True, the iteration budget comes from the standard RANSAC
            trial-count formula; otherwise all combinations are requested.
        keep_trace: Forwarded to ``tqdm`` as ``leave``.

    Returns:
        ``(lines, terminals)``: the kept lines and, for each of them, the
        result of ``get_terminals`` on its supporting points.
    """
    # create homogeneous coordinates of points
    assert pts.ndim == 2, f"[estimate_line_paral]: pts has invalid dimension {pts.ndim}!"
    pts_homo = homogenize_ncoord(pts, ncoord=2)
    lines = []
    terminals = []
    # candidate points (homogeneous) for estimating ONE line
    pts_target = pts_homo.copy()
    for i in tqdm(range(num_lines),
                  desc="Generating Line",
                  leave=keep_trace,
                  position=0):
        num_targets = pts_target.shape[0]
        # points only ever get removed, so later rounds cannot succeed either
        if num_targets < 2:
            break
        if faster:
            # RANSAC trial count N = log(1 - p) / log(1 - w**s).
            # The previous "+ 1e-8" inside the log pushed the argument to
            # >= 1 once w**s <= 1e-8, giving a single iteration or an
            # overflow on int(inf); log1p stays accurate for tiny w**s.
            confidence = 0.999
            inlier_ratio = 1. / (num_lines - i)**2  # probability of inliers
            all_inlier_prob = inlier_ratio**kernel_size
            if all_inlier_prob >= 1.:
                max_iter = 1  # every kernel is expected to be all inliers
            else:
                log_miss = np.log1p(-all_inlier_prob)
                if log_miss < 0:
                    max_iter = int(
                        np.ceil(np.log(1. - confidence) / log_miss))
                else:
                    # all_inlier_prob underflowed to 0: try everything
                    max_iter = nCr(num_targets, kernel_size)
        else:
            max_iter = nCr(num_targets, kernel_size)
        if max_iter < 1:
            max_iter = 1
        line, correctness, idx_fit, _ = ransac_line_paral(
            pts_target,
            num_iter=max_iter,
            kernel_size=kernel_size,
            threshold=dist_thresh)
        # drop lines with low correctness
        if correctness < correctness_thresh:
            continue
        # no more lines can be estimated
        if idx_fit is None:
            break
        # calculate line terminal pairs from the support points
        pts_fit = pts_target[idx_fit, :-1]  # drop the homogeneous column
        terminals.append(get_terminals(pts_fit))
        lines.append(line)
        # remove points already fitted with lines
        pts_target = np.delete(pts_target, idx_fit, axis=0)
    return lines, terminals
    def create_distance_model(digit_count: int,
                              distance_func: Callable = None,
                              model_name: str = None,
                              enc_dist: bool = False,
                              is_edit_distance: bool = False) -> Callable:
        """
        Build a function ``f(obs, actual)`` that weighs an observed code
        against the actual code according to the distance between them.

        The distance model is meant to be a model in which the probability of
        observing O, given that A is the true value, grows as O differs more
        from A and shrinks as the two become more similar.

        Intuition:
            This model is based on the intuition that whenever someone
            shuffles their lock, they will try as much as possible to shy
            away from their true code. This, however, may not be exactly true
            and could be too generalizing. For example, one may always put
            the same exact code whenever they leave their bike.

        Note:
            A stored model is looked up by name first. Only the full pairwise
            model (custom distance function, not an edit distance) is stored
            after being built; the other two variants are rebuilt every time.

        :param digit_count:
            number of digits in a code; the sample space holds
            10**digit_count codes
        :param distance_func:
            a function that takes in observation, actual, and digit_count
            and outputs how far observation is from actual (or vice versa).
            When None, ``Models.digit_distance`` is used together with a
            closed-form mapping.
        :param model_name:
            name of the model, used both to load and to store it
        :param enc_dist:
            if True, every observation at distance d contributes an extra
            ``int(d**1.7 + 2.1**d / 1000)`` on top of its plain count, which
            favours further distances. If False only the plain counts are
            used. Has no effect when distance_func is None.
        :param is_edit_distance:
            if True, distance_func is treated as symmetric, so the mapping is
            built once against the actual code 0 instead of once for every
            possible actual code. The resulting weights are returned as
            accumulated, without being passed through ``Distribution``.
        :return:
            a callable ``(obs, actual) -> weight``
        """
        # try to load the model, if it fails, create it then save it
        try:
            distance_model_map = Models.load_model(model_name)
            assert distance_func is not None, 'we only store for non-null distance function'

            def prob_observation_given_actual(obs: int, actual: int) -> float:
                return distance_model_map[actual][distance_func(
                    obs, actual, digit_count)]

            return prob_observation_given_actual
        except Exception:
            # Deliberately broad: whatever goes wrong while loading (or the
            # assertion above) just means the model is built from scratch.
            print('* failed to load model *')

        sample_space_size = 10**digit_count

        # if no distance function given, use the digit_distance function
        if distance_func is None:
            distance_func = Models.digit_distance

            # closed form per distance i: C(digit_count, i) * 9**i codes out
            # of the whole sample space
            mapping, multiplier = {}, 1
            for i in range(digit_count + 1):
                mapping[i] = utils.nCr(digit_count,
                                       i) * multiplier / sample_space_size
                multiplier *= 9

            def prob_observation_given_actual(obs: int, actual: int) -> float:
                return mapping[distance_func(obs, actual, digit_count)]

            # no need to store in this case because it's efficient enough
            return prob_observation_given_actual

        # bonus per distance, cached so it is computed once per distinct
        # distance across all calls of weigh_distances below
        encouraging_distance: dict = {}

        def weigh_distances(actual: int) -> dict:
            # Accumulate, per distance from `actual`, one count for every
            # observation plus the optional encouragement bonus.
            weights: dict = {}
            for observation in range(sample_space_size):
                distance = distance_func(observation, actual, digit_count)
                if distance not in encouraging_distance:
                    # The bonus was made such that it doesn't increase too
                    # fast until it starts reaching really high values
                    # (around 16). It is applied only when enc_dist is True;
                    # the two branches used to be swapped.
                    # NOTE(review): models stored before this fix were built
                    # with the swapped meaning -- consider regenerating them.
                    if enc_dist:
                        encouraging_distance[distance] = int(
                            distance**1.7 + (2.1**distance / 1000))
                    else:
                        encouraging_distance[distance] = 0
                weights[distance] = weights.get(
                    distance, 0) + 1 + encouraging_distance[distance]
            return weights

        # if we're using an edit distance function, we can create the mapping
        # more efficiently than every pairwise mapping of observation and actual
        if is_edit_distance:
            mapping = weigh_distances(0)

            def prob_observation_given_actual(obs: int, actual: int) -> float:
                return mapping[distance_func(obs, actual, digit_count)]

            # no need to store in this case because it's efficient enough
            return prob_observation_given_actual

        # get distance between all pairs for each potential actual, then
        # create a distribution to use for each.
        # WARNING: This is really inefficient and will take a very long time:
        # it runs in O(n^2) with n = sample_space_size, so the result is
        # stored under model_name to amortize the cost.
        prob_observation_given_actual_map: dict = {}
        for actual in range(sample_space_size):
            # create a distribution to normalize the mapping
            temp_dist = Distribution(weigh_distances(actual))
            prob_observation_given_actual_map[actual] = {
                distance: temp_dist[distance]
                for distance in temp_dist
            }
            print('finished conditional of %d' % actual)

        def prob_observation_given_actual(obs: int, actual: int) -> float:
            distance = distance_func(obs, actual, digit_count)
            return prob_observation_given_actual_map[actual][distance]

        # store the model
        Models.store_model(prob_observation_given_actual_map, model_name)
        return prob_observation_given_actual