Пример #1
0
def _kmeans(descriptors):
    """Map BRISK descriptors to a histogram of cluster ids.

    Each descriptor is classified by a ``KMeans`` model built from the
    module-level ``centroids``, which turns the descriptor collection into a
    sequence of integer cluster ids.  The multiplicity of every cluster id is
    then counted.

    Args:
        descriptors: Iterable of descriptors.  Per the original note this is
            an (n, 64) matrix of floats -- TODO confirm against the caller.

    Returns:
        A list of length ``brisk_constants.N_CLUSTERS`` whose entry ``i`` is
        the number of descriptors assigned to cluster ``i`` (the original
        comment states the cardinality is 128).
    """
    kmeans_model = KMeans(centroids=centroids)

    cluster_ids = [kmeans_model.classify(d) for d in descriptors]

    # Count every cluster id in one pass.  The previous set()/list.count()
    # combination rescanned the whole id list once per distinct cluster.
    histogram = [0] * brisk_constants.N_CLUSTERS
    for cluster_id in cluster_ids:
        histogram[cluster_id] += 1

    return histogram
Пример #2
0
kmeans.convergence = 0.001  # convergence error tolerance
kmeans.setup()  # run the setup step before training starts


# Callback invoked on every iteration
def iteration_callback(iteration_times, groups):
    """Print the iteration count and each group's old and new center.

    Args:
        iteration_times: Value reported as the current iteration.
        groups: Iterable of objects exposing ``tag``, ``old_center`` and
            ``center`` attributes.
    """
    header = "iteration %r" % iteration_times
    print(header)
    for cluster in groups:
        center_change = (cluster.tag, cluster.old_center, cluster.center)
        print("新旧群心 (%r) : % r -> %r" % center_change)


# Callback invoked when training completes
def completion_callback(iteration_times, groups, sse):
    """Print the final iteration count, the SSE, and each group's result.

    Args:
        iteration_times: Value reported as the number of iterations run.
        groups: Iterable of objects exposing ``tag``, ``center`` and
            ``samples`` attributes.
        sse: Value reported as the sum of squared errors.
    """
    summary = (iteration_times, sse)
    print("completion %r and sse %r" % summary)
    for cluster in groups:
        final_state = (cluster.tag, cluster.center, cluster.samples)
        print("最终的群心 (%r): %r, %r" % final_state)


# Start training
kmeans.run(iteration_callback, completion_callback)


# Callback that receives the result after a new sample has been classified
def classified_callback(point, group):
    """Print which group a classified point was assigned to.

    Args:
        point: The sample that was classified.
        group: Object exposing a ``tag`` attribute identifying the group.
    """
    label = group.tag
    message = "%r classified to %r" % (point, label)
    print(message)


# Use the trained model to classify new samples
kmeans.classify([[3, 4], [5, 6]], classified_callback)
Пример #3
0
def main():
    """Run the K-Means clustering demo from the command line.

    Arguments are read from ``sys.argv``:

    * ``-k=<int>``  number of classes (required).
    * ``-n=<int>``  number of points to generate when no CSV path is given.
    * ``-l=<int>``  distance limit, passed to ``KMeans`` as ``dist_limit``.
    * ``-v``        verbose output.
    * ``-p``        plot the result (and the initial state when verbose).
    * ``<path>``    a single positional CSV path to read locations from.

    On an unknown flag, a non-integer numeric value, too many positional
    parameters, or missing required parameters, the usage text is printed
    and the process exits with status 1.
    """
    print("Clustering Fun\n")

    verbose = False
    k = None
    plot = False
    num_points = None
    csv_path = None
    lim_distance = None

    for arg in sys.argv[1:]:  # skip the script name
        if arg.startswith(("-k=", "-n=", "-l=")):
            flag, _, raw_value = arg.partition("=")
            try:
                value = int(raw_value)
            except ValueError:
                # Treat a malformed number like any other bad argument
                # instead of dying with a traceback.
                print(f"Invalid integer value in argument: {arg}")
                print_usage()
                sys.exit(1)
            if flag == "-k":
                k = value
            elif flag == "-n":
                num_points = value
            else:
                lim_distance = value
        elif arg == "-v":
            verbose = True
        elif arg == "-p":
            plot = True
        elif arg.startswith("-"):
            print(f"Unknown argument: {arg}")
            print_usage()
            sys.exit(1)
        elif not csv_path:
            csv_path = arg
        else:
            print("Too many parameters!")
            print_usage()
            sys.exit(1)

    # A data source (CSV path or point count) and k are both mandatory.
    if (not csv_path and not num_points) or not k:
        print("Too few parameters!")
        print_usage()
        sys.exit(1)

    if verbose:
        print_options(verbose, csv_path, k, plot, lim_distance)

    if csv_path:
        locations = read_csv(csv_path)
        if verbose:
            print(f"\nRead {len(locations)} locations from {csv_path}")
    else:
        locations = generate_points_gauss(num_points, k)

    print("\nSetting up K-Means Classifier:")
    km = KMeans(locations, k, verbose, dist_limit=lim_distance)

    print("Finding best initial center points with K-Means++")
    # Keep the candidate center set with the smallest reported variance.
    centers = None
    min_var = None
    for i in range(8):
        tmp_centers, variance = km.init_centers()
        if verbose:
            print(f"  Iteration {i}, variance = {variance} km")
        if min_var is None or variance < min_var:
            min_var = variance
            centers = np.copy(tmp_centers)

    # Plot image tags are prefixed with the CSV path when one was given.
    tag_prefix = f"{csv_path}_" if csv_path else ""

    if plot and verbose:
        plot_locations(locations,
                       k,
                       centers,
                       img_tag=f"{tag_prefix}initial_state")

    centers, locations = km.classify(centers)

    if plot:
        plot_locations(locations, k, centers, img_tag=tag_prefix)

    print("\nClass centers:")
    for idx, c in enumerate(centers):
        # Filter once per class instead of once per printed statistic.
        members = [loc for loc in locations if loc.classification == idx]
        print(f"  Class = {idx}:")
        print(f"    Center = {c}")
        if members:
            farthest = max(loc.class_distance for loc in members)
            print(f"    Farthest point from center = {farthest} km")
        else:
            # max() of an empty sequence raises ValueError; a class can end
            # up without locations, so report that instead of crashing.
            print("    Farthest point from center = n/a "
                  "(no locations in this class)")
        print(
            f"    Attribute1 = {any(loc.attr1 for loc in members)} (Logical OR)"
        )
        print(
            f"    Attribute2 = {any(loc.attr2 for loc in members)} (Logical OR)"
        )
        print()

    if lim_distance:
        unclassified = sum(1 for loc in locations
                           if loc.classification is None)
        print(f"\nWith distance limit of {lim_distance} km:")
        print(f"  {unclassified} locations are left unclassified")