Пример #1
0
def get_sorted_distances_from_cluster(cluster_number):
    '''
    Get the other k-means clusters ordered from least to greatest
    distance from a reference cluster.

    :param cluster_number: cluster_number of the reference cluster
    :return: list of cluster numbers ordered by ascending distance
    (the reference cluster itself is excluded); an empty list when no
    cluster carries the given number; None when the cluster request
    reports an error (the error is printed)
    '''
    kmeans_clusters_request = get_kmeans_clusters()
    if kmeans_clusters_request["error"]:
        print(kmeans_clusters_request["error"])
        return None
    kmeans_clusters = kmeans_clusters_request["data"]

    # No early exit on purpose: if several clusters share the number,
    # the last one wins, as it always has.
    cluster = None
    for candidate in kmeans_clusters:
        if candidate.cluster_number == cluster_number:
            cluster = candidate
    if cluster is None:
        # An empty list (not a dict) keeps the return type consistent
        # with the success path and with the documented contract.
        return []

    distances = {
        other.cluster_number: distance_between_coordinates(
            cluster.get_coordinates(), other.get_coordinates())
        for other in kmeans_clusters
        if other.cluster_number != cluster.cluster_number
    }
    # Sort the cluster numbers by their recorded distance.
    return sorted(distances, key=distances.get)
Пример #2
0
def find_closest_cluster(cluster_number):
    '''
    Find the cluster closest to a specific cluster.

    Any failure (either request reports an error, or the reference
    cluster is missing) falls back to returning cluster_number itself;
    request errors are printed.

    :param cluster_number: reference cluster
    :return: number of the closest other cluster, or cluster_number
    when no other cluster could be evaluated
    '''
    cluster_request = get_kmeans_cluster(cluster_number)
    clusters_request = get_kmeans_clusters()
    closest_cluster = cluster_number
    if cluster_request["error"]:
        print(cluster_request["error"])
        return closest_cluster
    if clusters_request["error"]:
        print(clusters_request["error"])
        return closest_cluster
    cluster = cluster_request["data"]
    clusters = clusters_request["data"]
    # Guard BEFORE touching the cluster: the coordinates were previously
    # read first, so a missing cluster raised instead of falling back.
    if not cluster:
        return closest_cluster
    cluster_coordinates = cluster.get_coordinates()
    # float("inf") works on both Python 2 and 3 (sys.maxint is gone in
    # Python 3) and never rejects a legitimately large distance.
    min_distance = float("inf")
    for c in clusters:
        if cluster.cluster_number != c.cluster_number:
            distance = distance_between_coordinates(cluster_coordinates,
                                                    c.get_coordinates())
            if distance < min_distance:
                min_distance = distance
                closest_cluster = c.cluster_number
    return closest_cluster
Пример #3
0
def cluster_size_upper_threshold():
    '''
    Compute the size threshold used for compatible-cluster
    calculations.

    The threshold is the "mean" entry of find_stats() applied to the
    number of ingredients in each k-means cluster.

    :return: size threshold, or 0 when the cluster request reports an
    error (the error is printed)
    '''
    response = get_kmeans_clusters()
    failure = response["error"]
    if failure:
        print(failure)
        return 0
    sizes = []
    for entry in response["data"]:
        sizes.append(len(entry.ingredients))
    return find_stats(sizes)["mean"]
Пример #4
0
def largest_cluster_size():
    '''
    Get the size (number of ingredients) of the largest k-means
    cluster.

    :return: size of the largest cluster; 0 when there are no clusters
    or when the cluster request reports an error (the error is printed)
    '''
    clusters_request = get_kmeans_clusters()
    if clusters_request["error"]:
        print(clusters_request["error"])
        return 0
    sizes = [len(cluster.ingredients) for cluster in clusters_request["data"]]
    # The previous loop compared with "<" against a running maximum that
    # started at 0, so it could never update and always returned 0.
    return max(sizes) if sizes else 0
Пример #5
0
def smallest_cluster_size():
    '''
    Find the size (number of ingredients) of the smallest k-means
    cluster.

    :return: size of the smallest cluster; a very large integer
    sentinel (sys.maxint where it exists, otherwise sys.maxsize) when
    there are no clusters or when the cluster request reports an error
    (the error is printed)
    '''
    clusters_request = get_kmeans_clusters()
    # sys.maxint was removed in Python 3; fall back to sys.maxsize there
    # while keeping the exact historical sentinel on Python 2.
    no_size = getattr(sys, "maxint", sys.maxsize)
    if clusters_request["error"]:
        print(clusters_request["error"])
        return no_size
    sizes = [len(cluster.ingredients) for cluster in clusters_request["data"]]
    return min(sizes) if sizes else no_size
Пример #6
0
def create_mean_shift_clusters():
    '''
    Run mean-shift clustering over the coordinate lists of the k-means
    clusters and print the label assigned to each one.

    One line is printed per k-means cluster in the form
    "<cluster_number>: <label>". The bandwidth is estimated from the
    data with quantile=0.1.

    :return: None (also returned, without printing, when the cluster
    request reports an error)
    '''
    response = get_kmeans_clusters()
    if response["error"]:
        return None
    # Keep coordinates and cluster numbers paired so that labels can be
    # reported against the right cluster afterwards.
    samples = [(entry.get_coordinates_list(), entry.cluster_number)
               for entry in response["data"]]
    points = np.array([coords for coords, _ in samples])
    numbers = [number for _, number in samples]
    model = MeanShift(
        bandwidth=estimate_bandwidth(points, quantile=0.1)).fit(points)
    for position, number in enumerate(numbers):
        label = model.labels_[position]
        print(": ".join((str(number), str(label))))
Пример #7
0
def kmeans_test():
    '''
    Write a plain-text report about the k-means clusters to
    app/test/kmeans_clusters.txt under the current working directory.

    The report has three parts: summary statistics of the cluster
    sizes (taken from find_stats), one section per cluster with its
    size and comma-joined ingredient strings, and one section per entry
    of get_all_kmeans_cluster_distances_dictionary() listing its items
    in ascending order of value.

    :return: None
    :raises: the value stored under "error" in the cluster request,
    when that value is not None
    '''
    response = get_kmeans_clusters()
    if response["error"] != None:
        raise response["error"]
    clusters = response["data"]
    distance_entries = get_all_kmeans_cluster_distances_dictionary()
    stats = find_stats([len(member.ingredients) for member in clusters])

    rule = "-------------------------\n"
    # (label shown in the report, key looked up in the stats mapping)
    stat_rows = (
        ("Mean", "mean"),
        ("Median", "median"),
        ("St Dev", "stdev"),
        ("Quartile 1", "qt1"),
        ("Quartile 2", "qt2"),
    )

    report_path = os.getcwd() + '/app/test/kmeans_clusters.txt'
    with open(report_path, 'w') as report:
        emit = report.write

        # Part 1: size statistics.
        emit("Size stats\n" + rule)
        for label, stat_key in stat_rows:
            emit(label + ": " + str(stats[stat_key]) + "\n")
        emit(rule + "\n")

        # Part 2: one section per cluster, numbered by position.
        for index, cluster in enumerate(clusters):
            emit("Cluster " + str(index) + "\n" + rule)
            emit("Cluster size: " + str(len(cluster.ingredients)) + "\n")
            emit(",".join(cluster.get_ingredient_strings()) + "\n")
            emit(rule + "\n")
        emit("\n")

        # Part 3: distances, nearest first within each section.
        for index, entry in enumerate(distance_entries):
            emit("Distances from Cluster " + str(index) + "\n" + rule)
            ranked = sorted(entry.items(), key=lambda pair: pair[1])
            for name, amount in ranked:
                emit("%s: %s \n" % (name, amount))
            emit(rule + "\n")
Пример #8
0
def get_all_kmeans_cluster_distances():
    '''
    Build the full table of distances between every pair of k-means
    clusters (including each cluster paired with itself).

    Cluster numbers are used directly as list indices, so they must
    fall within range(len(clusters)); positions whose number never
    occurs stay None.

    :return: list indexed by cluster number whose elements are lists,
    also indexed by cluster number, of distances; None when the cluster
    request reports an error (the error is printed)
    '''
    response = get_kmeans_clusters()
    if response["error"]:
        print(response["error"])
        return None
    clusters = response["data"]
    total = len(clusters)
    table = [None for _ in range(total)]
    for origin in clusters:
        row = [None for _ in range(total)]
        for target in clusters:
            row[target.cluster_number] = distance_between_coordinates(
                origin.get_coordinates(), target.get_coordinates())
        table[origin.cluster_number] = row
    return table