Пример #1
0
def kMeans(data_set, score_funcs, k):
    """Iteratively regroup ``data_set`` around ``k`` centroids, scoring each pass.

    Starting centroids come from ``_initializeCentroids``. Each pass groups
    the points with ``_group_points``, records the output of
    ``Analyze.analyze_clusters`` for that grouping, and derives the next
    centroids with ``_findMeanVectors``. Iteration stops once
    ``_hasConverged`` accepts two consecutive centroid sets.

    Args:
        data_set: Sized collection of data points (``len`` is taken of it).
        score_funcs: Forwarded unchanged to ``Analyze.analyze_clusters``.
        k: Number of centroids requested; must not exceed ``len(data_set)``.

    Returns:
        A list of ``Analyze.analyze_clusters`` results in iteration order.
        When more than one grouping was scored, the final entry is dropped
        (the original author notes it repeats the one before it). When the
        very first grouping already converges, that single result is
        returned instead of an empty list.

    Raises:
        AssertionError: If ``k`` exceeds ``len(data_set)`` (only while
            assertions are enabled, i.e. not under ``python -O``).
    """
    assert k <= len(data_set), "k must not exceed the number of data points"

    centroids = _initializeCentroids(k, data_set)
    clusters = _group_points(data_set, centroids)
    results_list = [Analyze.analyze_clusters(clusters, score_funcs)]

    new_centroids = _findMeanVectors(clusters, data_set)
    while not _hasConverged(centroids, new_centroids):
        centroids = new_centroids
        clusters = _group_points(data_set, centroids)
        # Keep recording the score of every intermediate grouping.
        results_list.append(Analyze.analyze_clusters(clusters, score_funcs))
        new_centroids = _findMeanVectors(clusters, data_set)

    # The trailing entry is only a repeat when an earlier entry exists; if
    # the loop never ran, the lone result is the answer and must be kept.
    if len(results_list) > 1:
        return results_list[:-1]
    return results_list
Пример #2
0
def dbscan(data_pts, radius, minpts, score_funcs=None):
    """Label the points with ``_cluster`` and return the cluster analysis.

    Args:
        data_pts: Points to cluster; forwarded to ``_cluster``.
        radius: Forwarded to ``_cluster``.
        minpts: Forwarded to ``_cluster``.
        score_funcs: Forwarded to ``Analyze.analyze_clusters``; defaults to
            ``None``.

    Returns:
        A one-element list holding the ``Analyze.analyze_clusters`` result
        for the label -> points grouping.
    """
    # _cluster yields a mapping whose keys are points and whose values are
    # the cluster label assigned to each point.
    point_labels = _cluster(data_pts, radius, minpts)

    # Invert that mapping: every label collects the points that carry it.
    members_by_label = defaultdict(list)
    for point, cluster_label in point_labels.items():
        members_by_label[cluster_label].append(point)

    # Score the grouping and wrap the single result in a list.
    return [Analyze.analyze_clusters(members_by_label, score_funcs)]
Пример #3
0
def ACO(dataset, iterations, num_clusters, num_ants, beta, prob_cutoff,
        num_elite_ants, decay_rate, q, score_funcs):
    """Entry point of the ant-colony clustering routine.

    Builds a pheromone matrix and a set of ants, then for each iteration
    lets every ant update its beliefs once per data point, ranks the ants,
    lets the top-ranked ants update the pheromones, and tracks the
    lowest-scoring clustering seen so far.

    Args:
        dataset: Data to cluster; its ``len`` fixes the number of belief
            updates each ant performs per iteration.
        iterations: Number of outer iterations to run.
        num_clusters: Forwarded to the pheromone and ant initialisers.
        num_ants: Forwarded to ``_initialize_ants``.
        beta: Forwarded to ``_initialize_ants``.
        prob_cutoff: Forwarded to ``_initialize_ants``.
        num_elite_ants: How many of the top-ranked ants are handed to
            ``_update_pheromones``.
        decay_rate: Forwarded to ``_update_pheromones``.
        q: Forwarded to ``_update_pheromones``.
        score_funcs: Forwarded to ``Analyze.analyze_clusters``.

    Returns:
        A list with one ``Analyze.analyze_clusters`` result per iteration,
        each computed on the best clustering found up to that iteration.
    """
    pheromones = _initialize_pheromones(dataset, num_clusters)
    colony = _initialize_ants(dataset, num_ants, num_clusters, beta,
                              prob_cutoff, pheromones)
    overall_best_score = float("inf")
    overall_best_clustering = None
    history = []

    for _ in range(iterations):
        # Every ant updates its beliefs once for each data point.
        for _point_index in range(len(dataset)):
            for ant in colony:
                ant.update_beliefs()

        # Order the colony according to the ranking of this round.
        ranking = _rank_ants(colony)
        colony = [entry[0] for entry in ranking.ants_and_scores]

        # The leading (elite) ants refresh the shared pheromone matrix,
        # which is then pushed back to all ants.
        elite = colony[:num_elite_ants]
        pheromones = _update_pheromones(pheromones, elite, decay_rate, q)
        _update_ants_pheromones(pheromones, colony)

        round_best_score = ranking.best_score
        round_best_clustering = ranking.best_clustering

        # A strictly lower score replaces the running best.
        if round_best_score < overall_best_score:
            overall_best_score = round_best_score
            overall_best_clustering = round_best_clustering

        # Clear the ants' memory before the next round.
        _reset_ants(colony)

        # Record the analysis of the best clustering known so far.
        history.append(
            Analyze.analyze_clusters(overall_best_clustering, score_funcs))

    return history
def competitive_learning(data_set, eta, num_clusters, iterations, score_funcs):
    """Train a competitive-learning layer and score the clustering each round.

    A ``Layer.Layer`` is created with one input per feature of the first
    sample. Every iteration retrains it with ``_train_network``, clusters
    the data with ``_cluster`` and records the analysis of that clustering.

    Args:
        data_set: Indexable collection of samples; ``len(data_set[0])`` is
            used as the number of inputs.
        eta: Forwarded to the ``Layer.Layer`` constructor.
        num_clusters: Forwarded to ``Layer.Layer`` and ``_train_network``.
        iterations: Number of train/cluster/score rounds.
        score_funcs: Forwarded to ``Analyze.analyze_clusters``.

    Returns:
        A list with one ``Analyze.analyze_clusters`` result per iteration.
    """
    # The width of the first sample determines the layer's input count.
    feature_count = len(data_set[0])
    layer = Layer.Layer(feature_count, num_clusters, eta)
    scores = []

    for _ in range(iterations):
        # Retrain, cluster with the updated layer, then record the score.
        layer = _train_network(data_set, layer, num_clusters)
        grouping = _cluster(data_set, layer)
        scores.append(Analyze.analyze_clusters(grouping, score_funcs))

    return scores