def template_clustering(file, map_size, trust_order, sync_order = 0.999, show_dyn = False, show_layer1 = False, show_layer2 = False, show_clusters = True):
    """Cluster the sample stored in `file` with a syncsom network and display the results.

    Args:
        file: Path handed to `read_sample`.
        map_size: Indexable pair; items 0 and 1 are passed to `syncsom`.
        trust_order: First argument forwarded to `network.process`.
        sync_order: Third argument forwarded to `network.process`.
        show_dyn: Forwarded to `network.process`; when it equals True the
            returned dynamic is plotted as well.
        show_layer1: When it equals True, `show_som_layer()` is called.
        show_layer2: When it equals True, `show_sync_layer()` is called.
        show_clusters: When it equals True, the SOM-level clusters and the
            final clusters are both drawn.
    """
    points = read_sample(file)
    som_sync = syncsom(points, map_size[0], map_size[1])

    elapsed, (time_axis, phase_axis) = timedcall(som_sync.process, trust_order, show_dyn, sync_order)
    print("Sample: ", file, "\t\tExecution time: ", elapsed, "\n")

    # Dynamic of the last layer.
    if show_dyn == True:
        draw_dynamics(time_axis, phase_axis, x_title = "Time", y_title = "Phase", y_lim = [0, 2 * 3.14])

    if show_clusters == True:
        # SOM-level clusters are drawn over the SOM weights, not over the input sample.
        som_groups = som_sync.get_som_clusters()
        draw_clusters(som_sync.som_layer.weights, som_groups)

    # Network structure.
    if show_layer1 == True:
        som_sync.show_som_layer()

    if show_layer2 == True:
        som_sync.show_sync_layer()

    if show_clusters == True:
        final_groups = som_sync.get_clusters()
        draw_clusters(points, final_groups)
def cluster(number_clusters, iterations, maxneighbours, path='data.data'):
    """Run CLARANS on a sample file, print the execution time and draw the clusters.

    Args:
        number_clusters: Passed to `clarans` as the amount of clusters.
        iterations: Passed to `clarans` unchanged.
        maxneighbours: Passed to `clarans` unchanged.
        path: Sample file handed to `read_sample`. Defaults to 'data.data',
            the location that used to be hard-coded, so existing callers
            keep their behaviour.
    """
    data = read_sample(path)
    m_clarans = clarans(data, number_clusters, iterations, maxneighbours)

    # Only the elapsed time is needed; the value returned by process() is unused.
    ticks, _ = timedcall(m_clarans.process)
    print("Execution time: ", ticks, "\n")

    clusters = m_clarans.get_clusters()
    draw_clusters(data, clusters)
def template_clustering(start_centers,
                        path,
                        tolerance=0.025,
                        criterion=splitting_type.BAYESIAN_INFORMATION_CRITERION,
                        ccore=False):
    """Run X-Means on the sample in `path`, report the outcome and draw the clusters.

    Args:
        start_centers: Initial centers passed to `xmeans`.
        path: Sample file handed to `read_sample`.
        tolerance: Passed to `xmeans` unchanged.
        criterion: Splitting criterion passed to `xmeans`; also printed by name.
        ccore: Passed to `xmeans` unchanged.
    """
    points = read_sample(path)

    # 20 is the fourth positional argument given to xmeans.
    model = xmeans(points, start_centers, 20, tolerance, criterion, ccore)
    elapsed, _ = timedcall(model.process)
    groups = model.get_clusters()

    # Human-readable name of the criterion for the report line.
    if criterion == splitting_type.BAYESIAN_INFORMATION_CRITERION:
        criterion_string = "BAYESIAN_INFORMATION_CRITERION"
    elif criterion == splitting_type.MINIMUM_NOISELESS_DESCRIPTION_LENGTH:
        criterion_string = "MINIMUM_NOISELESS_DESCRIPTION_LENGTH"
    else:
        criterion_string = "UNKNOWN"

    print("Sample: ", path, "\nInitial centers: '", (start_centers is not None),
          "', Execution time: '", elapsed, "', Number of clusters:", len(groups),
          ",", criterion_string, "\n")

    draw_clusters(points, groups)
def template_clustering(file, radius, order, show_dyn=False, show_conn=False, show_clusters=True, ena_conn_weight=False, ccore_flag=True, tolerance=0.1):
    """Cluster the sample in `file` with a syncnet network and display the requested views.

    Args:
        file: Sample file handed to `read_sample`.
        radius: Passed to `syncnet` unchanged.
        order: First argument forwarded to `network.process`.
        show_dyn: Forwarded to `network.process`; when it equals True the
            output dynamic is shown and animated.
        show_conn: When it equals True and `ccore_flag` equals False, the
            network structure is shown.
        show_clusters: When it equals True, clusters are allocated, counted
            on stdout and drawn.
        ena_conn_weight: Passed to `syncnet` as `enable_conn_weight`.
        ccore_flag: Passed to `syncnet` as `ccore`.
        tolerance: Passed to `analyser.allocate_clusters`.
    """
    sample = read_sample(file)

    network = syncnet(sample, radius, enable_conn_weight=ena_conn_weight, ccore=ccore_flag)

    ticks, analyser = timedcall(network.process, order, solve_type.FAST, show_dyn)
    print("Sample: ", file, "\t\tExecution time: ", ticks, "\n")

    if show_dyn == True:
        sync_visualizer.show_output_dynamic(analyser)
        sync_visualizer.animate(analyser)

    # The network is only shown when the ccore flag is off.
    if (show_conn == True) and (ccore_flag == False):
        network.show_network()

    if show_clusters == True:
        clusters = analyser.allocate_clusters(tolerance)
        print("amount of clusters: ", len(clusters))
        draw_clusters(sample, clusters)
def template_clustering(number_clusters, path, branching_factor = 5, max_node_entries = 5, initial_diameter = 0.0, type_measurement = measurement_type.CENTROID_EUCLIDIAN_DISTANCE, entry_size_limit = 200, ccore = True):
    """Run BIRCH on the sample in `path`, print the execution time and draw the clusters.

    Args:
        number_clusters: Passed to `birch` as the amount of clusters.
        path: Sample file handed to `read_sample`.
        branching_factor, max_node_entries, initial_diameter, type_measurement,
        entry_size_limit, ccore: Passed to `birch` positionally, in this order.
    """
    points = read_sample(path)

    model = birch(
        points,
        number_clusters,
        branching_factor,
        max_node_entries,
        initial_diameter,
        type_measurement,
        entry_size_limit,
        ccore,
    )
    elapsed, _ = timedcall(model.process)
    print("Sample: ", path, "\t\tExecution time: ", elapsed, "\n")

    draw_clusters(points, model.get_clusters())
def template_clustering(number_clusters, path, branching_factor = 5, max_node_entries = 5, initial_diameter = 0.0, type_measurement = measurement_type.CENTROID_EUCLIDIAN_DISTANCE, entry_size_limit = 200, ccore = True):
    """Cluster the sample stored in `path` with BIRCH and visualise the result.

    The execution time of `process()` is printed before the clusters are drawn.

    Args:
        number_clusters: Amount of clusters passed to `birch`.
        path: Sample file handed to `read_sample`.
        branching_factor, max_node_entries, initial_diameter, type_measurement,
        entry_size_limit, ccore: Forwarded to `birch` positionally, in this order.
    """
    data = read_sample(path)

    birch_args = (number_clusters, branching_factor, max_node_entries,
                  initial_diameter, type_measurement, entry_size_limit, ccore)
    algorithm = birch(data, *birch_args)

    duration, _ = timedcall(algorithm.process)
    print("Sample: ", path, "\t\tExecution time: ", duration, "\n")

    allocated = algorithm.get_clusters()
    draw_clusters(data, allocated)
def template_clustering(number_clusters, path, iterations, maxneighbors):
    """Run CLARANS on the sample in `path`, print the execution time and draw the clusters.

    Args:
        number_clusters: Amount of clusters passed to `clarans`.
        path: Sample file handed to `read_sample`.
        iterations: Passed to `clarans` unchanged.
        maxneighbors: Passed to `clarans` unchanged.
    """
    points = read_sample(path)

    model = clarans(points, number_clusters, iterations, maxneighbors)
    elapsed, _ = timedcall(model.process)
    print("Sample: ", path, "\t\tExecution time: ", elapsed, "\n")

    draw_clusters(points, model.get_clusters())
def template_clustering(start_medians, path, tolerance=0.25):
    """Run K-Medians on the sample in `path`, print the execution time and draw the clusters.

    Args:
        start_medians: Initial medians passed to `kmedians`.
        path: Sample file handed to `read_sample`.
        tolerance: Passed to `kmedians` unchanged.
    """
    points = read_sample(path)

    model = kmedians(points, start_medians, tolerance)
    elapsed, _ = timedcall(model.process)

    groups = model.get_clusters()
    print("Sample: ", path, "\t\tExecution time: ", elapsed, "\n")

    draw_clusters(points, groups)
def template_clustering(start_centers, path, tolerance = 0.25):
    """Cluster the sample stored in `path` with K-Medians and visualise the result.

    Args:
        start_centers: Initial centers passed to `kmedians`.
        path: Sample file handed to `read_sample`.
        tolerance: Passed to `kmedians` unchanged.
    """
    data = read_sample(path)
    algorithm = kmedians(data, start_centers, tolerance)

    duration, _ = timedcall(algorithm.process)
    allocated = algorithm.get_clusters()

    print("Sample: ", path, "\t\tExecution time: ", duration, "\n")
    draw_clusters(data, allocated)
def template_clustering(file, number_clusters, arg_order = 0.999, arg_collect_dynamic = True, ccore_flag = False):
    """Cluster the sample in `file` with an hsyncnet network and draw the clusters.

    Args:
        file: Sample file handed to `read_sample`.
        number_clusters: Passed to `hsyncnet` unchanged.
        arg_order: First argument forwarded to `network.process`.
        arg_collect_dynamic: Forwarded as `collect_dynamic`; when it equals
            True the output dynamic is shown before the clusters are drawn.
        ccore_flag: Passed to `hsyncnet` as `ccore`.
    """
    points = read_sample(file)

    net = hsyncnet(points, number_clusters, ccore = ccore_flag)
    result = net.process(arg_order, collect_dynamic = arg_collect_dynamic)
    groups = result.allocate_clusters()

    if arg_collect_dynamic == True:
        sync_visualizer.show_output_dynamic(result)

    draw_clusters(points, groups)
def template_clustering(path, radius, cluster_numbers, threshold, draw = True, ccore = True):
    """Run ROCK on the sample in `path`, print the execution time and optionally draw the clusters.

    Args:
        path: Sample file handed to `read_sample`.
        radius: Passed to `rock` unchanged.
        cluster_numbers: Passed to `rock` unchanged.
        threshold: Passed to `rock` unchanged.
        draw: When it equals True, the clusters are drawn.
        ccore: Passed to `rock` unchanged.
    """
    points = read_sample(path)

    model = rock(points, radius, cluster_numbers, threshold, ccore)
    elapsed, _ = timedcall(model.process)

    groups = model.get_clusters()
    print("Sample: ", path, "\t\tExecution time: ", elapsed, "\n")

    if draw == True:
        draw_clusters(points, groups)
def template_clustering(file, number_clusters, arg_order = 0.999, arg_collect_dynamic = True, ccore_flag = False):
    """Cluster the sample in `file` with an hsyncnet network, timing the processing step.

    Args:
        file: Sample file handed to `read_sample`.
        number_clusters: Passed to `hsyncnet` unchanged.
        arg_order: First argument forwarded to `network.process`.
        arg_collect_dynamic: Third argument forwarded to `network.process`;
            when it equals True the output dynamic is shown.
        ccore_flag: Passed to `hsyncnet` as `ccore`.
    """
    points = read_sample(file)

    # The initial neighbour count is 15% of the sample size, truncated to int.
    neighbour_count = int(len(points) * 0.15)
    net = hsyncnet(
        points,
        number_clusters,
        initial_neighbors = neighbour_count,
        osc_initial_phases = initial_type.EQUIPARTITION,
        ccore = ccore_flag,
    )

    elapsed, result = timedcall(net.process, arg_order, solve_type.FAST, arg_collect_dynamic)
    print("Sample: ", file, "\t\tExecution time: ", elapsed, "\n")

    groups = result.allocate_clusters()

    if arg_collect_dynamic == True:
        sync_visualizer.show_output_dynamic(result)

    draw_clusters(points, groups)
def template_clustering(number_clusters, path, number_represent_points = 5, compression = 0.5, draw = True, ccore_flag = False):
    """Run CURE on the sample in `path`, print the execution time and optionally draw the clusters.

    Args:
        number_clusters: Amount of clusters passed to `cure`.
        path: Sample file handed to `read_sample`.
        number_represent_points: Passed to `cure` unchanged.
        compression: Passed to `cure` unchanged.
        draw: When it is True, the clusters are drawn.
        ccore_flag: Passed to `cure`; also selects what is given to
            `draw_clusters` as data.
    """
    points = read_sample(path)

    model = cure(points, number_clusters, number_represent_points, compression, ccore_flag)
    elapsed, _ = timedcall(model.process)

    groups = model.get_clusters()
    print("Sample: ", path, "\t\tExecution time: ", elapsed, "\n")

    if draw is True:
        # With ccore the sample is supplied as data, otherwise None is.
        # NOTE(review): presumably the non-ccore clusters already hold the
        # points themselves - confirm against cure.get_clusters().
        drawn_data = points if ccore_flag is True else None
        draw_clusters(drawn_data, groups)
def template_clustering(file, number_clusters, arg_order=0.999, arg_collect_dynamic=True, ccore_flag=False):
    """Read the sample from `file`, cluster it with hsyncnet and visualise the result.

    Args:
        file: Sample file handed to `read_sample`.
        number_clusters: Passed to `hsyncnet` unchanged.
        arg_order: First argument forwarded to `network.process`.
        arg_collect_dynamic: Forwarded as `collect_dynamic`; when it equals
            True the output dynamic is shown as well.
        ccore_flag: Passed to `hsyncnet` as `ccore`.
    """
    data = read_sample(file)
    oscillatory_net = hsyncnet(data, number_clusters, ccore=ccore_flag)

    dynamic = oscillatory_net.process(arg_order, collect_dynamic=arg_collect_dynamic)
    allocated = dynamic.allocate_clusters()

    if arg_collect_dynamic == True:
        sync_visualizer.show_output_dynamic(dynamic)

    draw_clusters(data, allocated)
def template_clustering(start_centers, path, tolerance = 0.025, criterion = splitting_type.BAYESIAN_INFORMATION_CRITERION, ccore = False):
    """Cluster the sample stored in `path` with X-Means and visualise the result.

    A one-line report (path, execution time, cluster count, criterion name)
    is printed before the clusters are drawn.

    Args:
        start_centers: Initial centers passed to `xmeans`.
        path: Sample file handed to `read_sample`.
        tolerance: Passed to `xmeans` unchanged.
        criterion: Splitting criterion passed to `xmeans`.
        ccore: Passed to `xmeans` unchanged.
    """
    data = read_sample(path)

    # 20 is the fourth positional argument given to xmeans.
    algorithm = xmeans(data, start_centers, 20, tolerance, criterion, ccore)
    duration, _ = timedcall(algorithm.process)
    allocated = algorithm.get_clusters()

    # Resolve the printable name of the splitting criterion.
    if criterion == splitting_type.BAYESIAN_INFORMATION_CRITERION:
        criterion_string = "BAYESIAN_INFORMATION_CRITERION"
    elif criterion == splitting_type.MINIMUM_NOISELESS_DESCRIPTION_LENGTH:
        criterion_string = "MINIMUM_NOISELESS_DESCRIPTION_LENGTH"
    else:
        criterion_string = "UNKNOWN"

    print("Sample: ", path, "\tExecution time: ", duration, "Number of clusters: ", len(allocated), criterion_string, "\n")

    draw_clusters(data, allocated)
def template_clustering(path_sample, eps, minpts):
    """Run OPTICS on a sample, draw the clusters and plot the cluster ordering.

    Args:
        path_sample: Sample file handed to `read_sample`.
        eps: Passed to `optics` unchanged.
        minpts: Passed to `optics` unchanged.
    """
    points = read_sample(path_sample)

    model = optics(points, eps, minpts)
    model.process()

    groups = model.get_clusters()
    # NOTE(review): noise is fetched but never used; an empty list is passed
    # to draw_clusters instead - confirm whether noise should be drawn.
    noise = model.get_noise()

    draw_clusters(points, groups, [], ".")

    # Bar chart of the cluster ordering (reachability distance per position).
    ordering = model.get_cluster_ordering()
    positions = list(range(len(ordering)))
    plt.bar(positions, ordering)
    plt.show()
def template_clustering(path_sample, eps, minpts):
    """Cluster the sample in `path_sample` with OPTICS and visualise clusters and ordering.

    Args:
        path_sample: Sample file handed to `read_sample`.
        eps: Passed to `optics` unchanged.
        minpts: Passed to `optics` unchanged.
    """
    data = read_sample(path_sample)

    algorithm = optics(data, eps, minpts)
    algorithm.process()

    allocated = algorithm.get_clusters()
    # NOTE(review): the noise returned here is discarded and an empty list
    # goes to draw_clusters - verify that this is intended.
    noise = algorithm.get_noise()

    draw_clusters(data, allocated, [], '.')

    # Visualise the cluster ordering in line with reachability distance.
    ordering = algorithm.get_cluster_ordering()
    x_positions = list(range(0, len(ordering)))
    plt.bar(x_positions, ordering)
    plt.show()
def template_clustering(file, radius, order, show_dyn = False, show_conn = False, show_clusters = True, ena_conn_weight = False, ccore_flag = True, tolerance = 0.1):
    """Cluster the sample in `file` with a syncnet network and display the requested views.

    Args:
        file: Sample file handed to `read_sample`.
        radius: Passed to `syncnet` unchanged.
        order: First argument forwarded to `network.process`.
        show_dyn: Forwarded to `network.process`; when it equals True the
            output dynamic is shown and animated.
        show_conn: When it equals True and `ccore_flag` equals False, the
            network structure is shown.
        show_clusters: When it equals True, clusters are allocated and drawn.
        ena_conn_weight: Passed to `syncnet` as `enable_conn_weight`.
        ccore_flag: Passed to `syncnet` as `ccore`.
        tolerance: Passed to `analyser.allocate_clusters`.
    """
    points = read_sample(file)

    net = syncnet(points, radius, enable_conn_weight = ena_conn_weight, ccore = ccore_flag)

    elapsed, dynamic = timedcall(net.process, order, solve_type.FAST, show_dyn)
    print("Sample: ", file, "\t\tExecution time: ", elapsed, "\n")

    if show_dyn == True:
        sync_visualizer.show_output_dynamic(dynamic)
        sync_visualizer.animate_output_dynamic(dynamic)

    # The network is only shown when the ccore flag is off.
    if (show_conn == True) and (ccore_flag == False):
        net.show_network()

    if show_clusters == True:
        groups = dynamic.allocate_clusters(tolerance)
        draw_clusters(points, groups)
def template_clustering(number_clusters, path, number_represent_points=5, compression=0.5, draw=True, ccore_flag=False):
    """Cluster the sample stored in `path` with CURE and optionally visualise the result.

    Args:
        number_clusters: Amount of clusters passed to `cure`.
        path: Sample file handed to `read_sample`.
        number_represent_points: Passed to `cure` unchanged.
        compression: Passed to `cure` unchanged.
        draw: When it is True, the clusters are drawn.
        ccore_flag: Passed to `cure`; also decides whether the sample or
            None is given to `draw_clusters` as data.
    """
    data = read_sample(path)

    algorithm = cure(data, number_clusters, number_represent_points, compression, ccore_flag)
    duration, _ = timedcall(algorithm.process)

    allocated = algorithm.get_clusters()
    print("Sample: ", path, "\t\tExecution time: ", duration, "\n")

    if draw is not True:
        return

    # NOTE(review): without ccore, None is passed as data - presumably the
    # clusters then contain points rather than indexes; confirm.
    if ccore_flag is True:
        draw_clusters(data, allocated)
    else:
        draw_clusters(None, allocated)
def templateDrawClustersNoFailure(self, data_path, amount_clusters):
    """Check that `draw_clusters` returns an object for a K-Means clustering result.

    Args:
        data_path: Sample file handed to `read_sample`.
        amount_clusters: Amount of centers requested from
            `kmeans_plusplus_initializer`.

    Raises:
        AssertionError: If `draw_clusters` returns None.
    """
    sample = read_sample(data_path)

    initial_centers = kmeans_plusplus_initializer(sample, amount_clusters).initialize()

    # NOTE(review): amount_clusters is also passed as the third positional
    # argument of kmeans - confirm that this slot is not a tolerance.
    kmeans_instance = kmeans(sample, initial_centers, amount_clusters)
    kmeans_instance.process()
    clusters = kmeans_instance.get_clusters()

    ax = draw_clusters(sample, clusters)

    # Identity check: `!=` would dispatch to the returned object's own comparison.
    assert ax is not None
def templateClusterAllocation(self, path, cluster_sizes, number_clusters, branching_factor = 5, max_node_entries = 5, initial_diameter = 0.1, type_measurement = measurement_type.CENTROID_EUCLIDIAN_DISTANCE, entry_size_limit = 200, ccore = True):
    """Check that BIRCH allocates clusters of the expected sizes for a sample.

    When a check is about to fail, the clusters are drawn first to ease
    debugging.

    Args:
        path: Sample file handed to `read_sample`.
        cluster_sizes: Expected cluster sizes, in any order. The argument is
            not modified.
        number_clusters: Amount of clusters passed to `birch`.
        branching_factor, max_node_entries, initial_diameter, type_measurement,
        entry_size_limit, ccore: Passed to `birch` positionally, in this order.

    Raises:
        AssertionError: If the clusters do not cover the whole sample or
            their sizes differ from `cluster_sizes`.
    """
    sample = read_sample(path)

    birch_instance = birch(sample, number_clusters, branching_factor, max_node_entries, initial_diameter, type_measurement, entry_size_limit, ccore)
    birch_instance.process()
    clusters = birch_instance.get_clusters()

    obtained_cluster_sizes = sorted(len(cluster) for cluster in clusters)

    total_length = sum(obtained_cluster_sizes)
    if total_length != len(sample):
        draw_clusters(sample, clusters)
    assert total_length == len(sample)

    # Compare against a sorted copy so that the caller's list is left untouched.
    expected_cluster_sizes = sorted(cluster_sizes)
    if expected_cluster_sizes != obtained_cluster_sizes:
        draw_clusters(sample, clusters)
    assert expected_cluster_sizes == obtained_cluster_sizes
def clustering(genomes):
    """Build a feature matrix from the genomes' `info` keys, cluster it with X-Means and draw it.

    Args:
        genomes: Sized, indexable collection of `(gid, genome)` pairs; each
            genome exposes a dict-like `info` attribute.
    """
    # One column per distinct info key, in sorted key order.
    attribute_names = set()
    for _gid, genome in genomes:
        attribute_names.update(genome.info.keys())
    ordered_names = sorted(attribute_names)
    column_of = {name: column for column, name in enumerate(ordered_names)}

    props = np.zeros((len(genomes), len(ordered_names)))
    for row, (_gid, genome) in enumerate(genomes):
        for name, _value in genome.info.items():
            # NOTE(review): the stored value is ignored and a random number
            # is written instead - looks like a placeholder; confirm.
            props[row][column_of[name]] = random.random()

    props = scipy.stats.zscore(props)

    # Two initial centers are given to X-Means.
    seeds = kmeans_plusplus_initializer(props, 2).initialize()
    model = xmeans(props, seeds, ccore=False)
    model.process()

    draw_clusters(props, model.get_clusters())
def template_clustering(file, radius, order, show_dyn = False, show_conn = False, show_clusters = True, ena_conn_weight = False, ccore_flag = True, tolerance = 0.1):
    """Cluster the sample in `file` with a syncnet network and return the result.

    Args:
        file: Sample file handed to `read_sample`.
        radius: Passed to `syncnet` unchanged.
        order: First argument forwarded to `syncnet_instance.process`.
        show_dyn: Forwarded to `process`; when it equals True the output
            dynamic, its animation and the local order parameter are shown.
        show_conn: When it equals True, the network structure is shown.
        show_clusters: When it equals True, the amount of clusters is printed
            and the clusters are drawn.
        ena_conn_weight: Passed to `syncnet` as `enable_conn_weight`.
        ccore_flag: Passed to `syncnet` as `ccore`.
        tolerance: Passed to `analyser.allocate_clusters`.

    Returns:
        Tuple `(sample, clusters)`.
    """
    sample = read_sample(file)
    syncnet_instance = syncnet(sample, radius, enable_conn_weight = ena_conn_weight, ccore = ccore_flag)

    ticks, analyser = timedcall(syncnet_instance.process, order, solve_type.FAST, show_dyn)
    print("Sample: ", file, "\t\tExecution time: ", ticks)

    if show_dyn == True:
        sync_visualizer.show_output_dynamic(analyser)
        sync_visualizer.animate(analyser)
        sync_visualizer.show_local_order_parameter(analyser, syncnet_instance)

    if show_conn == True:
        syncnet_instance.show_network()

    # Allocate unconditionally: the clusters are part of the return value, so
    # they must exist even when show_clusters is off (it used to raise
    # UnboundLocalError in that case).
    clusters = analyser.allocate_clusters(tolerance)

    if show_clusters == True:
        print("Amount of allocated clusters: ", len(clusters))
        draw_clusters(sample, clusters)

    print("----------------------------\n")
    return (sample, clusters)
def calculate_clusters_and_save_plot(data, plot_name, tolerance=0.025, kmax=20):
    """Cluster `data` with X-Means, save a plot of the clusters and return the result.

    Initial centers are `NUM_INIT_CLUSTERS` distinct rows of `data`, picked
    at random.

    Args:
        data: Array indexed by row (`data.shape[0]` rows).
        plot_name: Target passed to `Figure.savefig`.
        tolerance: Passed to `xmeans` unchanged.
        kmax: Passed to `xmeans` unchanged.

    Returns:
        Tuple `(clusters, centers)` as reported by the X-Means instance.
    """
    # replace=False guarantees that no row is chosen twice.
    seed_rows = np.random.choice(data.shape[0], NUM_INIT_CLUSTERS, replace=False)
    centers = data[seed_rows]

    xmeans_instance = xmeans.xmeans(
        data,
        initial_centers=centers,
        tolerance=tolerance,
        criterion=xmeans.splitting_type.BAYESIAN_INFORMATION_CRITERION,
        kmax=kmax,
        ccore=False)
    xmeans_instance.process()

    clusters = xmeans_instance.get_clusters()
    centers = xmeans_instance.get_centers()

    # Draw over the same data that was clustered; the previous code referred
    # to a name (`unique_pixels`) that is not defined in this function.
    plot = draw_clusters(data, clusters)
    # The Figure method is `savefig`; `save_fig` does not exist.
    plot.get_figure().savefig(plot_name, dpi=200)

    return clusters, centers
def display_two_dimensional_cluster_distances(path_sample, amount_clusters):
    """Cluster a two-dimensional sample and annotate the plot with pairwise cluster distances.

    The sample is clustered with the agglomerative algorithm. For every pair
    of clusters a double-headed arrow is drawn between their centroids and
    five distances are printed and written along the arrow: 'euclidian',
    'manhattan', 'avr-inter', 'avr-intra' and 'variance'.

    Args:
        path_sample: Sample file handed to `utils.read_sample`.
        amount_clusters: Amount of clusters requested from `agglomerative`.
    """
    sample = utils.read_sample(path_sample)

    agglomerative_instance = agglomerative(sample, amount_clusters)
    agglomerative_instance.process()
    obtained_clusters = agglomerative_instance.get_clusters()

    stage = utils.draw_clusters(sample, obtained_clusters, display_result = False)

    # Each unordered pair is visited exactly once by the nested ranges, so no
    # extra bookkeeping of visited pairs is required.
    cluster_count = len(obtained_clusters)
    for index_cluster in range(cluster_count):
        for index_neighbor_cluster in range(index_cluster + 1, cluster_count):
            cluster1 = obtained_clusters[index_cluster]
            cluster2 = obtained_clusters[index_neighbor_cluster]

            center_cluster1 = utils.centroid(sample, cluster1)
            center_cluster2 = utils.centroid(sample, cluster2)

            # Per axis: the span between the centroids and whether cluster 1
            # sits at the upper end. Labels are laid out in reverse then.
            x_reversed = not (center_cluster2[0] > center_cluster1[0])
            if x_reversed:
                x_minimum, x_maximum = center_cluster2[0], center_cluster1[0]
            else:
                x_minimum, x_maximum = center_cluster1[0], center_cluster2[0]

            y_reversed = not (center_cluster2[1] > center_cluster1[1])
            if y_reversed:
                y_minimum, y_maximum = center_cluster2[1], center_cluster1[1]
            else:
                y_minimum, y_maximum = center_cluster1[1], center_cluster2[1]

            print("Cluster 1:", cluster1, ", center:", center_cluster1)
            print("Cluster 2:", cluster2, ", center:", center_cluster2)

            # The empty label is positional because its keyword name differs
            # between matplotlib releases (`s` before 3.3, `text` after).
            stage.annotate('', xy = (center_cluster1[0], center_cluster1[1]), xytext = (center_cluster2[0], center_cluster2[1]), arrowprops = dict(arrowstyle = '<->'))

            # Evaluated lazily, in display order.
            measures = (
                ('euclidian', lambda: utils.euclidean_distance(center_cluster1, center_cluster2)),
                ('manhattan', lambda: utils.manhattan_distance(center_cluster1, center_cluster2)),
                ('avr-inter', lambda: utils.average_inter_cluster_distance(cluster1, cluster2, sample)),
                ('avr-intra', lambda: utils.average_intra_cluster_distance(cluster1, cluster2, sample)),
                ('variance', lambda: utils.variance_increase_distance(cluster1, cluster2, sample)),
            )
            slot_count = len(measures)

            for position, (distance_type, measure) in enumerate(measures):
                distance = measure()
                print("\tCluster distance -", distance_type, ":", distance)

                # Labels are spread over slots 3..(slot_count + 3) of the
                # (slot_count + 6) equal parts between the two centroids.
                x_multiplier = (slot_count - position + 3) if x_reversed else (position + 3)
                y_multiplier = (slot_count - position + 3) if y_reversed else (position + 3)

                x_text = x_multiplier * (x_maximum - x_minimum) / (slot_count + 6) + x_minimum
                y_text = y_multiplier * (y_maximum - y_minimum) / (slot_count + 6) + y_minimum

                stage.text(x_text, y_text, distance_type + " {:.3f}".format(distance), fontsize = 9, color='blue')

    plt.show()
def display_two_dimensional_cluster_distances(path_sample, amount_clusters):
    """Draw agglomerative clusters of a 2-D sample together with their pairwise distances.

    For each pair of clusters a '<->' arrow connects the two centroids, and
    the 'euclidian', 'manhattan', 'avr-inter', 'avr-intra' and 'variance'
    distances are printed and placed as text labels between the centroids.

    Args:
        path_sample: Sample file handed to `utils.read_sample`.
        amount_clusters: Amount of clusters requested from `agglomerative`.
    """
    distances = ['euclidian', 'manhattan', 'avr-inter', 'avr-intra', 'variance']

    sample = utils.read_sample(path_sample)

    agglomerative_instance = agglomerative(sample, amount_clusters)
    agglomerative_instance.process()
    obtained_clusters = agglomerative_instance.get_clusters()

    stage = utils.draw_clusters(sample, obtained_clusters, display_result=False)

    # The inner range starts after the outer index, so every unordered pair
    # is handled once and no adjacency matrix is needed.
    for index_cluster, cluster1 in enumerate(obtained_clusters):
        for cluster2 in obtained_clusters[index_cluster + 1:]:
            center_cluster1 = utils.centroid(sample, cluster1)
            center_cluster2 = utils.centroid(sample, cluster2)

            # Order the centroid coordinates per axis and remember whether
            # cluster 2 is the upper end (+1) or not (-1).
            if center_cluster2[0] > center_cluster1[0]:
                x_minimum, x_maximum, x_index_maximum = center_cluster1[0], center_cluster2[0], 1
            else:
                x_minimum, x_maximum, x_index_maximum = center_cluster2[0], center_cluster1[0], -1

            if center_cluster2[1] > center_cluster1[1]:
                y_minimum, y_maximum, y_index_maximum = center_cluster1[1], center_cluster2[1], 1
            else:
                y_minimum, y_maximum, y_index_maximum = center_cluster2[1], center_cluster1[1], -1

            print("Cluster 1:", cluster1, ", center:", center_cluster1)
            print("Cluster 2:", cluster2, ", center:", center_cluster2)

            # Pass the empty label positionally: the keyword was renamed from
            # `s` to `text` in matplotlib 3.3.
            stage.annotate('',
                           xy=(center_cluster1[0], center_cluster1[1]),
                           xytext=(center_cluster2[0], center_cluster2[1]),
                           arrowprops=dict(arrowstyle='<->'))

            for index_distance_type, distance_type in enumerate(distances):
                if distance_type == 'euclidian':
                    distance = utils.euclidean_distance(center_cluster1, center_cluster2)
                elif distance_type == 'manhattan':
                    distance = utils.manhattan_distance(center_cluster1, center_cluster2)
                elif distance_type == 'avr-inter':
                    distance = utils.average_inter_cluster_distance(cluster1, cluster2, sample)
                elif distance_type == 'avr-intra':
                    distance = utils.average_intra_cluster_distance(cluster1, cluster2, sample)
                else:  # 'variance'
                    distance = utils.variance_increase_distance(cluster1, cluster2, sample)

                print("\tCluster distance -", distance_type, ":", distance)

                # The label order is reversed on an axis where cluster 1 is
                # the upper end.
                x_multiplier = index_distance_type + 3
                if x_index_maximum < 0:
                    x_multiplier = len(distances) - index_distance_type + 3

                y_multiplier = index_distance_type + 3
                if y_index_maximum < 0:
                    y_multiplier = len(distances) - index_distance_type + 3

                x_text = x_multiplier * (x_maximum - x_minimum) / (len(distances) + 6) + x_minimum
                y_text = y_multiplier * (y_maximum - y_minimum) / (len(distances) + 6) + y_minimum

                stage.text(x_text, y_text,
                           distance_type + " {:.3f}".format(distance),
                           fontsize=9, color='blue')

    plt.show()