def DBscan_step_intuition_dist_multistep_1(step, old_labels, repository,
                                           min_sample=2, eps=85, nb_step=5,
                                           phi_=300, alpha_=1, gamma_=0.998):
    """DBSCAN (Euclidean metric) on discounted multi-step features.

    Builds one feature vector per boid by concatenating
    ``gamma**k * [alpha * positions, phi * velocities]`` for the nb_step
    consecutive frames ``step - nb_step, ..., step - 1`` and clusters it.

    Parameters
    ----------
    step : int
        Current frame index; features are gathered backwards from it.
    old_labels : array-like or None
        Labels of the previous frame, merged in for temporal consistency.
    repository : str
        Directory holding the recorded simulation data.
    min_sample, eps :
        DBSCAN hyper-parameters.
    nb_step : int
        Number of consecutive frames stacked into the feature vector.
    phi_, alpha_, gamma_ : float
        Velocity weight, position weight and per-frame discount factor
        (stored into the module-level globals ``phi``/``alpha``/``gamma``).

    Returns
    -------
    numpy.ndarray
        One cluster label (>= 0) per boid; 0 denotes DBSCAN noise.
    """
    global phi, alpha, gamma
    phi = phi_
    alpha = alpha_
    gamma = gamma_
    positions, velocities, headings = \
        get_positions_velocity_headings(repository, step - nb_step)
    train_data = np.concatenate((alpha * positions, phi * velocities), axis=1)
    # BUG FIX: the exponent range previously was range(1, nb_step - 1), i.e.
    # nb_step - 2 values, while the frame range yields nb_step - 1 values;
    # zip() therefore silently dropped the last frame (step - 1) from the
    # feature vector.  range(1, nb_step) pairs one exponent per frame.
    for k, i in zip(range(1, nb_step, 1),
                    range(step - nb_step + 1, step, 1)):
        positions, velocities, headings = \
            get_positions_velocity_headings(repository, i)
        train_data = np.concatenate(
            (train_data,
             gamma**k * np.concatenate((alpha * positions, phi * velocities),
                                       axis=1)),
            axis=1)
    start = time.time()
    db = DBSCAN(eps=eps, min_samples=min_sample).fit(train_data)
    end = time.time()
    print("clustering done in: {0} seconds".format(end - start))
    labels = db.labels_ + 1  # shift so DBSCAN's noise label -1 becomes 0
    if old_labels is not None:
        labels = merge_labels(old_labels, labels)
    stock_labels(labels, step, repository=repository,
                 filename="DBSCAN_intuition_distmultisteps_phi=" + str(phi) +
                 "_alpha=" + str(alpha) + "gamma=" + str(gamma) +
                 "nb_step=" + str(nb_step) + "_label")
    return labels
def DBscan_step_positions_and_velocity(step, old_labels, repository, alpha=1,
                                       beta=27, eps=85, min_sample=2):
    """Cluster one frame with DBSCAN on [alpha * positions, beta * velocities].

    Loads frame `step` from `repository`, clusters the weighted
    position/velocity features, shifts labels so noise (-1) becomes 0,
    optionally merges with `old_labels` for temporal consistency, stores the
    result on disk and returns it.
    """
    positions, velocities, headings = \
        get_positions_velocity_headings(repository, step)
    features = np.concatenate((alpha * positions, beta * velocities), axis=1)

    t0 = time.time()
    clustering = DBSCAN(eps=eps, min_samples=min_sample).fit(features)
    print("clustering done in: {0} seconds".format(time.time() - t0))

    # +1 shifts DBSCAN's noise label -1 up to 0
    labels = clustering.labels_ + 1
    if old_labels is not None:
        labels = merge_labels(old_labels, labels)

    stock_labels(labels, step, repository=repository,
                 filename="DBSCAN_position|velocity_eps=" + str(eps) +
                 "min_sample=" + str(min_sample) + "alpha=" + str(alpha) +
                 "beta=" + str(beta) + "label")
    return labels
def graph_step(step, repository="simulation_data/"):
    """Color boids of frame `step` via label propagation on their graph.

    Builds the interaction graph from the frame's state, runs label
    propagation independently on every connected component, and returns a
    list of colors ordered by boid index.  (If the graph has no edges at
    all, `list_color` is never bound — behavior kept from the original.)
    """
    positions, velocities, headings = \
        get_positions_velocity_headings(repository, step)
    graph = build_graph(positions, velocities, headings)
    if len(graph.edges) > 0:
        colors = dict()
        components, renamings = connected_components_graph(graph)
        for component, renaming in zip(components, renamings):
            # invert the node -> local-index renaming of this component
            original_name = {local: node for node, local in renaming.items()}
            if len(component.edges) > 0:
                # component with at least one edge: propagate labels
                membership = label_prop(component)
                palette = membership_to_colorlist(membership)
                for local in range(len(membership)):
                    colors[original_name[local]] = palette[local]
            else:
                # single isolated node: default color
                colors[original_name[0]] = TRIANGLES_COLORS[0]
        ordered = collections.OrderedDict(sorted(colors.items()))
        list_color = list(ordered.values())
    return list_color
def DBscan_step_intuition_dist_multistep(step, old_labels, repository,
                                         min_sample=2, eps=85, nb_step=3,
                                         gamma=0.5):
    """DBSCAN over nb_step stacked frames with the custom multi-step metric.

    Concatenates [positions, velocities] for frames
    ``step, ..., step + nb_step - 1`` and clusters them either with a
    precomputed pairwise-distance matrix (fast path) or with the callable
    metric ``linear_comb_dist12_multiplestep``.

    Parameters
    ----------
    step : int
        First frame of the window.
    old_labels : array-like or None
        Previous labels, merged in for temporal consistency.
    repository : str
        Directory holding the recorded simulation data.
    min_sample, eps :
        DBSCAN hyper-parameters.
    nb_step : int
        Number of consecutive frames stacked into the feature vector.
    gamma : float
        Discount factor, only used in the stored filename here.

    Returns
    -------
    numpy.ndarray
        One cluster label (>= 0) per boid; 0 denotes DBSCAN noise.
    """
    global phi, alpha
    precomputed_ = True
    train_data = None
    for i in range(step, step + nb_step, 1):
        # BUG FIX: the loop previously fetched frame `step` on every
        # iteration (loop variable `i` unused), so the same frame was
        # stacked nb_step times instead of the nb_step successive frames.
        positions, velocities, headings = \
            get_positions_velocity_headings(repository, i)
        if train_data is None:
            train_data = np.concatenate((positions, velocities), axis=1)
        else:
            train_data = np.concatenate(
                (train_data,
                 np.concatenate((positions, velocities), axis=1)),
                axis=1)
    start = time.time()
    if precomputed_:
        # fast path: hand DBSCAN the full pairwise-distance matrix directly
        train_data = linear_comb_dist12_multistep_precomputed(train_data)
        db = DBSCAN(eps=eps, min_samples=min_sample,
                    metric='precomputed').fit(train_data)
    else:
        db = DBSCAN(eps=eps, min_samples=min_sample,
                    metric=linear_comb_dist12_multiplestep).fit(train_data)
    end = time.time()
    print("clustering done in: {0} seconds".format(end - start))
    labels = db.labels_ + 1  # shift so DBSCAN's noise label -1 becomes 0
    if old_labels is not None:
        labels = merge_labels(old_labels, labels)
    stock_labels(labels, step, repository=repository,
                 filename="DBSCAN_intuition_distmultisteps_phi=" + str(phi) +
                 "_alpha=" + str(alpha) + "gamma=" + str(gamma) + "_label")
    return labels
def animate_label_prop(self, time):
    """Animation callback: load the current frame, color boids by label
    propagation (via graph_step), advance the step and redraw triangles."""
    boids = self.boids
    boids.positions, boids.velocities, boids.headings = \
        get_positions_velocity_headings(self.repository, self.step)
    palette = graph_step(self.step, repository=self.repository)
    self.step += 1
    self.triangles.set_colors(palette)
    self.triangles.update_triangles(boids.headings, boids.positions)
def animate_rerun_step(self, time):
    """Standard rerun callback: replay one recorded frame.

    Loads the state for the current step from self.repository, advances
    the step counter, and redraws the triangles.
    """
    frame = get_positions_velocity_headings(self.repository, self.step)
    self.boids.positions, self.boids.velocities, self.boids.headings = frame
    self.step += 1
    self.triangles.update_triangles(self.boids.headings,
                                    self.boids.positions)
def build_ground_truth(step, old_labels, repository, list_nb_boids, beta=23,
                       eps=75, min_sample=2):
    """ build ground truth with DBscan on positions """
    # Each species occupies a contiguous index range of size
    # list_nb_boids[s]; DBSCAN (positions only) is run per species and each
    # species' labels are offset by sum_boids so clusters from different
    # species never share an id.
    # NOTE(review): indentation reconstructed from a collapsed source line;
    # the old_labels merge is placed inside the species loop because it uses
    # the loop-local `indices` -- confirm against the original layout.
    positions, velocities, headings = \
        get_positions_velocity_headings(repository, step)
    labels = np.zeros(positions.shape[0], dtype=int)
    sum_boids = 0
    for nb_boids in list_nb_boids:
        indices = np.arange(sum_boids, sum_boids + nb_boids)
        # sum_boids is advanced BEFORE labeling, so the offset applied below
        # is the cumulative count including this species.
        sum_boids = sum_boids + nb_boids
        """
        train_data = np.concatenate((positions[indices],
                                     beta * velocities[indices]), axis=1)
        """
        # NOTE(review): `beta` is only used by the disabled variant above;
        # the active ground truth clusters on positions alone.
        train_data = positions[indices]
        db = DBSCAN(eps=eps, min_samples=min_sample).fit(train_data)
        for ind_0, ind in zip(np.arange(0, nb_boids), indices):
            # we keep the zeros, we apply + 50 to the other labels
            labels[ind] = np.where(db.labels_[ind_0] > -1,
                                   db.labels_[ind_0] + sum_boids, 0)
        # for getting rid of -1 labels
        # we apply db.labels + sum_boids to differentiate clusters
        # from different species
        if old_labels is not None:
            labels[indices] = merge_labels(old_labels[indices],
                                           labels[indices])
    stock_labels(labels, step, repository=repository,
                 filename="ground_truth_label")
    return labels
def animate_DBscan_positions_and_velocities(self, time):
    """Animation callback: cluster the current frame with DBSCAN on
    positions + velocities, recolor the triangles, advance one step."""
    current = self.step
    self.boids.positions, self.boids.velocities, self.boids.headings = \
        get_positions_velocity_headings(self.repository, current)
    self.old_labels = DBscan_step_positions_and_velocity(
        current, self.old_labels, self.repository)
    self.triangles.set_colors(labels_to_colorlist(self.old_labels))
    self.triangles.update_triangles(self.boids.headings,
                                    self.boids.positions)
    self.step += 1
def animate_labels_the_data(self, time):
    """Animation callback: label the current frame with the per-species
    ground-truth clustering, recolor the triangles, advance one step."""
    current = self.step
    self.boids.positions, self.boids.velocities, self.boids.headings = \
        get_positions_velocity_headings(self.repository, current)
    self.old_labels = build_ground_truth(current, self.old_labels,
                                         self.repository,
                                         self.list_num_boids)
    self.triangles.set_colors(labels_to_colorlist(self.old_labels))
    self.triangles.update_triangles(self.boids.headings,
                                    self.boids.positions)
    self.step += 1
def animate_rerun_DBSCAN_intuition_metric(self, time, phi=100, alpha=1.2):
    """ Rerun DBSCAN algorithm with clusters results from DBSCAN with
    "intuition metrics" """
    # Replay mode: labels are not recomputed but loaded from the files
    # written by the intuition-metric DBSCAN run.
    self.boids.positions, self.boids.velocities, self.boids.headings = \
        get_positions_velocity_headings(self.repository, self.step)
    label_path = ("data/" + self.path + "DBSCAN_intuition_dist_phi=" +
                  str(phi) + "_alpha=" + str(alpha) + "_label" +
                  str(self.step))
    stored_labels = np.loadtxt(label_path)
    self.step += 1
    self.triangles.set_colors(
        labels_to_colorlist(np.array(stored_labels, dtype=int)))
    self.triangles.update_triangles(self.boids.headings,
                                    self.boids.positions)
def animate_DBscan_intuition_metric_multistep(self, time, steps=3):
    # Animation callback: run the multi-step intuition-distance DBSCAN and
    # advance the replay by `steps` frames per call.
    # NOTE(review): indentation reconstructed from a collapsed source line;
    # the whole body (including self.step += 1) is assumed to sit inside the
    # loop, so each call processes `steps` successive frames -- confirm
    # against the original layout.  The loop variable `i` is unused; the
    # iteration count is what matters since self.step advances in the body.
    for i in range(self.step, self.step + steps):
        self.boids.positions, self.boids.velocities, self.boids.headings = \
            get_positions_velocity_headings(self.repository, self.step)
        self.old_labels = DBscan_step_intuition_dist_multistep(
            self.step, self.old_labels, self.repository, nb_step=steps)
        color_list = labels_to_colorlist(self.old_labels)
        self.triangles.set_colors(color_list)
        self.triangles.update_triangles(self.boids.headings,
                                        self.boids.positions)
        self.step += 1
def DBscan_step_intuition_dist(step, old_labels, repository, min_sample=2,
                               eps=85, phi_=10, alpha_=1):
    """Cluster one frame with DBSCAN under the custom metric
    linear_comb_dist12 applied to concatenated [positions, velocities].

    phi_/alpha_ are published to the module globals phi/alpha (read by the
    metric), labels are shifted so noise (-1) becomes 0, optionally merged
    with old_labels, stored on disk and returned.
    """
    global phi, alpha
    phi = phi_
    alpha = alpha_

    positions, velocities, headings = \
        get_positions_velocity_headings(repository, step)
    features = np.concatenate((positions, velocities), axis=1)

    t0 = time.time()
    db = DBSCAN(eps=eps, min_samples=min_sample,
                metric=linear_comb_dist12).fit(features)
    print("clustering done in: {0} seconds".format(time.time() - t0))

    # +1 shifts DBSCAN's noise label -1 up to 0
    labels = db.labels_ + 1
    if old_labels is not None:
        labels = merge_labels(old_labels, labels)

    stock_labels(labels, step, repository=repository,
                 filename="DBSCAN_intuition_dist_phi=" + str(phi) +
                 "_alpha=" + str(alpha) + "_label")
    return labels