def setUp(self):
    """Reset the shared Cluster id counter and build fixture documents and clusters.

    Runs before each test so every test starts with cluster ids from zero.
    """
    Cluster.clusterIdCounter = 0
    # Fixture documents: docx/docy use plain dicts, doc1/doc2 use Vector —
    # presumably both are accepted by Document; confirm against Document's API.
    self.docx = Document(1, {1: 2, 2: 4})
    self.docy = Document(2, {2: 4})
    self.doc1 = Document(3, Vector({3: 4}))
    self.doc2 = Document(4, Vector({2: 4}))
    # Clusters are created after their seed documents; creation order matters
    # because Cluster ids are drawn from the counter reset above.
    self.cluster1 = Cluster(self.docx)
    self.cluster2 = Cluster(self.docy)
def generate_cluster_objects(scooter_data: pd.DataFrame,
                             cluster_labels: list) -> "list[Cluster]":
    """
    Create Scooter and Cluster objects from scooter data and cluster labels.

    The Cluster constructor derives the cluster center from the scooters
    handed to it.

    :param scooter_data: geospatial data for scooters; assumes "lat", "lon"
        and "battery" columns — TODO confirm against callers
    :param cluster_labels: one cluster label per row of scooter_data
    :return: list of Cluster objects sorted by cluster id
    """
    # Attach the labels as an extra column so rows can be filtered per cluster.
    scooter_data_w_labels = scooter_data.copy()
    scooter_data_w_labels["cluster_labels"] = cluster_labels

    clusters = []
    for cluster_label in np.unique(cluster_labels):
        # Rows belonging to this cluster.
        cluster_scooters = scooter_data_w_labels[
            scooter_data_w_labels["cluster_labels"] == cluster_label]
        # The DataFrame index doubles as the scooter ID.
        scooters = [
            Scooter(row["lat"], row["lon"], row["battery"], index)
            for index, row in cluster_scooters.iterrows()
        ]
        # Cluster computes its center location from the supplied scooters.
        clusters.append(Cluster(cluster_label, scooters))
    return sorted(clusters, key=lambda cluster: cluster.id)
def inspect_cluster_size(clustered_path: str, sense_inventory_path: str,
                         fig_path: str):
    """Report the largest cluster and plot the size distribution of merged clusters.

    :param clustered_path: path to a pickled Dict[str, List[Cluster]]
    :param sense_inventory_path: currently unused — kept for interface
        compatibility with callers
    :param fig_path: file path where the scatter plot is saved (600 dpi)
    """
    from classes import Cluster

    # NOTE(review): pickle.load executes arbitrary code from the file — only
    # use with trusted input. `with` ensures the file handle is closed.
    with open(clustered_path, "rb") as pickle_file:
        cluster_dict_raw: Dict[str, List[Cluster]] = pickle.load(pickle_file)

    # Find the single largest cluster across all lemmata.
    largest_cluster: Cluster = Cluster([])
    for lemma in tqdm(cluster_dict_raw):
        for cluster in cluster_dict_raw[lemma]:
            if len(cluster.cluster_items) > len(largest_cluster.cluster_items):
                largest_cluster = cluster
    print(
        f"Max cluster size for {largest_cluster.get_dominant_lemma()}: {len(largest_cluster.cluster_items)}"
    )

    # "Merged" clusters are those holding more than one usage context.
    merged_clusters: List[Cluster] = [
        y for x in cluster_dict_raw.values() for y in x
        if len(y.cluster_items) > 1
    ]
    merged_clusters.sort(key=lambda x: len(x.cluster_items), reverse=True)

    # Skip the largest (first) cluster so it does not dominate the y-axis.
    x_data: List[int] = list(range(len(merged_clusters) - 1))
    y_data: List[int] = [len(x.cluster_items) for x in merged_clusters[1:]]
    pyplot.scatter(x_data, y_data)
    pyplot.xlabel("Cluster ID")
    pyplot.ylabel("Number of contained usage contexts")
    pyplot.title("Distribution of cluster size")
    pyplot.savefig(fig_path, dpi=600)
    pyplot.show()
def json_process(json_out):
    """Cluster the (x, y) points of one frame in *json_out* and refresh the plot."""
    frame = json_out["frame"]
    # Column-stack the per-point coordinates into an (N, 2) array.
    points = np.column_stack(([p['x'] for p in frame],
                              [p['y'] for p in frame]))
    # eps/min_samples suggest DBSCAN-style density clustering — presumably;
    # confirm against the Cluster class.
    Cluster(points, eps=0.35, min_samples=3).plot(fig=plt)
    plt.xlim(-10, 10)
    plt.ylim(-0.9, 18)
    plt.pause(0.00000001)  # yield to the GUI event loop so the frame renders
    plt.clf()
def process_data(data):
    """Cluster a list of {'x': ..., 'y': ...} points and redraw the live plot."""
    xs = [point['x'] for point in data]
    ys = [point['y'] for point in data]
    samples = np.column_stack((xs, ys))
    clusterer = Cluster(samples, eps=0.35, min_samples=3)
    clusterer.plot(fig=plt)
    # Fixed axis limits keep successive frames visually comparable.
    plt.xlim(-10, 10)
    plt.ylim(-0.9, 18)
    plt.pause(0.00000001)  # brief pause lets the figure window update
    plt.clf()
def getClusterAndUpdateExistingClusters(self, document):
    """Assign *document* to its predicted cluster, or create a new cluster for it.

    If no existing cluster matches, a new Cluster seeded with the document is
    built, signed via the vector permutations, registered with every signature
    permutation, and stored in self.clusters under its cluster id.
    """
    predictedCluster = self.getClusterForDocument(document)
    # `is not None` — identity check is the correct Pythonic None test.
    if predictedCluster is not None:
        self.clusters[predictedCluster].addDocument(document)
    else:
        newCluster = Cluster(document)
        newCluster.setSignatureUsingVectorPermutations(
            self.unitVector, self.vectorPermutations,
            self.phraseTextAndDimensionMap)
        # Register the new cluster with all signature permutations so it can
        # be found by future lookups.
        for permutation in self.signaturePermutations:
            permutation.addDocument(newCluster)
        self.clusters[newCluster.clusterId] = newCluster
def json_process(json_out):
    """Cluster one frame of points from *json_out* and refresh the live plot.

    :param json_out: mapping with a "frame" entry holding dicts with 'x'/'y'
        keys — presumably one dict per detected point; confirm against caller
    """
    data = json_out["frame"]
    Xs = [i['x'] for i in data]
    Ys = [i['y'] for i in data]
    cluster_in = np.column_stack((Xs, Ys))
    cluster = Cluster(cluster_in, eps=0.35, min_samples=3)
    cluster.plot(fig=plt)
    # Fixed limits keep successive frames visually comparable.
    plt.xlim(-10, 10)
    plt.ylim(-0.9, 18)
    plt.pause(0.00000001)  # yield briefly so the GUI can redraw
    plt.clf()
# $Id$ # # pylint: disable-msg=E1101,W0612,W0142 # """superclass for all content-objects """ __version__ = "$Id$" # phython imports from classes import Cluster, Node, Ressource import MenuSystem as menusystem import hb_mini glbmanager = hb_mini.miniManager() cluster1 = Cluster("172.16.10.172", "hacluster", "ddd", "172.16.10.172", glbmanager) if(glbmanager.login(cluster1.ip, cluster1.user, cluster1.passwd) == True): print "Connected succesfully" else: print "Connection failure" exit() def printNodeNames(data): print "\n" for node in cluster1.getNodes(): print "Node: %s" % node.name def printActiveNodes(data): print "\n" for node in cluster1.getActiveNodes():