Example no. 1
def setUp(self):
    # Reset the class-level counter so cluster IDs are deterministic per test
    Cluster.clusterIdCounter = 0
    self.docx = Document(1, {1: 2, 2: 4})
    self.docy = Document(2, {2: 4})
    self.cluster1 = Cluster(self.docx)
    self.cluster2 = Cluster(self.docy)
    self.doc1 = Document(3, Vector({3: 4}))
    self.doc2 = Document(4, Vector({2: 4}))
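A minimal sketch of a test this fixture would support, assuming each Cluster draws its id (clusterId) from the class-level counter at construction, which the reset above suggests; the test name and assertions are illustrative, not from the original suite:

def test_cluster_ids_start_from_zero(self):
    # Hypothetical check: with the counter reset in setUp, the two
    # fixture clusters should have received ids 0 and 1
    self.assertEqual(0, self.cluster1.clusterId)
    self.assertEqual(1, self.cluster2.clusterId)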
Example no. 2
from typing import List

import numpy as np
import pandas as pd

# Scooter and Cluster are project-local classes, assumed imported elsewhere


def generate_cluster_objects(scooter_data: pd.DataFrame,
                             cluster_labels: list) -> List[Cluster]:
    """
    Create Scooter and Cluster objects from scooter data and cluster labels.
    The Cluster class computes the cluster center.
    :param scooter_data: geospatial data for scooters
    :param cluster_labels: list of labels for scooter data
    :return: list of clusters
    """
    # Add cluster labels as a column to the scooter data dataframe
    scooter_data_w_labels = scooter_data.copy()
    scooter_data_w_labels["cluster_labels"] = cluster_labels
    # Group the scooters by cluster label and build one Cluster per label
    clusters = []
    for cluster_label in np.unique(cluster_labels):
        # Filter out scooters within cluster
        cluster_scooters = scooter_data_w_labels[
            scooter_data_w_labels["cluster_labels"] == cluster_label]
        # Generate Scooter objects, using the dataframe index as ID
        scooters = [
            Scooter(row["lat"], row["lon"], row["battery"], index)
            for index, row in cluster_scooters.iterrows()
        ]
        # Add all scooters to the cluster so it can compute its center location
        clusters.append(Cluster(cluster_label, scooters))
    return sorted(clusters, key=lambda cluster: cluster.id)
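As a usage sketch (Scooter and Cluster are project-specific classes assumed to be importable; the lat, lon, and battery column names come from the function body above, and the coordinate and label values are made up):

scooter_data = pd.DataFrame({
    "lat": [59.91, 59.92, 59.95],
    "lon": [10.75, 10.76, 10.80],
    "battery": [80.0, 55.0, 90.0],
})
cluster_labels = [0, 0, 1]  # one label per row, e.g. from a clustering fit
clusters = generate_cluster_objects(scooter_data, cluster_labels)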
Example no. 3
def inspect_cluster_size(clustered_path: str, sense_inventory_path: str,
                         fig_path: str):
    # In-function imports, matching the original's local import of Cluster
    import pickle
    from typing import Dict, List

    from matplotlib import pyplot
    from tqdm import tqdm

    from classes import Cluster
    # cluster_dict_original: Dict[str, List[Cluster]] = pickle.load(open(sense_inventory_path, "rb"))
    # clusters_count_original: int = sum([len(y.cluster_items) for x in cluster_dict_original.values() for y in x])
    with open(clustered_path, "rb") as f:
        cluster_dict_raw: Dict[str, List[Cluster]] = pickle.load(f)
    # Sentinel: an empty cluster for the size comparison below
    largest_cluster: Cluster = Cluster([])
    for lemma in tqdm(cluster_dict_raw):
        for cluster in cluster_dict_raw[lemma]:
            if len(cluster.cluster_items) > len(largest_cluster.cluster_items):
                largest_cluster = cluster
    print(
        f"Max cluster size for {largest_cluster.get_dominant_lemma()}: {len(largest_cluster.cluster_items)}"
    )
    merged_clusters: List[Cluster] = [
        y for x in cluster_dict_raw.values() for y in x
        if len(y.cluster_items) > 1
    ]
    merged_clusters.sort(key=lambda x: len(x.cluster_items), reverse=True)
    merged_cluster_items_count: int = sum(
        [len(x.cluster_items) for x in merged_clusters])
    # Plot every merged cluster except the largest one (index 0 after sorting)
    x_data: List[int] = list(range(len(merged_clusters) - 1))
    y_data: List[int] = [len(x.cluster_items) for x in merged_clusters[1:]]
    pyplot.scatter(x_data, y_data)
    pyplot.xlabel("Cluster ID")
    pyplot.ylabel("Number of contained usage contexts")
    pyplot.title("Distribution of cluster size")
    pyplot.savefig(fig_path, dpi=600)
    pyplot.show()
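A minimal invocation, assuming clustered_path points at a pickle of the Dict[str, List[Cluster]] shape annotated above; the file names are placeholders:

inspect_cluster_size("clusters.pkl", "sense_inventory.pkl", "cluster_sizes.png")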
Example no. 4
import numpy as np
import matplotlib.pyplot as plt


def json_process(json_out):
    # Pull the point coordinates out of the current frame
    data = json_out["frame"]
    Xs = [i['x'] for i in data]
    Ys = [i['y'] for i in data]
    cluster_in = np.column_stack((Xs, Ys))
    # eps and min_samples mirror DBSCAN's density parameters
    cluster = Cluster(cluster_in, eps=0.35, min_samples=3)
    cluster.plot(fig=plt)
    plt.xlim(-10, 10)
    plt.ylim(-0.9, 18)
    plt.pause(0.00000001)
    plt.clf()
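For reference, a sketch of the expected input shape, assuming json_out carries one frame of points keyed by 'x' and 'y' as the comprehensions above require; the coordinates are made up so the three nearby points can form one dense group at eps=0.35, min_samples=3:

json_out = {"frame": [{"x": 0.1, "y": 2.0},
                      {"x": 0.2, "y": 2.1},
                      {"x": 0.3, "y": 2.0},
                      {"x": 5.0, "y": 9.0}]}
json_process(json_out)  # clusters the frame and refreshes the plot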
Example no. 5
def process_data(data):
    # Cluster the given x/y points and refresh the live plot
    Xs = [i['x'] for i in data]
    Ys = [i['y'] for i in data]
    cluster_in = np.column_stack((Xs, Ys))
    cluster = Cluster(cluster_in, eps=0.35, min_samples=3)
    cluster.plot(fig=plt)
    plt.xlim(-10, 10)
    plt.ylim(-0.9, 18)
    plt.pause(0.00000001)
    plt.clf()
Example no. 6
def getClusterAndUpdateExistingClusters(self, document):
    predictedCluster = self.getClusterForDocument(document)
    if predictedCluster is not None:
        # The document matches an existing cluster; file it there
        self.clusters[predictedCluster].addDocument(document)
    else:
        # No match: open a new cluster seeded with this document
        newCluster = Cluster(document)
        newCluster.setSignatureUsingVectorPermutations(
            self.unitVector, self.vectorPermutations,
            self.phraseTextAndDimensionMap)
        # Register the new cluster in every signature permutation index
        for permutation in self.signaturePermutations:
            permutation.addDocument(newCluster)
        self.clusters[newCluster.clusterId] = newCluster
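A hedged sketch of how this method might be driven, assuming the surrounding class is a streaming clusterer fed one document at a time; document_stream and stream_clusterer are illustrative names, not from the original:

# Each call either files the document under an existing cluster or
# opens a new cluster seeded with it
for document in document_stream:
    stream_clusterer.getClusterAndUpdateExistingClusters(document)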
Example no. 7
def json_process(json_out):
    data = json_out["frame"]
    Xs = [i['x'] for i in data]
    Ys = [i['y'] for i in data]
    cluster_in = np.column_stack((Xs, Ys))
    cluster = Cluster(cluster_in, eps=0.35, min_samples=3)
    cluster.plot(fig=plt)
    # plt.scatter(Xs, Ys)
    plt.xlim(-10, 10)
    plt.ylim(-0.9, 18)
    # plt.set_xbound(lower=xmin, upper=xmax)
    # plt.set_ybound(lower=ymin, upper=ymax)
    plt.pause(0.00000001)
    plt.clf()
Example no. 8
# $Id$
#
# pylint: disable-msg=E1101,W0612,W0142
#
"""superclass for all content-objects
"""

__version__ = "$Id$"

# Python imports
from classes import Cluster, Node, Ressource
import MenuSystem as menusystem
import hb_mini

glbmanager = hb_mini.miniManager()
cluster1 = Cluster("172.16.10.172", "hacluster", "ddd", "172.16.10.172", glbmanager)
if glbmanager.login(cluster1.ip, cluster1.user, cluster1.passwd):
    print "Connected successfully"
else:
    print "Connection failure"
    exit()
    

def printNodeNames(data):
    print "\n"
    for node in cluster1.getNodes():
        print "Node: %s" % node.name

def printActiveNodes(data):
    print "\n"
    for node in cluster1.getActiveNodes():