Пример #1
0
    def __init__(self, time, categories, dimensions, values, data):
        """
        Open the database handles, cluster the inputs and build the datacube.

        Side effects: (re)binds the module-level globals ``conn`` (psycopg2
        connection) and ``engine`` (SQLAlchemy engine). If either cannot be
        created, the exception is printed and the process exits with
        status 1.

        Args
        ----
            time: Appended to the reduced dimensions with ``+``
                (presumably a list of time columns -- confirm with callers).
            categories: Stored unchanged on ``self.categories``.
            dimensions: Candidate dimensions passed to Clustering.Cluster.
            values: Candidate values passed to Clustering.Cluster.
            data: Stored on ``self.data`` and passed to Clustering.Cluster
                and PatternStore.create_table_object (presumably a table
                name -- confirm with callers).
        """
        # Both globals are declared once, before any use. Repeating
        # `global conn` after conn has been assigned is a SyntaxError on
        # Python 3.6+.
        global conn, engine

        try:
            conn = psycopg2.connect(dbname='postgres',
                                    user='******',
                                    host='localhost',
                                    password='******')
        except psycopg2.DatabaseError as ex:
            print(ex)
            sys.exit(1)

        try:
            engine = create_engine(
                'postgresql://*****:*****@localhost:5432/postgres',
                echo=True)
        except Exception as ex:
            print(ex)
            sys.exit(1)

        self.cursor = conn.cursor()
        self.data = data
        self.categories = categories

        # NOTE: to test without clustering, skip the Clustering.Cluster call
        # and assign `self.values = values` and
        # `self.dimensions = dimensions + time` directly.
        reduced_dimensions, reduced_values = Clustering.Cluster(
            dimensions, values, self.data, conn)
        print(reduced_dimensions)
        print(reduced_values)

        self.dimensions = reduced_dimensions + time
        self.values = reduced_values

        PatternStore.create_table_object(self.data)
        self.formDatacube()
Пример #2
0
def kmeans(examples: list, numClusters: int, verbose: bool = False,
           power: int = 2) -> list:
    """
    Partition examples into numClusters clusters using k-means.

    Args
    ----
        examples (list): List of sample points. Each point must provide a
            ``distance(other, power)`` method.
        numClusters (int): Number of clusters to generate
        verbose (bool, optional): Print the iteration number and every
            cluster after each iteration. Defaults to False.
        power (int, optional): Exponent passed to ``distance`` for every
            comparison: 1 selects Manhattan distance, 2 selects Euclidean
            distance. Defaults to 2.

    Returns
    -------
        list: Current set of defined clusters.

    Raises
    ------
        ValueError: If an iteration leaves a cluster with no members, or
            (from random.sample) if numClusters is larger than the number
            of examples.

    Algorithm
    ---------
        Randomly choose numClusters examples as initial centroids
        while true
          - Assign each member of passed in examples to the cluster whose
            centroid is closest
          - Compute updated clusters centroid with the addition of new members
          - If new cluster centroids are not different than they were in the
            previous iteration of the while loop
            - return the current set of clusters

    """
    # Seed one single-member cluster per randomly chosen centroid.
    initialCentroids = random.sample(examples, numClusters)
    clusters = [Clustering.Cluster([centroid])
                for centroid in initialCentroids]

    #
    # Iterate until centroids do not change (converge)
    #
    converged = False
    numIterations = 0
    while not converged:
        numIterations += 1

        # numClusters distinct empty lists, one per cluster.
        newClusters = [[] for _ in range(numClusters)]

        #
        # Associate each example with the cluster with the closest centroid
        #
        for example in examples:
            # Start from the first centroid and keep the closest one seen.
            # The same power is used for the baseline as for every other
            # centroid, so all distances are measured with one metric.
            index = 0
            currentSmallestDistance = example.distance(
                clusters[0].getCentroid(), power)
            for i in range(1, numClusters):
                distance = example.distance(clusters[i].getCentroid(), power)
                if distance < currentSmallestDistance:
                    currentSmallestDistance = distance
                    index = i
            newClusters[index].append(example)

        # A cluster with no members cannot be updated.
        if any(not members for members in newClusters):
            raise ValueError('Empty cluster')

        # Update every cluster (no short-circuit: each one must receive its
        # new members); any non-zero change means another pass is needed.
        converged = True
        for cluster, members in zip(clusters, newClusters):
            if cluster.update(members) > 0.0:
                converged = False

        if verbose:
            print('Iteration num:', numIterations)
            for cluster in clusters:
                print(cluster)
            print("\n")
    return clusters