Пример #1
0
 def get_clusters_falconn(self):
     """Cluster the stored vectors with a FALCONN LSH index.

     Every row of ``self.vector_matrix`` is scaled to unit length and the
     data set is centered on its mean before the index is built. Each
     preprocessed row is then used as a query, with
     ``self.similarity_threshold`` passed to ``find_near_neighbors`` as the
     distance threshold.

     Returns:
         A list holding ``to_serializable_object()`` of one
         ``SimilarityCluster`` per query that has at least one neighbor
         besides itself. Returns an empty list when there are no vectors.
     """
     if len(self.vector_matrix) == 0:
         # Nothing to index; the shape lookups below need at least one row.
         return []

     dataset = np.array(self.vector_matrix)
     if not np.issubdtype(dataset.dtype, np.floating):
         # In-place true division is not defined for integer arrays.
         dataset = dataset.astype(np.float64)

     # Normalize per row (axis=1). Without the axis argument the norm is a
     # single Frobenius norm of the whole matrix, which leaves rows unscaled
     # relative to each other.
     row_norms = np.linalg.norm(dataset, axis=1).reshape(-1, 1)
     # Leave all-zero rows untouched instead of turning them into NaN, which
     # would otherwise propagate into the center and corrupt every row.
     row_norms[row_norms == 0] = 1
     dataset /= row_norms
     # Center on the per-dimension mean (axis=0), not on one scalar mean.
     dataset -= np.mean(dataset, axis=0)

     num_points, dimension = dataset.shape
     falconn_params = falconn.get_default_parameters(num_points, dimension)
     falconn_params.distance_function = "euclidean_squared"
     lsh_index = falconn.LSHIndex(falconn_params)
     lsh_index.setup(dataset)

     serializable_list = []
     # Query with the preprocessed rows so queries live in the same
     # coordinate system as the indexed points.
     for i, query in enumerate(dataset):
         members = list(lsh_index.find_near_neighbors(query, self.similarity_threshold))
         if i not in members:
             # Make sure the query point itself belongs to its cluster,
             # without listing it twice when the index already returned it.
             members.append(i)
         if len(members) < 2:
             continue
         seed = members[0]
         similarity_cluster = SimilarityCluster(self.similarity_threshold,
                                                self.vector_id_list[seed],
                                                self.vector_matrix[seed],
                                                self.start_time_ms,
                                                self.end_time_ms)
         for index in members[1:]:
             similarity_cluster.similar_image_ids.append(self.vector_id_list[index])
             similarity_cluster.apply_vector_to_average(self.vector_matrix[index])
         serializable_list.append(similarity_cluster.to_serializable_object())
     return serializable_list
Пример #2
0
    def process_vector_custom(self,
                              vector_id,
                              post_id,
                              vector,
                              image_url=None):
        """Assign a vector to an existing similarity cluster or start a new one.

        The cluster groups in ``self.similarity_clusters`` are tried in the
        order "high", "medium", "low". The first group for which
        ``process_cluster_set`` returns a match id is reorganized through
        ``organize_cluster`` and processing stops. When no group matches, a
        new ``SimilarityCluster`` is stored in the "low" group under its id.

        Args:
            vector_id: Identifier forwarded to the cluster calls.
            post_id: Identifier forwarded to the cluster calls.
            vector: The vector to process; skipped when its norm is 0.
            image_url: Optional value forwarded to the cluster calls.

        Returns:
            None in every case.
        """
        vector_norm = np.linalg.norm(vector)
        if vector_norm == 0:
            # Parenthesized single-argument form prints the same text on
            # Python 2 and is also valid syntax on Python 3.
            print("normalized vector returned 0, skipping.")
            return

        for tier in ("high", "medium", "low"):
            tier_clusters = self.similarity_clusters[tier]
            match_id = self.process_cluster_set(tier_clusters,
                                                vector_id, post_id, vector,
                                                vector_norm, image_url)
            if match_id is not None:
                self.organize_cluster(match_id, tier_clusters)
                return

        # found no matches, just add a new cluster to the low group
        new_cluster = SimilarityCluster(self.similarity_threshold, vector_id,
                                        post_id, vector, self.start_time_ms,
                                        self.end_time_ms, image_url)
        self.similarity_clusters["low"][new_cluster.id] = new_cluster
Пример #3
0
 def test_positive_similarity_state(self):
     """Processing a vector equal to the starting vector keeps the cluster valid.

     After the one extra vector is processed, ``similar_ids`` is expected
     to contain two entries.
     """
     starting_vector = [1, 1, 1]
     cluster = SimilarityCluster(.9, 0, 0, starting_vector, 0, 0)

     # A separate list object is passed, as two distinct literals would be.
     candidate_vector = [1, 1, 1]
     candidate_norm = np.linalg.norm([1, 1, 1])
     cluster.process_similarity(1, 1, candidate_vector, candidate_norm)

     self.assertTrue(cluster.valid_cluster)
     id_count = len(cluster.similar_ids)
     self.assertTrue(id_count == 2)
Пример #4
0
 def test_empty_starting_vector(self):
     """A cluster created from an empty starting vector must not be valid."""
     empty_vector = []
     cluster_under_test = SimilarityCluster(.9, 0, 0, empty_vector, 0, 0)
     self.assertFalse(cluster_under_test.valid_cluster)
Пример #5
0
 def test_positive_similarity_state(self):
     """Feed a cluster one vector identical to its starting vector.

     The cluster must stay valid and ``similar_ids`` must then have a
     length of two.
     """
     # Threshold .9, ids 0/0, starting vector [1, 1, 1], remaining args 0/0.
     cluster = SimilarityCluster(.9, 0, 0, [1, 1, 1], 0, 0)

     same_vector = [1, 1, 1]
     same_vector_norm = np.linalg.norm([1, 1, 1])
     cluster.process_similarity(1, 1, same_vector, same_vector_norm)

     still_valid = cluster.valid_cluster
     self.assertTrue(still_valid)
     self.assertTrue(len(cluster.similar_ids) == 2)