def __init__(self, clusters: ClusterResults, rates: List[float]) -> None:
    """
    Group rated image paths by their cluster ID and order each group
    from highest to lowest rating.

    :param clusters: clustering results supplying labels and URLs.
    :param rates: one rating per image, aligned with the cluster labels.
    """
    buckets = [[] for _ in range(clusters.k())]
    triples = zip(clusters.labels(), rates, clusters.get_all_urls())
    for label, rating, url in triples:
        buckets[label].append({
            'path': url,
            'rating': rating,
            'cluster': label,
        })
    # Best-rated entries first within every cluster.
    for bucket in buckets:
        bucket.sort(key=lambda entry: entry['rating'], reverse=True)
    self._results = buckets
def run(
        self,
        images: List[Url],
        nfeatures: int = 0,
        nOctaveLayers: int = 3,
        contrastThreshold: float = 0.04,
        edgeThreshold: float = 10,
        sigma: float = 1.6,
        ratio: float = 0.8,
        similarity_metric: SimilarityMetric = SimilarityMetric.INVERSE_DISTANCE,
        damping: float = 0.5,
        max_iter: int = 200,
        convergence_iter: int = 15,
        affinity: AffinityPropagationAffinity = AffinityPropagationAffinity.EUCLIDEAN,
        descriptor_matcher: DescriptorMatcher = DescriptorMatcher.FLANNBASED,
        hue_bins: int = 180,
        saturation_bins: int = 256,
        value_bins: int = 256,
        bandwidth: Optional[float] = None,
) -> ClusterResults:
    """
    Cluster images with the cached SIFT clusterer and the cached HSV
    histogram clusterer, then fold each image's pair of labels into a
    single flat label via ``self.combine``.

    :returns: merged clustering over the input images.
    """
    sift_results = SiftCluster2().run_cached(
        images,
        nfeatures=nfeatures,
        nOctaveLayers=nOctaveLayers,
        contrastThreshold=contrastThreshold,
        edgeThreshold=edgeThreshold,
        sigma=sigma,
        ratio=ratio,
        similarity_metric=similarity_metric,
        damping=damping,
        max_iter=max_iter,
        convergence_iter=convergence_iter,
        affinity=affinity,
        descriptor_matcher=descriptor_matcher,
    )
    hist_results = HistogramCluster().run_cached(
        images,
        hue_bins=hue_bins,
        saturation_bins=saturation_bins,
        value_bins=value_bins,
        bandwidth=bandwidth,
    )
    k_sift = sift_results.k()
    merged = [
        self.combine(sift_label, hist_label, k_sift)
        for sift_label, hist_label
        in zip(sift_results.labels(), hist_results.labels())
    ]
    self.remove_empty_clusters(merged)
    return ClusterResults(images, merged)
def run(self, images: List[Url]) -> ClusterResults:
    """
    Clusters images.

    Runs both member clusterers over the same image list and combines
    each image's (SIFT label, histogram label) pair into one flat label.
    """
    by_sift = self._sift.run(images)
    by_hist = self._hist.run(images)
    k = by_sift.k()
    merged = [
        self.combine(s, h, k)
        for s, h in zip(by_sift.labels(), by_hist.labels())
    ]
    self.remove_empty_clusters(merged)
    return ClusterResults(images, merged)
def run(self, images: List[Url]) -> ClusterResults:
    """
    Clusters images.

    Hierarchical scheme: cluster first by HSV histogram, then
    sub-cluster each histogram cluster by SIFT; the (histogram, SIFT)
    label pair is folded into one flat label via ``self.combine``.

    :param images: URLs of the images to cluster.
    :returns: flat cluster assignment for every input image.
    """
    results1 = self._hist.run(images)
    # Hoisted out of the loops: k() is invariant across iterations.
    k1 = results1.k()
    # First-occurrence index per URL. Replaces an O(n) list.index call
    # inside the innermost loop while preserving list.index semantics
    # for duplicate URLs (first match wins).
    index_of = {}
    for i, url in enumerate(images):
        index_of.setdefault(url, i)
    cluster = [-1] * len(images)
    for label1, urls1 in enumerate(results1.urls()):
        results2 = self._sift.run(urls1)
        for label2, urls2 in enumerate(results2.urls()):
            label3 = self.combine(label1, label2, k1)
            for url2 in urls2:
                # KeyError here (vs. ValueError from list.index) means a
                # sub-clusterer returned a URL not present in the input.
                cluster[index_of[url2]] = label3
    self.remove_empty_clusters(cluster)
    return ClusterResults(images, cluster)
def run(self, images: List[Url]) -> ClusterResults:
    """
    Creates descriptors of images.
    Groups images together by how similar their descriptors are.
    Returns a cluster ID for each set of descriptors.
    """
    print("Creating descriptors from images....")
    descriptor_set = SiftDescriptorSet(images)
    print('Normalizing descriptors to unit vectors....')
    descriptor_set.unit_normalize()
    print('Similarity matrix....')
    similarity = SimilarityMatrix(descriptor_set.descriptors, self._similarity)
    print('Scaling each row of the similarity matrix....')
    similarity.scale()
    print('Clustering by affinity propagation....')
    labels = AffinityPropagation(random_state=0).fit_predict(similarity.matrix)
    return ClusterResults(images, labels.tolist())
def run(
        self,
        images: List[Url],
        hue_bins: int = 180,
        saturation_bins: int = 256,
        value_bins: int = 256,
        bandwidth: Optional[float] = None,
) -> ClusterResults:
    """
    Clusters images.

    Builds a scaled HSV histogram per image, stacks them into a feature
    matrix, and clusters the rows with MeanShift.
    """
    total = len(images)
    features = []
    for index, url in enumerate(images):
        print("HISTOGRAM: %i / %i" % (index, total))
        histogram = HsvHistogram(url)
        raw = histogram.hsv(hue_bins, saturation_bins, value_bins)
        # Normalize by image size so images of different resolutions compare.
        features.append(HsvHistogram.scale(raw, histogram.size()))
    stacked = vstack(features)
    print('CLUSTER: Mean Shift')
    labels = MeanShift(bandwidth=bandwidth).fit_predict(stacked)
    return ClusterResults(images, labels.tolist())
def run(
        self,
        images: List[Url],
        nfeatures: int = 0,
        nOctaveLayers: int = 3,
        contrastThreshold: float = 0.04,
        edgeThreshold: float = 10,
        sigma: float = 1.6,
        ratio: float = 0.8,
        similarity_metric: SimilarityMetric = SimilarityMetric.INVERSE_DISTANCE,
        damping: float = 0.5,
        max_iter: int = 200,
        convergence_iter: int = 15,
        affinity: AffinityPropagationAffinity = AffinityPropagationAffinity.EUCLIDEAN,
        descriptor_matcher: DescriptorMatcher = DescriptorMatcher.FLANNBASED,
) -> ClusterResults:
    """
    Clusters images by pairwise SIFT-descriptor similarity.

    Extracts SIFT descriptors per image, fills a symmetric similarity
    matrix over all image pairs (Lowe ratio test on 2-NN matches), and
    clusters the matrix with AffinityPropagation.

    :param images: URLs of the images to cluster.
    :param ratio: Lowe ratio-test threshold for accepting a match.
    :param similarity_metric: how matches are reduced to a similarity score.
    :returns: one cluster label per input image.
    """
    # Accept raw enum values as well as enum members.
    if not isinstance(similarity_metric, SimilarityMetric):
        similarity_metric = SimilarityMetric(similarity_metric)
    if not isinstance(affinity, AffinityPropagationAffinity):
        affinity = AffinityPropagationAffinity(affinity)
    if not isinstance(descriptor_matcher, DescriptorMatcher):
        descriptor_matcher = DescriptorMatcher(descriptor_matcher)
    descriptors_per_image = list()
    matrix = SimilarityMatrix.empty_matrix(len(images))
    sift = cv2.xfeatures2d.SIFT_create(
        nfeatures,
        nOctaveLayers,
        contrastThreshold,
        edgeThreshold,
        sigma,
    )
    for url in images:
        print("SIFT DESCRIPTORS: %s" % url)
        # NOTE(review): detectAndCompute returns descriptors=None when no
        # keypoints are found; knnMatch below would then raise — confirm
        # the inputs always yield keypoints.
        keypoints, descriptors = sift.detectAndCompute(
            image=read_image(url), mask=None)
        descriptors_per_image.append(descriptors)
    combo = list(
        itertools.combinations_with_replacement(
            range(len(descriptors_per_image)), 2))
    if descriptor_matcher == DescriptorMatcher.FLANNBASED:
        matcher = cv2.FlannBasedMatcher_create()
    else:
        matcher = cv2.BFMatcher_create()
    for idx, (i, j) in enumerate(combo):
        print("SIFT SIMILARITY: ( %i , %i ) %i / %i"
              % (i, j, idx, len(combo)))
        if i != j:
            matches = matcher.knnMatch(
                queryDescriptors=descriptors_per_image[i],
                trainDescriptors=descriptors_per_image[j],
                k=2)
            good = []
            for pair in matches:
                # FIX: knnMatch may return fewer than k neighbors per query
                # (e.g. the train image has a single descriptor); the old
                # unconditional `for m, n in matches` unpack crashed there.
                if len(pair) < 2:
                    continue
                m, n = pair
                if m.distance < ratio * n.distance:
                    good.append(m)
            if similarity_metric == SimilarityMetric.INVERSE_DISTANCE:
                inverse_distance = sum(1 - m.distance for m in good)
                if len(good) > 0:
                    score = inverse_distance / len(good)
                    matrix[i][j] = score
                    matrix[j][i] = score
            elif similarity_metric == SimilarityMetric.COUNT:
                matrix[i][j] = len(good)
                matrix[j][i] = len(good)
        else:
            # Diagonal: an image's similarity with itself.
            if similarity_metric == SimilarityMetric.INVERSE_DISTANCE:
                matrix[i][i] = 1
            elif similarity_metric == SimilarityMetric.COUNT:
                # NOTE(review): with the default nfeatures=0 this leaves
                # the diagonal at 0 — confirm that is intended.
                matrix[i][i] = nfeatures
    print('CLUSTER: AffinityPropagation')
    cluster = AffinityPropagation(
        damping=damping,
        max_iter=max_iter,
        convergence_iter=convergence_iter,
        affinity=affinity.value,
        random_state=0,
    ).fit_predict(matrix).tolist()
    return ClusterResults(images, cluster)