def test_iris(self):
    """Fit a 2-dimensional RCA on iris chunks and check class separation.

    Chunks are drawn with ``RCA.prepare_constraints`` using a fixed seed;
    the test passes when the class separation of the transformed data is
    below 0.25.
    """
    model = RCA(dim=2)
    constraint_chunks = RCA.prepare_constraints(
        self.iris_labels,
        num_chunks=30,
        chunk_size=2,
        seed=1234,
    )
    model.fit(self.iris_points, constraint_chunks)

    embedded = model.transform()
    separation = class_separation(embedded, self.iris_labels)
    self.assertLess(separation, 0.25)
class RCA:
    """Wrapper around an ``RCA_ml`` metric model.

    Remembers the training data passed to :meth:`fit` so that
    :meth:`predict_proba` can build class centroids in the learned space.
    """

    def __init__(self):
        self.metric_model = RCA_ml()
        self.X_tr = None
        self.y_train = None
        self.X_te = None

    def fit(self, X_tr, y_train):
        """Fits the model to the prescribed data.

        The training data and labels are stored on the instance and the
        result of the underlying model's ``fit`` is returned.
        """
        self.X_tr = X_tr
        self.y_train = y_train
        return self.metric_model.fit(X_tr, y_train)

    def transform(self, X):
        """Transforms the test data according to the model."""
        return self.metric_model.transform(X)

    def predict_proba(self, X_te):
        """Predicts the probabilities of each of the test samples.

        Returns an array with one row per test sample and one column per
        class centroid.
        """
        test_samples = X_te.shape[0]

        # Keep the transformed training data local: overwriting self.X_tr
        # would make every later call transform already-transformed data.
        transformed_train = self.transform(self.X_tr)

        clf = NearestCentroid()
        clf.fit(transformed_train, self.y_train)
        centroids = clf.centroids_

        # NOTE(review): X_te is compared to the centroids as given; it is not
        # passed through transform() here -- confirm callers do that first.
        probabilities = np.zeros((test_samples, centroids.shape[0]))
        # range() instead of the Python-2-only xrange().
        for sample in range(test_samples):
            probabilities[sample] = sk_nearest_neighbour_proba(
                centroids, X_te[sample, :])
        return probabilities
def test_rank_deficient_returns_warning(self):
    """Checks that if the covariance matrix is not invertible, we raise a
    warning message advising to use PCA"""
    X, y = load_iris(return_X_y=True)
    # we make the fourth column a linear combination of the two first,
    # so that the covariance matrix will not be invertible:
    X[:, 3] = X[:, 0] + 3 * X[:, 1]
    rca = RCA()
    msg = ('The inner covariance matrix is not invertible, '
           'so the transformation matrix may contain Nan values. '
           'You should remove any linearly dependent features and/or '
           'reduce the dimensionality of your input, '
           'for instance using `sklearn.decomposition.PCA` as a '
           'preprocessing step.')
    # pytest.warns(None) is deprecated and rejected by recent pytest
    # versions; recording every Warning subclass keeps the same intent.
    with pytest.warns(Warning) as raised_warnings:
        rca.fit(X, y)
    assert any(str(w.message) == msg for w in raised_warnings)
def fit(self, X, y=None, ml=None, cl=None):
    """Cluster ``X`` with KMeans, optionally after an RCA transform.

    When ``ml`` is non-empty, the constraints are turned into neighborhoods
    by ``preprocess_constraints``; each neighborhood becomes one RCA chunk
    and KMeans is run on the RCA-transformed data. Otherwise KMeans is run
    on ``X`` directly.

    Parameters
    ----------
    X : array with one row per sample (``X.shape[0]`` is the sample count).
    y : unused by this method.
    ml : constraint list (presumably must-link pairs); defaults to empty.
    cl : constraint list (presumably cannot-link pairs); defaults to empty.

    Returns
    -------
    self, with ``labels_`` set to the KMeans labels.
    """
    # None sentinels avoid sharing one mutable default list across calls.
    ml = [] if ml is None else ml
    cl = [] if cl is None else cl

    X_transformed = X
    if ml:
        # Samples that belong to no neighborhood keep the chunk id -1.
        chunks = np.full(X.shape[0], -1)
        _, _, neighborhoods = preprocess_constraints(ml, cl, X.shape[0])
        for i, neighborhood in enumerate(neighborhoods):
            chunks[neighborhood] = i

        rca = RCA()
        rca.fit(X, chunks=chunks)
        X_transformed = rca.transform(X)

    kmeans = KMeans(n_clusters=self.n_clusters, max_iter=self.max_iter)
    kmeans.fit(X_transformed)
    self.labels_ = kmeans.labels_
    return self
# Class names of the quadruplets learners.
ids_quadruplets_learners = [
    learner.__class__.__name__ for learner, _ in quadruplets_learners
]

# Each entry pairs a learner instance with the function that builds its data.
pairs_learners = [
    (ITML(), build_pairs),
    (MMC(max_iter=2), build_pairs),  # only two iterations, to keep it fast
    (SDML(), build_pairs),
]
ids_pairs_learners = [
    learner.__class__.__name__ for learner, _ in pairs_learners
]

classifiers = [
    (Covariance(), build_classification),
    (LFDA(), build_classification),
    (LMNN(), build_classification),
    (NCA(), build_classification),
    (RCA(), build_classification),
    (ITML_Supervised(max_iter=5), build_classification),
    (LSML_Supervised(), build_classification),
    (MMC_Supervised(max_iter=5), build_classification),
    (RCA_Supervised(num_chunks=10), build_classification),
    (SDML_Supervised(), build_classification),
]
ids_classifiers = [
    learner.__class__.__name__ for learner, _ in classifiers
]

regressors = [(MLKR(), build_regression)]
ids_regressors = [
    learner.__class__.__name__ for learner, _ in regressors
]

WeaklySupervisedClasses = (
    _PairsClassifierMixin,
    _QuadrupletsClassifierMixin,
)
def __init__(self):
    """Create the underlying ``RCA_ml`` model and reset the stored data."""
    self.metric_model = RCA_ml()
    # No training or test data is held until it is provided later.
    self.X_tr = self.y_train = self.X_te = None
def _print_elapsed(tic):
    """Print the time elapsed since ``tic`` as whole minutes plus seconds."""
    # divmod keeps the minutes whole; formatting `elapsed / 60` with
    # `{:2.0f}` would round, e.g. 90 s would print as "2min 30.0s".
    minutes, seconds = divmod(time() - tic, 60)
    print('Done. ({:2.0f}min {:2.1f}s)'.format(minutes, seconds))


def _print_embedding_range(embedding):
    """Print the x and y extent of an embedding with one row per sample."""
    print('Embedding range: x [{}, {}], y [{}, {}]'.format(
        embedding[:, 0].min(), embedding[:, 0].max(),
        embedding[:, 1].min(), embedding[:, 1].max()))


def compute_graph(current_graph=None):
    """Build the graph from scratch or update it from a user-edited graph.

    With no ``current_graph`` (or before any embedding exists) the embedding
    is computed, clustered, and turned into a new graph. Otherwise the node
    positions in ``current_graph['nodes']`` are read, nodes that moved beyond
    the margin get their new positions, clusters are recomputed, the reduced
    features are re-weighted with RCA on chunks sampled from those clusters,
    and the embedding and graph are recomputed.

    Parameters
    ----------
    current_graph : mapping with a ``'nodes'`` entry, or empty/None to
        initialise.

    Returns
    -------
    The module-level ``graph`` after it has been (re)built.
    """
    global image_names, labels, n_clusters
    global graph, position_constraints, prev_embedding
    # fts_reduced is reassigned below, so it must be declared global;
    # otherwise reading it raises UnboundLocalError.
    global d, features, fts_reduced

    if not current_graph or prev_embedding is None:
        print('Initialise graph...')
        tic = time()
        compute_embedding()  # initialise prev_embedding with standard tsne
        # find clusters
        clusters = cluster_embedding(prev_embedding, n_clusters=n_clusters,
                                     seed=seed)
        graph = create_graph(image_names, prev_embedding, label=clusters,
                             labels=labels)
        _print_elapsed(tic)
        _print_embedding_range(prev_embedding)
        return graph

    print('Update graph...')
    tic = time()
    graph = format_graph(current_graph['nodes'])

    # get current embedding
    current_embedding = prev_embedding.copy()
    # nodes which have moved further than the given margin
    moved = get_moved(margin=2.0)
    if len(moved) > 0:
        pos_moved = np.array([[graph[idx]['x'], graph[idx]['y']]
                              for idx in moved])
        current_embedding[moved] = pos_moved

    # find clusters
    clusters = cluster_embedding(current_embedding, n_clusters=n_clusters,
                                 seed=seed)

    if len(moved) > 0:
        # sample chunks from clusters
        chunk_size = int(d / 4.99)  # minimal chunk size
        chunks = make_chunks(current_embedding, moved, clusters, chunk_size,
                             n_neighbors=5)

        # transform features; keep the old ones if RCA produced inf/nan
        fts_reduced_rca = RCA().fit_transform(fts_reduced, chunks)
        if np.isfinite(fts_reduced_rca).all():
            fts_reduced = fts_reduced_rca
        else:
            warnings.warn(
                'RCA features included infinite value or nan, so features '
                'are not updated. '
                'Try to group more samples or reduce cluster size.',
                RuntimeWarning)

    compute_embedding()  # update prev_embedding
    graph = create_graph(image_names, prev_embedding, label=clusters,
                         labels=labels)
    _print_embedding_range(prev_embedding)
    _print_elapsed(tic)
    return graph