def learn_distance_metric(
    distances,
    pairs_per_prototype=100,
    test_size=0.5,
    return_features=False,
    return_pairs=False,
):
    """Fit an MMC metric learner on pairs built from *distances* and score it.

    The input is run through a ``DateFeatureTransformer`` ->
    ``MMCFeatureTransformer`` pipeline to obtain the feature matrix, labelled
    pairs are generated with ``create_mmc_pairs``, and the pairs are split
    (shuffled, stratified on the label column) into train and test parts.
    An ``MMC`` model is fitted on the training pairs and evaluated on the
    test pairs with a weighted F1 score.

    Args:
        distances: Input data handed to both the feature pipeline and
            ``create_mmc_pairs``.
        pairs_per_prototype: Forwarded unchanged to ``create_mmc_pairs``.
        test_size: Forwarded unchanged to ``train_test_split``.
        return_features: When true, include the transformed feature matrix
            in the result; otherwise ``features`` is ``None``.
        return_pairs: When true, include the generated pairs array in the
            result; otherwise ``pairs`` is ``None``.

    Returns:
        SimpleNamespace with the attributes ``score`` (weighted F1 on the
        held-out pairs), ``metric_components`` (the transposed
        ``mmc.components_``), ``features`` and ``pairs``.
    """
    feature_pipeline = Pipeline([
        ('dates', DateFeatureTransformer()),
        ('features', MMCFeatureTransformer()),
    ])
    features = feature_pipeline.fit_transform(distances)

    pairs = create_mmc_pairs(distances, pairs_per_prototype=pairs_per_prototype)
    # First two columns describe the pair, the last column is its label.
    # NOTE(review): with an array preprocessor the pair columns are presumably
    # row indices into `features` -- confirm against create_mmc_pairs.
    pair_members = pairs[:, :2]
    pair_labels = pairs[:, -1]
    X_train, X_test, y_train, y_test = train_test_split(
        pair_members,
        pair_labels,
        shuffle=True,
        stratify=pair_labels,
        test_size=test_size,
    )

    # `np.float` was only an alias of the builtin `float`; it was deprecated
    # in NumPy 1.20 and removed in 1.24, so use the builtin directly.
    mmc = MMC(preprocessor=np.array(features, dtype=float))
    mmc = mmc.fit(X_train, y_train)
    score = f1_score(y_test, mmc.predict(X_test), average='weighted')

    return SimpleNamespace(
        score=score,
        metric_components=mmc.components_.transpose(),
        features=features if return_features else None,
        pairs=pairs if return_pairs else None,
    )
# In this task we want points whose first feature is close to end up closer
# to each other, no matter how close their second feature is.
# One label per pair in `pairs` (metric-learn convention: 1 = similar,
# -1 = dissimilar).
y = [1, 1, -1, -1]

# Learn the MMC (Mahalanobis Metrics for Clustering) model.
mmc = MMC()
mmc.fit(pairs, y)

# Decision function used to classify the pairs.
print("debug 1: ", mmc.decision_function(pairs))

# Copy of the Mahalanobis matrix learned by the metric learner.
print("debug 2: ", mmc.get_mahalanobis_matrix())

# Function that takes two 1D arrays and returns the learned metric score
# between those two points.
f = mmc.get_metric()
print("debug 3: ", f)

# Predict on new input pairs. The input has to be a list of pairs (3D);
# passing a single point such as [1.2, 7.5] fails with
# "ValueError: 3D array of formed tuples expected by MMC."
example_pairs = [
    [[1.2, 7.5], [1.3, 8.5]],
]
print("debug 4 : ", mmc.predict(example_pairs))