def test_iris(self):
    """Check that LSML_Supervised gives a class separation below 0.8 on iris.

    Reads ``self.iris_points`` and ``self.iris_labels`` from the test fixture.
    """
    num_constraints = 200

    lsml = LSML_Supervised(num_constraints=num_constraints)
    lsml.fit(self.iris_points, self.iris_labels)

    # Pass the data explicitly: an argument-less ``transform()`` depends on the
    # estimator remembering its training set, which is not guaranteed across
    # metric-learn releases. The explicit form works either way.
    transformed = lsml.transform(self.iris_points)
    csep = class_separation(transformed, self.iris_labels)
    self.assertLess(csep, 0.8)  # it's pretty terrible
Пример #2
0
class LSML:
    """Wrapper around ``LSML_Supervised`` with a nearest-centroid probability step.

    ``fit`` stores the training data and fits the metric learner,
    ``transform`` delegates to the learner, and ``predict_proba`` scores test
    samples against class centroids computed from the transformed training
    data.
    """

    def __init__(self):
        self.metric_model = LSML_Supervised(num_constraints=200)
        self.X_tr = None      # training samples as passed to fit()
        self.y_train = None   # training labels as passed to fit()
        self.X_te = None      # not assigned anywhere else in this class

    def fit(self, X_tr, y_train):
        """Store the training data and fit the metric model on it.

        Returns whatever ``self.metric_model.fit`` returns.
        """
        self.X_tr = X_tr
        self.y_train = y_train
        return self.metric_model.fit(X_tr, y_train)

    def transform(self, X):
        """Return ``X`` transformed by the fitted metric model."""
        return self.metric_model.transform(X)

    def predict_proba(self, X_te):
        """Return one row of centroid-based probabilities per test sample.

        The result has shape ``(X_te.shape[0], number_of_centroids)``.

        NOTE(review): ``X_te`` is compared against centroids computed in the
        transformed space without being transformed here; presumably callers
        pass already-transformed samples. Confirm against the call sites.
        """
        test_samples = X_te.shape[0]
        # Keep the transformed training data local. Writing it back to
        # self.X_tr would make every later call transform already-transformed
        # data again.
        transformed_train = self.transform(self.X_tr)
        clf = NearestCentroid()
        clf.fit(transformed_train, self.y_train)
        centroids = clf.centroids_
        probabilities = np.zeros((test_samples, centroids.shape[0]))
        # range() instead of the Python-2-only xrange().
        for sample in range(test_samples):
            probabilities[sample] = sk_nearest_neighbour_proba(
                centroids, X_te[sample, :])
        return probabilities
Пример #3
0
    def test_iris(self):
        """LSML_Supervised on iris should give a class separation under 0.8."""
        model = LSML_Supervised(num_constraints=200)
        model.fit(self.iris_points, self.iris_labels)

        projected = model.transform(self.iris_points)
        separation = class_separation(projected, self.iris_labels)
        self.assertLess(separation, 0.8)  # it's pretty terrible
Пример #4
0
    def process_lsml(self, **option):
        """Fit LSML on the training data and record its transformer.

        Keyword arguments are forwarded unchanged to ``LSML_Supervised``. The
        value returned by the fitted learner's ``transformer()`` is stored
        under ``self.Trans['LSML']``.
        """
        features, labels = self.GeneExp_train, self.Label_train

        learner = LSML_Supervised(**option)
        learner.fit(features, labels)
        self.Trans['LSML'] = learner.transformer()
Пример #5
0
def sandwich_demo():
    """Plot the sandwich data before and after several metric learners.

    The top row shows the input space with its 2-nearest-neighbour graph.
    Each following subplot shows the same data after fitting and applying one
    learner (LMNN, ITML, SDML, LSML), titled with the learner's class name.
    Ends by calling ``plt.show()``.
    """
    x, y = sandwich_data()
    knn = nearest_neighbors(x, k=2)
    ax = plt.subplot(3, 1, 1)  # take the whole top row
    plot_sandwich_data(x, y, ax)
    plot_neighborhood_graph(x, knn, y, ax)
    ax.set_title('input space')
    ax.set_aspect('equal')
    ax.set_xticks([])
    ax.set_yticks([])

    mls = [
        LMNN(),
        ITML_Supervised(num_constraints=200),
        SDML_Supervised(num_constraints=200),
        LSML_Supervised(num_constraints=200),
    ]

    # Subplots 1-2 of the 3x2 grid are covered by the top-row axes, so the
    # per-learner panels start at position 3.
    for ax_num, ml in enumerate(mls, start=3):
        ml.fit(x, y)
        # Pass the data explicitly: an argument-less ``transform()`` depends
        # on the learner remembering its training set, which is not
        # guaranteed across metric-learn releases.
        tx = ml.transform(x)
        ml_knn = nearest_neighbors(tx, k=2)
        ax = plt.subplot(3, 2, ax_num)
        plot_sandwich_data(tx, y, axis=ax)
        plot_neighborhood_graph(tx, ml_knn, y, axis=ax)
        ax.set_title(ml.__class__.__name__)
        ax.set_xticks([])
        ax.set_yticks([])
    plt.show()
Пример #6
0
def get_dist_func(data : Array[np.float64], target : Array[np.float64]) -> Callable[[Callable[[np.float64, np.float64], np.float64], int, int], np.float64]:
    """
    Get a function that returns distances between examples in the learned space.

    The training data is standardised with ``StandardScaler`` and then passed
    through ``LSML_Supervised().fit_transform``. The transformed data is
    captured by the returned closure.

    Args:
        data : Array[np.float64] - training data
        target : Array[np.float64] - target variable values (classes of training examples)
    Returns:
        Callable[[Callable[[np.float64, np.float64], np.float64], int, int], np.float64] -- function
        that takes a metric function and the indices of two training examples and returns the
        distance between those examples in the learned metric space.
    """

    # Get transformed data.
    data_trans : Array[np.float64] = LSML_Supervised().fit_transform(StandardScaler().fit_transform(data), target)


    # Computing distance:
    # NOTE: the index parameters are annotated with the builtin ``int``; the
    # ``np.int`` alias has been removed from NumPy and would raise
    # AttributeError when this function is defined.
    def dist_func_res(metric : Callable[[np.float64, np.float64], np.float64], i1 : int, i2 : int) -> np.float64:
        """
        Distance function that takes indices of examples in the training set and
        returns their distance in the learned space using the specified metric.

        Args:
            metric : callable - distance metric, called with the two selected rows of the transformed data
            i1 : int - index of first training example
            i2 : int - index of second training example
        Returns:
            np.float64 - distance in learned metric space using specified metric
                    between specified training examples.
        """

        # Compute distance in learned metric space using specified metric.
        return metric(data_trans[i1, :], data_trans[i2, :])

    return dist_func_res  # Return distance function.
  def test_lsml_supervised(self):
    """fit + transform and fit_transform must agree for identical seeds."""
    first_rng = np.random.RandomState(1234)
    fitted = LSML_Supervised(n_constraints=200, random_state=first_rng)
    fitted.fit(self.X, self.y)
    via_transform = fitted.transform(self.X)

    second_rng = np.random.RandomState(1234)
    fresh = LSML_Supervised(n_constraints=200, random_state=second_rng)
    via_fit_transform = fresh.fit_transform(self.X, self.y)

    assert_array_almost_equal(via_transform, via_fit_transform)
 def test_deprecation(self):
     """Fitting with ``num_labeled`` set must raise the deprecation warning."""
     # TODO: remove in v.0.5
     # NOTE(review): the TODO names v.0.5 while the message below announces
     # removal in 0.6.0 -- confirm which version is intended.
     points = np.array([[0, 0], [0, 1], [2, 0], [2, 1]])
     labels = np.array([1, 0, 1, 0])
     learner = LSML_Supervised(num_labeled=np.inf)
     # NOTE(review): there is no space between "be" and "removed" in the
     # concatenated text; presumably this mirrors the library's own message,
     # so it is kept verbatim.
     expected_message = ('"num_labeled" parameter is not used.'
                         ' It has been deprecated in version 0.5.0 and will be'
                         'removed in 0.6.0')
     assert_warns_message(DeprecationWarning, expected_message, learner.fit,
                          points, labels)
Пример #9
0
  def test_lsml_supervised(self):
    """fit + transform and fit_transform must agree for identical seeds.

    The random state is handed to ``fit`` / ``fit_transform`` rather than to
    the constructor.
    """
    first_rng = np.random.RandomState(1234)
    fitted = LSML_Supervised(num_constraints=200)
    fitted.fit(self.X, self.y, random_state=first_rng)
    via_transform = fitted.transform(self.X)

    second_rng = np.random.RandomState(1234)
    fresh = LSML_Supervised(num_constraints=200)
    via_fit_transform = fresh.fit_transform(self.X, self.y,
                                            random_state=second_rng)

    assert_array_almost_equal(via_transform, via_fit_transform)
 def test_lsml_supervised(self):
   """``components_.T . components_`` must match the Mahalanobis matrix."""
   rng = np.random.RandomState(1234)
   learner = LSML_Supervised(num_constraints=200, random_state=rng)
   learner.fit(self.X, self.y)
   components = learner.components_
   gram = components.T.dot(components)
   assert_array_almost_equal(gram, learner.get_mahalanobis_matrix())
Пример #11
0
def gettestData():
    """Load the test set named by the second command-line argument.

    The file (``sys.argv[2]``) is read with ``load_svmlight_file``, i.e. it is
    expected to be in libSVM format.

    Returns:
        tuple: ``(Xts, Yts)`` -- the features converted from a sparse matrix
        to a dense array via ``toarray()``, and the test labels.
    """
    test_path = sys.argv[2]
    sparse_features, labels = load_svmlight_file(test_path)
    return sparse_features.toarray(), labels


# Load the training split.
Xtr, Ytr = gettrainData()
# Load the test split.
Xts, Yts = gettestData()

# Keep only the first half of the training samples and labels.
# NOTE(review): `// 2` keeps one half, not a quarter -- confirm the intended fraction.
Xtr = Xtr[:len(Xtr) // 2]
Ytr = Ytr[:len(Ytr) // 2]

lsml = LSML_Supervised(num_constraints=1000)
# Fit the metric learner on the reduced training set.
lsml.fit(Xtr, Ytr)
# Retrieve the learnt metric from the fitted model.
M = lsml.metric()

# Save the learnt metric to model.npy.
np.save("model.npy", M)
Пример #12
0
 def test_lsml(self):
     """Run ``check_estimator`` against a default-constructed LSML_Supervised."""
     estimator = LSML_Supervised()
     check_estimator(estimator)
Пример #13
0
        [learner for (learner, _) in quadruplets_learners]))

# Each registry entry pairs a learner instance with the dataset builder used
# for it; the matching ``ids_*`` list holds the learners' class names.
pairs_learners = [
    (ITML(), build_pairs),
    (MMC(max_iter=2), build_pairs),  # max_iter=2 for faster
    (SDML(), build_pairs),
]
ids_pairs_learners = [
    learner.__class__.__name__ for (learner, _) in pairs_learners
]

classifiers = [
    (Covariance(), build_classification),
    (LFDA(), build_classification),
    (LMNN(), build_classification),
    (NCA(), build_classification),
    (RCA(), build_classification),
    (ITML_Supervised(max_iter=5), build_classification),
    (LSML_Supervised(), build_classification),
    (MMC_Supervised(max_iter=5), build_classification),
    (RCA_Supervised(num_chunks=10), build_classification),
    (SDML_Supervised(), build_classification),
]
ids_classifiers = [
    learner.__class__.__name__ for (learner, _) in classifiers
]

regressors = [(MLKR(), build_regression)]
ids_regressors = [
    learner.__class__.__name__ for (learner, _) in regressors
]

WeaklySupervisedClasses = (_PairsClassifierMixin, _QuadrupletsClassifierMixin)

# All learners that consume tuples (pairs or quadruplets).
tuples_learners = pairs_learners + quadruplets_learners
Пример #14
0
    X_test = np.load(osp.join(args.data_root, 'feature_test.npy'))
    y_test = np.load(osp.join(args.data_root, 'label_test.npy'))
    return X_train, X_test, y_train, y_test


if __name__ == '__main__':
    # Command line: where the raw split lives and how many LSML iterations
    # to allow.
    parser = argparse.ArgumentParser("LSML")
    parser.add_argument('--data-root', default='./data/raw_split')
    parser.add_argument('--max-iter', type=int, default=1000)
    args = parser.parse_args()

    # The output folder is named after the iteration budget.
    name = str(args.max_iter)
    data_save_folder = f"./data/LSML/{name}"
    makedirs(data_save_folder)

    X_train, X_test, y_train, y_test = load_split(args)
    print(X_train.shape)

    # NOTE(review): the start time is recorded but never read in the visible code.
    t = time.time()

    lsml = LSML_Supervised(max_iter=args.max_iter, verbose=1)
    lsml.fit(X_train, y_train)

    print(" # LSML fit done.")

    # Save the transformed features next to their untouched labels, train
    # split first and then test split.
    train_feature_path = osp.join(data_save_folder, "feature_train.npy")
    np.save(train_feature_path, lsml.transform(X_train))
    train_label_path = osp.join(data_save_folder, "label_train.npy")
    np.save(train_label_path, y_train)

    test_feature_path = osp.join(data_save_folder, "feature_test.npy")
    np.save(test_feature_path, lsml.transform(X_test))
    test_label_path = osp.join(data_save_folder, "label_test.npy")
    np.save(test_label_path, y_test)
Пример #15
0
 def __init__(self):
     """Create the LSML metric learner and clear the cached data slots."""
     self.metric_model = LSML_Supervised(num_constraints=200)
     # No data is attached until the caller provides it.
     self.X_tr = self.y_train = self.X_te = None
 def test_lsml_supervised(self):
   """``transformer_.T . transformer_`` must match the Mahalanobis matrix."""
   rng = np.random.RandomState(1234)
   learner = LSML_Supervised(num_constraints=200)
   learner.fit(self.X, self.y, random_state=rng)
   transformer = learner.transformer_
   gram = transformer.T.dot(transformer)
   assert_array_almost_equal(gram, learner.get_mahalanobis_matrix())
 def test_lsml_supervised(self):
   """``transformer_.T . transformer_`` must match ``metric()``."""
   rng = np.random.RandomState(1234)
   learner = LSML_Supervised(num_constraints=200)
   learner.fit(self.X, self.y, random_state=rng)
   transformer = learner.transformer_
   gram = transformer.T.dot(transformer)
   assert_array_almost_equal(gram, learner.metric())