Example #1
    def test_iris(self):
        itml = ITML_Supervised(num_constraints=200)
        itml.fit(self.iris_points, self.iris_labels)

        csep = class_separation(itml.transform(self.iris_points),
                                self.iris_labels)
        self.assertLess(csep, 0.2)
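Note that this snippet (and several below) uses the pre-0.7 metric-learn API, where the constraint count is `num_constraints`, the learned Mahalanobis matrix comes from `metric()`, the linear map lives in `transformer_`, and `random_state` is passed to `fit`. Recent releases renamed these to `n_constraints`, `get_mahalanobis_matrix()` and `components_`, with `random_state` taken by the constructor (as some of the later test snippets already do). A minimal standalone sketch of the same iris check against the newer API, with an arbitrary `random_state` chosen here for reproducibility:

import numpy as np
from sklearn.datasets import load_iris
from metric_learn import ITML_Supervised

X, y = load_iris(return_X_y=True)
itml = ITML_Supervised(n_constraints=200, random_state=42)
X_itml = itml.fit_transform(X, y)   # learn the metric and project the data
L = itml.components_                # linear map L, with M = L.T @ L
M = itml.get_mahalanobis_matrix()
assert np.allclose(L.T @ L, M)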
Example #2
def main():

    # Get training file name from the command line
    traindatafile = sys.argv[1]

    # The training file is in libSVM format
    tr_data = load_svmlight_file(traindatafile)

    Xtr = tr_data[0].toarray()  # Convert the sparse feature matrix to dense
    Ytr = tr_data[1]  # The training labels

    Indices_array = np.arange(Ytr.shape[0])
    np.random.shuffle(Indices_array)

    Xtr = Xtr[Indices_array]
    Xtr = Xtr[:6000]

    Ytr = Ytr[Indices_array]
    Ytr = Ytr[:6000]

    itml = ITML_Supervised()
    itml.fit(Xtr, Ytr)
    Met = itml.metric()
    # print Met;
    np.save("itml_model.npy", Met)
Example #3
    def process_itml(self, **option):
        '''Metric Learning algorithm: ITML'''
        GeneExp = self.GeneExp_train
        Label = self.Label_train

        itml = ITML_Supervised(**option)
        itml.fit(GeneExp, Label)
        self.Trans['ITML'] = itml.transformer()
Example #4
def runITML(X_train, X_test, y_train, y_test):
    transformer = ITML_Supervised(num_constraints=200, verbose=True)
    transformer.fit(X_train, y_train)
    X_train_proj = transformer.transform(X_train)
    X_test_proj = transformer.transform(X_test)
    np.save('X_train_ITML', X_train_proj)
    np.save('X_test_ITML', X_test_proj)
    return X_train_proj, X_test_proj
Example #5
  def test_itml_supervised(self):
    seed = np.random.RandomState(1234)
    itml = ITML_Supervised(num_constraints=200)
    itml.fit(self.X, self.y, random_state=seed)
    res_1 = itml.transform(self.X)

    seed = np.random.RandomState(1234)
    itml = ITML_Supervised(num_constraints=200)
    res_2 = itml.fit_transform(self.X, self.y, random_state=seed)

    assert_array_almost_equal(res_1, res_2)
Example #6
  def test_itml_supervised(self):
    seed = np.random.RandomState(1234)
    itml = ITML_Supervised(n_constraints=200, random_state=seed)
    itml.fit(self.X, self.y)
    res_1 = itml.transform(self.X)

    seed = np.random.RandomState(1234)
    itml = ITML_Supervised(n_constraints=200, random_state=seed)
    res_2 = itml.fit_transform(self.X, self.y)

    assert_array_almost_equal(res_1, res_2)
Example #7
def test_bounds_parameters_valid(bounds):
  """Asserts that we can provide any array-like of two elements as bounds,
  and that the attribute bounds_ is a numpy array"""

  pairs = np.array([[[-10., 0.], [10., 0.]], [[0., 50.], [0., -60]]])
  y_pairs = [1, -1]
  itml = ITML()
  itml.fit(pairs, y_pairs, bounds=bounds)

  X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]])
  y = np.array([1, 0, 1, 0])
  itml_supervised = ITML_Supervised()
  itml_supervised.fit(X, y, bounds=bounds)
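The `bounds` argument exercised above sets the distance thresholds ITML uses when building constraints: similar pairs are constrained below the first value and dissimilar pairs above the second, and the fitted estimator stores them in `bounds_`. A short illustrative sketch, assuming a recent metric-learn release (the values and `random_state` are arbitrary):

import numpy as np
from metric_learn import ITML_Supervised

X = np.array([[0., 0.], [0., 1.], [2., 0.], [2., 1.]])
y = np.array([1, 0, 1, 0])

itml_supervised = ITML_Supervised(random_state=42)
itml_supervised.fit(X, y, bounds=(0.1, 10.0))   # any 2-element array-like works
print(itml_supervised.bounds_)                  # stored back as a numpy array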
Example #8
def get_metric():
    ad=pd.read_csv('ad_feature.csv',header=0,sep='\t')
    data=ad.values
    m=np.array([[.0,.0,.0],[.0,.0,.0],[.0,.0,.0]])
    itml = ITML_Supervised(num_constraints=200)
    for i in range(6):
        data_r=np.array(random.sample(data.tolist(),int(len(data)/300)))
        x=data_r[:,[2,3,4]]
        y=data_r[:,1]
        itml.fit(x,y)
        m=m+itml.metric()
    m=m/6
    return m
Example #9
def test_bounds_parameters_valid(bounds):
    """Asserts that we can provide any array-like of two elements as bounds,
  and that the attribute bound_ is a numpy array"""

    pairs = np.array([[[-10., 0.], [10., 0.]], [[0., 50.], [0., -60]]])
    y_pairs = [1, -1]
    itml = ITML()
    itml.fit(pairs, y_pairs, bounds=bounds)

    X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]])
    y = np.array([1, 0, 1, 0])
    itml_supervised = ITML_Supervised()
    itml_supervised.fit(X, y, bounds=bounds)
Example #10
def test_bounds_parameters_invalid(bounds):
    """Assert that if a non array-like is put for bounds, or an array-like
  of length different than 2, an error is returned"""
    pairs = np.array([[[-10., 0.], [10., 0.]], [[0., 50.], [0., -60]]])
    y_pairs = [1, -1]
    itml = ITML()
    with pytest.raises(Exception):
        itml.fit(pairs, y_pairs, bounds=bounds)

    X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]])
    y = np.array([1, 0, 1, 0])
    itml_supervised = ITML_Supervised()
    with pytest.raises(Exception):
        itml_supervised.fit(X, y, bounds=bounds)
Example #11
def test_bounds_parameters_invalid(bounds):
  """Assert that if a non array-like is put for bounds, or an array-like
  of length different than 2, an error is returned"""
  pairs = np.array([[[-10., 0.], [10., 0.]], [[0., 50.], [0., -60]]])
  y_pairs = [1, -1]
  itml = ITML()
  with pytest.raises(Exception):
    itml.fit(pairs, y_pairs, bounds=bounds)

  X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]])
  y = np.array([1, 0, 1, 0])
  itml_supervised = ITML_Supervised()
  with pytest.raises(Exception):
    itml_supervised.fit(X, y, bounds=bounds)
Example #12
class ITML:
    def __init__(self, num_constraints=200):
        self.space_model = PCA()
        # pass num_constraints by keyword: ITML_Supervised's first positional argument is gamma
        self.metric_model = ITML_Supervised(num_constraints=num_constraints)

    def fit(self, feats, labels):
        """Fits the model to the prescribed data."""
        pdb.set_trace()
        # PCA.fit returns the fitted estimator, so project the data explicitly
        self.space = self.space_model.fit_transform(feats)
        pdb.set_trace()
        self.metric_model.fit(self.space, labels)

    def transform(self, y):
        """Transforms the test data according to the model"""
        # project into the PCA space first, then apply the learned metric transform
        test_proj = self.space_model.transform(y)
        pdb.set_trace()
        return self.metric_model.transform(test_proj)
Example #13
class ITML:
    def __init__(self, num_constraints=200):
        # pass num_constraints by keyword: ITML_Supervised's first positional argument is gamma
        self.metric_model = ITML_Supervised(num_constraints=num_constraints)

    def fit(self, features, labels):
        """Fits the model to the prescribed data."""
        # keep the training data around for predict_proba below
        self.X_tr = features
        self.y_train = labels
        return self.metric_model.fit(features, labels)

    def transform(self, y):
        """Transforms the test data according to the model"""
        return self.metric_model.transform(y)

    def predict_proba(self, X_te):
        """Predicts the probabilities of each of the test samples"""
        test_samples = X_te.shape[0]
        self.X_tr = self.transform(self.X_tr)
        clf = NearestCentroid()
        clf.fit(self.X_tr, self.y_train)
        centroids = clf.centroids_
        probabilities = np.zeros((test_samples, centroids.shape[0]))
        for sample in xrange(test_samples):
            probabilities[sample] = sk_nearest_neighbour_proba(
                centroids, X_te[sample, :])
        return probabilities
Example #14
def gettestData():

    # Get testing file name from the command line
    testdatafile = sys.argv[2]

    # The testing file is in libSVM format
    ts_data = load_svmlight_file(testdatafile)

    Xts = ts_data[0].toarray()  # Converts sparse matrices to dense
    Yts = ts_data[1]  # The test labels
    return Xts, Yts


# get training data
Xtr, Ytr = gettrainData()
# get testing data
Xts, Yts = gettestData()

# Taking only a fraction of data. i.e. 1/4th
Xtr = Xtr[:len(Xtr)//4]
Ytr = Ytr[:len(Ytr)//4]

itml = ITML_Supervised(num_constraints=1000)
# learning
itml.fit(Xtr, Ytr)
# Get the learnt metric
M = itml.metric()

# Metric saved
np.save("model.npy", M)
Example #15
def main(params):

    initialize_results_dir(params.get('results_dir'))
    backup_params(params, params.get('results_dir'))

    print('>>> loading data...')

    X_train, y_train, X_test, y_test = LoaderFactory().create(
        name=params.get('dataset'),
        root=params.get('dataset_dir'),
        random=True,
        seed=params.getint('split_seed'))()

    print('<<< data loaded')

    print('>>> computing psd matrix...')

    if params.get('algorithm') == 'identity':
        psd_matrix = np.identity(X_train.shape[1], dtype=X_train.dtype)

    elif params.get('algorithm') == 'nca':
        nca = NCA(init='auto',
                  verbose=True,
                  random_state=params.getint('algorithm_seed'))
        nca.fit(X_train, y_train)
        psd_matrix = nca.get_mahalanobis_matrix()

    elif params.get('algorithm') == 'lmnn':
        lmnn = LMNN(init='auto',
                    verbose=True,
                    random_state=params.getint('algorithm_seed'))
        lmnn.fit(X_train, y_train)
        psd_matrix = lmnn.get_mahalanobis_matrix()

    elif params.get('algorithm') == 'itml':
        itml = ITML_Supervised(verbose=True,
                               random_state=params.getint('algorithm_seed'))
        itml.fit(X_train, y_train)
        psd_matrix = itml.get_mahalanobis_matrix()

    elif params.get('algorithm') == 'lfda':

        lfda = LFDA()
        lfda.fit(X_train, y_train)
        psd_matrix = lfda.get_mahalanobis_matrix()

    elif params.get('algorithm') == 'arml':
        learner = TripleLearner(
            optimizer=params.get('optimizer'),
            optimizer_params={
                'lr': params.getfloat('lr'),
                'momentum': params.getfloat('momentum'),
                'weight_decay': params.getfloat('weight_decay'),
            },
            criterion=params.get('criterion'),
            criterion_params={'calibration': params.getfloat('calibration')},
            n_epochs=params.getint('n_epochs'),
            batch_size=params.getint('batch_size'),
            random_initialization=params.getboolean('random_initialization',
                                                    fallback=False),
            update_triple=params.getboolean('update_triple', fallback=False),
            device=params.get('device'),
            seed=params.getint('learner_seed'))

        psd_matrix = learner(X_train,
                             y_train,
                             n_candidate_mins=params.getint('n_candidate_mins',
                                                            fallback=1))

    else:
        raise Exception('unsupported algorithm')

    print('<<< psd matrix got')

    np.savetxt(os.path.join(params.get('results_dir'), 'psd_matrix.txt'),
               psd_matrix)
    print("Method: LMNN", '\n')
    lmnn = LMNN(k=3, learn_rate=1e-6, verbose=False)
    x = lmnn.fit(FSTrainData, TrainLabels)
    TFSTestData = x.transform(FSTestData)
    print('Transformation Done', '\n')

elif Method == 'COV':
    print("Method: COV", '\n')
    cov = Covariance().fit(FSTrainData)
    TFSTestData = cov.transform(FSTestData)
    print('Transformation Done', '\n')

elif Method == 'ITML':
    print("Method: ITML", '\n')
    itml = ITML_Supervised(num_constraints=200, A0=None)
    x = itml.fit(FSTrainData, TrainLabels)
    TFSTestData = x.transform(FSTestData)
    print('Transformation Done', '\n')

elif Method == 'LFDA':
    print("Method: LFDA", '\n')
    lfda = LFDA(k=4, dim=1)
    x = lfda.fit(FSTrainData, TrainLabels)
    TFSTestData = x.transform(FSTestData)
    print('Transformation Done', '\n')

elif Method == 'NCA':
    print("Method: NCA", '\n')
    #print('Max', TrainData.max(axis=0))
    #print('sssssssss', len(TrainData[0]))
    #print('sssssssss', len(TrainData.max(axis=0)))
Example #17
from metric_learn import ITML_Supervised
from sklearn.datasets import load_iris

iris_data = load_iris()
X = iris_data['data']
Y = iris_data['target']

itml = ITML_Supervised(num_constraints=200)
itml.fit(X, Y)
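Continuing from the snippet above (and assuming a recent metric-learn/scikit-learn), the fitted model can be consumed in two common ways: transform the data and classify with ordinary Euclidean k-NN, or hand k-NN the distance callable returned by `get_metric()`. A minimal sketch:

from sklearn.neighbors import KNeighborsClassifier

# Option 1: classify in the transformed space with the ordinary Euclidean metric.
knn = KNeighborsClassifier(n_neighbors=3)
knn.fit(itml.transform(X), Y)

# Option 2: keep the raw features and pass k-NN the learned distance function.
knn_callable = KNeighborsClassifier(n_neighbors=3, metric=itml.get_metric())
knn_callable.fit(X, Y)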
Example #18
def test_ITML():
    X = np.random.rand(40, 40)
    Y = np.array([i for j in range(2) for i in range(20)])
    itml = ITML_Supervised(num_constraints=200)
    itml.fit(X, Y)
    pdb.set_trace()
Example #19
  def test_iris(self):
    itml = ITML_Supervised(num_constraints=200)
    itml.fit(self.iris_points, self.iris_labels)

    csep = class_separation(itml.transform(), self.iris_labels)
    self.assertLess(csep, 0.2)
Example #20
  def test_itml_supervised(self):
    seed = np.random.RandomState(1234)
    itml = ITML_Supervised(num_constraints=200)
    itml.fit(self.X, self.y, random_state=seed)
    L = itml.transformer_
    assert_array_almost_equal(L.T.dot(L), itml.metric())
Example #21
  def test_itml_supervised(self):
    seed = np.random.RandomState(1234)
    itml = ITML_Supervised(n_constraints=200, random_state=seed)
    itml.fit(self.X, self.y)
    L = itml.components_
    assert_array_almost_equal(L.T.dot(L), itml.get_mahalanobis_matrix())
Example #22
pca = RandomizedPCA(n_components=n_components, whiten=True).fit(X_train)
print("done in %0.3fs" % (time() - t0))

eigenfaces = pca.components_.reshape((n_components, h, w))

print("Projecting the input data on the eigenfaces orthonormal basis")
t0 = time()
X_train_pca = pca.transform(X_train)
X_test_pca = pca.transform(X_test)
print("done in %0.3fs" % (time() - t0))

# Try ITML here.
print("Trying ITML")
param_grid = {''}
itml = ITML(num_constraints=200)
X_tr = itml.fit(X_train_pca, y_train).transform(X_train_pca)
X_te = itml.transform(X_test_pca)

acc, y_pred = classifier.sk_nearest_neighbour(X_tr, y_train, X_te, y_test)
print("accuracy = %s",acc)
print(classification_report(y_test, y_pred, target_names=target_names))
print(confusion_matrix(y_test, y_pred, labels=range(n_classes)))


###############################################################################
# Train a SVM classification model

print("Fitting the classifier to the training set")
t0 = time()
param_grid = {'C': [1e3, 5e3, 1e4, 5e4, 1e5],
              'gamma': [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.1], }
Example #23
  def test_itml_supervised(self):
    seed = np.random.RandomState(1234)
    itml = ITML_Supervised(num_constraints=200)
    itml.fit(self.X, self.y, random_state=seed)
    L = itml.transformer_
    assert_array_almost_equal(L.T.dot(L), itml.get_mahalanobis_matrix())
Example #24
start_time = time.time()
nca.fit(pca.train_sample_projection, original_train_labels)
end_time = time.time()
print("Learning time: %s" % (end_time - start_time))
transformed_query_features = nca.transform(pca_query_features)
transformed_gallery_features = nca.transform(pca_gallery_features)
compute_k_mean(num_of_clusters, transformed_query_features,
               transformed_gallery_features, gallery_labels)

# Compute ITML (Information Theoretic Metric Learning)
print("\n-----ITML-----")
itml = ITML_Supervised(max_iter=20,
                       convergence_threshold=1e-5,
                       num_constraints=500,
                       verbose=True)
itml.fit(original_train_features, original_train_labels)
transformed_query_features = itml.transform(query_features)
transformed_gallery_features = itml.transform(gallery_features)
compute_k_mean(num_of_clusters, transformed_query_features,
               transformed_gallery_features, gallery_labels)

# Compute PCA_ITML
print("\n-----PCA_ITML-----")
itml = ITML_Supervised(max_iter=20,
                       convergence_threshold=1e-5,
                       num_constraints=500,
                       verbose=True)
start_time = time.time()
itml.fit(pca.train_sample_projection, original_train_labels)
end_time = time.time()
print("Learning time: %s" % (end_time - start_time))