示例#1
0
    def test_iris(self):
        # ITML fitted on the iris fixtures must bring the class separation
        # of the transformed points below 0.2.
        points, labels = self.iris_points, self.iris_labels

        model = ITML_Supervised(num_constraints=200)
        model.fit(points, labels)

        embedded = model.transform(points)
        separation = class_separation(embedded, labels)
        self.assertLess(separation, 0.2)
示例#2
0
def itml_cmc(features, train_idxs, query_idxs, camId, gallery_idxs, labels):
    """Learn an ITML metric on PCA-reduced features and plot the CMC curve.

    The training rows of ``features`` are used to compute a PCA basis; all
    features are centred with the training average and projected onto the
    first 50 eigenvector columns. ``ITML_Supervised`` is then fitted on the
    projected training rows and its metric matrix is handed to the
    nearest-neighbour evaluation as the ``VI`` parameter of the
    ``'mahalanobis'`` metric.

    Args:
        features: feature matrix, one row per sample.
        train_idxs: row indices of the training samples.
        query_idxs: row indices of the query samples.
        camId: camera ids, forwarded unchanged to ``evaluation``.
        gallery_idxs: row indices of the gallery samples.
        labels: identity labels aligned with the rows of ``features``.

    Returns:
        Whatever ``plot_CMC`` returns for the computed neighbour matrix.
    """
    n_components = 50

    # Each helper receives its own indexing of the training rows, exactly as
    # in the original call pattern.
    n_train, n_dims = features[train_idxs].shape
    _, eigvecs = calc_eig_pca_small(features[train_idxs].T, n_dims, n_train)
    top_eigvecs = eigvecs[:, :n_components]

    mean_face = compute_avg_face(features[train_idxs].T)
    reduced = np.dot(features - mean_face, top_eigvecs)

    itml = ITML_Supervised(verbose=True, num_constraints=5000, gamma=0.2)
    # Only the fitted state is needed; the transformed training set is unused.
    itml.fit_transform(reduced[train_idxs], labels[train_idxs])
    learned_metric = itml.metric()

    nn_idx_mat = evaluation(
        knn,
        features=reduced,
        gallery_idxs=gallery_idxs,
        query_idxs=query_idxs,
        camId=camId,
        labels=labels,
        metric='mahalanobis',
        metric_params={'VI': learned_metric},
    )
    return plot_CMC(nn_idx_mat, query_idxs, labels)
示例#3
0
def main():
    """Fit ITML on a random subset of a libSVM file and save the metric.

    The training file name is read from ``sys.argv[1]``. The samples are
    shuffled, at most the first 6000 are kept, ``ITML_Supervised`` is fitted
    on them and the resulting metric matrix is written to
    ``itml_model.npy``.
    """
    max_samples = 6000

    # Get training file name from the command line; the file is in libSVM
    # format.
    train_path = sys.argv[1]
    sparse_features, all_labels = load_svmlight_file(train_path)

    # Converts sparse matrices to dense.
    dense_features = sparse_features.toarray()

    # Shuffle the sample order, then keep the leading `max_samples` entries.
    order = np.arange(all_labels.shape[0])
    np.random.shuffle(order)
    kept = order[:max_samples]

    Xtr = dense_features[kept]
    Ytr = all_labels[kept]

    itml = ITML_Supervised()
    itml.fit(Xtr, Ytr)
    np.save("itml_model.npy", itml.metric())
  def test_iris(self):
    # ITML fitted on iris with 200 constraints; the separation threshold is
    # deliberately loose (0.4).
    fitted = ITML_Supervised(num_constraints=200).fit(
        self.iris_points, self.iris_labels)

    embedded = fitted.transform()
    csep = class_separation(embedded, self.iris_labels)
    self.assertLess(csep, 0.4)  # it's not great
示例#5
0
    def process_itml(self, **option):
        '''Fit ITML on the training data and store its transformer.

        Keyword arguments are forwarded unchanged to ``ITML_Supervised``.
        The result of ``transformer()`` is stored under ``self.Trans['ITML']``.
        '''
        train_x = self.GeneExp_train
        train_y = self.Label_train

        model = ITML_Supervised(**option)
        model.fit(train_x, train_y)
        self.Trans['ITML'] = model.transformer()
    def test_iris(self):
        # ITML fitted on iris with 200 constraints; the separation threshold
        # is deliberately loose (0.4).
        fitted = ITML_Supervised(num_constraints=200).fit(
            self.iris_points, self.iris_labels)

        embedded = fitted.transform()
        csep = class_separation(embedded, self.iris_labels)
        self.assertLess(csep, 0.4)  # it's not great
示例#7
0
def get_metric():
    """Average the ITML metric over six random subsamples of ad_feature.csv.

    Each round draws ``int(len(data)/300)`` rows without replacement, uses
    columns 2-4 as features and column 1 as the label, and refits the same
    ``ITML_Supervised`` instance.

    Returns:
        The element-wise mean of the six fitted metric matrices.
    """
    n_rounds = 6
    feature_cols = [2, 3, 4]

    ad = pd.read_csv('ad_feature.csv', header=0, sep='\t')
    data = ad.values
    itml = ITML_Supervised(num_constraints=200)

    metric_sum = np.zeros((3, 3))
    for _ in range(n_rounds):
        sample = np.array(
            random.sample(data.tolist(), int(len(data) / 300)))
        itml.fit(sample[:, feature_cols], sample[:, 1])
        metric_sum = metric_sum + itml.metric()
    return metric_sum / n_rounds
def test_bounds_parameters_valid(bounds):
  """Check that fitting with the given `bounds` succeeds for both ITML variants.

  The pairs learner is fitted on two labelled pairs and ITML_Supervised on
  four labelled points; the test passes when neither call raises.
  """
  y_pairs = [1, -1]
  pairs = np.array([[[-10., 0.], [10., 0.]], [[0., 50.], [0., -60]]])
  pair_learner = ITML()
  pair_learner.fit(pairs, y_pairs, bounds=bounds)

  y = np.array([1, 0, 1, 0])
  X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]])
  supervised_learner = ITML_Supervised()
  supervised_learner.fit(X, y, bounds=bounds)
示例#9
0
def test_bounds_parameters_valid(bounds):
    """Check that fitting with the given `bounds` succeeds for both ITML variants.

    The pairs learner is fitted on two labelled pairs and ITML_Supervised on
    four labelled points; the test passes when neither call raises.
    """
    y_pairs = [1, -1]
    pairs = np.array([[[-10., 0.], [10., 0.]], [[0., 50.], [0., -60]]])
    pair_learner = ITML()
    pair_learner.fit(pairs, y_pairs, bounds=bounds)

    y = np.array([1, 0, 1, 0])
    X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]])
    supervised_learner = ITML_Supervised()
    supervised_learner.fit(X, y, bounds=bounds)
示例#10
0
def test_bounds_parameters_invalid(bounds):
  """Check that an invalid `bounds` makes both ITML variants raise on fit.

  Each learner is built outside the `pytest.raises` context so that only
  the `fit` call is expected to raise.
  """
  y_pairs = [1, -1]
  pairs = np.array([[[-10., 0.], [10., 0.]], [[0., 50.], [0., -60]]])
  pair_learner = ITML()
  with pytest.raises(Exception):
    pair_learner.fit(pairs, y_pairs, bounds=bounds)

  y = np.array([1, 0, 1, 0])
  X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]])
  supervised_learner = ITML_Supervised()
  with pytest.raises(Exception):
    supervised_learner.fit(X, y, bounds=bounds)
示例#11
0
def test_bounds_parameters_invalid(bounds):
    """Check that an invalid `bounds` makes both ITML variants raise on fit.

    Each learner is built outside the `pytest.raises` context so that only
    the `fit` call is expected to raise.
    """
    y_pairs = [1, -1]
    pairs = np.array([[[-10., 0.], [10., 0.]], [[0., 50.], [0., -60]]])
    pair_learner = ITML()
    with pytest.raises(Exception):
        pair_learner.fit(pairs, y_pairs, bounds=bounds)

    y = np.array([1, 0, 1, 0])
    X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]])
    supervised_learner = ITML_Supervised()
    with pytest.raises(Exception):
        supervised_learner.fit(X, y, bounds=bounds)
def sandwich_demo():
    """Plot the sandwich data in input space and under four learned metrics.

    The top row shows the raw data with its 2-nearest-neighbour graph. The
    four cells below show the same plot after transforming the data with
    LMNN, ITML, SDML and LSML respectively. The figure is displayed with
    ``plt.show()``.
    """
    x, y = sandwich_data()
    knn = nearest_neighbors(x, k=2)

    # The first axis takes the whole top row of the 3-row layout.
    input_ax = plt.subplot(3, 1, 1)
    plot_sandwich_data(x, y, input_ax)
    plot_neighborhood_graph(x, knn, y, input_ax)
    input_ax.set_title('input space')
    input_ax.set_aspect('equal')
    input_ax.set_xticks([])
    input_ax.set_yticks([])

    learners = [
        LMNN(),
        ITML_Supervised(num_constraints=200),
        SDML_Supervised(num_constraints=200),
        LSML_Supervised(num_constraints=200),
    ]

    # Cells 3..6 of the 3x2 grid are the two rows below the top plot.
    first_cell = 3
    for offset, learner in enumerate(learners):
        learner.fit(x, y)
        transformed = learner.transform()
        transformed_knn = nearest_neighbors(transformed, k=2)

        cell_ax = plt.subplot(3, 2, first_cell + offset)
        plot_sandwich_data(transformed, y, axis=cell_ax)
        plot_neighborhood_graph(transformed, transformed_knn, y, axis=cell_ax)
        cell_ax.set_title(learner.__class__.__name__)
        cell_ax.set_xticks([])
        cell_ax.set_yticks([])
    plt.show()
示例#13
0
class ITML:
    """ITML metric learning applied on top of a PCA projection.

    ``space_model`` is the project's ``PCA`` helper and ``metric_model`` is
    an ``ITML_Supervised`` built with ``num_constraints`` as its first
    positional argument.
    """

    def __init__(self, num_constraints=200):
        self.space_model = PCA()
        self.metric_model = ITML_Supervised(num_constraints)

    def fit(self, feats, labels):
        """Fits the model to the prescribed data.

        The PCA helper is fitted first; its two return values are stored as
        ``self.eigenvecs`` and ``self.space``. The metric model is then
        fitted on ``self.space.T`` with the same labels.
        """
        self.eigenvecs, self.space = self.space_model.fit(feats, labels)
        self.metric_model.fit(self.space.T, labels)

    def transform(self, y):
        """Transforms the test data according to the model.

        Returns the metric model's transform of ``y``.
        """
        # NOTE(review): the PCA projection of `y` is computed but not used;
        # the metric model was fitted on PCA-projected data, so it probably
        # should transform `test_proj` (possibly transposed) instead of the
        # raw `y`. Left unchanged until the expected orientation is confirmed.
        test_proj, _ = self.space_model.transform(y)
        return self.metric_model.transform(y)
  def test_itml_supervised(self):
    # fit() followed by transform() must match fit_transform() when both
    # estimators start from an identically seeded RandomState.
    rng = np.random.RandomState(1234)
    stepwise = ITML_Supervised(n_constraints=200, random_state=rng)
    stepwise.fit(self.X, self.y)
    stepwise_result = stepwise.transform(self.X)

    rng = np.random.RandomState(1234)
    one_shot = ITML_Supervised(n_constraints=200, random_state=rng)
    one_shot_result = one_shot.fit_transform(self.X, self.y)

    assert_array_almost_equal(stepwise_result, one_shot_result)
 def test_deprecation(self):
     # test that the right deprecation message is thrown.
     # TODO: remove in v.0.5
     expected_msg = ('"num_labeled" parameter is not used.'
                     ' It has been deprecated in version 0.5.0 and will be'
                     'removed in 0.6.0')
     points = np.array([[0, 0], [0, 1], [2, 0], [2, 1]])
     targets = np.array([1, 0, 1, 0])
     estimator = ITML_Supervised(num_labeled=np.inf)
     # The warning is expected from the fit call itself.
     assert_warns_message(DeprecationWarning, expected_msg, estimator.fit,
                          points, targets)
示例#16
0
 def test_deprecation_bounds(self):
     # test that a deprecation message is thrown if bounds is set at
     # initialization
     # TODO: remove in v.0.6
     expected_msg = ('"bounds" parameter from initialization is not used.'
                     ' It has been deprecated in version 0.5.0 and will be'
                     'removed in 0.6.0. Use the "bounds" parameter of this '
                     'fit method instead.')
     points = np.array([[0, 0], [0, 1], [2, 0], [2, 1]])
     targets = np.array([1, 0, 1, 0])
     estimator = ITML_Supervised(bounds=None)
     # The warning is expected from the fit call itself.
     assert_warns_message(DeprecationWarning, expected_msg, estimator.fit,
                          points, targets)
示例#17
0
def runITML(X_train, X_test, y_train, y_test):
    """Fit ITML on the training split and project both splits.

    The projected arrays are saved with ``np.save`` under the names
    ``X_train_ITML`` and ``X_test_ITML``. ``y_test`` is accepted but unused.

    Returns:
        Tuple ``(X_train_proj, X_test_proj)``.
    """
    model = ITML_Supervised(num_constraints=200, verbose=True)
    model.fit(X_train, y_train)

    # Transform both splits first, then write them out in the same order.
    projected = [model.transform(split) for split in (X_train, X_test)]
    for file_stem, array in zip(('X_train_ITML', 'X_test_ITML'), projected):
        np.save(file_stem, array)
    return tuple(projected)
示例#18
0
class ITML:
    """Thin wrapper around ``ITML_Supervised`` with a nearest-centroid scorer."""

    def __init__(self, num_constraints=200):
        self.metric_model = ITML_Supervised(num_constraints)

    def fit(self, features, labels):
        """Fits the model to the prescribed data.

        The training data is remembered on the instance because
        ``predict_proba`` needs it to build class centroids.

        Returns:
            Whatever ``ITML_Supervised.fit`` returns.
        """
        self.X_tr = features
        self.y_train = labels
        return self.metric_model.fit(features, labels)

    def transform(self, y):
        """Transforms the test data according to the model"""
        return self.metric_model.transform(y)

    def predict_proba(self, X_te):
        """Predicts the probabilities of each of the test samples.

        Class centroids are computed from the transformed training data and
        each test row is scored against them with
        ``sk_nearest_neighbour_proba``.

        Returns:
            Array of shape ``(n_test_samples, n_centroids)``.

        Raises:
            AttributeError: if called before ``fit`` (no training data stored).
        """
        test_samples = X_te.shape[0]
        # Keep the stored training data untouched: overwriting self.X_tr with
        # its transformed version would re-transform it on every later call.
        train_proj = self.transform(self.X_tr)
        clf = NearestCentroid()
        clf.fit(train_proj, self.y_train)
        centroids = clf.centroids_
        probabilities = np.zeros((test_samples, centroids.shape[0]))
        # NOTE(review): the centroids live in the transformed space while the
        # rows of X_te are used as given -- confirm callers pass test data
        # that has already been transformed.
        for sample in range(test_samples):
            probabilities[sample] = sk_nearest_neighbour_proba(
                centroids, X_te[sample, :])
        return probabilities
  def test_itml_supervised(self):
    # fit() followed by transform() must match fit_transform() when both
    # fits receive an identically seeded RandomState.
    rng = np.random.RandomState(1234)
    stepwise = ITML_Supervised(num_constraints=200)
    stepwise.fit(self.X, self.y, random_state=rng)
    stepwise_result = stepwise.transform(self.X)

    rng = np.random.RandomState(1234)
    one_shot = ITML_Supervised(num_constraints=200)
    one_shot_result = one_shot.fit_transform(self.X, self.y, random_state=rng)

    assert_array_almost_equal(stepwise_result, one_shot_result)
示例#20
0
def get_dist_func(
    data: Array[np.float64], target: Array[np.float64]
) -> Callable[[Callable[[np.float64, np.float64], np.float64], int, int],
              np.float64]:
    """
    Get function that returns distances between examples in learned space.

    The data is standardised with ``StandardScaler`` and then transformed
    with a freshly fitted ``ITML_Supervised``; the returned closure indexes
    into that transformed copy.

    Args:
        data : Array[np.float64] - training data
        target : Array[np.float64] - target variable values (classes of
            training examples)
    Returns:
        Callable[[Callable[[np.float64, np.float64], np.float64], int, int], np.float64] -- function
        that takes a metric function and two indices of training examples
        and returns the distance between those examples in the learned
        metric space.
    """

    # Get transformed data.
    data_trans: Array[np.float64] = ITML_Supervised().fit_transform(
        StandardScaler().fit_transform(data), target)

    # Computing distance. The index parameters are annotated with the
    # builtin `int`: the `np.int` alias was removed in NumPy 1.24 and would
    # raise AttributeError as soon as this function is defined.
    def dist_func_res(metric: Callable[[np.float64, np.float64], np.float64],
                      i1: int, i2: int) -> np.float64:
        """
        Distance between two training examples in the learned space.

        Args:
            metric : callable applied to the two transformed rows
            i1 : int - index of first training example
            i2 : int - index of second training example
        Returns:
            np.float64 - distance in learned metric space using specified metric
                    between specified training examples.
        """

        # Compute distance in learned metric space using specified metric.
        return metric(data_trans[i1, :], data_trans[i2, :])

    return dist_func_res  # Return distance function.
示例#21
0
    map(lambda x: x.__class__.__name__,
        [learner for (learner, _) in quadruplets_learners]))

# Pair learners, each with the builder that produces its input data.
pairs_learners = [
    (ITML(), build_pairs),
    (MMC(max_iter=2), build_pairs),  # max_iter=2 for faster
    (SDML(), build_pairs),
]
ids_pairs_learners = [
    learner.__class__.__name__ for learner, _ in pairs_learners
]

# Supervised (classification) learners.
classifiers = [
    (Covariance(), build_classification),
    (LFDA(), build_classification),
    (LMNN(), build_classification),
    (NCA(), build_classification),
    (RCA(), build_classification),
    (ITML_Supervised(max_iter=5), build_classification),
    (LSML_Supervised(), build_classification),
    (MMC_Supervised(max_iter=5), build_classification),
    (RCA_Supervised(num_chunks=10), build_classification),
    (SDML_Supervised(), build_classification),
]
ids_classifiers = [
    learner.__class__.__name__ for learner, _ in classifiers
]

# Regression learners.
regressors = [(MLKR(), build_regression)]
ids_regressors = [
    learner.__class__.__name__ for learner, _ in regressors
]

WeaklySupervisedClasses = (_PairsClassifierMixin, _QuadrupletsClassifierMixin)
def main():
    """Evaluate ITML on PCA-reduced CUHK03 features and sweep ``gamma``.

    Steps:
      1. Load the protocol ``.mat`` file and the JSON feature dump.
      2. Project all features onto the first 50 PCA eigenvector columns
         computed from the training rows.
      3. Fit ``ITML_Supervised`` (gamma=0.1), print rank accuracies of the
         nearest-neighbour evaluation, and print k-means accuracy on the
         test set (gallery + query).
      4. Refit for gamma in 0.1..1.0 and plot rank accuracy against gamma.
    """
    print("importing data...")
    data = loadmat('assets/cuhk03_new_protocol_config_labeled.mat')
    with open('assets/feature_data.json') as f:
        features = ujson.load(f)

    print("data imported")
    features = np.array(features)

    # The index arrays are shifted by one to obtain 0-based row indices.
    train_idxs = data['train_idx'].flatten() - 1
    query_idxs = data['query_idx'].flatten() - 1
    camId = data['camId'].flatten()
    gallery_idxs = data['gallery_idx'].flatten() - 1
    labels = data['labels'].flatten()

    N, m = features[train_idxs].shape
    _, eigvecs = calc_eig_pca_small(features[train_idxs].T, m, N)
    m = 50  # number of eigenvector columns kept for the projection
    m_eigvecs = eigvecs[:, :m]
    avg_face = compute_avg_face(features[train_idxs].T)
    phi = features - avg_face
    m_features = np.dot(phi, m_eigvecs)

    itml = ITML_Supervised(verbose=True, num_constraints=5000, gamma=0.1)
    X = m_features[train_idxs]
    Y = labels[train_idxs]
    X_itml = itml.fit_transform(X, Y)
    M = itml.metric()
    plot_3d(X_itml, Y)
    nn_idx_mat = evaluation(knn,
                            features=m_features,
                            gallery_idxs=gallery_idxs,
                            query_idxs=query_idxs,
                            camId=camId,
                            labels=labels,
                            metric='mahalanobis',
                            metric_params={'VI': M})

    acc = get_all_rank_acc(nn_idx_mat, query_idxs, labels)
    print("Accuracy:")
    print(acc)

    test_set_idxs = np.append(gallery_idxs, query_idxs)
    features_ITML = itml.transform(m_features)
    X_test = features_ITML[test_set_idxs]
    Y_test = labels[test_set_idxs]
    n_cluster = np.unique(Y_test).size
    nmi_kmean, acc_kmean = evaluation_k_means(X_test, n_cluster, Y_test)
    print("ITML k-means accuracy (test set):")
    print(acc_kmean)

    gamma = [i / 10 for i in range(1, 11)]
    all_rank_acc_g = []
    for g in gamma:
        # Use the swept value: the previous hard-coded gamma=0.2 made every
        # iteration train the same model, so the curve could not vary.
        itml = ITML_Supervised(verbose=True, num_constraints=5000, gamma=g)
        itml.fit_transform(X, Y)
        M = itml.metric()
        nn_idx_mat = evaluation(knn,
                                features=m_features,
                                gallery_idxs=gallery_idxs,
                                query_idxs=query_idxs,
                                camId=camId,
                                labels=labels,
                                metric='mahalanobis',
                                metric_params={'VI': M})
        acc_g = get_all_rank_acc(nn_idx_mat, query_idxs, labels)
        all_rank_acc_g.append(acc_g)
    # NOTE(review): the legend assumes each accuracy entry holds the rank-1,
    # rank-5 and rank-10 values in that order -- confirm against
    # get_all_rank_acc.
    plt.plot(gamma, all_rank_acc_g)
    plt.legend(('Rank 1', 'Rank 5', 'Rank10'))
    plt.ylabel('Accuracy')
    plt.xlabel('gamma')
    print(all_rank_acc_g)
    plt.show()
示例#23
0
def test_ITML():
    """Smoke-test that ITML_Supervised fits random data without raising.

    Uses 40 random 40-dimensional points with labels 0..19, each appearing
    twice. The stray ``pdb.set_trace()`` that used to end this test was
    removed because a breakpoint blocks any non-interactive test run.
    """
    X = np.random.rand(40, 40)
    Y = np.array([i for j in range(2) for i in range(20)])
    itml = ITML_Supervised(num_constraints=200)
    itml.fit(X, Y)
示例#24
0
 def test_itml(self):
     # Run the estimator checks on a default-constructed ITML_Supervised.
     estimator = ITML_Supervised()
     check_estimator(estimator)
示例#25
0
from models.parts import PartsNet
from models.triplet import TripletNet
from models.voting import VotingNet
from plotter import Plot
from trainer import Trainer

# Run identifier: a fixed prefix plus a timestamp suffix.
name = 'triplet-sgd-nopool' + datetime.now().strftime('_%Y-%m-%d_%H%M%S')
plot = Plot(name)
# Build the triplet network around a ConvNet and restore weights from a
# previously saved state dict before moving it to the GPU.
net = TripletNet(ConvNet())
net.load_state_dict(torch.load('triplet-sgd_2018-05-10_101323_best'))
# net = VotingNet(BkwNet())
net.cuda()

# NOTE(review): the meaning of the (316, 380) argument is not visible here --
# confirm against VIPeR.create.
train, val, test = VIPeR.create((316, 380), shuffle_seed=12345)

# NOTE(review): metric_learn is defined elsewhere; presumably it fits `metric`
# on features that `net` produces for `train` -- confirm.
metric = ITML_Supervised()
metric_learn(metric, net, train)

criterion = nn.TripletMarginLoss().cuda()
optimizer = optim.SGD(net.parameters(), lr=1e-2, momentum=0.9)
# This assignment rebinds the name `scheduler` to the ReduceLROnPlateau
# instance, so whatever `scheduler` referred to before is no longer reachable
# under that name.
scheduler = scheduler.ReduceLROnPlateau(optimizer,
                                        patience=1,
                                        eps=1e-8,
                                        verbose=True)

trainer = Trainer(name,
                  net, (train, val, test),
                  optimizer,
                  scheduler,
                  criterion,
                  plot,
def main(params):
    """Compute a PSD matrix with the configured algorithm and save it.

    ``params`` is read through ``get`` / ``getint`` / ``getfloat`` /
    ``getboolean``. The ``algorithm`` entry selects one of ``identity``,
    ``nca``, ``lmnn``, ``itml``, ``lfda`` or ``arml``; the resulting matrix
    is written to ``psd_matrix.txt`` inside ``results_dir``.

    Raises:
        Exception: if ``algorithm`` is none of the supported names.
    """
    initialize_results_dir(params.get('results_dir'))
    backup_params(params, params.get('results_dir'))

    print('>>> loading data...')

    loader = LoaderFactory().create(
        name=params.get('dataset'),
        root=params.get('dataset_dir'),
        random=True,
        seed=params.getint('split_seed'))
    X_train, y_train, X_test, y_test = loader()

    print('<<< data loaded')

    print('>>> computing psd matrix...')

    algorithm = params.get('algorithm')

    if algorithm == 'identity':
        psd_matrix = np.identity(X_train.shape[1], dtype=X_train.dtype)

    elif algorithm == 'arml':
        learner = TripleLearner(
            optimizer=params.get('optimizer'),
            optimizer_params={
                'lr': params.getfloat('lr'),
                'momentum': params.getfloat('momentum'),
                'weight_decay': params.getfloat('weight_decay'),
            },
            criterion=params.get('criterion'),
            criterion_params={'calibration': params.getfloat('calibration')},
            n_epochs=params.getint('n_epochs'),
            batch_size=params.getint('batch_size'),
            random_initialization=params.getboolean('random_initialization',
                                                    fallback=False),
            update_triple=params.getboolean('update_triple', fallback=False),
            device=params.get('device'),
            seed=params.getint('learner_seed'))

        psd_matrix = learner(X_train,
                             y_train,
                             n_candidate_mins=params.getint('n_candidate_mins',
                                                            fallback=1))

    else:
        # The remaining algorithms share the same fit / get-matrix protocol
        # and differ only in how the estimator is constructed.
        if algorithm == 'nca':
            estimator = NCA(init='auto',
                            verbose=True,
                            random_state=params.getint('algorithm_seed'))
        elif algorithm == 'lmnn':
            estimator = LMNN(init='auto',
                             verbose=True,
                             random_state=params.getint('algorithm_seed'))
        elif algorithm == 'itml':
            estimator = ITML_Supervised(
                verbose=True,
                random_state=params.getint('algorithm_seed'))
        elif algorithm == 'lfda':
            estimator = LFDA()
        else:
            raise Exception('unsupported algorithm')

        estimator.fit(X_train, y_train)
        psd_matrix = estimator.get_mahalanobis_matrix()

    print('<<< psd matrix got')

    output_path = os.path.join(params.get('results_dir'), 'psd_matrix.txt')
    np.savetxt(output_path, psd_matrix)
示例#27
0
class MLPipe:
    """Grid-searched scikit-learn pipeline selected by a model name.

    The pipeline has four steps (scaling, feature selection, metric learning,
    classifier). ``feature_selection_param_grid`` maps a model name to the
    list of parameter grids that ``GridSearchCV`` explores for it.
    """

    # Pipeline template. It is a class attribute and every instance stores a
    # reference to this same object in ``self._pipe``.
    pipe = Pipeline([('scaling', StandardScaler()),
                     ('feature_selection',
                      SelectFromModel(RandomForestClassifier(n_estimators=100, random_state=42), threshold='median')),
                     ('metric_learning', None),
                     ('classifier', SVC())])
    # Path template for the saved grid search; formatted with the model name.
    save_path = './titanic/pipe_{}.bin'
    # Parameter grids keyed by model name; each value is a list of grids.
    feature_selection_param_grid = {
        'SVC': [
            {
                'scaling': [StandardScaler(), None],
                'metric_learning': [None,  LMNN()],
                'feature_selection': [SelectFromModel(RandomForestClassifier(n_estimators=100, random_state=42), threshold='median'), None],
                # 'feature_selection__estimator': [RandomForestClassifier(n_estimators=100, random_state=42), SVC(C=1000), KNeighborsClassifier()],
                'classifier': [SVC()],
                'classifier__kernel': ['rbf'],
                'classifier__C': [0.001, 0.01, 0.1, 1, 10, 100],
                'classifier__gamma': [0.001, 0.01, 0.1, 1, 10, 100]
            },
            {
                'scaling': [StandardScaler(), None],
                'metric_learning': [None,  LMNN()],
                'feature_selection': [SelectFromModel(RandomForestClassifier(n_estimators=100, random_state=42), threshold='median'), None],
                'classifier': [SVC()],
                'classifier__kernel': ['linear'],
                'classifier__C': [0.001, 0.01, 0.1, 1, 10, 100],
            }
        ],
        'rfc': [
            {
                #'scaling': [StandardScaler(), None],
                'scaling': [None],
                'metric_learning': [None,  LMNN()],
                'feature_selection': [SelectFromModel(RandomForestClassifier(n_estimators=100, random_state=42), threshold='median'), None],
                'classifier': [RandomForestClassifier()],
                'classifier__n_estimators': [10, 25, 50, 75, 100],
                'classifier__max_depth': [None, 5, 10, 25],
                'classifier__min_samples_split': [5, 10, 15]
            }
        ],
        'knn': [
            {
                'scaling': [StandardScaler(), MinMaxScaler(), None],
                'metric_learning': [None,  LMNN(), ITML_Supervised(num_constraints=200)],
                'feature_selection': [
                    SelectFromModel(RandomForestClassifier(n_estimators=100, random_state=42), threshold='median'),
                    None],
                'classifier': [KNeighborsClassifier()],
                'classifier__n_neighbors': [2, 3, 4, 5],
                'classifier__algorithm': ['auto', 'ball_tree', 'kd_tree']
            }
        ],
        'dt': [
            {
                'scaling': [StandardScaler(), MinMaxScaler(), None],
                'metric_learning': [None],
                'feature_selection': [
                    SelectFromModel(RandomForestClassifier(n_estimators=100, random_state=42), threshold='median'),
                    None],
                'classifier': [DecisionTreeClassifier()],
                'classifier__criterion': ['gini', 'entropy'],
                'classifier__max_features': ['auto', 'sqrt', 'log2'],
                'classifier__max_depth': [None, 5, 10, 15]
            }
        ],
        'gbc': [
            {
                'scaling': [StandardScaler(), None],
                'metric_learning': [None, LMNN()],
                'feature_selection': [
                    SelectFromModel(RandomForestClassifier(n_estimators=100, random_state=42), threshold='median'),
                    None],
                'classifier': [GradientBoostingClassifier()],
                'classifier__loss': ['deviance'],
                'classifier__learning_rate': [0.1, 1, 10],
                'classifier__n_estimators': [10, 50, 100],
                'classifier__criterion': ['friedman_mse'],
                'classifier__max_features': ['auto'],
                'classifier__max_depth': [None, 2, 5, 10, 15, 25],
                'classifier__min_samples_split': [5, 10, 15]
            }
        ],
        'xgb': [
            {
                'scaling': [StandardScaler()],#, None],
                'metric_learning': [None],#, LMNN(), ITML_Supervised(num_constraints=200)],
                'feature_selection': [None],
                'classifier': [xgb.XGBClassifier()],
                'classifier__n_estimators': [500, 1000, 2000],
                'classifier__max_depth': [4, 6, 8, 10],
                'classifier__min_child_weight': [1, 2, 3],
                'classifier__gamma': [0.4, 0.6, 0.8, 0.9, 1],
                'classifier__subsample': [0.4, 0.6, 0.8, 1.0],
                'classifier__colsample_bytree': [0.4, 0.6, 0.8, 1.0]
            }
        ]
    }

    def __init__(self, model_name: str):
        """Select the parameter grid and save paths for ``model_name``.

        Raises:
            KeyError: if ``model_name`` is not a key of
                ``feature_selection_param_grid``.
        """
        print('model_name: %s' % model_name)
        self._model_name = model_name
        self._param_grid = MLPipe.feature_selection_param_grid[model_name]
        self._save_path = MLPipe.save_path.format(model_name)
        self._save_best_path = self._save_path + '-best'
        self._model = None
        self._pipe = MLPipe.pipe

    def fit_model(self, train_X: list, train_y: list):
        """Load a saved grid search or run a new one, then keep its best estimator.

        When ``load_model`` returns a falsy value, a 5-fold ``GridSearchCV``
        over the selected grid is fitted and saved to ``self._save_path``.
        """
        model = load_model(self._save_path)
        if not model:
            # create model, if not loading file
            grid_search = GridSearchCV(self._pipe, self._param_grid, cv=5, n_jobs=-1)
            grid_search.fit(train_X, train_y)
            save_model(self._save_path, grid_search)
            model = grid_search

        print('best score: {:.2f}'.format(model.best_score_))
        print('best estimator \n{}'.format(model.best_estimator_))
        self._model = model.best_estimator_

    def predict(self, test_X: list) -> list:
        """Predict with the stored model and cast the predictions to int."""
        test_y = self._model.predict(test_X).astype(int)
        return test_y

    def get_model(self) -> dict:
        """Return the stored model.

        NOTE(review): the ``dict`` return annotation does not match what
        ``fit_model`` stores (a ``best_estimator_``) -- confirm and correct.
        """
        return self._model

    def save_best_model(self):
        """Save the stored model to the ``-best`` path."""
        save_model(self._save_best_path, self._model)
    
    def load_best_model(self):
        """Replace the stored model with the one loaded from the ``-best`` path."""
        self._model = load_model(self._save_best_path)

    def get_cv_failure_data(self, train_X: list, train_y: list):
        """Return the indices of samples mispredicted during 5-fold CV.

        NOTE(review): ``train_X`` / ``train_y`` are annotated as ``list`` but
        are indexed with index arrays below, which plain lists do not
        support -- presumably NumPy arrays are expected; confirm.
        """
        ret_index = np.array([])
        # This is the same object as self._model, so the refits in the loop
        # below also change the stored model.
        evaluate_model = self._model
        kf = KFold(n_splits=5)
        for train_index, test_index in kf.split(train_X):
            evaluate_model.fit(train_X[train_index], train_y[train_index])
            evaluate_y = evaluate_model.predict(train_X[test_index])
            # Not used afterwards; the comparison below indexes train_y again.
            correct_eval_y = train_y[test_index]

            # Keep the test indices whose prediction differs from the label.
            ret_index = np.concatenate((ret_index, np.array(test_index)[evaluate_y != train_y[test_index]]))

        return list(ret_index.astype(int))
 def test_itml_supervised(self):
   # The learned transformer L must satisfy L^T L == metric().
   rng = np.random.RandomState(1234)
   model = ITML_Supervised(num_constraints=200)
   model.fit(self.X, self.y, random_state=rng)
   components = model.transformer_
   reconstructed = components.T.dot(components)
   assert_array_almost_equal(reconstructed, model.metric())
 def test_itml_supervised(self):
   # The learned components L must satisfy
   # L^T L == get_mahalanobis_matrix().
   rng = np.random.RandomState(1234)
   model = ITML_Supervised(num_constraints=200, random_state=rng)
   model.fit(self.X, self.y)
   components = model.components_
   reconstructed = components.T.dot(components)
   assert_array_almost_equal(reconstructed, model.get_mahalanobis_matrix())
示例#30
0
            dist_func=lambda x1, x2: np.sqrt(np.sum(np.abs(x1 - x2)**2.0, 1)))
}

# Initialize dictionary for storing results: one uninitialised float array
# per RBA name. The builtin `float` is used as dtype because the `np.float`
# alias was removed in NumPy 1.24.
res_dict = {
    key: np.empty(NUM_FEATURES_TO_SELECT_LIM, dtype=float) for key in rbas
}

# Go over RBAs.
for rba_name in rbas.keys():

    print("### Testing {0} ###".format(rba_name))

    # Initialize next pipeline.
    clf_pipeline = Pipeline([('scaling', StandardScaler()),
                             ('lmf', ITML_Supervised()),
                             ('rba', rbas[rba_name]), ('clf', clf)])

    # Go over values on x axis.
    for num_features_to_select in np.arange(1, NUM_FEATURES_TO_SELECT_LIM + 1):

        print("{0}/{1}".format(num_features_to_select,
                               NUM_FEATURES_TO_SELECT_LIM))

        # Set parameter.
        clf_pipeline.set_params(
            rba__n_features_to_select=num_features_to_select)

        # Compute score of 10 runs of 10 fold cross-validation.
        score = np.mean(
            cross_val_score(clf_pipeline,
示例#31
0
from metric_learn import ITML_Supervised
from sklearn.datasets import load_iris

# Load the iris samples together with their class labels.
iris_data = load_iris()
X, Y = iris_data['data'], iris_data['target']

# Fit ITML on the full data set using 200 constraints.
itml = ITML_Supervised(num_constraints=200)
itml.fit(X, Y)
示例#32
0
t0 = time()
pca = RandomizedPCA(n_components=n_components, whiten=True).fit(X_train)
print("done in %0.3fs" % (time() - t0))

eigenfaces = pca.components_.reshape((n_components, h, w))

print("Projecting the input data on the eigenfaces orthonormal basis")
t0 = time()
X_train_pca = pca.transform(X_train)
X_test_pca = pca.transform(X_test)
print("done in %0.3fs" % (time() - t0))

# Learn an ITML metric on the PCA-projected training data and transform both
# splits with it.
print("Trying ITML")
param_grid = {''}  # placeholder value; not read by the ITML step below
itml = ITML(num_constraints=200)
X_tr = itml.fit(X_train_pca, y_train).transform(X_train_pca)
X_te = itml.transform(X_test_pca)

acc, y_pred = classifier.sk_nearest_neighbour(X_tr, y_train, X_te, y_test)
# Apply the format: passing `acc` as a second print argument left the "%s"
# placeholder unformatted in the output.
print("accuracy = %s" % (acc,))
print(classification_report(y_test, y_pred, target_names=target_names))
print(confusion_matrix(y_test, y_pred, labels=range(n_classes)))


###############################################################################
# Train a SVM classification model

print("Fitting the classifier to the training set")
t0 = time()
param_grid = {'C': [1e3, 5e3, 1e4, 5e4, 1e5],
示例#33
0
  def test_iris(self):
    # ITML fitted on the iris fixtures must bring the class separation of
    # the transformed points below 0.2.
    points, labels = self.iris_points, self.iris_labels

    model = ITML_Supervised(num_constraints=200)
    model.fit(points, labels)

    embedded = model.transform(points)
    separation = class_separation(embedded, labels)
    self.assertLess(separation, 0.2)
# Compute PCA_NCA Learning: fit NCA on the PCA projection of the training
# samples and report the wall-clock fitting time.
print("\n-----PCA_NCA-----")
nca = NCA(max_iter=20, verbose=True)
start_time = time.time()
nca.fit(pca.train_sample_projection, original_train_labels)
end_time = time.time()
print("Learning time: %s" % (end_time - start_time))
# Transform the PCA-projected query and gallery features with the fitted NCA
# before handing them to the k-means evaluation.
transformed_query_features = nca.transform(pca_query_features)
transformed_gallery_features = nca.transform(pca_gallery_features)
compute_k_mean(num_of_clusters, transformed_query_features,
               transformed_gallery_features, gallery_labels)

# Compute ITML (Information Theoretic Metric Learning) on the original
# (non-PCA) training features. Unlike the NCA section above, this fit is not
# timed.
print("\n-----ITML-----")
itml = ITML_Supervised(max_iter=20,
                       convergence_threshold=1e-5,
                       num_constraints=500,
                       verbose=True)
itml.fit(original_train_features, original_train_labels)
# The query/gallery variables are reused, overwriting the NCA results.
transformed_query_features = itml.transform(query_features)
transformed_gallery_features = itml.transform(gallery_features)
compute_k_mean(num_of_clusters, transformed_query_features,
               transformed_gallery_features, gallery_labels)

# Compute PCA_ITML
print("\n-----PCA_ITML-----")
itml = ITML_Supervised(max_iter=20,
                       convergence_threshold=1e-5,
                       num_constraints=500,
                       verbose=True)
start_time = time.time()
itml.fit(pca.train_sample_projection, original_train_labels)
 def test_itml_supervised(self):
   # The learned transformer L must satisfy
   # L^T L == get_mahalanobis_matrix().
   rng = np.random.RandomState(1234)
   model = ITML_Supervised(num_constraints=200)
   model.fit(self.X, self.y, random_state=rng)
   components = model.transformer_
   reconstructed = components.T.dot(components)
   assert_array_almost_equal(reconstructed, model.get_mahalanobis_matrix())
if Method == 'LMNN':
    print("Method: LMNN", '\n')
    lmnn = LMNN(k=3, learn_rate=1e-6, verbose=False)
    x = lmnn.fit(FSTrainData, TrainLabels)
    TFSTestData = x.transform(FSTestData)
    print('Transformation Done', '\n')

elif Method == 'COV':
    print("Method: COV", '\n')
    cov = Covariance().fit(FSTrainData)
    TFSTestData = cov.transform(FSTestData)
    print('Transformation Done', '\n')

elif Method == 'ITML':
    print("Method: ITML", '\n')
    itml = ITML_Supervised(num_constraints=200, A0=None)
    x = itml.fit(FSTrainData, TrainLabels)
    TFSTestData = x.transform(FSTestData)
    print('Transformation Done', '\n')

elif Method == 'LFDA':
    print("Method: LFDA", '\n')
    lfda = LFDA(k=4, dim=1)
    x = lfda.fit(FSTrainData, TrainLabels)
    TFSTestData = x.transform(FSTestData)
    print('Transformation Done', '\n')

elif Method == 'NCA':
    print("Method: NCA", '\n')
    #print('Max', TrainData.max(axis=0))
    #print('sssssssss', len(TrainData[0]))