# Timestamp used to report the total feature-extraction time below.
start = time.time()

# Read the training set
train_images_filenames, train_labels = io.load_training_set()
print('Loaded {} train images.'.format(len(train_images_filenames)))

# Dense SIFT feature extraction: reuse the cached descriptors when they are
# available on disk, otherwise compute them in parallel and cache the result.
print('Obtaining dense features...')
try:
    D = io.load_object('train_dense_descriptors', ignore=True)
    L = io.load_object('train_dense_labels', ignore=True)
    I = io.load_object('train_dense_indices', ignore=True)
except IOError:
    # Cache miss: extract descriptors for every image of every class.
    D, L, I, _ = feature_extraction.parallel_dense(train_images_filenames,
                                                   train_labels,
                                                   num_samples_class=-1,
                                                   n_jobs=settings.n_jobs)
    io.save_object(D, 'train_dense_descriptors', ignore=True)
    io.save_object(L, 'train_dense_labels', ignore=True)
    io.save_object(I, 'train_dense_indices', ignore=True)
print('Elapsed time: {:.2f} s'.format(time.time() - start))

# Reduce descriptor dimensionality with PCA (D is replaced by its projection).
temp = time.time()
print('Applying PCA...')
pca, D = feature_extraction.pca(D)
print('Elapsed time: {:.2f} s'.format(time.time() - temp))

# Fit a Gaussian Mixture Model on the reduced descriptors; the codebook name
# encodes the current codebook size and PCA dimension from settings.
temp = time.time()
print('Creating GMM model with {} Gaussians'.format(settings.codebook_size))
gmm = bovw.create_gmm(D, codebook_name='gmm_{}_dense_pca_{}'.format(
    settings.codebook_size, settings.pca_reduction))
print('Elapsed time: {:.2f} s'.format(time.time() - temp))
# Timestamp used to report the total feature-extraction time below.
start = time.time()

# Read the training set
train_images_filenames, train_labels = io.load_training_set()
print('Loaded {} train images.'.format(len(train_images_filenames)))

# Dense SIFT features: load the cached descriptors / labels / indices /
# keypoint positions if present, otherwise recompute and refresh the cache.
print('Obtaining sift features...')
try:
    D = io.load_object('train_dense_descriptors', ignore=True)
    L = io.load_object('train_dense_labels', ignore=True)
    I = io.load_object('train_dense_indices', ignore=True)
    Kp_pos = io.load_object('train_dense_keypoints', ignore=True)
except IOError:
    print('error')
    D, L, I, Kp = feature_extraction.parallel_dense(train_images_filenames,
                                                    train_labels,
                                                    num_samples_class=-1,
                                                    n_jobs=N_JOBS)
    io.save_object(D, 'train_dense_descriptors', ignore=True)
    io.save_object(L, 'train_dense_labels', ignore=True)
    io.save_object(I, 'train_dense_indices', ignore=True)
    # Only the (x, y) coordinates of the keypoints are cached — presumably
    # because the keypoint objects themselves do not pickle; verify if reused.
    Kp_pos = np.array([kp.pt for kp in Kp], dtype=np.float64)
    io.save_object(Kp_pos, 'train_dense_keypoints', ignore=True)
print('Elapsed time: {:.2f} s'.format(time.time() - start))

# Build the visual-word codebook (K is a module-level setting).
temp = time.time()
print('Creating codebook with {} visual words'.format(K))
codebook = bovw.create_codebook(D, codebook_name='default_codebook')
print('Elapsed time: {:.2f} s'.format(time.time() - temp))

temp = time.time()
print('Getting visual words from training set...')
def train():
    """Grid-sweep training for an intersection-kernel SVM on Fisher vectors.

    Sweeps three module-level parameter lists (dense_sampling_density,
    pca_reduction, codebook_size).  For every combination it extracts (or
    loads cached) dense features, reduces them with PCA, fits a GMM, builds
    L2-normalized Fisher vectors, and cross-validates an SVM with a
    precomputed intersection kernel via RandomizedSearchCV.  All CV results
    and the best parameter set are pickled incrementally under ./ignore/.
    """
    best_accuracy = 0
    best_params = {}
    cv_results = {}

    """ SETTINGS """
    # NOTE(review): n_jobs forced to 1 here — presumably to avoid nested
    # parallelism during the sweep; confirm before changing.
    settings.n_jobs = 1

    # Read the training set
    train_images_filenames, train_labels = io.load_training_set()
    io.log('Loaded {} train images.'.format(len(train_images_filenames)))

    # Parameter sweep for dense SIFT
    for ds in dense_sampling_density:
        io.log('Obtaining dense features with sampling parameter {}...'.format(
            ds))
        start_sift = time.time()
        # parallel_dense reads the sampling density from the settings module,
        # so it must be updated before the call.
        settings.dense_sampling_density = ds
        try:
            # Cached per-density descriptors, labels and indices, if present.
            D, L, I = io.load_object('train_dense_descriptors_{}'.format(settings.dense_sampling_density), ignore=True), \
                io.load_object('train_dense_labels_{}'.format(settings.dense_sampling_density), ignore=True), \
                io.load_object('train_dense_indices_{}'.format(settings.dense_sampling_density), ignore=True)
        except IOError:
            # Cache miss: extract features for all images and refresh cache.
            D, L, I, _ = feature_extraction.parallel_dense(
                train_images_filenames,
                train_labels,
                num_samples_class=-1,
                n_jobs=settings.n_jobs)
            io.save_object(D, 'train_dense_descriptors_{}'.format(
                settings.dense_sampling_density), ignore=True)
            io.save_object(L, 'train_dense_labels_{}'.format(
                settings.dense_sampling_density), ignore=True)
            io.save_object(I, 'train_dense_indices_{}'.format(
                settings.dense_sampling_density), ignore=True)
        sift_time = time.time() - start_sift
        io.log('Elapsed time: {:.2f} s'.format(sift_time))

        # Parameter sweep for PCA
        for dim_red in pca_reduction:
            io.log('Applying PCA (dim = {})...'.format(dim_red))
            start_pca = time.time()
            # feature_extraction.pca reads the target dimension from settings.
            settings.pca_reduction = dim_red
            pca, D_pca = feature_extraction.pca(D)
            pca_time = time.time() - start_pca
            io.log('Elapsed time: {:.2f} s'.format(pca_time))

            # Parameter sweep for codebook size
            for k in codebook_size:
                io.log('Creating GMM model (k = {})'.format(k))
                start_gmm = time.time()
                settings.codebook_size = k
                gmm = bovw.create_gmm(
                    D_pca, 'gmm_{}_dense_{}_pca_{}'.format(k, ds, dim_red))
                gmm_time = time.time() - start_gmm
                io.log('Elapsed time: {:.2f} s'.format(gmm_time))

                io.log('Getting Fisher vectors from training set...')
                start_fisher = time.time()
                fisher, labels = bovw.fisher_vectors(D_pca, L, I, gmm,
                                                     normalization='l2')
                fisher_time = time.time() - start_fisher
                io.log('Elapsed time: {:.2f} s'.format(fisher_time))

                # Standardize the Fisher vectors before the kernel SVM.
                io.log('Scaling features...')
                start_scaler = time.time()
                std_scaler = StandardScaler().fit(fisher)
                vis_words = std_scaler.transform(fisher)
                scaler_time = time.time() - start_scaler
                io.log('Elapsed time: {:.2f} s'.format(scaler_time))

                io.log('Optimizing SVM hyperparameters...')
                start_crossvalidation = time.time()
                # kernel='precomputed': the Gram matrix is supplied to fit().
                svm = SVC(kernel='precomputed')
                random_search = RandomizedSearchCV(svm, params_distribution,
                                                   n_iter=n_iter,
                                                   scoring='accuracy',
                                                   n_jobs=settings.n_jobs,
                                                   refit=False,
                                                   cv=3,
                                                   verbose=1)
                # Precompute Gram matrix
                gram = kernels.intersection_kernel(vis_words, vis_words)
                random_search.fit(gram, labels)
                crossvalidation_time = time.time() - start_crossvalidation
                io.log('Elapsed time: {:.2f} s'.format(crossvalidation_time))

                # Convert MaskedArrays to ndarrays to avoid unpickling bugs
                results = random_search.cv_results_
                results['param_C'] = results['param_C'].data

                # Appending all parameter-scores combinations
                cv_results.update({
                    (k, dim_red, ds): {
                        'cv_results': results,
                        'sift_time': sift_time,
                        'pca_time': pca_time,
                        'gmm_time': gmm_time,
                        'fisher_time': fisher_time,
                        'scaler_time': scaler_time,
                        'crossvalidation_time': crossvalidation_time,
                        'total_time': sift_time + pca_time + gmm_time +
                        fisher_time + scaler_time + crossvalidation_time
                    }
                })
                # Checkpoint after every combination so a crash loses nothing.
                io.save_object(
                    cv_results,
                    'intersection_svm_optimization_fisher_vectors_l2',
                    ignore=True)

                # Obtaining the parameters which yielded the best accuracy
                if random_search.best_score_ > best_accuracy:
                    best_accuracy = random_search.best_score_
                    best_params = random_search.best_params_
                    best_params.update({'k': k, 'pca': dim_red, 'ds': ds})

                io.log('-------------------------------\n')

    io.log('\nSaving best parameters...')
    io.save_object(
        best_params,
        'best_params_intersection_svm_optimization_fisher_vectors_l2',
        ignore=True)
    best_params_file = os.path.abspath(
        './ignore/best_params_intersection_svm_optimization_fisher_vectors_l2.pickle'
    )
    io.log('Saved at {}'.format(best_params_file))

    io.log('\nSaving all cross-validation values...')
    io.save_object(cv_results,
                   'intersection_svm_optimization_fisher_vectors_l2',
                   ignore=True)
    cv_results_file = os.path.abspath(
        './ignore/intersection_svm_optimization_fisher_vectors_l2.pickle')
    io.log('Saved at {}'.format(cv_results_file))

    io.log('\nBEST PARAMS')
    io.log('k={}, C={}, dim_red={}, dense_grid={} --> accuracy: {:.3f}'.format(
        best_params['k'], best_params['C'], best_params['pca'],
        best_params['ds'], best_accuracy))
def train():
    """Optimize an intersection-kernel SVM over a sweep of codebook sizes.

    Loads (or extracts and caches) dense SIFT features for the training set,
    then for each codebook size k in 2**7 .. 2**15: builds a k-means codebook,
    computes un-normalized visual-word histograms, standardizes them, and
    cross-validates SVC(kernel='precomputed') over C with RandomizedSearchCV
    using a precomputed intersection-kernel Gram matrix.  All CV results are
    pickled incrementally and the best (k, C) pair is reported at the end.

    Side effects: reads/writes pickled objects via ``io``; prints progress.
    Returns: None.
    """
    start = time.time()

    # Read the training set
    train_images_filenames, train_labels = io.load_training_set()
    print('Loaded {} train images.'.format(len(train_images_filenames)))

    # Feature extraction with sift: use the on-disk cache when available.
    print('Obtaining dense sift features...')
    try:
        D, L, I = io.load_object('train_dense_descriptors', ignore=True), \
            io.load_object('train_dense_labels', ignore=True), \
            io.load_object('train_dense_indices', ignore=True)
    except IOError:
        D, L, I, _ = feature_extraction.parallel_dense(train_images_filenames,
                                                       train_labels,
                                                       num_samples_class=-1,
                                                       n_jobs=N_JOBS)
        io.save_object(D, 'train_dense_descriptors', ignore=True)
        io.save_object(L, 'train_dense_labels', ignore=True)
        io.save_object(I, 'train_dense_indices', ignore=True)
    print('Elapsed time: {:.2f} s'.format(time.time() - start))

    # Start hyperparameters optimization
    print('\nSTARTING HYPERPARAMETER OPTIMIZATION FOR INTERSECTION SVM')
    codebook_k_values = [2 ** i for i in range(7, 16)]
    params_distribution = {'C': np.logspace(-4, 3, 10 ** 6)}
    n_iter = 100
    best_accuracy = 0
    best_params = {}
    cv_results = {}

    # FIX: the uint32 cast is loop-invariant; it used to run inside the
    # k-loop, re-allocating and re-casting the full descriptor matrix on
    # every iteration.  Casting once here is behavior-identical.
    D = D.astype(np.uint32)

    # Iterate codebook values
    for k in codebook_k_values:
        temp = time.time()
        print('Creating codebook with {} visual words'.format(k))
        codebook = bovw.create_codebook(
            D, codebook_name='codebook_{}_dense'.format(k))
        print('Elapsed time: {:.2f} s'.format(time.time() - temp))

        temp = time.time()
        print('Getting visual words from training set...')
        vis_words, labels = bovw.visual_words(D, L, I, codebook,
                                              normalization=None)
        print('Elapsed time: {:.2f} s'.format(time.time() - temp))

        # Standardize the histograms before the kernel computation.
        temp = time.time()
        print('Scaling features...')
        std_scaler = StandardScaler().fit(vis_words)
        vis_words = std_scaler.transform(vis_words)
        print('Elapsed time: {:.2f} s'.format(time.time() - temp))

        temp = time.time()
        print('Optimizing SVM hyperparameters...')
        # kernel='precomputed': fit() receives the Gram matrix directly.
        svm = SVC(kernel='precomputed')
        random_search = RandomizedSearchCV(svm, params_distribution,
                                           n_iter=n_iter,
                                           scoring='accuracy',
                                           n_jobs=N_JOBS,
                                           refit=False,
                                           verbose=1,
                                           cv=4)
        # Precompute Gram matrix
        gram = kernels.intersection_kernel(vis_words, vis_words)
        random_search.fit(gram, labels)
        print('Elapsed time: {:.2f} s'.format(time.time() - temp))

        # Convert MaskedArrays to ndarrays to avoid unpickling bugs
        results = random_search.cv_results_
        results['param_C'] = results['param_C'].data

        # Appending all parameter-scores combinations; checkpoint each k so
        # a crash mid-sweep loses nothing.
        cv_results.update({k: results})
        io.save_object(cv_results, 'intersection_dense_none_svm_optimization')

        # Obtaining the parameters which yielded the best accuracy
        if random_search.best_score_ > best_accuracy:
            best_accuracy = random_search.best_score_
            best_params = random_search.best_params_
            best_params.update({'k': k})

        print('-------------------------------\n')

    print('\nBEST PARAMS')
    print('k={}, C={} --> accuracy: {:.3f}'.format(best_params['k'],
                                                   best_params['C'],
                                                   best_accuracy))

    print('Saving all cross-validation values...')
    io.save_object(cv_results, 'intersection_dense_none_svm_optimization')
    print('Done')