def train_data(data, labels, plot=False, C=3, W=32, H=32):
    """Train an SVM classifier on Saak features extracted from ``data``.

    Args:
        data: flat image array; reshaped to (-1, C, W, H) before the
            multi-stage Saak transform.
        labels: class labels aligned with ``data``.
        plot: forwarded to ``entropy_test`` for optional visualization.
        C, W, H: channels and spatial size of each image (default CIFAR-like
            3x32x32).

    Returns:
        Tuple ``(clf, filters, means, final_feat_dim, idx, pca)`` — the
        trained classifier plus everything needed to apply the identical
        transform chain to unseen test data.
    """
    data = data.reshape(-1, C, W, H)
    PrintHelper.print('Incoming data shape is %s' % str(data.shape))
    filters, means, outputs = saak.multi_stage_saak_trans(data, energy_thresh=0.97)
    # Per-datapoint dimensionality after collapsing augmented kernel pairs.
    # NOTE: must be floor division — true `/` yields a float under Python 3
    # and makes the shape assert below fail when shape[1]-1 is odd.
    final_feat_dim = sum(
        ((output.shape[1] - 1) // 2 + 1) * output.shape[2] * output.shape[3]
        for output in outputs
    )
    final_feat = saak.get_final_feature(outputs)
    assert final_feat.shape[1] == final_feat_dim
    # Prune low-information coefficients with an entropy test
    # (an f-test variant was used in an earlier revision).
    selected_feat, idx = entropy_test(final_feat, labels, plot)
    reduced_feat, pca = reduce_feat_dim(selected_feat, dim=248)
    clf = svm_classifier(reduced_feat, labels)
    pred = clf.predict(reduced_feat)
    acc = sklearn.metrics.accuracy_score(labels, pred)
    print('training acc is {}'.format(acc))
    return clf, filters, means, final_feat_dim, idx, pca
def train_data(data, labels):
    """Train a KNN classifier on Saak features of 3x32x32 images.

    Variant of the parameterized ``train_data`` above: hard-coded CIFAR-like
    shape, f-test feature selection, and a 20-neighbor KNN instead of SVM.

    Args:
        data: flat image array; reshaped to (-1, 3, 32, 32).
        labels: class labels aligned with ``data``.

    Returns:
        Tuple ``(clf, filters, means, final_feat_dim, idx, pca)``.
    """
    data = data.reshape(-1, 3, 32, 32)
    filters, means, outputs = saak.multi_stage_saak_trans(data, energy_thresh=0.97)
    # Per-datapoint dimensionality; floor division keeps this an int under
    # Python 3 (true `/` would produce a float and can break the assert).
    final_feat_dim = sum(
        ((output.shape[1] - 1) // 2 + 1) * output.shape[2] * output.shape[3]
        for output in outputs
    )
    print('final feature dimension is {}'.format(final_feat_dim))
    final_feat = saak.get_final_feature(outputs)
    assert final_feat.shape[1] == final_feat_dim
    # Remove some of the features with an f-test
    selected_feat, idx = f_test(final_feat, labels, thresh=0.75)
    reduced_feat, pca = reduce_feat_dim(selected_feat, dim=248)
    # KNN chosen over SVM for this variant.
    clf = knn_classifier(reduced_feat, labels, 20)
    pred = clf.predict(reduced_feat)
    acc = sklearn.metrics.accuracy_score(labels, pred)
    print('training acc is {}'.format(acc))
    return clf, filters, means, final_feat_dim, idx, pca
def create_test_dataset():
    """Compute Saak features for the full test split.

    Reads module-level ``test_loader``, ``means`` and ``filters``.
    Returns ``(features, labels)`` for every image in the loader.
    """
    num_test_images = None  # None -> consume the whole loader
    images, image_labels = create_numpy_dataset(num_test_images, test_loader)
    stage_outputs = saak.test_multi_stage_saak_trans(images, means, filters)
    features = saak.get_final_feature(stage_outputs)
    return features, image_labels
def main():
    """Train and evaluate a Saak-feature SVM classifier on SVHN.

    Pipeline: Saak transform -> f-test feature selection -> PCA (64 dims)
    -> SVM on the training split, then the same frozen transform chain
    (filters/means, selected indices, fitted PCA) on the test split.
    """
    batch_size = 1
    test_batch_size = 1
    kwargs = {}
    # Share tensors between loader workers via the filesystem to avoid
    # file-descriptor exhaustion.
    torch.multiprocessing.set_sharing_strategy('file_system')

    # NOTE: earlier revisions loaded MNIST / Fashion-MNIST / HDF5-backed
    # SVHN patches here; only the built-in SVHN loader is kept.
    svhn_train = datasets.SVHN(root='./data/svhn', split='train',
                               transform=transforms.ToTensor(), download=True)
    svhn_test = datasets.SVHN(root='./data/svhn', split='test',
                              transform=transforms.ToTensor(), download=True)
    train_loader = data_utils.DataLoader(svhn_train, batch_size=batch_size,
                                         shuffle=True, **kwargs)
    test_loader = data_utils.DataLoader(svhn_test, batch_size=test_batch_size,
                                        shuffle=False, **kwargs)

    NUM_IMAGES = None  # None -> use the entire training split
    num_images = NUM_IMAGES
    data, labels = create_numpy_dataset(num_images, train_loader)
    filters, means, outputs = saak.multi_stage_saak_trans(data, energy_thresh=0.97)
    # Floor division: the original Python-2 `/` must become `//` so the
    # dimensionality stays an int under Python 3.
    final_feat_dim = sum(
        ((output.shape[1] - 1) // 2 + 1) * output.shape[2] * output.shape[3]
        for output in outputs
    )
    # Python-3 print calls (the originals were Python-2 print statements,
    # which are syntax errors here and inconsistent with the rest of the file).
    print('final feature dimension is {}'.format(final_feat_dim))
    final_feat = saak.get_final_feature(outputs)
    assert final_feat.shape[1] == final_feat_dim

    selected_feat, idx = f_test(final_feat, labels)
    reduced_feat, pca = reduce_feat_dim(selected_feat, dim=64)
    clf = svm_classifier(reduced_feat, labels)
    pred = clf.predict(reduced_feat)
    acc = sklearn.metrics.accuracy_score(labels, pred)
    print('training acc is {}'.format(acc))

    print('\n-----------------start testing-------------\n')
    test_data, test_labels = create_numpy_dataset(None, test_loader)
    test_outputs = saak.test_multi_stage_saak_trans(test_data, means, filters)
    test_final_feat = saak.get_final_feature(test_outputs)
    assert test_final_feat.shape[1] == final_feat_dim
    # Apply the training-time feature selection and PCA to the test features.
    test_selected_feat = test_final_feat[:, idx]
    test_reduced_feat = pca.transform(test_selected_feat)
    print('testing reduced feat shape {}'.format(test_reduced_feat.shape))
    test_pred = clf.predict(test_reduced_feat)
    test_acc = sklearn.metrics.accuracy_score(test_labels, test_pred)
    print('testing acc is {}'.format(test_acc))
def create_test_dataset():
    """Return Saak features and labels for every test image.

    Depends on module-level ``test_loader``, ``means`` and ``filters``;
    ``None`` asks ``create_numpy_dataset`` for the whole split.
    """
    raw, lbls = create_numpy_dataset(None, test_loader)
    transformed = saak.test_multi_stage_saak_trans(raw, means, filters)
    return saak.get_final_feature(transformed), lbls
shuffle=True, **kwargs) test_loader = data_utils.DataLoader(svhn_test, batch_size=test_batch_size, shuffle=False, **kwargs) K = 10 NUM_VIS = 20 NUM_IMAGES = None num_images = NUM_IMAGES data = saak.create_numpy_dataset(num_images, train_loader) filters, means, outputs = saak.multi_stage_saak_trans(data, energy_thresh=0.97) final_feat_dim = sum([ ((output.shape[1] - 1) / 2 + 1) * output.shape[2] * output.shape[3] for output in outputs ]) print('final feature dimension is {}'.format(final_feat_dim)) final_feat = saak.get_final_feature(outputs) assert final_feat.shape[1] == final_feat_dim print(final_feat.shape) k_mean_clustering(data=data, feature=final_feat, K=K, num_centroids_to_visualize=NUM_VIS)