def get_image_feature(file_path,dictionary,layer_num,K):
    '''
    Builds the spatial pyramid matching (SPM) feature of one image file.

    [input]
    * file_path: path of image file to read
    * dictionary: numpy.ndarray of shape (K,3F)
    * layer_num: number of spatial pyramid layers
    * K: number of clusters for the word maps

    [output]
    * feature: whatever get_feature_from_wordmap_SPM returns for the
      image's wordmap (NOTE(review): its length presumably depends on
      both K and layer_num -- confirm against that helper)
    '''
    # Read the file and rescale the pixel values by 1/255 as floats.
    pixels = skimage.io.imread(file_path).astype('float')/255

    # Assign a visual word to each pixel, then pool the resulting
    # wordmap into the SPM feature and hand it straight back.
    return get_feature_from_wordmap_SPM(
        visual_words.get_visual_words(pixels,dictionary),layer_num,K)
def predict_image(file_path, dictionary, layer_num, K, features, train_labels):
    '''
    Predicts the label of one image by comparing its SPM histogram with
    the trained features.

    This is a function run by a subprocess.

    [input]
    * file_path: path of image file, appended to '../data/'
    * dictionary: numpy.ndarray of shape (K, 3F)
    * layer_num: number of spatial pyramid layers
    * K: number of clusters for the word maps
    * features: trained features using SPM
    * train_labels: trained set of labels

    [output]
    * predicted_label: int representing the predicted label
    '''
    global PROGRESS

    # Advance the module-level progress counter by NPROC while holding
    # its lock, and report the new value.
    with PROGRESS_LOCK:
        PROGRESS += NPROC
        print('Processing: %03d/160 | Image: %s' % (PROGRESS, file_path))

    # Load the image and rescale it by 1/255 as floats.
    pixels = imageio.imread('../data/' + file_path).astype('float') / 255

    # Image -> wordmap -> SPM histogram.
    query_hist = get_feature_from_wordmap_SPM(
        visual_words.get_visual_words(pixels, dictionary), layer_num, K)

    # The training sample with the highest score from distance_to_set
    # supplies the predicted label.
    scores = distance_to_set(query_hist, features)
    best_match = np.argmax(scores)
    return train_labels[best_match]
def get_image_feature(file_path, dictionary, layer_num, K):
    '''
    Computes the spatial pyramid matching (SPM) histogram of an image file.

    [input]
    * file_path: path of image file to read
    * dictionary: numpy.ndarray of shape (K, 3F)
    * layer_num: number of spatial pyramid layers
    * K: number of clusters for the word maps

    [output]
    * feature: numpy.ndarray of shape (K*(4^layer_num-1)/3)
    '''
    # Load the raw pixels, then rescale them by 1/255 as floats.
    raw_pixels = skimage.io.imread(file_path)
    scaled_pixels = raw_pixels.astype('float') / 255

    # Label every pixel with a visual word and pool the labels over the
    # spatial pyramid.
    return get_feature_from_wordmap_SPM(
        visual_words.get_visual_words(scaled_pixels, dictionary),
        layer_num,
        K,
    )
def evaluate_recognition_system(opts, n_worker=8):
    '''
    Evaluates the recognition system for all test images and returns the
    confusion matrix.

    [input]
    * opts        : options
    * n_worker  : number of workers to process in parallel

    [output]
    * conf: numpy.ndarray of shape (8,8)
    * accuracy: accuracy of the evaluated system
    '''
    data_dir = opts.data_dir
    out_dir = opts.out_dir

    # Pull everything needed out of the archive inside a `with` so the
    # underlying .npz file handle is closed again.
    with np.load(join(out_dir, 'trained_system.npz')) as trained_system:
        dictionary = trained_system['dictionary']
        train_feats = trained_system['features']
        train_labels = trained_system['labels']
        stored_layer_num = trained_system['SPM_layer_num']

    # using the stored options in the trained system instead of opts.py
    # NOTE(review): test_opts is not forwarded to the workers below --
    # confirm whether get_image_feature is supposed to receive it.
    test_opts = copy(opts)
    test_opts.K = dictionary.shape[0]
    test_opts.L = stored_layer_num

    with open(join(data_dir, 'test_files.txt')) as test_list:
        test_files = test_list.read().splitlines()
    test_labels = np.loadtxt(join(data_dir, 'test_labels.txt'), np.int32)

    # One task per test image: (index, file name, true label).
    # NOTE(review): each worker call receives one such tuple as its only
    # argument -- confirm that matches get_image_feature's signature.
    args = list(zip(np.arange(len(test_files)), test_files, test_labels))

    # The context manager releases the worker processes once the
    # (blocking) map call has returned all features.
    with Pool(n_worker) as prcs:
        feature_test = prcs.map(get_image_feature, args)

    # Rows are indexed by the true label, columns by the predicted label.
    conf_mat = np.zeros((8, 8), dtype=int)
    for test_feat, labl_true in zip(feature_test, test_labels):
        # The features were already computed by the pool, so the image is
        # not reloaded here; the training sample with the highest score
        # from distance_to_set supplies the predicted label.
        sim_dist = distance_to_set(test_feat, train_feats)
        labl_pred = train_labels[np.argmax(sim_dist)]
        conf_mat[labl_true, labl_pred] += 1

    # Correct predictions lie on the diagonal.
    per_acc = np.trace(conf_mat) / np.sum(conf_mat)

    print('in evaluation')
    print('L is', opts.L)
    print('K is', opts.K)
    print('alpha is', opts.alpha)

    return conf_mat, per_acc
# Import the submodule explicitly: a bare `import skimage` does not
# guarantee that `skimage.io` has been loaded when imread is called.
import skimage.io

if __name__ == '__main__':
    num_cores = util.get_num_CPU()

    # Load one sample image and rescale it by 1/255 as floats.
    path_img = "../data/kitchen/sun_aasmevtpkslccptd.jpg"
    image = skimage.io.imread(path_img)
    image = image.astype('float') / 255

    # Filter responses of the sample image.
    filter_responses = visual_words.extract_filter_responses(image)
    util.display_filter_responses(filter_responses)

    # Build the dictionary, then read it back from dictionary.npy.
    visual_words.compute_dictionary(num_workers=num_cores)
    dictionary = np.load('dictionary.npy')

    # Wordmap of the sample image (the result is not used further below).
    img = visual_words.get_visual_words(image, dictionary)
    #util.save_wordmap(wordmap, filename)

    # Bag-of-words recognition system: build, evaluate, report.
    visual_recog.build_recognition_system(num_workers=num_cores)
    conf, accuracy = visual_recog.evaluate_recognition_system(
        num_workers=num_cores)
    print(conf)
    print(np.diag(conf).sum() / conf.sum())

    # Deep-feature recognition system, run with half the workers.
    vgg16 = torchvision.models.vgg16(pretrained=True).double()
    vgg16.eval()
    deep_recog.build_recognition_system(vgg16, num_workers=num_cores // 2)
    conf = deep_recog.evaluate_recognition_system(vgg16,
                                                  num_workers=num_cores // 2)
    print(conf)
    print(np.diag(conf).sum() / conf.sum())
def _load_image(img_path):
    '''Reads img_path with PIL and returns a float32 array scaled by 1/255.'''
    # np.array() copies the pixel data while the file is still open, so
    # the handle can be closed as soon as the block exits.
    with Image.open(img_path) as img_file:
        return np.array(img_file).astype(np.float32) / 255


def main():
    '''
    Runs the whole pipeline: filter responses, dictionary, wordmap
    visualisation, recognition-system training and evaluation.

    The confusion matrix and accuracy are printed and written to
    'confmat.csv' and 'accuracy.txt' inside opts.out_dir.
    '''
    opts = get_opts()
    print('L is', opts.L)
    print('K is', opts.K)
    print('alpha is', opts.alpha)
    print()

    n_cpu = util.get_num_CPU()

    # Q1.1
    img = _load_image(join(opts.data_dir, 'kitchen/sun_aasmevtpkslccptd.jpg'))
    filter_responses = visual_words.extract_filter_responses(opts, img)
    util.visualize_wordmap(img)
    util.display_filter_responses(opts, filter_responses)

    # Q1.2
    visual_words.compute_dictionary(opts, n_worker=n_cpu)
    dictionary = np.load(join(opts.out_dir, 'dictionary.npy'))

    # Q1.3: show each sample image next to its wordmap.
    sample_images = (
        'kitchen/sun_aasmevtpkslccptd.jpg',
        'waterfall/sun_bbeqjdnienanmmif.jpg',
        'windmill/sun_bratfupeyvlazpba.jpg',
        'desert/sun_adjlepvuitklskrz.jpg',
    )
    for rel_path in sample_images:
        img = _load_image(join(opts.data_dir, rel_path))
        wordmap = visual_words.get_visual_words(opts, img, dictionary)
        util.visualize_wordmap(img)
        util.visualize_wordmap(wordmap)

    # Q2.1-2.4
    visual_recog.build_recognition_system(opts, n_worker=n_cpu)

    # Q2.5
    conf, accuracy = visual_recog.evaluate_recognition_system(opts,
                                                              n_worker=n_cpu)

    print(conf)
    print(accuracy)
    np.savetxt(join(opts.out_dir, 'confmat.csv'), conf, fmt='%d',
               delimiter=',')
    np.savetxt(join(opts.out_dir, 'accuracy.txt'), [accuracy], fmt='%g')
# #Q1.1 filter_responses = visual_words.extract_filter_responses(image) util.display_filter_responses(filter_responses) # Q1.2 visual_words.compute_dictionary(num_workers=num_cores) # Q1.3 visualize word map dictionary = np.load('../code/dictionary.npy', allow_pickle=True) train_data = np.load('../data/train_data.npz', allow_pickle=True) train_name = train_data['image_names'] train_data = np.random.permutation( train_name)[:3] # load 3 random images for visulize wordmap for image in train_data: image = imageio.imread('../data/' + image[0]) wordmap = visual_words.get_visual_words(image, dictionary) f, axes = plt.subplots(1, 2) f.set_size_inches(8, 8) axes[0].imshow(image) axes[0].set_title('original') axes[1].imshow(wordmap) axes[1].set_title('wordmap') plt.show() # Q2.1 plot feature of one word map dictionary = np.load('../code/dictionary.npy', allow_pickle=True) wordmap = visual_words.get_visual_words(image, dictionary) dict_size = len(dictionary) hist_all = visual_recog.get_feature_from_wordmap(wordmap, dict_size) plt.hist(hist_all, dict_size) plt.show()
def build_recognition_system_helper_func(img_path):
    '''
    Computes the SPM histogram of one image located under "../data/".

    `dictionary` and `layers` are neither parameters nor locals; they are
    looked up in the enclosing (module) scope when the function runs.

    [input]
    * img_path: image path, appended to "../data/"

    [output]
    * hist: result of get_feature_from_wordmap_SPM for the image's wordmap
    '''
    wordmap = visual_words.get_visual_words(
        imageio.imread("../data/" + img_path), dictionary)
    # NOTE(review): 200 is passed where other callers pass the number of
    # visual words K -- presumably it must match the dictionary size; confirm.
    return get_feature_from_wordmap_SPM(wordmap, layers, 200)