def main(): image_dir = '../../vqa_images/' local_images = [ f for f in listdir(image_dir) if isfile(join(image_dir, f)) ] parser = argparse.ArgumentParser() parser.add_argument( '-model', type=str, default='../models/mlp_num_hidden_units_1024_num_hidden_layers_3.json') parser.add_argument( '-weights', type=str, default= '../models/mlp_num_hidden_units_1024_num_hidden_layers_3_epoch_70.hdf5' ) parser.add_argument('-sample_size', type=int, default=1) args = parser.parse_args() model = model_from_json(open(args.model).read()) model.load_weights(args.weights) model.compile(loss='categorical_crossentropy', optimizer='rmsprop') print 'Model loaded and compiled' images_val = open('../data/preprocessed/images_val2014_all.txt', 'r').read().decode('utf8').splitlines() nlp = English() print 'Loaded word2vec features' labelencoder = joblib.load('../models/labelencoder.pkl') vgg_model_path = '../features/coco/vgg_feats.mat' features_struct = scipy.io.loadmat(vgg_model_path) VGGfeatures = features_struct['feats'] print 'Loaded vgg features' image_ids = open('../features/coco_vgg_IDMap.txt').read().splitlines() img_map = {} for ids in image_ids: id_split = ids.split() img_map[id_split[0]] = int(id_split[1]) image_sample = random.sample(local_images, args.sample_size) for image in image_sample: img = Image.open(image_dir + image) img.show() q = unicode(raw_input("Ask a question about the image:")) coco_id = str(int(image[-16:-4])) timesteps = len(nlp(q)) X_q = get_questions_matrix_sum([q], nlp) X_i = get_images_matrix([coco_id], img_map, VGGfeatures) X_batch = np.hstack((X_q, X_i)) y_predict = model.predict_classes(X_batch, verbose=0) print labelencoder.inverse_transform(y_predict) #img.close() raw_input('Press enter to continue...')
def main(): parser = argparse.ArgumentParser() parser.add_argument('-model', type=str, required=True) parser.add_argument('-weights', type=str, required=True) parser.add_argument('-results', type=str, required=True) args = parser.parse_args() model = model_from_json(open(args.model).read()) model.load_weights(args.weights) model.compile(loss='categorical_crossentropy', optimizer='rmsprop') questions_val = open('../data/preprocessed/questions_val2014.txt', 'r').read().decode('utf8').splitlines() answers_val = open('../data/preprocessed/answers_val2014.txt', 'r').read().decode('utf8').splitlines() images_val = open('../data/preprocessed/images_val2014.txt', 'r').read().decode('utf8').splitlines() vgg_model_path = '../features/coco/vgg_feats.mat' print 'Model compiled, weights loaded...' labelencoder = joblib.load('../models/labelencoder.pkl') features_struct = scipy.io.loadmat(vgg_model_path) VGGfeatures = features_struct['feats'] print 'loaded vgg features' image_ids = open('../features/coco_vgg_IDMap.txt').read().splitlines() img_map = {} for ids in image_ids: id_split = ids.split() img_map[id_split[0]] = int(id_split[1]) nlp = English() print 'loaded word2vec features' nb_classes = 1000 y_predict_text = [] batchSize = 128 widgets = ['Evaluating ', Percentage(), ' ', Bar(marker='#',left='[',right=']'), ' ', ETA()] pbar = ProgressBar(widgets=widgets) for qu_batch,an_batch,im_batch in pbar(zip(grouper(questions_val, batchSize, fillvalue=questions_val[0]), grouper(answers_val, batchSize, fillvalue=answers_val[0]), grouper(images_val, batchSize, fillvalue=images_val[0]))): X_q_batch = get_questions_matrix_sum(qu_batch, nlp) if 'language_only' in args.model: X_batch = X_q_batch else: X_i_batch = get_images_matrix(im_batch, img_map , VGGfeatures) X_batch = np.hstack((X_q_batch, X_i_batch)) y_predict = model.predict_classes(X_batch, verbose=0) y_predict_text.extend(labelencoder.inverse_transform(y_predict)) correct_val=0 incorrect_val=0 f1 = open(args.results, 'w') for 
prediction, truth, question, image in zip(y_predict_text, answers_val, questions_val, images_val): temp_count=0 for _truth in truth.split(';'): if prediction == _truth: temp_count+=1 if temp_count>2: correct_val+=1 else: incorrect_val+=1 f1.write(question.encode('utf-8')) f1.write('\n') f1.write(image.encode('utf-8')) f1.write('\n') f1.write(prediction) f1.write('\n') f1.write(truth.encode('utf-8')) f1.write('\n') f1.write('\n') f1.write('Final Accuracy is ' + str(float(correct_val)/(incorrect_val+correct_val))) f1.close() f1 = open('../results/overall_results.txt', 'a') f1.write(args.weights + '\n') f1.write(str(float(correct_val)/(incorrect_val+correct_val)) + '\n') f1.close() print 'Final Accuracy on the validation set is', float(correct_val)/(incorrect_val+correct_val)
def main(): parser = argparse.ArgumentParser() parser.add_argument('-model', type=str, required=True) parser.add_argument('-weights', type=str, required=True) parser.add_argument('-results', type=str, required=True) args = parser.parse_args() model = model_from_json(open(args.model).read()) model.load_weights(args.weights) model.compile(loss='categorical_crossentropy', optimizer='rmsprop') questions_val = open('../data/preprocessed/questions_val2014.txt', 'r').read().decode('utf8').splitlines() answers_val = open('../data/preprocessed/answers_val2014.txt', 'r').read().decode('utf8').splitlines() images_val = open('../data/preprocessed/images_val2014.txt', 'r').read().decode('utf8').splitlines() vgg_model_path = '../features/coco/vgg_feats.mat' print 'Model compiled, weights loaded...' labelencoder = joblib.load('../models/labelencoder.pkl') features_struct = scipy.io.loadmat(vgg_model_path) VGGfeatures = features_struct['feats'] print 'loaded vgg features' image_ids = open('../features/coco/coco_vgg_IDMap.txt').read().splitlines() img_map = {} for ids in image_ids: id_split = ids.split() img_map[id_split[0]] = int(id_split[1]) nlp = English() print 'loaded word2vec features' nb_classes = 1000 y_predict_text = [] batchSize = 128 widgets = [ 'Evaluating ', Percentage(), ' ', Bar(marker='#', left='[', right=']'), ' ', ETA() ] pbar = ProgressBar(widgets=widgets) for qu_batch, an_batch, im_batch in pbar( zip(grouper(questions_val, batchSize, fillvalue=questions_val[0]), grouper(answers_val, batchSize, fillvalue=answers_val[0]), grouper(images_val, batchSize, fillvalue=images_val[0]))): X_q_batch = get_questions_matrix_sum(qu_batch, nlp) if 'language_only' in args.model: X_batch = X_q_batch else: X_i_batch = get_images_matrix(im_batch, img_map, VGGfeatures) X_batch = np.hstack((X_q_batch, X_i_batch)) y_predict = model.predict_classes(X_batch, verbose=0) y_predict_text.extend(labelencoder.inverse_transform(y_predict)) correct_val = 0 incorrect_val = 0 f1 = 
open(args.results, 'w') for prediction, truth, question, image in zip(y_predict_text, answers_val, questions_val, images_val): temp_count = 0 for _truth in truth.split(';'): if prediction == _truth: temp_count += 1 if temp_count > 2: correct_val += 1 else: incorrect_val += 1 f1.write(question.encode('utf-8')) f1.write('\n') f1.write(image.encode('utf-8')) f1.write('\n') f1.write(prediction) f1.write('\n') f1.write(truth.encode('utf-8')) f1.write('\n') f1.write('\n') f1.write('Final Accuracy is ' + str(float(correct_val) / (incorrect_val + correct_val))) f1.close() f1 = open('../results/overall_results.txt', 'a') f1.write(args.weights + '\n') f1.write(str(float(correct_val) / (incorrect_val + correct_val)) + '\n') f1.close() print 'Final Accuracy on the validation set is', float(correct_val) / ( incorrect_val + correct_val)
def main(): parser = argparse.ArgumentParser() parser.add_argument('-num_hidden_units', type=int, default=1024) parser.add_argument('-num_hidden_layers', type=int, default=3) parser.add_argument('-dropout', type=float, default=0.5) parser.add_argument('-activation', type=str, default='tanh') parser.add_argument('-language_only', type=bool, default=False) parser.add_argument('-num_epochs', type=int, default=10) parser.add_argument('-model_save_interval', type=int, default=10) parser.add_argument('-batch_size', type=int, default=128) args = parser.parse_args() questions_train = open('../data/preprocessed/questions_train2014.txt', 'r').read().decode('utf8').splitlines() answers_train = open('../data/preprocessed/answers_train2014_modal.txt', 'r').read().decode('utf8').splitlines() images_train = open('../data/preprocessed/images_train2014.txt', 'r').read().decode('utf8').splitlines() vgg_model_path = '../features/coco/vgg_feats.mat' maxAnswers = 1000 questions_train, answers_train, images_train = selectFrequentAnswers( questions_train, answers_train, images_train, maxAnswers) #encode the remaining answers labelencoder = preprocessing.LabelEncoder() labelencoder.fit(answers_train) nb_classes = len(list(labelencoder.classes_)) joblib.dump(labelencoder, '../models/labelencoder.pkl') features_struct = scipy.io.loadmat(vgg_model_path) VGGfeatures = features_struct['feats'] print 'loaded vgg features' image_ids = open('../features/coco_vgg_IDMap.txt').read().splitlines() id_map = {} for ids in image_ids: id_split = ids.split() id_map[id_split[0]] = int(id_split[1]) nlp = English() print 'loaded word2vec features...' 
img_dim = 4096 word_vec_dim = 300 model = Sequential() if args.language_only: model.add( Dense(args.num_hidden_units, input_dim=word_vec_dim, init='uniform')) else: model.add( Dense(args.num_hidden_units, input_dim=img_dim + word_vec_dim, init='uniform')) model.add(Activation(args.activation)) if args.dropout > 0: model.add(Dropout(args.dropout)) for i in xrange(args.num_hidden_layers - 1): model.add(Dense(args.num_hidden_units, init='uniform')) model.add(Activation(args.activation)) if args.dropout > 0: model.add(Dropout(args.dropout)) model.add(Dense(nb_classes, init='uniform')) model.add(Activation('softmax')) json_string = model.to_json() if args.language_only: model_file_name = '../models/mlp_language_only_num_hidden_units_' + str( args.num_hidden_units) + '_num_hidden_layers_' + str( args.num_hidden_layers) else: model_file_name = '../models/mlp_num_hidden_units_' + str( args.num_hidden_units) + '_num_hidden_layers_' + str( args.num_hidden_layers) open(model_file_name + '.json', 'w').write(json_string) print 'Compiling model...' model.compile(loss='categorical_crossentropy', optimizer='rmsprop') print 'Compilation done...' print 'Training started...' 
for k in xrange(args.num_epochs): #shuffle the data points before going through them index_shuf = range(len(questions_train)) shuffle(index_shuf) questions_train = [questions_train[i] for i in index_shuf] answers_train = [answers_train[i] for i in index_shuf] images_train = [images_train[i] for i in index_shuf] progbar = generic_utils.Progbar(len(questions_train)) for qu_batch, an_batch, im_batch in zip( grouper(questions_train, args.batch_size, fillvalue=questions_train[-1]), grouper(answers_train, args.batch_size, fillvalue=answers_train[-1]), grouper(images_train, args.batch_size, fillvalue=images_train[-1])): X_q_batch = get_questions_matrix_sum(qu_batch, nlp) if args.language_only: X_batch = X_q_batch else: X_i_batch = get_images_matrix(im_batch, id_map, VGGfeatures) X_batch = np.hstack((X_q_batch, X_i_batch)) Y_batch = get_answers_matrix(an_batch, labelencoder) loss = model.train_on_batch(X_batch, Y_batch) progbar.add(args.batch_size, values=[("train loss", loss)]) #print type(loss) if k % args.model_save_interval == 0: model.save_weights(model_file_name + '_epoch_{:02d}.hdf5'.format(k)) model.save_weights(model_file_name + '_epoch_{:02d}.hdf5'.format(k))
def main(): parser = argparse.ArgumentParser() parser.add_argument('-num_hidden_units', type=int, default=1024) parser.add_argument('-num_hidden_layers', type=int, default=3) parser.add_argument('-dropout', type=float, default=0.5) parser.add_argument('-activation', type=str, default='tanh') parser.add_argument('-language_only', type=bool, default= False) parser.add_argument('-num_epochs', type=int, default=100) parser.add_argument('-model_save_interval', type=int, default=10) parser.add_argument('-batch_size', type=int, default=128) parser.add_argument('-word_vector', type=str, default='') args = parser.parse_args() questions_train = open('../data/preprocessed/questions_train2014.txt', 'r').read().decode('utf8').splitlines() answers_train = open('../data/preprocessed/answers_train2014_modal.txt', 'r').read().decode('utf8').splitlines() images_train = open('../data/preprocessed/images_train2014.txt', 'r').read().decode('utf8').splitlines() vgg_model_path = '../features/coco/vgg_feats.mat' maxAnswers = 1000 questions_train, answers_train, images_train = selectFrequentAnswers(questions_train,answers_train,images_train, maxAnswers) #encode the remaining answers labelencoder = preprocessing.LabelEncoder() labelencoder.fit(answers_train) nb_classes = len(list(labelencoder.classes_)) joblib.dump(labelencoder,'../models/labelencoder.pkl') features_struct = scipy.io.loadmat(vgg_model_path) VGGfeatures = features_struct['feats'] print 'loaded vgg features' image_ids = open('../features/coco_vgg_IDMap.txt').read().splitlines() id_map = {} for ids in image_ids: id_split = ids.split() id_map[id_split[0]] = int(id_split[1]) # Code to choose the word vectors, default is Goldberg but GLOVE is preferred if args.word_vector == 'glove': nlp = spacy.load('en', vectors='en_glove_cc_300_1m_vectors') else: nlp = English() print 'loaded ' + args.word_vector + ' word2vec features...' 
img_dim = 4096 word_vec_dim = 300 model = Sequential() if args.language_only: model.add(Dense(args.num_hidden_units, input_dim=word_vec_dim, init='uniform')) else: model.add(Dense(args.num_hidden_units, input_dim=img_dim+word_vec_dim, init='uniform')) model.add(Activation(args.activation)) if args.dropout>0: model.add(Dropout(args.dropout)) for i in xrange(args.num_hidden_layers-1): model.add(Dense(args.num_hidden_units, init='uniform')) model.add(Activation(args.activation)) if args.dropout>0: model.add(Dropout(args.dropout)) model.add(Dense(nb_classes, init='uniform')) model.add(Activation('softmax')) json_string = model.to_json() if args.language_only: model_file_name = '../models/mlp_language_only_num_hidden_units_' + str(args.num_hidden_units) + '_num_hidden_layers_' + str(args.num_hidden_layers) else: model_file_name = '../models/mlp_num_hidden_units_' + str(args.num_hidden_units) + '_num_hidden_layers_' + str(args.num_hidden_layers) open(model_file_name + '.json', 'w').write(json_string) print 'Compiling model...' model.compile(loss='categorical_crossentropy', optimizer='rmsprop') print 'Compilation done...' print 'Training started...' 
for k in xrange(args.num_epochs): #shuffle the data points before going through them index_shuf = range(len(questions_train)) shuffle(index_shuf) questions_train = [questions_train[i] for i in index_shuf] answers_train = [answers_train[i] for i in index_shuf] images_train = [images_train[i] for i in index_shuf] progbar = generic_utils.Progbar(len(questions_train)) for qu_batch,an_batch,im_batch in zip(grouper(questions_train, args.batch_size, fillvalue=questions_train[-1]), grouper(answers_train, args.batch_size, fillvalue=answers_train[-1]), grouper(images_train, args.batch_size, fillvalue=images_train[-1])): X_q_batch = get_questions_matrix_sum(qu_batch, nlp) if args.language_only: X_batch = X_q_batch else: X_i_batch = get_images_matrix(im_batch, id_map, VGGfeatures) X_batch = np.hstack((X_q_batch, X_i_batch)) Y_batch = get_answers_matrix(an_batch, labelencoder) loss = model.train_on_batch(X_batch, Y_batch) # fix for the Keras v0.3 issue #9 progbar.add(args.batch_size, values=[("train loss", loss[0])]) #print type(loss) if k%args.model_save_interval == 0: model.save_weights(model_file_name + '_epoch_{:02d}.hdf5'.format(k)) model.save_weights(model_file_name + '_epoch_{:02d}.hdf5'.format(k))
def main(): cwd = os.getcwd() parser = argparse.ArgumentParser() parser.add_argument('-num_hidden_units', type=int, default=1024) parser.add_argument('-num_hidden_layers', type=int, default=3) parser.add_argument('-dropout', type=float, default=0.5) parser.add_argument('-activation', type=str, default='tanh') parser.add_argument('-language_only', type=bool, default= False) parser.add_argument('-num_epochs', type=int, default=2) parser.add_argument('-model_save_interval', type=int, default=10) parser.add_argument('-model_weights_path', type=str, default=cwd+'/vgg/vgg16_weights.h5') parser.add_argument('-batch_size', type=int, default=128) parser.add_argument('-questions_train',type=str, default = cwd+'/data/preprocessed/questions_train2015.txt') parser.add_argument('-answers_train',type=str, default = cwd+'/data/preprocessed/answers_train2015_modal.txt') parser.add_argument('-im_dir',type=str, default =cwd+'/data/preprocessed/scene_img_abstract_v002_train2015/') #parser.add_argument('-questions_train',type=str, default = cwd+'/data/preprocessed/questions_train2014.txt') args = parser.parse_args() questions_train = open(args.questions_train, 'r').read().decode('utf8').splitlines() answers_train = open(args.answers_train, 'r').read().decode('utf8').splitlines() images_train = open(cwd+'/data/preprocessed/images_train2015.txt', 'r').read().decode('utf8').splitlines() #vgg_model_path = cwd+'/features/coco/vgg_feats.mat' #this needs to change maxAnswers = 100 questions_train, answers_train, images_train = selectFrequentAnswers(questions_train,answers_train,images_train, maxAnswers) #encode the remaining answers labelencoder = preprocessing.LabelEncoder() labelencoder.fit(answers_train) nb_classes = len(list(labelencoder.classes_)) joblib.dump(labelencoder,cwd+'/models/labelencoder.pkl') #features_struct = scipy.io.loadmat(vgg_model_path) #VGGfeatures = features_struct['feats'] # print 'loaded vgg features' # image_ids = 
open(cwd+'/features/coco_vgg_IDMap.txt').read().splitlines() # id_map = {} # for ids in image_ids: # id_split = ids.split() # id_map[id_split[0]] = int(id_split[1]) vgg_model = vgg16.VGG_16(args.model_weights_path) sgd = SGD(lr=0.1, decay=1e-6, momentum=0.9, nesterov=True) vgg_model.compile(optimizer=sgd, loss='categorical_crossentropy') print 'loaded vgg model...' nlp = English() print 'loaded word2vec features...' img_dim = 4096 word_vec_dim = 300 model = Sequential() if args.language_only: model.add(Dense(args.num_hidden_units, input_dim=word_vec_dim, init='uniform')) else: model.add(Dense(args.num_hidden_units, input_dim=img_dim+word_vec_dim, init='uniform')) model.add(Activation(args.activation)) if args.dropout>0: model.add(Dropout(args.dropout)) for i in xrange(args.num_hidden_layers-1): model.add(Dense(args.num_hidden_units, init='uniform')) model.add(Activation(args.activation)) if args.dropout>0: model.add(Dropout(args.dropout)) model.add(Dense(nb_classes, init='uniform')) model.add(Activation('softmax')) json_string = model.to_json() model_file_name = cwd+'/models/mlp_num_hidden_units_' + str(args.num_hidden_units) + '_num_hidden_layers_' + str(args.num_hidden_layers) open(model_file_name + '.json', 'w').write(json_string) print 'Training started...' 
id_map = {} f1 = open('abstract_image_precompute') f2 = open('abstract_image_precompute_reverse') VGGfeatures = np.loadtxt(f1) VGGfeatures_reverse = np.loadtxt(f2) f1.close() f2.close() for k in xrange(args.num_epochs): #shuffle the data points before going through them index_shuf = range(len(questions_train)) shuffle(index_shuf) questions_train = [questions_train[i] for i in index_shuf] answers_train = [answers_train[i] for i in index_shuf] images_train = [images_train[i] for i in index_shuf] progbar = generic_utils.Progbar(len(questions_train)) for qu_batch,an_batch,im_batch in zip(grouper(questions_train, args.batch_size, fillvalue=questions_train[-1]), grouper(answers_train, args.batch_size, fillvalue=answers_train[-1]), grouper(images_train, args.batch_size, fillvalue=images_train[-1])): X_q_batch = get_questions_matrix_sum(qu_batch, nlp) im_path = args.im_dir +"abstract_v002_train2015_" print 'getting image features...' X_i_batch = get_images_matrix(im_batch, VGGfeatures, VGGfeatures_reverse) # X_i_batch = get_images_matrix_from_model(vgg_model, im_batch, im_path, id_map) X_batch = np.hstack((X_q_batch, X_i_batch)) Y_batch = get_answers_matrix(an_batch, labelencoder) print 'running training on batch...' loss = model.train_on_batch(X_batch, Y_batch) progbar.add(args.batch_size, values=[("train loss", loss)]) if k%args.model_save_interval == 0: model.save_weights(model_file_name + '_epoch_{:02d}.hdf5'.format(k)) model.save_weights(model_file_name + '_epoch_{:02d}.hdf5'.format(k))
def main(): parser = argparse.ArgumentParser() parser.add_argument('-model', type=str, required=True) parser.add_argument('-results', type=str, required=True) args = parser.parse_args() model = load_model(args.model) model.compile(loss='categorical_crossentropy', optimizer='rmsprop') questions_val = open('./data/questions_val2014.txt', 'r').read().decode('utf8').splitlines() answers_val = open('./data/answers_val2014_all.txt', 'r').read().decode('utf8').splitlines() images_val = open('./data/images_val2014_all.txt', 'r').read().decode('utf8').splitlines() print 'Model compiled, weights loaded...' labelencoder = joblib.load('./labelencoder.pkl') image_ids = open("./id_map.txt").read().splitlines() id_map = {} for ids in image_ids: id_split = ids.split() id_map[int(id_split[0])] = int(id_split[1]) - 1 sherlock_features = np.load('./sherlock_features_temp.npy') nlp = English() print 'loaded word2vec features' nb_classes = 1000 y_predict_text = [] batchSize = 128 widgets = ['Evaluating ', Percentage(), ' ', Bar(marker='#', left='[', right=']'), ' ', ETA()] pbar = ProgressBar(widgets=widgets) for qu_batch, an_batch, im_batch in pbar(zip(grouper(questions_val, batchSize, fillvalue=questions_val[0]), grouper(answers_val, batchSize, fillvalue=answers_val[0]), grouper(images_val, batchSize, fillvalue=images_val[0]))): X_q_batch = get_questions_matrix_sum(qu_batch, nlp) X_i_batch = get_images_matrix(im_batch, id_map, sherlock_features) X_batch = np.hstack((X_q_batch, X_i_batch)) y_predict = model.predict_classes(X_batch, verbose=0) y_predict_text.extend(labelencoder.inverse_transform(y_predict)) correct_val = 0.0 total = 0 f1 = open(args.results, 'w') for prediction, truth, question, image in zip(y_predict_text, answers_val, questions_val, images_val): temp_count = 0 for _truth in truth.split(';'): if prediction == _truth: temp_count += 1 if temp_count > 2: correct_val += 1 else: correct_val += float(temp_count) / 3 total += 1 f1.write(question.encode('utf-8')) 
f1.write('\n') f1.write(image.encode('utf-8')) f1.write('\n') f1.write(prediction) f1.write('\n') f1.write(truth.encode('utf-8')) f1.write('\n') f1.write('\n') f1.write('Final Accuracy is ' + str(correct_val / total)) f1.close() f1 = open('../results/overall_results.txt', 'a') f1.write(args.weights + '\n') f1.write(str(correct_val / total) + '\n') f1.close() print 'Final Accuracy on the validation set is', correct_val / total
def main(): parser = argparse.ArgumentParser() parser.add_argument("-model", type=str, required=True) parser.add_argument("-weights", type=str, required=True) parser.add_argument("-results", type=str, required=True) args = parser.parse_args() model = model_from_json(open(args.model).read()) model.load_weights(args.weights) model.compile(loss="categorical_crossentropy", optimizer="rmsprop") questions_val = open("../data/preprocessed/questions_val2014.txt", "r").read().decode("utf8").splitlines() answers_val = open("../data/preprocessed/answers_val2014.txt", "r").read().decode("utf8").splitlines() images_val = open("../data/preprocessed/images_val2014.txt", "r").read().decode("utf8").splitlines() vgg_model_path = "../features/coco/vgg_feats.mat" print "Model compiled, weights loaded..." labelencoder = joblib.load("../models/labelencoder.pkl") features_struct = scipy.io.loadmat(vgg_model_path) VGGfeatures = features_struct["feats"] print "loaded vgg features" image_ids = open("../features/coco/coco_vgg_IDMap.txt").read().splitlines() img_map = {} for ids in image_ids: id_split = ids.split() img_map[id_split[0]] = int(id_split[1]) nlp = English() print "loaded word2vec features" nb_classes = 1000 y_predict_text = [] batchSize = 128 widgets = ["Evaluating ", Percentage(), " ", Bar(marker="#", left="[", right="]"), " ", ETA()] pbar = ProgressBar(widgets=widgets) for qu_batch, an_batch, im_batch in pbar( zip( grouper(questions_val, batchSize, fillvalue=questions_val[0]), grouper(answers_val, batchSize, fillvalue=answers_val[0]), grouper(images_val, batchSize, fillvalue=images_val[0]), ) ): X_q_batch = get_questions_matrix_sum(qu_batch, nlp) if "language_only" in args.model: X_batch = X_q_batch else: X_i_batch = get_images_matrix(im_batch, img_map, VGGfeatures) X_batch = np.hstack((X_q_batch, X_i_batch)) y_predict = model.predict_classes(X_batch, verbose=0) y_predict_text.extend(labelencoder.inverse_transform(y_predict)) correct_val = 0 incorrect_val = 0 f1 = 
open(args.results, "w") for prediction, truth, question, image in zip(y_predict_text, answers_val, questions_val, images_val): temp_count = 0 for _truth in truth.split(";"): if prediction == _truth: temp_count += 1 if temp_count > 2: correct_val += 1 else: incorrect_val += 1 f1.write(question.encode("utf-8")) f1.write("\n") f1.write(image.encode("utf-8")) f1.write("\n") f1.write(prediction) f1.write("\n") f1.write(truth.encode("utf-8")) f1.write("\n") f1.write("\n") f1.write("Final Accuracy is " + str(float(correct_val) / (incorrect_val + correct_val))) f1.close() f1 = open("../results/overall_results.txt", "a") f1.write(args.weights + "\n") f1.write(str(float(correct_val) / (incorrect_val + correct_val)) + "\n") f1.close() print "Final Accuracy on the validation set is", float(correct_val) / (incorrect_val + correct_val)
# One loss value is collected per processed batch, across all epochs.
training_loss = []
for i in xrange(args.num_epochs):
    progbar = generic_utils.Progbar(len(questions_train))
    # Build the three batch streams first; each passes its last sample as the
    # fill value to grouper.
    question_batches = grouper(questions_train, args.batch_size,
                               fillvalue=questions_train[-1])
    answer_batches = grouper(answers_train, args.batch_size,
                             fillvalue=answers_train[-1])
    image_batches = grouper(images_train, args.batch_size,
                            fillvalue=images_train[-1])
    for qu_batch, an_batch, im_batch in zip(question_batches, answer_batches,
                                            image_batches):
        # Question features and image features are stacked side by side to
        # form the network input.
        x_q_batch = get_questions_matrix_sum(qu_batch, nlp)
        x_i_batch = get_images_matrix(im_batch, id_map, sherlock_features)
        x_batch = np.hstack((x_q_batch, x_i_batch))
        y_batch = get_answers_matrix(an_batch, labelencoder)

        loss = model.train_on_batch(x_batch, y_batch)
        training_loss.append(loss)
        progbar.add(args.batch_size, values=[("train_loss", loss)])

# Plot the collected losses and store the figure next to the script.
plt.plot(training_loss)
plt.title("Training loss for the network")
plt.savefig('loss.png')
def process_input(self, question):
    """Return get_questions_matrix_sum applied to ``question`` and self._nlp."""
    nlp = self._nlp
    return get_questions_matrix_sum(question, nlp)