Example #1
def main():

    image_dir = '../../vqa_images/'
    local_images = [
        f for f in listdir(image_dir) if isfile(join(image_dir, f))
    ]

    parser = argparse.ArgumentParser()
    parser.add_argument(
        '-model',
        type=str,
        default='../models/mlp_num_hidden_units_1024_num_hidden_layers_3.json')
    parser.add_argument(
        '-weights',
        type=str,
        default=
        '../models/mlp_num_hidden_units_1024_num_hidden_layers_3_epoch_70.hdf5'
    )
    parser.add_argument('-sample_size', type=int, default=1)
    args = parser.parse_args()

    model = model_from_json(open(args.model).read())
    model.load_weights(args.weights)
    model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
    print 'Model loaded and compiled'
    images_val = open('../data/preprocessed/images_val2014_all.txt',
                      'r').read().decode('utf8').splitlines()

    nlp = English()
    print 'Loaded word2vec features'
    labelencoder = joblib.load('../models/labelencoder.pkl')

    vgg_model_path = '../features/coco/vgg_feats.mat'
    features_struct = scipy.io.loadmat(vgg_model_path)
    VGGfeatures = features_struct['feats']
    print 'Loaded vgg features'
    image_ids = open('../features/coco_vgg_IDMap.txt').read().splitlines()
    img_map = {}
    for ids in image_ids:
        id_split = ids.split()
        img_map[id_split[0]] = int(id_split[1])

    image_sample = random.sample(local_images, args.sample_size)

    for image in image_sample:

        img = Image.open(image_dir + image)
        img.show()
        q = unicode(raw_input("Ask a question about the image:"))
        coco_id = str(int(image[-16:-4]))
        timesteps = len(nlp(q))
        X_q = get_questions_matrix_sum([q], nlp)
        X_i = get_images_matrix([coco_id], img_map, VGGfeatures)
        X_batch = np.hstack((X_q, X_i))
        y_predict = model.predict_classes(X_batch, verbose=0)
        print labelencoder.inverse_transform(y_predict)
        #img.close()
        raw_input('Press enter to continue...')
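Every example in this listing leans on a few helpers (get_questions_matrix_sum, get_images_matrix, grouper, ...) whose definitions are not included here. As a rough guide, this is a minimal sketch of get_questions_matrix_sum, assuming it does nothing more than sum the 300-dimensional spaCy token vectors of each question into one row; this is an assumption, not the project's actual code:

import numpy as np

def get_questions_matrix_sum(questions, nlp):
    # Assumed behaviour: bag-of-words sentence embedding, one row per question,
    # obtained by summing the 300-d word vectors of its tokens.
    word_vec_dim = 300
    matrix = np.zeros((len(questions), word_vec_dim))
    for i, question in enumerate(questions):
        for token in nlp(question):
            matrix[i, :] += token.vector
    return matrix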
Example #2
def main():
	parser = argparse.ArgumentParser()
	parser.add_argument('-model', type=str, required=True)
	parser.add_argument('-weights', type=str, required=True)
	parser.add_argument('-results', type=str, required=True)
	args = parser.parse_args()

	model = model_from_json(open(args.model).read())
	model.load_weights(args.weights)
	model.compile(loss='categorical_crossentropy', optimizer='rmsprop')

	questions_val = open('../data/preprocessed/questions_val2014.txt', 
						'r').read().decode('utf8').splitlines()
	answers_val = open('../data/preprocessed/answers_val2014.txt', 
						'r').read().decode('utf8').splitlines()
	images_val = open('../data/preprocessed/images_val2014.txt', 
						'r').read().decode('utf8').splitlines()
	vgg_model_path = '../features/coco/vgg_feats.mat'
	
	print 'Model compiled, weights loaded...'
	labelencoder = joblib.load('../models/labelencoder.pkl')

	features_struct = scipy.io.loadmat(vgg_model_path)
	VGGfeatures = features_struct['feats']
	print 'loaded vgg features'
	image_ids = open('../features/coco_vgg_IDMap.txt').read().splitlines()
	img_map = {}
	for ids in image_ids:
		id_split = ids.split()
		img_map[id_split[0]] = int(id_split[1])

	nlp = English()
	print 'loaded word2vec features'

	nb_classes = 1000
	y_predict_text = []
	batchSize = 128
	widgets = ['Evaluating ', Percentage(), ' ', Bar(marker='#',left='[',right=']'),
           ' ', ETA()]
	pbar = ProgressBar(widgets=widgets)

	for qu_batch,an_batch,im_batch in pbar(zip(grouper(questions_val, batchSize, fillvalue=questions_val[0]), 
												grouper(answers_val, batchSize, fillvalue=answers_val[0]), 
												grouper(images_val, batchSize, fillvalue=images_val[0]))):
		X_q_batch = get_questions_matrix_sum(qu_batch, nlp)
		if 'language_only' in args.model:
			X_batch = X_q_batch
		else:
			X_i_batch = get_images_matrix(im_batch, img_map , VGGfeatures)
			X_batch = np.hstack((X_q_batch, X_i_batch))
		y_predict = model.predict_classes(X_batch, verbose=0)
		y_predict_text.extend(labelencoder.inverse_transform(y_predict))

	correct_val=0
	incorrect_val=0	
	f1 = open(args.results, 'w')

	for prediction, truth, question, image in zip(y_predict_text, answers_val, questions_val, images_val):
		temp_count=0
		for _truth in truth.split(';'):
			if prediction == _truth:
				temp_count+=1

		if temp_count>2:
			correct_val+=1
		else:
			incorrect_val+=1

		f1.write(question.encode('utf-8'))
		f1.write('\n')
		f1.write(image.encode('utf-8'))
		f1.write('\n')
		f1.write(prediction)
		f1.write('\n')
		f1.write(truth.encode('utf-8'))
		f1.write('\n')
		f1.write('\n')

	f1.write('Final Accuracy is ' + str(float(correct_val)/(incorrect_val+correct_val)))
	f1.close()
	f1 = open('../results/overall_results.txt', 'a')
	f1.write(args.weights + '\n')
	f1.write(str(float(correct_val)/(incorrect_val+correct_val)) + '\n')
	f1.close()
	print 'Final Accuracy on the validation set is', float(correct_val)/(incorrect_val+correct_val)
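grouper, used above to cut the validation lists into batches of 128, is also not shown. The standard itertools recipe is a reasonable stand-in (Python 2 spelling to match the examples; use itertools.zip_longest on Python 3):

from itertools import izip_longest

def grouper(iterable, n, fillvalue=None):
    # Collect data into fixed-length chunks, padding the last chunk with fillvalue.
    args = [iter(iterable)] * n
    return izip_longest(*args, fillvalue=fillvalue)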
Example #3
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-model', type=str, required=True)
    parser.add_argument('-weights', type=str, required=True)
    parser.add_argument('-results', type=str, required=True)
    args = parser.parse_args()

    model = model_from_json(open(args.model).read())
    model.load_weights(args.weights)
    model.compile(loss='categorical_crossentropy', optimizer='rmsprop')

    questions_val = open('../data/preprocessed/questions_val2014.txt',
                         'r').read().decode('utf8').splitlines()
    answers_val = open('../data/preprocessed/answers_val2014.txt',
                       'r').read().decode('utf8').splitlines()
    images_val = open('../data/preprocessed/images_val2014.txt',
                      'r').read().decode('utf8').splitlines()
    vgg_model_path = '../features/coco/vgg_feats.mat'

    print 'Model compiled, weights loaded...'
    labelencoder = joblib.load('../models/labelencoder.pkl')

    features_struct = scipy.io.loadmat(vgg_model_path)
    VGGfeatures = features_struct['feats']
    print 'loaded vgg features'
    image_ids = open('../features/coco/coco_vgg_IDMap.txt').read().splitlines()
    img_map = {}
    for ids in image_ids:
        id_split = ids.split()
        img_map[id_split[0]] = int(id_split[1])

    nlp = English()
    print 'loaded word2vec features'

    nb_classes = 1000
    y_predict_text = []
    batchSize = 128
    widgets = [
        'Evaluating ',
        Percentage(), ' ',
        Bar(marker='#', left='[', right=']'), ' ',
        ETA()
    ]
    pbar = ProgressBar(widgets=widgets)

    for qu_batch, an_batch, im_batch in pbar(
            zip(grouper(questions_val, batchSize, fillvalue=questions_val[0]),
                grouper(answers_val, batchSize, fillvalue=answers_val[0]),
                grouper(images_val, batchSize, fillvalue=images_val[0]))):
        X_q_batch = get_questions_matrix_sum(qu_batch, nlp)
        if 'language_only' in args.model:
            X_batch = X_q_batch
        else:
            X_i_batch = get_images_matrix(im_batch, img_map, VGGfeatures)
            X_batch = np.hstack((X_q_batch, X_i_batch))
        y_predict = model.predict_classes(X_batch, verbose=0)
        y_predict_text.extend(labelencoder.inverse_transform(y_predict))

    correct_val = 0
    incorrect_val = 0
    f1 = open(args.results, 'w')

    for prediction, truth, question, image in zip(y_predict_text, answers_val,
                                                  questions_val, images_val):
        temp_count = 0
        for _truth in truth.split(';'):
            if prediction == _truth:
                temp_count += 1

        if temp_count > 2:
            correct_val += 1
        else:
            incorrect_val += 1

        f1.write(question.encode('utf-8'))
        f1.write('\n')
        f1.write(image.encode('utf-8'))
        f1.write('\n')
        f1.write(prediction)
        f1.write('\n')
        f1.write(truth.encode('utf-8'))
        f1.write('\n')
        f1.write('\n')

    f1.write('Final Accuracy is ' +
             str(float(correct_val) / (incorrect_val + correct_val)))
    f1.close()
    f1 = open('../results/overall_results.txt', 'a')
    f1.write(args.weights + '\n')
    f1.write(str(float(correct_val) / (incorrect_val + correct_val)) + '\n')
    f1.close()
    print 'Final Accuracy on the validation set is', float(correct_val) / (
        incorrect_val + correct_val)
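get_images_matrix is likewise undefined in these snippets. Judging from how it is called (a batch of COCO image ids, the id-to-column map built from coco_vgg_IDMap.txt, and the 'feats' array loaded from vgg_feats.mat), a plausible sketch is a simple column lookup; the 4096 x N layout is an assumption:

import numpy as np

def get_images_matrix(img_coco_ids, img_map, VGGfeatures):
    # Assumed layout: VGGfeatures is 4096 x N, one column per image;
    # img_map maps a COCO image id string to its column index.
    img_dim = VGGfeatures.shape[0]
    matrix = np.zeros((len(img_coco_ids), img_dim))
    for i, coco_id in enumerate(img_coco_ids):
        matrix[i, :] = VGGfeatures[:, img_map[coco_id]]
    return matrix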
Example #4
def main():

    parser = argparse.ArgumentParser()
    parser.add_argument('-num_hidden_units', type=int, default=1024)
    parser.add_argument('-num_hidden_layers', type=int, default=3)
    parser.add_argument('-dropout', type=float, default=0.5)
    parser.add_argument('-activation', type=str, default='tanh')
    parser.add_argument('-language_only', type=bool, default=False)
    parser.add_argument('-num_epochs', type=int, default=10)
    parser.add_argument('-model_save_interval', type=int, default=10)
    parser.add_argument('-batch_size', type=int, default=128)
    args = parser.parse_args()

    questions_train = open('../data/preprocessed/questions_train2014.txt',
                           'r').read().decode('utf8').splitlines()
    answers_train = open('../data/preprocessed/answers_train2014_modal.txt',
                         'r').read().decode('utf8').splitlines()
    images_train = open('../data/preprocessed/images_train2014.txt',
                        'r').read().decode('utf8').splitlines()
    vgg_model_path = '../features/coco/vgg_feats.mat'
    maxAnswers = 1000
    questions_train, answers_train, images_train = selectFrequentAnswers(
        questions_train, answers_train, images_train, maxAnswers)

    #encode the remaining answers
    labelencoder = preprocessing.LabelEncoder()
    labelencoder.fit(answers_train)
    nb_classes = len(list(labelencoder.classes_))
    joblib.dump(labelencoder, '../models/labelencoder.pkl')

    features_struct = scipy.io.loadmat(vgg_model_path)
    VGGfeatures = features_struct['feats']
    print 'loaded vgg features'
    image_ids = open('../features/coco_vgg_IDMap.txt').read().splitlines()
    id_map = {}
    for ids in image_ids:
        id_split = ids.split()
        id_map[id_split[0]] = int(id_split[1])

    nlp = English()
    print 'loaded word2vec features...'
    img_dim = 4096
    word_vec_dim = 300

    model = Sequential()
    if args.language_only:
        model.add(
            Dense(args.num_hidden_units,
                  input_dim=word_vec_dim,
                  init='uniform'))
    else:
        model.add(
            Dense(args.num_hidden_units,
                  input_dim=img_dim + word_vec_dim,
                  init='uniform'))
    model.add(Activation(args.activation))
    if args.dropout > 0:
        model.add(Dropout(args.dropout))
    for i in xrange(args.num_hidden_layers - 1):
        model.add(Dense(args.num_hidden_units, init='uniform'))
        model.add(Activation(args.activation))
        if args.dropout > 0:
            model.add(Dropout(args.dropout))
    model.add(Dense(nb_classes, init='uniform'))
    model.add(Activation('softmax'))

    json_string = model.to_json()
    if args.language_only:
        model_file_name = '../models/mlp_language_only_num_hidden_units_' + str(
            args.num_hidden_units) + '_num_hidden_layers_' + str(
                args.num_hidden_layers)
    else:
        model_file_name = '../models/mlp_num_hidden_units_' + str(
            args.num_hidden_units) + '_num_hidden_layers_' + str(
                args.num_hidden_layers)
    open(model_file_name + '.json', 'w').write(json_string)

    print 'Compiling model...'
    model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
    print 'Compilation done...'

    print 'Training started...'
    for k in xrange(args.num_epochs):
        #shuffle the data points before going through them
        index_shuf = range(len(questions_train))
        shuffle(index_shuf)
        questions_train = [questions_train[i] for i in index_shuf]
        answers_train = [answers_train[i] for i in index_shuf]
        images_train = [images_train[i] for i in index_shuf]
        progbar = generic_utils.Progbar(len(questions_train))
        for qu_batch, an_batch, im_batch in zip(
                grouper(questions_train,
                        args.batch_size,
                        fillvalue=questions_train[-1]),
                grouper(answers_train,
                        args.batch_size,
                        fillvalue=answers_train[-1]),
                grouper(images_train,
                        args.batch_size,
                        fillvalue=images_train[-1])):
            X_q_batch = get_questions_matrix_sum(qu_batch, nlp)
            if args.language_only:
                X_batch = X_q_batch
            else:
                X_i_batch = get_images_matrix(im_batch, id_map, VGGfeatures)
                X_batch = np.hstack((X_q_batch, X_i_batch))
            Y_batch = get_answers_matrix(an_batch, labelencoder)
            loss = model.train_on_batch(X_batch, Y_batch)
            progbar.add(args.batch_size, values=[("train loss", loss)])
        #print type(loss)
        if k % args.model_save_interval == 0:
            model.save_weights(model_file_name +
                               '_epoch_{:02d}.hdf5'.format(k))

    model.save_weights(model_file_name + '_epoch_{:02d}.hdf5'.format(k))
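The training loop converts answer strings into targets with get_answers_matrix. A minimal sketch, assuming it simply label-encodes the batch with the fitted LabelEncoder and one-hot encodes it for the categorical_crossentropy loss (np_utils.to_categorical is the old Keras helper):

from keras.utils import np_utils

def get_answers_matrix(answers, encoder):
    # encoder is the sklearn LabelEncoder dumped to ../models/labelencoder.pkl
    y = encoder.transform(answers)                 # answer string -> class index
    nb_classes = len(encoder.classes_)
    return np_utils.to_categorical(y, nb_classes)  # one-hot, shape (batch, nb_classes)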
Example #5
def main():

	parser = argparse.ArgumentParser()
	parser.add_argument('-num_hidden_units', type=int, default=1024)
	parser.add_argument('-num_hidden_layers', type=int, default=3)
	parser.add_argument('-dropout', type=float, default=0.5)
	parser.add_argument('-activation', type=str, default='tanh')
	parser.add_argument('-language_only', type=bool, default= False)
	parser.add_argument('-num_epochs', type=int, default=100)
	parser.add_argument('-model_save_interval', type=int, default=10)
	parser.add_argument('-batch_size', type=int, default=128)
	parser.add_argument('-word_vector', type=str, default='')
	args = parser.parse_args()

	questions_train = open('../data/preprocessed/questions_train2014.txt', 'r').read().decode('utf8').splitlines()
	answers_train = open('../data/preprocessed/answers_train2014_modal.txt', 'r').read().decode('utf8').splitlines()
	images_train = open('../data/preprocessed/images_train2014.txt', 'r').read().decode('utf8').splitlines()
	vgg_model_path = '../features/coco/vgg_feats.mat'
	maxAnswers = 1000
	questions_train, answers_train, images_train = selectFrequentAnswers(questions_train,answers_train,images_train, maxAnswers)

	#encode the remaining answers
	labelencoder = preprocessing.LabelEncoder()
	labelencoder.fit(answers_train)
	nb_classes = len(list(labelencoder.classes_))
	joblib.dump(labelencoder,'../models/labelencoder.pkl')

	features_struct = scipy.io.loadmat(vgg_model_path)
	VGGfeatures = features_struct['feats']
	print 'loaded vgg features'
	image_ids = open('../features/coco_vgg_IDMap.txt').read().splitlines()
	id_map = {}
	for ids in image_ids:
		id_split = ids.split()
		id_map[id_split[0]] = int(id_split[1])

	# Code to choose the word vectors; default is Goldberg, but GloVe is preferred
	if args.word_vector == 'glove':
		nlp = spacy.load('en', vectors='en_glove_cc_300_1m_vectors')
	else:
		nlp = English()

	print 'loaded ' + args.word_vector + ' word2vec features...'
	img_dim = 4096
	word_vec_dim = 300

	model = Sequential()
	if args.language_only:
		model.add(Dense(args.num_hidden_units, input_dim=word_vec_dim, init='uniform'))
	else:
		model.add(Dense(args.num_hidden_units, input_dim=img_dim+word_vec_dim, init='uniform'))
	model.add(Activation(args.activation))
	if args.dropout>0:
		model.add(Dropout(args.dropout))
	for i in xrange(args.num_hidden_layers-1):
		model.add(Dense(args.num_hidden_units, init='uniform'))
		model.add(Activation(args.activation))
		if args.dropout>0:
			model.add(Dropout(args.dropout))
	model.add(Dense(nb_classes, init='uniform'))
	model.add(Activation('softmax'))

	json_string = model.to_json()
	if args.language_only:
		model_file_name = '../models/mlp_language_only_num_hidden_units_' + str(args.num_hidden_units) + '_num_hidden_layers_' + str(args.num_hidden_layers)
	else:
		model_file_name = '../models/mlp_num_hidden_units_' + str(args.num_hidden_units) + '_num_hidden_layers_' + str(args.num_hidden_layers)		
	open(model_file_name  + '.json', 'w').write(json_string)

	print 'Compiling model...'
	model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
	print 'Compilation done...'
	
	print 'Training started...'
	for k in xrange(args.num_epochs):
		#shuffle the data points before going through them
		index_shuf = range(len(questions_train))
		shuffle(index_shuf)
		questions_train = [questions_train[i] for i in index_shuf]
		answers_train = [answers_train[i] for i in index_shuf]
		images_train = [images_train[i] for i in index_shuf]
		progbar = generic_utils.Progbar(len(questions_train))
		for qu_batch,an_batch,im_batch in zip(grouper(questions_train, args.batch_size, fillvalue=questions_train[-1]), 
											grouper(answers_train, args.batch_size, fillvalue=answers_train[-1]), 
											grouper(images_train, args.batch_size, fillvalue=images_train[-1])):
			X_q_batch = get_questions_matrix_sum(qu_batch, nlp)
			if args.language_only:
				X_batch = X_q_batch
			else:
				X_i_batch = get_images_matrix(im_batch, id_map, VGGfeatures)
				X_batch = np.hstack((X_q_batch, X_i_batch))
			Y_batch = get_answers_matrix(an_batch, labelencoder)
			loss = model.train_on_batch(X_batch, Y_batch)
			# fix for the Keras v0.3 issue #9
			progbar.add(args.batch_size, values=[("train loss", loss[0])])
		#print type(loss)
		if k%args.model_save_interval == 0:
			model.save_weights(model_file_name + '_epoch_{:02d}.hdf5'.format(k))

	model.save_weights(model_file_name + '_epoch_{:02d}.hdf5'.format(k))
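selectFrequentAnswers trims the training triples to the maxAnswers (here 1000) most common answers before the label encoder is fitted. A sketch of that filtering, assuming plain frequency counting:

from collections import Counter

def selectFrequentAnswers(questions, answers, images, max_answers):
    # Keep only (question, answer, image) triples whose answer is among
    # the max_answers most frequent answers in the training set.
    top_answers = set(a for a, _ in Counter(answers).most_common(max_answers))
    keep = [i for i, a in enumerate(answers) if a in top_answers]
    return ([questions[i] for i in keep],
            [answers[i] for i in keep],
            [images[i] for i in keep])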
Example #6
File: demoVQA.py  Project: hege0110/VQA
def main():
	cwd = os.getcwd()

	parser = argparse.ArgumentParser()
	parser.add_argument('-num_hidden_units', type=int, default=1024)
	parser.add_argument('-num_hidden_layers', type=int, default=3)
	parser.add_argument('-dropout', type=float, default=0.5)
	parser.add_argument('-activation', type=str, default='tanh')
	parser.add_argument('-language_only', type=bool, default= False)
	parser.add_argument('-num_epochs', type=int, default=2)
	parser.add_argument('-model_save_interval', type=int, default=10)
	parser.add_argument('-model_weights_path', type=str, default=cwd+'/vgg/vgg16_weights.h5')
	parser.add_argument('-batch_size', type=int, default=128)
	parser.add_argument('-questions_train',type=str, default = cwd+'/data/preprocessed/questions_train2015.txt')
	parser.add_argument('-answers_train',type=str, default = cwd+'/data/preprocessed/answers_train2015_modal.txt')
	parser.add_argument('-im_dir',type=str, default =cwd+'/data/preprocessed/scene_img_abstract_v002_train2015/')
	#parser.add_argument('-questions_train',type=str, default = cwd+'/data/preprocessed/questions_train2014.txt')
	args = parser.parse_args()

	questions_train = open(args.questions_train, 'r').read().decode('utf8').splitlines()
	answers_train = open(args.answers_train, 'r').read().decode('utf8').splitlines()
	images_train = open(cwd+'/data/preprocessed/images_train2015.txt', 'r').read().decode('utf8').splitlines()
	#vgg_model_path = cwd+'/features/coco/vgg_feats.mat' #this needs to change
	maxAnswers = 100
	questions_train, answers_train, images_train = selectFrequentAnswers(questions_train,answers_train,images_train, maxAnswers)

	#encode the remaining answers
	labelencoder = preprocessing.LabelEncoder()
	labelencoder.fit(answers_train)
	nb_classes = len(list(labelencoder.classes_))
	joblib.dump(labelencoder,cwd+'/models/labelencoder.pkl')

	#features_struct = scipy.io.loadmat(vgg_model_path)
	#VGGfeatures = features_struct['feats']
	# print 'loaded vgg features'
	# image_ids = open(cwd+'/features/coco_vgg_IDMap.txt').read().splitlines()
	# id_map = {}
	# for ids in image_ids:
	# 	id_split = ids.split()
	# 	id_map[id_split[0]] = int(id_split[1])

	vgg_model = vgg16.VGG_16(args.model_weights_path)
	sgd = SGD(lr=0.1, decay=1e-6, momentum=0.9, nesterov=True)
	vgg_model.compile(optimizer=sgd, loss='categorical_crossentropy')
	print 'loaded vgg model...'

	nlp = English()
	print 'loaded word2vec features...'

	img_dim = 4096
	word_vec_dim = 300

	model = Sequential()
	if args.language_only:
		model.add(Dense(args.num_hidden_units, input_dim=word_vec_dim, init='uniform'))
	else:
		model.add(Dense(args.num_hidden_units, input_dim=img_dim+word_vec_dim, init='uniform'))
	model.add(Activation(args.activation))
	if args.dropout>0:
		model.add(Dropout(args.dropout))
	for i in xrange(args.num_hidden_layers-1):
		model.add(Dense(args.num_hidden_units, init='uniform'))
		model.add(Activation(args.activation))
		if args.dropout>0:
			model.add(Dropout(args.dropout))
	model.add(Dense(nb_classes, init='uniform'))
	model.add(Activation('softmax'))

	json_string = model.to_json()
	model_file_name = cwd+'/models/mlp_num_hidden_units_' + str(args.num_hidden_units) + '_num_hidden_layers_' + str(args.num_hidden_layers)		
	open(model_file_name  + '.json', 'w').write(json_string)
	
	print 'Training started...'
	id_map = {}
	f1 = open('abstract_image_precompute')
	f2 = open('abstract_image_precompute_reverse')
	VGGfeatures = np.loadtxt(f1)
	VGGfeatures_reverse = np.loadtxt(f2)
	f1.close()
	f2.close()
	for k in xrange(args.num_epochs):
		#shuffle the data points before going through them
		index_shuf = range(len(questions_train))
		shuffle(index_shuf)
		questions_train = [questions_train[i] for i in index_shuf]
		answers_train = [answers_train[i] for i in index_shuf]
		images_train = [images_train[i] for i in index_shuf]
		progbar = generic_utils.Progbar(len(questions_train))
		for qu_batch,an_batch,im_batch in zip(grouper(questions_train, args.batch_size, fillvalue=questions_train[-1]), 
											grouper(answers_train, args.batch_size, fillvalue=answers_train[-1]), 
											grouper(images_train, args.batch_size, fillvalue=images_train[-1])):
			
			X_q_batch = get_questions_matrix_sum(qu_batch, nlp)
			im_path = args.im_dir +"abstract_v002_train2015_"
			print 'getting image features...'
			X_i_batch = get_images_matrix(im_batch, VGGfeatures, VGGfeatures_reverse)
			# X_i_batch = get_images_matrix_from_model(vgg_model, im_batch, im_path, id_map)
			X_batch = np.hstack((X_q_batch, X_i_batch))

			Y_batch = get_answers_matrix(an_batch, labelencoder)
			print 'running training on batch...'
			
			loss = model.train_on_batch(X_batch, Y_batch)
			
			progbar.add(args.batch_size, values=[("train loss", loss)])

		if k%args.model_save_interval == 0:
			model.save_weights(model_file_name + '_epoch_{:02d}.hdf5'.format(k))
	model.save_weights(model_file_name + '_epoch_{:02d}.hdf5'.format(k))
Example #7
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-model', type=str, required=True)
    parser.add_argument('-results', type=str, required=True)
    args = parser.parse_args()

    model = load_model(args.model)
    model.compile(loss='categorical_crossentropy', optimizer='rmsprop')

    questions_val = open('./data/questions_val2014.txt',
                         'r').read().decode('utf8').splitlines()
    answers_val = open('./data/answers_val2014_all.txt',
                       'r').read().decode('utf8').splitlines()
    images_val = open('./data/images_val2014_all.txt',
                      'r').read().decode('utf8').splitlines()

    print 'Model compiled, weights loaded...'
    labelencoder = joblib.load('./labelencoder.pkl')

    image_ids = open("./id_map.txt").read().splitlines()
    id_map = {}
    for ids in image_ids:
        id_split = ids.split()
        id_map[int(id_split[0])] = int(id_split[1]) - 1

    sherlock_features = np.load('./sherlock_features_temp.npy')

    nlp = English()
    print 'loaded word2vec features'

    nb_classes = 1000
    y_predict_text = []
    batchSize = 128
    widgets = ['Evaluating ', Percentage(), ' ', Bar(marker='#', left='[', right=']'),
               ' ', ETA()]
    pbar = ProgressBar(widgets=widgets)

    for qu_batch, an_batch, im_batch in pbar(zip(grouper(questions_val, batchSize, fillvalue=questions_val[0]),
                                                 grouper(answers_val, batchSize, fillvalue=answers_val[0]),
                                                 grouper(images_val, batchSize, fillvalue=images_val[0]))):
        X_q_batch = get_questions_matrix_sum(qu_batch, nlp)
        X_i_batch = get_images_matrix(im_batch, id_map, sherlock_features)
        X_batch = np.hstack((X_q_batch, X_i_batch))
        y_predict = model.predict_classes(X_batch, verbose=0)
        y_predict_text.extend(labelencoder.inverse_transform(y_predict))

    correct_val = 0.0
    total = 0
    f1 = open(args.results, 'w')

    for prediction, truth, question, image in zip(y_predict_text, answers_val, questions_val, images_val):
        temp_count = 0
        for _truth in truth.split(';'):
            if prediction == _truth:
                temp_count += 1

        if temp_count > 2:
            correct_val += 1
        else:
            correct_val += float(temp_count) / 3

        total += 1
        f1.write(question.encode('utf-8'))
        f1.write('\n')
        f1.write(image.encode('utf-8'))
        f1.write('\n')
        f1.write(prediction)
        f1.write('\n')
        f1.write(truth.encode('utf-8'))
        f1.write('\n')
        f1.write('\n')

    f1.write('Final Accuracy is ' + str(correct_val / total))
    f1.close()
    f1 = open('../results/overall_results.txt', 'a')
    f1.write(args.model + '\n')  # this script defines -model, not -weights; log the model path
    f1.write(str(correct_val / total) + '\n')
    f1.close()
    print 'Final Accuracy on the validation set is', correct_val / total
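Unlike examples #2 and #3, which count a prediction as correct only when it matches more than two of the annotator answers, this script gives partial credit of matches/3 otherwise, i.e. the usual VQA soft accuracy min(matches/3, 1). A small standalone helper (the name is hypothetical) computing the same per-question score:

def vqa_soft_accuracy(prediction, truth):
    # truth is the ';'-separated list of human answers for one question.
    matches = sum(1 for t in truth.split(';') if t == prediction)
    return min(matches / 3.0, 1.0)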
Example #8
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("-model", type=str, required=True)
    parser.add_argument("-weights", type=str, required=True)
    parser.add_argument("-results", type=str, required=True)
    args = parser.parse_args()

    model = model_from_json(open(args.model).read())
    model.load_weights(args.weights)
    model.compile(loss="categorical_crossentropy", optimizer="rmsprop")

    questions_val = open("../data/preprocessed/questions_val2014.txt", "r").read().decode("utf8").splitlines()
    answers_val = open("../data/preprocessed/answers_val2014.txt", "r").read().decode("utf8").splitlines()
    images_val = open("../data/preprocessed/images_val2014.txt", "r").read().decode("utf8").splitlines()
    vgg_model_path = "../features/coco/vgg_feats.mat"

    print "Model compiled, weights loaded..."
    labelencoder = joblib.load("../models/labelencoder.pkl")

    features_struct = scipy.io.loadmat(vgg_model_path)
    VGGfeatures = features_struct["feats"]
    print "loaded vgg features"
    image_ids = open("../features/coco/coco_vgg_IDMap.txt").read().splitlines()
    img_map = {}
    for ids in image_ids:
        id_split = ids.split()
        img_map[id_split[0]] = int(id_split[1])

    nlp = English()
    print "loaded word2vec features"

    nb_classes = 1000
    y_predict_text = []
    batchSize = 128
    widgets = ["Evaluating ", Percentage(), " ", Bar(marker="#", left="[", right="]"), " ", ETA()]
    pbar = ProgressBar(widgets=widgets)

    for qu_batch, an_batch, im_batch in pbar(
        zip(
            grouper(questions_val, batchSize, fillvalue=questions_val[0]),
            grouper(answers_val, batchSize, fillvalue=answers_val[0]),
            grouper(images_val, batchSize, fillvalue=images_val[0]),
        )
    ):
        X_q_batch = get_questions_matrix_sum(qu_batch, nlp)
        if "language_only" in args.model:
            X_batch = X_q_batch
        else:
            X_i_batch = get_images_matrix(im_batch, img_map, VGGfeatures)
            X_batch = np.hstack((X_q_batch, X_i_batch))
        y_predict = model.predict_classes(X_batch, verbose=0)
        y_predict_text.extend(labelencoder.inverse_transform(y_predict))

    correct_val = 0
    incorrect_val = 0
    f1 = open(args.results, "w")

    for prediction, truth, question, image in zip(y_predict_text, answers_val, questions_val, images_val):
        temp_count = 0
        for _truth in truth.split(";"):
            if prediction == _truth:
                temp_count += 1

        if temp_count > 2:
            correct_val += 1
        else:
            incorrect_val += 1

        f1.write(question.encode("utf-8"))
        f1.write("\n")
        f1.write(image.encode("utf-8"))
        f1.write("\n")
        f1.write(prediction)
        f1.write("\n")
        f1.write(truth.encode("utf-8"))
        f1.write("\n")
        f1.write("\n")

    f1.write("Final Accuracy is " + str(float(correct_val) / (incorrect_val + correct_val)))
    f1.close()
    f1 = open("../results/overall_results.txt", "a")
    f1.write(args.weights + "\n")
    f1.write(str(float(correct_val) / (incorrect_val + correct_val)) + "\n")
    f1.close()
    print "Final Accuracy on the validation set is", float(correct_val) / (incorrect_val + correct_val)
Example #9
training_loss = []

for i in xrange(args.num_epochs):
    progbar = generic_utils.Progbar(len(questions_train))
    for qu_batch, an_batch, im_batch in zip(
            grouper(questions_train,
                    args.batch_size,
                    fillvalue=questions_train[-1]),
            grouper(answers_train,
                    args.batch_size,
                    fillvalue=answers_train[-1]),
            grouper(images_train, args.batch_size,
                    fillvalue=images_train[-1])):
        # logging.debug("One batch done")
        x_q_batch = get_questions_matrix_sum(qu_batch, nlp)
        # logging.debug("length of qu_batch is %d", len(qu_batch))
        # logging.debug("Shape of x_q_batch is: %s", x_q_batch.shape)
        x_i_batch = get_images_matrix(im_batch, id_map, sherlock_features)
        # logging.debug("shape of x_i_batch is %s", x_i_batch.shape)
        x_batch = np.hstack((x_q_batch, x_i_batch))
        y_batch = get_answers_matrix(an_batch, labelencoder)
        loss = model.train_on_batch(x_batch, y_batch)
        training_loss.append(loss)
        progbar.add(args.batch_size, values=[("train_loss", loss)])
        # print "\n"

# if __name__ == '__main__':
#     main()

plt.plot(training_loss)
plt.title("Training loss for the network")
plt.savefig('loss.png')
Example #10
def process_input(self, question):
    return get_questions_matrix_sum(question, self._nlp)