Example #1
def main():
    args = parse_arguments()

    data_root = args.dataroot
    experiment_root = args.experiment_root

    # Set the Python, numpy, and TensorFlow random seeds for reproducibility.
    random.seed(RANDOM_SEED)
    np.random.seed(RANDOM_SEED)
    tf.set_random_seed(RANDOM_SEED)

    # Load data needed for training and save all parameters/mappings to make
    # sure experiments are reproducible
    questions_train_all, answers_train_all, images_train_all = load_train_data(
        data_root)

    # Since we are simplifying the problem of Visual QA to a classification
    # problem in this baseline, we want to limit the number of possible
    # answers, and have the model simply pick the most appropriate one.
    max_answers = 1000
    questions_train_all, answers_train_all, images_train_all = \
        select_frequent_answers(questions_train_all, answers_train_all,
                                images_train_all, max_answers)

    # Encode the remaining (top max_answers) answers and save the mapping.
    labelencoder = preprocessing.LabelEncoder()
    labelencoder.fit(answers_train_all)
    nb_classes = len(list(labelencoder.classes_))
    with open(pjoin(experiment_root, 'labelencoder.pkl'), 'wb') as pfile:
        pickle.dump(labelencoder, pfile)

    # The initial shuffle ensures that the train-val split is randomized
    # depending on the random seed, and not fixed every time (which would be
    # very bad).
    print("Performing initial shuffle...")
    questions_train_all, answers_train_all, images_train_all = sklearn_shuffle(
        questions_train_all, answers_train_all, images_train_all)

    train_all_count = len(questions_train_all)
    valid_count = int(train_all_count * args.valid_ratio)
    train_count = train_all_count - valid_count

    print("We have {0} total Q-A pairs. Will use {1:.2f}% for validation, "
          "which is {2} data points. {3} data points will be used for "
          "actual training.".format(train_all_count, args.valid_ratio * 100.0,
                                    valid_count, train_count))

    questions_train = questions_train_all[:train_count]
    answers_train = answers_train_all[:train_count]
    images_train = images_train_all[:train_count]
    # Note again that this is NOT the official validation set, but just a
    # fraction (`args.valid_ratio`) of the training set. The full validation
    # set evaluation is performed separately.
    questions_valid = questions_train_all[train_count:]
    answers_valid = answers_train_all[train_count:]
    images_valid = images_train_all[train_count:]

    # construct the model
    final_model, lang_model, img_model = construct_model(
        args, data_root, experiment_root, nb_classes)
    model = final_model.model

    # Compute the validation error val_per_epoch times per epoch.
    val_per_epoch = 4
    eval_valid_every = max(1, int((train_count / args.batch_size) / val_per_epoch))

    # Perform Tensorboard-friendly dumps.
    # TODO(andrei): This only works when using Keras's 'fit' method directly.
    # tensorboard_log_dir = pjoin(experiment_root, 'logs')
    # tensorboard_cb = keras.callbacks.TensorBoard(log_dir=tensorboard_log_dir,
    #                                              histogram_freq=0,
    #                                              write_graph=True,
    #                                              write_images=False)

    # The training part starts here
    print('Training started...')
    last_valid_loss = 10  # Sentinel shown in the progress bar until the first validation pass.
    for epoch in range(args.num_epochs):
        epoch_start_ms = int(time.time() * 1000)
        # shuffle the data points before going through them
        questions_train, answers_train, images_train = sklearn_shuffle(
            questions_train, answers_train, images_train)
        progbar = generic_utils.Progbar(len(questions_train))
        batches = batchify(args.batch_size, questions_train, answers_train,
                           images_train)
        for batch_idx, (qu_batch, an_batch, im_batch) in enumerate(batches):
            # Extract batch vectors to train on
            # Converts the answers to their index (we're just doing
            # classification at this point)
            y_batch = get_answers_matrix(an_batch, labelencoder)

            # train on language only or language and image both
            if args.language_only:
                x_q_batch = lang_model.process_input(qu_batch)
                loss = model.train_on_batch(x_q_batch, y_batch)
            else:
                x_q_batch = lang_model.process_input(qu_batch)
                x_i_batch = img_model.process_input(im_batch)
                loss = model.train_on_batch([x_q_batch, x_i_batch], y_batch)

            if (batch_idx + 1) % eval_valid_every == 0:
                # It's time to validate on the held-out part of the training
                # dataset.
                batch_val_losses = []
                val_batches = batchify(args.batch_size, questions_valid,
                                       answers_valid, images_valid)
                for (qu_val_batch, an_val_batch, im_val_batch) in val_batches:
                    y_val_batch = get_answers_matrix(an_val_batch,
                                                     labelencoder)
                    if args.language_only:
                        val_loss = model.test_on_batch(
                            lang_model.process_input(qu_val_batch),
                            y_val_batch)
                    else:
                        val_loss = model.test_on_batch([
                            lang_model.process_input(qu_val_batch),
                            img_model.process_input(im_val_batch)
                        ], y_val_batch)

                    batch_val_losses.append(val_loss)

                # The validation loss is just the average of the individual
                # losses computed for each batch of the validation data.
                last_valid_loss = np.mean(batch_val_losses)

            # Note: the progress bar averages these values, so the reported
            # validation loss lags slightly behind the most recent measurement.
            progbar.add(args.batch_size,
                        values=[("tra-loss", loss),
                                ("val-loss", last_valid_loss)])

        epoch_end_ms = int(time.time() * 1000)
        epoch_delta_s = (epoch_end_ms - epoch_start_ms) / 1000.0
        print("Epoch {0}/{1} took {2:.1f}s.".format(
            (epoch + 1), args.num_epochs, epoch_delta_s))
        print("Latest validation loss: {0:4f}".format(last_valid_loss))

        # Dump a checkpoint periodically.
        if (epoch + 1) % args.model_save_interval == 0:
            model_dump_fname = pjoin(experiment_root,
                                     'weights_{0}.hdf5'.format(epoch + 1))
            print('Saving model to file: {0}'.format(model_dump_fname))
            model.save_weights(model_dump_fname)

        # Compute overall accuracy periodically on OFFICIAL full validation
        # set (but not too often, as it can get quite slow).
        if (epoch + 1) % args.model_eval_full_valid_interval == 0:
            # TODO(andrei): Implement this in a neat way.
            pass

    # TODO(Bernhard): catch control+c and store last parameters...
    # Final checkpoint dump.
    model.save_weights(
        pjoin(experiment_root, 'weights_{0}_final.hdf5'.format(epoch + 1)))
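The batchify helper used above is not shown in this example. A minimal sketch of what it is assumed to do (yield aligned question/answer/image slices of batch_size items; the real helper may instead pad or drop the final short batch):

def batchify(batch_size, questions, answers, images):
    # Yield aligned (questions, answers, images) batches of size batch_size.
    # The final batch may be shorter than batch_size.
    for start in range(0, len(questions), batch_size):
        end = start + batch_size
        yield (questions[start:end], answers[start:end], images[start:end])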
Example #2
def main():
	parser = argparse.ArgumentParser()
	parser.add_argument('-num_hidden_units', type=int, default=512)
	parser.add_argument('-num_lstm_layers', type=int, default=2)
	parser.add_argument('-dropout', type=float, default=0.2)
	parser.add_argument('-activation', type=str, default='tanh')
	parser.add_argument('-num_epochs', type=int, default=100)
	parser.add_argument('-model_save_interval', type=int, default=5)
	parser.add_argument('-batch_size', type=int, default=128)
	parser.add_argument('-word_vector', type=str, default='')
	args = parser.parse_args()

	questions_train = open('../data/preprocessed/questions_train2014.txt', 'r').read().decode('utf8').splitlines()
	questions_lengths_train = open('../data/preprocessed/questions_lengths_train2014.txt', 'r').read().decode('utf8').splitlines()
	answers_train = open('../data/preprocessed/answers_train2014.txt', 'r').read().decode('utf8').splitlines()
	images_train = open('../data/preprocessed/images_train2014.txt', 'r').read().decode('utf8').splitlines()
	max_answers = 1000
	questions_train, answers_train, images_train = selectFrequentAnswers(questions_train,answers_train,images_train, max_answers)

	print 'Loaded questions, sorting by length...'
	# Cast the string lengths to int before sorting by question length.
	questions_lengths_train, questions_train, answers_train = (list(t) for t in zip(*sorted(zip(map(int, questions_lengths_train), questions_train, answers_train))))
	
	#encode the remaining answers
	labelencoder = preprocessing.LabelEncoder()
	labelencoder.fit(answers_train)
	nb_classes = len(list(labelencoder.classes_))
	joblib.dump(labelencoder,'../models/labelencoder.pkl')
	max_len = 30 #25 is max for training, 27 is max for validation
	word_vec_dim = 300

	model = Sequential()
	model.add(LSTM(output_dim = args.num_hidden_units, activation=args.activation,
			return_sequences=True, input_shape=(max_len, word_vec_dim)))
	model.add(Dropout(args.dropout))
	model.add(LSTM(args.num_hidden_units, return_sequences=False))
	model.add(Dense(nb_classes, init='uniform'))
	model.add(Activation('softmax'))

	json_string = model.to_json()
	model_file_name = '../models/lstm_language_only_num_hidden_units_' + str(args.num_hidden_units) + '_num_lstm_layers_' + str(args.num_lstm_layers) + '_dropout_' + str(args.dropout)
	open(model_file_name  + '.json', 'w').write(json_string)
	
	print 'Compiling model...'
	model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
	print 'Compilation done...'

	# Set up the word vectors: the default is the stock spaCy vectors; GloVe is preferred.
	if args.word_vector == 'glove':
		nlp = spacy.load('en', vectors='en_glove_cc_300_1m_vectors')
	else:
		nlp = English()

	print 'loaded ' + (args.word_vector or 'default') + ' word vectors...'

	## training
	# A few hyperparameters were moved to the argument parser (num_epochs, batch_size, model_save_interval).
	print 'Training started...'
	for k in xrange(args.num_epochs):

		progbar = generic_utils.Progbar(len(questions_train))

		# Pad the final batch with the longest question so that timesteps below
		# covers every real question in the batch.
		for qu_batch,an_batch,im_batch in zip(grouper(questions_train, args.batch_size, fillvalue=questions_train[-1]), 
												grouper(answers_train, args.batch_size, fillvalue=answers_train[-1]), 
												grouper(images_train, args.batch_size, fillvalue=images_train[-1])):
			timesteps = len(nlp(qu_batch[-1])) # questions sorted by length, so this is the batch's longest
			X_q_batch = get_questions_tensor_timeseries(qu_batch, nlp, timesteps)
			Y_batch = get_answers_matrix(an_batch, labelencoder)
			loss = model.train_on_batch(X_q_batch, Y_batch)
			# Keras v0.3's train_on_batch returns a list (Keras issue #9); report the scalar loss.
			progbar.add(args.batch_size, values=[("train loss", loss[0])])

		
		if k%args.model_save_interval == 0:
			model.save_weights(model_file_name + '_epoch_{:02d}.hdf5'.format(k))

	model.save_weights(model_file_name + '_epoch_{:02d}.hdf5'.format(k+1))
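The grouper helper used throughout these examples is presumably the standard itertools recipe, which pads the final chunk with fillvalue (hence the fillvalue arguments above). A Python 2 sketch:

from itertools import izip_longest

def grouper(iterable, n, fillvalue=None):
    # Collect data into fixed-length chunks, padding the last one:
    # grouper('ABCDEFG', 3, 'x') --> ABC DEF Gxx
    args = [iter(iterable)] * n
    return izip_longest(fillvalue=fillvalue, *args)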
Example #3
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-num_hidden_units', type=int, default=512)
    parser.add_argument('-num_lstm_layers', type=int, default=2)
    parser.add_argument('-dropout', type=float, default=0.2)
    parser.add_argument('-activation', type=str, default='tanh')
    args = parser.parse_args()

    questions_train = open('../data/preprocessed/questions_train2014.txt',
                           'r').read().decode('utf8').splitlines()
    questions_lengths_train = open(
        '../data/preprocessed/questions_lengths_train2014.txt',
        'r').read().decode('utf8').splitlines()
    answers_train = open('../data/preprocessed/answers_train2014.txt',
                         'r').read().decode('utf8').splitlines()
    images_train = open('../data/preprocessed/images_train2014.txt',
                        'r').read().decode('utf8').splitlines()
    max_answers = 1000
    questions_train, answers_train, images_train = selectFrequentAnswers(
        questions_train, answers_train, images_train, max_answers)

    print 'Loaded questions, sorting by length...'
    # Cast the string lengths to int before sorting by question length.
    questions_lengths_train, questions_train, answers_train = (
        list(t) for t in zip(*sorted(
            zip(map(int, questions_lengths_train), questions_train,
                answers_train))))

    #encode the remaining answers
    labelencoder = preprocessing.LabelEncoder()
    labelencoder.fit(answers_train)
    nb_classes = len(list(labelencoder.classes_))
    joblib.dump(labelencoder, '../models/labelencoder.pkl')
    max_len = 30  #25 is max for training, 27 is max for validation
    word_vec_dim = 300

    model = Sequential()
    model.add(
        LSTM(output_dim=args.num_hidden_units,
             activation='tanh',
             return_sequences=True,
             input_shape=(max_len, word_vec_dim)))
    model.add(Dropout(args.dropout))
    model.add(LSTM(args.num_hidden_units, return_sequences=False))
    model.add(Dense(nb_classes, init='uniform'))
    model.add(Activation('softmax'))

    json_string = model.to_json()
    model_file_name = '../models/lstm_language_only_num_hidden_units_' + str(
        args.num_hidden_units) + '_num_lstm_layers_' + str(
            args.num_lstm_layers) + '_dropout_' + str(args.dropout)
    open(model_file_name + '.json', 'w').write(json_string)

    print 'Compiling model...'
    model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
    print 'Compilation done...'

    #set up word vectors
    nlp = English()
    print 'loaded word2vec features...'

    ## training
    print 'Training started...'
    numEpochs = 100
    model_save_interval = 5
    batchSize = 128
    for k in xrange(numEpochs):

        progbar = generic_utils.Progbar(len(questions_train))

        # Pad the final batch with the longest question so that timesteps
        # below covers every real question in the batch.
        for qu_batch, an_batch, im_batch in zip(
                grouper(questions_train,
                        batchSize,
                        fillvalue=questions_train[-1]),
                grouper(answers_train, batchSize, fillvalue=answers_train[-1]),
                grouper(images_train, batchSize, fillvalue=images_train[-1])):
            timesteps = len(nlp(
                qu_batch[-1]))  # questions sorted by length, so this is the batch's longest
            X_q_batch = get_questions_tensor_timeseries(
                qu_batch, nlp, timesteps)
            Y_batch = get_answers_matrix(an_batch, labelencoder)
            loss = model.train_on_batch(X_q_batch, Y_batch)
            progbar.add(batchSize, values=[("train loss", loss)])

        if k % model_save_interval == 0:
            model.save_weights(model_file_name +
                               '_epoch_{:02d}.hdf5'.format(k))

    model.save_weights(model_file_name + '_epoch_{:02d}.hdf5'.format(k + 1))
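get_questions_tensor_timeseries prepares the LSTM input. It is assumed to stack one spaCy word vector per token into a zero-padded (samples, timesteps, 300) tensor, along these lines:

import numpy as np

def get_questions_tensor_timeseries(questions, nlp, timesteps):
    # One 300-d word vector per token, zero-padded (and truncated) to timesteps.
    word_vec_dim = nlp(questions[0])[0].vector.shape[0]
    questions_tensor = np.zeros((len(questions), timesteps, word_vec_dim))
    for i, question in enumerate(questions):
        for j, token in enumerate(nlp(question)[:timesteps]):
            questions_tensor[i, j, :] = token.vector
    return questions_tensor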
Example #4
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-num_hidden_units_mlp', type=int, default=1024)
    parser.add_argument('-num_hidden_units_lstm', type=int, default=512)
    parser.add_argument('-num_hidden_layers_mlp', type=int, default=3)
    parser.add_argument('-num_hidden_layers_lstm', type=int, default=1)
    parser.add_argument('-dropout', type=float, default=0.5)
    parser.add_argument('-activation_mlp', type=str, default='tanh')
    parser.add_argument('-num_epochs', type=int, default=100)
    parser.add_argument('-model_save_interval', type=int, default=5)
    parser.add_argument('-batch_size', type=int, default=128)
    #TODO Feature parser.add_argument('-resume_training', type=str)
    #TODO Feature parser.add_argument('-language_only', type=bool, default= False)
    args = parser.parse_args()

    word_vec_dim = 300
    img_dim = 4096
    max_len = 30
    nb_classes = 1000

    #get the data
    questions_train = open('../data/preprocessed/questions_train2014.txt',
                           'r').read().decode('utf8').splitlines()
    questions_lengths_train = open(
        '../data/preprocessed/questions_lengths_train2014.txt',
        'r').read().decode('utf8').splitlines()
    answers_train = open('../data/preprocessed/answers_train2014.txt',
                         'r').read().decode('utf8').splitlines()
    images_train = open('../data/preprocessed/images_train2014.txt',
                        'r').read().decode('utf8').splitlines()
    vgg_model_path = '../features/coco/vgg_feats.mat'

    max_answers = nb_classes
    questions_train, answers_train, images_train = selectFrequentAnswers(
        questions_train, answers_train, images_train, max_answers)
    # Cast the string lengths to int before sorting by question length.
    questions_lengths_train, questions_train, answers_train, images_train = (
        list(t) for t in zip(*sorted(
            zip(map(int, questions_lengths_train), questions_train,
                answers_train, images_train))))

    #encode the remaining answers
    labelencoder = preprocessing.LabelEncoder()
    labelencoder.fit(answers_train)
    nb_classes = len(list(labelencoder.classes_))
    joblib.dump(labelencoder, '../models/labelencoder.pkl')

    image_model = Sequential()
    image_model.add(Reshape(input_shape=(img_dim, ), dims=(img_dim, )))

    language_model = Sequential()
    if args.num_hidden_layers_lstm == 1:
        language_model.add(
            LSTM(output_dim=args.num_hidden_units_lstm,
                 return_sequences=False,
                 input_shape=(max_len, word_vec_dim)))
    else:
        language_model.add(
            LSTM(output_dim=args.num_hidden_units_lstm,
                 return_sequences=True,
                 input_shape=(max_len, word_vec_dim)))
        for i in xrange(args.num_hidden_layers_lstm - 2):
            language_model.add(
                LSTM(output_dim=args.num_hidden_units_lstm,
                     return_sequences=True))
        language_model.add(
            LSTM(output_dim=args.num_hidden_units_lstm,
                 return_sequences=False))

    model = Sequential()
    model.add(
        Merge([language_model, image_model], mode='concat', concat_axis=1))
    for i in xrange(args.num_hidden_layers_mlp):
        model.add(Dense(args.num_hidden_units_mlp, init='uniform'))
        model.add(Activation(args.activation_mlp))
        model.add(Dropout(args.dropout))
    model.add(Dense(nb_classes))
    model.add(Activation('softmax'))

    json_string = model.to_json()
    model_file_name = '../models/lstm_1_num_hidden_units_lstm_' + str(args.num_hidden_units_lstm) + \
         '_num_hidden_units_mlp_' + str(args.num_hidden_units_mlp) + '_num_hidden_layers_mlp_' + \
         str(args.num_hidden_layers_mlp) + '_num_hidden_layers_lstm_' + str(args.num_hidden_layers_lstm)
    open(model_file_name + '.json', 'w').write(json_string)

    model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
    print 'Compilation done'

    features_struct = scipy.io.loadmat(vgg_model_path)
    VGGfeatures = features_struct['feats']
    print 'loaded vgg features'
    image_ids = open('../features/coco_vgg_IDMap.txt').read().splitlines()
    img_map = {}
    for ids in image_ids:
        id_split = ids.split()
        img_map[id_split[0]] = int(id_split[1])

    nlp = English()
    print 'loaded word2vec features...'
    ## training
    print 'Training started...'
    for k in xrange(args.num_epochs):

        progbar = generic_utils.Progbar(len(questions_train))

        for qu_batch, an_batch, im_batch in zip(
                grouper(questions_train,
                        args.batch_size,
                        fillvalue=questions_train[-1]),
                grouper(answers_train,
                        args.batch_size,
                        fillvalue=answers_train[-1]),
                grouper(images_train,
                        args.batch_size,
                        fillvalue=images_train[-1])):
            timesteps = len(nlp(
                qu_batch[-1]))  # questions sorted by length, so this is the batch's longest
            X_q_batch = get_questions_tensor_timeseries(
                qu_batch, nlp, timesteps)
            X_i_batch = get_images_matrix(im_batch, img_map, VGGfeatures)
            Y_batch = get_answers_matrix(an_batch, labelencoder)
            loss = model.train_on_batch([X_q_batch, X_i_batch], Y_batch)
            progbar.add(args.batch_size, values=[("train loss", loss)])

        if k % args.model_save_interval == 0:
            model.save_weights(model_file_name +
                               '_epoch_{:03d}.hdf5'.format(k))

    model.save_weights(model_file_name + '_epoch_{:03d}.hdf5'.format(k + 1))
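get_images_matrix looks up precomputed VGG features: img_map maps a COCO image id to a column index of the (4096, N) VGGfeatures matrix loaded from vgg_feats.mat. A sketch assuming that layout:

import numpy as np

def get_images_matrix(img_coco_ids, img_map, VGGfeatures):
    # One row of 4096-d VGG (fc7) features per image in the batch.
    nb_dimensions = VGGfeatures.shape[0]
    image_matrix = np.zeros((len(img_coco_ids), nb_dimensions))
    for j, coco_id in enumerate(img_coco_ids):
        image_matrix[j, :] = VGGfeatures[:, img_map[coco_id]]
    return image_matrix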
Example #5
def main():

    parser = argparse.ArgumentParser()
    parser.add_argument('-num_hidden_units', type=int, default=1024)
    parser.add_argument('-num_hidden_layers', type=int, default=3)
    parser.add_argument('-dropout', type=float, default=0.5)
    parser.add_argument('-activation', type=str, default='tanh')
    parser.add_argument('-language_only', type=bool, default=False)
    parser.add_argument('-num_epochs', type=int, default=10)
    parser.add_argument('-model_save_interval', type=int, default=10)
    parser.add_argument('-batch_size', type=int, default=128)
    args = parser.parse_args()

    questions_train = open('../data/preprocessed/questions_train2014.txt',
                           'r').read().decode('utf8').splitlines()
    answers_train = open('../data/preprocessed/answers_train2014_modal.txt',
                         'r').read().decode('utf8').splitlines()
    images_train = open('../data/preprocessed/images_train2014.txt',
                        'r').read().decode('utf8').splitlines()
    vgg_model_path = '../features/coco/vgg_feats.mat'
    maxAnswers = 1000
    questions_train, answers_train, images_train = selectFrequentAnswers(
        questions_train, answers_train, images_train, maxAnswers)

    #encode the remaining answers
    labelencoder = preprocessing.LabelEncoder()
    labelencoder.fit(answers_train)
    nb_classes = len(list(labelencoder.classes_))
    joblib.dump(labelencoder, '../models/labelencoder.pkl')

    features_struct = scipy.io.loadmat(vgg_model_path)
    VGGfeatures = features_struct['feats']
    print 'loaded vgg features'
    image_ids = open('../features/coco_vgg_IDMap.txt').read().splitlines()
    id_map = {}
    for ids in image_ids:
        id_split = ids.split()
        id_map[id_split[0]] = int(id_split[1])

    nlp = English()
    print 'loaded word2vec features...'
    img_dim = 4096
    word_vec_dim = 300

    model = Sequential()
    if args.language_only:
        model.add(
            Dense(args.num_hidden_units,
                  input_dim=word_vec_dim,
                  init='uniform'))
    else:
        model.add(
            Dense(args.num_hidden_units,
                  input_dim=img_dim + word_vec_dim,
                  init='uniform'))
    model.add(Activation(args.activation))
    if args.dropout > 0:
        model.add(Dropout(args.dropout))
    for i in xrange(args.num_hidden_layers - 1):
        model.add(Dense(args.num_hidden_units, init='uniform'))
        model.add(Activation(args.activation))
        if args.dropout > 0:
            model.add(Dropout(args.dropout))
    model.add(Dense(nb_classes, init='uniform'))
    model.add(Activation('softmax'))

    json_string = model.to_json()
    if args.language_only:
        model_file_name = '../models/mlp_language_only_num_hidden_units_' + str(
            args.num_hidden_units) + '_num_hidden_layers_' + str(
                args.num_hidden_layers)
    else:
        model_file_name = '../models/mlp_num_hidden_units_' + str(
            args.num_hidden_units) + '_num_hidden_layers_' + str(
                args.num_hidden_layers)
    open(model_file_name + '.json', 'w').write(json_string)

    print 'Compiling model...'
    model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
    print 'Compilation done...'

    print 'Training started...'
    for k in xrange(args.num_epochs):
        #shuffle the data points before going through them
        index_shuf = range(len(questions_train))
        shuffle(index_shuf)
        questions_train = [questions_train[i] for i in index_shuf]
        answers_train = [answers_train[i] for i in index_shuf]
        images_train = [images_train[i] for i in index_shuf]
        progbar = generic_utils.Progbar(len(questions_train))
        for qu_batch, an_batch, im_batch in zip(
                grouper(questions_train,
                        args.batch_size,
                        fillvalue=questions_train[-1]),
                grouper(answers_train,
                        args.batch_size,
                        fillvalue=answers_train[-1]),
                grouper(images_train,
                        args.batch_size,
                        fillvalue=images_train[-1])):
            X_q_batch = get_questions_matrix_sum(qu_batch, nlp)
            if args.language_only:
                X_batch = X_q_batch
            else:
                X_i_batch = get_images_matrix(im_batch, id_map, VGGfeatures)
                X_batch = np.hstack((X_q_batch, X_i_batch))
            Y_batch = get_answers_matrix(an_batch, labelencoder)
            loss = model.train_on_batch(X_batch, Y_batch)
            progbar.add(args.batch_size, values=[("train loss", loss)])
        if k % args.model_save_interval == 0:
            model.save_weights(model_file_name +
                               '_epoch_{:02d}.hdf5'.format(k))

    model.save_weights(model_file_name + '_epoch_{:02d}.hdf5'.format(k + 1))
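get_questions_matrix_sum produces the fixed-size question encoding for the MLP baselines. It is assumed to sum the spaCy word vectors of each question into a single 300-d row:

import numpy as np

def get_questions_matrix_sum(questions, nlp):
    # Bag-of-word-vectors: each question becomes the sum of its token vectors.
    word_vec_dim = nlp(questions[0])[0].vector.shape[0]
    questions_matrix = np.zeros((len(questions), word_vec_dim))
    for i, question in enumerate(questions):
        for token in nlp(question):
            questions_matrix[i, :] += token.vector
    return questions_matrix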
Example #6
def main():

	parser = argparse.ArgumentParser()
	parser.add_argument('-num_hidden_units', type=int, default=1024)
	parser.add_argument('-num_hidden_layers', type=int, default=3)
	parser.add_argument('-dropout', type=float, default=0.5)
	parser.add_argument('-activation', type=str, default='tanh')
	parser.add_argument('-language_only', type=bool, default= False)
	parser.add_argument('-num_epochs', type=int, default=100)
	parser.add_argument('-model_save_interval', type=int, default=10)
	parser.add_argument('-batch_size', type=int, default=128)
	parser.add_argument('-word_vector', type=str, default='')
	args = parser.parse_args()

	questions_train = open('../data/preprocessed/questions_train2014.txt', 'r').read().decode('utf8').splitlines()
	answers_train = open('../data/preprocessed/answers_train2014_modal.txt', 'r').read().decode('utf8').splitlines()
	images_train = open('../data/preprocessed/images_train2014.txt', 'r').read().decode('utf8').splitlines()
	vgg_model_path = '../features/coco/vgg_feats.mat'
	maxAnswers = 1000
	questions_train, answers_train, images_train = selectFrequentAnswers(questions_train,answers_train,images_train, maxAnswers)

	#encode the remaining answers
	labelencoder = preprocessing.LabelEncoder()
	labelencoder.fit(answers_train)
	nb_classes = len(list(labelencoder.classes_))
	joblib.dump(labelencoder,'../models/labelencoder.pkl')

	features_struct = scipy.io.loadmat(vgg_model_path)
	VGGfeatures = features_struct['feats']
	print 'loaded vgg features'
	image_ids = open('../features/coco_vgg_IDMap.txt').read().splitlines()
	id_map = {}
	for ids in image_ids:
		id_split = ids.split()
		id_map[id_split[0]] = int(id_split[1])

	# Choose the word vectors: the default is the stock spaCy vectors; GloVe is preferred.
	if args.word_vector == 'glove':
		nlp = spacy.load('en', vectors='en_glove_cc_300_1m_vectors')
	else:
		nlp = English()

	print 'loaded ' + (args.word_vector or 'default') + ' word vectors...'
	img_dim = 4096
	word_vec_dim = 300

	model = Sequential()
	if args.language_only:
		model.add(Dense(args.num_hidden_units, input_dim=word_vec_dim, init='uniform'))
	else:
		model.add(Dense(args.num_hidden_units, input_dim=img_dim+word_vec_dim, init='uniform'))
	model.add(Activation(args.activation))
	if args.dropout>0:
		model.add(Dropout(args.dropout))
	for i in xrange(args.num_hidden_layers-1):
		model.add(Dense(args.num_hidden_units, init='uniform'))
		model.add(Activation(args.activation))
		if args.dropout>0:
			model.add(Dropout(args.dropout))
	model.add(Dense(nb_classes, init='uniform'))
	model.add(Activation('softmax'))

	json_string = model.to_json()
	if args.language_only:
		model_file_name = '../models/mlp_language_only_num_hidden_units_' + str(args.num_hidden_units) + '_num_hidden_layers_' + str(args.num_hidden_layers)
	else:
		model_file_name = '../models/mlp_num_hidden_units_' + str(args.num_hidden_units) + '_num_hidden_layers_' + str(args.num_hidden_layers)		
	open(model_file_name  + '.json', 'w').write(json_string)

	print 'Compiling model...'
	model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
	print 'Compilation done...'
	
	print 'Training started...'
	for k in xrange(args.num_epochs):
		#shuffle the data points before going through them
		index_shuf = range(len(questions_train))
		shuffle(index_shuf)
		questions_train = [questions_train[i] for i in index_shuf]
		answers_train = [answers_train[i] for i in index_shuf]
		images_train = [images_train[i] for i in index_shuf]
		progbar = generic_utils.Progbar(len(questions_train))
		for qu_batch,an_batch,im_batch in zip(grouper(questions_train, args.batch_size, fillvalue=questions_train[-1]), 
											grouper(answers_train, args.batch_size, fillvalue=answers_train[-1]), 
											grouper(images_train, args.batch_size, fillvalue=images_train[-1])):
			X_q_batch = get_questions_matrix_sum(qu_batch, nlp)
			if args.language_only:
				X_batch = X_q_batch
			else:
				X_i_batch = get_images_matrix(im_batch, id_map, VGGfeatures)
				X_batch = np.hstack((X_q_batch, X_i_batch))
			Y_batch = get_answers_matrix(an_batch, labelencoder)
			loss = model.train_on_batch(X_batch, Y_batch)
			# Keras v0.3's train_on_batch returns a list (Keras issue #9); report the scalar loss.
			progbar.add(args.batch_size, values=[("train loss", loss[0])])
		if k%args.model_save_interval == 0:
			model.save_weights(model_file_name + '_epoch_{:02d}.hdf5'.format(k))

	model.save_weights(model_file_name + '_epoch_{:02d}.hdf5'.format(k + 1))
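selectFrequentAnswers reduces open-ended VQA to a fixed-size classification problem by discarding every training triple whose answer is not among the max_answers most frequent ones. A minimal sketch:

from collections import Counter

def selectFrequentAnswers(questions, answers, images, max_answers):
    # Keep only the (question, answer, image) triples with a frequent answer.
    top_answers = set(a for a, _ in Counter(answers).most_common(max_answers))
    keep = [i for i, a in enumerate(answers) if a in top_answers]
    return ([questions[i] for i in keep],
            [answers[i] for i in keep],
            [images[i] for i in keep])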
Example #7
def main():
	cwd = os.getcwd()

	parser = argparse.ArgumentParser()
	parser.add_argument('-num_hidden_units', type=int, default=1024)
	parser.add_argument('-num_hidden_layers', type=int, default=3)
	parser.add_argument('-dropout', type=float, default=0.5)
	parser.add_argument('-activation', type=str, default='tanh')
	parser.add_argument('-language_only', type=bool, default= False)
	parser.add_argument('-num_epochs', type=int, default=2)
	parser.add_argument('-model_save_interval', type=int, default=10)
	parser.add_argument('-model_weights_path', type=str, default=cwd+'/vgg/vgg16_weights.h5')
	parser.add_argument('-batch_size', type=int, default=128)
	parser.add_argument('-questions_train',type=str, default = cwd+'/data/preprocessed/questions_train2015.txt')
	parser.add_argument('-answers_train',type=str, default = cwd+'/data/preprocessed/answers_train2015_modal.txt')
	parser.add_argument('-im_dir',type=str, default =cwd+'/data/preprocessed/scene_img_abstract_v002_train2015/')
	#parser.add_argument('-questions_train',type=str, default = cwd+'/data/preprocessed/questions_train2014.txt')
	args = parser.parse_args()

	questions_train = open(args.questions_train, 'r').read().decode('utf8').splitlines()
	answers_train = open(args.answers_train, 'r').read().decode('utf8').splitlines()
	images_train = open(cwd+'/data/preprocessed/images_train2015.txt', 'r').read().decode('utf8').splitlines()
	#vgg_model_path = cwd+'/features/coco/vgg_feats.mat' #this needs to change
	maxAnswers = 100
	questions_train, answers_train, images_train = selectFrequentAnswers(questions_train,answers_train,images_train, maxAnswers)

	#encode the remaining answers
	labelencoder = preprocessing.LabelEncoder()
	labelencoder.fit(answers_train)
	nb_classes = len(list(labelencoder.classes_))
	joblib.dump(labelencoder,cwd+'/models/labelencoder.pkl')

	#features_struct = scipy.io.loadmat(vgg_model_path)
	#VGGfeatures = features_struct['feats']
	# print 'loaded vgg features'
	# image_ids = open(cwd+'/features/coco_vgg_IDMap.txt').read().splitlines()
	# id_map = {}
	# for ids in image_ids:
	# 	id_split = ids.split()
	# 	id_map[id_split[0]] = int(id_split[1])

	vgg_model = vgg16.VGG_16(args.model_weights_path)
	sgd = SGD(lr=0.1, decay=1e-6, momentum=0.9, nesterov=True)
	vgg_model.compile(optimizer=sgd, loss='categorical_crossentropy')
	print 'loaded vgg model...'

	nlp = English()
	print 'loaded word2vec features...'

	img_dim = 4096
	word_vec_dim = 300

	model = Sequential()
	if args.language_only:
		model.add(Dense(args.num_hidden_units, input_dim=word_vec_dim, init='uniform'))
	else:
		model.add(Dense(args.num_hidden_units, input_dim=img_dim+word_vec_dim, init='uniform'))
	model.add(Activation(args.activation))
	if args.dropout>0:
		model.add(Dropout(args.dropout))
	for i in xrange(args.num_hidden_layers-1):
		model.add(Dense(args.num_hidden_units, init='uniform'))
		model.add(Activation(args.activation))
		if args.dropout>0:
			model.add(Dropout(args.dropout))
	model.add(Dense(nb_classes, init='uniform'))
	model.add(Activation('softmax'))

	json_string = model.to_json()
	model_file_name = cwd+'/models/mlp_num_hidden_units_' + str(args.num_hidden_units) + '_num_hidden_layers_' + str(args.num_hidden_layers)		
	open(model_file_name  + '.json', 'w').write(json_string)
	
	print 'Training started...'
	id_map = {}
	f1 = open('abstract_image_precompute')
	f2 = open('abstract_image_precompute_reverse')
	VGGfeatures = np.loadtxt(f1)
	VGGfeatures_reverse = np.loadtxt(f2)
	f1.close()
	f2.close()
	for k in xrange(args.num_epochs):
		#shuffle the data points before going through them
		index_shuf = range(len(questions_train))
		shuffle(index_shuf)
		questions_train = [questions_train[i] for i in index_shuf]
		answers_train = [answers_train[i] for i in index_shuf]
		images_train = [images_train[i] for i in index_shuf]
		progbar = generic_utils.Progbar(len(questions_train))
		for qu_batch,an_batch,im_batch in zip(grouper(questions_train, args.batch_size, fillvalue=questions_train[-1]), 
											grouper(answers_train, args.batch_size, fillvalue=answers_train[-1]), 
											grouper(images_train, args.batch_size, fillvalue=images_train[-1])):
			
			X_q_batch = get_questions_matrix_sum(qu_batch, nlp)
			im_path = args.im_dir + "abstract_v002_train2015_"  # only used by the on-the-fly path below
			X_i_batch = get_images_matrix(im_batch, VGGfeatures, VGGfeatures_reverse)
			# X_i_batch = get_images_matrix_from_model(vgg_model, im_batch, im_path, id_map)
			X_batch = np.hstack((X_q_batch, X_i_batch))

			Y_batch = get_answers_matrix(an_batch, labelencoder)
			loss = model.train_on_batch(X_batch, Y_batch)
			progbar.add(args.batch_size, values=[("train loss", loss)])

		if k%args.model_save_interval == 0:
			model.save_weights(model_file_name + '_epoch_{:02d}.hdf5'.format(k))
	model.save_weights(model_file_name + '_epoch_{:02d}.hdf5'.format(k + 1))
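Example #7 reads its image features from two np.loadtxt-compatible dumps (abstract_image_precompute and a reverse index) instead of the .mat file used elsewhere. Below is a sketch of how such a dump could be produced; the preprocessing helper is hypothetical, and the VGG model is assumed to have been truncated at its 4096-d fc7 layer:

import numpy as np

def precompute_image_features(vgg_model, image_paths, out_path):
    # Run each abstract-scene image through the truncated VGG-16 and save
    # one 4096-d feature row per image for later np.loadtxt().
    feats = []
    for path in image_paths:
        x = load_and_preprocess(path)  # hypothetical: resize to 224x224, BGR, mean-subtract
        feats.append(vgg_model.predict(x[np.newaxis, ...])[0])
    np.savetxt(out_path, np.asarray(feats))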
Example #8
def main():
	parser = argparse.ArgumentParser()
	parser.add_argument('-num_hidden_units_mlp', type=int, default=1024)
	parser.add_argument('-num_hidden_units_lstm', type=int, default=512)
	parser.add_argument('-num_hidden_layers_mlp', type=int, default=3)
	parser.add_argument('-num_hidden_layers_lstm', type=int, default=1)
	parser.add_argument('-dropout', type=float, default=0.5)
	parser.add_argument('-activation_mlp', type=str, default='tanh')
	parser.add_argument('-num_epochs', type=int, default=100)
	parser.add_argument('-model_save_interval', type=int, default=5)
	parser.add_argument('-batch_size', type=int, default=128)
	parser.add_argument('-word_vector', type=str, default='')
	#TODO Feature parser.add_argument('-resume_training', type=str)
	#TODO Feature parser.add_argument('-language_only', type=bool, default= False)
	args = parser.parse_args()

	word_vec_dim = 300
	img_dim = 4096
	max_len = 30
	nb_classes = 1000

	#get the data
	questions_train = open('../data/preprocessed/questions_train2014.txt', 'r').read().decode('utf8').splitlines()
	questions_lengths_train = open('../data/preprocessed/questions_lengths_train2014.txt', 'r').read().decode('utf8').splitlines()
	answers_train = open('../data/preprocessed/answers_train2014_modal.txt', 'r').read().decode('utf8').splitlines()
	images_train = open('../data/preprocessed/images_train2014.txt', 'r').read().decode('utf8').splitlines()
	vgg_model_path = '../features/coco/vgg_feats.mat'

	max_answers = nb_classes
	questions_train, answers_train, images_train = selectFrequentAnswers(questions_train,answers_train,images_train, max_answers)
	# Cast the string lengths to int before sorting by question length.
	questions_lengths_train, questions_train, answers_train, images_train = (list(t) for t in zip(*sorted(zip(map(int, questions_lengths_train), questions_train, answers_train, images_train))))

	#encode the remaining answers
	labelencoder = preprocessing.LabelEncoder()
	labelencoder.fit(answers_train)
	nb_classes = len(list(labelencoder.classes_))
	joblib.dump(labelencoder,'../models/labelencoder.pkl')
	
	image_model = Sequential()
	image_model.add(Reshape(input_shape = (img_dim,), dims=(img_dim,)))

	language_model = Sequential()
	if args.num_hidden_layers_lstm == 1:
		language_model.add(LSTM(output_dim = args.num_hidden_units_lstm, return_sequences=False, input_shape=(max_len, word_vec_dim)))
	else:
		language_model.add(LSTM(output_dim = args.num_hidden_units_lstm, return_sequences=True, input_shape=(max_len, word_vec_dim)))
		for i in xrange(args.num_hidden_layers_lstm-2):
			language_model.add(LSTM(output_dim = args.num_hidden_units_lstm, return_sequences=True))
		language_model.add(LSTM(output_dim = args.num_hidden_units_lstm, return_sequences=False))

	model = Sequential()
	model.add(Merge([language_model, image_model], mode='concat', concat_axis=1))
	for i in xrange(args.num_hidden_layers_mlp):
		model.add(Dense(args.num_hidden_units_mlp, init='uniform'))
		model.add(Activation(args.activation_mlp))
		model.add(Dropout(args.dropout))
	model.add(Dense(nb_classes))
	model.add(Activation('softmax'))

	json_string = model.to_json()
	model_file_name = '../models/lstm_1_num_hidden_units_lstm_' + str(args.num_hidden_units_lstm) + \
						'_num_hidden_units_mlp_' + str(args.num_hidden_units_mlp) + '_num_hidden_layers_mlp_' + \
						str(args.num_hidden_layers_mlp) + '_num_hidden_layers_lstm_' + str(args.num_hidden_layers_lstm)
	open(model_file_name + '.json', 'w').write(json_string)

	model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
	print 'Compilation done'

	features_struct = scipy.io.loadmat(vgg_model_path)
	VGGfeatures = features_struct['feats']
	print 'loaded vgg features'
	image_ids = open('../features/coco_vgg_IDMap.txt').read().splitlines()
	img_map = {}
	for ids in image_ids:
		id_split = ids.split()
		img_map[id_split[0]] = int(id_split[1])

	# Choose the word vectors: the default is the stock spaCy vectors; GloVe is preferred.
	if args.word_vector == 'glove':
		nlp = spacy.load('en', vectors='en_glove_cc_300_1m_vectors')
	else:
		nlp = English()

	print 'loaded ' + (args.word_vector or 'default') + ' word vectors...'
	## training
	print 'Training started...'
	for k in xrange(args.num_epochs):

		progbar = generic_utils.Progbar(len(questions_train))

		for qu_batch,an_batch,im_batch in zip(grouper(questions_train, args.batch_size, fillvalue=questions_train[-1]), 
												grouper(answers_train, args.batch_size, fillvalue=answers_train[-1]), 
												grouper(images_train, args.batch_size, fillvalue=images_train[-1])):
			timesteps = len(nlp(qu_batch[-1])) # questions sorted by length, so this is the batch's longest
			X_q_batch = get_questions_tensor_timeseries(qu_batch, nlp, timesteps)
			X_i_batch = get_images_matrix(im_batch, img_map, VGGfeatures)
			Y_batch = get_answers_matrix(an_batch, labelencoder)
			loss = model.train_on_batch([X_q_batch, X_i_batch], Y_batch)
			# Keras v0.3's train_on_batch returns a list (Keras issue #9); report the scalar loss.
			progbar.add(args.batch_size, values=[("train loss", loss[0])])

		
		if k%args.model_save_interval == 0:
			model.save_weights(model_file_name + '_epoch_{:03d}.hdf5'.format(k))

	model.save_weights(model_file_name + '_epoch_{:03d}.hdf5'.format(k + 1))
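get_answers_matrix converts answer strings into the one-hot targets consumed by categorical_crossentropy throughout these examples. A sketch, assuming the label encoder fitted above and Keras' np_utils:

import numpy as np
from keras.utils import np_utils

def get_answers_matrix(answers, encoder):
    # Map answer strings to integer class ids, then one-hot encode them.
    y = encoder.transform(answers)
    nb_classes = encoder.classes_.shape[0]
    return np_utils.to_categorical(y, nb_classes)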
Example #9
for i in xrange(args.num_epochs):
    progbar = generic_utils.Progbar(len(questions_train))
    for qu_batch, an_batch, im_batch in zip(
            grouper(questions_train,
                    args.batch_size,
                    fillvalue=questions_train[-1]),
            grouper(answers_train,
                    args.batch_size,
                    fillvalue=answers_train[-1]),
            grouper(images_train, args.batch_size,
                    fillvalue=images_train[-1])):
        # logging.debug("One batch done")
        x_q_batch = get_questions_matrix_sum(qu_batch, nlp)
        # logging.debug("length of qu_batch is %d", len(qu_batch))
        # logging.debug("Shape of x_q_batch is: %s", x_q_batch.shape)
        x_i_batch = get_images_matrix(im_batch, id_map, sherlock_features)
        # logging.debug("shape of x_i_batch is %s", x_i_batch.shape)
        x_batch = np.hstack((x_q_batch, x_i_batch))
        y_batch = get_answers_matrix(an_batch, labelencoder)
        loss = model.train_on_batch(x_batch, y_batch)
        training_loss.append(loss)
        progbar.add(args.batch_size, values=[("train_loss", loss)])
        # print "\n"  # if __name__ == '__main__':
#     main()

plt.plot(training_loss)
plt.title("Training loss for the network")
plt.savefig('loss.png')

model.save('trained_vqa_100epochs.hd5')
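Every example above serializes the architecture to JSON and the weights to HDF5. A minimal reload-for-inference sketch, assuming the Keras 0.x model_from_json API and the label encoder dumped during training (file names follow the patterns used above):

from keras.models import model_from_json
from sklearn.externals import joblib

model = model_from_json(open(model_file_name + '.json').read())
model.load_weights(model_file_name + '_epoch_00.hdf5')
model.compile(loss='categorical_crossentropy', optimizer='rmsprop')

labelencoder = joblib.load('../models/labelencoder.pkl')
y_proba = model.predict(X_batch)  # X_batch built exactly as during training
answers = labelencoder.inverse_transform(y_proba.argmax(axis=-1))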