Exemplo n.º 1
0
def main():
    start_time = time.time()

    parser = argparse.ArgumentParser(prog='trainLSTM_MLP.py',
            description='Train LSTM-MLP model for visual question answering')
    parser.add_argument('--mlp-hidden-units', type=int, default=1024, metavar='<mlp-hidden-units>')
    parser.add_argument('--lstm-hidden-units', type=int, default=512, metavar='<lstm-hidden-units>')
    parser.add_argument('--mlp-hidden-layers', type=int, default=3, metavar='<mlp-hidden-layers>')
    parser.add_argument('--lstm-hidden-layers', type=int, default=1, metavar='<lstm-hidden-layers>')
    parser.add_argument('--dropout', type=float, default=0.5, metavar='<dropout-rate>')
    parser.add_argument('--mlp-activation', type=str, default='tanh', metavar='<activation-function>')
    parser.add_argument('--num-epochs', type=int, default=100, metavar='<num-epochs>')
    parser.add_argument('--batch-size', type=int, default=128, metavar='<batch-size>')
    parser.add_argument('--learning-rate', type=float, default=0.001, metavar='<learning-rate>')
    parser.add_argument('--dev-accuracy-path', type=str, required=True, metavar='<accuracy-path>')
    args = parser.parse_args()

    word_vec_dim = 300
    img_dim = 4096
    max_len = 30
    ######################
    #      Load Data     #
    ######################
    data_dir = '/home/mlds/data/0.05_val/'

    print('Loading data...')

    train_id_pairs, train_image_ids = LoadIds('train', data_dir)
    dev_id_pairs, dev_image_ids = LoadIds('dev', data_dir)

    train_questions = LoadQuestions('train', data_dir)
    dev_questions = LoadQuestions('dev', data_dir)

    train_choices = LoadChoices('train', data_dir)
    dev_choices = LoadChoices('dev', data_dir)

    train_answers = LoadAnswers('train', data_dir)
    dev_answers = LoadAnswers('dev', data_dir)

    print('Finished loading data.')
    print('Time: %f s' % (time.time()-start_time))

    ######################
    # Model Descriptions #
    ######################
    print('Generating and compiling model...')

    # image model (CNN features)
    image_model = Sequential()
    image_model.add(Reshape(
        input_shape=(img_dim,), dims=(img_dim,)
        ))

    # language model (LSTM)
    language_model = Sequential()
    if args.lstm_hidden_layers == 1:
        language_model.add(LSTM(
            output_dim=args.lstm_hidden_units, return_sequences=False, input_shape=(max_len, word_vec_dim)
            ))
    else:
        language_model.add(LSTM(
            output_dim=args.lstm_hidden_units, return_sequences=True, input_shape=(max_len, word_vec_dim)
            ))
        for i in range(args.lstm_hidden_layers-2):
            language_model.add(LSTM(
                output_dim=args.lstm_hidden_units, return_sequences=True
                ))
        language_model.add(LSTM(
            output_dim=args.lstm_hidden_units, return_sequences=False
            ))

    # feedforward model (MLP)
    model = Sequential()
    model.add(Merge(
        [language_model, image_model], mode='concat', concat_axis=1
        ))
    for i in range(args.mlp_hidden_layers):
        model.add(Dense(
            args.mlp_hidden_units, init='uniform'
            ))
        model.add(Activation(args.mlp_activation))
        model.add(Dropout(args.dropout))
    model.add(Dense(word_vec_dim))
    #model.add(Activation('softmax'))

    json_string = model.to_json()
    model_filename = 'models/vgg_lstm_units_%i_layers_%i_mlp_units_%i_layers_%i_%s_lr%.1e_dropout%.2f' % (args.lstm_hidden_units, args.lstm_hidden_layers, args.mlp_hidden_units, args.mlp_hidden_layers, args.mlp_activation, args.learning_rate, args.dropout)
    #model_filename = 'models/vgg_lstm_units_%i_layers_%i_mlp_units_%i_layers_%i_%s_lr%.1e_dropout%.2f_loss_cosine' % (args.lstm_hidden_units, args.lstm_hidden_layers, args.mlp_hidden_units, args.mlp_hidden_layers, args.mlp_activation, args.learning_rate, args.dropout)
    open(model_filename + '.json', 'w').write(json_string)

    # loss and optimizer
    rmsprop = RMSprop(lr=args.learning_rate)
    #model.compile(loss='categorical_crossentropy', optimizer=rmsprop)
    model.compile(loss=Loss, optimizer=rmsprop)
    print('Compilation finished.')
    print('Time: %f s' % (time.time()-start_time))

    ########################################
    #  Load CNN Features and Word Vectors  #
    ########################################

    # load VGG features
    print('Loading VGG features...')
    VGG_features, img_map = LoadVGGFeatures()
    print('VGG features loaded')
    print('Time: %f s' % (time.time()-start_time))

    # load GloVe vectors
    print('Loading GloVe vectors...')
    word_embedding, word_map = LoadGloVe()
    print('GloVe vectors loaded')
    print('Time: %f s' % (time.time()-start_time))

    ######################
    #    Make Batches    #
    ######################

    print('Making batches...')

    # training batches
    train_question_batches = [ b for b in MakeBatches(train_questions, args.batch_size, fillvalue=train_questions[-1]) ]
    train_answer_batches = [ b for b in MakeBatches(train_answers['toks'], args.batch_size, fillvalue=train_answers['toks'][-1]) ]
    train_image_batches = [ b for b in MakeBatches(train_image_ids, args.batch_size, fillvalue=train_image_ids[-1]) ]
    train_indices = list(range(len(train_question_batches)))

    # validation batches
    dev_question_batches = [ b for b in MakeBatches(dev_questions, args.batch_size, fillvalue=dev_questions[-1]) ]
    dev_answer_batches = [ b for b in MakeBatches(dev_answers['labs'], args.batch_size, fillvalue=dev_answers['labs'][-1]) ]
    dev_choice_batches = [ b for b in MakeBatches(dev_choices, args.batch_size, fillvalue=dev_choices[-1]) ]
    dev_image_batches = [ b for b in MakeBatches(dev_image_ids, args.batch_size, fillvalue=dev_image_ids[-1]) ]

    print('Finished making batches.')
    print('Time: %f s' % (time.time()-start_time))


    ######################
    #      Training      #
    ######################

    acc_file = open(args.dev_accuracy_path, 'w')
    dev_accs = []
    max_acc = -1
    max_acc_epoch = -1

    # define interrupt handler
    def PrintDevAcc():
        print('Max validation accuracy epoch: %i' % max_acc_epoch)
        print(dev_accs)

    def InterruptHandler(sig, frame):
        print(str(sig))
        PrintDevAcc()
        sys.exit(-1)

    signal.signal(signal.SIGINT, InterruptHandler)
    signal.signal(signal.SIGTERM, InterruptHandler)

    # print training information
    print('-'*80)
    print('Training Information')
    print('# of LSTM hidden units: %i' % args.lstm_hidden_units)
    print('# of LSTM hidden layers: %i' % args.lstm_hidden_layers)
    print('# of MLP hidden units: %i' % args.mlp_hidden_units)
    print('# of MLP hidden layers: %i' % args.mlp_hidden_layers)
    print('Dropout: %f' % args.dropout)
    print('MLP activation function: %s' % args.mlp_activation)
    print('# of training epochs: %i' % args.num_epochs)
    print('Batch size: %i' % args.batch_size)
    print('Learning rate: %f' % args.learning_rate)
    print('# of train questions: %i' % len(train_questions))
    print('# of dev questions: %i' % len(dev_questions))
    print('-'*80)
    acc_file.write('-'*80 + '\n')
    acc_file.write('Training Information\n')
    acc_file.write('# of LSTM hidden units: %i\n' % args.lstm_hidden_units)
    acc_file.write('# of LSTM hidden layers: %i\n' % args.lstm_hidden_layers)
    acc_file.write('# of MLP hidden units: %i\n' % args.mlp_hidden_units)
    acc_file.write('# of MLP hidden layers: %i\n' % args.mlp_hidden_layers)
    acc_file.write('Dropout: %f\n' % args.dropout)
    acc_file.write('MLP activation function: %s\n' % args.mlp_activation)
    acc_file.write('# of training epochs: %i\n' % args.num_epochs)
    acc_file.write('Batch size: %i\n' % args.batch_size)
    acc_file.write('Learning rate: %f\n' % args.learning_rate)
    acc_file.write('# of train questions: %i\n' % len(train_questions))
    acc_file.write('# of dev questions: %i\n' % len(dev_questions))
    acc_file.write('-'*80 + '\n')

    # start training
    print('Training started...')
    for k in range(args.num_epochs):
        print('-'*80)
        print('Epoch %i' % (k+1))
        progbar = generic_utils.Progbar(len(train_indices)*args.batch_size)
        # shuffle batch indices
        random.shuffle(train_indices)
        for i in train_indices:
            X_question_batch = GetQuestionsTensor(train_question_batches[i], word_embedding, word_map)
            X_image_batch = GetImagesMatrix(train_image_batches[i], img_map, VGG_features)
            Y_answer_batch = GetAnswersMatrix(train_answer_batches[i], word_embedding, word_map)
            loss = model.train_on_batch([X_question_batch, X_image_batch], Y_answer_batch)
            loss = loss[0].tolist()
            progbar.add(args.batch_size, values=[('train loss', loss)])
        print('Time: %f s' % (time.time()-start_time))

        # evaluate on dev set
        pbar = generic_utils.Progbar(len(dev_question_batches)*args.batch_size)

        dev_correct = 0

        # feed forward
        for i in range(len(dev_question_batches)):
            X_question_batch = GetQuestionsTensor(dev_question_batches[i], word_embedding, word_map)
            X_image_batch = GetImagesMatrix(dev_image_batches[i], img_map, VGG_features)
            prob = model.predict_proba([X_question_batch, X_image_batch], args.batch_size, verbose=0)

            # get word vecs of choices
            choice_feats = GetChoicesTensor(dev_choice_batches[i], word_embedding, word_map)
            similarity = np.zeros((5, args.batch_size), float)
            # calculate cosine distances
            for j in range(5):
                similarity[j] = np.diag(cosine_similarity(prob, choice_feats[j]))
            # take argmax of cosine distances
            pred = np.argmax(similarity, axis=0) + 1

            if i != (len(dev_question_batches)-1):
                dev_correct += np.count_nonzero(dev_answer_batches[i]==pred)
            else:
                num_padding = args.batch_size * len(dev_question_batches) - len(dev_questions)
                last_idx = args.batch_size - num_padding
                dev_correct += np.count_nonzero(dev_answer_batches[:last_idx]==pred[:last_idx])
            pbar.add(args.batch_size)

        dev_acc = float(dev_correct)/len(dev_questions)
        dev_accs.append(dev_acc)
        print('Validation Accuracy: %f' % dev_acc)
        print('Time: %f s' % (time.time()-start_time))

        if dev_acc > max_acc:
            max_acc = dev_acc
            max_acc_epoch = k
            model.save_weights(model_filename + '_best.hdf5', overwrite=True)

    #model.save_weights(model_filename + '_epoch_{:03d}.hdf5'.format(k+1))
    print(dev_accs)
    for acc in dev_accs:
        acc_file.write('%f\n' % acc)
    print('Best validation accuracy: %f; epoch#%i' % (max_acc,(max_acc_epoch+1)))
    acc_file.write('Best validation accuracy: %f; epoch#%i\n' % (max_acc,(max_acc_epoch+1)))
    print('Training finished.')
    acc_file.write('Training finished.\n')
    print('Time: %f s' % (time.time()-start_time))
    acc_file.write('Time: %f s\n' % (time.time()-start_time))
    acc_file.close()
Exemplo n.º 2
0
def main():
    start_time = time.time()

    parser = argparse.ArgumentParser(
        prog='testLSTM_MLP.py',
        description='Test LSTM-MLP model for visual question answering')
    parser.add_argument('--model',
                        type=str,
                        required=True,
                        metavar='<model-path>')
    parser.add_argument('--weights',
                        type=str,
                        required=True,
                        metavar='<weights-path>')
    parser.add_argument('--output',
                        type=str,
                        required=True,
                        metavar='<prediction-path>')
    args = parser.parse_args()

    word_vec_dim = 300
    batch_size = 128

    #######################
    #      Load Model     #
    #######################

    print('Loading model and weights...')
    model = model_from_json(open(args.model, 'r').read())
    model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
    model.load_weights(args.weights)
    print('Model and weights loaded.')
    print('Time: %f s' % (time.time() - start_time))

    ######################
    #      Load Data     #
    ######################

    print('Loading data...')

    #dev_id_pairs, dev_image_ids = LoadIds('dev')
    test_id_pairs, test_image_ids = LoadIds('test')

    #dev_questions = LoadQuestions('dev')
    test_questions = LoadQuestions('test')

    #dev_choices = LoadChoices('dev')
    test_choices = LoadChoices('test')

    #dev_answers = LoadAnswers('dev')

    print('Finished loading data.')
    print('Time: %f s' % (time.time() - start_time))

    ########################################
    #  Load CNN Features and Word Vectors  #
    ########################################

    # load VGG features
    print('Loading VGG features...')
    VGG_features, img_map = LoadVGGFeatures()
    print('VGG features loaded')
    print('Time: %f s' % (time.time() - start_time))

    # load GloVe vectors
    print('Loading GloVe vectors...')
    word_embedding, word_map = LoadGloVe()
    print('GloVe vectors loaded')
    print('Time: %f s' % (time.time() - start_time))

    ######################
    #    Make Batches    #
    ######################

    print('Making batches...')

    # validation batches
    #dev_question_batches = [ b for b in MakeBatches(dev_questions, batch_size, fillvalue=dev_questions[-1]) ]
    #dev_answer_batches = [ b for b in MakeBatches(dev_answers['labs'], batch_size, fillvalue=dev_answers['labs'][-1]) ]
    #dev_choice_batches = [ b for b in MakeBatches(dev_choices, batch_size, fillvalue=dev_choices[-1]) ]
    #dev_image_batches = [ b for b in MakeBatches(dev_image_ids, batch_size, fillvalue=dev_image_ids[-1]) ]

    # testing batches
    test_question_batches = [
        b for b in MakeBatches(
            test_questions, batch_size, fillvalue=test_questions[-1])
    ]
    test_choice_batches = [
        b for b in MakeBatches(
            test_choices, batch_size, fillvalue=test_choices[-1])
    ]
    test_image_batches = [
        b for b in MakeBatches(
            test_image_ids, batch_size, fillvalue=test_image_ids[-1])
    ]

    print('Finished making batches.')
    print('Time: %f s' % (time.time() - start_time))

    ######################
    #       Testing      #
    ######################

    # evaluate on dev set
    pbar = generic_utils.Progbar(len(test_question_batches) * batch_size)

    #dev_correct = 0
    predictions = []

    for i in range(len(test_question_batches)):
        # feed forward
        X_question_batch = GetQuestionsTensor(test_question_batches[i],
                                              word_embedding, word_map)
        X_image_batch = GetImagesMatrix(test_image_batches[i], img_map,
                                        VGG_features)
        prob = model.predict_proba([X_question_batch, X_image_batch],
                                   batch_size,
                                   verbose=0)

        # get word vecs of choices
        choice_feats = GetChoicesTensor(test_choice_batches[i], word_embedding,
                                        word_map)
        similarity = np.zeros((5, batch_size), float)
        # calculate cosine distances
        for j in range(5):
            similarity[j] = np.diag(cosine_similarity(prob, choice_feats[j]))
        # take argmax of cosine distances
        pred = np.argmax(similarity, axis=0) + 1
        predictions.extend(pred.tolist())
        pbar.add(batch_size)

        #dev_correct += np.count_nonzero(dev_answer_batches[i]==pred)

    SavePredictions(args.output, predictions, test_q_ids)
    print('Time: %f s' % (time.time() - start_time))
    print('Testing finished.')
def main():
    start_time = time.time()

    parser = argparse.ArgumentParser(prog='valLSTM_MLP.py',
            description='Test LSTM-MLP model for visual question answering')
    parser.add_argument('--model-vgg', type=str, required=True, metavar='<model-path>')
    parser.add_argument('--weights-vgg', type=str, required=True, metavar='<weights-path>')
    parser.add_argument('--model-inc', type=str, required=True, metavar='<model-path>')
    parser.add_argument('--weights-inc', type=str, required=True, metavar='<weights-path>')
    args = parser.parse_args()

    word_vec_dim = 300
    batch_size = 128
    vgg_weight = 0.25
    inc_weight = 1 - vgg_weight

    #######################
    #      Load Models    #
    #######################

    print('Loading models and weights...')
    model_vgg = model_from_json(open(args.model_vgg,'r').read())
    model_vgg.compile(loss='categorical_crossentropy', optimizer='rmsprop')
    model_vgg.load_weights(args.weights_vgg)

    model_inc = model_from_json(open(args.model_inc,'r').read())
    model_inc.compile(loss='categorical_crossentropy', optimizer='rmsprop')
    model_inc.load_weights(args.weights_inc)
    print('Models and weights loaded.')
    print('Time: %f s' % (time.time()-start_time))

    ######################
    #      Load Data     #
    ######################

    print('Loading data...')

    train_id_pairs, train_image_ids = LoadIds('train')
    dev_id_pairs, dev_image_ids = LoadIds('dev')
    #test_id_pairs, test_image_ids = LoadIds('test')

    train_questions = LoadQuestions('train')
    dev_questions = LoadQuestions('dev')
    #test_questions = LoadQuestions('test')

    train_choices = LoadChoices('train')
    dev_choices = LoadChoices('dev')
    #test_choices = LoadChoices('test')

    train_answers = LoadAnswers('train')
    dev_answers = LoadAnswers('dev')

    print('Finished loading data.')
    print('Time: %f s' % (time.time()-start_time))

    ########################################
    #  Load CNN Features and Word Vectors  #
    ########################################

    # load VGG features
    print('Loading VGG features...')
    VGG_features, vgg_img_map = LoadVGGFeatures()
    print('VGG features loaded')
    print('Time: %f s' % (time.time()-start_time))

    # load Inception features
    print('Loading Inception features...')
    INC_features, inc_img_map = LoadInceptionFeatures()
    print('Inception features loaded')
    print('Time: %f s' % (time.time()-start_time))

    # load GloVe vectors
    print('Loading GloVe vectors...')
    word_embedding, word_map = LoadGloVe()
    print('GloVe vectors loaded')
    print('Time: %f s' % (time.time()-start_time))

    ######################
    #    Make Batches    #
    ######################

    print('Making batches...')

    # train batches
    train_question_batches = [ b for b in MakeBatches(train_questions, batch_size, fillvalue=train_questions[-1]) ]
    train_answer_batches = [ b for b in MakeBatches(train_answers['labs'], batch_size, fillvalue=train_answers['labs'][-1]) ]
    train_choice_batches = [ b for b in MakeBatches(train_choices, batch_size, fillvalue=train_choices[-1]) ]
    train_image_batches = [ b for b in MakeBatches(train_image_ids, batch_size, fillvalue=train_image_ids[-1]) ]


    # validation batches
    dev_question_batches = [ b for b in MakeBatches(dev_questions, batch_size, fillvalue=dev_questions[-1]) ]
    dev_answer_batches = [ b for b in MakeBatches(dev_answers['labs'], batch_size, fillvalue=dev_answers['labs'][-1]) ]
    dev_choice_batches = [ b for b in MakeBatches(dev_choices, batch_size, fillvalue=dev_choices[-1]) ]
    dev_image_batches = [ b for b in MakeBatches(dev_image_ids, batch_size, fillvalue=dev_image_ids[-1]) ]


    # testing batches
    #test_question_batches = [ b for b in MakeBatches(test_questions, batch_size, fillvalue=test_questions[-1]) ]
    #test_choice_batches = [ b for b in MakeBatches(test_choices, batch_size, fillvalue=test_choices[-1]) ]
    #test_image_batches = [ b for b in MakeBatches(test_image_ids, batch_size, fillvalue=test_image_ids[-1]) ]

    print('Finished making batches.')
    print('Time: %f s' % (time.time()-start_time))

    ######################
    #       Testing      #
    ######################

    # evaluate on dev set
    pbar = generic_utils.Progbar(len(dev_question_batches)*batch_size)

    dev_correct = 0

    for i in range(len(dev_question_batches)):
        # feed forward
        X_question_batch = GetQuestionsTensor(dev_question_batches[i], word_embedding, word_map)
        X_vgg_image_batch = GetImagesMatrix(dev_image_batches[i], vgg_img_map, VGG_features)
        X_inc_image_batch = GetImagesMatrix(dev_image_batches[i], inc_img_map, INC_features)
        prob_vgg = model_vgg.predict_proba([X_question_batch, X_vgg_image_batch], batch_size, verbose=0)
        prob_inc = model_inc.predict_proba([X_question_batch, X_inc_image_batch], batch_size, verbose=0)
        prob = (vgg_weight*prob_vgg + inc_weight*prob_inc)

        # get word vecs of choices
        choice_feats = GetChoicesTensor(dev_choice_batches[i], word_embedding, word_map)
        similarity = np.zeros((5, batch_size), float)
        # calculate cosine distances
        for j in range(5):
            similarity[j] = np.diag(cosine_similarity(prob, choice_feats[j]))
        # take argmax of cosine distances
        pred = np.argmax(similarity, axis=0) + 1
        #predictions.extend(pred.tolist())
        if i != (len(dev_question_batches)-1):
            dev_correct += np.count_nonzero(dev_answer_batches[i]==pred)
        else:
            num_padding = batch_size * len(dev_question_batches) - len(dev_questions)
            last_idx = batch_size - num_padding
            dev_correct += np.count_nonzero(dev_answer_batches[:last_idx]==pred[:last_idx])

        #dev_correct += np.count_nonzero(dev_answer_batches[i]==pred)
        pbar.add(batch_size)

    print('Validation accuracy: %f' % (float(dev_correct)/len(dev_questions)))

    '''
    train_correct = 0
    pbar = generic_utils.Progbar(len(train_question_batches)*batch_size)

    for i in range(len(train_question_batches)):
        # feed forward
        X_question_batch = GetQuestionsTensor(train_question_batches[i], word_embedding, word_map)
        X_vgg_image_batch = GetImagesMatrix(train_image_batches[i], vgg_img_map, VGG_features)
        X_inc_image_batch = GetImagesMatrix(train_image_batches[i], inc_img_map, INC_features)
        prob_vgg = model_vgg.predict_proba([X_question_batch, X_vgg_image_batch], batch_size, verbose=0)
        prob_inc = model_inc.predict_proba([X_question_batch, X_inc_image_batch], batch_size, verbose=0)
        prob = (vgg_weight*prob_vgg + inc_weight*prob_inc)

        # get word vecs of choices
        choice_feats = GetChoicesTensor(train_choice_batches[i], word_embedding, word_map)
        similarity = np.zeros((5, batch_size), float)
        # calculate cosine distances
        for j in range(5):
            similarity[j] = np.diag(cosine_similarity(prob, choice_feats[j]))
        # take argmax of cosine distances
        pred = np.argmax(similarity, axis=0) + 1
        #predictions.extend(pred.tolist())
        if i != (len(dev_question_batches)-1):
            train_correct += np.count_nonzero(train_answer_batches[i]==pred)
        else:
            num_padding = batch_size * len(train_question_batches) - len(train_questions)
            last_idx = batch_size - num_padding
            train_correct += np.count_nonzero(train_answer_batches[:last_idx]==pred[:last_idx])

        #train_correct += np.count_nonzero(train_answer_batches[i]==pred)
        pbar.add(batch_size)

    print('Training accuracy: %f' % (float(train_correct)/len(train_questions)))
    '''

    print('Time: %f s' % (time.time()-start_time))
    print('Testing finished.')
Exemplo n.º 4
0
def main():
    start_time = time.time()

    parser = argparse.ArgumentParser(
        prog='valLSTM_MLP.py',
        description='Test LSTM-MLP model for visual question answering')
    parser.add_argument('--model',
                        type=str,
                        required=True,
                        metavar='<model-path>')
    parser.add_argument('--weights',
                        type=str,
                        required=True,
                        metavar='<weights-path>')
    args = parser.parse_args()

    word_vec_dim = 300
    batch_size = 128

    #######################
    #      Load Model     #
    #######################

    print('Loading model and weights...')
    model = model_from_json(open(args.model, 'r').read())
    model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
    model.load_weights(args.weights)
    print('Model and weights loaded.')
    print('Time: %f s' % (time.time() - start_time))

    ######################
    #      Load Data     #
    ######################

    print('Loading data...')

    train_id_pairs, train_image_ids = LoadIds('train')
    dev_id_pairs, dev_image_ids = LoadIds('dev')
    #test_id_pairs, test_image_ids = LoadIds('test')

    train_questions = LoadQuestions('train')
    dev_questions = LoadQuestions('dev')
    #test_questions = LoadQuestions('test')

    train_choices = LoadChoices('train')
    dev_choices = LoadChoices('dev')
    #test_choices = LoadChoices('test')

    train_answers = LoadAnswers('train')
    dev_answers = LoadAnswers('dev')
    train_qa_type, train_qtype_count, train_atype_count = LoadQAType('train')
    dev_qa_type, dev_qtype_count, dev_atype_count = LoadQAType('dev')
    train_total_qtype_count, train_total_atype_count = LoadTotalQAType('train')
    dev_total_qtype_count, dev_total_atype_count = LoadTotalQAType('dev')

    print('Finished loading data.')
    print('Time: %f s' % (time.time() - start_time))

    ########################################
    #  Load CNN Features and Word Vectors  #
    ########################################

    # load VGG features
    print('Loading VGG features...')
    VGG_features, img_map = LoadVGGFeatures()
    print('VGG features loaded')
    print('Time: %f s' % (time.time() - start_time))

    # load VGG features
    print('Loading Inception features...')
    INC_features, inc_img_map = LoadInceptionFeatures()
    print('Inception features loaded')
    print('Time: %f s' % (time.time() - start_time))

    # load GloVe vectors
    print('Loading GloVe vectors...')
    word_embedding, word_map = LoadGloVe()
    print('GloVe vectors loaded')
    print('Time: %f s' % (time.time() - start_time))

    ######################
    #    Make Batches    #
    ######################

    print('Making batches...')

    # train batches
    train_question_batches = [
        b for b in MakeBatches(
            train_questions, batch_size, fillvalue=train_questions[-1])
    ]
    train_answer_batches = [
        b for b in MakeBatches(train_answers['labs'],
                               batch_size,
                               fillvalue=train_answers['labs'][-1])
    ]
    train_choice_batches = [
        b for b in MakeBatches(
            train_choices, batch_size, fillvalue=train_choices[-1])
    ]
    train_image_batches = [
        b for b in MakeBatches(
            train_image_ids, batch_size, fillvalue=train_image_ids[-1])
    ]

    train_qatype_batches = [
        b for b in MakeBatches(
            train_qa_type, batch_size, fillvalue=train_id_pairs[-1])
    ]

    # validation batches
    dev_question_batches = [
        b for b in MakeBatches(
            dev_questions, batch_size, fillvalue=dev_questions[-1])
    ]
    dev_answer_batches = [
        b for b in MakeBatches(
            dev_answers['labs'], batch_size, fillvalue=dev_answers['labs'][-1])
    ]
    dev_choice_batches = [
        b for b in MakeBatches(
            dev_choices, batch_size, fillvalue=dev_choices[-1])
    ]
    dev_image_batches = [
        b for b in MakeBatches(
            dev_image_ids, batch_size, fillvalue=dev_image_ids[-1])
    ]

    dev_qatype_batches = [
        b for b in MakeBatches(
            dev_qa_type, batch_size, fillvalue=dev_id_pairs[-1])
    ]

    # testing batches
    #test_question_batches = [ b for b in MakeBatches(test_questions, batch_size, fillvalue=test_questions[-1]) ]
    #test_choice_batches = [ b for b in MakeBatches(test_choices, batch_size, fillvalue=test_choices[-1]) ]
    #test_image_batches = [ b for b in MakeBatches(test_image_ids, batch_size, fillvalue=test_image_ids[-1]) ]

    print('Finished making batches.')
    print('Time: %f s' % (time.time() - start_time))

    ######################
    #       Testing      #
    ######################

    # evaluate on dev set
    pbar = generic_utils.Progbar(len(dev_question_batches) * batch_size)

    dev_correct = 0

    for i in range(len(dev_question_batches)):
        # feed forward
        X_question_batch = GetQuestionsTensor(dev_question_batches[i],
                                              word_embedding, word_map)
        X_image_batch = GetImagesMatrix(dev_image_batches[i], inc_img_map,
                                        INC_features)
        prob = model.predict_proba([X_question_batch, X_image_batch],
                                   batch_size,
                                   verbose=0)

        # get word vecs of choices
        choice_feats = GetChoicesTensor(dev_choice_batches[i], word_embedding,
                                        word_map)
        similarity = np.zeros((5, batch_size), float)
        # calculate cosine distances
        for j in range(5):
            similarity[j] = np.diag(cosine_similarity(prob, choice_feats[j]))
        # take argmax of cosine distances
        pred = np.argmax(similarity, axis=0) + 1
        #predictions.extend(pred.tolist())

        dev_correct += np.count_nonzero(dev_answer_batches[i] == pred)

        # count the incorrect question and answer types
        incorrect = np.nonzero(dev_answer_batches[i] != pred)[0].tolist()
        #idpair_batch = dev_idpair_batches[i]
        qatype_batch = dev_qatype_batches[i]
        for idx in incorrect:
            q_type, a_type = qatype_batch[idx]
            dev_qtype_count[q_type] += 1
            dev_atype_count[a_type] += 1
        pbar.add(batch_size)
    print('Validation accuracy: %f' %
          (float(dev_correct) / len(dev_questions)))

    train_correct = 0
    pbar = generic_utils.Progbar(len(train_question_batches) * batch_size)

    for i in range(len(train_question_batches)):
        # feed forward
        X_question_batch = GetQuestionsTensor(train_question_batches[i],
                                              word_embedding, word_map)
        X_image_batch = GetImagesMatrix(train_image_batches[i], inc_img_map,
                                        INC_features)
        prob = model.predict_proba([X_question_batch, X_image_batch],
                                   batch_size,
                                   verbose=0)

        # get word vecs of choices
        choice_feats = GetChoicesTensor(train_choice_batches[i],
                                        word_embedding, word_map)
        similarity = np.zeros((5, batch_size), float)
        # calculate cosine distances
        for j in range(5):
            similarity[j] = np.diag(cosine_similarity(prob, choice_feats[j]))
        # take argmax of cosine distances
        pred = np.argmax(similarity, axis=0) + 1
        #predictions.extend(pred.tolist())

        train_correct += np.count_nonzero(train_answer_batches[i] == pred)

        # count the incorrect question and answer types
        incorrect = np.nonzero(train_answer_batches[i] != pred)[0].tolist()
        qatype_batch = train_qatype_batches[i]
        for idx in incorrect:
            q_type, a_type = qatype_batch[idx]
            train_qtype_count[q_type] += 1
            train_atype_count[a_type] += 1
        pbar.add(batch_size)
    print('Training accuracy: %f' %
          (float(train_correct) / len(train_questions)))

    print('Validation QA types:')
    print(dev_qtype_count)
    print(dev_atype_count)
    print('Training QA types:')
    print(train_qtype_count)
    print(train_atype_count)
    print('Total QA types:')
    print(train_total_qtype_count)
    print(train_total_atype_count)
    print(dev_total_qtype_count)
    print(dev_total_atype_count)
    print('Time: %f s' % (time.time() - start_time))
    print('Testing finished.')
def main():
    start_time = time.time()
    #signal.signal(signal.SIGINT, InterruptHandler)
    #signal.signal(signal.SIGKILL, InterruptHandler)
    #signal.signal(signal.SIGTERM, InterruptHandler)

    parser = argparse.ArgumentParser(
        prog='testWordVec_MLP.py',
        description='Test WordVec-MLP model for visual question answering')
    parser.add_argument('--model',
                        type=str,
                        required=True,
                        metavar='<model-path>')
    parser.add_argument('--weights',
                        type=str,
                        required=True,
                        metavar='<weights-path>')
    parser.add_argument('--output',
                        type=str,
                        required=True,
                        metavar='<prediction-path>')
    args = parser.parse_args()

    word_vec_dim = 300
    batch_size = 128

    #######################
    #      Load Model     #
    #######################

    print('Loading model and weights...')
    model = model_from_json(open(args.model, 'r').read())
    model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
    model.load_weights(args.weights)
    print('Model and weights loaded.')
    print('Time: %f s' % (time.time() - start_time))

    ######################
    #      Load Data     #
    ######################

    print('Loading data...')

    dev_id_pairs, dev_image_ids = LoadIds('dev')
    #test_id_pairs, test_image_ids = LoadIds('test')

    dev_questions = LoadQuestions('dev')
    #test_questions = LoadQuestions('test')

    dev_choices = LoadChoices('dev')
    #test_choices = LoadChoices('test')

    dev_answers = LoadAnswers('dev')

    print('Finished loading data.')
    print('Time: %f s' % (time.time() - start_time))

    ########################################
    #  Load CNN Features and Word Vectors  #
    ########################################

    # load VGG features
    print('Loading VGG features...')
    VGG_features, img_map = LoadVGGFeatures()
    print('VGG features loaded')
    print('Time: %f s' % (time.time() - start_time))

    # load GloVe vectors
    print('Loading GloVe vectors...')
    word_embedding, word_map = LoadGloVe()
    print('GloVe vectors loaded')
    print('Time: %f s' % (time.time() - start_time))

    ######################
    #    Make Batches    #
    ######################

    print('Making batches...')

    # validation batches
    dev_question_batches = [
        b for b in MakeBatches(
            dev_questions, batch_size, fillvalue=dev_questions[-1])
    ]
    dev_answer_batches = [
        b for b in MakeBatches(
            dev_answers['labs'], batch_size, fillvalue=dev_answers['labs'][-1])
    ]
    dev_choice_batches = [
        b for b in MakeBatches(
            dev_choices, batch_size, fillvalue=dev_choices[-1])
    ]
    dev_image_batches = [
        b for b in MakeBatches(
            dev_image_ids, batch_size, fillvalue=dev_image_ids[-1])
    ]

    print('Finished making batches.')
    print('Time: %f s' % (time.time() - start_time))

    ######################
    #       Testing      #
    ######################

    # evaluate on dev set
    widgets = [
        'Evaluating ',
        Percentage(), ' ',
        Bar(marker='#', left='[', right=']'), ' ',
        ETA()
    ]
    pbar = ProgressBar(widgets=widgets)

    dev_correct = 0
    predictions = []

    for i in pbar(range(len(dev_question_batches))):
        # feed forward
        X_question_batch = GetQuestionsMatrix(dev_question_batches[i],
                                              word_embedding, word_map)
        X_image_batch = GetImagesMatrix(dev_image_batches[i], img_map,
                                        VGG_features)
        prob = model.predict_proba([X_question_batch, X_image_batch],
                                   batch_size,
                                   verbose=0)

        # get word vecs of choices
        choice_feats = GetChoicesTensor(dev_choice_batches[i], word_embedding,
                                        word_map)
        similarity = np.zeros((5, batch_size), float)
        # calculate cosine distances
        for j in range(5):
            similarity[j] = np.diag(cosine_similarity(prob, choice_feats[j]))
        # take argmax of cosine distances
        pred = np.argmax(similarity, axis=0) + 1
        predictions.extend(pred.tolist())

        dev_correct += np.count_nonzero(dev_answer_batches[i] == pred)

    dev_acc = float(dev_correct) / len(dev_questions)
    print('Validation Accuracy: %f' % dev_acc)
    print('Validation Accuracy: %f' % dev_acc, file=sys.stderr)
    SavePredictions(args.output, predictions, dev_id_pairs)
    print('Time: %f s' % (time.time() - start_time))
    print('Time: %f s' % (time.time() - start_time), file=sys.stderr)
    print('Testing finished.')
    print('Testing finished.', file=sys.stderr)
def main():
    start_time = time.time()
    signal.signal(signal.SIGINT, InterruptHandler)
    #signal.signal(signal.SIGKILL, InterruptHandler)
    signal.signal(signal.SIGTERM, InterruptHandler)

    parser = argparse.ArgumentParser(
        prog='trainMLP.py',
        description='Train MLP model for visual question answering')
    parser.add_argument('--mlp-hidden-units',
                        type=int,
                        default=1024,
                        metavar='<mlp-hidden-units>')
    parser.add_argument('--mlp-hidden-layers',
                        type=int,
                        default=3,
                        metavar='<mlp-hidden-layers>')
    parser.add_argument('--dropout',
                        type=float,
                        default=0.5,
                        metavar='<dropout-rate>')
    parser.add_argument('--mlp-activation',
                        type=str,
                        default='relu',
                        metavar='<activation-function>')
    parser.add_argument('--num-epochs',
                        type=int,
                        default=100,
                        metavar='<num-epochs>')
    parser.add_argument('--model-save-interval',
                        type=int,
                        default=5,
                        metavar='<interval>')
    parser.add_argument('--batch-size',
                        type=int,
                        default=128,
                        metavar='<batch-size>')
    args = parser.parse_args()

    word_vec_dim = 300
    img_dim = 4096
    ######################
    #      Load Data     #
    ######################

    print('Loading data...')

    train_id_pairs, train_image_ids = LoadIds('train')
    dev_id_pairs, dev_image_ids = LoadIds('dev')

    train_choices = LoadChoices('train')
    dev_choices = LoadChoices('dev')

    train_answers = LoadAnswers('train')
    dev_answers = LoadAnswers('dev')

    print('Finished loading data.')
    print('Time: %f s' % (time.time() - start_time))

    print('-' * 100, file=sys.stderr)
    print('Training Information', file=sys.stderr)
    print('# of MLP hidden units: %i' % args.mlp_hidden_units, file=sys.stderr)
    print('# of MLP hidden layers: %i' % args.mlp_hidden_layers,
          file=sys.stderr)
    print('Dropout: %f' % args.dropout, file=sys.stderr)
    print('MLP activation function: %s' % args.mlp_activation, file=sys.stderr)
    print('# of training epochs: %i' % args.num_epochs, file=sys.stderr)
    print('Batch size: %i' % args.batch_size, file=sys.stderr)
    print('# of train images: %i' % len(train_image_ids), file=sys.stderr)
    print('# of dev images: %i' % len(dev_image_ids), file=sys.stderr)
    print('-' * 100, file=sys.stderr)

    ######################
    # Model Descriptions #
    ######################

    # MLP model
    model = Sequential()
    model.add(Dense(output_dim=args.mlp_hidden_units, input_dim=img_dim))
    model.add(Activation(args.mlp_activation))
    model.add(Dropout(args.dropout))
    for i in range(args.mlp_hidden_layers - 1):
        model.add(Dense(args.mlp_hidden_units))
        model.add(Activation(args.mlp_activation))
        model.add(Dropout(args.dropout))
    model.add(Dense(word_vec_dim))
    model.add(Activation('softmax'))

    json_string = model.to_json()
    model_filename = 'models/mlp_units_%i_layers_%i' % (args.mlp_hidden_units,
                                                        args.mlp_hidden_layers)
    open(model_filename + '.json', 'w').write(json_string)

    # loss and optimizer
    model.compile(loss='categorical_crossentropy', optimizer='adagrad')
    print('Compilation finished.')
    print('Time: %f s' % (time.time() - start_time))

    ########################################
    #  Load CNN Features and Word Vectors  #
    ########################################

    # load VGG features
    print('Loading VGG features...')
    VGG_features, img_map = LoadVGGFeatures()
    print('VGG features loaded')
    print('Time: %f s' % (time.time() - start_time))

    # load GloVe vectors
    print('Loading GloVe vectors...')
    word_embedding, word_map = LoadGloVe()
    print('GloVe vectors loaded')
    print('Time: %f s' % (time.time() - start_time))

    ######################
    #    Make Batches    #
    ######################

    print('Making batches...')

    # training batches
    train_answer_batches = [
        b for b in MakeBatches(train_answers['toks'],
                               args.batch_size,
                               fillvalue=train_answers['toks'][-1])
    ]
    train_image_batches = [
        b for b in MakeBatches(
            train_image_ids, args.batch_size, fillvalue=train_image_ids[-1])
    ]
    train_indices = list(range(len(train_image_ids)))

    # validation batches
    dev_answer_batches = [
        b for b in MakeBatches(dev_answers['labs'],
                               args.batch_size,
                               fillvalue=dev_answers['labs'][-1])
    ]
    dev_choice_batches = [
        b for b in MakeBatches(
            dev_choices, args.batch_size, fillvalue=dev_choices[-1])
    ]
    dev_image_batches = [
        b for b in MakeBatches(
            dev_image_ids, args.batch_size, fillvalue=dev_image_ids[-1])
    ]

    print('Finished making batches.')
    print('Time: %f s' % (time.time() - start_time))

    ######################
    #      Training      #
    ######################

    dev_accs = []
    max_acc = -1
    max_acc_epoch = -1

    print('Training started...')
    for k in range(args.num_epochs):
        print('Epoch %i' % (k + 1), file=sys.stderr)
        print('-' * 80)
        print('Epoch %i' % (k + 1))
        progbar = generic_utils.Progbar(len(train_indices) * args.batch_size)
        # shuffle batch indices
        random.shuffle(train_indices)
        for i in train_indices:
            X_image_batch = GetImagesMatrix(train_image_batches[i], img_map,
                                            VGG_features)
            Y_answer_batch = GetAnswersMatrix(train_answer_batches[i],
                                              word_embedding, word_map)
            loss = model.train_on_batch(X_image_batch, Y_answer_batch)
            loss = loss[0].tolist()
            progbar.add(args.batch_size, values=[('train loss', loss)])

        if k % args.model_save_interval == 0:
            model.save_weights(model_filename +
                               '_epoch_{:03d}.hdf5'.format(k + 1),
                               overwrite=True)

        # evaluate on dev set
        widgets = [
            'Evaluating ',
            Percentage(), ' ',
            Bar(marker='#', left='[', right=']'), ' ',
            ETA()
        ]
        pbar = ProgressBar(widgets=widgets, redirect_stdout=True)

        dev_correct = 0

        for i in pbar(range(len(dev_image_batches))):
            # feed forward
            X_image_batch = GetImagesMatrix(dev_image_batches[i], img_map,
                                            VGG_features)
            prob = model.predict_proba(X_image_batch,
                                       args.batch_size,
                                       verbose=0)

            # get word vecs of choices
            choice_feats = GetChoicesTensor(dev_choice_batches[i],
                                            word_embedding, word_map)
            similarity = np.zeros((5, args.batch_size), float)
            # calculate cosine distances
            for j in range(5):
                similarity[j] = np.diag(
                    cosine_similarity(prob, choice_feats[j]))
            # take argmax of cosine distances
            pred = np.argmax(similarity, axis=0) + 1

            dev_correct += np.count_nonzero(dev_answer_batches[i] == pred)

        dev_acc = float(dev_correct) / len(dev_image_ids)
        dev_accs.append(dev_acc)
        print('Validation Accuracy: %f' % dev_acc)
        print('Validation Accuracy: %f' % dev_acc, file=sys.stderr)
        print('Time: %f s' % (time.time() - start_time))
        print('Time: %f s' % (time.time() - start_time), file=sys.stderr)

        if dev_acc > max_acc:
            max_acc = dev_acc
            max_acc_epoch = k
            model.save_weights(model_filename + '_best.hdf5', overwrite=True)

    model.save_weights(model_filename + '_epoch_{:03d}.hdf5'.format(k + 1))
    print(dev_accs, file=sys.stderr)
    print('Best validation accuracy: epoch#%i' % max_acc_epoch)
    print('Training finished.')
    print('Training finished.', file=sys.stderr)
    print('Time: %f s' % (time.time() - start_time))
    print('Time: %f s' % (time.time() - start_time), file=sys.stderr)