def main_video():
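    """Train the video-only genre classifier on the train split, validating on val."""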
    trainLabels, trainVideoFeatures = gather_features('train',
                                                      return_plot=False)
    valLabels, valVideoFeatures = gather_features('val', return_plot=False)
    #   train_classifier_video(valVideoFeatures, valLabels)
    train_classifier_video(trainVideoFeatures, trainLabels, valVideoFeatures,
                           valLabels)
def main_text():
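    """Train the word-embedding text classifier on forward and reversed plot sequences."""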
    trainLabels, trainPlotFeatures = gather_features('train',
                                                     return_video=False,
                                                     reverse=True)
    valLabels, valPlotFeatures = gather_features('val',
                                                 return_video=False,
                                                 reverse=True)
    # Transpose so that row 0 holds the forward plot sequences and row 1 the
    # reversed ones, as two parallel arrays per split.
    trainPlotFeatures = np.array(map(list, zip(*trainPlotFeatures)))
    valPlotFeatures = np.array(map(list, zip(*valPlotFeatures)))
    #    train_classifier_word_embedding(valPlotFeatures, valLabels)
    train_classifier_word_embedding(trainPlotFeatures, trainLabels,
                                    valPlotFeatures, valLabels)
def main_vislang():
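    """Train the joint vision-language classifier; the merge mode (e.g. 'sum',
    'concat', 'mul', 'outer') is taken from the command line via argv[2]."""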
    trainLabels, trainPlotFeatures, trainVideoFeatures = gather_features(
        'train', reverse=True)
    valLabels, valPlotFeatures, valVideoFeatures = gather_features(
        'val', reverse=True)
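    # Same transposition as in main_text: row 0 = forward plots, row 1 = reversed plots.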
    trainPlotFeatures = np.array(map(list, zip(*trainPlotFeatures)))
    valPlotFeatures = np.array(map(list, zip(*valPlotFeatures)))
    #    train_classifier_vislang(valVideoFeatures, valPlotFeatures, valLabels, merge_mode='outer')
    train_classifier_vislang(trainVideoFeatures,
                             trainPlotFeatures,
                             trainLabels,
                             valVideoFeatures,
                             valPlotFeatures,
                             valLabels,
                             merge_mode=argv[2])
def generate_precision_recall_text(mode='val'):
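    """Run the saved text model on `mode` and pickle (yTrue, yPreds) for
    precision-recall analysis."""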

    model = load_moviescope_model('text')
    yTrue, plotFeatures = gather_features(mode, return_video=False, reverse=True)
    plotFeatures = np.array(map(list, zip(*plotFeatures)))
    yPreds = model.predict([plotFeatures[0], plotFeatures[1]])
    dump_pkl((yTrue, yPreds), mode+'_pred_text')

    return
def generate_precision_recall_video(mode='val'):
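    """Run the saved video model on `mode` and pickle (yTrue, yPreds) for
    precision-recall analysis."""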

    model = load_moviescope_model('wiki_im_video_sgd')
    yTrue, videoFeatures = gather_features(mode, return_plot=False)
    _, videoFeatures = augment_labels_lstm(yTrue, videoFeatures, 200)
    yPreds = model.predict(videoFeatures)

    dump_pkl((yTrue, yPreds), mode+'_pred_video_sgd')

    return
def generate_precision_recall_vislang(mode='val', merge_mode='sum'):
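    """Pickle (yTrue, yPreds) from the vision-language model for `merge_mode`;
    the bilinear variant is rebuilt from its architecture and saved weights
    rather than loaded as a full saved model."""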

    if merge_mode == 'bilinear':
        model = vislang_model(merge_mode)
        model.load_weights('data/weights/weights_min_loss_%s.h5' % merge_mode)
    else:
        model = load_moviescope_model('eq_VisLang_%s' % merge_mode)

    yTrue, plotFeatures, videoFeatures = gather_features(mode, reverse=True)
    plotFeatures = np.array(map(list, zip(*plotFeatures)))
    _, videoFeatures = augment_labels_lstm(yTrue, videoFeatures, 200)
    yPreds = model.predict([videoFeatures, plotFeatures[0], plotFeatures[1]])
    dump_pkl((yTrue, yPreds), mode+'_pred_eq_vislang_'+merge_mode)
def return_confident_results(mode='val'):
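    """Collect (score, movieId) prediction pairs per genre from the
    vision-language model, pickle them, and print the ten most confident
    movies for each of the 26 genres."""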
    
    model = load_moviescope_model('wiki_im_VisLang')
    genrePredictionDict = {i: [] for i in range(26)}
    textObj = load_pkl('plot_object_train')
    labels, plotFeatures, videoFeatures, movieIds = gather_features(mode, return_id=True)
    _, videoFeatures = augment_labels_lstm(labels, videoFeatures, 200)
    predictionScores = model.predict([videoFeatures, plotFeatures])
    for index in range(len(predictionScores)):
        for i in range(26):
            genrePredictionDict[i].append((predictionScores[index][i], movieIds[index]))

    dump_pkl(genrePredictionDict, 'genrePredictionDict_'+mode)
    
    for i in range(26):
        print sorted(genrePredictionDict[i], reverse=True)[:10]
    return
def fine_tune_merge_only(merge_mode='sum'):
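    """Fine-tune only the fusion head: both modalities are loaded as cached
    64-d feature vectors, merged according to `merge_mode`, and a small
    classifier is trained on top of the merged representation."""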

    # One-time switch: flip to True to regenerate and cache the label arrays;
    # once the pickles exist they are simply reloaded below.
    if False:
        trainLabels = gather_features(mode='train',
                                      return_plot=False,
                                      return_video=False)
        valLabels = gather_features(mode='val',
                                    return_plot=False,
                                    return_video=False)

        dump_pkl(trainLabels, 'trainLabels')
        dump_pkl(valLabels, 'valLabels')
    else:
        trainLabels = load_pkl('trainLabels')
        valLabels = load_pkl('valLabels')

    train_visFeatures = load_pkl('train_visFeatures')
    train_textFeatures = load_pkl('train_textFeatures')

    val_visFeatures = load_pkl('val_visFeatures')
    val_textFeatures = load_pkl('val_textFeatures')

    visInput = Input(shape=(64, ))
    textInput = Input(shape=(64, ))

    if merge_mode in ['sum', 'concat', 'mul', 'outer']:
        if merge_mode == 'outer':
            vislangModel = Lambda(bilinear_projection,
                                  output_shape=(4096, ))([visInput, textInput])
        else:
            vislangModel = merge([visInput, textInput], mode=merge_mode)

        if merge_mode == 'sum':
            vislangModel = Dense(1000, activation='relu')(vislangModel)
            vislangModel = Dropout(0.5)(vislangModel)
            vislangModel = Dense(8, activation='relu')(vislangModel)
        elif merge_mode == 'concat':
            vislangModel = Dense(512, activation='relu')(vislangModel)
            vislangModel = Dropout(0.5)(vislangModel)
            vislangModel = Dense(14, activation='relu')(vislangModel)
        elif merge_mode == 'outer':
            vislangModel = Dense(16, activation='relu')(vislangModel)
            vislangModel = Dropout(0.5)(vislangModel)
            vislangModel = Dense(256, activation='relu')(vislangModel)

        vislangModel = Dense(number_of_classes,
                             activation='sigmoid')(vislangModel)
    else:
        vislangModel = BilinearTensorLayer(input_dim=64)([visInput, textInput])

    sgd = SGD(lr=0.1, decay=0.00001, momentum=0.9, nesterov=True)
    model = Model(input=[visInput, textInput], output=[vislangModel])
    model.compile(loss='binary_crossentropy',
                  optimizer=sgd,
                  metrics=['accuracy'])

    checkpoint = ModelCheckpoint(filepath='./data/models/ft_vislang_%s.h5' %
                                 merge_mode,
                                 monitor='val_acc',
                                 verbose=1,
                                 save_best_only=True,
                                 mode='max')
    checkpoint_loss = ModelCheckpoint(
        filepath='./data/weights/ft_weights_min_loss_%s.h5' % merge_mode,
        monitor='val_loss',
        save_best_only=True,  # only keep the weights with the lowest val_loss
        save_weights_only=True,
        mode='min')
    # `remote` is assumed to be a callback (e.g. keras.callbacks.RemoteMonitor)
    # defined at module scope alongside the other training utilities.
    callbacks_list = [checkpoint, remote, checkpoint_loss]

    # Resume fine-tuning from the last saved minimum-loss fusion weights.
    model.load_weights('data/weights/ft_weights_min_loss_%s.h5' % merge_mode)
    hist = model.fit(x=[train_visFeatures, train_textFeatures],
                     y=trainLabels,
                     validation_data=([val_visFeatures,
                                       val_textFeatures], valLabels),
                     nb_epoch=50,
                     batch_size=128,
                     callbacks=callbacks_list)

    histDict = hist.history
    dump_pkl(histDict, 'hist_ft_vislang_%s' % merge_mode)
def test_vislang(mode='val'):
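    """Evaluate the saved vision-language model on the given split."""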
    model = load_moviescope_model('wiki_im_vislang')
    labels, plotFeatures, videoFeatures = gather_features(mode)
    evaluate_vislang(model, videoFeatures, plotFeatures, labels)
def test(mode='val'):
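    """Evaluate the best text and video models separately on the same split."""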
    valLabels, plotFeatures, videoFeatures = gather_features(mode)
    plotModel = load_moviescope_model(best_plot_model)
    videoModel = load_moviescope_model(best_video_model)
    evaluate_text(plotModel, plotFeatures, valLabels)
    evaluate_visual(videoModel, videoFeatures, valLabels)
def test_text():
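    """Evaluate the best text-only model on the validation split."""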
    valLabels, valFeatures = gather_features(mode='val', return_video=False)
    model = load_moviescope_model(best_plot_model)
    evaluate_text(model, valFeatures, valLabels)
def test_visual():
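    """Evaluate the best video-only model on the validation split."""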
    valLabels, valFeatures = gather_features('val', return_plot=False)
    model = load_moviescope_model(best_video_model)
    evaluate_visual(model, valFeatures, valLabels)