def run_experiment(max_len, dropout_rate, n_layers):

    global dataset, train_ids, valid_ids, test_ids, mode, task, val_method, val_mode, use_PCA

    # Number of PCA components per modality (used when use_PCA is True)
    visual_components = 25
    audio_components = 20
    text_components = 110

    nodes = 100  # units per hidden Dense layer
    epochs = 200
    outfile = "MOSI_sweep/int_" + mode + "_" + str(task) + "_" + str(
        n_layers) + "_" + str(max_len) + "_" + str(dropout_rate)
    experiment_prefix = "intermediate"
    batch_size = 64
    logs_path = "regression_logs/"
    experiment_name = "{}_n_{}_dr_{}_nl_{}_ml_{}".format(
        experiment_prefix, nodes, dropout_rate, n_layers, max_len)

    # collect the (video ID, segment ID) pairs whose required modalities
    # are all present for the current mode
    required_feats = {
        "all": ('embeddings', 'facet', 'covarep'),
        "AV": ('embeddings', 'facet', 'covarep'),
        "AT": ('embeddings', 'covarep'),
        "A": ('embeddings', 'covarep'),
        "VT": ('embeddings', 'facet'),
        "V": ('embeddings', 'facet'),
        "T": ('embeddings',),
    }[mode]

    def collect_ids(video_ids):
        # skip videos absent from the embeddings dict (originally only the
        # test split was guarded this way)
        return [(vid, sid) for vid in video_ids
                if vid in dataset['embeddings']
                for sid in dataset['embeddings'][vid]
                if all(dataset[feat][vid][sid] for feat in required_feats)]

    train_set_ids = collect_ids(train_ids)
    valid_set_ids = collect_ids(valid_ids)
    test_set_ids = collect_ids(test_ids)

    # partition the training, valid and test sets. all sequences will be
    # padded/truncated to max_len steps, giving data of shape
    # (dataset_size, max_len, feature_dim); a hedged sketch of the pad
    # helper appears after this function
    if mode == "all" or mode == "AV" or mode == "AT":
        train_set_audio = np.stack([
            pad(dataset['covarep'][vid][sid], max_len)
            for (vid, sid) in train_set_ids if dataset['covarep'][vid][sid]
        ],
                                   axis=0)
        valid_set_audio = np.stack([
            pad(dataset['covarep'][vid][sid], max_len)
            for (vid, sid) in valid_set_ids if dataset['covarep'][vid][sid]
        ],
                                   axis=0)
        test_set_audio = np.stack([
            pad(dataset['covarep'][vid][sid], max_len)
            for (vid, sid) in test_set_ids if dataset['covarep'][vid][sid]
        ],
                                  axis=0)
    if mode == "all" or mode == "VT" or mode == "AV":
        train_set_visual = np.stack([
            pad(dataset['facet'][vid][sid], max_len)
            for (vid, sid) in train_set_ids if dataset['facet'][vid][sid]
        ],
                                    axis=0)
        valid_set_visual = np.stack([
            pad(dataset['facet'][vid][sid], max_len)
            for (vid, sid) in valid_set_ids if dataset['facet'][vid][sid]
        ],
                                    axis=0)
        test_set_visual = np.stack([
            pad(dataset['facet'][vid][sid], max_len)
            for (vid, sid) in test_set_ids if dataset['facet'][vid][sid]
        ],
                                   axis=0)

    if mode == "all" or mode == "VT" or mode == "AT":
        train_set_text = np.stack([
            pad(dataset['embeddings'][vid][sid], max_len)
            for (vid, sid) in train_set_ids if dataset['embeddings'][vid][sid]
        ],
                                  axis=0)
        valid_set_text = np.stack([
            pad(dataset['embeddings'][vid][sid], max_len)
            for (vid, sid) in valid_set_ids if dataset['embeddings'][vid][sid]
        ],
                                  axis=0)
        test_set_text = np.stack([
            pad(dataset['embeddings'][vid][sid], max_len)
            for (vid, sid) in test_set_ids if dataset['embeddings'][vid][sid]
        ],
                                 axis=0)

    if task == "SB":
        # binarize the sentiment scores for binary classification task
        y_train = np.array(
            [sentiments[vid][sid] for (vid, sid) in train_set_ids]) > 0
        y_valid = np.array(
            [sentiments[vid][sid] for (vid, sid) in valid_set_ids]) > 0
        y_test = np.array(
            [sentiments[vid][sid] for (vid, sid) in test_set_ids]) > 0

    if task == "SR":
        y_train = np.array(
            [sentiments[vid][sid] for (vid, sid) in train_set_ids])
        y_valid = np.array(
            [sentiments[vid][sid] for (vid, sid) in valid_set_ids])
        y_test = np.array(
            [sentiments[vid][sid] for (vid, sid) in test_set_ids])

    if task == "S5":
        y_train1 = np.array(
            [sentiments[vid][sid] for (vid, sid) in train_set_ids])
        y_valid1 = np.array(
            [sentiments[vid][sid] for (vid, sid) in valid_set_ids])
        y_test1 = np.array(
            [sentiments[vid][sid] for (vid, sid) in test_set_ids])
        y_train = convert_S5_hot(y_train1)
        y_valid = convert_S5_hot(y_valid1)
        y_test = convert_S5_hot(y_test1)

    # normalize covarep and facet features, remove possible NaN values
    if mode == "all" or mode == "AV" or mode == "VT":
        visual_max = np.max(np.max(np.abs(train_set_visual), axis=0), axis=0)
        visual_max[visual_max ==
                   0] = 1  # if the maximum is 0 we don't normalize
        train_set_visual = train_set_visual / visual_max
        valid_set_visual = valid_set_visual / visual_max
        test_set_visual = test_set_visual / visual_max
        train_set_visual[train_set_visual != train_set_visual] = 0
        valid_set_visual[valid_set_visual != valid_set_visual] = 0
        test_set_visual[test_set_visual != test_set_visual] = 0

    if mode == "all" or mode == "AT" or mode == "AV":
        audio_max = np.max(np.max(np.abs(train_set_audio), axis=0), axis=0)
        train_set_audio = train_set_audio / audio_max
        valid_set_audio = valid_set_audio / audio_max
        test_set_audio = test_set_audio / audio_max
        train_set_audio[train_set_audio != train_set_audio] = 0
        valid_set_audio[valid_set_audio != valid_set_audio] = 0
        test_set_audio[test_set_audio != test_set_audio] = 0

    if use_PCA:
        # PCA expects 2-D input: flatten (samples, time, features) to
        # (samples * time, features), fit on the training split only, then
        # restore the 3-D shape with the reduced feature dimension
        if mode in ("all", "AV", "VT"):
            nsamples1, nx1, ny1 = train_set_visual.shape
            train_set_visual = train_set_visual.reshape(nsamples1 * nx1, ny1)
            nsamples2, nx2, ny2 = valid_set_visual.shape
            valid_set_visual = valid_set_visual.reshape(nsamples2 * nx2, ny2)
            nsamples3, nx3, ny3 = test_set_visual.shape
            test_set_visual = test_set_visual.reshape(nsamples3 * nx3, ny3)
            pca = decomposition.PCA(n_components=visual_components)
            train_set_visual_pca = pca.fit_transform(train_set_visual)
            valid_set_visual_pca = pca.transform(valid_set_visual)
            test_set_visual_pca = pca.transform(test_set_visual)
            train_set_visual = train_set_visual_pca.reshape(
                nsamples1, nx1, visual_components)
            valid_set_visual = valid_set_visual_pca.reshape(
                nsamples2, nx2, visual_components)
            test_set_visual = test_set_visual_pca.reshape(
                nsamples3, nx3, visual_components)

        if mode == "all" or mode == "AT" or mode == "AV":
            nsamples1, nx1, ny1 = train_set_audio.shape
            train_set_audio = train_set_audio.reshape(nsamples1 * nx1, ny1)
            nsamples2, nx2, ny2 = valid_set_audio.shape
            valid_set_audio = valid_set_audio.reshape(nsamples2 * nx2, ny2)
            nsamples3, nx3, ny3 = test_set_audio.shape
            test_set_audio = test_set_audio.reshape(nsamples3 * nx3, ny3)
            pca = decomposition.PCA(n_components=audio_components)
            train_set_audio_pca = pca.fit_transform(train_set_audio)
            valid_set_audio_pca = pca.transform(valid_set_audio)
            test_set_audio_pca = pca.transform(test_set_audio)
            train_set_audio = train_set_audio_pca.reshape(
                nsamples1, nx1, audio_components)
            valid_set_audio = valid_set_audio_pca.reshape(
                nsamples2, nx2, audio_components)
            test_set_audio = test_set_audio_pca.reshape(
                nsamples3, nx3, audio_components)

        if mode == "all" or mode == "AT" or mode == "VT":
            nsamples1, nx1, ny1 = train_set_text.shape
            train_set_text = train_set_text.reshape(nsamples1 * nx1, ny1)
            nsamples2, nx2, ny2 = valid_set_text.shape
            valid_set_text = valid_set_text.reshape(nsamples2 * nx2, ny2)
            nsamples3, nx3, ny3 = test_set_text.shape
            test_set_text = test_set_text.reshape(nsamples3 * nx3, ny3)
            pca = decomposition.PCA(n_components=text_components)
            train_set_text_pca = pca.fit_transform(train_set_text)
            valid_set_text_pca = pca.transform(valid_set_text)
            test_set_text_pca = pca.transform(test_set_text)
            train_set_text = train_set_text_pca.reshape(
                nsamples1, nx1, text_components)
            valid_set_text = valid_set_text_pca.reshape(
                nsamples2, nx2, text_components)
            test_set_text = test_set_text_pca.reshape(nsamples3, nx3,
                                                      text_components)

    k = 3  # Conv1D kernel size
    m = 2  # MaxPooling1D pool size
    if task == "SB":
        val_method = "val_acc"
        val_mode = "max"
        emote_final = 'sigmoid'
    if task == "SR":
        val_method = "val_loss"
        val_mode = "min"
        emote_final = 'linear'
    if task == "S5":
        val_method = "val_acc"
        val_mode = "max"
        emote_final = 'softmax'

    # AUDIO
    if mode == "all" or mode == "AT" or mode == "AV":
        model1_in = Input(shape=(max_len, train_set_audio.shape[2]))
        model1_cnn = Conv1D(filters=64, kernel_size=k,
                            activation='relu')(model1_in)
        model1_mp = MaxPooling1D(m)(model1_cnn)
        model1_fl = Flatten()(model1_mp)
        model1_dropout = Dropout(dropout_rate)(model1_fl)
        model1_dense = Dense(nodes, activation="relu")(model1_dropout)
        for i in range(2, n_layers + 1):
            model1_dropout = Dropout(dropout_rate)(model1_dense)
            model1_dense = Dense(nodes, activation="relu")(model1_dropout)

    # TEXT = BLSTM from unimodal
    if mode == "all" or mode == "AT" or mode == "VT":
        model2_in = Input(shape=(max_len, train_set_text.shape[2]))
        model2_lstm = Bidirectional(LSTM(64))(model2_in)
        model2_dropout = Dropout(dropout_rate)(model2_lstm)
        model2_dense = Dense(nodes, activation="relu")(model2_dropout)
        for i in range(2, n_layers + 1):
            model2_dropout = Dropout(dropout_rate)(model2_dense)
            model2_dense = Dense(nodes, activation="relu")(model2_dropout)

    # VIDEO - CNN from unimodal
    if mode == "all" or mode == "AV" or mode == "VT":
        model3_in = Input(shape=(max_len, train_set_visual.shape[2]))
        model3_cnn = Conv1D(filters=64, kernel_size=k,
                            activation='relu')(model3_in)
        model3_mp = MaxPooling1D(m)(model3_cnn)
        model3_fl = Flatten()(model3_mp)
        model3_dropout = Dropout(dropout_rate)(model3_fl)
        model3_dense = Dense(nodes, activation="relu")(model3_dropout)
        for i in range(2, n_layers + 1):
            model3_dropout = Dropout(dropout_rate)(model3_dense)
            model3_dense = Dense(nodes, activation="relu")(model3_dropout)

    if mode == "all":
        concatenated = concatenate([model1_dense, model2_dense, model3_dense])
    if mode == "AV":
        concatenated = concatenate([model1_dense, model3_dense])
    if mode == "AT":
        concatenated = concatenate([model1_dense, model2_dense])
    if mode == "VT":
        concatenated = concatenate([model2_dense, model3_dense])
    dense = Dense(200, activation='relu')(concatenated)
    dense2 = Dense(200, activation='relu')(dense)

    if task == "SR":
        out = Dense(1, activation=emote_final)(dense2)
    if task == "SB":
        out = Dense(1, activation=emote_final)(dense2)
    if task == "S5":
        out = Dense(5, activation=emote_final)(dense2)

    if mode == "all":
        merged_model = Model([model1_in, model2_in, model3_in], out)
    if mode == "AV":
        merged_model = Model([model1_in, model3_in], out)
    if mode == "AT":
        merged_model = Model([model1_in, model2_in], out)
    if mode == "VT":
        merged_model = Model([model2_in, model3_in], out)

    if task == "SB":
        merged_model.compile('adam',
                             'binary_crossentropy',
                             metrics=['accuracy'])
    if task == "S5":
        merged_model.compile('adam',
                             'binary_crossentropy',
                             metrics=['accuracy'])
    if task == "SR":
        merged_model.compile('adam', loss='mean_absolute_error')

    if mode == "all":
        x_train = [train_set_audio, train_set_text, train_set_visual]
        x_valid = [valid_set_audio, valid_set_text, valid_set_visual]
        x_test = [test_set_audio, test_set_text, test_set_visual]
    if mode == "AV":
        x_train = [train_set_audio, train_set_visual]
        x_valid = [valid_set_audio, valid_set_visual]
        x_test = [test_set_audio, test_set_visual]
    if mode == "AT":
        x_train = [train_set_audio, train_set_text]
        x_valid = [valid_set_audio, valid_set_text]
        x_test = [test_set_audio, test_set_text]
    if mode == "VT":
        x_train = [train_set_text, train_set_visual]
        x_valid = [valid_set_text, valid_set_visual]
        x_test = [test_set_text, test_set_visual]

    early_stopping = EarlyStopping(monitor=val_method,
                                   min_delta=0,
                                   patience=10,
                                   verbose=1,
                                   mode=val_mode)
    callbacks_list = [early_stopping]
    merged_model.fit(x_train,
                     y_train,
                     batch_size=batch_size,
                     epochs=epochs,
                     validation_data=(x_valid, y_valid),
                     callbacks=callbacks_list)
    preds = merged_model.predict(x_test)
    out = open(outfile, "w")

    print("testing output before eval metrics calcs..")
    print(y_test[0])
    print(preds[0])

    if task == "SR":
        preds = np.concatenate(preds)
        mae = sklearn.metrics.mean_absolute_error(y_test, preds)
        r, _ = scipy.stats.pearsonr(y_test, preds)
        out.write("Test MAE: " + str(mae) + "\n")
        out.write("Test CORR: " + str(r) + "\n")
    if task == "S5":
        preds = convert_pred_hot(preds)
        acc = sklearn.metrics.accuracy_score(y_test, preds)
        out.write("Test ACC: " + str(acc) + "\n")
    if task == "SB":
        acc = np.mean((preds > 0.5) == y_test.reshape(-1, 1))
        preds = np.concatenate(preds)
        preds = preds > 0.5
        f1 = sklearn.metrics.f1_score(y_test, preds)
        out.write("Test ACC: " + str(acc) + "\n")
        out.write("Test F1: " + str(f1) + "\n")

    out.write("use_PCA=" + str(use_PCA) + "\n")
    out.write("dropout_rate=" + str(dropout_rate) + "\n")
    out.write("n_layers=" + str(n_layers) + "\n")
    out.write("max_len=" + str(max_len) + "\n")
    out.write("nodes=" + str(nodes) + "\n")
    out.write("task=" + str(task) + "\n")
    out.write("mode=" + str(mode) + "\n")
    out.write("num_train=" + str(len(train_set_ids)) + "\n")
    out.write("num_valid=" + str(len(valid_set_ids)) + "\n")
    out.write("num_test=" + str(len(test_set_ids)) + "\n")
    out.close()
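
# NOTE: pad, convert_S5_hot and convert_pred_hot are called above but are not
# defined in this excerpt. The sketches below are hedged reconstructions, not
# the original implementations: they assume pad zero-pads/truncates a
# (timesteps, feature_dim) sequence to max_len rows, and that the S5 task
# buckets continuous sentiment scores into 5 one-hot classes.
import numpy as np


def pad(seq, max_len):
    # hypothetical: keep the first max_len steps, zero-pad at the front
    seq = np.asarray(seq)[:max_len]
    padded = np.zeros((max_len, seq.shape[1]))
    if len(seq):
        padded[-len(seq):] = seq
    return padded


def convert_S5_hot(scores):
    # hypothetical: bucket continuous scores into 5 equal-width bins, one-hot
    edges = np.linspace(scores.min(), scores.max(), 6)[1:-1]
    return np.eye(5)[np.digitize(scores, edges)]


def convert_pred_hot(preds):
    # hypothetical: argmax over the softmax output, converted back to one-hot
    return np.eye(preds.shape[1])[np.argmax(preds, axis=1)]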
Example #2
# (excerpt) q1_input, q2_input and output are defined by earlier
# model-building code for the two question inputs
model_cpu = Model(inputs=[q1_input, q2_input], outputs=output)

# replicate the model across 4 GPUs for data-parallel training
model = multi_gpu_model(model_cpu, gpus=4)
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])


model.fit([padded_q1, padded_q2], y_train, epochs=2, batch_size=128, shuffle=True)

score = model.evaluate([test_padded_q1, test_padded_q2], y_test, verbose=1)
print(score[1])  # test accuracy
print(x_test_q1[0] + " " + x_test_q2[0])

prediction = model.predict([test_padded_q1, test_padded_q2])

# log the misclassified question pairs (predicted label disagrees with truth)
output = open("wrong_prediction1.txt", "a")
for count, p in enumerate(prediction):
    pred_label = int(p[0] >= 0.5)
    if pred_label != y_test[count]:
        output.write(x_test_q1[count] + " " + x_test_q2[count] + "\t" +
                     "Pred: " + str(p[0]) + "\tTrue: " + str(y_test[count]) + "\n")

output.close()
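
# For context: padded_q1/padded_q2 and their test counterparts are not defined
# in this excerpt. A minimal sketch of the preprocessing they presumably come
# from is given below; the tokenizer settings (max_words, seq_len) and the
# x_train_q1/x_train_q2 names are illustrative assumptions, not original code.
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences

max_words, seq_len = 20000, 30  # assumed vocabulary size and padded length
tokenizer = Tokenizer(num_words=max_words)
tokenizer.fit_on_texts(list(x_train_q1) + list(x_train_q2))

padded_q1 = pad_sequences(tokenizer.texts_to_sequences(x_train_q1), maxlen=seq_len)
padded_q2 = pad_sequences(tokenizer.texts_to_sequences(x_train_q2), maxlen=seq_len)
test_padded_q1 = pad_sequences(tokenizer.texts_to_sequences(x_test_q1), maxlen=seq_len)
test_padded_q2 = pad_sequences(tokenizer.texts_to_sequences(x_test_q2), maxlen=seq_len)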