import pickle

import numpy as np
import pandas as pd
from sklearn.metrics import confusion_matrix

# preprocess, waua and plot_wauacm are shared project helpers, imported from
# the SVM training module in the original source.


def main():
    configs = ["IS09", "IS10", "IS11", "IS12", "ComParE", "GeMAPS", "eGeMAPS"]
    database = "IEMOCAP"
    emotionsTest = ["Neutral", "Happiness", "Sadness", "Anger"]
    for config in configs:
        print("config: " + config)
        features = pd.read_csv("features/{}_{}.csv".format(database, config))
        features.set_index(["speaker", "emotion", "actType", "name"],
                           inplace=True)
        cm = np.zeros((len(emotionsTest), len(emotionsTest)), dtype=int)
        nSpeaker = len(features.index.unique("speaker"))
        # Fixed validation speaker for each test speaker.
        sps_val = [0, 0, 2, 2, 4, 4, 6, 6, 8, 8]
        for sp in range(nSpeaker):
            print("Testing speaker {}".format(sp))
            (x_train_scaled, y_train, x_val_scaled, y_val,
             x_test_scaled, y_test) = preprocess(sp, sps_val[sp],
                                                 features, emotionsTest)
            # Load the SVM trained with this speaker held out and accumulate
            # its test confusion matrix.
            modelfile = "models/SVM/{}_{}.sav".format(config, sp)
            svm = pickle.load(open(modelfile, "rb"))
            y_p = svm.predict(x_test_scaled)
            cm += confusion_matrix(y_test, y_p)
        # Row-normalize the accumulated confusion matrix and report WA/UA.
        cmp = cm / np.reshape(np.sum(cm, 1), (4, 1))
        imageName = "results/SVM/Test_{}.png".format(config)
        wa, ua = np.around(waua(cm), decimals=4)
        title = "wa={}, ua={}".format(wa, ua)
        plot_wauacm(title, cmp, emotionsTest, imageName)
        print("wa: " + str(wa) + ", ua: " + str(ua))
        print(cmp)
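# preprocess() is shared between the training and test scripts but defined
# elsewhere in this repository. The sketch below is a hedged illustration of
# its assumed behavior, not the project's actual code: hold out one speaker
# for testing and one for validation, map emotion names to class indices, and
# standardize feature columns with statistics fitted on the training speakers.
import numpy as np
from sklearn.preprocessing import StandardScaler


def preprocess_sketch(sp_test, sp_val, features, emotionsTest):
    speakers = features.index.unique("speaker")
    emo2idx = {emo: i for i, emo in enumerate(emotionsTest)}
    spk = np.array(features.index.get_level_values("speaker"))

    def split(mask):
        sub = features[mask]
        y = np.array([emo2idx[e]
                      for e in sub.index.get_level_values("emotion")])
        return sub.values, y

    x_test, y_test = split(spk == speakers[sp_test])
    x_val, y_val = split(spk == speakers[sp_val])
    x_train, y_train = split((spk != speakers[sp_test]) &
                             (spk != speakers[sp_val]))
    # Fit the scaler on training speakers only to avoid leakage.
    scaler = StandardScaler().fit(x_train)
    return (scaler.transform(x_train), y_train,
            scaler.transform(x_val), y_val,
            scaler.transform(x_test), y_test)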
import os
import socket

import numpy as np
import pandas as pd
from keras import backend as K
from keras.models import load_model
from sklearn.metrics import confusion_matrix

# preprocess, waua and plot_wauacm are shared project helpers, imported from
# the DNN training module in the original source.


def main():
    # On the "d8" GPU server, pin this job to a single GPU.
    if socket.getfqdn(socket.gethostname()) == "d8":
        os.environ["CUDA_VISIBLE_DEVICES"] = "3"
    batch_size = 128
    configs = ["IS09", "IS10", "IS11", "IS12", "ComParE", "GeMAPS", "eGeMAPS"]
    # IEMOCAP, improvised sessions; NHSA = Neutral/Happiness/Sadness/Anger.
    emotionsTest = ["Neutral", "Happiness", "Sadness", "Anger"]
    database = "IEMOCAP"
    for config in configs:
        # Load features
        features = pd.read_csv("features/{}_{}.csv".format(database, config))
        features.set_index(["speaker", "emotion", "actType", "name"],
                           inplace=True)
        nSpeaker = len(features.index.unique("speaker"))
        cmV = np.zeros((len(emotionsTest), len(emotionsTest)), dtype=int)
        cmT = np.zeros((len(emotionsTest), len(emotionsTest)), dtype=int)
        for sp in range(nSpeaker):
            # The speaker before the test speaker serves as validation set.
            (x_train_scaled, y_train_C, x_val_scaled, y_val_C,
             x_test_scaled, y_test_C) = preprocess(sp, sp - 1,
                                                   features, emotionsTest)
            num_classes = len(emotionsTest)
            model = load_model("models/DNN/{}_{}.h5".format(config, sp))
            y_val_p = model.predict(x_val_scaled, batch_size=batch_size,
                                    verbose=1)
            y_val_p_C = [emoC.argmax() for emoC in y_val_p]
            cmV += confusion_matrix(y_val_C, y_val_p_C,
                                    labels=list(range(num_classes)))
            y_test_p = model.predict(x_test_scaled, batch_size=batch_size,
                                     verbose=1)
            cmT += confusion_matrix(y_test_C,
                                    [emoC.argmax() for emoC in y_test_p],
                                    labels=list(range(num_classes)))
            del model
            K.clear_session()
            print("speaker: " + str(sp))
            print(cmV)
            print(cmT)
        waV, uaV = np.around(waua(cmV), decimals=4)
        waT, uaT = np.around(waua(cmT), decimals=4)
        cmpV = cmV / np.reshape(np.sum(cmV, 1), (4, 1))
        cmpT = cmT / np.reshape(np.sum(cmT, 1), (4, 1))
        imageName = "results/DNN/Test_{}_V.png".format(config)
        title = "wa={}, ua={}".format(waV, uaV)
        plot_wauacm(title, cmpV, emotionsTest, imageName)
        imageName = "results/DNN/Test_{}_T.png".format(config)
        title = "wa={}, ua={}".format(waT, uaT)
        plot_wauacm(title, cmpT, emotionsTest, imageName)
        print("waV: " + str(waV) + ", uaV: " + str(uaV))
        print(cmpV)
        print("waT: " + str(waT) + ", uaT: " + str(uaT))
        print(cmpT)
import keras
import numpy as np
from keras.models import load_model
from sklearn.metrics import confusion_matrix

import utils


def main():
    dataPath = "../../../Database/IEMOCAP_full_release/"
    # hyperparams
    tMax = 300
    fMax = 400
    batch_size = 64 * 2
    # IEMOCAP
    database = "IEMOCAP"
    emotionsTest = ["Neutral", "Happiness", "Sadness", "Anger"]
    num_classes = len(emotionsTest)
    actTypeToUse = ["impro"]
    emoTest = "".join([emo[0] for emo in actTypeToUse + emotionsTest])
    dataname = "{}_{}".format(database, emoTest)
    dataDf = utils.load_IEMOCAP(dataPath, actTypeToUse, emotionsTest)
    # confusion matrices for validation and test
    cmV = np.zeros((num_classes, num_classes), dtype=int)
    cmT = np.zeros((num_classes, num_classes), dtype=int)
    for sp_test in range(10):
        print("test speaker:", sp_test)
        (x_train, y_train, y_train_raw,
         x_val, y_val, y_val_raw,
         x_test, y_test, y_test_raw) = utils.Preprocess(sp_test, emotionsTest,
                                                        dataDf, tMax, fMax)
        model = load_model("models/CNN/{}.h5".format(sp_test))
        # Segment-level posteriors are mapped back to utterance-level
        # decisions before scoring.
        y_val_p = model.predict(x_val, batch_size=batch_size, verbose=1)
        y_val_p_C = utils.y_raw_transform(y_val_p, y_val_raw)
        cmV += confusion_matrix(y_val_raw[:, 0], y_val_p_C)
        y_test_p = model.predict(x_test, batch_size=batch_size, verbose=1)
        y_test_p_C = utils.y_raw_transform(y_test_p, y_test_raw)
        cmT += confusion_matrix(y_test_raw[:, 0], y_test_p_C)
        print("speaker: " + str(sp_test))
        print(cmV)
        print(cmT)
        del model
        keras.backend.clear_session()
    # Evaluation
    waV, uaV = np.around(utils.waua(cmV), decimals=4)
    waT, uaT = np.around(utils.waua(cmT), decimals=4)
    cmpV = cmV / np.reshape(np.sum(cmV, 1), (4, 1))
    cmpT = cmT / np.reshape(np.sum(cmT, 1), (4, 1))
    imageName = "results/CNN/Test_{}_CNN_V.png".format(dataname)
    title = "wa={}, ua={}".format(waV, uaV)
    utils.plot_wauacm(title, cmpV, emotionsTest, imageName)
    imageName = "results/CNN/Test_{}_CNN_T.png".format(dataname)
    title = "wa={}, ua={}".format(waT, uaT)
    utils.plot_wauacm(title, cmpT, emotionsTest, imageName)
    print("waV: " + str(waV) + ", uaV: " + str(uaV))
    print(cmpV)
    print("waT: " + str(waT) + ", uaT: " + str(uaT))
    print(cmpT)
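# utils.y_raw_transform() maps segment-level posteriors back to one class
# decision per utterance. Its real definition lives in utils; the sketch below
# is a hedged illustration under two assumptions that are not confirmed by the
# source: y_raw holds one row per utterance as [label, n_segments], and the
# segment predictions in y_p appear in utterance order.
import numpy as np


def y_raw_transform_sketch(y_p, y_raw):
    preds, start = [], 0
    for n_seg in y_raw[:, 1].astype(int):
        # Average the segment posteriors of one utterance, then decide.
        preds.append(np.mean(y_p[start:start + n_seg], axis=0).argmax())
        start += n_seg
    return preds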
import os
import socket

import numpy as np
import pandas as pd
from keras import backend as K
from keras.models import load_model

import utils


def main():
    if not os.path.exists("results"):
        os.mkdir("results")
    if not os.path.exists("models"):
        os.mkdir("models")
    specPath = "spectrograms/"
    # On the "d8" GPU server, use two GPUs and a larger batch.
    if socket.getfqdn(socket.gethostname()) == "d8":
        os.environ["CUDA_VISIBLE_DEVICES"] = "2,3"
        batch_size = 64 * 2
    else:
        batch_size = 64
    # parameters
    t_uplim = 200       # 2 s
    f_uplim = 400       # 4 kHz
    t_shift_test = 40   # 0.4 s
    # load the DataFrame of IEMOCAP
    dataDf = pd.read_csv(specPath + "IEMOCAP.csv", index_col=0)
    speakers = list(dataDf.speaker.unique())
    emotionsTest = list(dataDf.emotion.unique())
    num_classes = len(emotionsTest)
    # cross-validation
    cmTest = np.zeros((num_classes, num_classes))
    # %%
    for sp_test in range(len(speakers)):
        speakers_train = speakers.copy()
        speaker_test = speakers_train.pop(sp_test)
        print("Test speaker: {}".format(speaker_test))
        # preprocessing: test set only, since the models are already trained
        (x_test, y_test, y_test_raw) = utils.prepro(specPath, dataDf,
                                                    speaker_test, emotionsTest,
                                                    "test", t_shift_test,
                                                    f_uplim, t_uplim)
        K.clear_session()
        # %% test
        model = load_model("models/Inception_{}.h5".format(sp_test))
        y_test_p = model.predict(x_test, batch_size=batch_size, verbose=1)
        cmTest += utils.y_raw_evaluate(y_test_p, y_test_raw)
        del model
    # %% print results
    waTest, uaTest = utils.waua(cmTest)
    # test set
    print("Results on test set: wa={}, ua={}".format(waTest, uaTest))
    print(cmTest)
    title = "wa={}, ua={}".format(waTest, uaTest)
    imageName = "results/Inception_Test_T.png"
    cmTestP = cmTest / np.reshape(np.sum(cmTest, 1), (4, 1))
    utils.plot_wauacm(title, cmTestP, emotionsTest, imageName, ".4f")
import pickle

import numpy as np
from sklearn.metrics import confusion_matrix
from sklearn.svm import SVC

# preprocess, waua and plot_wauacm are defined elsewhere in this module.


def SVM_Train(config, emotionsTest, features, params):
    C, gamma = params
    cm = np.zeros((len(emotionsTest), len(emotionsTest)), dtype=int)
    nSpeaker = len(features.index.unique("speaker"))
    # Fixed validation speaker for each test speaker.
    sps_val = [0, 0, 2, 2, 4, 4, 6, 6, 8, 8]
    for sp in range(nSpeaker):
        print("Training with test speaker {}".format(sp))
        (x_train_scaled, y_train, x_val_scaled, y_val,
         x_test_scaled, y_test) = preprocess(sp, sps_val[sp],
                                             features, emotionsTest)
        svm = SVC(class_weight="balanced", C=C, gamma=gamma)
        svm.fit(x_train_scaled, y_train)
        y_p = svm.predict(x_test_scaled)
        cm += confusion_matrix(y_test, y_p)
        # Persist the per-speaker model for the separate test script.
        modelfile = "models/SVM/{}_{}.sav".format(config, sp)
        pickle.dump(svm, open(modelfile, "wb"))
    # Row-normalize the accumulated confusion matrix and report WA/UA.
    cmp = cm / np.reshape(np.sum(cm, 1), (4, 1))
    imageName = "results/SVM/Train_{}.png".format(config)
    wa, ua = np.around(waua(cm), decimals=4)
    title = "C={}, gamma={}, wa={}, ua={}".format(C, gamma, wa, ua)
    plot_wauacm(title, cmp, emotionsTest, imageName)
    print("wa: " + str(wa) + ", ua: " + str(ua))
    print(cmp)
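# waua() is implemented elsewhere in this repository. The sketch below shows
# the assumed behavior, using the standard definitions from the speech emotion
# recognition literature: WA (weighted accuracy) is the overall fraction of
# correct predictions, UA (unweighted accuracy) is the per-class recall
# averaged over classes. The confusion matrix has true labels on rows and
# predictions on columns, as produced by sklearn's confusion_matrix.
import numpy as np


def waua_sketch(cm):
    cm = np.asarray(cm, dtype=float)
    wa = np.trace(cm) / cm.sum()            # weighted accuracy
    ua = np.mean(np.diag(cm) / cm.sum(1))   # unweighted accuracy (mean recall)
    return wa, ua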
import os
import socket

import keras
import numpy as np
from sklearn.metrics import confusion_matrix
from sklearn.utils import class_weight

import utils

# cnn_model is the spectrogram CNN builder defined alongside this script.


def main():
    dataPath = "../../../Database/IEMOCAP_full_release/"
    if not os.path.exists("results"):
        os.mkdir("results")
    if not os.path.exists("results/CNN"):
        os.mkdir("results/CNN")
    if not os.path.exists("models"):
        os.mkdir("models")
    if not os.path.exists("models/CNN"):
        os.mkdir("models/CNN")
    if socket.getfqdn(socket.gethostname()) == "d8":
        os.environ["CUDA_VISIBLE_DEVICES"] = "2,3"
    # hyperparams
    dr = 0.3
    lr = 0.006
    tMax = 300
    fMax = 400
    input_shape = (tMax, fMax, 1)
    batch_size = 64 * 2
    # IEMOCAP
    database = "IEMOCAP"
    emotionsTest = ["Neutral", "Happiness", "Sadness", "Anger"]
    num_classes = len(emotionsTest)
    actTypeToUse = ["impro"]
    emoTest = "".join([emo[0] for emo in actTypeToUse + emotionsTest])
    dataname = "{}_{}".format(database, emoTest)
    dataDf = utils.load_IEMOCAP(dataPath, actTypeToUse, emotionsTest)
    # confusion matrices for validation and test
    cmV = np.zeros((num_classes, num_classes), dtype=int)
    cmT = np.zeros((num_classes, num_classes), dtype=int)
    for sp_test in range(10):
        print("test speaker:", sp_test)
        (x_train, y_train, y_train_raw,
         x_val, y_val, y_val_raw,
         x_test, y_test, y_test_raw) = utils.Preprocess(sp_test, emotionsTest,
                                                        dataDf, tMax, fMax)
        # set training parameters
        mc_cb = keras.callbacks.ModelCheckpoint('BestModel_CNN.h5',
                                                monitor='val_acc', verbose=1,
                                                save_best_only=True,
                                                save_weights_only=True)
        mc_es = keras.callbacks.EarlyStopping(monitor="acc", patience=10,
                                              verbose=1)
        # class weight, balanced over the utterance-level labels
        cw = class_weight.compute_class_weight('balanced',
                                               np.unique(y_train_raw[:, 0]),
                                               y_train_raw[:, 0])
        cw2 = {0: cw[0], 1: cw[1], 2: cw[2], 3: cw[3]}
        model = cnn_model(num_classes=num_classes, input_shape=input_shape,
                          dr=dr, lr=lr)
        # Warm up for 16 epochs without callbacks, then train for up to 50
        # more epochs with checkpointing and early stopping.
        epochs = 16
        model.fit(x_train, y_train, class_weight=cw2, batch_size=batch_size,
                  epochs=epochs, verbose=1, validation_data=(x_val, y_val))
        epochs = 50
        model.fit(x_train, y_train, class_weight=cw2, batch_size=batch_size,
                  epochs=epochs, verbose=1, callbacks=[mc_cb, mc_es],
                  validation_data=(x_val, y_val))
        del mc_cb
        # Restore the weights with the best validation accuracy.
        model.load_weights("BestModel_CNN.h5")
        y_val_p = model.predict(x_val, batch_size=batch_size, verbose=1)
        y_val_p_C = utils.y_raw_transform(y_val_p, y_val_raw)
        cmV += confusion_matrix(y_val_raw[:, 0], y_val_p_C)
        y_test_p = model.predict(x_test, batch_size=batch_size, verbose=1)
        y_test_p_C = utils.y_raw_transform(y_test_p, y_test_raw)
        cmT += confusion_matrix(y_test_raw[:, 0], y_test_p_C)
        print("speaker: " + str(sp_test))
        print(cmV)
        print(cmT)
        model.save("models/CNN/{}.h5".format(sp_test))
        del model
        keras.backend.clear_session()
    # Evaluation
    waV, uaV = np.around(utils.waua(cmV), decimals=4)
    waT, uaT = np.around(utils.waua(cmT), decimals=4)
    cmpV = cmV / np.reshape(np.sum(cmV, 1), (4, 1))
    cmpT = cmT / np.reshape(np.sum(cmT, 1), (4, 1))
    imageName = "results/CNN/Train_{}_CNN_V.png".format(dataname)
    title = "wa={}, ua={}".format(waV, uaV)
    utils.plot_wauacm(title, cmpV, emotionsTest, imageName)
    imageName = "results/CNN/Train_{}_CNN_T.png".format(dataname)
    title = "wa={}, ua={}".format(waT, uaT)
    utils.plot_wauacm(title, cmpT, emotionsTest, imageName)
    print("waV: " + str(waV) + ", uaV: " + str(uaV))
    print(cmpV)
    print("waT: " + str(waT) + ", uaT: " + str(uaT))
    print(cmpT)
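# cnn_model() is defined alongside the training script above; its actual
# architecture is not shown in this source. The sketch below is a deliberately
# small illustrative stand-in that matches the call signature
# cnn_model(num_classes=..., input_shape=..., dr=..., lr=...); the layer types,
# sizes and optimizer are assumptions, not the project's published model.
import keras


def cnn_model_sketch(num_classes, input_shape, dr, lr):
    model = keras.models.Sequential([
        keras.layers.Conv2D(16, (3, 3), activation="relu",
                            input_shape=input_shape),
        keras.layers.MaxPooling2D((2, 2)),
        keras.layers.Conv2D(32, (3, 3), activation="relu"),
        keras.layers.MaxPooling2D((2, 2)),
        keras.layers.Flatten(),
        keras.layers.Dropout(dr),
        keras.layers.Dense(num_classes, activation="softmax"),
    ])
    model.compile(optimizer=keras.optimizers.Adam(lr=lr),
                  loss="categorical_crossentropy", metrics=["accuracy"])
    return model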
import keras
import numpy as np
import tensorflow as tf
from sklearn.metrics import confusion_matrix
from sklearn.utils import class_weight

# preprocess, make_model, waua and plot_wauacm are defined elsewhere in this
# module.


def DNN_Train(features, emotionsTest, config):
    dr = 0.4
    lr = 0.0002
    batch_size = 128
    epochs = 100
    nSpeaker = len(features.index.unique("speaker"))
    cmV = np.zeros((len(emotionsTest), len(emotionsTest)), dtype=int)
    cmT = np.zeros((len(emotionsTest), len(emotionsTest)), dtype=int)
    for sp in range(nSpeaker):
        # The speaker before the test speaker serves as validation set.
        (x_train_scaled, y_train_C, x_val_scaled, y_val_C,
         x_test_scaled, y_test_C) = preprocess(sp, sp - 1,
                                               features, emotionsTest)
        num_classes = len(emotionsTest)
        y_train = keras.utils.to_categorical(y_train_C, num_classes)
        # y_test = keras.utils.to_categorical(y_test_C, num_classes)
        y_val = keras.utils.to_categorical(y_val_C, num_classes)
        # DNN model
        input_shape = (features.shape[1], )
        mc_cb = keras.callbacks.ModelCheckpoint('BestModel_DNN.h5',
                                                monitor='val_acc', verbose=1,
                                                save_best_only=True,
                                                save_weights_only=True)
        es_cb = keras.callbacks.EarlyStopping(monitor="acc", patience=10,
                                              verbose=1)
        # class weight
        cw = class_weight.compute_class_weight('balanced',
                                               np.unique(y_train_C), y_train_C)
        cw2 = {0: cw[0], 1: cw[1], 2: cw[2], 3: cw[3]}
        # Run each fold in a fresh TF1 graph/session so the per-speaker models
        # do not accumulate in memory.
        tf.reset_default_graph()
        graph = tf.Graph()
        with graph.as_default() as g:
            with tf.Session(graph=g):
                model = make_model(num_classes=num_classes,
                                   input_shape=input_shape, dr=dr, lr=lr)
                # if socket.getfqdn(socket.gethostname()) == "d8":
                #     model = multi_gpu_model(model, gpus=4)
                model.fit(x_train_scaled, y_train, class_weight=cw2,
                          batch_size=batch_size, epochs=epochs, verbose=1,
                          callbacks=[mc_cb, es_cb],
                          validation_data=(x_val_scaled, y_val))
                # evaluation
                del mc_cb
                model.load_weights('BestModel_DNN.h5')
                y_val_p = model.predict(x_val_scaled, batch_size=batch_size,
                                        verbose=1)
                y_val_p_C = [emoC.argmax() for emoC in y_val_p]
                cmV += confusion_matrix(y_val_C, y_val_p_C,
                                        labels=list(range(num_classes)))
                y_test_p = model.predict(x_test_scaled, batch_size=batch_size,
                                         verbose=1)
                cmT += confusion_matrix(y_test_C,
                                        [emoC.argmax() for emoC in y_test_p],
                                        labels=list(range(num_classes)))
                print("speaker: " + str(sp))
                print(cmV)
                print(cmT)
                model.save("models/DNN/{}_{}.h5".format(config, sp))
    waV, uaV = np.around(waua(cmV), decimals=4)
    waT, uaT = np.around(waua(cmT), decimals=4)
    cmpV = cmV / np.reshape(np.sum(cmV, 1), (4, 1))
    cmpT = cmT / np.reshape(np.sum(cmT, 1), (4, 1))
    imageName = "results/DNN/Train_{}_V.png".format(config)
    title = "wa={}, ua={}".format(waV, uaV)
    plot_wauacm(title, cmpV, emotionsTest, imageName)
    imageName = "results/DNN/Train_{}_T.png".format(config)
    title = "wa={}, ua={}".format(waT, uaT)
    plot_wauacm(title, cmpT, emotionsTest, imageName)
    print("waV: " + str(waV) + ", uaV: " + str(uaV))
    print(cmpV)
    print("waT: " + str(waT) + ", uaT: " + str(uaT))
    print(cmpT)
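# make_model() builds the feed-forward DNN; its actual topology lives
# elsewhere in this module. The sketch below matches the call signature
# make_model(num_classes=..., input_shape=..., dr=..., lr=...); the layer
# widths and the choice of Adam are assumptions, not the project's real model.
import keras


def make_model_sketch(num_classes, input_shape, dr, lr):
    model = keras.models.Sequential([
        keras.layers.Dense(1024, activation="relu", input_shape=input_shape),
        keras.layers.Dropout(dr),
        keras.layers.Dense(1024, activation="relu"),
        keras.layers.Dropout(dr),
        keras.layers.Dense(num_classes, activation="softmax"),
    ])
    model.compile(optimizer=keras.optimizers.Adam(lr=lr),
                  loss="categorical_crossentropy", metrics=["accuracy"])
    return model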
import os
import socket

import keras
import numpy as np
import pandas as pd
from keras import backend as K
from keras.optimizers import SGD
from sklearn.utils import class_weight

import utils

# cnn_model (the Inception-style builder) and lrschedule are defined
# alongside this script.


def main():
    if not os.path.exists("results"):
        os.mkdir("results")
    if not os.path.exists("models"):
        os.mkdir("models")
    specPath = "spectrograms/"
    if socket.getfqdn(socket.gethostname()) == "d8":
        os.environ["CUDA_VISIBLE_DEVICES"] = "2,3"
        batch_size = 64 * 2
    else:
        batch_size = 64
    # parameters
    t_uplim = 200        # 2 s
    f_uplim = 400        # 4 kHz
    t_shift_train = 100  # 1 s
    t_shift_test = 40    # 0.4 s
    # load the DataFrame of IEMOCAP
    dataDf = pd.read_csv(specPath + "IEMOCAP.csv", index_col=0)
    speakers = list(dataDf.speaker.unique())
    emotionsTest = list(dataDf.emotion.unique())
    num_classes = len(emotionsTest)
    # cross-validation
    cmTest = np.zeros((num_classes, num_classes))
    # %%
    for sp_test in range(len(speakers)):
        speakers_train = speakers.copy()
        speaker_test = speakers_train.pop(sp_test)
        print("Test speaker: {}".format(speaker_test))
        # preprocessing
        # train set
        (x_train, y_train, y_train_raw) = utils.prepro(specPath, dataDf,
                                                       speakers_train,
                                                       emotionsTest, "train",
                                                       t_shift_train,
                                                       f_uplim, t_uplim)
        # test set
        (x_test, y_test, y_test_raw) = utils.prepro(specPath, dataDf,
                                                    speaker_test, emotionsTest,
                                                    "test", t_shift_test,
                                                    f_uplim, t_uplim)
        # class weight, balanced over the utterance-level labels
        cw = class_weight.compute_class_weight('balanced',
                                               np.unique(y_train_raw[:, 0]),
                                               y_train_raw[:, 0])
        cw2 = {0: cw[0], 1: cw[1], 2: cw[2], 3: cw[3]}
        K.clear_session()
        model = cnn_model(f_uplim, t_uplim, num_classes)
        lrs_cb = keras.callbacks.LearningRateScheduler(lrschedule, verbose=1)
        epochs = 50
        model.compile(optimizer=SGD(lr=0.05, momentum=0.9, decay=0.0001,
                                    nesterov=True),
                      loss="categorical_crossentropy", metrics=["accuracy"])
        # Note: the held-out speaker doubles as the validation set here.
        model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs,
                  class_weight=cw2, callbacks=[lrs_cb],
                  validation_data=(x_test, y_test))
        # %% test
        y_test_p = model.predict(x_test, batch_size=batch_size, verbose=1)
        cmTest += utils.y_raw_evaluate(y_test_p, y_test_raw)
        model.save("models/Inception_{}.h5".format(sp_test))
        del model
    # %% print results
    waTest, uaTest = utils.waua(cmTest)
    # test set
    print("Results on test set: wa={}, ua={}".format(waTest, uaTest))
    print(cmTest)
    title = "wa={}, ua={}".format(waTest, uaTest)
    imageName = "results/Inception_Test_T.png"
    cmTestP = cmTest / np.reshape(np.sum(cmTest, 1), (4, 1))
    utils.plot_wauacm(title, cmTestP, emotionsTest, imageName, ".4f")
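# lrschedule() feeds keras.callbacks.LearningRateScheduler above; its actual
# decay curve is defined elsewhere in this repository. The sketch below is an
# illustrative step decay starting from the SGD initial rate of 0.05 used in
# the compile() call; the breakpoints and decay factors are assumptions.
def lrschedule_sketch(epoch):
    if epoch < 20:
        return 0.05
    if epoch < 35:
        return 0.005
    return 0.0005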