def performCrossValidation(x_data, y_data):
    metrics_final = metrics.metrics(None, None, LABELS, 2)
    for seed in range(ROUNDS):
        np.random.seed(seed)
        kfold = KFold(n_splits=KFOLDS, shuffle=True, random_state=seed)
        for train, test in kfold.split(x_data, y_data):
            # Free GPU memory between folds
            from keras import backend as K
            K.clear_session()

            # One-hot encode the gold emotion labels
            classes_train = pd.concat(
                [y_data[train], pd.get_dummies(y_data[train])],
                axis=1).drop(['Prior_Emotion'], axis=1)
            classes_test = pd.concat(
                [y_data[test], pd.get_dummies(y_data[test])],
                axis=1).drop(['Prior_Emotion'], axis=1)

            model = create_CNN_Model()
            # vectors_shaped = np.expand_dims(vectors[train], axis=2)
            # classes_train = np.expand_dims(classes_train, axis=1)

            # Multi-task training: predict emotions and appraisal vectors jointly
            history = model.fit([data_padded[train], data_padded[train]],
                                [classes_train, vectors[train]],
                                batch_size=BATCH_SIZE, epochs=EPOCHS,
                                verbose=VERBOSITY)

            predicted_emotions = []
            predictions = model.predict([data_padded[test], data_padded[test]])
            print(predictions[0][0])
            # predictions[0] holds the emotion output of the multi-task model
            for i in range(len(predictions[0])):
                index = np.argmax(predictions[0][i])
                predicted_emotions.append(LABELS[index])

            rounding_decimals = 2
            metrics_fold = metrics.metrics(y_data[test], predicted_emotions,
                                           LABELS, rounding_decimals)
            metrics_fold.showResults()
            metrics_final.addIntermediateResults(y_data[test],
                                                 predicted_emotions)

    metrics_final.showResults()
    metrics_final.createMarkdownResults()
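# A minimal, self-contained sketch of the one-hot encoding idiom used above.
# The toy labels below are invented for illustration; in the real data the
# Series is the gold emotion column named 'Prior_Emotion'.
def _demo_one_hot_encoding():
    y = pd.Series(['Joy', 'Fear', 'Joy'], name='Prior_Emotion')
    # Concatenating the label column with its dummies and then dropping the
    # label again leaves one indicator column per emotion class
    # (0/1 or False/True depending on the pandas version).
    ohe = pd.concat([y, pd.get_dummies(y)], axis=1).drop(['Prior_Emotion'],
                                                         axis=1)
    print(ohe)  # columns: Fear, Joy; rows: [0, 1], [1, 0], [0, 1]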
def performCrossValidation(x_data, y_data):
    metrics_final = metrics.metrics(None, None, LABELS, 2)
    for seed in range(ROUNDS):
        np.random.seed(seed)
        kfold = KFold(n_splits=KFOLDS, shuffle=True, random_state=seed)
        for train, test in kfold.split(x_data, y_data):
            # One-hot encode the gold emotion labels
            classes_train = pd.concat(
                [y_data[train], pd.get_dummies(y_data[train])],
                axis=1).drop(['Prior_Emotion'], axis=1)
            classes_test = pd.concat(
                [y_data[test], pd.get_dummies(y_data[test])],
                axis=1).drop(['Prior_Emotion'], axis=1)

            model = MaxEnt(input_shape, output_shape, 'softmax')
            model.compile(loss='categorical_crossentropy',
                          metrics=['accuracy'], optimizer=OPTIMIZER)
            model.fit(x_data[train], classes_train, batch_size=BATCH_SIZE,
                      epochs=EPOCHS, verbose=VERBOSITY)

            predicted_classes = []
            loss, accuracy = model.evaluate(x_data[test], classes_test,
                                            verbose=0)
            print(accuracy)
            predictions = model.predict(x_data[test])
            for i in range(len(predictions)):
                index = np.argmax(predictions[i])
                predicted_classes.append(LABELS[index])

            # Show fold result
            metrics_fold = metrics.metrics(y_data[test], predicted_classes,
                                           LABELS, 2)
            metrics_fold.showResults()
            metrics_final.addIntermediateResults(y_data[test],
                                                 predicted_classes)

            # Cross-check with sklearn
            # logisticRegression = LogisticRegression(
            #     n_jobs=1, C=1e5, solver='lbfgs', multi_class='multinomial',
            #     max_iter=1000, penalty='l2').fit(x_data[train], y_data[train])
            # predicted = logisticRegression.predict(x_data[test])
            # print(classification_report(y_data[test], predicted, target_names=LABELS))

            # Free GPU memory between folds
            from keras import backend as K
            K.clear_session()

    print('\nFinal Result:')
    metrics_final.writeResults(EXPERIMENTNAME, SAVEFILE)
    return
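# MaxEnt() is defined elsewhere in the repository. Since a maximum entropy
# classifier is equivalent to multinomial logistic regression (cf. the
# commented sklearn LogisticRegression cross-check above), a plausible
# minimal reconstruction is a single softmax Dense layer. The signature is
# taken from the call site; the layer arrangement is an assumption of this
# sketch, not the repository's exact definition.
from keras.models import Sequential
from keras.layers import Dense

def MaxEnt_sketch(input_shape, output_shape, activation):
    model = Sequential()
    # One linear layer plus softmax == multinomial logistic regression
    model.add(Dense(output_shape, input_shape=(input_shape, ),
                    activation=activation))
    return model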
def performCrossValidation(x_data, y_data):
    metrics_final = metrics.metrics(None, None, LABELS, 2)
    # i.e. seed in {0..9}
    for seed in range(ROUNDS):
        np.random.seed(seed)
        kfold = KFold(n_splits=KFOLDS, shuffle=True, random_state=seed)
        for train, test in kfold.split(x_data, y_data):
            # Create one-hot encoding of the gold emotion labels
            classes_train = pd.concat(
                [y_data[train], pd.get_dummies(y_data[train])],
                axis=1).drop(['Prior_Emotion'], axis=1)
            classes_test = pd.concat(
                [y_data[test], pd.get_dummies(y_data[test])],
                axis=1).drop(['Prior_Emotion'], axis=1)

            model = text_cnn_model_baseline(MAX_SEQUENCE_LENGTH, vocab_size,
                                            EMBEDDING_DIMS, FILTER_SIZE,
                                            CONV_FILTERS, embedding_matrix,
                                            DROPOUT, LABELS, 'softmax')
            model.compile(OPTIMIZER, 'categorical_crossentropy',
                          metrics=['accuracy'])

            print('\nINFO: Training...')
            model.fit(x_data[train], classes_train, batch_size=BATCH_SIZE,
                      epochs=EPOCHS, verbose=VERBOSITY)

            predicted_classes = []
            print('\nINFO: Evaluating fold...')
            predictions = model.predict(x_data[test])
            for i in range(len(predictions)):
                index = np.argmax(predictions[i])
                predicted_classes.append(LABELS[index])

            # Show fold result
            metrics_fold = metrics.metrics(y_data[test], predicted_classes,
                                           LABELS, 2)
            metrics_fold.showResults()
            metrics_final.addIntermediateResults(y_data[test],
                                                 predicted_classes)

            # Free memory
            from keras import backend as K
            K.clear_session()

    print('\nINFO: Final Result:')
    metrics_final.writeResults(EXPERIMENTNAME, SAVEFILE)
    return
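# text_cnn_model_baseline() is defined elsewhere in the repository. Judging
# from its parameters (sequence length, vocabulary size, pretrained embedding
# matrix, filter size and count, dropout, label set, output activation), it
# is presumably a standard single-channel text CNN. The layer arrangement
# below is a hedged reconstruction under those assumptions, not the
# repository's exact definition.
from keras.models import Sequential
from keras.layers import (Embedding, Conv1D, GlobalMaxPooling1D, Dropout,
                          Dense)

def text_cnn_model_baseline_sketch(max_sequence_length, vocab_size,
                                   embedding_dims, filter_size, conv_filters,
                                   embedding_matrix, dropout, labels,
                                   activation):
    model = Sequential()
    # Pretrained, frozen word embeddings
    model.add(Embedding(vocab_size, embedding_dims,
                        weights=[embedding_matrix],
                        input_length=max_sequence_length,
                        trainable=False))
    # Convolution over n-gram windows, max-pooled over time
    model.add(Conv1D(conv_filters, filter_size, activation='relu'))
    model.add(GlobalMaxPooling1D())
    model.add(Dropout(dropout))
    model.add(Dense(len(labels), activation=activation))
    return model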
def evalTrainAndTestSet(instances_padded, labels, instances_padded_test,
                        labels_test):
    # One-hot encode the gold emotion labels
    classes_train = pd.concat([labels, pd.get_dummies(labels)],
                              axis=1).drop(['Prior_Emotion'], axis=1)
    classes_test = pd.concat([labels_test, pd.get_dummies(labels_test)],
                             axis=1).drop(['Prior_Emotion'], axis=1)

    model = text_cnn_model_baseline(MAX_SEQUENCE_LENGTH, vocab_size,
                                    EMBEDDING_DIMS, FILTER_SIZE, CONV_FILTERS,
                                    embedding_matrix, DROPOUT, LABELS,
                                    'softmax')
    model.compile(OPTIMIZER, 'categorical_crossentropy', metrics=['accuracy'])

    print('\nINFO: Training...')
    model.fit(instances_padded, classes_train, batch_size=BATCH_SIZE,
              epochs=EPOCHS, verbose=VERBOSITY)
    # model.save('models/epoch_' + str(i) + '.h5')
    # from keras.models import load_model
    # model = load_model('models/epoch_' + str(i+9) + '.h5')
    # print('models/epoch_' + str(i+9) + '.h5')

    predicted_classes = []
    print('\nINFO: Evaluating test set...')
    predictions = model.predict(instances_padded_test)
    for i in range(len(predictions)):
        index = np.argmax(predictions[i])
        predicted_classes.append(LABELS[index])

    # Show test result
    metrics_fold = metrics.metrics(labels_test, predicted_classes, LABELS, 2)
    metrics_fold.showResults()
def evalTrainAndTestSet(x_train, x_test, y_train, y_test):
    # One-hot encode the gold emotion labels
    classes_train = pd.concat([y_train, pd.get_dummies(y_train)],
                              axis=1).drop(['Prior_Emotion'], axis=1)
    classes_test = pd.concat([y_test, pd.get_dummies(y_test)],
                             axis=1).drop(['Prior_Emotion'], axis=1)

    model = shallowNN_emotions_from_dimensions(len(APPRAISALS), LAYER_DIM,
                                               DROPOUT, LABELS, 'softmax')
    model.compile(OPTIMIZER, 'categorical_crossentropy', metrics=['accuracy'])
    # appraisals_shaped = np.expand_dims(appraisals, axis=2)

    print('\nTraining on %d instances...' % len(x_train))
    if (args.quiet):
        # Train epoch by epoch, printing a progress dot per epoch
        for _ in range(EPOCHS):
            model.fit(x_train, classes_train, batch_size=BATCH_SIZE,
                      epochs=1, verbose=VERBOSITY)
            print('.', end='', flush=True)
    else:
        model.fit(x_train, classes_train, batch_size=BATCH_SIZE,
                  epochs=EPOCHS, verbose=VERBOSITY)

    predicted_classes = []
    print('\nTesting on %d instances...' % len(x_test))
    # appraisals_shaped = np.expand_dims(appraisals, axis=2)
    predictions = model.predict(x_test)
    for i in range(len(predictions)):
        index = np.argmax(predictions[i])
        predicted_classes.append(LABELS[index])

    metrics_fold = metrics.metrics(y_test, predicted_classes, LABELS, 2)
    metrics_fold.showResults()
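# shallowNN_emotions_from_dimensions() is defined elsewhere. Judging from its
# call sites (input size len(APPRAISALS), a hidden-layer size, dropout, the
# label set and an output activation), a plausible reconstruction is a single
# hidden-layer feed-forward network. Treat this as a sketch under those
# assumptions rather than the repository's exact architecture.
from keras.models import Sequential
from keras.layers import Dense, Dropout

def shallowNN_emotions_from_dimensions_sketch(input_dim, layer_dim, dropout,
                                              labels, activation):
    model = Sequential()
    model.add(Dense(layer_dim, input_shape=(input_dim, ), activation='relu'))
    model.add(Dropout(dropout))
    model.add(Dense(len(labels), activation=activation))
    return model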
def performCrossValidation(x_data, y_data):
    metrics_final = metrics.metrics(None, None, LABELS, 2)
    for seed in range(ROUNDS):
        np.random.seed(seed)
        kfold = KFold(n_splits=KFOLDS, shuffle=True, random_state=seed)
        for train, test in kfold.split(x_data, y_data):
            # Free GPU memory between folds
            from keras import backend as K
            K.clear_session()

            # One-hot encode the gold emotion labels
            classes_train = pd.concat(
                [y_data[train], pd.get_dummies(y_data[train])],
                axis=1).drop(['Prior_Emotion'], axis=1)
            classes_test = pd.concat(
                [y_data[test], pd.get_dummies(y_data[test])],
                axis=1).drop(['Prior_Emotion'], axis=1)

            model = shallowNN_emotions_from_dimensions(
                len(APPRAISALS), LAYER_DIM, DROPOUT, LABELS, 'softmax')
            model.compile(OPTIMIZER, 'categorical_crossentropy',
                          metrics=['accuracy'])
            # appraisals_shaped = np.expand_dims(appraisals[train], axis=2)

            # Note: x_data only drives the fold indices; the features come
            # from the global appraisal matrix.
            print('\n\nTraining on %d instances...' % len(classes_train))
            if (args.quiet):
                for _ in range(EPOCHS):
                    model.fit(appraisals[train], classes_train,
                              batch_size=BATCH_SIZE, epochs=1,
                              verbose=VERBOSITY)
                    print('.', end='', flush=True)
            else:
                model.fit(appraisals[train], classes_train,
                          batch_size=BATCH_SIZE, epochs=EPOCHS,
                          verbose=VERBOSITY)

            predicted_classes = []
            print('\nEvaluating fold (%d instances)...' % len(y_data[test]))
            # appraisals_shaped = np.expand_dims(appraisals[test], axis=2)
            predictions = model.predict(appraisals[test])
            for i in range(len(predictions)):
                index = np.argmax(predictions[i])
                predicted_classes.append(LABELS[index])

            metrics_fold = metrics.metrics(y_data[test], predicted_classes,
                                           LABELS, 2)
            metrics_fold.showResults()
            metrics_final.addIntermediateResults(y_data[test],
                                                 predicted_classes)

    print('\nFinal Result:')
    metrics_final.writeResults(EXPERIMENTNAME, SAVEFILE)
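# The argmax-decoding loop above recurs in every function of this file. For
# reference, an equivalent vectorized helper (assuming the module-level
# LABELS list and a 2D array of softmax scores):
def decode_predictions(predictions, labels=LABELS):
    # predictions: array of shape (n_instances, n_classes)
    return [labels[i] for i in predictions.argmax(axis=1)]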
def performCrossValidation(x_data, y_data):
    percentage_done = 0
    metrics_final = metrics.metrics(None, None, LABELS, 2)
    TP_total = 0
    size_total = 0
    TP_Baseline = 0
    for seed in range(ROUNDS):
        np.random.seed(seed)
        kfold = KFold(n_splits=KFOLDS, shuffle=True, random_state=seed)
        for train, test in kfold.split(x_data, y_data):
            K.clear_session()

            # One-hot encode the gold emotion labels
            classes_train = pd.concat(
                [y_data[train], pd.get_dummies(y_data[train])],
                axis=1).drop(['Prior_Emotion'], axis=1)
            classes_test = pd.concat(
                [y_data[test], pd.get_dummies(y_data[test])],
                axis=1).drop(['Prior_Emotion'], axis=1)

            # Learn to predict emotions based on text (on enISEAR)
            text_to_emotion_model = text_cnn_model_baseline(
                MAX_SEQUENCE_LENGTH, 3116,  # hardcoded vocabulary size
                EMBEDDING_DIMS, FILTER_SIZE, CONV_FILTERS, embedding_matrix,
                DROPOUT, LABELS, 'softmax')
            text_to_emotion_model.compile(OPTIMIZER,
                                          'categorical_crossentropy',
                                          metrics=['accuracy'])
            text_to_emotion_model.fit(data_padded[train], classes_train,
                                      batch_size=BATCH_SIZE,
                                      epochs=EPOCHS_text_to_emotion,
                                      verbose=VERBOSITY)
            # text_to_emotion_model.save('saved_models/baseline_' + str(percentage_done))
            # # del text_to_emotion_model
            # # K.clear_session()
            # text_to_emotion_model = load_model('saved_models/baseline_' + str(percentage_done))

            predicted_classes_text_to_emotion = []
            predictions = text_to_emotion_model.predict(x_data[test])
            for i in range(len(predictions)):
                index = np.argmax(predictions[i])
                predicted_classes_text_to_emotion.append(LABELS[index])

            metrics_fold1 = metrics.metrics(
                y_data[test], predicted_classes_text_to_emotion, LABELS, 2)
            metrics_fold1.showResults()

            # Learn to predict emotions from dimensions
            model1 = shallowNN_emotions_from_dimensions(
                len(DIMENSIONS), LAYER_DIM, DROPOUT, LABELS, 'softmax')
            model1.compile(OPTIMIZER, 'categorical_crossentropy',
                           metrics=['accuracy'])
            # vectors_shaped = np.expand_dims(vectors[train], axis=2)
            model1.fit(vectors[train], classes_train, batch_size=BATCH_SIZE,
                       epochs=EPOCHS_p1, verbose=VERBOSITY)
            # model1.save('saved_models/dim_to_emotion_' + str(percentage_done))
            # model1 = load_model('saved_models/dim_to_emotion_' + str(percentage_done))

            # Learn to predict appraisals from text
            # (len(LABELS) == len(DIMENSIONS) == 7 in this setup)
            model = text_cnn_model_appraisals(MAX_SEQUENCE_LENGTH, vocab_size,
                                              EMBEDDING_DIMS, FILTER_SIZE,
                                              CONV_FILTERS, embedding_matrix,
                                              DROPOUT, len(LABELS), 'sigmoid')
            model.compile(OPTIMIZER, 'binary_crossentropy',
                          metrics=['accuracy'])
            # model.fit(data_padded[train], vectors[train], batch_size=BATCH_SIZE,
            #           epochs=EPOCHS_p2, verbose=VERBOSITY, class_weight=class_weight)
            model.fit(data_padded[train], vectors[train],
                      batch_size=BATCH_SIZE, epochs=EPOCHS_p2,
                      verbose=VERBOSITY)
            # model.save('saved_models/text_to_dim_' + str(percentage_done))
            # model = load_model('saved_models/text_to_dim_' + str(percentage_done))

            # Predict dimensions on the enISEAR test fold, binarized at 0.5
            preds = model.predict(data_padded[test])
            results = []
            for row in range(len(preds)):
                res = []
                for dim in range(len(DIMENSIONS)):
                    value = preds[row][dim]
                    if (value >= 0.5):
                        value = 1
                    else:
                        value = 0
                    res.append(value)
                results.append(res)
            results = np.array(results)

            predicted_classes = []
            predictions = model1.predict(results)
            for i in range(len(predictions)):
                index = np.argmax(predictions[i])
                predicted_classes.append(LABELS[index])

            # Predict dimensions on the full dataset (used to build the
            # selector training targets below)
            preds = model.predict(data_padded)
            results = []
            for row in range(len(preds)):
                res = []
                for dim in range(len(DIMENSIONS)):
                    value = preds[row][dim]
                    if (value >= 0.5):
                        value = 1
                    else:
                        value = 0
                    res.append(value)
                results.append(res)
            results = np.array(results)

            # Predict emotions from predicted appraisals on enISEAR
            predicted_classes_train = []
            predictions = model1.predict(results)
            for i in range(len(predictions)):
                index = np.argmax(predictions[i])
                predicted_classes_train.append(LABELS[index])

            print('enISEAR: T->A->E')
            metrics_fold = metrics.metrics(y_data[test], predicted_classes,
                                           LABELS, 2)
            metrics_fold.showResults()

            # Predict emotions from text on enISEAR
            predicted_classes_text_to_emotion_train = []
            predictions = text_to_emotion_model.predict(data_padded)
            for i in range(len(predictions)):
                index = np.argmax(predictions[i])
                predicted_classes_text_to_emotion_train.append(LABELS[index])

            sentences = data_enISEAR
            classes = classes_enISEAR
            TP_text_to_emotion = 0
            TP_text_to_appraisal_to_emotion = 0
            FN = 0
            train_sentences = []
            pred_combined = []
            labels_selector = []
            # Selector targets: [pipeline correct, baseline correct]
            for i in range(len(data_padded)):
                label_gold = classes_enISEAR.iloc[i]
                if (label_gold == predicted_classes_text_to_emotion_train[i]
                        and label_gold == predicted_classes_train[i]):
                    # Both models correct
                    labels_selector.append([1, 1])
                elif (label_gold == predicted_classes_train[i]):
                    # Only appraisal model correct
                    labels_selector.append([1, 0])
                elif (label_gold == predicted_classes_text_to_emotion_train[i]):
                    # Only text-to-emotion model correct
                    labels_selector.append([0, 1])
                else:
                    # Both models predicted the wrong emotion
                    labels_selector.append([0, 0])
            # print(len(labels_selector))
            labels_selector = np.array(labels_selector)

            selector_model = text_cnn_model_selector(
                MAX_SEQUENCE_LENGTH, vocab_size, EMBEDDING_DIMS, FILTER_SIZE,
                CONV_FILTERS, embedding_matrix, DROPOUT, LABELS, 'sigmoid')
            selector_model.compile(loss='binary_crossentropy',
                                   metrics=['accuracy'], optimizer=OPTIMIZER)
            selector_model.fit(data_padded, labels_selector, batch_size=32,
                               epochs=EPOCHS_SELECTION_MODEL,
                               verbose=VERBOSITY,
                               class_weight=class_weight_selector_model)

            TP_text_to_emotion = 0
            TP_text_to_appraisal_to_emotion = 0
            FN = 0
            selection_ = []
            selections = selector_model.predict(x_data[test])
            for i in range(len(selections)):
                selection_.append(np.argmax(selections[i]))
                # selection_.append(selection[i])
                # print(selection_[i])
                # print(selection[i])

            sentences = sentence_enISEAR_raw[test]
            classes = classes_enISEAR[test]
            pred_combined = []
            labels_selector = []
            for i in range(len(x_data[test])):
                label_gold = classes.iloc[i]
                if (selection_[i] == 1):
                    # Selector trusts the text-to-emotion baseline
                    TP_text_to_emotion += 1
                    pred_combined.append(predicted_classes_text_to_emotion[i])
                elif (selection_[i] == 0):
                    # Selector trusts the appraisal pipeline
                    TP_text_to_appraisal_to_emotion += 1  # = TN
                    pred_combined.append(predicted_classes[i])
                else:
                    # Unreachable: argmax over two outputs is always 0 or 1
                    FN += 1
                    # pred_combined.append(predicted_classes_text_to_emotion[i])

            percentage_done += 1
            print('\nPerforming CV... (' + str(percentage_done) + '%)')
            print('Selected from Baseline : ' + str(TP_text_to_emotion))
            print('Selected from Pipeline : '
                  + str(TP_text_to_appraisal_to_emotion))

            metrics_fold = metrics.metrics(y_data[test], pred_combined,
                                           LABELS, 2)
            metrics_fold.showResults()
            metrics_final.addIntermediateResults(y_data[test], pred_combined)
            # TP_total += TP
            # size_total += size

    print('\nFinal Result:')
    metrics_final.writeResults(EXPERIMENTNAME, SAVEFILE)
    return
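# How the selector targets and the decoding fit together: the selector is
# trained on two sigmoid outputs, [pipeline correct, baseline correct], and
# at test time np.argmax picks the model to trust (ties resolve to the
# pipeline, index 0). A tiny worked example with made-up scores:
def _demo_selector_decoding():
    scores = np.array([[0.8, 0.3],   # argmax 0 -> trust the appraisal pipeline
                       [0.2, 0.9]])  # argmax 1 -> trust the text-to-emotion baseline
    for row in scores:
        source = 'baseline' if np.argmax(row) == 1 else 'pipeline'
        print(row, '->', source)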
def performCrossValidation(x_data, y_data):
    percentage_done = 0
    metrics_final = metrics.metrics(None, None, LABELS, 2)
    TP_total = 0
    size_total = 0
    for seed in range(ROUNDS):
        np.random.seed(seed)
        kfold = KFold(n_splits=KFOLDS, shuffle=True, random_state=seed)
        for train, test in kfold.split(x_data, y_data):
            from keras import backend as K
            K.clear_session()
            tf.reset_default_graph()

            # One-hot encode the gold emotion labels
            classes_train = pd.concat(
                [y_data[train], pd.get_dummies(y_data[train])],
                axis=1).drop(['Prior_Emotion'], axis=1)
            classes_test = pd.concat(
                [y_data[test], pd.get_dummies(y_data[test])],
                axis=1).drop(['Prior_Emotion'], axis=1)

            ####################################################################
            # The problem with this oracle setup is that the models influence
            # each other if they run in the same process on the GPU.
            # A workaround is to train the baseline model on the folds and
            # save all model weights locally, then do the same for the
            # pipeline. Afterwards, load the saved weights and use them to
            # predict the emotions.
            # This means 10*10 models will be saved (10 folds times 10 runs).
            ####################################################################

            ####################################################################
            # Uncomment this to create and save the baseline model weights
            ####################################################################
            text_to_emotion_model = text_cnn_model_baseline(
                MAX_SEQUENCE_LENGTH, vocab_size, EMBEDDING_DIMS, FILTER_SIZE,
                CONV_FILTERS, embedding_matrix, DROPOUT, LABELS, 'softmax')
            text_to_emotion_model.compile(OPTIMIZER,
                                          'categorical_crossentropy',
                                          metrics=['accuracy'])
            text_to_emotion_model.fit(x_data[train], classes_train,
                                      batch_size=BATCH_SIZE,
                                      epochs=EPOCHS_text_to_emotion,
                                      verbose=VERBOSITY)
            text_to_emotion_model.save('baseline_' + str(percentage_done))

            ####################################################################
            # This will load the baseline model weights
            ####################################################################
            text_to_emotion_model = load_model('baseline_'
                                               + str(percentage_done))

            # Evaluate baseline model
            predicted_classes_text_to_emotion = []
            predictions = text_to_emotion_model.predict(x_data[test])
            for i in range(len(predictions)):
                index = np.argmax(predictions[i])
                predicted_classes_text_to_emotion.append(LABELS[index])

            metrics_fold = metrics.metrics(
                y_data[test], predicted_classes_text_to_emotion, LABELS, 2)
            metrics_fold.showResults()

            ####################################################################
            # Uncomment this to create and save the pipeline model weights
            ####################################################################
            appraisal_emotion_predictor = shallowNN_emotions_from_dimensions(
                len(DIMENSIONS), LAYER_DIM, DROPOUT, LABELS, 'softmax')
            appraisal_emotion_predictor.compile(OPTIMIZER,
                                                'categorical_crossentropy',
                                                metrics=['accuracy'])
            # vectors_shaped = np.expand_dims(vectors[train], axis=2)
            appraisal_emotion_predictor.fit(vectors[train], classes_train,
                                            batch_size=BATCH_SIZE,
                                            epochs=EPOCHS_p1,
                                            verbose=VERBOSITY)
            # Save weights
            appraisal_emotion_predictor.save('dim_to_emotion_'
                                             + str(percentage_done))

            input_shape = sentence_enISEAR.shape[1]  # feature count
            model = text_cnn_model_appraisals(MAX_SEQUENCE_LENGTH, vocab_size,
                                              EMBEDDING_DIMS, FILTER_SIZE,
                                              CONV_FILTERS, embedding_matrix,
                                              DROPOUT, LABELS, 'sigmoid')
            model.compile(OPTIMIZER, 'binary_crossentropy',
                          metrics=['accuracy'])
            # model.fit(data_padded[train], vectors[train], batch_size=BATCH_SIZE,
            #           epochs=EPOCHS_p2, verbose=VERBOSITY)
            model.fit(x_data[train], vectors[train], batch_size=BATCH_SIZE,
                      epochs=EPOCHS_p2, verbose=VERBOSITY,
                      class_weight=class_weight)
            model.save('text_to_dim_' + str(percentage_done))

            # Load models
            appraisal_emotion_predictor = load_model('dim_to_emotion_'
                                                     + str(percentage_done))
            model = load_model('text_to_dim_' + str(percentage_done))

            # Predict dimensions on the test fold, binarized per dimension
            weights = [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]
            preds = model.predict(data_padded[test])
            results = []
            for row in range(len(preds)):
                res = []
                for dim in range(len(DIMENSIONS)):
                    value = preds[row][dim]
                    if (value >= weights[dim]):
                        value = 1
                    else:
                        value = 0
                    res.append(value)
                results.append(res)
            results = np.array(results)

            predicted_classes = []
            predictions = appraisal_emotion_predictor.predict(results)
            for i in range(len(predictions)):
                index = np.argmax(predictions[i])
                predicted_classes.append(LABELS[index])

            metrics_fold = metrics.metrics(y_data[test], predicted_classes,
                                           LABELS, 2)
            metrics_fold.showResults()

            # Oracle combination: an instance counts as correct if either
            # model predicts the gold label
            sentences = sentence_enISEAR_raw[test]
            classes = classes_enISEAR[test]
            TP_text_to_emotion = 0
            TP_text_to_appraisal_to_emotion = 0
            FN = 0
            pred_combined = []
            for i in range(len(x_data[test])):
                label_gold = classes.iloc[i]
                if (label_gold == predicted_classes_text_to_emotion[i]):
                    # Text-based system is correct
                    TP_text_to_emotion += 1
                    # green('\n'); green(i)
                    # green('Gold-Emotion : ' + label_gold)
                    # green('Prediction T->E : ' + str(predicted_classes_text_to_emotion[i]))
                    # green('Prediction T->A->E: ' + predicted_classes[i])
                    pred_combined.append(predicted_classes_text_to_emotion[i])
                elif (label_gold == predicted_classes[i]):
                    # Appraisal system is correct
                    TP_text_to_appraisal_to_emotion += 1  # = TN
                    # yellow('\n'); yellow(i)
                    # yellow('Gold-Emotion : ' + label_gold)
                    # yellow('Prediction T->E : ' + str(predicted_classes_text_to_emotion[i]))
                    # yellow('Prediction T->A->E: ' + predicted_classes[i])
                    pred_combined.append(predicted_classes[i])
                else:
                    FN += 1
                    # print('\nGold-Emotion : ' + label_gold)
                    # print('Prediction T->E : ' + str(predicted_classes_text_to_emotion[i]))
                    # print('Prediction T->A->E: ' + predicted_classes[i])
                    pred_combined.append(predicted_classes_text_to_emotion[i])

            percentage_done += 1
            print('\nPerforming CV... (%2d%%)' % percentage_done)
            size = len(predicted_classes)
            TP = TP_text_to_emotion + TP_text_to_appraisal_to_emotion
            accuracy = (TP / size) * 100
            print('Current fold:')
            print('Accuracy: %2.2f' % accuracy)
            print('Text-to-emotion TP : %2d' % TP_text_to_emotion)
            print('Text-to-appr-to-emotion TP : %2d'
                  % TP_text_to_appraisal_to_emotion)

            metrics_fold = metrics.metrics(y_data[test], pred_combined,
                                           LABELS, 2)
            # metrics_fold.showResults()
            metrics_final.addIntermediateResults(y_data[test], pred_combined)
            TP_total += TP
            size_total += size

    print('\n\nFinal Result:')
    accuracy = (TP_total / size_total) * 100
    print('Accuracy: %2.2f' % accuracy)
    print(TP_total)
    print(size_total)
    metrics_final.writeResults(EXPERIMENTNAME, SAVEFILE)
    return
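# The nested loops that binarize appraisal scores against per-dimension
# thresholds appear in several functions above. An equivalent vectorized
# helper (weights broadcasts across rows), shown for reference:
def binarize_appraisals(preds, weights):
    # preds: (n_instances, n_dimensions) sigmoid scores
    # weights: per-dimension decision thresholds, e.g. [0.5] * 7
    return (np.asarray(preds) >= np.asarray(weights)).astype(int)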
def testModel(x_test, y_test):
    _metrics = metrics.metrics(None, None, APPRAISALS, 2)

    if (not args.loadmodel[0].endswith('.h5')):
        args.loadmodel[0] += '.h5'
    try:
        appraisal_predictor = load_model(args.loadmodel[0])
        print('INFO: Loaded appraisal prediction model weights from %s'
              % args.loadmodel[0])
    except:
        print('\nUnexpected error:', sys.exc_info()[1])
        sys.exit(1)

    if (not args.loadmodel[1].endswith('.h5')):
        args.loadmodel[1] += '.h5'
    try:
        emotion_predictor = load_model(args.loadmodel[1])
        print('INFO: Loaded emotion prediction model weights from %s'
              % args.loadmodel[1])
    except:
        print('\nUnexpected error:', sys.exc_info()[1])
        sys.exit(1)

    print('\nINFO: Testing on %d instances...' % len(x_test))
    if (args.continous == False):
        # weights = [0.55, 0.65, 0.48, 0.3, 0.425, 0.4, 0.45]  # Some experimental settings
        weights = [0.50, 0.50, 0.50, 0.50, 0.50, 0.50, 0.50]
        preds = appraisal_predictor.predict(x_test)
        results = []
        for row in range(len(preds)):
            res = []
            for dim in range(len(APPRAISALS)):
                value = preds[row][dim]
                if (value >= weights[dim]):
                    value = 1
                else:
                    value = 0
                res.append(value)
            results.append(res)
        appraisal_predictions = np.array(results)
    else:
        # Continuous appraisals: use the raw predictions directly
        preds = appraisal_predictor.predict(x_test)
        appraisal_predictions = np.array(preds)

    # Predict emotions from the predicted appraisals
    emotions_predictions = emotion_predictor.predict(appraisal_predictions)
    predicted_emotions = []
    for i in range(len(emotions_predictions)):
        index = np.argmax(emotions_predictions[i])
        predicted_emotions.append(LABELS[index])

    _metrics = metrics.metrics(y_test, predicted_emotions, LABELS, 2)
    _metrics.showResults()
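# Hypothetical invocation of testModel(). The script is assumed to receive
# two saved Keras models via an argparse option, e.g.
#   python pipeline.py --loadmodel text_to_appraisal appraisal_to_emotion
# (the '.h5' suffix is appended automatically above), after which the padded
# test instances and gold labels are passed in:
#   testModel(text_instances_padded_test, class_labels_test)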
def evalTrainAndTestSet(text_instances_padded, text_instances_padded_test,
                        appraisals_train, appraisals_test,
                        class_labels_train, class_labels_test):
    # One-hot encode the gold emotion labels
    emotions_train = pd.concat(
        [class_labels_train, pd.get_dummies(class_labels_train)],
        axis=1).drop(['Prior_Emotion'], axis=1)
    emotions_test = pd.concat(
        [class_labels_test, pd.get_dummies(class_labels_test)],
        axis=1).drop(['Prior_Emotion'], axis=1)

    print('\nINFO: Learning to predict emotions on %d instances...'
          % len(text_instances_padded))
    emotion_predictor = shallowNN_emotions_from_dimensions(
        len(APPRAISALS), CONV_FILTERS, DROPOUT, LABELS, 'softmax')
    emotion_predictor.compile(OPTIMIZER, 'categorical_crossentropy',
                              metrics=['accuracy'])
    # vectors_shaped = np.expand_dims(vectors[train], axis=2)
    if (args.quiet):
        for _ in range(EPOCHS_E):
            emotion_predictor.fit(appraisals_train, emotions_train,
                                  batch_size=BATCH_SIZE, epochs=1,
                                  verbose=VERBOSITY)
            print('.', end='', flush=True)
    else:
        emotion_predictor.fit(appraisals_train, emotions_train,
                              batch_size=BATCH_SIZE, epochs=EPOCHS_E,
                              verbose=VERBOSITY)

    if (args.continous):
        activation = 'linear'      # Use linear activation
        # activation = 'sigmoid'   # Alternative: sigmoid activation
        loss = 'mse'               # Use mean squared error loss
        metric = ['mse', 'mae']
    else:
        activation = 'sigmoid'
        loss = 'binary_crossentropy'
        metric = ['accuracy']

    appraisal_predictor = text_cnn_model_appraisals(
        MAX_SEQUENCE_LENGTH, vocab_size, EMBEDDING_DIMS, FILTER_SIZE,
        CONV_FILTERS, embedding_matrix, DROPOUT, len(APPRAISALS), activation)
    appraisal_predictor.compile(OPTIMIZER, loss, metrics=metric)
    # appraisals_shaped = np.expand_dims(appraisals, axis=2)

    print('\nINFO: Learning to predict appraisals on %d instances...'
          % len(text_instances_padded))
    if (args.quiet):
        for _ in range(EPOCHS_A):
            appraisal_predictor.fit(text_instances_padded, appraisals_train,
                                    batch_size=BATCH_SIZE, epochs=1,
                                    verbose=VERBOSITY)
            print('.', end='', flush=True)
    else:
        appraisal_predictor.fit(text_instances_padded, appraisals_train,
                                batch_size=BATCH_SIZE, epochs=EPOCHS_A,
                                verbose=VERBOSITY)

    print('\nINFO: Testing on %d instances...' % len(class_labels_test))
    if (args.continous == False):
        # weights = [0.55, 0.65, 0.48, 0.3, 0.425, 0.4, 0.45]  # Some experimental settings
        weights = [0.50, 0.50, 0.50, 0.50, 0.50, 0.50, 0.50]
        preds = appraisal_predictor.predict(text_instances_padded_test)
        results = []
        for row in range(len(preds)):
            res = []
            for dim in range(len(APPRAISALS)):
                value = preds[row][dim]
                if (value >= weights[dim]):
                    value = 1
                else:
                    value = 0
                res.append(value)
            results.append(res)
        appraisal_predictions = np.array(results)
    else:
        preds = appraisal_predictor.predict(text_instances_padded_test)
        appraisal_predictions = np.array(preds)

    # Predict emotions based on appraisal predictions
    predicted_emotions = []
    # results = np.expand_dims(results, axis=2)
    emotions_predictions = emotion_predictor.predict(appraisal_predictions)
    for i in range(len(emotions_predictions)):
        index = np.argmax(emotions_predictions[i])
        predicted_emotions.append(LABELS[index])

    _metrics = metrics.metrics(class_labels_test, predicted_emotions,
                               LABELS, 2)
    _metrics.showResults()
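# A hypothetical call of evalTrainAndTestSet() on a random split. The split
# ratio and the global variable names (text_instances_padded, appraisals,
# classes_enISEAR) are assumptions for this sketch:
from sklearn.model_selection import train_test_split

def _demo_train_test_eval():
    (text_train, text_test,
     appr_train, appr_test,
     y_train, y_test) = train_test_split(text_instances_padded, appraisals,
                                         classes_enISEAR, test_size=0.2,
                                         random_state=0)
    evalTrainAndTestSet(text_train, text_test, appr_train, appr_test,
                        y_train, y_test)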
def performCrossValidation(x_data, y_data_appraisal, y_data_emotion):
    metrics_final = metrics.metrics(None, None, LABELS, 2)
    percentage_done = 1
    for seed in range(ROUNDS):
        np.random.seed(seed)
        kfold = KFold(n_splits=KFOLDS, shuffle=True, random_state=seed)
        for train, test in kfold.split(x_data, y_data_emotion):
            K.clear_session()

            # One-hot encode the gold emotion labels
            emotions_train = pd.concat(
                [y_data_emotion[train], pd.get_dummies(y_data_emotion[train])],
                axis=1).drop(['Prior_Emotion'], axis=1)
            emotions_test = pd.concat(
                [y_data_emotion[test], pd.get_dummies(y_data_emotion[test])],
                axis=1).drop(['Prior_Emotion'], axis=1)

            ####################################################################
            # Task 1 : Learn to predict appraisals from text
            ####################################################################
            if (args.continous):
                activation = 'linear'      # Use linear activation
                # activation = 'sigmoid'   # Alternative: sigmoid activation
                loss = 'mse'               # Use mean squared error loss
                metric = ['mse', 'mae']
            else:
                activation = 'sigmoid'
                loss = 'binary_crossentropy'
                metric = ['accuracy']

            print('\nINFO: Learning to predict appraisals from text...')
            appraisal_predictor = text_cnn_model_appraisals(
                MAX_SEQUENCE_LENGTH, vocab_size, EMBEDDING_DIMS, FILTER_SIZE,
                CONV_FILTERS, embedding_matrix, DROPOUT, len(APPRAISALS),
                activation)
            appraisal_predictor.compile(OPTIMIZER, loss, metrics=metric)
            if (args.quiet):
                for _ in range(EPOCHS_A):
                    appraisal_predictor.fit(text_instances_padded[train],
                                            appraisals[train],
                                            batch_size=BATCH_SIZE, epochs=1,
                                            verbose=VERBOSITY,
                                            class_weight=class_weight)
                    print('.', end='', flush=True)
            else:
                appraisal_predictor.fit(text_instances_padded[train],
                                        appraisals[train],
                                        batch_size=BATCH_SIZE,
                                        epochs=EPOCHS_A, verbose=VERBOSITY,
                                        class_weight=class_weight)

            if (args.continous == False):
                # weights = [0.55, 0.65, 0.48, 0.3, 0.425, 0.4, 0.45]  # Some experimental settings
                weights = [0.50, 0.50, 0.50, 0.50, 0.50, 0.50, 0.50]
                test_instances = text_instances_padded[test]
                preds = appraisal_predictor.predict(test_instances)
                results = []
                for row in range(len(preds)):
                    res = []
                    for dim in range(len(APPRAISALS)):
                        value = preds[row][dim]
                        if (value >= weights[dim]):
                            value = 1
                        else:
                            value = 0
                        res.append(value)
                    results.append(res)
                appraisal_predictions = np.array(results)
            else:
                test_instances = text_instances_padded[test]
                appraisal_test = appraisals[test]
                preds = appraisal_predictor.predict(test_instances)
                # for i in range(len(preds)):
                #     print('\n Predicted:', preds[i])
                #     print(' Correct: ', appraisal_test[i])
                appraisal_predictions = np.array(preds)

            ####################################################################
            # Task 2 : Learn to predict emotions from appraisals
            ####################################################################
            print('\nINFO: Learning to predict emotions from appraisals...')
            emotion_predictor = shallowNN_emotions_from_dimensions(
                len(APPRAISALS), CONV_FILTERS, DROPOUT, LABELS, 'softmax')
            emotion_predictor.compile(OPTIMIZER, 'categorical_crossentropy',
                                      metrics=['accuracy'])
            # vectors_shaped = np.expand_dims(vectors[train], axis=2)
            if (args.quiet):
                for _ in range(EPOCHS_E):
                    emotion_predictor.fit(appraisals[train], emotions_train,
                                          batch_size=BATCH_SIZE, epochs=1,
                                          verbose=VERBOSITY)
                    print('.', end='', flush=True)
            else:
                emotion_predictor.fit(appraisals[train], emotions_train,
                                      batch_size=BATCH_SIZE, epochs=EPOCHS_E,
                                      verbose=VERBOSITY)

            # Predict emotions based on appraisal predictions
            predicted_emotions = []
            # results = np.expand_dims(results, axis=2)
            emotions_predictions = emotion_predictor.predict(
                appraisal_predictions)
            for i in range(len(emotions_predictions)):
                index = np.argmax(emotions_predictions[i])
                predicted_emotions.append(LABELS[index])

            # Show results
            print('\n\nINFO: Evaluating CV-fold...')
            metrics_fold = metrics.metrics(y_data_emotion[test],
                                           predicted_emotions, LABELS, 2)
            metrics_fold.showResults()
            metrics_final.addIntermediateResults(y_data_emotion[test],
                                                 predicted_emotions)

    metrics_final.showResults()
    metrics_final.showConfusionMatrix(False)
def performCrossValidation(x_data, y_data):
    metrics_final = metrics.metrics(None, None, LABELS, 2)
    for seed in range(ROUNDS):
        np.random.seed(seed)
        kfold = KFold(n_splits=KFOLDS, shuffle=True, random_state=seed)
        for train, test in kfold.split(x_data, y_data):
            # Free GPU memory between folds
            from keras import backend as K
            K.clear_session()

            # One-hot encode the gold emotion labels
            classes_train = pd.concat(
                [y_data[train], pd.get_dummies(y_data[train])],
                axis=1).drop(['Prior_Emotion'], axis=1)
            classes_test = pd.concat(
                [y_data[test], pd.get_dummies(y_data[test])],
                axis=1).drop(['Prior_Emotion'], axis=1)

            input_shape = sentence_enISEAR.shape[1]  # feature count
            print(input_shape)

            ####################################################################
            # Task 1 : Learn to predict appraisals from text
            ####################################################################
            print('Learning to predict dimensions from text')
            appraisal_predictor = Sequential()
            appraisal_predictor.add(
                Dense(7, input_shape=(input_shape, ),
                      activity_regularizer=regularizers.l2(0.01)))
            appraisal_predictor.add(Activation('sigmoid'))
            appraisal_predictor.compile(loss='binary_crossentropy',
                                        metrics=['accuracy'],
                                        optimizer='adam')
            appraisal_predictor.fit(sentence_enISEAR[train], vectors[train],
                                    batch_size=BATCH_SIZE, epochs=EPOCHS_TA,
                                    verbose=VERBOSITY,
                                    class_weight=class_weight)

            # weights = [0.50, 0.51, 0.485, 0.485, 0.475, 0.475, 0.485]
            # weights = [0.51, 0.5125, 0.475, 0.475, 0.50, 0.4750, 0.495]
            weights = [0.50, 0.50, 0.50, 0.50, 0.50, 0.50, 0.50]
            preds = appraisal_predictor.predict(sentence_enISEAR[test])
            predicted_appraisals = []
            for row in range(len(preds)):
                res = []
                for dim in range(len(DIMENSIONS)):
                    value = preds[row][dim]
                    if (value >= weights[dim]):
                        value = 1
                    else:
                        value = 0
                    res.append(value)
                predicted_appraisals.append(res)
            predicted_appraisals = np.array(predicted_appraisals)

            ####################################################################
            # Task 2 : Learn to predict emotions from appraisals
            ####################################################################
            print('Learning to predict emotions from dimensions')
            emotion_predictor = Sequential()
            emotion_predictor.add(
                Dense(7, input_shape=(7, ),
                      activity_regularizer=regularizers.l2(0.01)))
            emotion_predictor.add(Activation('softmax'))  # Softmax regression
            emotion_predictor.compile(loss='categorical_crossentropy',
                                      metrics=['accuracy'],
                                      optimizer=OPTIMIZER)
            emotion_predictor.fit(x_data[train], classes_train,
                                  batch_size=BATCH_SIZE, epochs=EPOCHS_AE,
                                  verbose=VERBOSITY)

            predicted_emotions = []
            predictions = emotion_predictor.predict(predicted_appraisals)
            for i in range(len(predictions)):
                index = np.argmax(predictions[i])
                predicted_emotions.append(LABELS[index])

            rounding_decimals = 2
            metrics_fold = metrics.metrics(y_data[test], predicted_emotions,
                                           LABELS, rounding_decimals)
            metrics_fold.showResults()
            metrics_final.addIntermediateResults(y_data[test],
                                                 predicted_emotions)

    print('\nFinal Result:')
    metrics_final.writeResults(EXPERIMENTNAME, SAVEFILE)
    return
def performCrossValidation(x_data, y_data):
    metrics_final = metrics.metrics(None, None, LABELS, 2)
    TP_total = 0
    size_total = 0

    # Phase 1: create and save the baseline model weights for every fold
    percentage_done = 0
    for seed in range(ROUNDS):
        np.random.seed(seed)
        kfold = KFold(n_splits=KFOLDS, shuffle=True, random_state=seed)
        for train, test in kfold.split(x_data, y_data):
            print('Training Baseline model %i/%i'
                  % (percentage_done + 1, KFOLDS * ROUNDS))
            from keras import backend as K
            K.clear_session()
            # tf.reset_default_graph()

            classes_train = pd.concat(
                [y_data[train], pd.get_dummies(y_data[train])],
                axis=1).drop(['Prior_Emotion'], axis=1)
            classes_test = pd.concat(
                [y_data[test], pd.get_dummies(y_data[test])],
                axis=1).drop(['Prior_Emotion'], axis=1)

            text_to_emotion_model = text_cnn_model_baseline(
                MAX_SEQUENCE_LENGTH, vocab_size, EMBEDDING_DIMS, FILTER_SIZE,
                CONV_FILTERS, embedding_matrix, DROPOUT, LABELS, 'softmax')
            text_to_emotion_model.compile(OPTIMIZER,
                                          'categorical_crossentropy',
                                          metrics=['accuracy'])
            text_to_emotion_model.fit(x_data[train], classes_train,
                                      batch_size=BATCH_SIZE,
                                      epochs=EPOCHS_text_to_emotion,
                                      verbose=VERBOSITY)
            text_to_emotion_model.save('saved_models/baseline_'
                                       + str(percentage_done))
            percentage_done += 1

    # Phase 2: create and save the pipeline model weights for every fold
    percentage_done = 0
    for seed in range(ROUNDS):
        np.random.seed(seed)
        kfold = KFold(n_splits=KFOLDS, shuffle=True, random_state=seed)
        for train, test in kfold.split(x_data, y_data):
            print('Training Pipeline model %i/%i'
                  % (percentage_done + 1, KFOLDS * ROUNDS))
            from keras import backend as K
            K.clear_session()
            # tf.reset_default_graph()

            classes_train = pd.concat(
                [y_data[train], pd.get_dummies(y_data[train])],
                axis=1).drop(['Prior_Emotion'], axis=1)
            classes_test = pd.concat(
                [y_data[test], pd.get_dummies(y_data[test])],
                axis=1).drop(['Prior_Emotion'], axis=1)

            appraisal_emotion_predictor = shallowNN_emotions_from_dimensions(
                len(DIMENSIONS), LAYER_DIM, DROPOUT, LABELS, 'softmax')
            appraisal_emotion_predictor.compile(OPTIMIZER,
                                                'categorical_crossentropy',
                                                metrics=['accuracy'])
            # vectors_shaped = np.expand_dims(vectors[train], axis=2)
            appraisal_emotion_predictor.fit(vectors[train], classes_train,
                                            batch_size=BATCH_SIZE,
                                            epochs=EPOCHS_p1,
                                            verbose=VERBOSITY)
            # Save weights
            appraisal_emotion_predictor.save('saved_models/dim_to_emotion_'
                                             + str(percentage_done))

            input_shape = sentence_enISEAR.shape[1]  # feature count
            model = text_cnn_model_appraisals(MAX_SEQUENCE_LENGTH, vocab_size,
                                              EMBEDDING_DIMS, FILTER_SIZE,
                                              CONV_FILTERS, embedding_matrix,
                                              DROPOUT, len(DIMENSIONS),
                                              'sigmoid')
            model.compile(OPTIMIZER, 'binary_crossentropy',
                          metrics=['accuracy'])
            # model.fit(data_padded[train], vectors[train], batch_size=BATCH_SIZE,
            #           epochs=EPOCHS_p2, verbose=VERBOSITY)
            model.fit(x_data[train], vectors[train], batch_size=BATCH_SIZE,
                      epochs=EPOCHS_p2, verbose=VERBOSITY,
                      class_weight=class_weight)
            model.save('saved_models/text_to_dim_' + str(percentage_done))
            percentage_done += 1

    # Phase 3: load the saved weights and evaluate baseline, pipeline and
    # oracle combination on every fold
    percentage_done = 0
    for seed in range(ROUNDS):
        np.random.seed(seed)
        kfold = KFold(n_splits=KFOLDS, shuffle=True, random_state=seed)
        for train, test in kfold.split(x_data, y_data):
            print('\n\n############################################################')
            print('Evaluating fold %i/%i in run %i/%i'
                  % (percentage_done % KFOLDS + 1, KFOLDS, seed + 1, ROUNDS))
            print('############################################################')
            from keras import backend as K
            K.clear_session()
            tf.reset_default_graph()

            classes_train = pd.concat(
                [y_data[train], pd.get_dummies(y_data[train])],
                axis=1).drop(['Prior_Emotion'], axis=1)
            classes_test = pd.concat(
                [y_data[test], pd.get_dummies(y_data[test])],
                axis=1).drop(['Prior_Emotion'], axis=1)

            # Load the baseline model weights
            text_to_emotion_model = load_model('saved_models/baseline_'
                                               + str(percentage_done))

            # Evaluate baseline model
            predicted_classes_text_to_emotion = []
            predictions = text_to_emotion_model.predict(x_data[test])
            for i in range(len(predictions)):
                index = np.argmax(predictions[i])
                predicted_classes_text_to_emotion.append(LABELS[index])

            # Show results of baseline
            print('\n############################################################')
            print('Baseline result:')
            print('############################################################')
            metrics_fold = metrics.metrics(
                y_data[test], predicted_classes_text_to_emotion, LABELS, 2)
            metrics_fold.showResults()

            # Load appraisal models
            appraisal_emotion_predictor = load_model(
                'saved_models/dim_to_emotion_' + str(percentage_done))
            model = load_model('saved_models/text_to_dim_'
                               + str(percentage_done))

            # Predict appraisals on the test fold, binarized per dimension
            weights = [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]
            preds = model.predict(data_padded[test])
            results = []
            for row in range(len(preds)):
                res = []
                for dim in range(len(DIMENSIONS)):
                    value = preds[row][dim]
                    if (value >= weights[dim]):
                        value = 1
                    else:
                        value = 0
                    res.append(value)
                results.append(res)
            results = np.array(results)

            # Predict emotions based on predicted appraisals
            predicted_classes = []
            predictions = appraisal_emotion_predictor.predict(results)
            for i in range(len(predictions)):
                index = np.argmax(predictions[i])
                predicted_classes.append(LABELS[index])

            # Show results of pipeline
            print('\n############################################################')
            print('Pipeline result:')
            print('############################################################')
            metrics_fold = metrics.metrics(y_data[test], predicted_classes,
                                           LABELS, 2)
            metrics_fold.showResults()

            # Evaluate models and create oracle 'prediction'
            sentences = sentence_enISEAR_raw[test]
            classes = classes_enISEAR[test]
            TP_text_to_emotion = 0
            TP_text_to_appraisal_to_emotion = 0
            FN = 0
            pred_combined = []
            for i in range(len(x_data[test])):
                label_gold = classes.iloc[i]
                if (label_gold == predicted_classes_text_to_emotion[i]):
                    # Text-based system is correct
                    TP_text_to_emotion += 1
                    # green('\n'); green(i)
                    # green('Gold-Emotion : ' + label_gold)
                    # green('Prediction T->E : ' + str(predicted_classes_text_to_emotion[i]))
                    # green('Prediction T->A->E: ' + predicted_classes[i])
                    pred_combined.append(predicted_classes_text_to_emotion[i])
                elif (label_gold == predicted_classes[i]):
                    # Appraisal system is correct
                    TP_text_to_appraisal_to_emotion += 1  # = TN
                    # yellow('\n'); yellow(i)
                    # yellow('Gold-Emotion : ' + label_gold)
                    # yellow('Prediction T->E : ' + str(predicted_classes_text_to_emotion[i]))
                    # yellow('Prediction T->A->E: ' + predicted_classes[i])
                    pred_combined.append(predicted_classes[i])
                else:
                    FN += 1
                    # print('\nGold-Emotion : ' + label_gold)
                    # print('Prediction T->E : ' + str(predicted_classes_text_to_emotion[i]))
                    # print('Prediction T->A->E: ' + predicted_classes[i])
                    pred_combined.append(predicted_classes_text_to_emotion[i])

            percentage_done += 1
            print('\nPerforming CV... (%2d%%)' % percentage_done)
            size = len(predicted_classes)
            TP = TP_text_to_emotion + TP_text_to_appraisal_to_emotion
            accuracy = (TP / size) * 100
            print('Current fold:')
            print('Accuracy: %2.2f' % accuracy)
            print('Text-to-emotion TP : %2d' % TP_text_to_emotion)
            print('Text-to-appr-to-emotion TP : %2d'
                  % TP_text_to_appraisal_to_emotion)

            # Evaluate oracle 'prediction'
            metrics_fold = metrics.metrics(y_data[test], pred_combined,
                                           LABELS, 2)
            print('\n############################################################')
            print('Oracle result:')
            print('############################################################')
            metrics_fold.showResults()
            metrics_final.addIntermediateResults(y_data[test], pred_combined)
            TP_total += TP
            size_total += size

    print('\nFinal Result:')
    accuracy = (TP_total / size_total) * 100
    print('Accuracy: %2.2f' % accuracy)
    print(TP_total)
    print(size_total)
    metrics_final.writeResults(EXPERIMENTNAME, SAVEFILE)
    metrics_final.createMarkdownResults()
    return
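# The oracle combination above counts an instance as correct if either model
# predicts the gold label, which makes its accuracy an upper bound for any
# model-selection strategy. A compact helper expressing the same bound
# (illustration only, not used by the function above):
def oracle_accuracy(gold, preds_a, preds_b):
    hits = sum(1 for g, a, b in zip(gold, preds_a, preds_b) if g in (a, b))
    return 100.0 * hits / len(gold)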