def trainAndSaveModel(x_train, y_train):
    if (args.continous):
        activation = 'linear'  # Linear activation for continuous appraisal values
        # activation = 'sigmoid'  # Alternative: sigmoid activation
        loss = 'mse'  # Mean squared error loss
        metric = ['mse', 'mae']
    else:
        activation = 'sigmoid'
        loss = 'binary_crossentropy'
        metric = ['accuracy']

    appraisal_predictor = text_cnn_model_appraisals(MAX_SEQUENCE_LENGTH, vocab_size,
                            EMBEDDING_DIMS, FILTER_SIZE, CONV_FILTERS,
                            embedding_matrix, DROPOUT, len(APPRAISALS), activation)
    appraisal_predictor.compile(OPTIMIZER, loss, metrics=metric)
    # appraisals_shaped = np.expand_dims(appraisals, axis=2)

    print('\nINFO: Training on %d instances...' % len(x_train))
    if (args.quiet):
        for _ in range(EPOCHS):
            appraisal_predictor.fit(x_train, y_train, batch_size=BATCH_SIZE,
                                    epochs=1, verbose=VERBOSITY)
            print('.', end='', flush=True)
    else:
        appraisal_predictor.fit(x_train, y_train, batch_size=BATCH_SIZE,
                                epochs=EPOCHS, verbose=VERBOSITY)

    if (not args.savemodel.endswith('.h5')):
        print('\nINFO: Your model name does not end with ".h5".')
        print('      Automatically appending file extension.')
        args.savemodel += '.h5'

    appraisal_predictor.save(args.savemodel)
    print('\nSUCCESS: Saved model to %s' % args.savemodel)
def evalTrainAndTestSet(x_train, x_test, y_train, y_test):
    if (args.continous):
        _reg_metrics = reg_metrics.metrics_regression(APPRAISALS, 2)
        activation = 'linear'  # Linear activation for continuous appraisal values
        # activation = 'sigmoid'  # Alternative: sigmoid activation
        loss = 'mse'  # Mean squared error loss
        metric = ['mse', 'mae']
    else:
        _metrics = metrics.metrics(APPRAISALS, 2)
        activation = 'sigmoid'
        loss = 'binary_crossentropy'
        metric = ['accuracy']

    appraisal_predictor = text_cnn_model_appraisals(MAX_SEQUENCE_LENGTH, vocab_size,
                            EMBEDDING_DIMS, FILTER_SIZE, CONV_FILTERS,
                            embedding_matrix, DROPOUT, len(APPRAISALS), activation)
    appraisal_predictor.compile(OPTIMIZER, loss, metrics=metric)
    # appraisals_shaped = np.expand_dims(appraisals, axis=2)

    print('\nINFO: Training on %d instances...' % len(x_train))
    if (args.quiet):
        for _ in range(EPOCHS):
            appraisal_predictor.fit(x_train, y_train, batch_size=BATCH_SIZE,
                                    epochs=1, verbose=VERBOSITY)
            print('.', end='', flush=True)
    else:
        appraisal_predictor.fit(x_train, y_train, batch_size=BATCH_SIZE,
                                epochs=EPOCHS, verbose=VERBOSITY)

    print('\nINFO: Testing on %d instances...' % len(x_test))
    if (args.continous == False):
        # weights = [0.55, 0.65, 0.48, 0.3, 0.425, 0.4, 0.45]  # Some experimental settings
        weights = [0.50, 0.50, 0.50, 0.50, 0.50, 0.50, 0.50]
        preds = appraisal_predictor.predict(x_test)
        results = []
        for row in range(len(preds)):
            res = []
            for dim in range(len(APPRAISALS)):
                value = preds[row][dim]
                if (value >= weights[dim]):
                    value = 1
                else:
                    value = 0
                res.append(value)
            results.append(res)
        appraisal_predictions = np.array(results)
        _metrics.evaluateFold(appraisal_predictions, y_test)
    else:
        preds = appraisal_predictor.predict(x_test)
        # for i in range(len(preds)):
        #     print('\n Predicted:', preds[i])
        #     print('   Correct: ', appraisal_test[i])
        appraisal_predictions = np.array(preds)
        _reg_metrics.evaluateFold(appraisal_predictions, y_test)

    if (args.savemodel):
        if (not args.savemodel.endswith('.h5')):
            args.savemodel += '.h5'
        appraisal_predictor.save(args.savemodel)  # save the model trained above
        print('\nINFO: Saved model to %s' % args.savemodel)
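# NOTE (editorial sketch, not part of the original script): the per-dimension
# thresholding loop above reappears in several functions below. A vectorized
# NumPy equivalent could look like this helper; `weights` is the same list of
# per-appraisal thresholds used throughout this file. The helper is illustrative
# only and is never called.
def binarize_appraisal_predictions(preds, weights):
    """Map continuous appraisal scores to 0/1 using per-dimension thresholds."""
    import numpy as np
    return (np.asarray(preds) >= np.asarray(weights)).astype(int)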
def trainAndSaveModel(x_train, y_appraisal, y_emotion):
    if (args.continous):
        activation = 'linear'   # Linear activation
        activation = 'sigmoid'  # Overridden: sigmoid activation is used for continuous values
        loss = 'mse'            # Mean squared error loss
        metric = ['mse', 'mae']
    else:
        activation = 'sigmoid'
        loss = 'binary_crossentropy'
        metric = ['accuracy']

    model = text_cnn_model_appraisals(MAX_SEQUENCE_LENGTH, vocab_size, EMBEDDING_DIMS,
                                      FILTER_SIZE, CONV_FILTERS, embedding_matrix,
                                      DROPOUT, len(APPRAISALS), activation)
    model.compile(OPTIMIZER, loss, metrics=metric)
    # appraisals_shaped = np.expand_dims(appraisals, axis=2)

    print('\nINFO: Learning to predict appraisals on %d instances...' % len(x_train))
    if (args.quiet):
        for _ in range(EPOCHS_A):
            model.fit(x_train, y_appraisal, batch_size=BATCH_SIZE,
                      epochs=1, verbose=VERBOSITY)
            print('.', end='', flush=True)
    else:
        model.fit(x_train, y_appraisal, batch_size=BATCH_SIZE,
                  epochs=EPOCHS_A, verbose=VERBOSITY)

    if (not args.savemodel[0].endswith('.h5')):
        print('\nINFO: Your appraisal model name does not end with ".h5".')
        print('      Automatically appending file extension.')
        args.savemodel[0] += '.h5'
    model.save(args.savemodel[0])
    print('\nINFO: Saved appraisal prediction model to %s' % args.savemodel[0])

    emotion_predictor = shallowNN_emotions_from_dimensions(
        len(APPRAISALS), CONV_FILTERS, DROPOUT, LABELS, 'softmax')
    emotion_predictor.compile(OPTIMIZER, 'categorical_crossentropy', metrics=['accuracy'])
    emotions_train = pd.concat([y_emotion, pd.get_dummies(y_emotion)],
                               axis=1).drop(['Prior_Emotion'], axis=1)

    print('\nINFO: Learning to predict emotions on %d instances...' % len(x_train))
    if (args.quiet):
        for _ in range(EPOCHS_E):
            emotion_predictor.fit(y_appraisal, emotions_train, batch_size=BATCH_SIZE,
                                  epochs=1, verbose=VERBOSITY)
            print('.', end='', flush=True)
    else:
        emotion_predictor.fit(y_appraisal, emotions_train, batch_size=BATCH_SIZE,
                              epochs=EPOCHS_E, verbose=VERBOSITY)

    if (not args.savemodel[1].endswith('.h5')):
        print('\nINFO: Your emotion model name does not end with ".h5".')
        print('      Automatically appending file extension.')
        args.savemodel[1] += '.h5'
    emotion_predictor.save(args.savemodel[1])
    print('\nINFO: Saved emotion prediction model to %s' % args.savemodel[1])
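# NOTE (editorial sketch, not part of the original script): the
# concat/get_dummies/drop construct used above is simply a one-hot encoding of
# the 'Prior_Emotion' column. The never-called helper below shows the
# equivalent plain pandas call, assuming y_emotion is a Series named
# 'Prior_Emotion'.
def one_hot_emotions(y_emotion):
    """One-hot encode a 'Prior_Emotion' Series; equivalent to the concat/drop idiom above."""
    import pandas as pd
    return pd.get_dummies(y_emotion)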
def performCrossValidation(x_data, y_data):
    percentage_done = 0
    metrics_final = metrics.metrics(None, None, LABELS, 2)
    TP_total = 0
    size_total = 0
    TP_Baseline = 0

    for seed in range(ROUNDS):
        np.random.seed(seed)
        kfold = KFold(n_splits=KFOLDS, shuffle=True, random_state=seed)
        for train, test in kfold.split(x_data, y_data):
            K.clear_session()
            classes_train = pd.concat(
                [y_data[train], pd.get_dummies(y_data[train])],
                axis=1).drop(['Prior_Emotion'], axis=1)
            classes_test = pd.concat(
                [y_data[test], pd.get_dummies(y_data[test])],
                axis=1).drop(['Prior_Emotion'], axis=1)

            # Learn to predict emotions based on text (on enISEAR)
            text_to_emotion_model = text_cnn_model_baseline(
                MAX_SEQUENCE_LENGTH, 3116,  # hard-coded vocabulary size
                EMBEDDING_DIMS, FILTER_SIZE, CONV_FILTERS,
                embedding_matrix, DROPOUT, LABELS, 'softmax')
            text_to_emotion_model.compile(OPTIMIZER, 'categorical_crossentropy',
                                          metrics=['accuracy'])
            text_to_emotion_model.fit(data_padded[train], classes_train,
                                      batch_size=BATCH_SIZE,
                                      epochs=EPOCHS_text_to_emotion,
                                      verbose=VERBOSITY)
            # text_to_emotion_model.save('saved_models/baseline_' + str(percentage_done))
            # del text_to_emotion_model
            # K.clear_session()
            # text_to_emotion_model = load_model('saved_models/baseline_' + str(percentage_done))

            # Evaluate the text-to-emotion baseline on the test fold
            predicted_classes_text_to_emotion = []
            predictions = text_to_emotion_model.predict(x_data[test])
            for i in range(len(predictions)):
                index = np.argmax(predictions[i])
                predicted_classes_text_to_emotion.append(LABELS[index])
            metrics_fold1 = metrics.metrics(y_data[test],
                                            predicted_classes_text_to_emotion,
                                            LABELS, 2)
            metrics_fold1.showResults()

            # Learn to predict emotions from appraisal dimensions
            model1 = shallowNN_emotions_from_dimensions(
                len(DIMENSIONS), LAYER_DIM, DROPOUT, LABELS, 'softmax')
            model1.compile(OPTIMIZER, 'categorical_crossentropy', metrics=['accuracy'])
            # vectors_shaped = np.expand_dims(vectors[train], axis=2)
            model1.fit(vectors[train], classes_train, batch_size=BATCH_SIZE,
                       epochs=EPOCHS_p1, verbose=VERBOSITY)
            # model1.save('saved_models/dim_to_emotion_' + str(percentage_done))
            # model1 = load_model('saved_models/dim_to_emotion_' + str(percentage_done))

            # Learn to predict appraisal dimensions from text
            model = text_cnn_model_appraisals(MAX_SEQUENCE_LENGTH, vocab_size,
                                              EMBEDDING_DIMS, FILTER_SIZE, CONV_FILTERS,
                                              embedding_matrix, DROPOUT,
                                              len(LABELS), 'sigmoid')
            model.compile(OPTIMIZER, 'binary_crossentropy', metrics=['accuracy'])
            # model.fit(data_padded[train], vectors[train], batch_size=BATCH_SIZE,
            #           epochs=EPOCHS_p2, verbose=VERBOSITY, class_weight=class_weight)
            model.fit(data_padded[train], vectors[train], batch_size=BATCH_SIZE,
                      epochs=EPOCHS_p2, verbose=VERBOSITY)
            # model.save('saved_models/text_to_dim_' + str(percentage_done))
            # model = load_model('saved_models/text_to_dim_' + str(percentage_done))

            # Predict appraisal dimensions on the enISEAR test fold
            preds = model.predict(data_padded[test])
            results = []
            for row in range(len(preds)):
                res = []
                for dim in range(len(DIMENSIONS)):
                    value = preds[row][dim]
                    if (value >= 0.5):
                        value = 1
                    else:
                        value = 0
                    res.append(value)
                results.append(res)
            results = np.array(results)

            # Predict emotions from the predicted appraisals (test fold)
            predicted_classes = []
            predictions = model1.predict(results)
            for i in range(len(predictions)):
                index = np.argmax(predictions[i])
                predicted_classes.append(LABELS[index])

            # Predict appraisal dimensions on the full enISEAR data
            preds = model.predict(data_padded)
            results = []
            for row in range(len(preds)):
                res = []
                for dim in range(len(DIMENSIONS)):
                    value = preds[row][dim]
                    if (value >= 0.5):
                        value = 1
                    else:
                        value = 0
                    res.append(value)
                results.append(res)
            results = np.array(results)

            # Predict emotions from predicted appraisals on the full enISEAR data
            predicted_classes_train = []
            predictions = model1.predict(results)
            for i in range(len(predictions)):
                index = np.argmax(predictions[i])
                predicted_classes_train.append(LABELS[index])

            print('enISEAR: T->A->E')
            metrics_fold = metrics.metrics(y_data[test], predicted_classes, LABELS, 2)
            metrics_fold.showResults()

            # Predict emotions from text on the full enISEAR data
            predicted_classes_text_to_emotion_train = []
            predictions = text_to_emotion_model.predict(data_padded)
            for i in range(len(predictions)):
                index = np.argmax(predictions[i])
                predicted_classes_text_to_emotion_train.append(LABELS[index])

            sentences = data_enISEAR
            classes = classes_enISEAR
            TP_text_to_emotion = 0
            TP_text_to_appraisal_to_emotion = 0
            FN = 0
            train_sentences = []
            pred_combined = []
            labels_selector = []
            for i in range(len(data_padded)):
                label_gold = classes_enISEAR.iloc[i]
                if (label_gold == predicted_classes_text_to_emotion_train[i]
                        and label_gold == predicted_classes_train[i]):
                    # Both models correct
                    labels_selector.append([1, 1])
                elif (label_gold == predicted_classes_train[i]):
                    # Only appraisal model correct
                    labels_selector.append([1, 0])
                elif (label_gold == predicted_classes_text_to_emotion_train[i]):
                    # Only text-to-emotion model correct
                    labels_selector.append([0, 1])
                else:
                    # Both models predicted the wrong emotion
                    labels_selector.append([0, 0])
            # print(len(labels_selector))
            labels_selector = np.array(labels_selector)

            # Train a selector model that decides which system to trust per instance
            selector_model = text_cnn_model_selector(
                MAX_SEQUENCE_LENGTH, vocab_size, EMBEDDING_DIMS, FILTER_SIZE,
                CONV_FILTERS, embedding_matrix, DROPOUT, LABELS, 'sigmoid')
            selector_model.compile(loss='binary_crossentropy', metrics=['accuracy'],
                                   optimizer=OPTIMIZER)
            selector_model.fit(data_padded, labels_selector, batch_size=32,
                               epochs=EPOCHS_SELECTION_MODEL, verbose=VERBOSITY,
                               class_weight=class_weight_selector_model)

            TP_text_to_emotion = 0
            TP_text_to_appraisal_to_emotion = 0
            FN = 0
            selection_ = []
            selections = selector_model.predict(x_data[test])
            for i in range(len(selections)):
                selection_.append(np.argmax(selections[i]))
                # selection_.append(selection[i])
                # print(selection_[i])
                # print(selection[i])

            sentences = sentence_enISEAR_raw[test]
            classes = classes_enISEAR[test]
            pred_combined = []
            labels_selector = []
            for i in range(len(x_data[test])):
                label_gold = classes.iloc[i]
                if (selection_[i] == 1):
                    # Selector chose the text-to-emotion baseline
                    TP_text_to_emotion += 1
                    pred_combined.append(predicted_classes_text_to_emotion[i])
                elif (selection_[i] == 0):
                    # Selector chose the appraisal pipeline
                    TP_text_to_appraisal_to_emotion += 1  # = TN
                    pred_combined.append(predicted_classes[i])
                else:
                    FN += 1
                    # pred_combined.append(predicted_classes_text_to_emotion[i])

            percentage_done += 1
            print('\nPerforming CV... (' + str(percentage_done) + '%)')
            print('Selected from Baseline : ' + str(TP_text_to_emotion))
            print('Selected from Pipeline : ' + str(TP_text_to_appraisal_to_emotion))
            metrics_fold = metrics.metrics(y_data[test], pred_combined, LABELS, 2)
            metrics_fold.showResults()
            metrics_final.addIntermediateResults(y_data[test], pred_combined)
            # TP_total += TP
            # size_total += size

    print('\nFinal Result:')
    metrics_final.writeResults(EXPERIMENTNAME, SAVEFILE)
    return
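# NOTE (editorial sketch, not part of the original script): the selector targets
# built above encode, per instance, whether the appraisal pipeline and/or the
# text-to-emotion baseline hit the gold label. The never-called helper below is
# a compact equivalent; argument names are placeholders.
def build_selector_labels(gold, pipeline_preds, baseline_preds):
    """Return an (n, 2) int array: column 0 = pipeline correct, column 1 = baseline correct."""
    import numpy as np
    return np.array([[int(g == p), int(g == b)]
                     for g, p, b in zip(gold, pipeline_preds, baseline_preds)])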
def performCrossValidation(x_data, y_data):
    percentage_done = 0
    metrics_final = metrics.metrics(None, None, LABELS, 2)
    TP_total = 0
    size_total = 0

    for seed in range(ROUNDS):
        np.random.seed(seed)
        kfold = KFold(n_splits=KFOLDS, shuffle=True, random_state=seed)
        for train, test in kfold.split(x_data, y_data):
            from keras import backend as K
            K.clear_session()
            tf.reset_default_graph()
            classes_train = pd.concat(
                [y_data[train], pd.get_dummies(y_data[train])],
                axis=1).drop(['Prior_Emotion'], axis=1)
            classes_test = pd.concat(
                [y_data[test], pd.get_dummies(y_data[test])],
                axis=1).drop(['Prior_Emotion'], axis=1)

            ####################################################################
            # The problem with this oracle setup is that the models somehow
            # influence each other if they are running in the same process
            # on the GPU. A workaround is to train the baseline model on the
            # folds and save all model weights locally. Then do the same for
            # the pipeline. Afterwards load the saved weights and use them to
            # predict the emotions.
            # This means 10*10 models will be saved (10 folds times 10 runs).
            ####################################################################

            ####################################################################
            # Uncomment this to create and save the baseline model weights
            ####################################################################
            text_to_emotion_model = text_cnn_model_baseline(
                MAX_SEQUENCE_LENGTH, vocab_size, EMBEDDING_DIMS, FILTER_SIZE,
                CONV_FILTERS, embedding_matrix, DROPOUT, LABELS, 'softmax')
            text_to_emotion_model.compile(OPTIMIZER, 'categorical_crossentropy',
                                          metrics=['accuracy'])
            text_to_emotion_model.fit(x_data[train], classes_train,
                                      batch_size=BATCH_SIZE,
                                      epochs=EPOCHS_text_to_emotion,
                                      verbose=VERBOSITY)
            text_to_emotion_model.save('baseline_' + str(percentage_done))

            ####################################################################
            # This will load the baseline model weights
            ####################################################################
            text_to_emotion_model = load_model('baseline_' + str(percentage_done))

            # Evaluate baseline model
            predicted_classes_text_to_emotion = []
            predictions = text_to_emotion_model.predict(x_data[test])
            for i in range(len(predictions)):
                index = np.argmax(predictions[i])
                predicted_classes_text_to_emotion.append(LABELS[index])
            metrics_fold = metrics.metrics(y_data[test],
                                           predicted_classes_text_to_emotion,
                                           LABELS, 2)
            metrics_fold.showResults()

            ####################################################################
            # Uncomment this to create and save the pipeline model weights
            ####################################################################
            appraisal_emotion_predictor = shallowNN_emotions_from_dimensions(
                len(DIMENSIONS), LAYER_DIM, DROPOUT, LABELS, 'softmax')
            appraisal_emotion_predictor.compile(OPTIMIZER, 'categorical_crossentropy',
                                                metrics=['accuracy'])
            # vectors_shaped = np.expand_dims(vectors[train], axis=2)
            appraisal_emotion_predictor.fit(vectors[train], classes_train,
                                            batch_size=BATCH_SIZE, epochs=EPOCHS_p1,
                                            verbose=VERBOSITY)
            # Save weights
            appraisal_emotion_predictor.save('dim_to_emotion_' + str(percentage_done))

            input_shape = sentence_enISEAR.shape[1]  # feature count
            model = text_cnn_model_appraisals(MAX_SEQUENCE_LENGTH, vocab_size,
                                              EMBEDDING_DIMS, FILTER_SIZE,
                                              CONV_FILTERS, embedding_matrix,
                                              DROPOUT, len(DIMENSIONS), 'sigmoid')
            model.compile(OPTIMIZER, 'binary_crossentropy', metrics=['accuracy'])
            # model.fit(data_padded[train], vectors[train], batch_size=BATCH_SIZE,
            #           epochs=EPOCHS_p2, verbose=VERBOSITY)
            model.fit(x_data[train], vectors[train], batch_size=BATCH_SIZE,
                      epochs=EPOCHS_p2, verbose=VERBOSITY, class_weight=class_weight)
            model.save('text_to_dim_' + str(percentage_done))

            # Load models
            appraisal_emotion_predictor = load_model('dim_to_emotion_' + str(percentage_done))
            model = load_model('text_to_dim_' + str(percentage_done))

            # Predict appraisal dimensions on the test fold
            weights = [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]
            preds = model.predict(data_padded[test])
            results = []
            for row in range(len(preds)):
                res = []
                for dim in range(len(DIMENSIONS)):
                    value = preds[row][dim]
                    if (value >= weights[dim]):
                        value = 1
                    else:
                        value = 0
                    res.append(value)
                results.append(res)
            results = np.array(results)

            # Predict emotions from the predicted appraisals
            predicted_classes = []
            predictions = appraisal_emotion_predictor.predict(results)
            for i in range(len(predictions)):
                index = np.argmax(predictions[i])
                predicted_classes.append(LABELS[index])
            metrics_fold = metrics.metrics(y_data[test], predicted_classes, LABELS, 2)
            metrics_fold.showResults()

            # Oracle combination: count an instance as correct if either system
            # predicts the gold emotion
            sentences = sentence_enISEAR_raw[test]
            classes = classes_enISEAR[test]
            TP_text_to_emotion = 0
            TP_text_to_appraisal_to_emotion = 0
            FN = 0
            pred_combined = []
            for i in range(len(x_data[test])):
                label_gold = classes.iloc[i]
                if (label_gold == predicted_classes_text_to_emotion[i]):
                    # Text-based system is correct
                    TP_text_to_emotion += 1
                    # green('Gold-Emotion      : ' + label_gold)
                    # green('Prediction T->E   : ' + str(predicted_classes_text_to_emotion[i]))
                    # green('Prediction T->A->E: ' + predicted_classes[i])
                    pred_combined.append(predicted_classes_text_to_emotion[i])
                elif (label_gold == predicted_classes[i]):
                    # Appraisal system is correct
                    TP_text_to_appraisal_to_emotion += 1  # = TN
                    # yellow('Gold-Emotion      : ' + label_gold)
                    # yellow('Prediction T->E   : ' + str(predicted_classes_text_to_emotion[i]))
                    # yellow('Prediction T->A->E: ' + predicted_classes[i])
                    pred_combined.append(predicted_classes[i])
                else:
                    FN += 1
                    # print('\nGold-Emotion      : ' + label_gold)
                    # print('Prediction T->E   : ' + str(predicted_classes_text_to_emotion[i]))
                    # print('Prediction T->A->E: ' + predicted_classes[i])
                    pred_combined.append(predicted_classes_text_to_emotion[i])

            percentage_done += 1
            print('\nPerforming CV... (%2d%%)' % percentage_done)
            size = len(predicted_classes)
            TP = TP_text_to_emotion + TP_text_to_appraisal_to_emotion
            accuracy = (TP / size) * 100
            print('Current fold:')
            print('Accuracy: %2.2f' % accuracy)
            print('Text-to-emotion TP          : %2d' % TP_text_to_emotion)
            print('Text-to-appr-to-emotion TP  : %2d' % TP_text_to_appraisal_to_emotion)
            metrics_fold = metrics.metrics(y_data[test], pred_combined, LABELS, 2)
            # metrics_fold.showResults()
            metrics_final.addIntermediateResults(y_data[test], pred_combined)
            TP_total += TP
            size_total += size

    print('\n\nFinal Result:')
    accuracy = (TP_total / size_total) * 100
    print('Accuracy: %2.2f' % accuracy)
    print(TP_total)
    print(size_total)
    metrics_final.writeResults(EXPERIMENTNAME, SAVEFILE)
    return
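# NOTE (editorial sketch, not part of the original script): the oracle
# combination above can be expressed as a small helper that keeps the baseline
# prediction when it is correct, falls back to the pipeline prediction when only
# that one is correct, and otherwise keeps the (wrong) baseline prediction.
# Never called; argument names are placeholders.
def oracle_combine(gold, baseline_preds, pipeline_preds):
    """Per-instance oracle selection between two prediction lists."""
    combined = []
    for g, b, p in zip(gold, baseline_preds, pipeline_preds):
        if g == b:
            combined.append(b)
        elif g == p:
            combined.append(p)
        else:
            combined.append(b)
    return combined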
def annotatePredictedAppraisals(text_instances_padded, text_instances_padded_annotate, appraisals):
    print('INFO: Annotating Dataset')
    if (args.continous):
        _reg_metrics = reg_metrics.metrics_regression(APPRAISALS, 2)
        activation = 'linear'  # Linear activation for continuous appraisal values
        # activation = 'sigmoid'  # Alternative: sigmoid activation
        loss = 'mse'  # Mean squared error loss
        metric = ['mse', 'mae']
    else:
        _metrics = metrics.metrics(APPRAISALS, 2)
        activation = 'sigmoid'
        loss = 'binary_crossentropy'
        metric = ['accuracy']

    if (args.loadmodel):
        if (not args.loadmodel.endswith('.h5')):
            args.loadmodel += '.h5'
        try:
            appraisal_predictor = load_model(args.loadmodel)
            print('INFO: Loaded pre-trained model: %s' % args.loadmodel)
        except:
            print('\nUnexpected error:', sys.exc_info()[1])
            sys.exit(1)
    else:
        appraisal_predictor = text_cnn_model_appraisals(MAX_SEQUENCE_LENGTH, vocab_size,
                                EMBEDDING_DIMS, FILTER_SIZE, CONV_FILTERS,
                                embedding_matrix, DROPOUT, len(APPRAISALS), activation)
        appraisal_predictor.compile(OPTIMIZER, loss, metrics=metric)
        appraisal_predictor.fit(text_instances_padded, appraisals, batch_size=BATCH_SIZE,
                                epochs=EPOCHS, verbose=VERBOSITY)

    if (args.continous == False):
        weights = [0.50, 0.50, 0.50, 0.50, 0.50, 0.50, 0.50]
        preds = appraisal_predictor.predict(text_instances_padded_annotate)
        results = []
        for row in range(len(preds)):
            res = []
            for dim in range(len(APPRAISALS)):
                value = preds[row][dim]
                if (value >= weights[dim]):
                    value = 1
                else:
                    value = 0
                res.append(value)
            results.append(res)
        appraisal_predictions = list(results)
    else:
        preds = appraisal_predictor.predict(text_instances_padded_annotate)
        appraisal_predictions = list(preds)

    # args.annotate is expected to end in '.tsv' or '.csv'
    if (args.annotate.endswith('.tsv')):
        extension = '.tsv'
    elif (args.annotate.endswith('.csv')):
        extension = '.csv'
    out_file_name = args.annotate[:len(args.annotate) - 4] + '_appraisals' + extension

    first_line = True
    with open(out_file_name, 'w') as out_file:
        with open(args.annotate, 'r') as in_file:
            for i, line in enumerate(in_file):
                if (first_line):
                    # Append one column per appraisal dimension to the header
                    annotation = ''
                    for dimension in APPRAISALS:
                        annotation += sep + dimension
                    out_file.write(line.rstrip('\n') + str(annotation) + '\n')
                    first_line = False
                else:
                    annotation = ''
                    for p in range(len(APPRAISALS)):
                        annotation += sep + str(appraisal_predictions[i - 1][p])
                    out_file.write(line.rstrip('\n') + annotation + '\n')
    print('INFO: Created dataset with appraisal annotation: %s' % out_file_name)
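# NOTE (editorial sketch, not part of the original script): the padded text
# instances passed into these functions are assumed to come from the usual
# Keras tokenizer/padding pipeline, roughly as below; the helper and its names
# are illustrative only and never called here.
def pad_texts(texts, max_sequence_length):
    """Tokenize raw texts and pad the index sequences to a fixed length."""
    from keras.preprocessing.text import Tokenizer
    from keras.preprocessing.sequence import pad_sequences
    tokenizer = Tokenizer()
    tokenizer.fit_on_texts(texts)
    sequences = tokenizer.texts_to_sequences(texts)
    return pad_sequences(sequences, maxlen=max_sequence_length), tokenizer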
def performCrossValidation(x_data, y_data):
    if (args.continous):
        _reg_metrics = reg_metrics.metrics_regression(APPRAISALS, 2)
    else:
        _metrics = metrics.metrics(APPRAISALS, 2)

    for seed in range(ROUNDS):
        np.random.seed(seed)
        kfold = KFold(n_splits=KFOLDS, shuffle=True, random_state=seed)
        for train, test in kfold.split(x_data, y_data):
            K.clear_session()
            if (args.continous):
                activation = 'linear'  # Linear activation for continuous appraisal values
                # activation = 'sigmoid'  # Alternative: sigmoid activation
                loss = 'mse'  # Mean squared error loss
                metric = ['mse', 'mae']
            else:
                activation = 'sigmoid'
                loss = 'binary_crossentropy'
                metric = ['accuracy']

            appraisal_predictor = text_cnn_model_appraisals(MAX_SEQUENCE_LENGTH, vocab_size,
                                    EMBEDDING_DIMS, FILTER_SIZE, CONV_FILTERS,
                                    embedding_matrix, DROPOUT, len(APPRAISALS), activation)
            appraisal_predictor.compile(OPTIMIZER, loss, metrics=metric)

            if (args.quiet):
                for _ in range(EPOCHS):
                    appraisal_predictor.fit(text_instances_padded[train], appraisals[train],
                                            batch_size=BATCH_SIZE, epochs=1,
                                            verbose=VERBOSITY, class_weight=class_weight)
                    print('.', end='', flush=True)
            else:
                appraisal_predictor.fit(text_instances_padded[train], appraisals[train],
                                        batch_size=BATCH_SIZE, epochs=EPOCHS,
                                        verbose=VERBOSITY, class_weight=class_weight)

            if (args.continous == False):
                # weights = [0.55, 0.65, 0.48, 0.3, 0.425, 0.4, 0.45]  # Some experimental settings
                weights = [0.50, 0.50, 0.50, 0.50, 0.50, 0.50, 0.50]
                test_instances = text_instances_padded[test]
                preds = appraisal_predictor.predict(test_instances)
                results = []
                for row in range(len(preds)):
                    res = []
                    for dim in range(len(APPRAISALS)):
                        value = preds[row][dim]
                        if (value >= weights[dim]):
                            value = 1
                        else:
                            value = 0
                        res.append(value)
                    results.append(res)
                appraisal_predictions = np.array(results)
                _metrics.evaluateFold(appraisal_predictions, appraisals[test])
            else:
                test_instances = text_instances_padded[test]
                appraisal_test = appraisals[test]
                preds = appraisal_predictor.predict(test_instances)
                # for i in range(len(preds)):
                #     print('\n Predicted:', preds[i])
                #     print('   Correct: ', appraisal_test[i])
                appraisal_predictions = np.array(preds)
                _reg_metrics.evaluateFold(appraisal_predictions, appraisals[test])

    if (args.continous == False):
        if (args.format):
            _metrics.showFinalResults(args.format)
        else:
            _metrics.showFinalResults(format='text')
    else:
        _reg_metrics.showResults()
def evalTrainAndTestSet(text_instances_padded, text_instances_padded_test,
                        appraisals_train, appraisals_test,
                        class_labels_train, class_labels_test):
    emotions_train = pd.concat([class_labels_train, pd.get_dummies(class_labels_train)],
                               axis=1).drop(['Prior_Emotion'], axis=1)
    emotions_test = pd.concat([class_labels_test, pd.get_dummies(class_labels_test)],
                              axis=1).drop(['Prior_Emotion'], axis=1)

    print('\nINFO: Learning to predict emotions on %d instances...' % len(text_instances_padded))
    emotion_predictor = shallowNN_emotions_from_dimensions(
        len(APPRAISALS), CONV_FILTERS, DROPOUT, LABELS, 'softmax')
    emotion_predictor.compile(OPTIMIZER, 'categorical_crossentropy', metrics=['accuracy'])
    # vectors_shaped = np.expand_dims(vectors[train], axis=2)
    if (args.quiet):
        for _ in range(EPOCHS_E):
            emotion_predictor.fit(appraisals_train, emotions_train, batch_size=BATCH_SIZE,
                                  epochs=1, verbose=VERBOSITY)
            print('.', end='', flush=True)
    else:
        emotion_predictor.fit(appraisals_train, emotions_train, batch_size=BATCH_SIZE,
                              epochs=EPOCHS_E, verbose=VERBOSITY)

    if (args.continous):
        activation = 'linear'   # Linear activation
        activation = 'sigmoid'  # Overridden: sigmoid activation is used for continuous values
        loss = 'mse'            # Mean squared error loss
        metric = ['mse', 'mae']
    else:
        activation = 'sigmoid'
        loss = 'binary_crossentropy'
        metric = ['accuracy']

    appraisal_predictor = text_cnn_model_appraisals(MAX_SEQUENCE_LENGTH, vocab_size,
                            EMBEDDING_DIMS, FILTER_SIZE, CONV_FILTERS,
                            embedding_matrix, DROPOUT, len(APPRAISALS), activation)
    appraisal_predictor.compile(OPTIMIZER, loss, metrics=metric)
    # appraisals_shaped = np.expand_dims(appraisals, axis=2)

    print('\nINFO: Learning to predict appraisals on %d instances...' % len(text_instances_padded))
    if (args.quiet):
        for _ in range(EPOCHS_A):
            appraisal_predictor.fit(text_instances_padded, appraisals_train,
                                    batch_size=BATCH_SIZE, epochs=1, verbose=VERBOSITY)
            print('.', end='', flush=True)
    else:
        appraisal_predictor.fit(text_instances_padded, appraisals_train,
                                batch_size=BATCH_SIZE, epochs=EPOCHS_A, verbose=VERBOSITY)

    print('\nINFO: Testing on %d instances...' % len(class_labels_test))
    if (args.continous == False):
        # weights = [0.55, 0.65, 0.48, 0.3, 0.425, 0.4, 0.45]  # Some experimental settings
        weights = [0.50, 0.50, 0.50, 0.50, 0.50, 0.50, 0.50]
        preds = appraisal_predictor.predict(text_instances_padded_test)
        results = []
        for row in range(len(preds)):
            res = []
            for dim in range(len(APPRAISALS)):
                value = preds[row][dim]
                if (value >= weights[dim]):
                    value = 1
                else:
                    value = 0
                res.append(value)
            results.append(res)
        appraisal_predictions = np.array(results)
    else:
        preds = appraisal_predictor.predict(text_instances_padded_test)
        appraisal_predictions = np.array(preds)

    # Predict emotions based on appraisal predictions
    predicted_emotions = []
    # results = np.expand_dims(results, axis=2)
    emotions_predictions = emotion_predictor.predict(appraisal_predictions)
    for i in range(len(emotions_predictions)):
        index = np.argmax(emotions_predictions[i])
        predicted_emotions.append(LABELS[index])

    _metrics = metrics.metrics(class_labels_test, predicted_emotions, LABELS, 2)
    _metrics.showResults()
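# NOTE (editorial sketch, not part of the original script): an illustrative call,
# assuming text_instances_padded, appraisals and the 'Prior_Emotion' labels have
# been prepared elsewhere in the script; a scikit-learn split is one way to
# obtain the six arguments. Kept as comments so it is not executed.
#
#   from sklearn.model_selection import train_test_split
#   x_tr, x_te, a_tr, a_te, c_tr, c_te = train_test_split(
#       text_instances_padded, appraisals, class_labels, test_size=0.2, random_state=0)
#   evalTrainAndTestSet(x_tr, x_te, a_tr, a_te, c_tr, c_te)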
def performCrossValidation(x_data, y_data_appraisal, y_data_emotion):
    metrics_final = metrics.metrics(None, None, LABELS, 2)
    percentage_done = 1

    for seed in range(ROUNDS):
        np.random.seed(seed)
        kfold = KFold(n_splits=KFOLDS, shuffle=True, random_state=seed)
        for train, test in kfold.split(x_data, y_data_emotion):
            K.clear_session()
            emotions_train = pd.concat([y_data_emotion[train],
                                        pd.get_dummies(y_data_emotion[train])],
                                       axis=1).drop(['Prior_Emotion'], axis=1)
            emotions_test = pd.concat([y_data_emotion[test],
                                       pd.get_dummies(y_data_emotion[test])],
                                      axis=1).drop(['Prior_Emotion'], axis=1)

            ####################################################################
            # Task 1: Learn to predict appraisals from text
            ####################################################################
            if (args.continous):
                activation = 'linear'  # Linear activation for continuous appraisal values
                # activation = 'sigmoid'  # Alternative: sigmoid activation
                loss = 'mse'  # Mean squared error loss
                metric = ['mse', 'mae']
            else:
                activation = 'sigmoid'
                loss = 'binary_crossentropy'
                metric = ['accuracy']

            print('\nINFO: Learning to predict appraisals from text...')
            appraisal_predictor = text_cnn_model_appraisals(MAX_SEQUENCE_LENGTH, vocab_size,
                                    EMBEDDING_DIMS, FILTER_SIZE, CONV_FILTERS,
                                    embedding_matrix, DROPOUT, len(APPRAISALS), activation)
            appraisal_predictor.compile(OPTIMIZER, loss, metrics=metric)
            if (args.quiet):
                for _ in range(EPOCHS_A):
                    appraisal_predictor.fit(text_instances_padded[train], appraisals[train],
                                            batch_size=BATCH_SIZE, epochs=1,
                                            verbose=VERBOSITY, class_weight=class_weight)
                    print('.', end='', flush=True)
            else:
                appraisal_predictor.fit(text_instances_padded[train], appraisals[train],
                                        batch_size=BATCH_SIZE, epochs=EPOCHS_A,
                                        verbose=VERBOSITY, class_weight=class_weight)

            if (args.continous == False):
                # weights = [0.55, 0.65, 0.48, 0.3, 0.425, 0.4, 0.45]  # Some experimental settings
                weights = [0.50, 0.50, 0.50, 0.50, 0.50, 0.50, 0.50]
                test_instances = text_instances_padded[test]
                preds = appraisal_predictor.predict(test_instances)
                results = []
                for row in range(len(preds)):
                    res = []
                    for dim in range(len(APPRAISALS)):
                        value = preds[row][dim]
                        if (value >= weights[dim]):
                            value = 1
                        else:
                            value = 0
                        res.append(value)
                    results.append(res)
                appraisal_predictions = np.array(results)
            else:
                test_instances = text_instances_padded[test]
                appraisal_test = appraisals[test]
                preds = appraisal_predictor.predict(test_instances)
                # for i in range(len(preds)):
                #     print('\n Predicted:', preds[i])
                #     print('   Correct: ', appraisal_test[i])
                appraisal_predictions = np.array(preds)

            ####################################################################
            # Task 2: Learn to predict emotions from appraisals
            ####################################################################
            print('\nINFO: Learning to predict emotions from appraisals...')
            emotion_predictor = shallowNN_emotions_from_dimensions(
                len(APPRAISALS), CONV_FILTERS, DROPOUT, LABELS, 'softmax')
            emotion_predictor.compile(OPTIMIZER, 'categorical_crossentropy',
                                      metrics=['accuracy'])
            # vectors_shaped = np.expand_dims(vectors[train], axis=2)
            if (args.quiet):
                for _ in range(EPOCHS_E):
                    emotion_predictor.fit(appraisals[train], emotions_train,
                                          batch_size=BATCH_SIZE, epochs=1,
                                          verbose=VERBOSITY)
                    print('.', end='', flush=True)
            else:
                emotion_predictor.fit(appraisals[train], emotions_train,
                                      batch_size=BATCH_SIZE, epochs=EPOCHS_E,
                                      verbose=VERBOSITY)

            # Predict emotions based on appraisal predictions
            predicted_emotions = []
            # results = np.expand_dims(results, axis=2)
            emotions_predictions = emotion_predictor.predict(appraisal_predictions)
            for i in range(len(emotions_predictions)):
                index = np.argmax(emotions_predictions[i])
                predicted_emotions.append(LABELS[index])

            # Show results for this fold
            print('\n\nINFO: Evaluating CV-fold...')
            metrics_fold = metrics.metrics(y_data_emotion[test], predicted_emotions,
                                           LABELS, 2)
            metrics_fold.showResults()
            metrics_final.addIntermediateResults(y_data_emotion[test], predicted_emotions)

    # Final results over all folds
    metrics_final.showResults()
    metrics_final.showConfusionMatrix(False)
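# NOTE (editorial sketch, not part of the original script): the argmax loops that
# decode softmax outputs back into emotion names are equivalent to this
# vectorized, never-called helper.
def decode_emotions(probabilities, labels):
    """Map an (n, num_labels) probability matrix to a list of label names."""
    import numpy as np
    return [labels[i] for i in np.argmax(probabilities, axis=1)]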
def performCrossValidation(x_data, y_data):
    metrics_final = metrics.metrics(None, None, LABELS, 2)
    TP_total = 0
    size_total = 0
    percentage_done = 0

    # Phase 1: train and save the baseline model weights for every fold
    for seed in range(ROUNDS):
        np.random.seed(seed)
        kfold = KFold(n_splits=KFOLDS, shuffle=True, random_state=seed)
        for train, test in kfold.split(x_data, y_data):
            print('Training Baseline model %i/%i' % (percentage_done + 1, KFOLDS * ROUNDS))
            from keras import backend as K
            K.clear_session()
            # tf.reset_default_graph()
            classes_train = pd.concat(
                [y_data[train], pd.get_dummies(y_data[train])],
                axis=1).drop(['Prior_Emotion'], axis=1)
            classes_test = pd.concat(
                [y_data[test], pd.get_dummies(y_data[test])],
                axis=1).drop(['Prior_Emotion'], axis=1)

            text_to_emotion_model = text_cnn_model_baseline(
                MAX_SEQUENCE_LENGTH, vocab_size, EMBEDDING_DIMS, FILTER_SIZE,
                CONV_FILTERS, embedding_matrix, DROPOUT, LABELS, 'softmax')
            text_to_emotion_model.compile(OPTIMIZER, 'categorical_crossentropy',
                                          metrics=['accuracy'])
            text_to_emotion_model.fit(x_data[train], classes_train,
                                      batch_size=BATCH_SIZE,
                                      epochs=EPOCHS_text_to_emotion,
                                      verbose=VERBOSITY)
            text_to_emotion_model.save('saved_models/baseline_' + str(percentage_done))
            percentage_done += 1

    percentage_done = 0
    # Phase 2: train and save the pipeline model weights for every fold
    for seed in range(ROUNDS):
        np.random.seed(seed)
        kfold = KFold(n_splits=KFOLDS, shuffle=True, random_state=seed)
        for train, test in kfold.split(x_data, y_data):
            print('Training Pipeline model %i/%i' % (percentage_done + 1, KFOLDS * ROUNDS))
            from keras import backend as K
            K.clear_session()
            # tf.reset_default_graph()
            classes_train = pd.concat(
                [y_data[train], pd.get_dummies(y_data[train])],
                axis=1).drop(['Prior_Emotion'], axis=1)
            classes_test = pd.concat(
                [y_data[test], pd.get_dummies(y_data[test])],
                axis=1).drop(['Prior_Emotion'], axis=1)

            appraisal_emotion_predictor = shallowNN_emotions_from_dimensions(
                len(DIMENSIONS), LAYER_DIM, DROPOUT, LABELS, 'softmax')
            appraisal_emotion_predictor.compile(OPTIMIZER, 'categorical_crossentropy',
                                                metrics=['accuracy'])
            # vectors_shaped = np.expand_dims(vectors[train], axis=2)
            appraisal_emotion_predictor.fit(vectors[train], classes_train,
                                            batch_size=BATCH_SIZE, epochs=EPOCHS_p1,
                                            verbose=VERBOSITY)
            # Save weights
            appraisal_emotion_predictor.save('saved_models/dim_to_emotion_' + str(percentage_done))

            input_shape = sentence_enISEAR.shape[1]  # feature count
            model = text_cnn_model_appraisals(MAX_SEQUENCE_LENGTH, vocab_size,
                                              EMBEDDING_DIMS, FILTER_SIZE,
                                              CONV_FILTERS, embedding_matrix,
                                              DROPOUT, len(DIMENSIONS), 'sigmoid')
            model.compile(OPTIMIZER, 'binary_crossentropy', metrics=['accuracy'])
            # model.fit(data_padded[train], vectors[train], batch_size=BATCH_SIZE,
            #           epochs=EPOCHS_p2, verbose=VERBOSITY)
            model.fit(x_data[train], vectors[train], batch_size=BATCH_SIZE,
                      epochs=EPOCHS_p2, verbose=VERBOSITY, class_weight=class_weight)
            model.save('saved_models/text_to_dim_' + str(percentage_done))
            percentage_done += 1

    percentage_done = 0
    # Phase 3: load the saved models and evaluate them fold by fold
    for seed in range(ROUNDS):
        np.random.seed(seed)
        kfold = KFold(n_splits=KFOLDS, shuffle=True, random_state=seed)
        for train, test in kfold.split(x_data, y_data):
            print('\n\n############################################################')
            print('Evaluating fold %i/%i in run %i/%i'
                  % ((percentage_done % KFOLDS) + 1, KFOLDS, seed + 1, ROUNDS))
            print('############################################################')
            from keras import backend as K
            K.clear_session()
            tf.reset_default_graph()
            classes_train = pd.concat(
                [y_data[train], pd.get_dummies(y_data[train])],
                axis=1).drop(['Prior_Emotion'], axis=1)
            classes_test = pd.concat(
                [y_data[test], pd.get_dummies(y_data[test])],
                axis=1).drop(['Prior_Emotion'], axis=1)

            # Load the baseline model weights
            text_to_emotion_model = load_model('saved_models/baseline_' + str(percentage_done))

            # Evaluate baseline model
            predicted_classes_text_to_emotion = []
            predictions = text_to_emotion_model.predict(x_data[test])
            for i in range(len(predictions)):
                index = np.argmax(predictions[i])
                predicted_classes_text_to_emotion.append(LABELS[index])

            # Show results of baseline
            print('\n############################################################')
            print('Baseline result:')
            print('############################################################')
            metrics_fold = metrics.metrics(y_data[test],
                                           predicted_classes_text_to_emotion,
                                           LABELS, 2)
            metrics_fold.showResults()

            # Load appraisal models
            appraisal_emotion_predictor = load_model(
                'saved_models/dim_to_emotion_' + str(percentage_done))
            model = load_model('saved_models/text_to_dim_' + str(percentage_done))

            # Predict appraisals on the test fold
            weights = [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]
            preds = model.predict(data_padded[test])
            results = []
            for row in range(len(preds)):
                res = []
                for dim in range(len(DIMENSIONS)):
                    value = preds[row][dim]
                    if (value >= weights[dim]):
                        value = 1
                    else:
                        value = 0
                    res.append(value)
                results.append(res)
            results = np.array(results)

            # Predict emotions based on the predicted appraisals
            predicted_classes = []
            predictions = appraisal_emotion_predictor.predict(results)
            for i in range(len(predictions)):
                index = np.argmax(predictions[i])
                predicted_classes.append(LABELS[index])

            # Show results of pipeline
            print('\n############################################################')
            print('Pipeline result:')
            print('############################################################')
            metrics_fold = metrics.metrics(y_data[test], predicted_classes, LABELS, 2)
            metrics_fold.showResults()

            # Evaluate both models and create the oracle 'prediction'
            sentences = sentence_enISEAR_raw[test]
            classes = classes_enISEAR[test]
            TP_text_to_emotion = 0
            TP_text_to_appraisal_to_emotion = 0
            FN = 0
            pred_combined = []
            for i in range(len(x_data[test])):
                label_gold = classes.iloc[i]
                if (label_gold == predicted_classes_text_to_emotion[i]):
                    # Text-based system is correct
                    TP_text_to_emotion += 1
                    # green('Gold-Emotion      : ' + label_gold)
                    # green('Prediction T->E   : ' + str(predicted_classes_text_to_emotion[i]))
                    # green('Prediction T->A->E: ' + predicted_classes[i])
                    pred_combined.append(predicted_classes_text_to_emotion[i])
                elif (label_gold == predicted_classes[i]):
                    # Appraisal system is correct
                    TP_text_to_appraisal_to_emotion += 1  # = TN
                    # yellow('Gold-Emotion      : ' + label_gold)
                    # yellow('Prediction T->E   : ' + str(predicted_classes_text_to_emotion[i]))
                    # yellow('Prediction T->A->E: ' + predicted_classes[i])
                    pred_combined.append(predicted_classes[i])
                else:
                    FN += 1
                    # print('\nGold-Emotion      : ' + label_gold)
                    # print('Prediction T->E   : ' + str(predicted_classes_text_to_emotion[i]))
                    # print('Prediction T->A->E: ' + predicted_classes[i])
                    pred_combined.append(predicted_classes_text_to_emotion[i])

            percentage_done += 1
            print('\nPerforming CV... (%2d%%)' % percentage_done)
            size = len(predicted_classes)
            TP = TP_text_to_emotion + TP_text_to_appraisal_to_emotion
            accuracy = (TP / size) * 100
            print('Current fold:')
            print('Accuracy: %2.2f' % accuracy)
            print('Text-to-emotion TP          : %2d' % TP_text_to_emotion)
            print('Text-to-appr-to-emotion TP  : %2d' % TP_text_to_appraisal_to_emotion)

            # Evaluate the oracle 'prediction'
            metrics_fold = metrics.metrics(y_data[test], pred_combined, LABELS, 2)
            print('\n############################################################')
            print('Oracle result:')
            print('############################################################')
            metrics_fold.showResults()
            metrics_final.addIntermediateResults(y_data[test], pred_combined)
            TP_total += TP
            size_total += size

    print('\nFinal Result:')
    accuracy = (TP_total / size_total) * 100
    print('Accuracy: %2.2f' % accuracy)
    print(TP_total)
    print(size_total)
    metrics_final.writeResults(EXPERIMENTNAME, SAVEFILE)
    metrics_final.createMarkdownResults()
    return