def error_analysis(self):
    """
    Method for printing incorrect classifications for manual error inspection.

    For every class in self.classes the data set is reloaded, the same
    stratified train-test split as in evaluate() is reproduced (same
    test_size and random_state), and every test instance whose prediction
    disagrees with its original label is printed.

    Exits the process with status 1 if self.mode is neither 'train' nor 'eval'.
    """
    # Error Analysis is only possible in train or eval mode
    if self.mode not in ['train', 'eval']:
        print("ERROR: Error analysis is only possible in train and eval mode.")
        sys.exit(1)

    # Loop through classes and print incorrect classifications
    for clf_class in self.classes:
        print("Incorrect classifications for class {}:\n".format(clf_class))

        # Load training data for respective class to extract the test set
        data = get_training_set(self.train_path,
                                self.Fe,
                                label=clf_class,
                                original_labels=True)
        X = data.iloc[:, :-1]
        y = np.ravel(data.iloc[:, -1])

        # Get the correct train-test split; the training part is only needed
        # to fit the SVM feature scaler below
        X_train, X_test, _, _ = train_test_split(
            X,
            y,
            test_size=self.split,
            random_state=self.random_state,
            stratify=y)

        # The last two columns of X hold the original labels and the text,
        # everything before them are the features
        orig_labels_test = X_test.iloc[:, -2]
        orig_text_test = X_test.iloc[:, -1]
        X_test = X_test.iloc[:, :-2]

        # evaluate() trains the SVM on features scaled with a scaler fitted on
        # the training part of this split, so the test features have to be
        # scaled the same way before predicting
        if self.model == "svm":
            scaling = MinMaxScaler(feature_range=(-1, 1)).fit(
                X_train.iloc[:, :-2])
            X_test = pd.DataFrame(scaling.transform(X_test))

        clf = self.clfs[clf_class]

        # Get the predictions
        y_pred = clf.predict(X_test)

        for orig, text, pred in zip(orig_labels_test, orig_text_test, y_pred):
            # An instance is a gold positive if any of its comma separated
            # original labels starts with the class name
            is_gold_positive = any(
                label.startswith(clf_class) for label in orig.split(","))
            if pred != is_gold_positive:
                print("Text: {}, original label: {}, predicted label: {}".
                      format(text, orig, pred))
        print("\n")
def evaluate(self, span_detection=False, save=False):
    """
    Method for evaluating the classifiers on the held out test sets.

    For every class in self.classes a classifier is trained on the training
    part of a stratified split and three kinds of scores are printed for the
    test part: precision/recall/F1 of the binary classifier, precision/recall/F1
    of the speech/thought/writing annotation on the instances predicted as
    positive, and word-level accuracy of the predicted spans.

    Exits the process with status 1 if self.mode is neither 'train' nor 'eval'.

    :param span_detection: If True, use postprocessing method for better span detection.
    :param save: if True the clfs trained for evaluation are saved for further inspection
    """

    def percent(part, whole):
        # A zero denominator occurs e.g. when no instance was correctly
        # identified; report 0.0 instead of raising ZeroDivisionError
        if not whole:
            return 0.0
        return round((part / whole) * 100, 2)

    # Evaluation is only possible in train or eval mode
    if self.mode not in ['train', 'eval']:
        print("ERROR: Evaluation is only possible in train and eval mode.")
        sys.exit(1)

    majority_classes = {
        'direct': None,
        'indirect': None,
        'free_indirect': None,
        'reported': None
    }

    # Loop through classes and evaluate
    for clf_class in self.classes:
        # Load training data for respective class to extract the train and test set,
        # as well as the original labels and texts to compute the accuracy on word level
        data = get_training_set(self.train_path,
                                self.Fe,
                                label=clf_class,
                                original_labels=True)
        X = data.iloc[:, :-1]
        y = np.ravel(data.iloc[:, -1])

        # Get the correct train-test split
        X_train, X_test, y_train, y_test = train_test_split(
            X,
            y,
            test_size=self.split,
            random_state=self.random_state,
            stratify=y)

        # Get the original labels and text for span evaluation after the split
        y_orig_train = X_train.iloc[:, -2:]
        X_train = X_train.iloc[:, :-2]
        y_orig_test = X_test.iloc[:, -2:]
        X_test = X_test.iloc[:, :-2]

        # For SVM the features should be scaled for efficiency reasons
        if self.model == "svm":
            scaling = MinMaxScaler(feature_range=(-1, 1)).fit(X_train)
            X_train = pd.DataFrame(scaling.transform(X_train))
            X_test = pd.DataFrame(scaling.transform(X_test))

        # Use a wrapper for training the classifier, in order to trigger methods for countering class imbalance
        clf = self.models[self.model]['classifier'](
            **self.models[self.model]['parameter'][clf_class])
        clf_wrapped = CLFWrapper(clf)
        print("Training the {} classifier for label {}.\n".format(
            self.model, clf_class))

        # Get method for countering class imbalance if respective parameter is given
        if self.augment_data in ['oversampling', 'SMOTE']:
            augment_method = DATA_AUGMENT[self.augment_data]
        elif self.augment_data == 'augmentation':
            augment_method = DATA_AUGMENT[self.augment_data][clf_class]
        else:
            augment_method = None

        # Fit classifier on training data
        clf_wrapped.fit(X_train, y_train, augment_method=augment_method)

        # Get the trained classifier from the wrapper, the same way train() does
        clf = clf_wrapped.clf

        # Save the classifier if flag indicates this
        if save:
            self.clfs[clf_class] = clf
            # The target directory is not guaranteed to exist yet
            os.makedirs(os.path.join('models', self.model), exist_ok=True)
            joblib.dump(clf, 'models/{}/{}.clf'.format(self.model,
                                                       clf_class))

        y_pred = clf.predict(X_test)
        precision = precision_score(y_test, y_pred)
        recall = recall_score(y_test, y_pred)
        f1 = f1_score(y_test, y_pred)
        print(
            "Classifier for label {} on test set: Precision {}, Recall {}, F1 {}"
            .format(clf_class, precision, recall, f1))

        # y_orig_* hold two columns: the original labels and the text
        labels_test = y_orig_test.iloc[:, -2].tolist()
        texts_test = y_orig_test.iloc[:, -1].tolist()

        majority_classes[clf_class] = self.get_max_type(
            y_orig_train.iloc[:, -2:-1], clf_class)

        # Speech/thought/writing is only evaluated on the instances which the
        # classifier predicted as positive
        y_pred_stw = [
            self.annotate_stw(text,
                              clf_class,
                              majority_classes=majority_classes)
            for text, y_hat in zip(texts_test, y_pred) if y_hat
        ]
        y_true_s, y_true_t, y_true_w = get_labels_stw(
            [label for label, y_hat in zip(labels_test, y_pred) if y_hat],
            clf_class)

        gold_by_type = (('speech', y_true_s), ('thought', y_true_t),
                        ('writing', y_true_w))
        for stw_type, y_true_stw in gold_by_type:
            y_pred_type = [
                int(predicted == stw_type) for predicted in y_pred_stw
            ]
            precision = precision_score(y_true_stw, y_pred_type)
            recall = recall_score(y_true_stw, y_pred_type)
            f1 = f1_score(y_true_stw, y_pred_type)
            print(
                "Classification for label {} on predictions for class {} on test set: Precision {}, Recall {}, F1 {} (Count instances: {})"
                .format(stw_type, clf_class, precision, recall, f1,
                        sum(y_true_stw)))

        # Evaluate accuracy of span prediction
        y_pred_test = y_orig_test.copy()

        # Get full spans before postprocessing: a positive prediction labels
        # the text from offset 0 up to its full length
        y_pred_test.iloc[:, 0] = [
            "{},0,{}".format(clf_class, len(text))
            if int(y_hat) == 1 else ""
            for text, y_hat in zip(texts_test, y_pred)
        ]

        # Do span detection if chosen
        if span_detection:
            y_pred_test.iloc[:, 0] = [
                postprocess_spans(row, cl=clf_class)
                for _, row in y_pred_test.iterrows()
            ]

        # Mark gold and predicted labeled words with different signs
        # ('$' is checked for gold words below, '#' for predicted words)
        marked_text_gold = [
            mark_labeled_words(text, label, clf_class)
            for label, text in y_orig_test.values
        ]
        marked_text_predicted = [
            mark_labeled_words(text, label, clf_class, mark='#')
            for label, text in y_pred_test.values
        ]

        num_words_correctly_marked_total = 0
        num_words_incorrectly_marked_total = 0
        num_words_total = 0
        num_words_correctly_marked_correct_labels = 0
        num_words_incorrectly_marked_correct_labels = 0
        num_words_correct_labels = 0

        for gold_segment, pred_segment, y_hat in zip(marked_text_gold,
                                                     marked_text_predicted,
                                                     y_pred):
            tokens = gold_segment.split()
            tokens_pred = pred_segment.split()
            num_words_total += len(tokens)

            predicted_positive = int(y_hat) == 1
            has_gold_words = any(token.endswith('$') for token in tokens)

            # Correctly identified instances
            correctly_identified = has_gold_words and predicted_positive
            if correctly_identified:
                num_words_correct_labels += len(tokens)

            for token_pred, token_gold in zip(tokens_pred, tokens):
                pred_marked = token_pred.endswith('#')
                gold_marked = token_gold.endswith('$')
                if pred_marked and gold_marked:
                    num_words_correctly_marked_total += 1
                    num_words_correctly_marked_correct_labels += 1
                elif pred_marked:
                    num_words_incorrectly_marked_total += 1
                    if correctly_identified:
                        num_words_incorrectly_marked_correct_labels += 1
                elif gold_marked:
                    num_words_incorrectly_marked_total += 1
                    if predicted_positive:
                        num_words_incorrectly_marked_correct_labels += 1
                else:
                    num_words_correctly_marked_total += 1
                    if correctly_identified:
                        num_words_correctly_marked_correct_labels += 1

        print(
            "Word-level accuracy all instances: {}% of total words correctly labeled, {}% of total words incorrectly labeled."
            .format(
                percent(num_words_correctly_marked_total, num_words_total),
                percent(num_words_incorrectly_marked_total,
                        num_words_total)))
        print(
            "Word-level accuracy within correctly identified instances: {}% of words within labeled instances correctly labeled, {}% of words within labeled instances incorrectly labeled.\n"
            .format(
                percent(num_words_correctly_marked_correct_labels,
                        num_words_correct_labels),
                percent(num_words_incorrectly_marked_correct_labels,
                        num_words_correct_labels)))
    return
def train(self, clf_class, cross_val=False, augment_data='oversampling'):
    """
    Train a binary classifier with the given ML technique (model) to be used in classification of clf_class instances.

    The classifier is fitted on all available training data, written to
    'models/<model>/<clf_class>.clf' and the majority type for clf_class is
    stored in self.majority_classes.

    :param clf_class: label of the positive class instances.
    :param cross_val: if True, print evaluation with stratified 10-fold cross validation.
    :param augment_data: keyword for optional method to counter class imbalance within the training data.
    :return: the trained classifier.
    """
    print("Training the {} classifier for label {}.\n".format(
        self.model, clf_class))

    # Load training data
    data = get_training_set(self.train_path,
                            self.Fe,
                            label=clf_class,
                            original_labels=True)
    X = data.iloc[:, :-1]
    y = np.ravel(data.iloc[:, -1:])

    # Get the original labels for speech, thought, writing classification
    y_orig = X.iloc[:, -2:-1]
    X = X.iloc[:, :-2]

    # For SVM the features should be scaled for efficiency reasons
    # NOTE(review): the scaler is fitted on all rows, including the ones the
    # cross validation branch below holds out - confirm this is intended
    if self.model == "svm":
        scaling = MinMaxScaler(feature_range=(-1, 1)).fit(X)
        X = pd.DataFrame(scaling.transform(X))

    # Use a wrapper for training the classifier, in order to trigger methods for countering class imbalance
    clf = self.models[self.model]['classifier'](
        **self.models[self.model]['parameter'][clf_class])
    clf_wrapped = CLFWrapper(clf)

    print("\nTraining...")

    # Get method for countering class imbalance if respective parameter is given
    if augment_data in ['oversampling', 'SMOTE']:
        augment_method = DATA_AUGMENT[augment_data]
    elif augment_data == 'augmentation':
        augment_method = DATA_AUGMENT[augment_data][clf_class]
    else:
        augment_method = None

    if cross_val:
        # For cross validation, use stratified train-test split in order to have a hold-out test-set
        # which is NOT used as a dev set in cross validation
        X_train, _, y_train, _ = train_test_split(
            X,
            y,
            test_size=self.split,
            random_state=self.random_state,
            stratify=y)

        # Stratified 10-fold cross validation, treatment of imbalanced data sets by oversampling,
        # data augmentation etc. is triggered via the fit_params parameter
        # NOTE(review): newer scikit-learn releases rename `fit_params` to
        # `params` - confirm against the version this project pins
        scores = {}
        for metric in ('recall', 'precision', 'f1'):
            scores[metric] = cross_val_score(
                clf_wrapped,
                X_train,
                y_train,
                cv=10,
                scoring=metric,
                fit_params={'augment_method': augment_method})

        # Precision, Recall, F1
        print(
            "Scores on training set with 10-fold cross validation for class {}: Precision {}, Recall {}, F1 {}"
            .format(clf_class, scores['precision'].mean(),
                    scores['recall'].mean(), scores['f1'].mean()))

    # After evaluation fit classifier on all available training data
    clf_wrapped.fit(X, y, augment_method=augment_method)

    # Get the trained classifier
    clf = clf_wrapped.clf

    # Make sure the model specific directory the classifier is written to
    # exists, not only the top-level 'models' directory
    os.makedirs(os.path.join(os.getcwd(), "models", self.model),
                exist_ok=True)

    print("Saving the trained classifier...")

    # Save the trained classifier
    joblib.dump(clf, 'models/{}/{}.clf'.format(self.model, clf_class))

    # Get the majority class (one of speech, thought, writing) from data for clf_class
    self.majority_classes[clf_class] = self.get_max_type(y_orig, clf_class)

    print("Done.\n")
    return clf