import random

import numpy as np
from sklearn.metrics import classification_report

# The helpers below (list2str, get_classification_report, get_f1_scores,
# plot_confusion_matrix) and the logger come from the surrounding project;
# the exact import path is an assumption.
from nemo import logging
from nemo.collections.nlp.utils.callback_utils import (
    get_classification_report,
    get_f1_scores,
    list2str,
    plot_confusion_matrix,
)


def _eval_epochs_done_callback(task_name, global_vars, label_ids, graph_fold=None, normalize_cm=True):
    # Per-task token-level evaluation: reads '<task>_all_labels'/'<task>_all_preds'.
    labels = np.asarray(global_vars[task_name + '_all_labels'])
    preds = np.asarray(global_vars[task_name + '_all_preds'])

    # keep only the first subtoken of each word; the rest carry no label
    subtokens_mask = np.asarray(global_vars['all_subtokens_mask']) > 0.5
    labels = labels[subtokens_mask]
    preds = preds[subtokens_mask]

    accuracy = sum(labels == preds) / labels.shape[0]
    logging.info(f'Accuracy for task {task_name}: {accuracy}')

    # print predictions and labels for a small random subset of data
    sample_size = 20
    i = 0
    if preds.shape[0] > sample_size + 1:
        i = random.randint(0, preds.shape[0] - sample_size - 1)
    logging.info("Sampled preds: [%s]" % list2str(preds[i : i + sample_size]))
    logging.info("Sampled labels: [%s]" % list2str(labels[i : i + sample_size]))

    report = get_classification_report(labels, preds, label_ids)
    logging.info(report)

    # calculate and plot confusion_matrix
    if graph_fold:
        plot_confusion_matrix(labels, preds, graph_fold, label_ids, normalize=normalize_cm, prefix=task_name)

    return accuracy


def eval_epochs_done_callback(global_vars, label_ids, graph_fold=None, none_label_id=0, normalize_cm=True):
    # Token-level evaluation with accuracy, averaged F1 scores, and a per-class report.
    labels = np.asarray(global_vars['all_labels'])
    preds = np.asarray(global_vars['all_preds'])

    # keep only the first subtoken of each word
    subtokens_mask = np.asarray(global_vars['all_subtokens_mask']) > 0.5
    labels = labels[subtokens_mask]
    preds = preds[subtokens_mask]

    # print predictions and labels for a small random subset of data
    sample_size = 20
    i = 0
    if preds.shape[0] > sample_size + 1:
        i = random.randint(0, preds.shape[0] - sample_size - 1)
    logging.info("Sampled preds: [%s]" % list2str(preds[i : i + sample_size]))
    logging.info("Sampled labels: [%s]" % list2str(labels[i : i + sample_size]))

    accuracy = sum(labels == preds) / labels.shape[0]
    logging.info(f'Accuracy: {accuracy}')

    f1_scores = get_f1_scores(labels, preds, average_modes=['weighted', 'macro', 'micro'])
    for k, v in f1_scores.items():
        logging.info(f'{k}: {v}')

    report = get_classification_report(labels, preds, label_ids)
    logging.info(report)

    # calculate and plot confusion_matrix
    if graph_fold:
        plot_confusion_matrix(labels, preds, graph_fold, label_ids, normalize=normalize_cm)

    return {'Accuracy': accuracy}
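

# A minimal, self-contained sketch of the subtoken masking the callbacks above
# rely on. The toy ids below are illustrative assumptions, not real model
# output: 'all_subtokens_mask' marks the first subtoken of every word, and
# only those positions carry labels.
def _demo_subtoken_masking():
    # "john smith" -> subtokens ["john", "sm", "##ith"]
    all_labels = np.asarray([1, 2, 2])
    all_preds = np.asarray([1, 2, 1])
    all_subtokens_mask = np.asarray([1, 1, 0])

    mask = all_subtokens_mask > 0.5
    labels, preds = all_labels[mask], all_preds[mask]
    return (labels == preds).mean()  # 1.0: both word-level predictions match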


def _eval_epochs_done_callback(task_name, global_vars, label_ids, graph_fold=None, normalize_cm=True):
    # Per-task sequence-level evaluation: no subtoken mask, returns the full report.
    labels = np.array(global_vars[task_name + '_labels'])
    preds = np.array(global_vars[task_name + '_preds'])

    # calculate and plot confusion_matrix
    if graph_fold:
        plot_confusion_matrix(labels, preds, graph_fold, label_ids, normalize=normalize_cm, prefix=task_name)

    logging.info(f'{get_classification_report(labels, preds, label_ids)}')
    return get_classification_report(labels, preds, label_ids, output_dict=True)
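

# The dict returned above is assumed to follow the layout of sklearn's
# classification_report(..., output_dict=True): one nested entry per class
# plus 'accuracy', 'macro avg', and 'weighted avg'. A self-contained example
# using sklearn directly:
def _demo_report_dict():
    report = classification_report([0, 1, 1], [0, 1, 0], output_dict=True)
    # e.g. report['accuracy'] == 2 / 3 and report['1']['recall'] == 0.5
    return report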


def eval_epochs_done_callback(global_vars, graph_fold):
    labels = np.asarray(global_vars['all_labels'])
    preds = np.asarray(global_vars['all_preds'])

    accuracy = sum(labels == preds) / labels.shape[0]
    logging.info(f'Accuracy: {accuracy}')

    # print predictions and labels for a small random subset of data
    sample_size = 20
    i = 0
    if preds.shape[0] > sample_size + 1:
        i = random.randint(0, preds.shape[0] - sample_size - 1)
    logging.info("Sampled preds: [%s]" % list2str(preds[i : i + sample_size]))
    logging.info("Sampled labels: [%s]" % list2str(labels[i : i + sample_size]))

    plot_confusion_matrix(labels, preds, graph_fold)
    logging.info(classification_report(labels, preds))

    return {"accuracy": accuracy}


def eval_epochs_done_callback(global_vars, graph_fold):
    # Joint intent/slot evaluation without label maps.
    intent_labels = np.asarray(global_vars['all_intent_labels'])
    intent_preds = np.asarray(global_vars['all_intent_preds'])
    slot_labels = np.asarray(global_vars['all_slot_labels'])
    slot_preds = np.asarray(global_vars['all_slot_preds'])

    # slots are predicted per subtoken; keep only the first subtoken of each word
    subtokens_mask = np.asarray(global_vars['all_subtokens_mask']) > 0.5
    slot_labels = slot_labels[subtokens_mask]
    slot_preds = slot_preds[subtokens_mask]

    # print predictions and labels for a small random subset of data
    sample_size = 20
    i = 0
    if intent_preds.shape[0] > sample_size + 1:
        i = random.randint(0, intent_preds.shape[0] - sample_size - 1)
    logging.info("Sampled i_preds: [%s]" % list2str(intent_preds[i : i + sample_size]))
    logging.info("Sampled intents: [%s]" % list2str(intent_labels[i : i + sample_size]))
    logging.info("Sampled s_preds: [%s]" % list2str(slot_preds[i : i + sample_size]))
    logging.info("Sampled slots: [%s]" % list2str(slot_labels[i : i + sample_size]))

    plot_confusion_matrix(intent_labels, intent_preds, graph_fold)

    logging.info('Intent prediction results')
    intent_accuracy = sum(intent_labels == intent_preds) / intent_labels.shape[0]
    logging.info(f'Intent accuracy: {intent_accuracy}')
    logging.info(f'Classification report:\n{classification_report(intent_labels, intent_preds)}')

    logging.info('Slot prediction results')
    slot_accuracy = sum(slot_labels == slot_preds) / slot_labels.shape[0]
    logging.info(f'Slot accuracy: {slot_accuracy}')
    # report over the same (full) arrays used for the accuracy above
    logging.info(f'Classification report:\n{classification_report(slot_labels, slot_preds)}')

    return {'intent_accuracy': intent_accuracy, 'slot_accuracy': slot_accuracy}


def eval_epochs_done_callback(global_vars, label_ids, graph_fold=None, none_label_id=0, normalize_cm=True):
    labels = np.asarray(global_vars['all_labels'])
    preds = np.asarray(global_vars['all_preds'])

    # keep only the first subtoken of each word
    subtokens_mask = np.asarray(global_vars['all_subtokens_mask']) > 0.5
    labels = labels[subtokens_mask]
    preds = preds[subtokens_mask]

    accuracy = sum(labels == preds) / labels.shape[0]
    logging.info(f'Accuracy: {accuracy}')

    # print predictions and labels for a small random subset of data
    sample_size = 20
    i = 0
    if preds.shape[0] > sample_size + 1:
        i = random.randint(0, preds.shape[0] - sample_size - 1)
    logging.info("Sampled preds: [%s]" % list2str(preds[i : i + sample_size]))
    logging.info("Sampled labels: [%s]" % list2str(labels[i : i + sample_size]))

    # remove labels from label_ids that don't appear in the dev set, so that
    # the report and confusion matrix only cover classes that actually occur
    used_labels = set(labels) | set(preds)
    label_ids = {k: v for k, v in label_ids.items() if v in used_labels}

    # target_names iterates the filtered dict's keys (the label names)
    logging.info(classification_report(labels, preds, target_names=label_ids))

    # calculate and plot confusion_matrix
    if graph_fold:
        plot_confusion_matrix(labels, preds, graph_fold, label_ids, normalize=normalize_cm)

    return {'Accuracy': accuracy}
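

# A tiny self-contained illustration of the filtering above, with a
# hypothetical name -> id map: classes absent from both labels and
# predictions are dropped so that target_names lines up with the classes
# that actually occur in the dev set.
def _demo_label_filtering():
    label_ids = {'O': 0, 'PER': 1, 'LOC': 2}
    labels, preds = np.asarray([0, 1, 1]), np.asarray([0, 1, 0])
    used = set(labels) | set(preds)
    return {k: v for k, v in label_ids.items() if v in used}  # {'O': 0, 'PER': 1}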


def eval_epochs_done_callback(global_vars, intents_label_ids, slots_label_ids, graph_fold=None, normalize_cm=True):
    intent_labels = np.asarray(global_vars['all_intent_labels'])
    intent_preds = np.asarray(global_vars['all_intent_preds'])
    slot_labels = np.asarray(global_vars['all_slot_labels'])
    slot_preds = np.asarray(global_vars['all_slot_preds'])

    # slots are predicted per subtoken; keep only the first subtoken of each word
    subtokens_mask = np.asarray(global_vars['all_subtokens_mask']) > 0.5
    slot_labels = slot_labels[subtokens_mask]
    slot_preds = slot_preds[subtokens_mask]

    # print predictions and labels for a small random subset of data
    sample_size = 20
    i = 0
    if intent_preds.shape[0] > sample_size + 1:
        i = random.randint(0, intent_preds.shape[0] - sample_size - 1)
    logging.info("Sampled i_preds: [%s]" % list2str(intent_preds[i : i + sample_size]))
    logging.info("Sampled intents: [%s]" % list2str(intent_labels[i : i + sample_size]))
    logging.info("Sampled s_preds: [%s]" % list2str(slot_preds[i : i + sample_size]))
    logging.info("Sampled slots: [%s]" % list2str(slot_labels[i : i + sample_size]))

    # calculate, plot and save the confusion matrices
    if graph_fold:
        plot_confusion_matrix(
            intent_labels, intent_preds, graph_fold, intents_label_ids, normalize=normalize_cm, prefix='Intent'
        )
        plot_confusion_matrix(
            slot_labels, slot_preds, graph_fold, slots_label_ids, normalize=normalize_cm, prefix='Slot'
        )

    logging.info('Slot Prediction Results:')
    slot_accuracy = np.mean(slot_labels == slot_preds)
    logging.info(f'Slot Accuracy: {slot_accuracy}')
    f1_scores = get_f1_scores(slot_labels, slot_preds, average_modes=['weighted', 'macro', 'micro'])
    for k, v in f1_scores.items():
        logging.info(f'{k}: {v}')
    logging.info(f'\n{get_classification_report(slot_labels, slot_preds, label_ids=slots_label_ids)}')

    logging.info('Intent Prediction Results:')
    intent_accuracy = np.mean(intent_labels == intent_preds)
    logging.info(f'Intent Accuracy: {intent_accuracy}')
    f1_scores = get_f1_scores(intent_labels, intent_preds, average_modes=['weighted', 'macro', 'micro'])
    for k, v in f1_scores.items():
        logging.info(f'{k}: {v}')
    logging.info(f'\n{get_classification_report(intent_labels, intent_preds, label_ids=intents_label_ids)}')

    return {'intent_accuracy': intent_accuracy, 'slot_accuracy': slot_accuracy}
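

# Self-contained sketch of the shapes the joint callback expects; the values
# are illustrative assumptions. Intents are one id per utterance, while slots
# are one id per subtoken and get reduced to word level via the mask.
def _demo_joint_global_vars():
    global_vars = {
        'all_intent_labels': [3, 1],            # one intent per utterance
        'all_intent_preds': [3, 2],
        'all_slot_labels': [0, 5, 5, 0, 7],     # one slot id per subtoken
        'all_slot_preds': [0, 5, 6, 0, 7],
        'all_subtokens_mask': [1, 1, 0, 1, 1],  # 1 marks a word's first subtoken
    }
    mask = np.asarray(global_vars['all_subtokens_mask']) > 0.5
    return np.asarray(global_vars['all_slot_labels'])[mask]  # array([0, 5, 0, 7])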