def _eval_epochs_done_callback(task_name,
                               global_vars,
                               label_ids,
                               graph_fold=None,
                               normalize_cm=True):
    labels = np.asarray(global_vars[task_name + '_all_labels'])
    preds = np.asarray(global_vars[task_name + '_all_preds'])
    # Evaluate only positions flagged in the subtoken mask (stored as 0/1 floats).
    subtokens_mask = np.asarray(global_vars['all_subtokens_mask']) > 0.5

    labels = labels[subtokens_mask]
    preds = preds[subtokens_mask]

    accuracy = sum(labels == preds) / labels.shape[0]
    logging.info(f'Accuracy for task {task_name}: {accuracy}')

    # print predictions and labels for a small random subset of data
    sample_size = 20
    i = 0
    if preds.shape[0] > sample_size + 1:
        i = random.randint(0, preds.shape[0] - sample_size - 1)
    logging.info("Sampled preds: [%s]" % list2str(preds[i:i + sample_size]))
    logging.info("Sampled labels: [%s]" % list2str(labels[i:i + sample_size]))

    classification_report = get_classification_report(labels, preds, label_ids)
    logging.info(classification_report)

    # calculate and plot confusion_matrix
    if graph_fold:
        plot_confusion_matrix(labels,
                              preds,
                              graph_fold,
                              label_ids,
                              normalize=normalize_cm,
                              prefix=task_name)
    return accuracy
def eval_epochs_done_callback(global_vars, label_ids, graph_fold=None, none_label_id=0, normalize_cm=True):
    labels = np.asarray(global_vars['all_labels'])
    preds = np.asarray(global_vars['all_preds'])
    subtokens_mask = np.asarray(global_vars['all_subtokens_mask']) > 0.5

    labels = labels[subtokens_mask]
    preds = preds[subtokens_mask]

    # print predictions and labels for a small random subset of data
    sample_size = 20
    i = 0
    if preds.shape[0] > sample_size + 1:
        i = random.randint(0, preds.shape[0] - sample_size - 1)
    logging.info("Sampled preds: [%s]" % list2str(preds[i : i + sample_size]))
    logging.info("Sampled labels: [%s]" % list2str(labels[i : i + sample_size]))

    accuracy = sum(labels == preds) / labels.shape[0]
    logging.info(f'Accuracy: {accuracy}')

    f1_scores = get_f1_scores(labels, preds, average_modes=['weighted', 'macro', 'micro'])
    for k, v in f1_scores.items():
        logging.info(f'{k}: {v}')

    classification_report = get_classification_report(labels, preds, label_ids)
    logging.info(classification_report)

    # calculate and plot confusion_matrix
    if graph_fold:
        plot_confusion_matrix(labels, preds, graph_fold, label_ids, normalize=normalize_cm)

    return {'Accuracy': accuracy}
def eval_epochs_done_callback(global_vars, graph_fold):
    labels = np.asarray(global_vars['all_labels'])
    preds = np.asarray(global_vars['all_preds'])
    accuracy = sum(labels == preds) / labels.shape[0]
    logging.info(f'Accuracy: {accuracy}')

    # print predictions and labels for a small random subset of data
    sample_size = 20
    i = 0
    if preds.shape[0] > sample_size + 1:
        i = random.randint(0, preds.shape[0] - sample_size - 1)
    logging.info("Sampled preds: [%s]" % list2str(preds[i:i + sample_size]))
    logging.info("Sampled labels: [%s]" % list2str(labels[i:i + sample_size]))
    plot_confusion_matrix(labels, preds, graph_fold)
    logging.info(classification_report(labels, preds))
    return {"accuracy": accuracy}
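The snippets here all lean on the same helpers (list2str, plot_confusion_matrix, the metric functions) from the surrounding utility module. Below is a minimal, self-contained sketch of the core accuracy-and-sampling pattern; the list2str stub is an assumption standing in for the real helper so the snippet runs on its own.

import logging
import random

import numpy as np

logging.basicConfig(level=logging.INFO)

def list2str(values):
    # Stand-in for the original helper: space-join a sequence of ids.
    return ' '.join(str(v) for v in values)

# Synthetic eval state mimicking the global_vars dict filled during evaluation.
global_vars = {
    'all_labels': [random.randint(0, 3) for _ in range(100)],
    'all_preds': [random.randint(0, 3) for _ in range(100)],
}

labels = np.asarray(global_vars['all_labels'])
preds = np.asarray(global_vars['all_preds'])
logging.info(f'Accuracy: {np.mean(labels == preds)}')
logging.info('Sampled preds: [%s]' % list2str(preds[:20]))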
Example #4
def eval_epochs_done_callback(global_vars, graph_fold):
    intent_labels = np.asarray(global_vars['all_intent_labels'])
    intent_preds = np.asarray(global_vars['all_intent_preds'])

    slot_labels = np.asarray(global_vars['all_slot_labels'])
    slot_preds = np.asarray(global_vars['all_slot_preds'])
    subtokens_mask = np.asarray(global_vars['all_subtokens_mask']) > 0.5

    slot_labels = slot_labels[subtokens_mask]
    slot_preds = slot_preds[subtokens_mask]

    # print predictions and labels for a small random subset of data
    sample_size = 20
    i = 0
    if intent_preds.shape[0] > sample_size + 1:
        i = random.randint(0, intent_preds.shape[0] - sample_size - 1)
    logging.info("Sampled i_preds: [%s]" %
                 list2str(intent_preds[i:i + sample_size]))
    logging.info("Sampled intents: [%s]" %
                 list2str(intent_labels[i:i + sample_size]))
    logging.info("Sampled s_preds: [%s]" %
                 list2str(slot_preds[i:i + sample_size]))
    logging.info("Sampled slots: [%s]" %
                 list2str(slot_labels[i:i + sample_size]))

    plot_confusion_matrix(intent_labels, intent_preds, graph_fold)

    logging.info('Intent prediction results')
    correct_preds = sum(intent_labels == intent_preds)
    intent_accuracy = correct_preds / intent_labels.shape[0]
    logging.info(f'Intent accuracy: {intent_accuracy}')
    logging.info(f'Classification report:\n'
                 f'{classification_report(intent_labels, intent_preds)}')

    logging.info('Slot prediction results')
    slot_accuracy = sum(slot_labels == slot_preds) / slot_labels.shape[0]
    logging.info(f'Slot accuracy: {slot_accuracy}')
    logging.info(f'Classification report:\n'
                 f'{classification_report(slot_labels[:-2], slot_preds[:-2])}')

    return {
        'intent_accuracy': intent_accuracy,
        'slot_accuracy': slot_accuracy
    }
Example #5
    def convert_sequences_to_features(self, all_sent_subtokens, sent_labels,
                                      tokenizer, max_seq_length):
        """Loads a data file into a list of `InputBatch`s.
        """

        self.features = []
        for sent_id in range(len(all_sent_subtokens)):
            sent_subtokens = all_sent_subtokens[sent_id]
            sent_label = sent_labels[sent_id]

            input_ids = [
                tokenizer._convert_token_to_id(t) for t in sent_subtokens
            ]

            # The mask has 1 for real tokens and 0 for padding tokens.
            # Only real tokens are attended to.
            input_mask = [1] * len(input_ids)

            # Zero-pad up to the sequence length.
            while len(input_ids) < max_seq_length:
                input_ids.append(0)
                input_mask.append(0)
            segment_ids = [0] * max_seq_length

            assert len(input_ids) == max_seq_length
            assert len(input_mask) == max_seq_length

            if sent_id == 0:
                logging.info("*** Example ***")
                logging.info("example_index: %s" % sent_id)
                logging.info("subtokens: %s" % " ".join(sent_subtokens))
                logging.info("sent_label: %s" % sent_label)
                logging.info("input_ids: %s" % list2str(input_ids))
                logging.info("input_mask: %s" % list2str(input_mask))

            self.features.append(
                InputFeatures(
                    sent_id=sent_id,
                    sent_label=sent_label,
                    input_ids=input_ids,
                    input_mask=input_mask,
                    segment_ids=segment_ids,
                ))
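The method above appends to InputFeatures objects defined elsewhere in the source codebase. A plausible minimal container matching the keyword arguments used above might look like the following sketch (a hypothetical stand-in, not the original class):

from dataclasses import dataclass
from typing import List

@dataclass
class InputFeatures:
    """Hypothetical stand-in for the original container: one featurized sentence."""
    sent_id: int
    sent_label: int
    input_ids: List[int]
    input_mask: List[int]
    segment_ids: List[int]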
Example #6
def eval_epochs_done_callback(global_vars,
                              label_ids,
                              graph_fold=None,
                              none_label_id=0,
                              normalize_cm=True):
    labels = np.asarray(global_vars['all_labels'])
    preds = np.asarray(global_vars['all_preds'])
    subtokens_mask = np.asarray(global_vars['all_subtokens_mask']) > 0.5

    labels = labels[subtokens_mask]
    preds = preds[subtokens_mask]

    accuracy = sum(labels == preds) / labels.shape[0]
    logging.info(f'Accuracy: {accuracy}')

    # print predictions and labels for a small random subset of data
    sample_size = 20
    i = 0
    if preds.shape[0] > sample_size + 1:
        i = random.randint(0, preds.shape[0] - sample_size - 1)
    logging.info("Sampled preds: [%s]" % list2str(preds[i:i + sample_size]))
    logging.info("Sampled labels: [%s]" % list2str(labels[i:i + sample_size]))

    # remove labels from label_ids that don't appear in the dev set
    used_labels = set(labels) | set(preds)
    label_ids = {
        k: label_ids[k]
        for k, v in label_ids.items() if v in used_labels
    }

    logging.info(classification_report(labels, preds, target_names=label_ids))

    # calculate and plot confusion_matrix
    if graph_fold:
        plot_confusion_matrix(labels,
                              preds,
                              graph_fold,
                              label_ids,
                              normalize=normalize_cm)

    return {'Accuracy': accuracy}
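Example #7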
def eval_epochs_done_callback(global_vars, output_dir, task_name):
    labels = np.asarray(global_vars['all_labels'])
    preds = np.asarray(global_vars['all_preds'])

    # print predictions and labels for a small random subset of data
    sample_size = 20
    i = 0
    if preds.shape[0] > sample_size + 1:
        i = random.randint(0, preds.shape[0] - sample_size - 1)

    logging.info("Task name: %s" % task_name.upper())
    logging.info("Sampled preds: [%s]" % list2str(preds[i : i + sample_size]))
    logging.info("Sampled labels: [%s]" % list2str(labels[i : i + sample_size]))

    results = compute_metrics(task_name, preds, labels)

    os.makedirs(output_dir, exist_ok=True)
    with open(os.path.join(output_dir, task_name + '.txt'), 'w') as f:
        f.write('labels\t' + list2str(labels) + '\n')
        f.write('preds\t' + list2str(preds) + '\n')

    logging.info(results)

    return results
Example #8
def merge(data_dir, subdirs, dataset_name, modes=['train', 'test']):
    outfold = f'{data_dir}/{dataset_name}'
    if if_exist(outfold, [f'{mode}.tsv' for mode in modes]):
        logging.info(DATABASE_EXISTS_TMP.format(dataset_name, outfold))
        slots = get_vocab(f'{outfold}/dict.slots.csv')
        none_slot = 0
        for key in slots:
            if slots[key] == 'O':
                none_slot = key
                break
        return outfold, int(none_slot)

    os.makedirs(outfold, exist_ok=True)

    data_files, slot_files = {}, {}
    for mode in modes:
        data_files[mode] = open(f'{outfold}/{mode}.tsv', 'w')
        data_files[mode].write('sentence\tlabel\n')
        slot_files[mode] = open(f'{outfold}/{mode}_slots.tsv', 'w')

    # Merged vocabularies: ids from each subdir are offset by the running
    # vocabulary size so they stay unique, except 'O', which is shared.
    intents, slots = {}, {}
    intent_shift, slot_shift = 0, 0
    none_intent, none_slot = -1, -1

    for subdir in subdirs:
        curr_intents = get_vocab(f'{data_dir}/{subdir}/dict.intents.csv')
        curr_slots = get_vocab(f'{data_dir}/{subdir}/dict.slots.csv')

        for key in curr_intents:
            if intent_shift > 0 and curr_intents[key] == 'O':
                continue
            if curr_intents[key] == 'O' and intent_shift == 0:
                none_intent = int(key)
            intents[int(key) + intent_shift] = curr_intents[key]

        for key in curr_slots:
            if slot_shift > 0 and curr_slots[key] == 'O':
                continue
            if slot_shift == 0 and curr_slots[key] == 'O':
                none_slot = int(key)
            slots[int(key) + slot_shift] = curr_slots[key]

        for mode in modes:
            with open(f'{data_dir}/{subdir}/{mode}.tsv', 'r') as f:
                for line in f.readlines()[1:]:
                    text, label = line.strip().split('\t')
                    label = int(label)
                    if curr_intents[label] == 'O':
                        label = none_intent
                    else:
                        label = label + intent_shift
                    data_files[mode].write(f'{text}\t{label}\n')

            with open(f'{data_dir}/{subdir}/{mode}_slots.tsv', 'r') as f:
                for line in f.readlines():
                    labels = [int(label) for label in line.strip().split()]
                    shifted_labels = []
                    for label in labels:
                        if curr_slots[label] == 'O':
                            shifted_labels.append(none_slot)
                        else:
                            shifted_labels.append(label + slot_shift)
                    slot_files[mode].write(list2str(shifted_labels) + '\n')

        intent_shift += len(curr_intents)
        slot_shift += len(curr_slots)

    # Flush and close the merged data files before writing the vocabularies.
    for mode in modes:
        data_files[mode].close()
        slot_files[mode].close()

    write_vocab_in_order(intents, f'{outfold}/dict.intents.csv')
    write_vocab_in_order(slots, f'{outfold}/dict.slots.csv')
    return outfold, none_slot
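To make merge's offset bookkeeping concrete, here is a toy re-implementation of just the vocabulary-merging step on two invented slot vocabularies (label names are made up for illustration): ids from each subsequent dataset are shifted by the running vocabulary size, and every later 'O' collapses onto the first dataset's 'O'.

def merge_vocabs(vocab_list):
    # vocab_list: list of {id: label} dicts, each containing a shared 'O'.
    merged, shift, none_id = {}, 0, None
    for vocab in vocab_list:
        for key, label in vocab.items():
            if label == 'O':
                if shift == 0:
                    none_id = key
                    merged[key] = 'O'
                continue  # later 'O' entries collapse onto the first one
            merged[key + shift] = label
        shift += len(vocab)
    return merged, none_id

# Toy vocabularies with per-dataset ids.
snips = {0: 'O', 1: 'B-artist', 2: 'B-album'}
atis = {0: 'O', 1: 'B-city', 2: 'B-airline'}
print(merge_vocabs([snips, atis]))
# ({0: 'O', 1: 'B-artist', 2: 'B-album', 4: 'B-city', 5: 'B-airline'}, 0)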
Example #9
def eval_epochs_done_callback(global_vars,
                              intents_label_ids,
                              slots_label_ids,
                              graph_fold=None,
                              normalize_cm=True):
    intent_labels = np.asarray(global_vars['all_intent_labels'])
    intent_preds = np.asarray(global_vars['all_intent_preds'])

    slot_labels = np.asarray(global_vars['all_slot_labels'])
    slot_preds = np.asarray(global_vars['all_slot_preds'])
    subtokens_mask = np.asarray(global_vars['all_subtokens_mask']) > 0.5

    slot_labels = slot_labels[subtokens_mask]
    slot_preds = slot_preds[subtokens_mask]

    # print predictions and labels for a small random subset of data
    sample_size = 20
    i = 0
    if intent_preds.shape[0] > sample_size + 1:
        i = random.randint(0, intent_preds.shape[0] - sample_size - 1)
    logging.info("Sampled i_preds: [%s]" %
                 list2str(intent_preds[i:i + sample_size]))
    logging.info("Sampled intents: [%s]" %
                 list2str(intent_labels[i:i + sample_size]))
    logging.info("Sampled s_preds: [%s]" %
                 list2str(slot_preds[i:i + sample_size]))
    logging.info("Sampled slots: [%s]" %
                 list2str(slot_labels[i:i + sample_size]))

    if graph_fold:
        # calculate, plot and save the confusion_matrix
        plot_confusion_matrix(intent_labels,
                              intent_preds,
                              graph_fold,
                              intents_label_ids,
                              normalize=normalize_cm,
                              prefix='Intent')
        plot_confusion_matrix(slot_labels,
                              slot_preds,
                              graph_fold,
                              slots_label_ids,
                              normalize=normalize_cm,
                              prefix='Slot')

    logging.info('Slot Prediction Results:')
    slot_accuracy = np.mean(slot_labels == slot_preds)
    logging.info(f'Slot Accuracy: {slot_accuracy}')
    f1_scores = get_f1_scores(slot_labels,
                              slot_preds,
                              average_modes=['weighted', 'macro', 'micro'])
    for k, v in f1_scores.items():
        logging.info(f'{k}: {v}')

    logging.info(
        f'\n {get_classification_report(slot_labels, slot_preds, label_ids=slots_label_ids)}'
    )

    logging.info('Intent Prediction Results:')
    intent_accuracy = np.mean(intent_labels == intent_preds)
    logging.info(f'Intent Accuracy: {intent_accuracy}')
    f1_scores = get_f1_scores(intent_labels,
                              intent_preds,
                              average_modes=['weighted', 'macro', 'micro'])
    for k, v in f1_scores.items():
        logging.info(f'{k}: {v}')

    logging.info(
        f'\n {get_classification_report(intent_labels, intent_preds, label_ids=intents_label_ids)}'
    )

    return {
        'intent_accuracy': intent_accuracy,
        'slot_accuracy': slot_accuracy
    }
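The epoch-end callbacks above take configuration arguments beyond global_vars. If the surrounding framework expects a callback of fixed arity, one generic way to wire them up is to bind the static arguments in advance with functools.partial. The trainer API itself is not shown in these snippets, so this is only a sketch, and the label maps are toy values standing in for the real dict.*.csv vocabularies.

from functools import partial

# Toy label maps; the real ones come from the dataset's vocabulary files.
intents_label_ids = {'greet': 0, 'book_flight': 1}
slots_label_ids = {'O': 0, 'B-city': 1, 'I-city': 2}

epochs_done = partial(eval_epochs_done_callback,
                      intents_label_ids=intents_label_ids,
                      slots_label_ids=slots_label_ids,
                      graph_fold='outputs/graphs',
                      normalize_cm=True)
# At epoch end the framework then only needs to call: epochs_done(global_vars)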