Example #1
def do_eval(args):
    paddle.set_device(args.device)

    # Create dataset, tokenizer and dataloader.
    train_ds, eval_ds = load_dataset('msra_ner',
                                     splits=('train', 'test'),
                                     lazy=False)
    tokenizer = BertTokenizer.from_pretrained(args.model_name_or_path)

    label_list = train_ds.label_list
    label_num = len(label_list)
    no_entity_id = label_num - 1
    trans_func = partial(tokenize_and_align_labels,
                         tokenizer=tokenizer,
                         no_entity_id=no_entity_id,
                         max_seq_len=args.max_seq_length)
    # -100 is skipped by CrossEntropyLoss via ignore_index below.
    ignore_label = -100
    batchify_fn = lambda samples, fn=Dict({
        'input_ids': Pad(axis=0, pad_val=tokenizer.pad_token_id, dtype='int32'),  # input
        'token_type_ids': Pad(axis=0, pad_val=tokenizer.pad_token_type_id, dtype='int32'),  # segment
        'seq_len': Stack(dtype='int64'),
        'labels': Pad(axis=0, pad_val=ignore_label, dtype='int64')  # label
    }): fn(samples)
    eval_ds = eval_ds.map(trans_func)
    eval_data_loader = DataLoader(dataset=eval_ds,
                                  collate_fn=batchify_fn,
                                  num_workers=0,
                                  batch_size=args.batch_size,
                                  return_list=True)

    # Define the model network and its loss
    model = BertForTokenClassification.from_pretrained(args.model_name_or_path,
                                                       num_classes=label_num)
    if args.init_checkpoint_path:
        model_dict = paddle.load(args.init_checkpoint_path)
        model.set_dict(model_dict)
    loss_fct = paddle.nn.loss.CrossEntropyLoss(ignore_index=ignore_label)

    metric = ChunkEvaluator(label_list=label_list)

    model.eval()
    metric.reset()
    for step, batch in enumerate(eval_data_loader):
        input_ids, token_type_ids, length, labels = batch
        logits = model(input_ids, token_type_ids)
        loss = loss_fct(logits, labels)
        avg_loss = paddle.mean(loss)
        preds = logits.argmax(axis=2)
        num_infer_chunks, num_label_chunks, num_correct_chunks = metric.compute(
            length, preds, labels)
        metric.update(num_infer_chunks.numpy(), num_label_chunks.numpy(),
                      num_correct_chunks.numpy())
        precision, recall, f1_score = metric.accumulate()
    print("eval loss: %f, precision: %f, recall: %f, f1: %f" %
          (avg_loss, precision, recall, f1_score))
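
# A minimal, self-contained sketch of the compute/update/accumulate pattern
# used above, on toy tensors (illustrative values, not from the source):
import paddle
from paddlenlp.metrics import ChunkEvaluator

demo_metric = ChunkEvaluator(label_list=['O', 'B-PER', 'I-PER'])
demo_lengths = paddle.to_tensor([3])           # valid tokens per sequence
demo_preds = paddle.to_tensor([[1, 2, 0]])     # B-PER I-PER O
demo_labels = paddle.to_tensor([[1, 2, 0]])
n_infer, n_label, n_correct = demo_metric.compute(demo_lengths, demo_preds,
                                                  demo_labels)
demo_metric.update(n_infer.numpy(), n_label.numpy(), n_correct.numpy())
precision, recall, f1 = demo_metric.accumulate()  # 1.0, 1.0, 1.0 for this toy batch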
Example #2
def evaluate(args):
    paddle.set_device(args.device)

    # create dataset.
    test_ds = load_dataset(datafiles=(os.path.join(args.data_dir, 'test.tsv')))
    word_vocab = load_vocab(os.path.join(args.data_dir, 'word.dic'))
    label_vocab = load_vocab(os.path.join(args.data_dir, 'tag.dic'))
    # q2b.dic maps full-width (SBC case) characters to half-width (DBC case)
    normlize_vocab = load_vocab(os.path.join(args.data_dir, 'q2b.dic'))

    trans_func = partial(
        convert_example,
        max_seq_len=args.max_seq_len,
        word_vocab=word_vocab,
        label_vocab=label_vocab,
        normlize_vocab=normlize_vocab)
    test_ds.map(trans_func)

    batchify_fn = lambda samples, fn=Tuple(
        Pad(axis=0, pad_val=0, dtype='int64'),  # word_ids
        Stack(dtype='int64'),  # length
        Pad(axis=0, pad_val=0, dtype='int64'),  # label_ids
    ): fn(samples)

    # Create sampler for dataloader
    test_sampler = paddle.io.BatchSampler(
        dataset=test_ds,
        batch_size=args.batch_size,
        shuffle=False,
        drop_last=False)
    test_loader = paddle.io.DataLoader(
        dataset=test_ds,
        batch_sampler=test_sampler,
        return_list=True,
        collate_fn=batchify_fn)

    # Define the model network and metric evaluator
    model = BiGruCrf(args.emb_dim, args.hidden_size,
                     len(word_vocab), len(label_vocab))
    chunk_evaluator = ChunkEvaluator(label_list=label_vocab.keys(), suffix=True)

    # Load the model and start predicting
    model_dict = paddle.load(args.init_checkpoint)
    model.load_dict(model_dict)

    model.eval()
    chunk_evaluator.reset()
    for batch in test_loader:
        token_ids, length, labels = batch
        preds = model(token_ids, length)
        num_infer_chunks, num_label_chunks, num_correct_chunks = chunk_evaluator.compute(
            length, preds, labels)
        chunk_evaluator.update(num_infer_chunks.numpy(),
                               num_label_chunks.numpy(),
                               num_correct_chunks.numpy())
        precision, recall, f1_score = chunk_evaluator.accumulate()
    print("eval precision: %f, recall: %f, f1: %f" %
          (precision, recall, f1_score))
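
# A toy demonstration of the Tuple/Pad/Stack collate function defined above
# (field comments mirror the source; data values are illustrative):
from paddlenlp.data import Pad, Stack, Tuple

demo_batchify = lambda samples, fn=Tuple(
    Pad(axis=0, pad_val=0, dtype='int64'),  # word_ids
    Stack(dtype='int64'),                   # length
    Pad(axis=0, pad_val=0, dtype='int64'),  # label_ids
): fn(samples)

word_ids, length, label_ids = demo_batchify([
    ([1, 2, 3], 3, [4, 5, 6]),
    ([7, 8], 2, [9, 10]),
])
# word_ids -> [[1, 2, 3], [7, 8, 0]]; length -> [3, 2];
# label_ids -> [[4, 5, 6], [9, 10, 0]]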
Example #3
def test_ner_dataset(client):
    from paddlenlp.metrics import ChunkEvaluator
    from datasets import load_dataset
    import paddle

    dev_ds = load_dataset("msra_ner", split="test")

    import os
    if os.environ.get('https_proxy'):
        del os.environ['https_proxy']
    if os.environ.get('http_proxy'):
        del os.environ['http_proxy']

    print("Start infer...")
    metric = ChunkEvaluator(
        label_list=['O', 'B-PER', 'I-PER', 'B-ORG', 'I-ORG', 'B-LOC', 'I-LOC'])
    idx = 0
    batch_size = 32
    max_len = len(dev_ds["tokens"]) - 1
    while idx < max_len:
        end_idx = idx + batch_size if idx + batch_size < max_len else max_len
        data = dev_ds["tokens"][idx:end_idx]
        ret = client.predict(feed_dict={"tokens": data})
        if ret.err_no != 0:
            raise ValueError("err_no: %s, err_msg: %s" % (ret.err_no, ret.err_msg))
        # print("ret:", ret)
        if idx < batch_size * 2:
            print_ret(json.loads(ret.value[0]), data)

        # calculate metric
        preds = json.loads(ret.value[1])
        label_list = dev_ds["ner_tags"][idx:end_idx]
        label_list = label_pad(label_list, preds)
        label_list = paddle.to_tensor(label_list)
        preds = paddle.to_tensor(preds)
        seq_len = [preds.shape[1]] * preds.shape[0]

        num_infer_chunks, num_label_chunks, num_correct_chunks = metric.compute(
            paddle.to_tensor(seq_len), preds, label_list)
        metric.update(num_infer_chunks.numpy(), num_label_chunks.numpy(),
                      num_correct_chunks.numpy())
        idx += batch_size
        print(idx)

    res = metric.accumulate()
    print("acc: ", res)
Example #4
class Electra(nn.Layer):
    """
    Electra model
    """
    def __init__(
        self,
        task: str = None,
        load_checkpoint: str = None,
        label_map: Dict = None,
        num_classes: int = 2,
        **kwargs,
    ):
        super(Electra, self).__init__()
        if label_map:
            self.label_map = label_map
            self.num_classes = len(label_map)
        else:
            self.num_classes = num_classes

        if task == 'sequence_classification':
            task = 'seq-cls'
            logger.warning(
                "Task name 'sequence_classification' has been renamed to 'seq-cls'; "
                "'sequence_classification' is deprecated and will be removed in the future."
            )
        if task == 'seq-cls':
            self.model = ElectraForSequenceClassification.from_pretrained(
                pretrained_model_name_or_path='electra-small',
                num_classes=self.num_classes,
                **kwargs)
            self.criterion = paddle.nn.loss.CrossEntropyLoss()
            self.metric = paddle.metric.Accuracy()
        elif task == 'token-cls':
            self.model = ElectraForTokenClassification.from_pretrained(
                pretrained_model_name_or_path='electra-small',
                num_classes=self.num_classes,
                **kwargs)
            self.criterion = paddle.nn.loss.CrossEntropyLoss()
            self.metric = ChunkEvaluator(label_list=[
                self.label_map[i] for i in sorted(self.label_map.keys())
            ])
        elif task == 'text-matching':
            self.model = ElectraModel.from_pretrained(
                pretrained_model_name_or_path='electra-small', **kwargs)
            self.dropout = paddle.nn.Dropout(0.1)
            self.classifier = paddle.nn.Linear(
                self.model.config['hidden_size'] * 3, 2)
            self.criterion = paddle.nn.loss.CrossEntropyLoss()
            self.metric = paddle.metric.Accuracy()
        elif task is None:
            self.model = ElectraModel.from_pretrained(
                pretrained_model_name_or_path='electra-small', **kwargs)
        else:
            raise RuntimeError(
                "Unknown task {}, task should be one in {}".format(
                    task, self._tasks_supported))

        self.task = task

        if load_checkpoint is not None and os.path.isfile(load_checkpoint):
            state_dict = paddle.load(load_checkpoint)
            self.set_state_dict(state_dict)
            logger.info('Loaded parameters from %s' %
                        os.path.abspath(load_checkpoint))

    def forward(self,
                input_ids=None,
                token_type_ids=None,
                position_ids=None,
                attention_mask=None,
                query_input_ids=None,
                query_token_type_ids=None,
                query_position_ids=None,
                query_attention_mask=None,
                title_input_ids=None,
                title_token_type_ids=None,
                title_position_ids=None,
                title_attention_mask=None,
                seq_lengths=None,
                labels=None):

        if self.task != 'text-matching':
            result = self.model(input_ids, token_type_ids, position_ids,
                                attention_mask)
        else:
            query_result = self.model(query_input_ids, query_token_type_ids,
                                      query_position_ids, query_attention_mask)
            title_result = self.model(title_input_ids, title_token_type_ids,
                                      title_position_ids, title_attention_mask)

        if self.task == 'seq-cls':
            logits = result
            probs = F.softmax(logits, axis=1)
            if labels is not None:
                loss = self.criterion(logits, labels)
                correct = self.metric.compute(probs, labels)
                acc = self.metric.update(correct)
                return probs, loss, {'acc': acc}
            return probs
        elif self.task == 'token-cls':
            logits = result
            token_level_probs = F.softmax(logits, axis=-1)
            preds = token_level_probs.argmax(axis=-1)
            if labels is not None:
                loss = self.criterion(logits, labels.unsqueeze(-1))
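                # ChunkEvaluator.compute is invoked here in its older
                # four-argument form (inputs, lengths, predictions, labels);
                # the first argument is unused, hence None.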
                num_infer_chunks, num_label_chunks, num_correct_chunks = \
                    self.metric.compute(None, seq_lengths, preds, labels)
                self.metric.update(num_infer_chunks.numpy(),
                                   num_label_chunks.numpy(),
                                   num_correct_chunks.numpy())
                _, _, f1_score = map(float, self.metric.accumulate())
                return token_level_probs, loss, {'f1_score': f1_score}
            return token_level_probs
        elif self.task == 'text-matching':
            query_token_embedding = query_result
            query_token_embedding = self.dropout(query_token_embedding)
            query_attention_mask = paddle.unsqueeze(
                (query_input_ids != self.model.pad_token_id).astype(
                    query_token_embedding.dtype),
                axis=2)
            query_token_embedding = query_token_embedding * query_attention_mask
            query_sum_embedding = paddle.sum(query_token_embedding, axis=1)
            query_sum_mask = paddle.sum(query_attention_mask, axis=1)
            query_mean = query_sum_embedding / query_sum_mask

            title_token_embedding = title_result
            title_token_embedding = self.dropout(title_token_embedding)
            title_attention_mask = paddle.unsqueeze(
                (title_input_ids != self.model.pad_token_id).astype(
                    title_token_embedding.dtype),
                axis=2)
            title_token_embedding = title_token_embedding * title_attention_mask
            title_sum_embedding = paddle.sum(title_token_embedding, axis=1)
            title_sum_mask = paddle.sum(title_attention_mask, axis=1)
            title_mean = title_sum_embedding / title_sum_mask

            sub = paddle.abs(paddle.subtract(query_mean, title_mean))
            projection = paddle.concat([query_mean, title_mean, sub], axis=-1)
            logits = self.classifier(projection)
            probs = F.softmax(logits)
            if labels is not None:
                loss = self.criterion(logits, labels)
                correct = self.metric.compute(probs, labels)
                acc = self.metric.update(correct)
                return probs, loss, {'acc': acc}
            return probs
        else:
            sequence_output, pooled_output = result
            return sequence_output, pooled_output

    @staticmethod
    def get_tokenizer(*args, **kwargs):
        """
        Gets the tokenizer that is customized for this module.
        """
        return ElectraTokenizer.from_pretrained(
            pretrained_model_name_or_path='electra-small', *args, **kwargs)
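
# A self-contained sketch of the masked mean pooling used in the
# 'text-matching' branch above (toy shapes; pad token id assumed to be 0):
import paddle

demo_embedding = paddle.rand([2, 4, 8])        # [batch, seq_len, hidden]
demo_input_ids = paddle.to_tensor([[5, 6, 0, 0],
                                   [7, 8, 9, 0]])
demo_mask = paddle.unsqueeze(
    (demo_input_ids != 0).astype(demo_embedding.dtype), axis=2)
demo_mean = (paddle.sum(demo_embedding * demo_mask, axis=1) /
             paddle.sum(demo_mask, axis=1))    # [2, 8]: mean of non-pad tokens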
Example #5
class ErnieV2(nn.Layer):
    """
    Ernie model
    """
    def __init__(
        self,
        task: str = None,
        load_checkpoint: str = None,
        label_map: Dict = None,
        num_classes: int = 2,
        **kwargs,
    ):
        super(ErnieV2, self).__init__()
        if label_map:
            self.label_map = label_map
            self.num_classes = len(label_map)
        else:
            self.num_classes = num_classes

        if task == 'sequence_classification':
            task = 'seq-cls'
            logger.warning(
                "Task name 'sequence_classification' has been renamed to 'seq-cls'; "
                "'sequence_classification' is deprecated and will be removed in the future."
            )
        if task == 'seq-cls':
            self.model = ErnieForSequenceClassification.from_pretrained(
                pretrained_model_name_or_path='ernie-2.0-en',
                num_classes=self.num_classes,
                **kwargs)
            self.criterion = paddle.nn.loss.CrossEntropyLoss()
            self.metric = paddle.metric.Accuracy()
        elif task == 'token-cls':
            self.model = ErnieForTokenClassification.from_pretrained(
                pretrained_model_name_or_path='ernie-2.0-en',
                num_classes=self.num_classes,
                **kwargs)
            self.criterion = paddle.nn.loss.CrossEntropyLoss()
            self.metric = ChunkEvaluator(label_list=[
                self.label_map[i] for i in sorted(self.label_map.keys())
            ])
        elif task is None:
            self.model = ErnieModel.from_pretrained(
                pretrained_model_name_or_path='ernie-2.0-en', **kwargs)
        else:
            raise RuntimeError(
                "Unknown task {}, task should be one in {}".format(
                    task, self._tasks_supported))

        self.task = task

        if load_checkpoint is not None and os.path.isfile(load_checkpoint):
            state_dict = paddle.load(load_checkpoint)
            self.set_state_dict(state_dict)
            logger.info('Loaded parameters from %s' %
                        os.path.abspath(load_checkpoint))

    def forward(self,
                input_ids,
                token_type_ids=None,
                position_ids=None,
                attention_mask=None,
                seq_lengths=None,
                labels=None):
        result = self.model(input_ids, token_type_ids, position_ids,
                            attention_mask)
        if self.task == 'seq-cls':
            logits = result
            probs = F.softmax(logits, axis=1)
            if labels is not None:
                loss = self.criterion(logits, labels)
                correct = self.metric.compute(probs, labels)
                acc = self.metric.update(correct)
                return probs, loss, {'acc': acc}
            return probs
        elif self.task == 'token-cls':
            logits = result
            token_level_probs = F.softmax(logits, axis=-1)
            preds = token_level_probs.argmax(axis=-1)
            if labels is not None:
                loss = self.criterion(logits, labels.unsqueeze(-1))
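                # As in the Electra module above, this is the older
                # four-argument ChunkEvaluator.compute form; the unused
                # first argument is passed as None.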
                num_infer_chunks, num_label_chunks, num_correct_chunks = \
                    self.metric.compute(None, seq_lengths, preds, labels)
                self.metric.update(num_infer_chunks.numpy(),
                                   num_label_chunks.numpy(),
                                   num_correct_chunks.numpy())
                _, _, f1_score = map(float, self.metric.accumulate())
                return token_level_probs, loss, {'f1_score': f1_score}
            return token_level_probs
        else:
            sequence_output, pooled_output = result
            return sequence_output, pooled_output

    @staticmethod
    def get_tokenizer(*args, **kwargs):
        """
        Gets the tokenizer that is customized for this module.
        """
        return ErnieTokenizer.from_pretrained(
            pretrained_model_name_or_path='ernie-2.0-en', *args, **kwargs)
Example #6
def run(args):
    paddle.set_device(args.device)
    if paddle.distributed.get_world_size() > 1:
        paddle.distributed.init_parallel_env()

    raw_datasets = load_dataset(args.task_name)

    tokenizer = AutoTokenizer.from_pretrained(args.model_name_or_path)
    train_ds = raw_datasets['train']
    column_names = train_ds.column_names

    label_list = train_ds.features['ner_tags'].feature.names
    label_num = len(label_list)

    batchify_fn = DataCollatorForTokenClassification(tokenizer=tokenizer)

    # Define the model network and its loss
    model = AutoModelForTokenClassification.from_pretrained(
        args.model_name_or_path, num_classes=label_num)

    if paddle.distributed.get_world_size() > 1:
        model = paddle.DataParallel(model)

    def tokenize_and_align_labels(examples, no_entity_id=0):
        tokenized_inputs = tokenizer(
            examples['tokens'],
            max_seq_len=args.max_seq_length,
            # We use this argument because the texts in our dataset are lists of words (with a label for each word).
            is_split_into_words=True,
            return_length=True)
        labels = []

        for i, label in enumerate(examples['ner_tags']):
            label_ids = label
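            # The tokenizer adds [CLS]/[SEP], so at most len(input_ids) - 2
            # word-level labels fit; truncate any excess.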
            if len(tokenized_inputs['input_ids'][i]) - 2 < len(label_ids):
                label_ids = label_ids[:len(tokenized_inputs['input_ids'][i]) -
                                      2]
            label_ids = [no_entity_id] + label_ids + [no_entity_id]
            label_ids += [no_entity_id] * (
                len(tokenized_inputs['input_ids'][i]) - len(label_ids))

            labels.append(label_ids)
        tokenized_inputs["labels"] = labels
        return tokenized_inputs

    test_ds = raw_datasets['test']
    test_ds = test_ds.select(range(len(test_ds) - 1))
    test_ds = test_ds.map(tokenize_and_align_labels,
                          batched=True,
                          remove_columns=column_names)
    test_data_loader = DataLoader(dataset=test_ds,
                                  collate_fn=batchify_fn,
                                  num_workers=0,
                                  batch_size=args.batch_size,
                                  return_list=True)

    if args.do_train:
        train_ds = train_ds.select(range(len(train_ds) - 1))

        train_ds = train_ds.map(tokenize_and_align_labels,
                                batched=True,
                                remove_columns=column_names)

        train_batch_sampler = paddle.io.DistributedBatchSampler(
            train_ds, batch_size=args.batch_size, shuffle=True, drop_last=True)

        train_data_loader = DataLoader(dataset=train_ds,
                                       collate_fn=batchify_fn,
                                       num_workers=0,
                                       batch_sampler=train_batch_sampler,
                                       return_list=True)

        num_training_steps = args.max_steps if args.max_steps > 0 else len(
            train_data_loader) * args.num_train_epochs

        lr_scheduler = LinearDecayWithWarmup(args.learning_rate,
                                             num_training_steps,
                                             args.warmup_steps)

        # Generate parameter names needed to perform weight decay.
        # All bias and LayerNorm parameters are excluded.
        decay_params = [
            p.name for n, p in model.named_parameters()
            if not any(nd in n for nd in ["bias", "norm"])
        ]
        optimizer = paddle.optimizer.AdamW(
            learning_rate=lr_scheduler,
            epsilon=args.adam_epsilon,
            parameters=model.parameters(),
            weight_decay=args.weight_decay,
            apply_decay_param_fun=lambda x: x in decay_params)

        loss_fct = paddle.nn.loss.CrossEntropyLoss()

        metric = ChunkEvaluator(label_list=label_list)

        global_step = 0
        best_f1 = 0.0
        last_step = args.num_train_epochs * len(train_data_loader)
        tic_train = time.time()
        for epoch in range(args.num_train_epochs):
            for step, batch in enumerate(train_data_loader):
                global_step += 1
                logits = model(batch['input_ids'], batch['token_type_ids'])
                loss = loss_fct(logits, batch['labels'])
                avg_loss = paddle.mean(loss)
                if global_step % args.logging_steps == 0:
                    print(
                        "global step %d, epoch: %d, batch: %d, loss: %f, speed: %.2f step/s"
                        % (global_step, epoch, step, avg_loss,
                           args.logging_steps / (time.time() - tic_train)))
                    tic_train = time.time()
                avg_loss.backward()
                optimizer.step()
                lr_scheduler.step()
                optimizer.clear_grad()
                if global_step % args.save_steps == 0 or global_step == num_training_steps:
                    if paddle.distributed.get_rank() == 0:
                        f1 = evaluate(model, loss_fct, metric,
                                      test_data_loader, label_num, "test")
                        if f1 > best_f1:
                            best_f1 = f1
                            output_dir = args.output_dir
                            if not os.path.exists(output_dir):
                                os.makedirs(output_dir)
                            # Need better way to get inner model of DataParallel
                            model_to_save = model._layers if isinstance(
                                model, paddle.DataParallel) else model
                            model_to_save.save_pretrained(output_dir)
                            tokenizer.save_pretrained(output_dir)
                if global_step >= num_training_steps:
                    print("best_f1: ", best_f1)
                    return
        print("best_f1: ", best_f1)

    if args.do_eval:
        eval_data_loader = DataLoader(dataset=test_ds,
                                      collate_fn=batchify_fn,
                                      num_workers=0,
                                      batch_size=args.batch_size,
                                      return_list=True)

        # Define the model network and its loss
        model = AutoModelForTokenClassification.from_pretrained(
            args.model_name_or_path, num_classes=label_num)
        loss_fct = paddle.nn.loss.CrossEntropyLoss()

        metric = ChunkEvaluator(label_list=label_list)

        model.eval()
        metric.reset()
        for step, batch in enumerate(eval_data_loader):
            logits = model(batch["input_ids"], batch["token_type_ids"])
            loss = loss_fct(logits, batch["labels"])
            avg_loss = paddle.mean(loss)
            preds = logits.argmax(axis=2)
            num_infer_chunks, num_label_chunks, num_correct_chunks = metric.compute(
                batch["length"], preds, batch["labels"])
            metric.update(num_infer_chunks.numpy(), num_label_chunks.numpy(),
                          num_correct_chunks.numpy())
            precision, recall, f1_score = metric.accumulate()
        print("eval loss: %f, precision: %f, recall: %f, f1: %f" %
              (avg_loss, precision, recall, f1_score))
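
# Note: the evaluate() helper called from the training loop above is not
# shown in this snippet. A minimal sketch, consistent with the eval loop in
# the do_eval branch (and assuming the same batch fields), might be:
def evaluate(model, loss_fct, metric, data_loader, label_num, mode):
    model.eval()
    metric.reset()
    with paddle.no_grad():
        for batch in data_loader:
            logits = model(batch['input_ids'], batch['token_type_ids'])
            loss = paddle.mean(loss_fct(logits, batch['labels']))
            preds = logits.argmax(axis=2)
            n_infer, n_label, n_correct = metric.compute(
                batch['length'], preds, batch['labels'])
            metric.update(n_infer.numpy(), n_label.numpy(), n_correct.numpy())
    precision, recall, f1 = metric.accumulate()
    print("%s: loss: %f, precision: %f, recall: %f, f1: %f" %
          (mode, loss, precision, recall, f1))
    model.train()
    return f1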
Example #7
def do_eval(args):
    paddle.set_device(args.device)

    # Create dataset, tokenizer and dataloader.
    train_ds, eval_ds = load_dataset('msra_ner', split=('train', 'test'))
    tokenizer = BertTokenizer.from_pretrained(args.model_name_or_path)

    label_list = train_ds.features['ner_tags'].feature.names
    label_num = len(label_list)
    no_entity_id = 0

    def tokenize_and_align_labels(examples):
        tokenized_inputs = tokenizer(
            examples['tokens'],
            max_seq_len=args.max_seq_length,
            # We use this argument because the texts in our dataset are lists of words (with a label for each word).
            is_split_into_words=True,
            return_length=True)
        labels = []

        for i, label in enumerate(examples['ner_tags']):
            label_ids = label
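            # len(input_ids) includes the added [CLS]/[SEP] tokens, hence
            # the "- 2" when truncating word-level labels.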
            if len(tokenized_inputs['input_ids'][i]) - 2 < len(label_ids):
                label_ids = label_ids[:len(tokenized_inputs['input_ids'][i]) -
                                      2]
            label_ids = [no_entity_id] + label_ids + [no_entity_id]
            label_ids += [no_entity_id] * (
                len(tokenized_inputs['input_ids'][i]) - len(label_ids))

            labels.append(label_ids)
        tokenized_inputs["labels"] = labels
        return tokenized_inputs

    # -100 is skipped by CrossEntropyLoss via ignore_index below.
    ignore_label = -100
    batchify_fn = lambda samples, fn=Dict({
        'input_ids': Pad(axis=0, pad_val=tokenizer.pad_token_id, dtype='int32'),  # input
        'token_type_ids': Pad(axis=0, pad_val=tokenizer.pad_token_type_id, dtype='int32'),  # segment
        'seq_len': Stack(dtype='int64'),
        'labels': Pad(axis=0, pad_val=ignore_label, dtype='int64')  # label
    }): fn(samples)

    eval_ds = eval_ds.select(range(len(eval_ds) - 1))
    eval_ds = eval_ds.map(tokenize_and_align_labels, batched=True)
    eval_data_loader = DataLoader(dataset=eval_ds,
                                  collate_fn=batchify_fn,
                                  num_workers=0,
                                  batch_size=args.batch_size,
                                  return_list=True)

    # Define the model network and its loss
    model = BertForTokenClassification.from_pretrained(args.model_name_or_path,
                                                       num_classes=label_num)
    if args.init_checkpoint_path:
        model_dict = paddle.load(args.init_checkpoint_path)
        model.set_dict(model_dict)
    loss_fct = paddle.nn.loss.CrossEntropyLoss(ignore_index=ignore_label)

    metric = ChunkEvaluator(label_list=label_list)

    model.eval()
    metric.reset()
    for step, batch in enumerate(eval_data_loader):
        input_ids, token_type_ids, length, labels = batch
        logits = model(input_ids, token_type_ids)
        loss = loss_fct(logits, labels)
        avg_loss = paddle.mean(loss)
        preds = logits.argmax(axis=2)
        num_infer_chunks, num_label_chunks, num_correct_chunks = metric.compute(
            length, preds, labels)
        metric.update(num_infer_chunks.numpy(), num_label_chunks.numpy(),
                      num_correct_chunks.numpy())
        precision, recall, f1_score = metric.accumulate()
    print("eval loss: %f, precision: %f, recall: %f, f1: %f" %
          (avg_loss, precision, recall, f1_score))
Example #8
    def predict(self,
                dataset,
                tokenizer,
                batchify_fn,
                args,
                dev_example=None,
                dev_ds_ori=None):
        if args.collect_shape:
            self.set_dynamic_shape(args.max_seq_length, args.batch_size)
        if args.task_name == "cmrc2018":
            # Only input_ids/token_type_ids are fed to the model, so drop the
            # columns the collate function cannot batch.
            batch_source = dataset.remove_columns(
                ["offset_mapping", "attention_mask", "example_id"])
        else:
            batch_source = dataset
        sample_num = len(dataset)
        batches = []
        for i in range(0, sample_num, args.batch_size):
            batch_size = min(args.batch_size, sample_num - i)
            batches.append([batch_source[i + j] for j in range(batch_size)])
        if args.perf:
            for i, batch in enumerate(batches):
                batch = batchify_fn(batch)
                input_ids = batch["input_ids"].numpy()
                segment_ids = batch["token_type_ids"].numpy()
                output = self.predict_batch([input_ids, segment_ids])
                if i > args.perf_warmup_steps:
                    break
            time1 = time.time()
            nums = 0
            for batch in batches:
                batch = batchify_fn(batch)
                input_ids = batch["input_ids"].numpy()
                segment_ids = batch["token_type_ids"].numpy()
                nums = nums + input_ids.shape[0]
                output = self.predict_batch([input_ids, segment_ids])
            total_time = time.time() - time1
            print("task name: %s, sample nums: %s, time: %s, QPS: %s " %
                  (args.task_name, nums, total_time, nums / total_time))

        else:
            if args.task_name == "msra_ner":
                metric = ChunkEvaluator(label_list=args.label_list)
                metric.reset()
                all_predictions = []
                batch_num = len(dataset['input_ids'])
                for batch in batches:
                    batch = batchify_fn(batch)
                    input_ids = batch["input_ids"].numpy()
                    segment_ids = batch["token_type_ids"].numpy()
                    output = self.predict_batch([input_ids, segment_ids])[0]
                    preds = np.argmax(output, axis=2)
                    all_predictions.append(preds.tolist())
                    num_infer_chunks, num_label_chunks, num_correct_chunks = metric.compute(
                        batch["seq_len"], paddle.to_tensor(preds),
                        batch["labels"])
                    metric.update(num_infer_chunks.numpy(),
                                  num_label_chunks.numpy(),
                                  num_correct_chunks.numpy())
                res = metric.accumulate()
                print("task name: %s, (precision, recall, f1): %s, " %
                      (args.task_name, res))
            elif args.task_name == "cmrc2018":
                all_start_logits = []
                all_end_logits = []
                for batch in batches:
                    batch = batchify_fn(batch)
                    input_ids = batch["input_ids"].numpy()
                    segment_ids = batch["token_type_ids"].numpy()
                    start_logits, end_logits = self.predict_batch(
                        [input_ids, segment_ids])
                    for idx in range(start_logits.shape[0]):
                        if len(all_start_logits) % 1000 == 0 and len(
                                all_start_logits):
                            print("Processing example: %d" %
                                  len(all_start_logits))
                        all_start_logits.append(start_logits[idx])
                        all_end_logits.append(end_logits[idx])
                all_predictions, _, _ = compute_prediction(
                    dev_example, dataset, (all_start_logits, all_end_logits),
                    False, args.n_best_size, args.max_answer_length)
                res = squad_evaluate(
                    examples=[raw_data for raw_data in dev_example],
                    preds=all_predictions,
                    is_whitespace_splited=False)
                print("task name: %s, EM: %s, F1: %s" %
                      (args.task_name, res['exact'], res['f1']))
                return all_predictions
            else:
                all_predictions = []
                metric = METRIC_CLASSES[args.task_name]()
                metric.reset()
                for i, batch in enumerate(batches):
                    batch = batchify_fn(batch)
                    output = self.predict_batch([
                        batch["input_ids"].numpy(),
                        batch["token_type_ids"].numpy()
                    ])[0]
                    preds = np.argmax(output, axis=1)
                    all_predictions.append(preds.tolist())
                    correct = metric.compute(paddle.to_tensor(output),
                                             batch["labels"])
                    metric.update(correct)
                res = metric.accumulate()

                print("task name: %s, acc: %s, " % (args.task_name, res))
                return all_predictions