Example #1
def train_one_epoch(model, optimizer, scheduler, train_loader, logger, args):
    device = torch.device(args.device)
    for data_blob in logger.log_every(train_loader):

        optimizer.zero_grad()

        image1, image2, flow_gt, valid_flow_mask = (x.to(device)
                                                    for x in data_blob)
        flow_predictions = model(image1,
                                 image2,
                                 num_flow_updates=args.num_flow_updates)

        loss = utils.sequence_loss(flow_predictions, flow_gt, valid_flow_mask,
                                   args.gamma)
        metrics, _ = utils.compute_metrics(flow_predictions[-1], flow_gt,
                                           valid_flow_mask)

        metrics.pop("f1")
        logger.update(loss=loss, **metrics)

        loss.backward()

        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1)

        optimizer.step()
        scheduler.step()
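This loop follows the RAFT training recipe: the model returns a list of intermediate flow estimates, and `utils.sequence_loss` weights later refinements more heavily via `gamma` while masking out invalid pixels. A minimal sketch of such a loss, assuming an L1 penalty and exponential weighting (an illustration, not the project's actual helper):

import torch

def sequence_loss_sketch(flow_preds, flow_gt, valid_mask, gamma=0.8):
    # flow_preds: list of (N, 2, H, W) predictions; flow_gt: (N, 2, H, W); valid_mask: (N, H, W)
    n = len(flow_preds)
    loss = 0.0
    for i, pred in enumerate(flow_preds):
        weight = gamma ** (n - i - 1)                  # later iterations get weights closer to 1
        l1 = (pred - flow_gt).abs().sum(dim=1)         # per-pixel L1 over the two flow channels
        loss = loss + weight * (l1 * valid_mask).mean()
    return loss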
Example #2
    def evaluate(self, mode):
        # We use test dataset because semeval doesn't have dev dataset
        if mode == 'test':
            dataset = self.test_dataset
        elif mode == 'dev':
            dataset = self.dev_dataset
        else:
            raise Exception("Only dev and test dataset available")

        eval_sampler = SequentialSampler(dataset)
        eval_dataloader = DataLoader(dataset, sampler=eval_sampler, batch_size=self.args.batch_size)

        # Eval!
        logger.info("***** Running evaluation on %s dataset *****", mode)
        logger.info("  Num examples = %d", len(dataset))
        logger.info("  Batch size = %d", self.args.batch_size)
        eval_loss = 0.0
        nb_eval_steps = 0
        preds = None
        out_label_ids = None

        self.model.eval()

        for batch in tqdm(eval_dataloader, desc="Evaluating"):
            batch = tuple(t.to(self.device) for t in batch)
            with torch.no_grad():
                inputs = {'input_ids': batch[0],
                          'attention_mask': batch[1],
                          'token_type_ids': batch[2],
                          'labels': batch[3],
                          'e1_mask': batch[4],
                          'e2_mask': batch[5]}
                outputs = self.model(**inputs)
                tmp_eval_loss, logits = outputs[:2]

                eval_loss += tmp_eval_loss.mean().item()
            nb_eval_steps += 1

            if preds is None:
                preds = logits.detach().cpu().numpy()
                out_label_ids = inputs['labels'].detach().cpu().numpy()
            else:
                preds = np.append(preds, logits.detach().cpu().numpy(), axis=0)
                out_label_ids = np.append(
                    out_label_ids, inputs['labels'].detach().cpu().numpy(), axis=0)

        eval_loss = eval_loss / nb_eval_steps
        results = {
            "loss": eval_loss
        }
        preds = np.argmax(preds, axis=1)
        result = compute_metrics(preds, out_label_ids)

        results.update(result)

        logger.info("***** Eval results *****")
        for key in sorted(results.keys()):
            logger.info("  %s = %s", key, str(results[key]))

        return results
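A pattern worth noting in this and several later examples: `preds` and `out_label_ids` are grown with `np.append` on every batch, which reallocates and copies the full array each time. An equivalent and cheaper way to accumulate, shown here only as a sketch outside the original code, is to collect per-batch arrays in a list and concatenate once:

import numpy as np

def collect_predictions(batches):
    # `batches` is assumed to yield (logits, labels) pairs as numpy arrays.
    all_logits, all_labels = [], []
    for logits_np, labels_np in batches:
        all_logits.append(logits_np)
        all_labels.append(labels_np)
    preds = np.argmax(np.concatenate(all_logits, axis=0), axis=1)
    out_label_ids = np.concatenate(all_labels, axis=0)
    return preds, out_label_ids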
Example #3
    def inner_loop(blob):
        if blob[0].dim() == 3:
            # input is not batched so we add an extra dim for consistency
            blob = [x[None, :, :, :] if x is not None else None for x in blob]

        image1, image2, flow_gt = blob[:3]
        valid_flow_mask = None if len(blob) == 3 else blob[-1]

        image1, image2 = image1.to(device), image2.to(device)

        padder = utils.InputPadder(image1.shape, mode=padder_mode)
        image1, image2 = padder.pad(image1, image2)

        flow_predictions = model(image1,
                                 image2,
                                 num_flow_updates=num_flow_updates)
        flow_pred = flow_predictions[-1]
        flow_pred = padder.unpad(flow_pred).cpu()

        metrics, num_pixels_tot = utils.compute_metrics(
            flow_pred, flow_gt, valid_flow_mask)

        # We compute per-pixel epe (epe) and per-image epe (called f1-epe in RAFT paper).
        # per-pixel epe: average epe of all pixels of all images
        # per-image epe: average epe on each image independently, then average over images
        for name in ("epe", "1px", "3px", "5px",
                     "f1"):  # f1 is called f1-all in paper
            logger.meters[name].update(metrics[name], n=num_pixels_tot)
        logger.meters["per_image_epe"].update(metrics["epe"], n=batch_size)
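The end-point error ("epe") used above is the Euclidean distance between predicted and ground-truth flow vectors, averaged over valid pixels; the "1px"/"3px"/"5px" entries are the fractions of pixels whose error falls under those thresholds. A rough sketch of how such metrics can be computed (an assumption for illustration, not the actual `utils.compute_metrics`):

import torch

def flow_metrics_sketch(flow_pred, flow_gt, valid_mask=None):
    # flow_pred, flow_gt: (N, 2, H, W); valid_mask: optional (N, H, W) boolean mask
    epe = torch.sum((flow_pred - flow_gt) ** 2, dim=1).sqrt()  # per-pixel end-point error
    if valid_mask is not None:
        epe = epe[valid_mask.bool()]
    metrics = {
        "epe": epe.mean().item(),
        "1px": (epe < 1).float().mean().item(),
        "3px": (epe < 3).float().mean().item(),
        "5px": (epe < 5).float().mean().item(),
    }
    return metrics, epe.numel()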
Example #4
def evaluate_tfidf(index, tokenized_candidates, tfidf_corpus, tokenized_names):
    metrics = []
    for i, example in tqdm(enumerate(tfidf_corpus)):
        # argsort is ascending, so reverse and keep the five most similar candidate indices
        top_5_idx = np.argsort(index.get_similarities(example))[::-1][:5]
        candidates = [tokenized_candidates[j] for j in top_5_idx]
        metrics.append(compute_metrics(tokenized_names[i], candidates))
    return pd.DataFrame(metrics)
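Note the top-5 lookup: `np.argsort` sorts in ascending order, so the most similar candidates sit at the end of the result. Two ways to spell this selection, as a standalone sketch:

import numpy as np

sims = np.array([0.1, 0.9, 0.3, 0.7, 0.5, 0.2, 0.8])
top_5_idx = np.argsort(sims)[::-1][:5]            # full sort, then take the five largest
top_5_unordered = np.argpartition(sims, -5)[-5:]  # cheaper for large arrays, order not guaranteed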
Example #5
def evaluate(args, model, eval_dataset):
    batch_size = args.batch_size
    eval_sampler = SequentialSampler(eval_dataset)
    eval_dataloader = DataLoader(eval_dataset, sampler=eval_sampler, batch_size=batch_size)

    logger.info("***** Running normal evaluation *****")
    logger.info(" Num examples = %d", len(eval_dataset))
    logger.info(" Batch size = %d", batch_size)
    eval_loss = 0.
    eval_steps = 0
    preds = None
    out_label_ids = None
    for batch in tqdm.tqdm(eval_dataloader):
        model.eval()
        batch = tuple(t.to(args.device) for t in batch)
        with torch.no_grad():
            outputs = _predict(model, args.model_type, batch)
            tmp_eval_loss, logits = outputs[:2]
            eval_loss += np.mean(tmp_eval_loss.tolist())
        eval_steps += 1
        if preds is None:
            preds = logits.detach().cpu().numpy()
            out_label_ids = batch[3].detach().cpu().numpy() if args.model_type != 'char-cnn' else batch[1].detach().cpu().numpy()
        else:
            label_ids = batch[3].detach().cpu().numpy() if args.model_type != 'char-cnn' else batch[1].detach().cpu().numpy()
            preds = np.append(preds, logits.detach().cpu().numpy(), axis=0)  # (B, 2)
            out_label_ids = np.append(out_label_ids, label_ids, axis=0)  # (B,)
    preds = np.argmax(preds, axis=1)
    acc = utils.compute_metrics(preds, out_label_ids)
    logger.info("eval result acc={:.4f} loss={:.2f}".format(acc, eval_loss / eval_steps))

    return acc
Example #6
File: trainer.py Project: Neehan/robust-re
    def evaluate(self, dataset, mode="test"):
        # We use test dataset because semeval doesn't have dev dataset
        eval_sampler = SequentialSampler(dataset)
        eval_dataloader = DataLoader(
            dataset, sampler=eval_sampler, batch_size=self.args.eval_batch_size
        )

        # Eval!
        logger.info("***** Running evaluation on %s dataset *****", mode)
        logger.info("  Num examples = %d", len(dataset))
        logger.info("  Batch size = %d", self.args.eval_batch_size)
        eval_loss = 0.0
        nb_eval_steps = 0
        preds = None
        out_label_ids = None

        self.model.eval()

        for batch in tqdm(eval_dataloader, desc="Evaluating"):
            batch = tuple(t.to(self.device) for t in batch)
            with torch.no_grad():
                inputs = {
                    "input_ids": batch[0],
                    "attention_mask": batch[1],
                    "token_type_ids": batch[2],
                    "labels": batch[3],
                    "e1_mask": batch[4],
                    "e2_mask": batch[5],
                }
                outputs = self.model(**inputs)
                tmp_eval_loss, logits = outputs[:2]

                eval_loss += tmp_eval_loss.mean().item()
            nb_eval_steps += 1

            if preds is None:
                preds = logits.detach().cpu().numpy()
                out_label_ids = inputs["labels"].detach().cpu().numpy()
            else:
                preds = np.append(preds, logits.detach().cpu().numpy(), axis=0)
                out_label_ids = np.append(
                    out_label_ids, inputs["labels"].detach().cpu().numpy(), axis=0
                )

        eval_loss = eval_loss / nb_eval_steps
        results = {"loss": eval_loss}
        preds = np.argmax(preds, axis=1)
        write_prediction(
            self.args, os.path.join(self.args.eval_dir, "proposed_answers.txt"), preds
        )

        result = compute_metrics(preds, out_label_ids)
        results.update(result)

        logger.info("***** Eval results *****")
        for key in sorted(results.keys()):
            logger.info("  {:15}: {:.4f}".format(key, results[key]))

        return results
Example #7
    def compute_test_metrics(self, X_test, y_test, models_trained):

        metrics = {}
        for name, model in models_trained:

            metrics[name] = compute_metrics(X_test, y_test, model)

        return metrics
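Since `models_trained` is unpacked as `(name, model)` pairs, it is expected to be a list of tuples (or `dict.items()`). A hypothetical call site, with scikit-learn estimators and synthetic data standing in for the project's own models (everything below is illustrative, not part of the original project):

import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

# Synthetic data purely for illustration.
X = np.random.rand(200, 5)
y = (X[:, 0] > 0.5).astype(int)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=0)

models_trained = [
    ("logreg", LogisticRegression(max_iter=1000).fit(X_train, y_train)),
    ("random_forest", RandomForestClassifier(n_estimators=50).fit(X_train, y_train)),
]
# compute_test_metrics(X_test, y_test, models_trained) then calls the project's
# compute_metrics(X_test, y_test, model) once per named model.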
Example #8
def for_loop(net, data_loader, train_optimizer):
    is_train = train_optimizer is not None
    net.train() if is_train else net.eval()

    total_loss, total_time, total_num, preds, targets = 0.0, 0.0, 0, [], []
    data_bar = tqdm(data_loader, dynamic_ncols=True)
    with (torch.enable_grad() if is_train else torch.no_grad()):
        for data, target, grad, boundary, name in data_bar:
            data, target, grad, boundary = data.cuda(), target.cuda(), grad.cuda(), boundary.cuda()
            torch.cuda.synchronize()
            start_time = time.time()
            seg, edge = net(data, grad)
            prediction = torch.argmax(seg.detach(), dim=1)
            torch.cuda.synchronize()
            end_time = time.time()
            semantic_loss = semantic_criterion(seg, target)
            edge_loss = edge_criterion(edge, target, boundary)
            task_loss = task_criterion(seg, edge, target)
            loss = semantic_loss + 20 * edge_loss + task_loss

            if is_train:
                train_optimizer.zero_grad()
                loss.backward()
                train_optimizer.step()

            total_num += data.size(0)
            total_time += end_time - start_time
            total_loss += loss.item() * data.size(0)
            preds.append(prediction.cpu())
            targets.append(target.cpu())

            if not is_train:
                if data_loader.dataset.split == 'test':
                    # revert train id to regular id
                    for key in sorted(trainId2label.keys(), reverse=True):
                        prediction[prediction == key] = trainId2label[key].id
                # save pred images
                save_root = '{}/{}_{}_{}/{}'.format(save_path, backbone_type, crop_h, crop_w, data_loader.dataset.split)
                if not os.path.exists(save_root):
                    os.makedirs(save_root)
                for pred_tensor, pred_name in zip(prediction, name):
                    pred_img = ToPILImage()(pred_tensor.unsqueeze(dim=0).byte().cpu())
                    if data_loader.dataset.split == 'val':
                        pred_img.putpalette(get_palette())
                    pred_name = pred_name.replace('leftImg8bit', 'color')
                    path = '{}/{}'.format(save_root, pred_name)
                    pred_img.save(path)
            data_bar.set_description('{} Epoch: [{}/{}] Loss: {:.4f} FPS: {:.0f}'
                                     .format(data_loader.dataset.split.capitalize(), epoch, epochs,
                                             total_loss / total_num, total_num / total_time))
        # compute metrics
        preds = torch.cat(preds, dim=0)
        targets = torch.cat(targets, dim=0)
        pa, mpa, class_iou, category_iou = compute_metrics(preds, targets)
        print('{} Epoch: [{}/{}] PA: {:.2f}% mPA: {:.2f}% Class_mIOU: {:.2f}% Category_mIOU: {:.2f}%'
              .format(data_loader.dataset.split.capitalize(), epoch, epochs,
                      pa * 100, mpa * 100, class_iou * 100, category_iou * 100))
    return total_loss / total_num, pa * 100, mpa * 100, class_iou * 100, category_iou * 100
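Here `compute_metrics(preds, targets)` returns pixel accuracy, mean per-class accuracy and IoU scores, all of which can be derived from a single confusion matrix over all pixels. A compact sketch of the class-level part (the project's category-level IoU and any ignore-label handling are assumptions not shown here):

import torch

def segmentation_metrics_sketch(preds, targets, num_classes, ignore_index=255):
    # preds, targets: integer label tensors of identical shape
    mask = targets != ignore_index
    hist = torch.bincount(
        targets[mask] * num_classes + preds[mask],
        minlength=num_classes ** 2).reshape(num_classes, num_classes).float()
    pa = hist.diag().sum() / hist.sum()                        # pixel accuracy
    mpa = (hist.diag() / hist.sum(dim=1).clamp(min=1)).mean()  # mean per-class accuracy
    iou = hist.diag() / (hist.sum(dim=1) + hist.sum(dim=0) - hist.diag()).clamp(min=1)
    return pa.item(), mpa.item(), iou.mean().item()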
Example #9
def main(args):

    data_config = load_config_from_json(args.data_config_path)
    model_config = load_config_from_json(
        os.path.join(args.saved_model_path, "config.jsonl"))

    # initialize model
    model = SFNet(model_config["sfnet"])
    model = model.to(device)

    if not os.path.exists(args.saved_model_path):
        raise FileNotFoundError(args.saved_model_path)

    checkpoint = os.path.join(args.saved_model_path, args.checkpoint)
    model.load_state_dict(torch.load(checkpoint, map_location="cpu"))
    print("Model loaded from %s" % (args.saved_model_path))

    # tracker to keep true labels and predicted probabilitites
    target_tracker = []
    pred_tracker = []

    print("Preparing test data ...")
    dataset = ModCloth(data_config, split="test")
    data_loader = DataLoader(
        dataset=dataset,
        batch_size=model_config["trainer"]["batch_size"],
        shuffle=False,
    )

    print("Evaluating model on test data ...")
    model.eval()
    with torch.no_grad():

        for iteration, batch in enumerate(data_loader):

            for k, v in batch.items():
                if torch.is_tensor(v):
                    batch[k] = to_var(v)

            # Forward pass
            _, pred_probs = model(batch)

            target_tracker.append(batch["fit"].cpu().numpy())
            pred_tracker.append(pred_probs.cpu().data.numpy())

    target_tracker = np.stack(target_tracker[:-1]).reshape(-1)
    pred_tracker = np.stack(pred_tracker[:-1], axis=0).reshape(
        -1, model_config["sfnet"]["num_targets"])
    precision, recall, f1_score, accuracy, auc = compute_metrics(
        target_tracker, pred_tracker)

    print("-" * 50)
    print(
        "Metrics:\n Precision = {:.3f}\n Recall = {:.3f}\n F1-score = {:.3f}\n Accuracy = {:.3f}\n AUC = {:.3f}\n "
        .format(precision, recall, f1_score, accuracy, auc))
    print("-" * 50)
Example #10
def main(args):
    video = imageio.get_reader(args.video)
    n_frames = video.count_frames()
    fps = video.get_meta_data()['fps']
    frame_w, frame_h = video.get_meta_data()['size']

    model = load_model(args.model, compile=False)
    input_shape = model.input.shape[1:3]

    # default RoI
    if None in (args.rl, args.rt, args.rr, args.rb):
        side = min(frame_w, frame_h)
        args.rl = (frame_w - side) / 2
        args.rt = (frame_h - side) / 2
        args.rr = (frame_w + side) / 2
        args.rb = (frame_h + side) / 2

    crop = (args.rl, args.rt, args.rr, args.rb)

    def preprocess(frame):
        frame = Image.fromarray(frame)
        eye = frame.crop(crop)
        eye = ImageOps.grayscale(eye)
        eye = eye.resize(input_shape)
        return eye

    def predict(eye):
        eye = np.array(eye).astype(np.float32) / 255.0
        eye = eye[None, :, :, None]
        return model.predict(eye)

    out_video = imageio.get_writer(args.output_video, fps=fps)

    cropped = map(preprocess, video)
    frames_and_predictions = map(lambda x: (x, predict(x)), cropped)

    with open(args.output_csv, 'w') as out_csv:
        print('frame,pupil-area,pupil-x,pupil-y,eye,blink', file=out_csv)
        for idx, (frame, predictions) in enumerate(
                tqdm(frames_and_predictions, total=n_frames)):
            pupil_map, tags = predictions
            is_eye, is_blink = tags.squeeze()
            (pupil_y, pupil_x), pupil_area = compute_metrics(pupil_map,
                                                             thr=args.thr,
                                                             nms=True)

            row = [idx, pupil_area, pupil_x, pupil_y, is_eye, is_blink]
            row = ','.join(list(map(str, row)))
            print(row, file=out_csv)

            img = draw_predictions(frame, predictions, thr=args.thr)
            img = np.array(img)
            out_video.append_data(img)

    out_video.close()
Example #11
def main():
    with open('config.json', 'r', encoding='utf-8') as f:
        args = AttrDict(json.load(f))

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    tokenizer = load_tokenizer(args)

    checkpoints = sorted([
        dir for dir in glob.glob(f'{args.save_model_dir}/*')
        if os.path.isdir(dir)
    ])
    if not args.eval_all_ckpts: checkpoints = checkpoints[-1:]

    results = {}
    eval_preds, eval_labels = [], []
    for ckpt in checkpoints:
        steps = ckpt.split('-')[-1]
        model = AutoModelForSequenceClassification.from_pretrained(ckpt).to(
            device)

        test_dataset = DATASET_LIST[args.model_mode](args, tokenizer, "test")
        test_dataloader = DataLoader(dataset=test_dataset,
                                     sampler=SequentialSampler(test_dataset),
                                     batch_size=args.eval_batch_size)

        all_preds, all_out_label_ids, texts = predict(args, model, tokenizer,
                                                      device, test_dataloader)
        all_preds_argmax = np.argmax(all_preds, axis=1)

        eval_preds.append(all_preds_argmax)
        eval_labels.append(all_out_label_ids)
        results[steps] = compute_metrics(all_preds_argmax, all_out_label_ids)

        result = [{
            "id": idx,
            "text": t[0],
            "label": test_dataset.answer2labels[an]
        } for idx, (t, an) in enumerate(zip(texts, all_preds_argmax))]
        result = {'annotations': result}
        with open(os.path.join(ckpt, 'results.json'), 'w',
                  encoding='utf-8') as f:
            json.dump(result, f, ensure_ascii=False, indent='\t')

    with open(os.path.join(args.save_model_dir, 'eval_results.txt'),
              'w',
              encoding='utf-8') as f:
        for idx, key in enumerate(sorted(results.keys())):
            print(f"{key}: {str(results[key]['acc'])}")
            print(confusion_matrix(eval_labels[idx], eval_preds[idx]).tolist())
            print()
            f.write(f"{key}: {str(results[key]['acc'])}\n")
            f.write(
                f"{confusion_matrix(eval_labels[idx], eval_preds[idx]).tolist()}\n\n"
            )
Example #12
def read_metrics_zhou_results(results, record):
    tp, tn, fp, fn = 0, 0, 0, 0

    tp = int(results[record][0])
    tn = int(results[record][1])
    fp = int(results[record][2])
    fn = int(results[record][3])

    se, sp, ppv, acc = utils.compute_metrics(tp, tn, fp, fn)
    mcc = utils.compute_mcc(tp, tn, fp, fn)

    return [record, "ZHOU", tp, tn, fp, fn, se, sp, ppv, acc, mcc]
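`utils.compute_metrics` and `utils.compute_mcc` in this example work directly from confusion-matrix counts. The standard definitions of those quantities, sketched here as a guess at what such helpers compute (the project's exact implementation is not shown):

import math

def compute_metrics_sketch(tp, tn, fp, fn):
    se = tp / (tp + fn) if (tp + fn) else 0.0    # sensitivity (recall)
    sp = tn / (tn + fp) if (tn + fp) else 0.0    # specificity
    ppv = tp / (tp + fp) if (tp + fp) else 0.0   # positive predictive value (precision)
    acc = (tp + tn) / (tp + tn + fp + fn)        # accuracy
    return se, sp, ppv, acc

def compute_mcc_sketch(tp, tn, fp, fn):
    # Matthews correlation coefficient
    denom = math.sqrt((tp + fp) * (tp + fn) * (tn + fp) * (tn + fn))
    return (tp * tn - fp * fn) / denom if denom else 0.0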
Example #13
def local_train(stemmer=data_provider.NoStemmer(), text_representation='bag-of-words', C=1, max_iter=10000):
    data_provider.STATE['stemmer'] = stemmer
    X, Y = data_provider.get_data(input_format='hot_vector',
                                  output_format='numerical',
                                  ngrams=text_representation=='ngrams',
                                  all_data=True)
    model = create_model(C, max_iter)
    X_train, X_val = split(X, 0.9)
    Y_train, Y_val = split(Y, 0.9)

    data_provider.STATE = data_provider.initial_state()
    del X, Y
    gc.collect()
    #X_train, X_val, Y_train, Y_val = np.array(X_train), np.array(X_val), np.array(Y_train), np.array(Y_val)
    data_provider.STATE = data_provider.initial_state()
    print(">>> {} {} {} {} {}".format(type(stemmer).__name__, text_representation, 'svm', C, max_iter))
    start = time.time()
    model.fit(X_train, Y_train)
    print(">>> TRAINING TIME: {}s".format(time.time() - start))
    Y_pred = model.predict(X_val)
    compute_metrics(Y_val, Y_pred)
Example #14
    def predict(self):
        logger.info("***** Model Loaded *****")
        test_loader = build_loader(self.args, self.tokenizer, 'test')
        nb_eval_steps = 0
        preds = None
        out_label_ids = None

        self.model.eval()

        for batch in tqdm(test_loader, desc="Predicting"):
            batch = tuple(t.to(self.device) for t in batch)
            with torch.no_grad():
                inputs = {
                    "input_ids": batch[0],
                    "attention_mask": batch[1],
                    "token_type_ids": batch[2]
                }

                outputs = self.model(**inputs)
                pooled_output = outputs[1]
                logits = self.classifier(pooled_output)

            nb_eval_steps += 1
            if preds is None:
                preds = logits.detach().cpu().numpy()
                out_label_ids = batch[3].detach().cpu().numpy()
            else:
                preds = np.append(preds, logits.detach().cpu().numpy(), axis=0)
                out_label_ids = np.append(out_label_ids,
                                          batch[3].detach().cpu().numpy(),
                                          axis=0)

        results = {}
        preds = np.argmax(preds, axis=1)
        result = compute_metrics(preds, out_label_ids)
        results.update(result)
        logger.info("***** Eval results *****")
        for key in sorted(results.keys()):
            logger.info("  %s = %s", key, str(results[key]))

        # with open(f'{self.args.test_data_dir}_test.json', 'r', encoding='utf-8') as f, \
        #     open(f'wrong_sports.txt', "w", encoding="utf-8") as fw:
        #     data = json.load(f)
        #     for line, pred in zip(data, preds):
        #         if line['sentiment'] != pred:
        #             fw.write(f"{line['text']}\t{line['sentiment']}\t{pred}\n")

        # # Write to output file
        # with open(self.args.output_file, "w", encoding="utf-8") as f:
        #     for pred in preds:
        #         f.write("{}\n".format(pred))

        logger.info("Prediction Done!")
Example #15
    def evaluate(self, eval_dataloader, mode):
        logger.info("  ***** Running evaluation on %s dataset *****", mode)
        logger.info("  Num examples = %d", len(eval_dataloader))
        logger.info("  Batch size = %d", self.args.eval_batch_size)
        eval_loss = 0.0
        preds = None
        out_label_ids = None
        loss_fct = nn.CrossEntropyLoss()
        nb_eval_steps = 0

        self.model.eval()

        for batch in tqdm(eval_dataloader, desc="Evaluating"):
            batch = tuple(t.to(self.device) for t in batch)
            with torch.no_grad():
                inputs = {
                    'input_ids': batch[0],
                    'attention_mask': batch[1],
                    'token_type_ids': batch[2]
                }
                labels = batch[3]
                outputs = self.model(**inputs)
                pooled_output = outputs[1]
                pooled_output = self.dropout(pooled_output)
                logits = self.classifier(pooled_output)
                loss = loss_fct(logits.view(-1, 2), labels.view(-1))

                eval_loss += loss.item()

            nb_eval_steps += 1
            if preds is None:
                preds = logits.detach().cpu().numpy()
                out_label_ids = labels.detach().cpu().numpy()
            else:
                preds = np.append(preds, logits.detach().cpu().numpy(), axis=0)
                out_label_ids = np.append(out_label_ids,
                                          labels.detach().cpu().numpy(),
                                          axis=0)

        eval_loss = eval_loss / nb_eval_steps
        results = {"loss": eval_loss}

        preds = np.argmax(preds, axis=1)
        result = compute_metrics(preds, out_label_ids)
        results.update(result)

        logger.info("***** Eval results *****")
        for key in sorted(results.keys()):
            logger.info("  %s = %s", key, str(results[key]))

        return results['loss'], results['acc']
Example #16
    def evaluate(self, dataset_orig_test, verbose=True):
        assert self.classifier, 'There is no model to use. Please fit the model first.'

        best_ultimate_thres = self.best_ultimate_thres

        # Transform into standardized dataframe
        if not self.is_valid:
            # dataset_orig_test[self.label_name] = self.favorable_classes[0]
            dataset_transf_test = generate_formatted_dataframe(dataset_orig_test, label_name=self.label_name, \
                                                            favorable_classes=self.favorable_classes, \
                                                            protected_attribute_names=self.protected_attribute_names, \
                                                            privileged_classes=self.privileged_classes,\
                                                            categorical_features=self.categorical_features, \
                                                            features_to_keep=self.features_to_keep, \
                                                            features_to_drop=self.features_to_drop,\
                                                            na_values=self.na_values, \
                                                            custom_preprocessing=self.custom_preprocessing, \
                                                            metadata=self.metadata)

            print("Data has been transformed into standardized dataframe.")
        else:
            dataset_transf_test = dataset_orig_test

        dataset_transf_test_pred = dataset_transf_test.copy(deepcopy=True)
        X_test = dataset_transf_test_pred.features
        y_test = dataset_transf_test_pred.labels

        # Predict_proba on test data
        pos_ind = dataset_transf_test_pred.favorable_label
        y_pred = self.classifier.predict_proba(X_test)[:, int(pos_ind)].reshape(-1, 1)

        fav_inds = y_pred > best_ultimate_thres
        dataset_transf_test_pred.labels[
            fav_inds] = dataset_transf_test_pred.favorable_label
        dataset_transf_test_pred.labels[
            ~fav_inds] = dataset_transf_test_pred.unfavorable_label

        metric_test_aft = compute_metrics(dataset_transf_test, dataset_transf_test_pred, \
                                          self.unprivileged_groups, self.privileged_groups, disp=True)
        if verbose:
            print(
                "Optimal classification threshold (after fairness processing) = %.4f"
                % best_ultimate_thres)
            display(
                pd.DataFrame(metric_test_aft,
                             columns=metric_test_aft.keys(),
                             index=[0]))

        return metric_test_aft
Example #17
    def evaluate(self):
        # self.load_model()  # Load model

        eval_sampler = SequentialSampler(self.test_dataset)
        eval_dataloader = DataLoader(self.test_dataset, sampler=eval_sampler, batch_size=self.config.batch_size)

        # Eval!
        logger.info("***** Running evaluation *****")
        logger.info("  Num examples = %d", len(self.test_dataset))
        logger.info("  Batch size = %d", self.config.batch_size)
        eval_loss = 0.0
        nb_eval_steps = 0
        preds = None
        out_label_ids = None
        results = {}

        for batch in tqdm(eval_dataloader, desc="Evaluating"):
            self.model.eval()
            batch = tuple(t.to(self.device) for t in batch)
            with torch.no_grad():
                inputs = {'input_ids': batch[0],
                          'attention_mask': batch[1],
                          'token_type_ids': batch[2],
                          'labels': batch[3],
                          'e1_mask': batch[4],
                          'e2_mask': batch[5]}
                outputs = self.model(**inputs)
                tmp_eval_loss, logits = outputs[:2]

                eval_loss += tmp_eval_loss.mean().item()
            nb_eval_steps += 1

            if preds is None:
                preds = logits.detach().cpu().numpy()
                out_label_ids = inputs['labels'].detach().cpu().numpy()
            else:
                preds = np.append(preds, logits.detach().cpu().numpy(), axis=0)
                out_label_ids = np.append(
                    out_label_ids, inputs['labels'].detach().cpu().numpy(), axis=0)

        eval_loss = eval_loss / nb_eval_steps
        preds = np.argmax(preds, axis=1)
        result = compute_metrics(preds, out_label_ids)
        results.update(result)
        logger.info("***** Eval results *****")
        for key in sorted(result.keys()):
            logger.info("  %s = %s", key, str(result[key]))

        write_prediction(os.path.join(self.config.eval_dir, "proposed_answers.txt"), preds)
        return results
Example #18
    def evaluate(self, eval_dataloader, mode):
        eval_loss = 0.0
        nb_eval_steps = 0
        preds = None
        out_label_ids = None

        self.model.eval()

        for batch in tqdm(eval_dataloader, desc="Evaluating"):
            batch = batch[:-1]
            batch = tuple(t.to(self.device) for t in batch)
            with torch.no_grad():
                inputs = {
                    'input_ids': batch[0],
                    'attention_mask': batch[1],
                    'token_type_ids': batch[2],
                    'labels': batch[3]
                }

                outputs = self.model(**inputs)

                tmp_eval_loss, logits = outputs[:2]
                eval_loss += tmp_eval_loss.mean().item()

            nb_eval_steps += 1
            if preds is None:
                preds = logits.detach().cpu().numpy()
                out_label_ids = inputs['labels'].detach().cpu().numpy()
            else:
                preds = np.append(preds, logits.detach().cpu().numpy(), axis=0)
                out_label_ids = np.append(
                    out_label_ids,
                    inputs['labels'].detach().cpu().numpy(),
                    axis=0)

        eval_loss = eval_loss / nb_eval_steps
        results = {"loss": eval_loss}

        preds = np.argmax(preds, axis=1)
        result = compute_metrics(preds, out_label_ids)
        results.update(result)

        logger.info("***** Eval results *****")
        for key in sorted(results.keys()):
            logger.info("  %s = %s", key, str(results[key]))

        return results['loss'], results['acc']
Example #19
def main():
    args = parseArguments()

    data, t = DataLoader.load_data(args['dataFile'], args['trainingPoints'],
                                   args['validationPoints'])
    _SVM = SVM(args['B'], args['K'], args['C'], args['gamma'], args['xi'],
               args['trainingPoints'], args['type'])
    alpha, b = _SVM.train_SVM(data, t)

    if args['fig']:
        utils.plot_figure(_SVM, alpha, data, t, b, args['trainingPoints'],
                          args['type'])

    precision, recall, f_score, accuracy = utils.compute_metrics(
        _SVM, alpha, data, t, b, args['trainingPoints'],
        args['validationPoints'])
    print(f'{precision=} {recall=} {f_score=} {accuracy=}')
Example #20
    def test(self):
        logger.info("***** Model Loaded *****")
        test_loader = build_loader(self.args, self.tokenizer, 'test')
        preds = None
        out_label_ids = None
        nb_eval_steps = 0

        self.model.eval()

        for batch in tqdm(test_loader, desc="Predicting"):
            batch = tuple(t.to(self.device) for t in batch)
            with torch.no_grad():
                inputs = {
                    "input_ids": batch[0],
                    "attention_mask": batch[1],
                    "token_type_ids": batch[2],
                    "labels": None
                }
                outputs = self.model(**inputs)
                logits = outputs[0]

            nb_eval_steps += 1
            if preds is None:
                preds = logits.detach().cpu().numpy()
                out_label_ids = batch[3].detach().cpu().numpy()
            else:
                preds = np.append(preds, logits.detach().cpu().numpy(), axis=0)
                out_label_ids = np.append(out_label_ids,
                                          batch[3].detach().cpu().numpy(),
                                          axis=0)

        results = {}
        preds = np.argmax(preds, axis=1)
        result = compute_metrics(preds, out_label_ids)
        results.update(result)
        logger.info("***** Eval results *****")
        for key in sorted(results.keys()):
            logger.info("  %s = %s", key, str(results[key]))

        # Write to output file
        with open(self.args.test_output_file, "w", encoding="utf-8") as f:
            for pred in preds:
                f.write("{}\n".format(pred))

        logger.info("Prediction Done!")
Example #21
def evaluate(model, data_loader, tokenizer, ignore_pad_token_for_loss,
             min_target_length, max_target_length):
    model.eval()
    all_preds = []
    all_labels = []
    model = model._layers if isinstance(model, paddle.DataParallel) else model
    for batch in tqdm(data_loader, total=len(data_loader), desc="Eval step"):
        input_ids, _, _, labels = batch
        preds = model.generate(input_ids=input_ids,
                               min_length=min_target_length,
                               max_length=max_target_length,
                               use_cache=True)[0]
        all_preds.extend(preds.numpy())
        all_labels.extend(labels.numpy())
    rouge_result, decoded_preds = compute_metrics(all_preds, all_labels,
                                                  tokenizer,
                                                  ignore_pad_token_for_loss)
    logger.info(rouge_result)
    model.train()
Example #22
    def evaluate(self, mode):
        if mode == 'test':
            dataset = self.test_dataset
        elif mode == 'dev':
            dataset = self.dev_dataset
        else:
            raise Exception("Only dev and test dataset available")

        eval_sampler = SequentialSampler(dataset)
        eval_dataloader = DataLoader(dataset,
                                     sampler=eval_sampler,
                                     batch_size=self.args.batch_size)

        # Eval!
        logger.info("***** Running evaluation on %s dataset *****", mode)
        logger.info("  Num examples = %d", len(dataset))
        logger.info("  Batch size = %d", self.args.batch_size)
        eval_loss = 0.0
        nb_eval_steps = 0
        intent_preds = None
        slot_preds = None
        out_intent_label_ids = None
        out_slot_labels_ids = None

        self.model.eval()

        for batch in tqdm(eval_dataloader, desc="Evaluating"):
            batch = tuple(t.to(self.device) for t in batch)
            with torch.no_grad():
                inputs = {
                    'input_ids': batch[0],
                    'attention_mask': batch[1],
                    'intent_label_ids': batch[3],
                    'slot_labels_ids': batch[4]
                }
                if self.args.model_type != 'distilbert':
                    inputs['token_type_ids'] = batch[2]
                outputs = self.model(**inputs)
                tmp_eval_loss, (intent_logits, slot_logits) = outputs[:2]

                eval_loss += tmp_eval_loss.mean().item()  # mean over the batch
            nb_eval_steps += 1

            # Intent prediction
            if intent_preds is None:
                intent_preds = intent_logits.detach().cpu().numpy()  # convert the intent output to numpy
                out_intent_label_ids = inputs['intent_label_ids'].detach().cpu().numpy()
            else:
                intent_preds = np.append(intent_preds,
                                         intent_logits.detach().cpu().numpy(),
                                         axis=0)  # np.append() concatenates two numpy arrays
                out_intent_label_ids = np.append(
                    out_intent_label_ids,
                    inputs['intent_label_ids'].detach().cpu().numpy(),
                    axis=0)

            # Slot prediction
            if slot_preds is None:
                if self.args.use_crf:
                    # decode() in `torchcrf` returns list with best index directly
                    slot_preds = np.array(self.model.crf.decode(slot_logits))
                else:
                    slot_preds = slot_logits.detach().cpu().numpy()

                out_slot_labels_ids = inputs["slot_labels_ids"].detach().cpu(
                ).numpy()
            else:
                if self.args.use_crf:
                    slot_preds = np.append(
                        slot_preds,
                        np.array(self.model.crf.decode(slot_logits)),
                        axis=0)
                else:
                    slot_preds = np.append(slot_preds,
                                           slot_logits.detach().cpu().numpy(),
                                           axis=0)

                out_slot_labels_ids = np.append(
                    out_slot_labels_ids,
                    inputs["slot_labels_ids"].detach().cpu().numpy(),
                    axis=0)

        eval_loss = eval_loss / nb_eval_steps
        results = {"loss": eval_loss}

        # Intent result
        intent_preds = np.argmax(intent_preds, axis=1)

        # Slot result
        if not self.args.use_crf:
            slot_preds = np.argmax(slot_preds, axis=2)
        slot_label_map = {
            i: label
            for i, label in enumerate(self.slot_label_lst)
        }  # {label_id: label_name}
        out_slot_label_list = [[] for _ in range(out_slot_labels_ids.shape[0])]  # one empty list per evaluation example
        slot_preds_list = [[] for _ in range(out_slot_labels_ids.shape[0])]

        for i in range(out_slot_labels_ids.shape[0]):
            for j in range(out_slot_labels_ids.shape[1]):
                if out_slot_labels_ids[i, j] != self.pad_token_label_id:
                    out_slot_label_list[i].append(
                        slot_label_map[out_slot_labels_ids[i][j]])
                    slot_preds_list[i].append(slot_label_map[slot_preds[i][j]])

        total_result = compute_metrics(intent_preds, out_intent_label_ids,
                                       slot_preds_list, out_slot_label_list)
        results.update(total_result)

        logger.info("***** Eval results *****")
        for key in sorted(results.keys()):
            logger.info("  %s = %s", key, str(results[key]))

        return results
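In this JointBERT-style setup `compute_metrics` has to combine an intent-classification score with sequence-labelling scores for the slots. A rough sketch of that combination, assuming seqeval-style slot scoring over the label lists built above (names and exact metrics are illustrative, not the project's helper):

import numpy as np
from seqeval.metrics import f1_score, precision_score, recall_score

def joint_metrics_sketch(intent_preds, intent_labels, slot_preds_list, slot_labels_list):
    # intent_preds / intent_labels: 1-D arrays of intent ids
    # slot_preds_list / slot_labels_list: lists of per-sentence label-string lists
    return {
        "intent_acc": float((np.array(intent_preds) == np.array(intent_labels)).mean()),
        "slot_precision": precision_score(slot_labels_list, slot_preds_list),
        "slot_recall": recall_score(slot_labels_list, slot_preds_list),
        "slot_f1": f1_score(slot_labels_list, slot_preds_list),
    }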
Example #23
        # disc.method = Orange.preprocess.discretize.EqualFreq(n=3)
        disc_predicted_data_table = disc(predicted_data_table)
        # disc_predicted_test_data_table = disc(predicted_test_data_table)
        disc_predicted_test_data_table = Orange.data.Table.from_table(disc_predicted_data_table.domain, predicted_test_data_table)


        from utils import uniform_enlarge_dataset,estimated_enlarge_dataset

        rate = 1.0 * 49804 / predicted_data_table.X.shape[0]
        # rate = 0
        print("sampling rate",rate)
        new_predicted_data_table = estimated_enlarge_dataset(predicted_data_table,black_box,sampling_rate=rate,random_seed=random_seed)
        print(new_predicted_data_table.X.shape)
        disc_new_predicted_data_table =  Orange.data.Table.from_table(disc_predicted_data_table.domain, new_predicted_data_table)

        from approach import explain_tabular
        explanations,explainer = explain_tabular(disc_new_predicted_data_table, black_box, target_class_idx=1, random_seed=random_seed,beta = 0,use_pre_mined=True, objective = 'bayesian')

        print(len(explanations))
        from utils import  rule_to_string,ruleset_predict
        our_prediction = ruleset_predict(explanations,disc_predicted_test_data_table.X)

        import sklearn
        print('Blackbox and our, acc', sklearn.metrics.accuracy_score(predicted_test_data_table.Y, our_prediction))
        print('Blackbox and our, f1 score', sklearn.metrics.f1_score(predicted_test_data_table.Y, our_prediction))
        print('Blackbox and our,recall', sklearn.metrics.recall_score(predicted_test_data_table.Y, our_prediction))
        print('Blackbox and our,precision', sklearn.metrics.precision_score(predicted_test_data_table.Y, our_prediction))

        from utils import compute_metrics
        compute_metrics(explanations,disc_predicted_data_table.domain)
Example #24
    def evaluate(self, mode, step):
        if mode == 'test':
            dataset = self.test_dataset
        elif mode == 'dev':
            dataset = self.dev_dataset
        elif mode == 'train':
            dataset = self.train_dataset
        else:
            raise Exception("Only train, dev and test dataset available")

        eval_sampler = SequentialSampler(dataset)
        eval_dataloader = DataLoader(dataset,
                                     sampler=eval_sampler,
                                     batch_size=self.args.eval_batch_size)

        # Eval!
        logger.info("***** Running evaluation on %s dataset *****", mode)
        logger.info("  Num examples = %d", len(dataset))
        logger.info("  Batch size = %d", self.args.eval_batch_size)
        eval_loss = 0.0
        nb_eval_steps = 0
        preds = None
        out_label_ids = None

        for batch in tqdm(eval_dataloader, desc="Evaluating"):
            self.model.eval()
            batch = tuple(t.to(self.device) for t in batch)
            with torch.no_grad():
                inputs = {
                    'word_ids': batch[0],
                    'char_ids': batch[1],
                    'mask': batch[2],
                    'label_ids': batch[3]
                }
                outputs = self.model(**inputs)
                tmp_eval_loss, logits = outputs[:2]

                eval_loss += tmp_eval_loss.mean().item()
            nb_eval_steps += 1

            # Slot prediction
            if preds is None:
                # decode() in `torchcrf` returns list with best index directly
                preds = np.array(
                    self.model.crf.decode(logits, mask=inputs['mask'].byte()))
                out_label_ids = inputs["label_ids"].detach().cpu().numpy()
            else:
                preds = np.append(preds,
                                  np.array(
                                      self.model.crf.decode(
                                          logits, mask=inputs['mask'].byte())),
                                  axis=0)
                out_label_ids = np.append(
                    out_label_ids,
                    inputs["label_ids"].detach().cpu().numpy(),
                    axis=0)

        eval_loss = eval_loss / nb_eval_steps
        results = {"loss": eval_loss}

        # Slot result
        slot_label_map = {i: label for i, label in enumerate(self.label_lst)}
        out_label_list = [[] for _ in range(out_label_ids.shape[0])]
        preds_list = [[] for _ in range(out_label_ids.shape[0])]

        for i in range(out_label_ids.shape[0]):
            for j in range(out_label_ids.shape[1]):
                if out_label_ids[i, j] != self.pad_token_label_id:
                    out_label_list[i].append(
                        slot_label_map[out_label_ids[i][j]])
                    preds_list[i].append(slot_label_map[preds[i][j]])

        if self.args.write_pred:
            if not os.path.exists(self.args.pred_dir):
                os.mkdir(self.args.pred_dir)

            with open(os.path.join(self.args.pred_dir,
                                   "pred_{}.txt".format(step)),
                      "w",
                      encoding="utf-8") as f:
                for text, true_label, pred_label in zip(
                        self.test_texts, out_label_list, preds_list):
                    for t, tl, pl in zip(text, true_label, pred_label):
                        f.write("{} {} {}\n".format(t, tl, pl))
                    f.write("\n")

        result = compute_metrics(out_label_list, preds_list)
        results.update(result)

        logger.info("***** Eval results *****")
        for key in sorted(results.keys()):
            logger.info("  %s = %s", key, str(results[key]))
        logger.info("\n" + show_report(
            out_label_list, preds_list))  # Get the report for each tag result

        return results
Example #25
def evaluate(model, eval_features):
    all_input_ids = torch.tensor([f.input_ids for f in eval_features], dtype=torch.long)
    all_attention_mask = torch.tensor([f.attention_mask for f in eval_features], dtype=torch.long)
    all_token_type_ids = torch.tensor([f.token_type_ids for f in eval_features], dtype=torch.long)
    all_intent_label_ids = torch.tensor([f.intent_label_id for f in eval_features], dtype=torch.long)
    all_slot_labels_ids = torch.tensor([f.slot_labels_ids for f in eval_features], dtype=torch.long)
    dev_dataset = TensorDataset(all_input_ids, all_attention_mask,
                                all_token_type_ids, all_intent_label_ids, all_slot_labels_ids)
    eval_sampler = SequentialSampler(dev_dataset)
    eval_dataloader = DataLoader(dev_dataset, sampler=eval_sampler, batch_size=args.eval_batch_size)

    # Eval!
    print("***** Running evaluation on dataset *****")
    print("  Num examples = %d", len(dev_dataset))
    print("  Batch size = %d", args.eval_batch_size)
    eval_loss = 0.0
    nb_eval_steps = 0
    intent_preds = None
    slot_preds = None
    out_intent_label_ids = None
    out_slot_labels_ids = None
    model.eval()
    for batch in tqdm(eval_dataloader, desc='Evaluating'):
        batch = tuple(t.to(device) for t in batch)
        with torch.no_grad():
            inputs = {'input_ids': batch[0],
                      'attention_mask': batch[1],
                      'token_type_ids': batch[2],
                      'intent_label_ids': batch[3],
                      'slot_labels_ids': batch[4]}
            tmp_eval_loss, intent_logits, slot_logits = model(**inputs)
            eval_loss += tmp_eval_loss.mean().item()

        nb_eval_steps += 1

        # Intent prediction
        if intent_preds is None:
            intent_preds = intent_logits.detach().cpu().numpy()
            out_intent_label_ids = inputs['intent_label_ids'].detach().cpu().numpy()
        else:
            intent_preds = np.append(intent_preds, intent_logits.detach().cpu().numpy(), axis=0)
            out_intent_label_ids = np.append(
                out_intent_label_ids, inputs['intent_label_ids'].detach().cpu().numpy(), axis=0)

        # Slot prediction
        if slot_preds is None:
            if args.use_crf:
                # decode() in `torchcrf` returns list with best index directly
                slot_preds = np.array(model.crf.decode(slot_logits))
            else:
                slot_preds = slot_logits.detach().cpu().numpy()

            out_slot_labels_ids = inputs["slot_labels_ids"].detach().cpu().numpy()
        else:
            if args.use_crf:
                slot_preds = np.append(slot_preds, np.array(model.crf.decode(slot_logits)), axis=0)
            else:
                slot_preds = np.append(slot_preds, slot_logits.detach().cpu().numpy(), axis=0)

            out_slot_labels_ids = np.append(out_slot_labels_ids, inputs["slot_labels_ids"].detach().cpu().numpy(),
                                            axis=0)

    eval_loss = eval_loss / nb_eval_steps
    results = {
        "loss": eval_loss
    }

    intent_preds = np.argmax(intent_preds, axis=1)

    if not args.use_crf:
        slot_preds = np.argmax(slot_preds, axis=2)

    slot_label_map = {i: label for i, label in enumerate(slot_label_lst)}
    out_slot_label_list = [[] for _ in range(out_slot_labels_ids.shape[0])]
    slot_preds_list = [[] for _ in range(out_slot_labels_ids.shape[0])]

    for i in range(out_slot_labels_ids.shape[0]):
        for j in range(out_slot_labels_ids.shape[1]):
            if out_slot_labels_ids[i, j] != args.ignore_index:
                out_slot_label_list[i].append(slot_label_map[out_slot_labels_ids[i][j]])
                slot_preds_list[i].append(slot_label_map[slot_preds[i][j]])

    total_result = compute_metrics(intent_preds, out_intent_label_ids, slot_preds_list, out_slot_label_list)
    results.update(total_result)

    print("***** Eval results *****")
    for key in sorted(results.keys()):
        print("  %s = %s", key, str(results[key]))
    return results['loss']
Example #26
    def evaluate(self, mode):  #test
        # We use test dataset because semeval doesn't have dev dataset
        if mode == 'test':
            dataset = self.test_dataset
        elif mode == 'dev':
            dataset = self.dev_dataset
        else:
            raise Exception("Only dev and test dataset available")

        eval_sampler = SequentialSampler(dataset)
        eval_dataloader = DataLoader(dataset,
                                     sampler=eval_sampler,
                                     batch_size=self.args.batch_size)

        # Eval!
        logger.info("***** Running evaluation on %s dataset *****", mode)
        logger.info("  Num examples = %d", len(dataset))
        logger.info("  Batch size = %d", self.args.batch_size)
        eval_loss = 0.0
        nb_eval_steps = 0
        intent_preds = None
        slot_preds = None
        out_intent_label_ids = None
        out_slot_labels_ids = None

        self.model.eval()  # evaluation mode

        for batch in tqdm(eval_dataloader, desc="Evaluating"):
            batch = tuple(t.to(self.device) for t in batch)
            with torch.no_grad():  # disable gradient computation
                inputs = {
                    'input_ids_1': batch[0],
                    'attention_mask_1': batch[1],
                    'token_type_ids_1': batch[2],
                    'input_ids_2': batch[3],  # first line
                    'attention_mask_2': batch[4],  # second line
                    'token_type_ids_2': batch[5],
                    'intent_label_ids': batch[6]
                }
                outputs = self.model(**inputs)
                tmp_eval_loss, (
                    intent_logits,
                    slot_logits,
                ) = outputs[:2]  # the first two model outputs are the loss and the logits
                # tmp_eval_loss, (slot_logits,), x = outputs[:2]  # the first two model outputs are the loss and the logits

                eval_loss += tmp_eval_loss.mean().item()  # item() returns a scalar
            nb_eval_steps += 1

            # Intent prediction
            if intent_preds is None:
                intent_preds = intent_logits.detach().cpu().numpy()
                out_intent_label_ids = inputs['intent_label_ids'].detach().cpu(
                ).numpy()
            else:
                intent_preds = np.append(intent_preds,
                                         intent_logits.detach().cpu().numpy(),
                                         axis=0)
                out_intent_label_ids = np.append(
                    out_intent_label_ids,
                    inputs['intent_label_ids'].detach().cpu().numpy(),
                    axis=0)

        eval_loss = eval_loss / nb_eval_steps  # average loss
        results = {"loss": eval_loss}

        # Intent result
        intent_preds = np.argmax(intent_preds,
                                 axis=1)  # axis=1: argmax along each row; axis=0: along each column

        total_result = compute_metrics(intent_preds, out_intent_label_ids)
        # total_result = compute_metrics_slot(slot_preds_list, out_slot_label_list)

        results.update(total_result)

        logger.info("***** Eval results *****")
        for key in sorted(results.keys()):
            logger.info("  %s = %s", key, str(results[key]))
        if mode == 'test':
            f = open('result/result.txt', 'a', encoding='utf-8')
            for key in sorted(results.keys()):
                f.write("  %s = %s" % (key, str(results[key])))
            f.write("\n")
            f.close()
        return results
Example #27
mask_ts_[mask_tiles == ts5] = 1
mask_ts_[mask_tiles == ts6] = 1
mask_ts_[mask_tiles == ts7] = 1
mask_ts_[mask_tiles == ts8] = 1
mask_ts_[mask_tiles == ts9] = 1

#% Load model
model = load_model(filepath + 'unet_exp_' + str(exp) + '.h5', compile=False)
area = 11
# Prediction
ref_final, pre_final, prob_recontructed, ref_reconstructed, mask_no_considered_, mask_ts, time_ts = prediction(
    model, image_array, image_ref, final_mask, mask_ts_, patch_size, area)

# Metrics
cm = confusion_matrix(ref_final, pre_final)
metrics = compute_metrics(ref_final, pre_final)
print('Confusion  matrix \n', cm)
print('Accuracy: ', metrics[0])
print('F1score: ', metrics[1])
print('Recall: ', metrics[2])
print('Precision: ', metrics[3])

# Alarm area
total = (cm[1, 1] + cm[0, 1]) / len(ref_final) * 100
print('Area to be analyzed', total)

print('training time', end_training)
print('test time', time_ts)

#%% Show the results
# prediction of the whole image
Example #28
def evaluate(args, model, eval_dataset, prefix=""):
    eval_output_dir = args.output_dir

    if not os.path.exists(eval_output_dir) and args.local_rank in [-1, 0]:
        os.makedirs(eval_output_dir)

    args.eval_batch_size = args.per_gpu_eval_batch_size * max(1, args.n_gpu)
    # Note that DistributedSampler samples randomly
    eval_sampler = SequentialSampler(
        eval_dataset) if args.local_rank == -1 else DistributedSampler(
            eval_dataset)
    eval_dataloader = DataLoader(eval_dataset,
                                 sampler=eval_sampler,
                                 batch_size=args.eval_batch_size)

    # Eval!
    logger.info("***** Running evaluation {} *****".format(prefix))
    logger.info("  Num examples = %d", len(eval_dataset))
    logger.info("  Batch size = %d", args.eval_batch_size)
    eval_loss = 0.0
    nb_eval_steps = 0
    preds = None
    out_label_ids = None
    for batch in tqdm(eval_dataloader, desc="Evaluating"):
        model.eval()
        batch = tuple(t.to(args.device) for t in batch)

        with torch.no_grad():
            inputs = {
                'input_ids':
                batch[0],
                'entity_a':
                batch[1],
                'entity_b':
                batch[2],
                'attention_mask':
                batch[3],
                'token_type_ids':
                batch[4] if args.model_type in ['bert', 'xlnet'] else None,
                # XLM and RoBERTa don't use segment_ids
                'labels':
                batch[5]
            }
            outputs = model(**inputs)
            tmp_eval_loss, logits = outputs[:2]

            eval_loss += tmp_eval_loss.mean().item()
        nb_eval_steps += 1
        if preds is None:
            preds = logits.detach().cpu().numpy()
            out_label_ids = inputs['labels'].detach().cpu().numpy()
        else:
            preds = np.append(preds, logits.detach().cpu().numpy(), axis=0)
            out_label_ids = np.append(out_label_ids,
                                      inputs['labels'].detach().cpu().numpy(),
                                      axis=0)

    eval_loss = eval_loss / nb_eval_steps
    if args.output_mode == "classification":
        preds = np.argmax(preds, axis=1)
    elif args.output_mode == "regression":
        preds = np.squeeze(preds)

    with open(args.result_file, "w", encoding="utf-8") as fo:
        for p in preds:
            fo.write(str(p) + "\n")

    result = compute_metrics(args, preds, out_label_ids)

    logger.info("***** Eval results {} *****".format(prefix))
    for key in sorted(result.keys()):
        logger.info("  %s = %s", key, str(result[key]))

    return result, eval_loss
Example #29
    if args.max_level is None:
        max_level = class_tree.get_height()
    else:
        max_level = args.max_level

    wstc = WSTC(input_shape=x.shape,
                class_tree=class_tree,
                max_level=max_level,
                sup_source=args.sup_source,
                y=y,
                vocab_sz=vocab_sz,
                word_embedding_dim=word_embedding_dim,
                block_thre=args.gamma,
                block_level=args.block_level)

    total_counts = sum(word_counts[ele] for ele in word_counts)
    total_counts -= word_counts[vocabulary_inv_list[0]]
    background_array = np.zeros(vocab_sz)
    for i in range(1, vocab_sz):
        background_array[i] = word_counts[vocabulary_inv[i]] / total_counts

    for level in range(max_level):
        y_pred = proceed_level(x, sequences, wstc, args, pretrain_epochs,
                               self_lr, decay, update_interval, delta,
                               class_tree, level, expand_num, background_array,
                               max_doc_length, max_sent_length, len_avg,
                               len_std, beta, alpha, vocabulary_inv,
                               common_words)
    write_output(y_pred, perm, class_tree, './' + args.dataset)
    compute_metrics(y_pred, y)
Example #30
def evaluate(args, model, tokenizer, processor, prefix="", eval_split=None):
    eval_task_names = (args.task_name, )
    eval_outputs_dirs = (args.output_dir, )

    assert eval_split is not None

    results = {}
    if os.path.exists("/output/metrics.json"):
        with open("/output/metrics.json", "r") as f:
            existing_results = json.loads(f.read())
        f.close()
        results.update(existing_results)

    for eval_task, eval_output_dir in zip(eval_task_names, eval_outputs_dirs):
        eval_dataset, examples = load_and_cache_examples(args,
                                                         eval_task,
                                                         tokenizer,
                                                         evaluate=True,
                                                         eval_split=eval_split)

        if not os.path.exists(eval_output_dir) and args.local_rank in [-1, 0]:
            os.makedirs(eval_output_dir)

        args.eval_batch_size = args.per_gpu_eval_batch_size * max(
            1, args.n_gpu)
        # Note that DistributedSampler samples randomly
        eval_sampler = SequentialSampler(
            eval_dataset) if args.local_rank == -1 else DistributedSampler(
                eval_dataset)
        eval_dataloader = DataLoader(eval_dataset,
                                     sampler=eval_sampler,
                                     batch_size=args.eval_batch_size)

        # Eval!
        logger.info("***** Running evaluation {} on {} *****".format(
            prefix, eval_split))
        logger.info("  Num examples = %d", len(eval_dataset))
        logger.info("  Batch size = %d", args.eval_batch_size)
        eval_loss = 0.0
        nb_eval_steps = 0
        preds = None
        node_preds = None
        out_label_ids = None
        out_node_label_ids = None
        for batch in tqdm(eval_dataloader,
                          desc="Evaluating",
                          mininterval=10,
                          ncols=100):
            model.eval()
            batch = tuple(t.to(args.device) for t in batch)

            with torch.no_grad():
                inputs = {
                    'input_ids':
                    batch[0],
                    'attention_mask':
                    batch[1],
                    'token_type_ids':
                    batch[2] if args.model_type
                    in ['bert', 'xlnet', 'bert_mc'] else None,
                    # XLM don't use segment_ids
                    'proof_offset':
                    batch[3],
                    'node_label':
                    batch[4],
                    'labels':
                    batch[5]
                }
                outputs = model(**inputs)
                tmp_eval_loss, tmp_qa_loss, tmp_node_loss, logits, node_logits = outputs[:5]

                eval_loss += tmp_eval_loss.mean().item()
            nb_eval_steps += 1
            if preds is None:
                preds = logits.detach().cpu().numpy()
                node_preds = node_logits.detach().cpu().numpy()
                if not eval_split == "test":
                    out_label_ids = inputs['labels'].detach().cpu().numpy()
                    out_node_label_ids = inputs['node_label'].detach().cpu(
                    ).numpy()
            else:
                preds = np.append(preds, logits.detach().cpu().numpy(), axis=0)
                node_preds = np.append(node_preds,
                                       node_logits.detach().cpu().numpy(),
                                       axis=0)
                if not eval_split == "test":
                    out_label_ids = np.append(
                        out_label_ids,
                        inputs['labels'].detach().cpu().numpy(),
                        axis=0)
                    out_node_label_ids = np.append(
                        out_node_label_ids,
                        inputs['node_label'].detach().cpu().numpy(),
                        axis=0)

        eval_loss = eval_loss / nb_eval_steps
        preds = np.argmax(preds, axis=1)
        node_preds = np.argmax(node_preds, axis=2)

        if not eval_split == "test":
            result = compute_metrics(eval_task, preds, out_label_ids)
            result_split = {}
            for k, v in result.items():
                result_split[k + "_{}".format(eval_split)] = v
            results.update(result_split)

            output_eval_file = os.path.join(
                eval_output_dir, "eval_results_{}.txt".format(eval_split))
            with open(output_eval_file, "w") as writer:
                logger.info("***** Eval results {} on {} *****".format(
                    prefix, eval_split))
                for key in sorted(result_split.keys()):
                    logger.info("  %s = %s", key, str(result_split[key]))
                    writer.write("%s = %s\n" % (key, str(result_split[key])))

        # predictions
        output_pred_file = os.path.join(
            eval_output_dir, "predictions_{}.lst".format(eval_split))
        with open(output_pred_file, "w") as writer:
            logger.info("***** Write predictions {} on {} *****".format(
                prefix, eval_split))
            for pred in preds:
                writer.write("{}\n".format(processor.get_labels()[pred]))

        # prediction nodes
        output_node_pred_file = os.path.join(
            eval_output_dir, "prediction_nodes_{}.lst".format(eval_split))
        with open(output_node_pred_file, "w") as writer:
            logger.info("***** Write predictions {} on {} *****".format(
                prefix, eval_split))
            for node_gold, node_pred in zip(out_node_label_ids, node_preds):
                node_gold = node_gold[np.where(node_gold != -100)[0]]
                node_pred = node_pred[:len(node_gold)]
                writer.write(str(list(node_pred)) + "\n")

    return results