Example #1
File: main.py Project: mlbench/mlbench
def main():
    options = get_options()

    options = initialize(options)
    options = create_dataset(options, train=True)
    options = create_dataset(options, train=False)

    model = get_model(options)

    optimizer = get_optimizer(options, model)

    scheduler = get_scheduler(options, optimizer)

    # Criterions are like `torch.nn.CrossEntropyLoss()`
    criterion = get_criterion(options, model)

    metrics = get_metrics(options)

    model = convert_dtype(options.dtype, model)
    criterion = convert_dtype(options.dtype, criterion)
    if options.use_cuda:
        model.cuda()
        criterion.cuda()

    options = checkpoint.maybe_resume(options, model, optimizer, scheduler)

    controlflow = get_controlflow(options)
    controlflow(model=model, optimizer=optimizer, criterion=criterion,
                metrics=metrics, scheduler=scheduler, options=options)
Example #2
File: main.py Project: mrerabek/mlbench
def main():
    options = get_options()

    options = initialize(options)
    options = create_dataset(options, train=True)
    options = create_dataset(options, train=False)

    model = get_model(options)

    optimizer = get_optimizer(options, model)

    scheduler = get_scheduler(options, optimizer)

    # Criterions are like `torch.nn.CrossEntropyLoss()`
    criterion = get_criterion(options)

    metrics = get_metrics(options)

    if options.use_cuda:
        model.cuda()
        criterion.cuda()

    options = checkpoint.maybe_resume(options, model, optimizer, scheduler)

    controlflow = get_controlflow(options)
    controlflow(model=model,
                optimizer=optimizer,
                criterion=criterion,
                metrics=metrics,
                scheduler=scheduler,
                options=options)
Example #3
def construct_policy_frontier(preds_df, outcomes_df, reward_params,
                              validate=True,
                              test_outcomes_df=None,
                              num_trials=20):

    # Get names of outcome columns
    outcome_cols = outcomes_df.drop(columns=['example_id']).columns

    # Get all reward parameter combinations to be tested
    param_names = list(reward_params[0].keys())

    metric_names = None

    # Cohorts for which we are going to compute policy performance frontiers
    cohorts_to_evaluate = ['train', 'val'] if validate else ['train', 'test']

    frontiers_dict = {}

    for cohort in cohorts_to_evaluate:

        logging.info(f"Evaluating models on {cohort} cohort...")

        all_stats = []
        for trial in range(num_trials):
            logging.info(f"Calculating metrics for split {trial}")

            is_train = (cohort == 'train')
            preds_for_split_df = preds_df[(preds_df['split_ct'] == trial) &
                                        (preds_df['is_train'] == is_train)].reset_index(drop=True)

            for i, combo in enumerate(reward_params):

                logging.info(f"Evaluating at parameter setting {i} / {len(reward_params)}")

                preds_for_split_df = preds_for_split_df.rename(columns={
                    f'predicted_prob_{outcome}': outcome for outcome in outcome_cols                     
                })[['example_id'] + list(outcome_cols)]

                preds_for_split_df['action'] = preds_for_split_df.apply(
                    lambda x: get_policy_for_row(x, combo), axis=1
                )
                outcomes_to_merge_df = outcomes_df if cohort != 'test' else test_outcomes_df
                policy_outcomes_df = preds_for_split_df[['example_id', 'action']].merge(outcomes_to_merge_df, on='example_id', how='inner')

                metrics = get_metrics(policy_outcomes_df)
                if metric_names is None:
                    metric_names = list(metrics.keys())

                curr_reward_param_list = [combo[param_name] for param_name in param_names]
                stats_for_trial_combo = [metrics[metric_name] for metric_name in metric_names]

                all_stats.append(curr_reward_param_list + stats_for_trial_combo)
    
        all_stats = pd.DataFrame(all_stats, columns=param_names + metric_names)
        all_stats_means = all_stats.groupby(param_names).mean().reset_index()

        frontiers_dict[cohort] = all_stats_means

    return frontiers_dict
Example #4
def get_stats_for_train_val_preds(train_preds_outcomes, val_preds_outcomes,
                                  threshold_setting):

    outcome_order = ['NIT', 'SXT', 'CIP', 'LVX']

    train_preds_outcomes['action'] = train_preds_outcomes.apply(
        lambda x: get_policy(x, threshold_setting, outcomes=outcome_order),
        axis=1)

    val_preds_outcomes['action'] = val_preds_outcomes.apply(
        lambda x: get_policy(x, threshold_setting, outcomes=outcome_order),
        axis=1)

    train_metrics = get_metrics(train_preds_outcomes)
    val_metrics = get_metrics(val_preds_outcomes)

    return train_metrics, val_metrics
Example #5
def import_losses_and_metrics(config):
    """Import losses and metrics from configuration
    """
    loss_dict = dict([(loss['task_id'], loss['name'])
                      for loss in config['losses']])
    losses = custom_losses.get_losses(loss_dict)
    metric_dict = dict([(metric['task_id'], metric['name'])
                        for metric in config['metrics']])
    metrics = custom_metrics.get_metrics(metric_dict)
    return losses, metrics
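
import_losses_and_metrics only assumes that config['losses'] and config['metrics'] are lists of entries with task_id and name keys, which custom_losses / custom_metrics then resolve. A minimal illustration of that shape (the task IDs and loss/metric names below are made-up placeholders):

config = {
    'losses': [
        {'task_id': 'task_a', 'name': 'binary_crossentropy'},
        {'task_id': 'task_b', 'name': 'mse'},
    ],
    'metrics': [
        {'task_id': 'task_a', 'name': 'accuracy'},
        {'task_id': 'task_b', 'name': 'mae'},
    ],
}
losses, metrics = import_losses_and_metrics(config)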
Example #6
    def on_epoch_end(self, epoch, logs={}):
        self.epoch_index += 1
        self.losses.append(logs['loss'])
        self.val_losses.append(logs['val_loss'])
        loss_line, = self.ax.plot(range(1, self.epoch_index + 1),
                                  self.losses,
                                  'g-',
                                  label='Training Loss')
        val_loss_line, = self.ax.plot(range(1, self.epoch_index + 1),
                                      self.val_losses,
                                      'r-',
                                      label='Validation Loss')
        self.ax.legend(handles=[loss_line, val_loss_line])
        self.ax.set_ylim(
            (MetricsCallback.GRAPH_MIN, MetricsCallback.GRAPH_MAX))
        self.fig.canvas.draw()
        if logs['val_loss'] < self.best_val_loss:
            self.val_loss_reductions += 1
            self.best_val_loss = logs['val_loss']
            self.best_weights = self.model.get_weights()
            print '\r    \r'  # to remove the previous line of verbose output of model fit
            #time.sleep(0.1)
            info('Found lower val loss for epoch {} => {}'.format(
                self.epoch_index, round(logs['val_loss'], 5)))
            if self.val_loss_reductions % MetricsCallback.EPOCHS_BEFORE_VALIDATION == 0:

                info('Validation Loss Reduced {} times'.format(
                    self.val_loss_reductions))
                info('Evaluating on Validation Data')
                Xv_file, yv_file = get_data_files(self.base_load_directory,
                                                  self.classifications_type,
                                                  self.level, 'validation')
                Xv, yv = get_data(Xv_file, yv_file, mmap=True)
                yvp = self.model.predict_generator(generator=batch_generator(
                    Xv_file,
                    yv_file,
                    self.batch_size,
                    is_mlp=self.is_mlp,
                    validate=True),
                                                   max_q_size=QUEUE_SIZE,
                                                   val_samples=yv.shape[1])
                yvp_binary = get_binary_0_5(yvp)
                info('Generating Validation Metrics')
                validation_metrics = get_metrics(yv, yvp, yvp_binary)
                print "****** Validation Metrics: Cov Err: {:.3f} | Top 3: {:.3f} | Top 5: {:.3f} | F1 Micro: {:.3f} | F1 Macro: {:.3f}".format(
                    validation_metrics['coverage_error'],
                    validation_metrics['top_3'], validation_metrics['top_5'],
                    validation_metrics['f1_micro'],
                    validation_metrics['f1_macro'])
                self.metrics_dict[self.epoch_index] = validation_metrics
Example #7
def test_detect_metrics():
    tps, fns, fps = 0, 0, 0
    for img_path in glob(test_folder + "/*.jpg"):
        json_path = str(img_path)[:-3] + "json"
        img = cv2.imread(str(img_path))
        results = engine.diff_areas(img)
        predictions = [item['bbox'] for item in results]
        ground_truths = get_bbox_from_json(read_json_file(json_path))
        tp, fn, fp = get_metrics(predictions, ground_truths, iou_thres=0.3)
        tps += tp
        fns += fn
        fps += fp
    precision = tps / (tps + fps)
    recall = tps / (tps + fns)
    f1_score = 2 * precision * recall / (precision + recall)
    print(
        f"precision: {precision:.4f}, recall: {recall:.4f}, f1_score: {f1_score:.4f}"
    )
Example #8
def run_nb_streams(
    stream_trained,
    stream_untrained,
    model,
    drift_detector,
    batch_size=32,
    print_every=1,
    device="cpu",
):
    """
    Runs the trained stream to collect the labels, and then runs the untrained stream
    to detect changes between the models.

    Args:
        stream_trained (WOSStream): the Web of Science stream on which the model was trained
        stream_untrained (WOSStream): the Web of Science stream to be compared against the trained one
        model (NaiveBayes): the Naive Bayes model to evaluate
        drift_detector: the drift detector used to detect concept drift
        batch_size (int): number of samples per batch
        print_every (int): how often we print
        device (str): cpu or cuda

    Returns:
        lists of accuracies for the trained and untrained streams, with potential warnings or drifts

    """
    i = 0
    # Accuracies list (tuples of accuracy, and drift level)
    trained_accuracies = []
    labels = []
    print("Running trained stream...")
    while stream_trained.has_more_samples():
        # Get the batch from the stream
        if stream_trained.n_remaining_samples() >= batch_size:
            x_, _ = stream_trained.next_sample(batch_size)
        else:
            break

        # Unpack x_ (we do not need the sequence lengths for NB)
        x = x_[0].numpy()
        # Take the maximum over the axis 1
        x = np.amax(x, axis=1)

        # Get the predictions and metrics
        y_pred = model.predict(x)
        labels.append(y_pred)

        # Print if necessary
        if i % print_every == print_every - 1:
            print("Accuracy: {}".format(1.0))

            # Add to drift detector
            drift_detector.add_element(1 -
                                       np.random.uniform(low=0.9, high=1.0))
            if drift_detector.detected_warning_zone():
                trained_accuracies.append((1.0, "W"))
                print("Warning zone")
            elif drift_detector.detected_change():
                trained_accuracies.append((1.0, "D"))
                print("Drift detected")
            else:
                trained_accuracies.append((1.0, "N"))

        i += 1

    i = 0
    running_acc = 0.0
    # Accuracies list (tuples of accuracy, and drift level)
    untrained_accuracies = []
    print("Running untrained stream...")
    while stream_untrained.has_more_samples():
        # Get the batch from the stream
        if stream_untrained.n_remaining_samples() >= batch_size:
            x_, _ = stream_untrained.next_sample(batch_size)
            y = labels[i]
        else:
            break

        # Unpack x_ (we do not need the sequence lengths for NB)
        x = x_[0].numpy()
        # Take the maximum over the axis 1
        x = np.amax(x, axis=1)

        # Get the predictions and metrics
        y_pred = model.predict(x)
        metrics = get_metrics(labels=y,
                              predictions=y_pred,
                              no_labels=stream_untrained.n_classes)
        accuracy = metrics["accuracy"]

        # Print if necessary
        running_acc += accuracy
        if i % print_every == print_every - 1:
            print("Accuracy: {}".format(running_acc / print_every))
            running_acc = 0.0

            # Add to drift detector
            drift_detector.add_element(1 - accuracy)
            if drift_detector.detected_warning_zone():
                untrained_accuracies.append((accuracy, "W"))
                print("Warning zone")
            elif drift_detector.detected_change():
                untrained_accuracies.append((accuracy, "D"))
                print("Drift detected")
            else:
                untrained_accuracies.append((accuracy, "N"))

        i += 1

    return trained_accuracies, untrained_accuracies
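
Several of the stream examples on this page call get_metrics(labels=..., predictions=..., no_labels=...) and read keys such as accuracy, precision, recall, and macro_f1 from the result. The helper itself is not shown here; the following is only a plausible sketch, assuming it wraps scikit-learn and that the inputs are numpy-convertible class indices or per-class scores:

import numpy as np
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score

def get_metrics(labels, predictions, no_labels):
    """Hypothetical reconstruction: macro-averaged multi-class metrics."""
    y_true = np.asarray(labels)
    y_pred = np.asarray(predictions)
    # Reduce per-class scores to class indices if necessary
    if y_pred.ndim > 1:
        y_pred = y_pred.argmax(axis=1)
    classes = np.arange(no_labels)
    return {
        "accuracy": accuracy_score(y_true, y_pred),
        "precision": precision_score(y_true, y_pred, labels=classes,
                                     average="macro", zero_division=0),
        "recall": recall_score(y_true, y_pred, labels=classes,
                               average="macro", zero_division=0),
        "macro_f1": f1_score(y_true, y_pred, labels=classes,
                             average="macro", zero_division=0),
    }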
Example #9
# Run python evaluate_model.py [path to array of predictions] [path to test/validation dataframe]
import sys
from PIL import Image
from random import randint
import numpy as np
import pandas as pd
import math
import warnings
import pdb
from matplotlib import pyplot as plt
from utils.metrics import get_metrics
from utils.data_loader_utils import read_imgs_keraspp, read_imgs_keraspp_stacked

if len(sys.argv) != 3:
    print(
        "You should run: python evaluate_model.py [path to array of predictions] [path to test dataframe]"
    )
    sys.exit()

predictions = np.load(sys.argv[1])
test_df = pd.read_csv(sys.argv[2])
x_true, y_true = read_imgs_keraspp(test_df)
y_true = y_true.flatten()
y_pred = predictions.flatten()

get_metrics(y_true, y_pred, binarized=False)
Example #10
                                                 signature=False,
                                                 threshold=threshold)
    for compare in metric_comparisons:
        metric_df = shuffle_metric_results[compare].assign(permutation=i)
        all_shuffle_results[compare].append(metric_df)

# In[7]:

# Get ROC curve information for model sets
roc_scores = []
roc_curve_data = []
for split in roc_model_split_focus:
    results_subset_df = results_df.query("Metadata_model_split == @split")
    for shuffle in [True, False]:
        roc_auc_val, roc_df = get_metrics(df=results_subset_df,
                                          return_roc_curve=True,
                                          shuffle=shuffle)

        roc_scores.append(pd.Series([roc_auc_val, split, shuffle]))
        roc_curve_data.append(
            roc_df.assign(model_split=split, shuffled=shuffle))

roc_scores_df = pd.DataFrame(roc_scores)
roc_scores_df.columns = ["roc_auc", "model_split", "shuffled"]
roc_curve_data_df = pd.concat(roc_curve_data).reset_index(drop=True)

# In[8]:

# Output performance results
for compare in metric_comparisons:
    full_results_df = real_metric_results[compare]
Example #11
def run_stream_lstm(
    stream,
    model,
    drift_detector,
    batch_size=1,
    print_every=1,
    noise_stds=None,
    warm_start=None,
    device="cpu",
):
    """
    Runs a stream on the LSTM model using the given drift detector.

    Args:
        stream (WOSStream): the Web of Science stream to be run
        model (LSTM): the LSTM model to evaluate
        drift_detector: the drift detector used to detect concept drift
        batch_size (int): number of samples per batch
        print_every (int): how often we print
        noise_stds (list): a list of standard deviations for the gradual noise.
            If none, no noise is added
        warm_start (int): after which batch we start adding noise.
        device (str): cpu or cuda

    Returns:
        a list of accuracies, with potential warnings or drifts
    """
    i = 0
    running_acc = 0.0
    # Accuracies list (tuples of accuracy, and drift level)
    accuracies = []
    while stream.has_more_samples():
        # Get the batch from the stream
        if stream.n_remaining_samples() >= batch_size:
            x_, y = stream.next_sample(batch_size)
        else:
            break

        x, seq_lens = x_
        # Add noise if we have standard deviations
        if i >= warm_start and noise_stds is not None:
            print("Adding noise")
            std = torch.zeros_like(x) + noise_stds[i - warm_start]
            noise = torch.normal(0, std)
            x = x + noise

        # Move the batch to device
        x = x.to(device)
        y = torch.from_numpy(y).to(device)
        seq_lens = torch.tensor(seq_lens).to(device)

        # Get predictions and accuracy
        predictions, _ = model((x, seq_lens))
        metrics = get_metrics(
            labels=y, predictions=predictions, no_labels=stream.n_classes
        )
        accuracy = metrics["accuracy"]

        # Print if necessary
        running_acc += accuracy
        if i % print_every == print_every - 1:
            print("Accuracy: {}".format(running_acc / print_every))
            running_acc = 0.0

            # Add to drift detector
            drift_detector.add_element(1 - accuracy)
            if drift_detector.detected_warning_zone():
                accuracies.append((accuracy, "W"))
                print("Warning zone")
            elif drift_detector.detected_change():
                accuracies.append((accuracy, "D"))
                print("Drift detected")
            else:
                accuracies.append((accuracy, "N"))

        i += 1

    return accuracies
Example #12
def run_stream_nb(
    stream,
    model,
    drift_detector,
    batch_size=1,
    print_every=1,
    noise_stds=None,
    warm_start=None,
    device="cpu",
):
    """
    Runs a stream on the Naive Bayes model using the given drift detector.

    Args:
        stream (WOSStream): the Web of Science stream to be run
        model (NaiveBayes): the Naive Bayes model to evaluate
        drift_detector: the drift detector used to detect concept drift
        batch_size (int): number of samples per batch
        print_every (int): how often we print
        noise_stds (list): a list of standard deviations for the gradual noise.
            If none, no noise is added
        warm_start (int): after which batch we start adding noise.
        device (str): cpu or cuda

    Returns:
        a list of accuracies, with potential warnings or drifts
    """
    i = 0
    running_acc = 0.0
    # Accuracies list (tuples of accuracy, and drift level)
    accuracies = []
    while stream.has_more_samples():
        # Get the batch from the stream
        if stream.n_remaining_samples() >= batch_size:
            x_, y = stream.next_sample(batch_size)
        else:
            break

        # Unpack x_ (we do not need the sequence lengths for NB)
        x = x_[0].numpy()
        # Take the maximum over the axis 1
        x = np.amax(x, axis=1)
        # Add noise if we have standard deviations
        if i >= warm_start and noise_stds is not None:
            print("Adding noise")
            noise = np.random.normal(0, noise_stds[i - warm_start], x.shape)
            x = x + noise

        # Get the predictions and metrics
        y_pred = model.predict(x)
        metrics = get_metrics(labels=y, predictions=y_pred, no_labels=stream.n_classes)
        accuracy = metrics["accuracy"]

        # Print if necessary
        running_acc += accuracy
        if i % print_every == print_every - 1:
            print("Accuracy: {}".format(running_acc / print_every))
            running_acc = 0.0

            # Add to drift detector
            drift_detector.add_element(1 - accuracy)
            if drift_detector.detected_warning_zone():
                accuracies.append((accuracy, "W"))
                print("Warning zone")
            elif drift_detector.detected_change():
                accuracies.append((accuracy, "D"))
                print("Drift detected")
            else:
                accuracies.append((accuracy, "N"))

        i += 1

    return accuracies
Example #13
    def _train_epoch(self, epoch):

        self.model.train()
        if self.rank == 0:
            wrt_mode = 'train'

            y_true = []
            y_score = []
            y_score_b = []

            tic = time.time()
            self._reset_metrics()
        tbar = tqdm(self.train_loader, ncols=160)
        for batch_idx, (img, gt, Sgt, Lgt, mask) in enumerate(tbar):
            if self.rank == 0: self.data_time.update(time.time() - tic)
            img = img.to(self.rank, non_blocking=True)
            gt = gt.to(self.rank, non_blocking=True)
            mask = mask.to(self.rank, non_blocking=True)
            Sgt = Sgt.to(self.rank, non_blocking=True)
            Lgt = Lgt.to(self.rank, non_blocking=True)
            # LOSS & OPTIMIZE

            self.optimizer.zero_grad()
            with torch.cuda.amp.autocast(enabled=True):
                if self.gt_num == 1:
                    predict = self.model(img)
                    loss = self.loss(predict, gt)
                elif self.gt_num == 2:
                    s, predict = self.model(img)
                    loss = self.loss(predict, gt, s, Sgt)
                else:
                    l, s, predict = self.model(img)
                    loss = self.loss(predict, gt, s, Sgt, l, Lgt)
            self.scaler.scale(loss).backward()
            self.scaler.step(self.optimizer)
            self.scaler.update()
            if self.rank == 0:
                self.total_loss.update(loss.item())
                # measure elapsed time
                self.batch_time.update(time.time() - tic)
                tic = time.time()
                # LOGGING & TENSORBOARD
                if batch_idx % self.log_step == 0:
                    wrt_step = (epoch - 1) * len(self.train_loader) + batch_idx

                predict = torch.sigmoid(predict).cpu().detach().numpy().ravel()
                predict_b = np.where(predict >= 0.5, 1, 0)
                # predict_b = torch.where(predict >= 0.5, torch.full_like(predict, 1), torch.full_like(predict, 0))
                mask = mask.cpu().detach().numpy().ravel()
                y_true = gt.cpu().detach().numpy().ravel()[mask == 1]
                y_score = predict[mask == 1]
                y_score_b = predict_b[mask == 1]

                # FOR EVAL and INFO
                if self.rank == 0:
                    self._update_seg_metrics(*eval_metrics(y_true, y_score_b))
                    metrics = get_metrics(self.tn, self.fp, self.fn, self.tp)
                    tbar.set_description(
                        'TRAIN ({}) | Loss: {:.4f} | Acc {:.4f} Pre {:.4f} Sen {:.4f} Spe {:.4f} f1 {:.4f} IOU {:.4f} |B {:.2f} D {:.2f} |'.format(
                            epoch, self.total_loss.average, *metrics.values(), self.batch_time.average,
                            self.data_time.average))

            # METRICS TO TENSORBOARD
        if self.rank == 0:
            metrics = get_metrics_full(self.tn, self.fp, self.fn, self.tp, y_true, y_score, y_score_b)
            self.writer.add_scalar(f'{wrt_mode}/loss', self.total_loss.average, epoch)
            for k, v in list(metrics.items())[:-1]:
                self.writer.add_scalar(f'{wrt_mode}/{k}', v, epoch)
            for i, opt_group in enumerate(self.optimizer.param_groups):
                self.writer.add_scalar(f'{wrt_mode}/Learning_rate_{i}', opt_group['lr'], epoch)
            # self.writer.add_scalar(f'{self.wrt_mode}/Momentum_{k}', opt_group['momentum'], self.wrt_step)

        self.lr_scheduler.step()
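
Both this example and Example #17 below call get_metrics(self.tn, self.fp, self.fn, self.tp) and unpack six values into the progress bar (Acc, Pre, Sen, Spe, f1, IOU). The helper is not shown on this page; a minimal sketch under that assumption, using the standard confusion-matrix formulas (the key names and the epsilon guard are placeholders of mine):

def get_metrics(tn, fp, fn, tp, eps=1e-8):
    """Hypothetical reconstruction: pixel-level metrics from confusion-matrix counts."""
    accuracy = (tp + tn) / (tp + tn + fp + fn + eps)
    precision = tp / (tp + fp + eps)
    sensitivity = tp / (tp + fn + eps)        # recall / true positive rate
    specificity = tn / (tn + fp + eps)
    f1 = 2 * precision * sensitivity / (precision + sensitivity + eps)
    iou = tp / (tp + fp + fn + eps)
    return {"Acc": accuracy, "Pre": precision, "Sen": sensitivity,
            "Spe": specificity, "F1": f1, "IOU": iou}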
Example #14
        n_jobs=1,
        #eta0 is the learning rate when we use constant configuration
        random_state=SVM_SEED,
        learning_rate='optimal',
        eta0=0.0,
        class_weight=SVM_CLASS_WEIGHTS,
        warm_start=False),
    n_jobs=1)
clf.fit(X, y)

# Training Metrics
info('Evaluating on Training Data')
yp = clf.predict(X)
yp_score = clf.decision_function(X)
info('Calculating training metrics')
training_metrics = get_metrics(y, yp_score, yp)
print "** Training Metrics: Cov Err: {:.3f}, Avg Labels: {:.3f}, \n\t\t Top 1: {:.3f}, Top 3: {:.3f}, Top 5: {:.3f}, \n\t\t F1 Micro: {:.3f}, F1 Macro: {:.3f}, Total Pos: {:,d}".format(
    training_metrics['coverage_error'],
    training_metrics['average_num_of_labels'], training_metrics['top_1'],
    training_metrics['top_3'], training_metrics['top_5'],
    training_metrics['f1_micro'], training_metrics['f1_macro'],
    training_metrics['total_positive'])

# Get the validation data
info('Getting Validation Data')
Xv = pickle.load(open(data_validation_location, 'r'))
validation_data_docids = pickle.load(open(data_validation_docids_location,
                                          "r"))
yv = get_label_data(classifications, validation_data_docids,
                    doc_classification_map)
Example #15
def run_lstm_streams(
    stream_trained,
    stream_untrained,
    model,
    drift_detector,
    batch_size=32,
    print_every=1,
    device="cpu",
):
    """
    Runs the trained stream to collect the labels, and then runs the untrained stream
    to detect changes between the models.

    Args:
        stream_trained (WOSStream): the Web of Science stream on which the model was trained
        stream_untrained (WOSStream): the Web of Science stream to be compared against the trained one
        model (LSTM): the LSTM model to evaluate
        drift_detector: the drift detector used to detect concept drift
        batch_size (int): number of samples per batch
        print_every (int): how often we print
        device (str): cpu or cuda

    Returns:
        lists of accuracies for the trained and untrained streams, with potential warnings or drifts
    """
    i = 0
    # Accuracies list (tuples of accuracy, and drift level)
    trained_accuracies = []
    labels = []
    print("Running trained stream...")
    while stream_trained.has_more_samples():
        # Get the batch from the stream
        if stream_trained.n_remaining_samples() >= batch_size:
            x_, _ = stream_trained.next_sample(batch_size)
        else:
            break

        x, seq_lens = x_
        # Move the batch to device
        x = x.to(device)
        seq_lens = torch.tensor(seq_lens).to(device)

        # Get predictions and add them to labels
        predictions, _ = model((x, seq_lens))
        labels.append(predictions.argmax(dim=1))

        # Print if necessary
        if i % print_every == print_every - 1:
            print("Accuracy: {}".format(1.0))

            # Add to drift detector
            drift_detector.add_element(1 -
                                       np.random.uniform(low=0.9, high=1.0))
            if drift_detector.detected_warning_zone():
                trained_accuracies.append((1.0, "W"))
                print("Warning zone")
            elif drift_detector.detected_change():
                trained_accuracies.append((1.0, "D"))
                print("Drift detected")
            else:
                trained_accuracies.append((1.0, "N"))

        i += 1

    i = 0
    running_acc = 0.0
    # Accuracies list (tuples of accuracy, and drift level)
    untrained_accuracies = []
    print("Running untrained stream...")
    while stream_untrained.has_more_samples():
        # Get the batch from the stream
        if stream_untrained.n_remaining_samples() >= batch_size:
            x_, _ = stream_untrained.next_sample(batch_size)
            y = labels[i]
        else:
            break

        x, seq_lens = x_
        # Move the batch to device
        x = x.to(device)
        seq_lens = torch.tensor(seq_lens).to(device)

        # Get predictions and accuracy
        predictions, _ = model((x, seq_lens))
        metrics = get_metrics(
            labels=y.detach().numpy(),
            predictions=predictions,
            no_labels=stream_untrained.n_classes,
        )
        accuracy = metrics["accuracy"]

        # Print if necessary
        running_acc += accuracy
        if i % print_every == print_every - 1:
            print("Accuracy: {}".format(running_acc / print_every))
            running_acc = 0.0

            # Add to drift detector
            drift_detector.add_element(1 - accuracy)
            if drift_detector.detected_warning_zone():
                untrained_accuracies.append((accuracy, "W"))
                print("Warning zone")
            elif drift_detector.detected_change():
                untrained_accuracies.append((accuracy, "D"))
                print("Drift detected")
            else:
                untrained_accuracies.append((accuracy, "N"))

        i += 1

    return trained_accuracies, untrained_accuracies
Example #16
File: test_degan.py Project: ferugit/degan
def main():

    args = parse_arguments()

    # Model and experiment identification
    model_id = args.arc
    experiment_path = os.path.join(args.checkpoint, args.arc)
    model_path = os.path.join(experiment_path, model_id + '_entire.pt')
    print("Testing model: " + args.arc + "\n")

    # Reporter
    reporter = Reporter(experiment_path, model_id + '_report.json')
    reporter.load(os.path.join(experiment_path, model_id + '_report.json'))

    # Augments
    augments = ['white_noise']
    augments = {key: False for key in augments}

    # Set seed
    train.set_seed(args.seed)

    # Read test df
    _, test_dataset = loader.load_train_partitions(
        args.partition_path,
        window_size=int(args.time_window * args.sampling_rate),
        fs=args.sampling_rate,
        augments=augments)

    print("Test data information")
    print(test_dataset)

    # Generate data loaders
    test_loader = DataLoader(test_dataset,
                             shuffle=False,
                             batch_size=args.batch_size,
                             drop_last=False)

    # Build model
    model = torch.load(model_path)

    if ("sgru" in args.arc):
        net_class = 'rnn'
    else:
        net_class = 'cnn'

    # Select device
    if (torch.cuda.is_available() and args.cuda):
        device = torch.device("cuda")
        model.to(device)
        print("Running on GPU")
    else:
        device = torch.device("cpu")
        print("Running on CPU")

    # Loss function
    criterion = nn.CrossEntropyLoss()

    # Test model
    labels, predictions, metrics = train.test_model(model,
                                                    test_loader,
                                                    criterion,
                                                    device,
                                                    args.batch_size,
                                                    net_class=net_class)

    # Save confusion matrix
    target_names = []
    dict_path = os.path.join(args.partition_path, 'classes_index.json')
    classes_index = json.load(open(dict_path, 'r'))
    for index in range(len(predictions[0])):
        target_names.append(classes_index[str(index)])

    result = analyzer.get_metrics(labels,
                                  predictions,
                                  target_names=target_names)
    analyzer.plot_confusion_matrix(result[1],
                                   target_names,
                                   os.path.join(experiment_path,
                                                model_id + '_confusion.png'),
                                   normalize=False)

    # Store metrics
    test_metrics = {
        'loss': metrics['test_loss'],
        'accuracy': metrics['test_accuracy'],
        'report': result[0]
    }
    reporter.report('test_metrics', test_metrics)
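
analyzer.get_metrics above comes from the project's own analysis module and is not shown on this page. From how result[0] (a report stored via the reporter) and result[1] (a confusion matrix passed to the plot) are used, a plausible sketch is the following, assuming labels and predictions arrive as one-hot vectors or per-class scores:

import numpy as np
from sklearn.metrics import classification_report, confusion_matrix

def get_metrics(labels, predictions, target_names=None):
    """Hypothetical sketch: build a classification report and confusion matrix."""
    y_true = np.asarray(labels)
    y_pred = np.asarray(predictions)
    # Reduce one-hot labels / per-class scores to class indices if needed
    if y_true.ndim > 1:
        y_true = y_true.argmax(axis=1)
    if y_pred.ndim > 1:
        y_pred = y_pred.argmax(axis=1)
    report = classification_report(y_true, y_pred,
                                   target_names=target_names,
                                   output_dict=True)
    cm = confusion_matrix(y_true, y_pred)
    return report, cm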
Example #17
    def _valid_epoch(self, epoch):
        if self.rank == 0:
            logger.info('\n###### EVALUATION ######')
            wrt_mode = 'val'
            self._reset_metrics()
            val_img = []
            y_true = []
            y_score = []
            y_score_b = []
        self.model.eval()
        tbar = tqdm(self.val_loader, ncols=160)
        with torch.no_grad():

            for batch_idx, (img, gt, Sgt, Lgt, mask) in enumerate(tbar):
                img = img.to(self.rank, non_blocking=True)
                gt = gt.to(self.rank, non_blocking=True)
                mask = mask.to(self.rank, non_blocking=True)
                Sgt = Sgt.to(self.rank, non_blocking=True)
                Lgt = Lgt.to(self.rank, non_blocking=True)
                # LOSS

                with torch.cuda.amp.autocast(enabled=True):
                    if self.gt_num == 1:
                        predict = self.model(img)
                        loss = self.loss(predict, gt)
                    elif self.gt_num == 2:
                        s, predict = self.model(img)
                        loss = self.loss(predict, gt, s, Sgt)
                    else:
                        l, s, predict = self.model(img)
                        loss = self.loss(predict, gt, s, Sgt, l, Lgt)
                if self.rank == 0:
                    self.total_loss.update(loss.item())
                    predict = torch.sigmoid(predict).cpu().detach().numpy()
                    predict_b = np.where(predict >= 0.5, 1, 0)
                    mask = mask.cpu().detach().numpy().ravel()
                    y_true = gt.cpu().detach().numpy().ravel()[mask == 1]
                    y_score = predict.ravel()[mask == 1]
                    y_score_b = predict_b.ravel()[mask == 1]
                    # FOR EVAL and INFO
                    self._update_seg_metrics(*eval_metrics(y_true, y_score_b))
                    metrics = get_metrics(self.tn, self.fp, self.fn, self.tp)
                    tbar.set_description(
                        'EVAL ({}) | Loss: {:.4f} | Acc {:.4f} Pre {:.4f} Sen {:.4f} Spe {:.4f} f1 {:.4f} IOU {:.4f} |'.format(
                            epoch, self.total_loss.average, *metrics.values()))

                    # LIST OF IMAGE TO VIZ (15 images)

                    if batch_idx < 10:
                        val_img.extend([img[0].data.cpu(), gt[0].data.cpu(), torch.tensor(predict_b[0])])
            if self.rank == 0:
                val_img = torch.stack(val_img, 0)
                val_img = make_grid(val_img, nrow=3, padding=2)
                if self.show is True:
                    plt.figure(figsize=(12, 36))
                    plt.imshow(transforms.ToPILImage()(val_img.squeeze(0)).convert('L'), cmap='gray')
                    plt.show()

                # LOGGING & TENSORBOARD
                wrt_step = epoch
                metrics = get_metrics_full(self.tn, self.fp, self.fn, self.tp, y_true, y_score, y_score_b)
                self.writer.add_image(f'{wrt_mode}/inputs_targets_predictions', val_img, wrt_step)
                self.writer.add_scalar(f'{wrt_mode}/loss', self.total_loss.average, wrt_step)
                for k, v in list(metrics.items())[:-1]:
                    self.writer.add_scalar(f'{wrt_mode}/{k}', v, wrt_step)
                log = {
                    'val_loss': self.total_loss.average,
                    **metrics
                }
        return log
Example #18
            max_q_size=QUEUE_SIZE)

        # using the recorded weights of the best recorded validation loss
        last_model_weights = model.get_weights()
        info('Evaluating on Validation Data using saved best weights')
        model.set_weights(metrics_callback.best_weights)
        yvp = model.predict_generator(generator=batch_generator(Xv_file,
                                                                yv_file,
                                                                NN_BATCH_SIZE,
                                                                is_mlp=True,
                                                                validate=True),
                                      max_q_size=QUEUE_SIZE,
                                      val_samples=len(validation_docs_list))
        yvp_binary = get_binary_0_5(yvp)
        info('Generating Validation Metrics')
        validation_metrics = get_metrics(yv, yvp, yvp_binary)
        print "****** Validation Metrics: Cov Err: {:.3f} | Top 3: {:.3f} | Top 5: {:.3f} | F1 Micro: {:.3f} | F1 Macro: {:.3f}".format(
            validation_metrics['coverage_error'], validation_metrics['top_3'],
            validation_metrics['top_5'], validation_metrics['f1_micro'],
            validation_metrics['f1_macro'])
        best_validation_metrics = validation_metrics

        time.sleep(0.2)
        param_results_dict[GLOBAL_VARS.NN_MODEL_NAME] = dict()
        param_results_dict[GLOBAL_VARS.NN_MODEL_NAME][
            'best_validation_metrics'] = best_validation_metrics
        param_results_dict[GLOBAL_VARS.NN_MODEL_NAME]['epochs'] = len(
            history.history['val_loss'])
        param_results_dict[GLOBAL_VARS.NN_MODEL_NAME][
            'best_weights'] = metrics_callback.best_weights
        param_results_dict[GLOBAL_VARS.NN_MODEL_NAME][
Example #19
def evaluate_model(model_dicts_list,
                   train_val_splits,
                   outcomes_df,
                   reward_params,
                   test_outcomes_df=None,
                   include_defer=False):

    all_stats = defaultdict(list)
    reward_param_names, metric_names = sorted(reward_params[0].keys()), None

    param_settings_array = np.array(
        [[param_setting[param_name] for param_name in reward_param_names]
         for param_setting in reward_params])

    for train_val_split, model_dict in zip(train_val_splits, model_dicts_list):

        for cohort_name, cohort_df in train_val_split.items():
            stats_for_param = []

            for param_setting in reward_params:
                logging.info("Storing primary outcomes...")

                # Compute metrics of interest here
                current_model = model_dict[tuple(param_setting.items())]
                cohort_actions_df = current_model.get_actions(cohort_df)

                if cohort_name == 'test':
                    cohort_actions_outcomes_df = cohort_actions_df.merge(
                        test_outcomes_df, on='example_id')
                else:
                    cohort_actions_outcomes_df = cohort_actions_df.merge(
                        outcomes_df, on='example_id')

                metrics = get_metrics(cohort_actions_outcomes_df) if not (
                    include_defer) else get_metrics_with_deferral(
                        cohort_actions_outcomes_df)

                if metric_names is None:
                    metric_names = list(metrics.keys())

                stats_for_param.append(
                    [metrics[name] for name in metric_names])

            all_stats[cohort_name].append(np.array(stats_for_param))

    columns = reward_param_names + metric_names + [
        f'{metric}_stdev' for metric in metric_names
    ]
    stats_dict_final = {}

    for cohort_name, stats_for_cohort in all_stats.items():
        stats_means = np.array(stats_for_cohort).mean(axis=0)
        stats_stdevs = np.array(stats_for_cohort).std(axis=0)

        stats_final = np.hstack(
            [np.array(param_settings_array), stats_means, stats_stdevs])

        logging.info("Completed calculating means")
        stats_dict_final[cohort_name] = pd.DataFrame(stats_final,
                                                     columns=columns)

    return stats_dict_final
Example #20
def run_stream_with_mapping(
    stream, model, mapping, batch_size=1, print_every=1, device="cpu",
):
    """ Runs a stream with a mapping to convert from the stream's
    inputs embeddings space to the embedding space outputted by
    the mapping.

    Args:
        stream (WOSStream): the Web of Science stream to be run
        model (LSTM): the LSTM model to evaluate
        mapping (Mapping): the mapping used to change embedding spaces
        batch_size (int): number of samples per batch
        print_every (int): how often we print
        device (str): cpu or cuda

    Returns:
        a list of accuracies

    """
    # Initialize variables for tracking
    i = 0
    running_acc = 0.0
    accuracies = []
    if isinstance(mapping.mapping, np.ndarray):
        mapping = torch.tensor(mapping.mapping, dtype=torch.float)
    else:
        mapping = mapping.mapping
        mapping.eval()

    # Run stream
    while stream.has_more_samples():
        # Get the batch from the stream
        if stream.n_remaining_samples() >= batch_size:
            x_, y = stream.next_sample(batch_size)
        else:
            break

        x, seq_lens = x_
        # Put in the mapping to transform to the other embedding space
        if isinstance(mapping, torch.Tensor):
            x = x.matmul(mapping.T).to(device)
        else:
            with torch.no_grad():
                x = mapping(x)

        y = torch.from_numpy(y).to(device)
        seq_lens = torch.tensor(seq_lens).to(device)

        # Get predictions and accuracy
        predictions, _ = model((x, seq_lens))
        metrics = get_metrics(
            labels=y, predictions=predictions, no_labels=stream.n_classes
        )
        accuracy = metrics["accuracy"]

        # Print if necessary
        running_acc += accuracy
        if i % print_every == print_every - 1:
            print("Accuracy: {}".format(running_acc / print_every))
            accuracies.append(running_acc / print_every)
            running_acc = 0.0

        i += 1

    return accuracies
Example #21
def train_nb_wos_holdout(
    epochs=1,
    batch_size=utils.BATCH_SIZE,
    transform=True,
    transformer_model=TransformerModel.BERT,
    print_every=10,
    device="cpu",
):
    """ Trains the Naive Bayes model on the Web of Science dataset.

    Args:
        epochs (int): number of times the stream is run
        batch_size (int): the batch size
        transform (bool): transform the dataset or not
        transformer_model (TransformerModel): the transformer model to use
        print_every (int): print stats parameter
        device (string): the device to run the training on (cpu or gpu)

    """
    # Prepare the stream
    stream = WOSStream(transformer_model=transformer_model,
                       transform=transform,
                       device=device)
    stream.prepare_for_use()

    # Define model
    model = GaussianNB()
    model_name = "naive-bayes-wos-{}-ver-{}-holdout".format(
        transformer_model.name, stream.version)
    model_path = os.path.join(PATH, model_name)
    os.makedirs(model_path, exist_ok=True)
    all_labels = np.arange(stream.n_classes)

    print("Starting training...")
    train_accuracies, test_metrics_list = [], []

    for epoch in range(epochs):
        # Initialize the running loss and accuracy
        running_accuracy = 0.0
        # Start iterating over the dataset
        i = 0
        while stream.has_more_samples():
            # Get the batch from the stream
            if stream.n_remaining_samples() >= batch_size:
                x_, y = stream.next_sample(batch_size)
            else:
                break

            # Unpack x_ (we do not need the sequence lengths for NB)
            x = x_[0].numpy()
            # Take the maximum over the axis 1
            x = np.amax(x, axis=1)

            # Partial fit the model
            model.partial_fit(x, y, classes=all_labels)

            # Update running accuracy
            running_accuracy += accuracy_score(y, model.predict(x))

            # Print statistics
            if i % print_every == print_every - 1:
                # Evaluate the model on the test set
                x_test_, y_test = stream.get_test_set()
                x_test = x_test_[0].numpy()
                x_test = np.amax(x_test, axis=1)
                y_pred = model.predict(x_test)
                test_metrics = get_metrics(y_pred,
                                           y_test,
                                           no_labels=stream.n_classes)

                accuracy = running_accuracy / print_every
                # Print every 10 batches
                print("[{}/{} epochs, {}/{} batches] train accuracy: {:.4f}, "
                      "test (accuracy: {:.4f}, precision: {:.4f}, "
                      "recall: {:.4f}, f1: {:.4f})".format(
                          epoch + 1,
                          epochs,
                          i + 1,
                          stream.n_samples // batch_size + 1,
                          accuracy,
                          test_metrics["accuracy"],
                          test_metrics["precision"],
                          test_metrics["recall"],
                          test_metrics["macro_f1"],
                      ))
                train_accuracies.append(accuracy)
                test_metrics_list.append(test_metrics)
                running_accuracy = 0

            # Increment i
            i += 1

        stream.restart()

    # Save model
    print("Finished training. Saving model..")
    dump(model, os.path.join(model_path, "model.joblib"))
    print("Done!")

    return train_accuracies, test_metrics_list
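
A hypothetical invocation of the trainer above, assuming the same module-level defaults (utils.BATCH_SIZE, TransformerModel.BERT) and a CPU run:

train_accs, test_metrics = train_nb_wos_holdout(
    epochs=2,
    batch_size=utils.BATCH_SIZE,
    transformer_model=TransformerModel.BERT,
    print_every=10,
    device="cpu",
)
# Most recent hold-out metrics, if any evaluation points were reached
final_metrics = test_metrics[-1] if test_metrics else None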