示例#1
0
    def __init__(self,
                 tf_dataset: tf.data.Dataset,
                 train_ratio: float,
                 validation_ratio: float,
                 batch_size: int = 300):
        """Batch a dataset and carve it into train / validation / test parts.

        Args:
            tf_dataset: dataset whose elements are indexable; ``element[0]``
                and ``element[1]`` must both support ``len()``.
            train_ratio: fraction of the element count used for training.
            validation_ratio: fraction of the element count used for
                validation; the remainder is the test size.
            batch_size: batch size (also used as the shuffle buffer size).

        Raises:
            IndexError: if ``tf_dataset`` yields no elements.
        """
        # Materialise the dataset a single time; the original code iterated
        # the full dataset three times to obtain these three numbers.
        samples = list(tf_dataset.as_numpy_iterator())
        first_sample = samples[0]
        self.article_length = len(first_sample[0])
        self.theme_count = len(first_sample[1])
        self.count = len(samples)
        del samples  # release the materialised copy as early as possible

        self.dataset = tf_dataset.batch(batch_size).repeat().shuffle(
            batch_size)

        self.trainSize = int(train_ratio * self.count)
        self.validationSize = int(validation_ratio * self.count)
        self.testSize = self.count - self.trainSize - self.validationSize

        # NOTE(review): the sizes above count samples, but ``self.dataset`` is
        # already batched, so take()/skip() below count batches -- confirm
        # whether that is intended.
        self.trainData = self.dataset.take(self.trainSize).repeat()
        self.validationData = self.dataset.skip(self.trainSize).take(
            self.validationSize).repeat()
        # The test split starts after the train and validation ranges
        # (previously it skipped ``testSize``, overlapping the training range).
        self.testData = self.dataset.skip(self.trainSize +
                                          self.validationSize)

        self.train_batch_count = int(math.ceil(self.trainSize / batch_size))
        self.test_batch_count = int(math.ceil(self.testSize / batch_size))
        self.validation_batch_count = int(
            math.ceil(self.validationSize / batch_size))
示例#2
0
def compare_datasets_eager_mode(original_dataset: tf.data.Dataset,
                                dataset_from_stream: tf.data.Dataset) -> int:
    """Compare two datasets example by example and count the compared pairs.

    Both datasets are walked in lockstep through ``as_numpy_iterator()``. For
    every pair of examples, the items obtained by iterating the two examples
    are checked with ``np.array_equal``.

    NOTE(review): ``zip`` stops at the shorter input, so extra examples (or
    extra components) on one side go unnoticed. Iterating a dict example
    yields its keys, so values are only compared for tuple/list examples.

    Returns:
        The number of example pairs that were compared.

    Raises:
        AssertionError: if any pair of components differs.
    """
    streamed_examples = dataset_from_stream.as_numpy_iterator()
    reference_examples = original_dataset.as_numpy_iterator()

    compared = 0
    for reference, streamed in zip(reference_examples, streamed_examples):
        for expected, actual in zip(reference, streamed):
            assert np.array_equal(expected, actual)
        compared += 1
    return compared
def make_images(
    dev_dl: tf.data.Dataset,
    model: nn.Module,
    args: argparse.Namespace,
) -> nn.Module:
    """Run inference over ``dev_dl`` and save one comparison diagram per batch.

    Each batch is preprocessed with ``model_utils.preprocess_test_example``,
    moved to the device returned by ``model_utils.get_device()``, passed
    through ``model`` and handed to ``model_utils.make_3_col_diagram``. The
    image is written to ``{args.save_dir}/{args.name}/{args.name}_{i}.png``.

    Args:
        dev_dl: dataset yielding ``(x, y)`` batches; must support ``len()``.
        model: network to run.
        args: namespace providing ``batch_size``, ``save_dir`` and ``name``.

    Returns:
        The ``model`` that was passed in.
    """
    device = model_utils.get_device()

    print('  Running forward inference...')
    # Disable autograd only for the duration of this function. The previous
    # ``torch.set_grad_enabled(False)`` left gradients disabled for every
    # caller afterwards.
    with torch.no_grad(), tqdm(total=args.batch_size *
                               len(dev_dl)) as progress_bar:
        for i, (x_batch_orig,
                y_batch) in enumerate(dev_dl.as_numpy_iterator()):
            x_batch, y_batch = model_utils.preprocess_test_example(
                x_batch_orig, y_batch)
            y_batch = y_batch.to(device)
            x_batch = x_batch.to(device)

            # Forward pass on model
            y_pred = model(x_batch).detach()

            model_utils.make_3_col_diagram(
                x_batch.cpu().numpy(),
                y_batch.cpu().numpy(),
                y_pred.cpu().numpy(),
                f'{args.save_dir}/{args.name}/{args.name}_{i}.png')

            progress_bar.update(len(x_batch))

            # Drop the references before the next batch is loaded.
            del x_batch
            del y_pred

    return model
def _predict(model: tf.Module, dataset: tf.data.Dataset) -> np.ndarray:
    """Run ``model.predict`` on every batch of ``dataset`` and join the results.

    Args:
        model: object exposing ``predict(x)``.
        dataset: dataset yielding ``(x, y)`` pairs; ``y`` is ignored.

    Returns:
        The per-batch predictions concatenated along the first axis.

    Raises:
        ValueError: if the dataset yields no batches (``np.concatenate`` needs
            at least one array).
    """
    per_batch = [
        model.predict(features)  # original note: shape (BATCH_SIZE, 1)
        for features, _labels in dataset.as_numpy_iterator()
    ]
    return np.concatenate(per_batch)
示例#5
0
def dump_chars_to_textfile(dataset: tf.data.Dataset,
                           data_keys: Tuple[str],
                           max_char: int = -1):
  """Dump selected fields of a dataset to a temporary text file.

  For every example, the value under each key in ``data_keys`` is written on
  its own line (a newline byte is appended to it).

  Args:
    dataset: dataset whose examples map each key to a bytes value.
    data_keys: keys of each example to dump, in order.
    max_char: when positive, stop before writing an example once more than
      this many bytes have been written. The limit is checked once per
      example, so the output can overshoot it. Non-positive means no limit.

  Returns:
    Name of the temporary file. It is created with ``delete=False``, so the
    caller is responsible for removing it.
  """
  examples = dataset.as_numpy_iterator()
  written = 0
  with tempfile.NamedTemporaryFile(delete=False) as outfp:
    for example in examples:
      limit_reached = max_char > 0 and written > max_char
      if example is None or limit_reached:
        break
      for key in data_keys:
        line = example[key] + b"\n"
        outfp.write(line)
        written += len(line)
  return outfp.name
示例#6
0
def split_data_labels(dataset: tf.data.Dataset, num_samples: int):
    """
    Extract a given number of data samples and their respective labels from a tf.data.Dataset and convert to np.array

    :param dataset: Dataset that the samples are being extracted from; each element is indexed as
        ``element[0]`` (data) and ``element[1]`` (label)
    :param num_samples: Total number of samples desired from dataset. Fewer are returned when the dataset is
        shorter; a negative value never matches the counter, so the whole dataset is read
    :return: ``(data_array, labels_array)`` where ``data_array`` has shape ``(n_samples, n_features)`` with each
        sample flattened, and ``labels_array`` holds the matching labels
    :raises ValueError: if no samples were collected (an empty array cannot be reshaped to ``(0, -1)``)
    """
    data = []
    labels = []

    for count, instance in enumerate(dataset.as_numpy_iterator()):
        if count == num_samples:
            break
        data.append(instance[0])
        labels.append(instance[1])

    data_array = np.asarray(data)
    labels_array = np.asarray(labels)

    # Flatten every sample to one row. ``-1`` lets numpy work out the feature
    # count, so samples of any rank are supported (the previous code unpacked
    # exactly three dimensions and only handled 2-D samples).
    data_array = data_array.reshape((len(data), -1))

    return data_array, labels_array
def _dump_chars_to_textfile(
    dataset: tf.data.Dataset,
    maxchars: int = int(1e7),
    data_keys=('inputs', 'targets')
) -> Tuple[str, int]:
    """Write part of a TFDS sentence dataset to lines in a text file.

    Examples are consumed until at least ``maxchars`` bytes have been written
    or the dataset runs out, whichever comes first.

    Args:
      dataset: tf.dataset containing string-data.
      maxchars: int: approximate number of characters to save from dataset.
        The limit is checked between examples, so the last example written
        can push the total past it.
      data_keys: Tuple[str]: what keys in dataset to dump from.

    Returns:
      name of temp file with dataset bytes, exact number of characters dumped.
    """
    char_count = 0
    ds_iter = dataset.as_numpy_iterator()
    with tempfile.NamedTemporaryFile(delete=False,
                                     prefix='/tmp/ds_chars') as outfp:
        while char_count < maxchars:
            # A default of None avoids the StopIteration that a bare
            # ``next(ds_iter)`` raised when the dataset held fewer than
            # ``maxchars`` characters.
            example = next(ds_iter, None)
            if example is None:
                break
            for k in data_keys:
                line = example[k] + b'\n'
                char_count += len(line)
                outfp.write(line)
    return outfp.name, char_count
示例#8
0
def read_tf_dataset_eager_mode(
        dataset: tf.data.Dataset) -> Generator[Tuple[Any, bool], None, None]:
    """Lazily yield every element of ``dataset`` via ``as_numpy_iterator()``.

    TODO: if ``repeat()`` has been applied to the dataset this generator never
    terminates. Logging how many items have been read so far would alert the
    user to a runaway loop.
    """
    yield from dataset.as_numpy_iterator()
    def _calculate_frequencies_for_dataset(
            self, dataset: tf.data.Dataset) -> np.array:
        """Accumulate ``self._calculate_frequencies(x)`` over every batch.

        The accumulator starts as an int32 zero vector with one slot per entry
        of ``self.x_vocab``. The second component of each dataset element is
        ignored.

        Returns:
            The element-wise sum of the per-batch frequency results.
        """
        totals = np.zeros(shape=(len(self.x_vocab), ), dtype=np.int32)
        batches = tqdm(dataset.as_numpy_iterator(),
                       desc="Calculating x frequencies...")
        for features, _ in batches:
            batch_frequencies = self._calculate_frequencies(features)
            # Plain addition (not +=) so numpy may promote the result dtype.
            totals = totals + batch_frequencies

        return totals
 def validate_model(self, class_labels: Union[Tuple[str], List[str]],
                    validation_set: tf.data.Dataset) -> None:
     """Predict on the validation set and forward the results to the charts.

     Column 1 of ``self.architecture.model.predict(validation_set)`` is used
     as the predicted probability. The labels are read from ``batch[1]`` of
     every batch and cast to int32.

     NOTE(review): the dataset is iterated twice (once by ``predict``, once
     for the labels); if it reshuffles between iterations the labels will not
     line up with the predictions -- confirm with the caller.

     Args:
         class_labels: class names passed through to ``self.charts.update``.
         validation_set: dataset of batches whose second item holds labels.
     """
     validation_predicted_probability = self.architecture.model.predict(
         validation_set)[:, 1]  # ,0]
     # Collect all label batches and concatenate once; concatenating inside
     # the loop re-copied the accumulated array on every batch. The leading
     # empty array keeps the dtype promotion and the empty-dataset result
     # the same as before.
     label_batches = [np.array([])]
     label_batches.extend(
         batch[1] for batch in validation_set.as_numpy_iterator())
     validation_labels = np.concatenate(label_batches).astype(np.int32)
     self.charts.update(self.history, self.curr_fold, validation_labels,
                        validation_predicted_probability, class_labels)
示例#11
0
def getTfSize(tf_dataset: tf.data.Dataset):
    """
    # Purpose:
        Return the number of elements in a tf dataset.

    # Details:
        The function iterates over the entire dataset to count it, so it can
        be very slow for big datasets (> 1 million elements) and never returns
        for an infinite (repeated) dataset. Elements are counted as they
        stream by and are not kept in memory.
    """
    return sum(1 for _ in tf_dataset.as_numpy_iterator())
示例#12
0
 def get_label(max_k: tf.data.Dataset, labels: List[str]) -> str:
     """Pick the category with the largest summed weight among the neighbours.

     The first element of ``max_k`` is unpacked as ``(values, neighbour
     labels)``. Every value is added to the running total of the category
     equal to its neighbour label; neighbour labels that match no category are
     ignored.

     Args:
         max_k: dataset whose first element is a pair of equally long
             sequences: weights and byte-string labels.
         labels: candidate category names; encoded to UTF-8 bytes for the
             comparison.

     Returns:
         The winning category as stored in the encoded numpy array (a bytes
         value, despite the ``str`` annotation).

     Raises:
         StopIteration: if ``max_k`` yields no elements.
     """
     # to bytes: https://stackoverflow.com/questions/6269765/what-does-the-b-character-do-in-front-of-a-string-literal
     categories = np.array([label.encode("UTF-8") for label in labels])
     # Builtin next() works on any iterator; the previous ``.next()`` call
     # depended on the iterator offering a Python-2 style method.
     weights, neighbour_labels = next(max_k.as_numpy_iterator())
     totals = np.zeros(len(categories))
     # effectively reducing and leveraging the input category list
     for i, weight in enumerate(weights):
         # weights decline with distance because they have been inverted
         totals[categories == neighbour_labels[i]] += weight
     # returning the label with the highest aggregate, distance discounted weight
     return categories[np.argmax(totals)]
    def _calculate_confusion_matrix_for_dataset(
            self, dataset: tf.data.Dataset) -> np.array:
        """Sum the per-batch confusion matrices over the whole dataset.

        For each ``(x, y_true)`` batch the model output is converted with
        ``self._convert_to_int_vector`` and compared against ``y_true`` by
        ``self._calculate_confusion_matrix``.

        Returns:
            The accumulated matrix; it starts as an int32 zero matrix with one
            row and one column per entry of ``self.y_vocab``.
        """
        label_count = len(self.y_vocab)
        totals = np.zeros(shape=(label_count, label_count), dtype=np.int32)
        progress = tqdm(dataset.as_numpy_iterator(),
                        desc="Calculating confusion matrix...")
        for features, y_true in progress:
            # original note: shape (batch_size, num_labels)
            scores = self.model(features).numpy()
            y_pred = self._convert_to_int_vector(scores)
            batch_matrix = self._calculate_confusion_matrix(y_true, y_pred)
            totals = totals + batch_matrix

        return totals
def _eval_epoch(model: _FlaxPenguinModel, params: _Params,
                eval_data: tf.data.Dataset, steps_per_epoch: int):
    """Validate for a single epoch.

    Runs ``_eval_step`` on successive ``(inputs, labels)`` batches and stops
    after ``steps_per_epoch`` batches or when ``eval_data`` is exhausted,
    whichever comes first.

    Returns:
        The result of ``_mean_epoch_metrics`` over the collected metrics.
    """
    collected = []
    batches = enumerate(eval_data.as_numpy_iterator(), start=1)
    for step, (inputs, labels) in batches:
        collected.append(_eval_step(model, params, inputs, labels))
        if step == steps_per_epoch:
            break

    # compute mean of metrics across each batch in epoch.
    host_metrics = jax.device_get(collected)
    return _mean_epoch_metrics(host_metrics)
def _train_epoch(model: _FlaxPenguinModel, optimizer: flax.optim.OptimizerDef,
                 train_data: tf.data.Dataset, steps_per_epoch: int):
    """Train for a single epoch.

    Feeds successive ``(inputs, labels)`` batches to ``_train_step``, carrying
    the returned optimizer into the next step. Stops after ``steps_per_epoch``
    batches or when ``train_data`` is exhausted, whichever comes first.

    Returns:
        ``(optimizer, epoch_metrics)``: the optimizer after the last step and
        the result of ``_mean_epoch_metrics`` over the collected metrics.
    """
    collected = []
    batches = enumerate(train_data.as_numpy_iterator(), start=1)
    for step, (inputs, labels) in batches:
        optimizer, step_metrics = _train_step(model, optimizer, inputs,
                                              labels)
        collected.append(step_metrics)
        if step == steps_per_epoch:
            break

    # compute mean of metrics across each batch in epoch.
    host_metrics = jax.device_get(collected)
    return optimizer, _mean_epoch_metrics(host_metrics)
    def _calculate_prediction_output_for_dataset(
            self, dataset: tf.data.Dataset) -> np.array:
        """Build one table of inputs, expected outputs and predictions.

        Each ``(x, y)`` batch becomes a DataFrame with the columns ``input``,
        ``output`` and ``predictions``, filled by the ``_transform_to_words_*``
        helpers and the model output.

        Returns:
            All per-batch frames concatenated with a fresh index (a pandas
            DataFrame, despite the ``np.array`` annotation).

        Raises:
            ValueError: if the dataset yields no batches (``pd.concat`` needs
                at least one frame).
        """
        frames = []
        progress = tqdm(dataset.as_numpy_iterator(),
                        desc="Calculating prediction outputs...")
        for features, targets in progress:
            input_words = self._transform_to_words_x(features)
            output_words = self._transform_to_words_y(targets)
            scores = self.model(features).numpy()
            columns = {
                "input": input_words,
                "output": output_words,
                "predictions": self._transform_to_words_per_prediction(scores),
            }
            frames.append(pd.DataFrame(columns))

        return pd.concat(frames, ignore_index=True)
示例#17
0
    def predict_task_split(self,
                           model: transformers.PreTrainedModel,
                           inputs: tf.data.Dataset,
                           task: Task,
                           max_length: int = 140,
                           min_length: int = 55) -> typing.Sequence[typing.Sequence[int]]:
        """Generate output token sequences for every batch of ``inputs``.

        The model is moved to ``self.device`` and each batch is decoded with
        beam search (4 beams, no sampling). The length limits come from
        ``GENERATION_MAX_LENGTHS`` / ``GENERATION_MIN_LENGTHS`` keyed by
        ``task.dataset``, falling back to ``max_length`` / ``min_length``.

        Returns:
            A list with one generated id array per example, or an empty list
            when iterating the dataset raises ``tf.errors.UnknownError``.

        Raises:
            Exception: any other error is logged and re-raised.
        """
        try:
            outputs = []
            model.to(self.device)
            progress = tqdm.tqdm(inputs.as_numpy_iterator(),
                                 desc="Predicting %s" % task,
                                 unit="batch", leave=False)
            for batch_inputs in progress:
                with torch.no_grad():
                    model.eval()
                    forward_params = self.prepare_forward_inputs(model, batch_inputs)
                    input_ids = forward_params['input_ids']
                    attention_mask = forward_params['attention_mask']
                    longest = GENERATION_MAX_LENGTHS.get(task.dataset, max_length) + 2
                    shortest = GENERATION_MIN_LENGTHS.get(task.dataset, min_length) + 1
                    generated = model.generate(input_ids,
                                               attention_mask=attention_mask,
                                               do_sample=False,
                                               max_length=longest,
                                               min_length=shortest,
                                               num_beams=4,
                                               length_penalty=2.,
                                               no_repeat_ngram_size=3,
                                               early_stopping=True)

                    outputs.extend(generated.detach().cpu().numpy())
            return outputs
        # tf.errors.UnknownError cannot be named in the except clause: it
        # surfaces as a proxy instance that the clause fails to match, so all
        # exceptions are caught and the type is checked by hand.
        except Exception as e:
            if not isinstance(e, tf.errors.UnknownError):
                # A different exception type: report it and let it propagate.
                logging.error('Encountered error: %s on %s: %s', type(e), task, e)
                raise e
            logging.warning('Encountered error: %s on %s: %s', type(e), task, e)
            # No more specific error type is available, but this usually means
            # the dataset has no labels for the split (e.g., test evaluation
            # occurs on a server).
            return []
示例#18
0
    def __init__(self,
                 policy_network: snt.RNNCore,
                 critic_network: networks.CriticDeepRNN,
                 target_policy_network: snt.RNNCore,
                 target_critic_network: networks.CriticDeepRNN,
                 dataset: tf.data.Dataset,
                 accelerator_strategy: Optional[tf.distribute.Strategy] = None,
                 behavior_network: Optional[snt.Module] = None,
                 cwp_network: Optional[snt.Module] = None,
                 policy_optimizer: Optional[snt.Optimizer] = None,
                 critic_optimizer: Optional[snt.Optimizer] = None,
                 discount: float = 0.99,
                 target_update_period: int = 100,
                 num_action_samples_td_learning: int = 1,
                 num_action_samples_policy_weight: int = 4,
                 baseline_reduce_function: str = 'mean',
                 clipping: bool = True,
                 policy_improvement_modes: str = 'exp',
                 ratio_upper_bound: float = 20.,
                 beta: float = 1.0,
                 counter: Optional[counting.Counter] = None,
                 logger: Optional[loggers.Logger] = None,
                 checkpoint: bool = False):
        """Initializes the learner.

        Args:
          policy_network: the online (optimized) policy.
          critic_network: the online critic.
          target_policy_network: the target policy (which lags behind the
            online policy).
          target_critic_network: the target critic.
          dataset: dataset to learn from, whether fixed or from a replay buffer
            (see `acme.datasets.reverb.make_reverb_dataset` documentation).
            One sample is drawn from it here to read the sequence length.
          accelerator_strategy: the strategy used to distribute computation,
            whether on a single, or multiple, GPU or TPU; as supported by
            tf.distribute. Defaults to `snt.distribute.Replicator()`.
          behavior_network: The network to snapshot under `policy` name. If
            None, no `policy` entry is added to the snapshot.
          cwp_network: CWP network to snapshot: samples actions
            from the policy and weighs them with the critic, then returns the
            action by sampling from the softmax distribution using critic
            values as logits. Used only for snapshotting, not training.
          policy_optimizer: the optimizer to be applied to the policy loss.
            Defaults to `snt.optimizers.Adam(1e-4)`.
          critic_optimizer: the optimizer to be applied to the distributional
            Bellman loss. Defaults to `snt.optimizers.Adam(1e-4)`.
          discount: discount to use for TD updates.
          target_update_period: number of learner steps to perform before
            updating the target networks.
          num_action_samples_td_learning: number of action samples to use to
            estimate expected value of the critic loss w.r.t. stochastic policy.
          num_action_samples_policy_weight: number of action samples to use to
            estimate the advantage function for the CRR weighting of the policy
            loss.
          baseline_reduce_function: one of 'mean', 'max', 'min'. Way of
            aggregating values from `num_action_samples` estimates of the value
            function.
          clipping: whether to clip gradients by global norm.
          policy_improvement_modes: one of 'exp', 'binary', 'all'. CRR mode
            which determines how the advantage function is processed before
            being multiplied by the policy loss.
          ratio_upper_bound: if policy_improvement_modes is 'exp', determines
            the upper bound of the weight (i.e. the weight is
              min(exp(advantage / beta), upper_bound)
            ).
          beta: if policy_improvement_modes is 'exp', determines the beta (see
            above).
          counter: counter object used to keep track of steps. A fresh
            `counting.Counter()` is created when omitted.
          logger: logger object to be used by learner. A `TerminalLogger`
            labelled 'learner' is created when omitted.
          checkpoint: boolean indicating whether to checkpoint the learner
            (this also enables the snapshotter).
        """

        # Fall back to a Sonnet Replicator when no strategy is supplied.
        if accelerator_strategy is None:
            accelerator_strategy = snt.distribute.Replicator()
        self._accelerator_strategy = accelerator_strategy
        self._policy_improvement_modes = policy_improvement_modes
        self._ratio_upper_bound = ratio_upper_bound
        self._num_action_samples_td_learning = num_action_samples_td_learning
        self._num_action_samples_policy_weight = num_action_samples_policy_weight
        self._baseline_reduce_function = baseline_reduce_function
        self._beta = beta

        # When running on TPUs we have to know the amount of memory required (and
        # thus the sequence length) at the graph compilation stage. At the moment,
        # the only way to get it is to sample from the dataset, since the dataset
        # does not have any metadata, see b/160672927 to track this upcoming
        # feature.
        # NOTE(review): assumes `sample.action` is laid out with the sequence
        # on axis 1 -- confirm against the dataset.
        sample = next(dataset.as_numpy_iterator())
        self._sequence_length = sample.action.shape[1]

        self._counter = counter or counting.Counter()
        self._logger = logger or loggers.TerminalLogger('learner',
                                                        time_delta=1.)
        self._discount = discount
        self._clipping = clipping

        self._target_update_period = target_update_period

        # The step variable, dataset iterator and optimizers are all created
        # inside the strategy scope.
        with self._accelerator_strategy.scope():
            # Necessary to track when to update target networks.
            self._num_steps = tf.Variable(0, dtype=tf.int32)

            # (Maybe) distributing the dataset across multiple accelerators.
            distributed_dataset = self._accelerator_strategy.experimental_distribute_dataset(
                dataset)
            self._iterator = iter(distributed_dataset)

            # Create the optimizers.
            self._critic_optimizer = critic_optimizer or snt.optimizers.Adam(
                1e-4)
            self._policy_optimizer = policy_optimizer or snt.optimizers.Adam(
                1e-4)

        # Store online and target networks.
        self._policy_network = policy_network
        self._critic_network = critic_network
        self._target_policy_network = target_policy_network
        self._target_critic_network = target_critic_network

        # Expose the variables.
        # Note that these are the variables of the *target* networks.
        self._variables = {
            'critic': self._target_critic_network.variables,
            'policy': self._target_policy_network.variables,
        }

        # Create a checkpointer object.
        # Both stay None unless `checkpoint` is true.
        self._checkpointer = None
        self._snapshotter = None
        if checkpoint:
            self._checkpointer = tf2_savers.Checkpointer(
                objects_to_save={
                    'counter': self._counter,
                    'policy': self._policy_network,
                    'critic': self._critic_network,
                    'target_policy': self._target_policy_network,
                    'target_critic': self._target_critic_network,
                    'policy_optimizer': self._policy_optimizer,
                    'critic_optimizer': self._critic_optimizer,
                    'num_steps': self._num_steps,
                },
                time_delta_minutes=30.)

            # Wrap the online networks with output heads for the snapshot.
            raw_policy = snt.DeepRNN(
                [policy_network,
                 networks.StochasticSamplingHead()])
            critic_mean = networks.CriticDeepRNN(
                [critic_network, networks.StochasticMeanHead()])
            objects_to_save = {
                'raw_policy': raw_policy,
                'critic': critic_mean,
            }
            # Optional networks are only snapshotted when they were provided.
            if behavior_network is not None:
                objects_to_save['policy'] = behavior_network
            if cwp_network is not None:
                objects_to_save['cwp_policy'] = cwp_network
            self._snapshotter = tf2_savers.Snapshotter(
                objects_to_save=objects_to_save, time_delta_minutes=30)
        # Timestamp to keep track of the wall time.
        self._walltime_timestamp = time.time()
示例#19
0
def train_model(
    train_ds: tf.data.Dataset,
    dev_ds: tf.data.Dataset,
    model: nn.Module,
    optimizer: optim.Optimizer,
    lr_scheduler: optim.lr_scheduler._LRScheduler,
    args: argparse.Namespace,
) -> nn.Module:
    """Train ``model`` for ``args.train_epochs`` epochs, validating after each.

    Each epoch runs one pass over ``train_ds`` with a loss made of a log-depth
    term, log-gradient terms (from a Sobel filter) and a surface-normal cosine
    term, followed by one pass over ``dev_ds`` that logs the validation loss
    and the RMS / REL / LOG10 / delta metrics to TensorBoard. The model with
    the best mean validation loss is saved, and a rolling window of at most
    ``args.num_checkpoints`` periodic checkpoints is kept on disk.

    Args:
        train_ds: training dataset yielding ``(x, y)`` batches; must support
            ``len()``.
        dev_ds: validation dataset yielding ``(x, y)`` batches; must support
            ``len()``.
        model: network to train (modified in place).
        optimizer: optimizer stepping ``model``'s parameters.
        lr_scheduler: stepped once per epoch when ``args.use_scheduler`` is
            set.
        args: namespace providing ``train_epochs``, ``use_scheduler``,
            ``train_batch_size``, ``dev_batch_size``, ``picture_frequency``,
            ``checkpoint_freq``, ``num_checkpoints``, ``log_dir``,
            ``save_path`` and ``experiment``.

    Returns:
        The trained ``model``.
    """
    device = model_utils.get_device()
    val_loss_fn = model_utils.l1_norm_loss
    best_val_loss = float('inf')
    saved_checkpoints = []

    cos = nn.CosineSimilarity(dim=1, eps=0)
    get_gradient: nn.Module = sobel.Sobel().to(device)

    # Remember the caller's autograd mode so it can be restored on exit; the
    # loop below toggles the global flag for training and validation.
    grad_was_enabled = torch.is_grad_enabled()
    writer = SummaryWriter(log_dir=f'{args.log_dir}/{args.experiment}')
    try:
        for e in range(1, args.train_epochs + 1):
            print(f'Training epoch {e}...')

            # Training portion
            torch.cuda.empty_cache()
            torch.set_grad_enabled(True)
            with tqdm(total=args.train_batch_size *
                      len(train_ds)) as progress_bar:
                model.train()
                for i, (x_batch_orig,
                        y_batch) in enumerate(train_ds.as_numpy_iterator()):
                    x_batch, y_batch = model_utils.preprocess_training_example(
                        x_batch_orig, y_batch)
                    y_blurred = model_utils.blur_depth_map(y_batch)

                    ones = torch.ones(y_batch.shape,
                                      dtype=torch.float32,
                                      device=device)

                    # Forward pass on model
                    optimizer.zero_grad()
                    y_pred = model(x_batch)

                    # Channel 0 / 1 of the Sobel output are used as the x / y
                    # gradients of the (blurred) target and the prediction.
                    depth_grad = get_gradient(y_blurred)
                    output_grad = get_gradient(y_pred)
                    depth_grad_dx = depth_grad[:, 0, :, :].contiguous(
                    ).view_as(y_blurred)
                    depth_grad_dy = depth_grad[:, 1, :, :].contiguous(
                    ).view_as(y_batch)
                    output_grad_dx = output_grad[:, 0, :, :].contiguous(
                    ).view_as(y_blurred)
                    output_grad_dy = output_grad[:, 1, :, :].contiguous(
                    ).view_as(y_batch)

                    depth_normal = torch.cat(
                        (-depth_grad_dx, -depth_grad_dy, ones), 1)
                    output_normal = torch.cat(
                        (-output_grad_dx, -output_grad_dy, ones), 1)

                    loss_depth = torch.log(
                        torch.abs(y_pred - y_batch) + 0.5).mean()
                    loss_dx = torch.log(
                        torch.abs(output_grad_dx - depth_grad_dx) +
                        0.5).mean()
                    loss_dy = torch.log(
                        torch.abs(output_grad_dy - depth_grad_dy) +
                        0.5).mean()
                    loss_normal = torch.abs(
                        1 - cos(output_normal, depth_normal)).mean()

                    loss = loss_depth + loss_normal + (loss_dx + loss_dy)

                    # Backward pass and optimization
                    loss.backward()
                    optimizer.step()

                    progress_bar.update(len(x_batch))
                    progress_bar.set_postfix(loss=loss.item())
                    writer.add_scalar("train/Loss", loss,
                                      ((e - 1) * len(train_ds) + i) *
                                      args.train_batch_size)

                    # Periodically save a diagram
                    if (i + 1) % args.picture_frequency == 0:
                        model_utils.make_diagram(
                            np.transpose(x_batch_orig, (0, 3, 1, 2)),
                            x_batch.cpu().numpy(),
                            y_batch.cpu().numpy(),
                            y_pred.cpu().detach().numpy(),
                            f'{args.save_path}/{args.experiment}/diagram_{e}_{i+1}.png',
                        )

                    del x_batch
                    del y_batch
                    del y_blurred
                    del y_pred
                    del loss

            # Step the scheduler only after this epoch's optimizer updates.
            # Stepping it before the first optimizer.step() (as was done at
            # the top of the epoch) skips the first value of the schedule.
            if args.use_scheduler:
                lr_scheduler.step()

            # Validation portion
            torch.cuda.empty_cache()
            torch.set_grad_enabled(False)

            with tqdm(total=args.dev_batch_size *
                      len(dev_ds)) as progress_bar:
                model.eval()
                val_loss = 0.0
                num_batches_processed = 0
                total_pixels = 0
                total_examples = 0
                squared_error = 0
                rel_error = 0
                log_error = 0
                threshold1 = 0  # 1.25
                threshold2 = 0  # 1.25^2
                threshold3 = 0  # corresponds to 1.25^3

                for i, (x_batch,
                        y_batch) in enumerate(dev_ds.as_numpy_iterator()):
                    x_batch, y_batch = model_utils.preprocess_test_example(
                        x_batch, y_batch)
                    # Forward pass on model in validation environment
                    y_pred = model(x_batch)

                    # TODO: Process y_pred in whatever way inference requires.
                    loss = val_loss_fn(y_pred, y_batch)
                    val_loss += loss.item()
                    num_batches_processed += 1

                    # Count of pixels outside the NaN mask in this batch.
                    nanmask = getNanMask(y_batch)
                    total_pixels = torch.sum(~nanmask)
                    total_examples += x_batch.shape[0]

                    # RMS, REL, LOG10, threshold calculation
                    # NOTE(review): each term is a per-batch average, yet the
                    # totals are later divided by the number of examples --
                    # confirm this is the intended normalisation.
                    squared_error += (
                        torch.sum(torch.pow(y_pred - y_batch, 2)).item() /
                        total_pixels)**0.5
                    rel_error += torch.sum(
                        removeNans(torch.abs(y_pred - y_batch) /
                                   y_batch)).item() / total_pixels
                    log_error += torch.sum(
                        torch.abs(
                            removeNans(torch.log10(y_pred)) - removeNans(
                                torch.log10(y_batch)))).item() / total_pixels
                    threshold1 += torch.sum(
                        torch.max(y_pred / y_batch, y_batch /
                                  y_pred) < 1.25).item() / total_pixels
                    threshold2 += torch.sum(
                        torch.max(y_pred / y_batch, y_batch /
                                  y_pred) < 1.25**2).item() / total_pixels
                    threshold3 += torch.sum(
                        torch.max(y_pred / y_batch, y_batch /
                                  y_pred) < 1.25**3).item() / total_pixels

                    progress_bar.update(len(x_batch))
                    progress_bar.set_postfix(val_loss=val_loss /
                                             num_batches_processed)
                    writer.add_scalar("Val/Loss", loss,
                                      ((e - 1) * len(dev_ds) + i) *
                                      args.dev_batch_size)

                    del x_batch
                    del y_batch
                    del y_pred
                    del loss

                writer.add_scalar("Val/RMS", squared_error / total_examples,
                                  e)
                writer.add_scalar("Val/REL", rel_error / total_examples, e)
                writer.add_scalar("Val/LOG10", log_error / total_examples, e)
                writer.add_scalar("Val/delta1", threshold1 / total_examples,
                                  e)
                writer.add_scalar("Val/delta2", threshold2 / total_examples,
                                  e)
                writer.add_scalar("Val/delta3", threshold3 / total_examples,
                                  e)

                # Save model if it's the best one yet.
                if val_loss / num_batches_processed < best_val_loss:
                    best_val_loss = val_loss / num_batches_processed
                    filename = f'{args.save_path}/{args.experiment}/{model.__class__.__name__}_best_val.checkpoint'
                    model_utils.save_model(model, filename)
                    print('Model saved!')
                    print(f'Best validation loss yet: {best_val_loss}')
                # Save model on checkpoints.
                if e % args.checkpoint_freq == 0:
                    filename = f'{args.save_path}/{args.experiment}/{model.__class__.__name__}_epoch_{e}.checkpoint'
                    model_utils.save_model(model, filename)
                    print('Model checkpoint reached!')
                    saved_checkpoints.append(filename)
                    # Delete checkpoints if there are too many
                    while len(saved_checkpoints) > args.num_checkpoints:
                        os.remove(saved_checkpoints.pop(0))
    finally:
        # Close the TensorBoard writer and give the caller back the autograd
        # mode it had, even when training is interrupted.
        torch.set_grad_enabled(grad_was_enabled)
        writer.close()

    return model
示例#20
0
 def accuracy(fit_data: tf.data.Dataset) -> float:
     """Return the fraction of predicted labels that equal the true labels.

     Only the first element of ``fit_data`` is read; it is unpacked as
     ``(values, true labels, predicted labels)``.

     Raises:
         StopIteration: if ``fit_data`` yields no elements.
         IndexError: if there are fewer true labels than predicted labels.
     """
     # Builtin next() works on any iterator; the previous ``.next()`` call
     # depended on the iterator offering a Python-2 style method.
     _test_vals, test_labs, pred_labs = next(fit_data.as_numpy_iterator())
     compare = np.array(
         [test_labs[i] == predicted for i, predicted in enumerate(pred_labs)])
     return compare.sum() / len(compare)
示例#21
0
def evaluate(user_model: tf.keras.Model,
             movie_model: tf.keras.Model,
             test: tf.data.Dataset,
             movies: tf.data.Dataset,
             train: Optional[tf.data.Dataset] = None,
             k: int = 10) -> Dict[Text, float]:
    """Evaluates a Movielens model on the supplied datasets.

    Every user in ``test`` is scored against all movie embeddings by dot
    product. The top ``k`` movies are compared with the movies that user has
    in ``test``.

    Args:
      user_model: User representation model; called with ``{"user_id": ...}``.
      movie_model: Movie representation model; called with
        ``{"movie_id": ...}``.
      test: Test dataset of rows with ``user_id`` and ``movie_id``.
      movies: Dataset of movies with a ``movie_id`` field.
      train: Training dataset. If supplied, the movies a user has in it are
        given a score of -1e6 so they drop out of the top-k.
      k: The cutoff value at which to compute precision and recall.

    Returns:
      Dictionary with the mean ``precision_at_k`` and ``recall_at_k``.
    """

    id_batches = movies.batch(1000).map(lambda x: x["movie_id"])
    movie_ids = np.concatenate(list(id_batches.as_numpy_iterator()))

    # Map each movie id to its position in ``movie_ids``.
    movie_vocabulary = {
        movie_id: position
        for position, movie_id in enumerate(movie_ids.tolist())
    }

    train_user_to_movies = collections.defaultdict(lambda: array.array("i"))
    test_user_to_movies = collections.defaultdict(lambda: array.array("i"))

    # Group movie positions by user, training split first when present.
    splits = [(test, test_user_to_movies)]
    if train is not None:
        splits.insert(0, (train, train_user_to_movies))
    for split, user_to_movies in splits:
        for row in split.as_numpy_iterator():
            user_id = row["user_id"]
            movie_position = movie_vocabulary[row["movie_id"]]
            user_to_movies[user_id].append(movie_position)

    embedding_batches = movies.batch(4096).map(
        lambda x: movie_model({"movie_id": x["movie_id"]}))
    movie_embeddings = np.concatenate(
        list(embedding_batches.as_numpy_iterator()))

    precision_values = []
    recall_values = []

    for user_id, watched in test_user_to_movies.items():
        user_embedding = user_model({"user_id": np.array([user_id])}).numpy()
        scores = (user_embedding @ movie_embeddings.T).flatten()

        test_movies = np.frombuffer(watched, dtype=np.int32)

        if train is not None:
            train_movies = np.frombuffer(train_user_to_movies[user_id],
                                         dtype=np.int32)
            scores[train_movies] = -1e6

        top_movies = np.argsort(-scores)[:k]
        hits = sum(x in top_movies for x in test_movies)
        precision_values.append(hits / k)
        recall_values.append(hits / len(test_movies))

    precision_at_k = np.mean(precision_values)
    recall_at_k = np.mean(recall_values)
    return {
        "precision_at_k": precision_at_k,
        "recall_at_k": recall_at_k
    }
示例#22
0
def test_model(
    dev_dl: tf.data.Dataset,
    model: nn.Module,
    args: argparse.Namespace,
) -> nn.Module:
    """Evaluate ``model`` on ``dev_dl`` and report depth-estimation metrics.

    Accumulates RMS, REL, LOG10 and the three delta-threshold metrics over all
    batches, saves a comparison diagram for the first ``args.num_images``
    batches, prints a summary and, when ``args.load_dir`` is set, writes the
    same summary to ``{args.load_dir}/test_results.txt``.

    Args:
        dev_dl: dataset yielding ``(x, y)`` batches; must support ``len()``.
        model: network to evaluate.
        args: namespace providing ``batch_size``, ``num_images``, ``save_dir``,
            ``name`` and ``load_dir``.

    Returns:
        The ``model`` that was passed in.

    Raises:
        ZeroDivisionError: if ``dev_dl`` yields no batches.
    """
    # Unused in this function; the call is kept in case get_device() has side
    # effects.
    device = model_utils.get_device()

    print('\nComputing evaluation metrics...')
    total_pixels = 0
    total_examples = 0
    squared_error = 0
    rel_error = 0
    log_error = 0
    threshold1 = 0  # 1.25
    threshold2 = 0  # 1.25^2
    threshold3 = 0  # corresponds to 1.25^3

    print('  Running forward inference...')
    # Disable autograd only for the duration of the evaluation loop instead of
    # switching it off globally for the caller.
    with torch.no_grad(), tqdm(total=args.batch_size *
                               len(dev_dl)) as progress_bar:
        for i, (x_batch_orig,
                y_batch) in enumerate(dev_dl.as_numpy_iterator()):
            x_batch, y_batch = model_utils.preprocess_test_example(
                x_batch_orig, y_batch)

            # Forward pass on model
            y_pred = model(x_batch)

            # TODO: Process y_pred in the optimal way (round it off, etc)
            # Maybe clamp from 0 to infty or something
            # Count of pixels outside the NaN mask in this batch.
            nanmask = getNanMask(y_batch)
            total_pixels = torch.sum(~nanmask)
            total_examples += x_batch.shape[0]

            # RMS, REL, LOG10, threshold calculation
            # NOTE(review): each term is a per-batch average, yet the totals
            # are later divided by the number of examples -- confirm this is
            # the intended normalisation.
            squared_error += (
                torch.sum(torch.pow(y_pred - y_batch, 2)).item() /
                total_pixels)**0.5
            # removeNans is applied here as in the validation loop of
            # train_model; without it a NaN target pixel makes REL NaN.
            rel_error += torch.sum(
                removeNans(torch.abs(y_pred - y_batch) /
                           y_batch)).item() / total_pixels
            log_error += torch.sum(
                torch.abs(
                    removeNans(torch.log10(y_pred)) -
                    removeNans(torch.log10(y_batch)))).item() / total_pixels
            threshold1 += torch.sum(
                torch.max(y_pred / y_batch, y_batch /
                          y_pred) < 1.25).item() / total_pixels
            threshold2 += torch.sum(
                torch.max(y_pred / y_batch, y_batch /
                          y_pred) < 1.25**2).item() / total_pixels
            threshold3 += torch.sum(
                torch.max(y_pred / y_batch, y_batch /
                          y_pred) < 1.25**3).item() / total_pixels

            if i < args.num_images:
                model_utils.make_3_col_diagram(
                    x_batch.cpu().numpy(),
                    y_batch.cpu().numpy(),
                    y_pred.cpu().numpy(),
                    f'{args.save_dir}/{args.name}/{args.name}_{i}.png',
                )

            progress_bar.update(len(x_batch))

            del x_batch
            del y_pred
            del y_batch

    print('\n  Calculating overall metrics...')
    print()
    # The accumulators are tensors (they were divided by the tensor
    # ``total_pixels``); float() makes the report show plain numbers rather
    # than ``tensor(...)``.
    output_str = ''
    output_str += '*' * 30 + '\n'
    output_str += f'RMS:   {float(squared_error / total_examples)}\n'
    output_str += f'REL:   {float(rel_error / total_examples)}\n'
    output_str += f'LOG10: {float(log_error / total_examples)}\n'
    output_str += f'delta1:{float(threshold1 / total_examples)}\n'
    output_str += f'delta2:{float(threshold2 / total_examples)}\n'
    output_str += f'delta3:{float(threshold3 / total_examples)}\n'
    output_str += '*' * 30
    print(output_str)

    if args.load_dir is not None:
        with open(f'{args.load_dir}/test_results.txt', 'w') as f:
            f.write(output_str)

    return model
示例#23
0
def denoise_dataset(ds: tf.data.Dataset, model_fn="ae_denoise_model"):
    """Denoise every element of ``ds`` with a saved Keras model.

    The dataset is fully materialised into a list, the model stored at
    ``model_fn`` is loaded, and both are passed to ``denoise_signal``.

    Args:
        ds: dataset to denoise.
        model_fn: path passed to ``tf.keras.models.load_model``.

    Returns:
        A new dataset built with ``from_tensor_slices`` from the output of
        ``denoise_signal``.
    """
    signals = list(ds.as_numpy_iterator())
    denoiser = tf.keras.models.load_model(model_fn)
    return tf.data.Dataset.from_tensor_slices(
        denoise_signal(signals, model=denoiser))