Example #1
def determine_loss_scales(
        snapshots: np.ndarray,
        hparams: tf.contrib.training.HParams) -> Tuple[np.ndarray, np.ndarray]:
    """Determine scale factors for the loss.

  When passed into model.compute_loss, predictions of all zero should result
  in a loss of 1.0 when averaged over the full dataset.

  Args:
    snapshots: np.ndarray with shape [examples, x] with high-resolution
      training data.
    hparams: hyperparameters to use for training.

  Returns:
    Tuple of two numpy arrays:
      error_floor: numpy array with scale for weighting of relative errors.
      error_scale: array with dimensions [2, derivative] indicating the
        scaling in the loss to use on squared error and relative squared error
        for each derivative target.
  """
    with tf.Graph().as_default():
        dataset = model.make_dataset(snapshots, hparams, repeat=False)
        data = load_dataset(dataset)

    baseline_error = (data['labels'] - data['baseline'])**2
    percentile = 100 * hparams.error_floor_quantile
    error_floor = np.maximum(
        np.percentile(baseline_error, percentile, axis=(0, 1)), 1e-12)

    # predict zero for all derivatives, and a constant value for the integrated
    # solution over time.
    equation_type = equations.equation_type_from_hparams(hparams)
    num_zero_predictions = len(equation_type.DERIVATIVE_ORDERS) + 1
    labels_shape = data['labels'].shape
    predictions = np.concatenate([
        np.zeros(labels_shape[:-1] + (num_zero_predictions,)),
        np.repeat(data['inputs'][..., np.newaxis],
                  labels_shape[-1] - num_zero_predictions,
                  axis=-1),
    ], axis=-1)

    components = np.stack(
        model.abs_and_rel_error(predictions=predictions,
                                labels=data['labels'],
                                baseline=data['baseline'],
                                error_floor=error_floor))
    baseline_error = np.mean(components, axis=(1, 2))
    logging.info('baseline_error: %s', baseline_error)

    error_scale = np.where(baseline_error > 0, 1.0 / baseline_error, 0)
    return error_floor, error_scale
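A minimal NumPy-only sketch of the scaling arithmetic above, with made-up toy shapes and data and the abs/rel error axis dropped for brevity: the error floor is a low percentile of the squared baseline error per derivative, and the loss scale is the reciprocal of the mean baseline error, so that an all-zero prediction averages out to a loss of 1.0.

import numpy as np

# Toy squared baseline errors with shape [examples, x, derivative]; values are made up.
baseline_error = np.random.RandomState(0).rand(8, 16, 3) ** 2

# Floor: a low percentile of the error per derivative, clipped away from zero
# so it can safely appear in the denominator of relative errors.
error_floor = np.maximum(np.percentile(baseline_error, 10, axis=(0, 1)), 1e-12)

# Scale: reciprocal of the mean baseline error per derivative (0 where the mean
# is 0), so multiplying by it normalizes the baseline loss to 1.0 on average.
mean_error = baseline_error.mean(axis=(0, 1))
error_scale = np.where(mean_error > 0, 1.0 / mean_error, 0)
print(error_floor.shape, error_scale.shape)  # (3,) (3,)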
Example #2
def weighted_loss(normalized_loss_per_head: tf.Tensor,
                  hparams: tf.contrib.training.HParams) -> tf.Tensor:
    """Calculate overall training loss.

  Weights are normalized to sum to 1.0 (`absolute_error_weight +
  relative_error_weight` and `space_derivatives_weight + time_derivative_weight
  + integrated_solution_weight`) before being used.

  Args:
    normalized_loss_per_head: tensor with dimensions [abs/rel error, channel].
    hparams: model hyperparameters.

  Returns:
    Scalar float32 Tensor indicating the loss.
  """
    # dimensions [abs/rel error]
    abs_rel_weights = tf.convert_to_tensor(
        [hparams.absolute_error_weight, hparams.relative_error_weight])
    abs_rel_weights /= tf.reduce_sum(abs_rel_weights)

    equation_type = equations.equation_type_from_hparams(hparams)

    num_space = len(equation_type.DERIVATIVE_ORDERS)
    num_integrated = normalized_loss_per_head.shape[-1].value - num_space - 1
    # dimensions [channel]
    weights_list = (
        [hparams.space_derivatives_weight / num_space] * num_space +
        [hparams.time_derivative_weight])
    if num_integrated:
        weights_list.extend(
            [hparams.integrated_solution_weight / num_integrated] *
            num_integrated)
    channel_weights = tf.convert_to_tensor(weights_list)
    channel_weights /= tf.reduce_sum(channel_weights)

    # dimensions [abs/rel error, channel]
    weights = abs_rel_weights[:, tf.newaxis] * channel_weights[tf.newaxis, :]
    return tf.reduce_sum(weights * normalized_loss_per_head)
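The weighting in weighted_loss reduces to an outer product of two weight vectors that each sum to 1.0, so the final loss is a convex combination of the per-head losses. A plain NumPy sketch of the same arithmetic (the weight values and channel counts here are hypothetical):

import numpy as np

# Hypothetical weights: equal abs/rel weighting, 1 space derivative,
# 1 time derivative, and 2 integrated-solution channels.
abs_rel = np.array([1.0, 1.0])
abs_rel /= abs_rel.sum()  # normalize to sum to 1

num_space, num_integrated = 1, 2
channels = np.array([1.0 / num_space] * num_space +
                    [1.0] +
                    [1.0 / num_integrated] * num_integrated)
channels /= channels.sum()  # normalize to sum to 1

# One weight per (abs/rel error, channel) head; the outer product also sums to 1.
weights = abs_rel[:, None] * channels[None, :]
loss_per_head = np.ones((2, channels.size))  # pretend every head has loss 1.0
print(weights.sum(), (weights * loss_per_head).sum())  # both ~1.0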
Example #3
def training_loop(snapshots: np.ndarray,
                  checkpoint_dir: str,
                  hparams: tf.contrib.training.HParams,
                  master: str = '') -> pd.DataFrame:
    """Run training.

  Args:
    snapshots: np.ndarray with shape [examples, x] with high-resolution
      training data.
    checkpoint_dir: directory to which to save model checkpoints.
    hparams: hyperparameters for training, as created by create_hparams().
    master: string master to use for MonitoredTrainingSession.

  Returns:
    pd.DataFrame with metrics for the full training run.
  """
    hparams = copy.deepcopy(hparams)
    set_data_dependent_hparams(hparams, snapshots)
    logging.info('Training with hyperparameters:\n%r', hparams)

    hparams_path = os.path.join(checkpoint_dir, 'hparams.pbtxt')
    with tf.gfile.GFile(hparams_path, 'w') as f:
        f.write(str(hparams.to_proto()))

    logging.info('Setting up training')
    _, train_step = setup_training(snapshots, hparams)
    train_inferer = Inferer(snapshots, hparams, training=True)
    test_inferer = Inferer(snapshots, hparams, training=False)

    global_step = tf.train.get_or_create_global_step()

    logging.info('Variables: %s',
                 '\n'.join(map(str, tf.trainable_variables())))

    logged_metrics = []
    equation_type = equations.equation_type_from_hparams(hparams)

    with tf.train.MonitoredTrainingSession(
            master=master,
            checkpoint_dir=checkpoint_dir,
            save_checkpoint_secs=300,
            config=_disable_rewrite_config(),
            hooks=[SaveAtEnd(checkpoint_dir_to_path(checkpoint_dir))]) as sess:

        test_writer = tf.summary.FileWriter(
            os.path.join(checkpoint_dir, 'test'), sess.graph, flush_secs=60)
        train_writer = tf.summary.FileWriter(
            os.path.join(checkpoint_dir, 'train'), sess.graph, flush_secs=60)

        initial_step = sess.run(global_step)

        with test_writer, train_writer:
            for step in range(initial_step, hparams.learning_stops[-1]):
                sess.run(train_step)

                if (step + 1) % hparams.eval_interval == 0:
                    train_inference_data = train_inferer.run(sess)
                    test_inference_data = test_inferer.run(sess)

                    train_metrics = calculate_metrics(train_inference_data,
                                                      equation_type)
                    test_metrics = calculate_metrics(test_inference_data,
                                                     equation_type)
                    logged_metrics.append((step, test_metrics, train_metrics))

                    logging.info(metrics_one_linear(test_metrics))
                    save_summaries(test_metrics, test_writer, global_step=step)
                    save_summaries(train_metrics,
                                   train_writer,
                                   global_step=step)

    return metrics_to_dataframe(logged_metrics)
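training_loop depends on the rest of the package (setup_training, Inferer, the metric helpers), so it is not runnable on its own, but the checkpoint-resume and periodic-evaluation pattern it uses is plain TF 1.x. A self-contained toy sketch of that pattern, with a made-up one-variable "model" and an arbitrary /tmp directory:

import os
import tensorflow as tf  # TF 1.x, matching the tf.contrib usage above

# Trivial stand-in for a real training graph: one variable pushed toward zero.
tf.reset_default_graph()
global_step = tf.train.get_or_create_global_step()
weight = tf.get_variable('weight', initializer=1.0)
loss = tf.square(weight)
train_step = tf.train.GradientDescentOptimizer(0.1).minimize(
    loss, global_step=global_step)

checkpoint_dir = '/tmp/toy_training_loop'  # hypothetical location
with tf.train.MonitoredTrainingSession(
        checkpoint_dir=checkpoint_dir, save_checkpoint_secs=300) as sess:
    writer = tf.summary.FileWriter(
        os.path.join(checkpoint_dir, 'train'), sess.graph, flush_secs=60)
    with writer:
        # Resume from the persisted global step, as training_loop does.
        initial_step = sess.run(global_step)
        for step in range(initial_step, 100):
            _, current_loss = sess.run([train_step, loss])
            if (step + 1) % 10 == 0:  # periodic evaluation / logging
                summary = tf.Summary(value=[tf.Summary.Value(
                    tag='loss', simple_value=float(current_loss))])
                writer.add_summary(summary, global_step=step)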
Example #4
    def test_integrate_exact_baseline_and_model(self,
                                                warmup=0,
                                                conservative=False,
                                                resample_factor=4,
                                                exact_filter_interval=None,
                                                **hparam_values):
        hparams = training.create_hparams(learning_rates=[1e-3],
                                          learning_stops=[20],
                                          eval_interval=10,
                                          equation_kwargs=json.dumps(
                                              {'num_points': NUM_X_POINTS}),
                                          conservative=conservative,
                                          resample_factor=resample_factor,
                                          **hparam_values)
        self.train(hparams)

        results = integrate.integrate_exact_baseline_and_model(
            self.checkpoint_dir,
            random_seed=RANDOM_SEED,
            times=np.linspace(0, 1, num=11),
            warmup=warmup,
            exact_filter_interval=exact_filter_interval)

        self.assertIsInstance(results, xarray.Dataset)
        self.assertEqual(
            dict(results.dims), {
                'time': 11,
                'x_high': NUM_X_POINTS,
                'x_low': NUM_X_POINTS // resample_factor
            })
        self.assertEqual(results['y_exact'].dims, ('time', 'x_high'))
        self.assertEqual(results['y_baseline'].dims, ('time', 'x_low'))
        self.assertEqual(results['y_model'].dims, ('time', 'x_low'))

        with self.subTest('average should be zero'):
            y_exact_mean = results.y_exact.mean('x_high')
            xarray.testing.assert_allclose(y_exact_mean,
                                           xarray.zeros_like(y_exact_mean),
                                           atol=1e-3)

        with self.subTest('matching initial conditions'):
            if conservative:
                resample = duckarray.resample_mean
            else:
                resample = duckarray.subsample
            y_exact = resample(
                results.y_exact.isel(time=0).values, resample_factor)
            np.testing.assert_allclose(y_exact,
                                       results.y_baseline.isel(time=0).values)
            np.testing.assert_allclose(y_exact,
                                       results.y_model.isel(time=0).values)

        with self.subTest('matches integrate_baseline'):
            equation_type = equations.equation_type_from_hparams(hparams)
            assert equation_type.CONSERVATIVE == conservative
            equation = equation_type(NUM_X_POINTS // resample_factor,
                                     resample_factor=resample_factor,
                                     random_seed=RANDOM_SEED)
            results2 = integrate.integrate_baseline(
                equation, times=np.linspace(0, 1, num=11), warmup=warmup)
            np.testing.assert_allclose(results['y_baseline'].data,
                                       results2['y'].data,
                                       atol=1e-5)
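The conservative/non-conservative branch in the test picks between two resampling schemes from the package's duckarray module. A stand-alone NumPy sketch of what they amount to (these are simplified stand-ins for duckarray.resample_mean and duckarray.subsample, not the package's actual code):

import numpy as np

def resample_mean(y, factor):
    # Conservative resampling: each coarse cell is the mean of the `factor`
    # fine cells it covers, so the spatial average is preserved.
    return y.reshape(-1, factor).mean(axis=-1)

def subsample(y, factor):
    # Non-conservative resampling: keep every `factor`-th point.
    return y[::factor]

y_fine = np.sin(np.linspace(0, 2 * np.pi, 16, endpoint=False))
print(resample_mean(y_fine, 4))  # 4 coarse cell averages
print(subsample(y_fine, 4))      # 4 sampled points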