Exemplo n.º 1
0
    def test_profile(self):
        profiler.start()
        with traceme.TraceMe('three_times_five'):
            three = constant_op.constant(3)
            five = constant_op.constant(5)
            product = three * five
        self.assertAllEqual(15, product)
        with self.assertRaises(profiler.ProfilerAlreadyRunningError):
            profiler.start()

        profile_result = profiler.stop()
        profile_pb = trace_events_pb2.Trace()
        profile_pb.ParseFromString(profile_result)
        devices = frozenset(device.name
                            for device in profile_pb.devices.values())
        self.assertIn('/host:CPU', devices)
        if not test_util.IsBuiltWithROCm() and config.list_physical_devices(
                'GPU'):
            # device tracing is not yet supported on the ROCm platform
            self.assertIn('/device:GPU:0', devices)
        events = frozenset(event.name for event in profile_pb.trace_events)
        self.assertIn('three_times_five', events)
        self.assertIn('Mul', events)
        with self.assertRaises(profiler.ProfilerNotRunningError):
            profiler.stop()
Exemplo n.º 2
0
 def benchmark_keras_model_functional_fit_graph_mode_with_profiler(self):
   profiler.start()
   with context.graph_mode():
     model = make_keras_model(initializer="glorot_uniform")
     self._benchmark_keras_model_fit(model)
   result = profiler.stop()
   assert result is not None
Exemplo n.º 3
0
def trace_on(graph=True, profiler=False):  # pylint: disable=redefined-outer-name
    """Enables execution trace.

  Args:
    graph: whether to collect graphs used in execution
    profiler: whether to enable profiler.

  Returns:
    None
  """
    if not context.context().executing_eagerly():
        logging.warn("Must enable trace in eager mode.")
        return

    global _current_trace_context
    with _current_trace_context_lock:
        if _current_trace_context:
            logging.warn("Trace already enabled")
            return

        if graph and not profiler:
            context.context().enable_graph_collection()
        if profiler:
            context.context().enable_run_metadata()
            _profiler.start()

        _current_trace_context = _TraceContext(graph=graph, profiler=profiler)
Exemplo n.º 4
0
 def benchmark_keras_model_functional_fit_graph_mode_with_profiler(self):
     profiler.start()
     with context.graph_mode():
         model = make_keras_model(initializer="glorot_uniform")
         self._benchmark_keras_model_fit(model)
     result = profiler.stop()
     assert result is not None
Exemplo n.º 5
0
 def benchmark_keras_model_functional_fit_run_model_eagerly_with_profiler(
         self):
     profiler.start()
     model = make_keras_model(initializer="glorot_uniform")
     self._benchmark_keras_model_fit(model, run_eagerly=True)
     result = profiler.stop()
     assert result is not None
Exemplo n.º 6
0
def main():
    profile = None
    if "--run-cProfile" in sys.argv:
        import cProfile
        import pstats
        profile_file = "cProfile.log"
        print("Running cProfile, writing output to '{}'".format(profile_file), file=sys.stderr)
        profile = cProfile.Profile()
        profile.enable()
    parser = create_argparser()
    if len(sys.argv) < 2:
        parser.error("Too few arguments, run '{} --help' for more information.".format(lidbox.__name__))
    # TODO when a subcommand is used incorrectly, get usage strings for its subparser  instead of the root parser
    args = parser.parse_args()
    tf_profiler = None
    if args.run_tf_profiler:
        from tensorflow.python.eager import profiler as tf_profiler
        tf_profile_file = os.path.abspath(os.path.join("tf_profile", str(int(time.time()))))
        print("Running TensorFlow profiler, writing output to '{}'".format(tf_profile_file), file=sys.stderr)
        tf_profiler.start()
    ret = 1
    try:
        # Initialize a Command object from the class specified in args.cmd_class and remove the class from args
        command = args.__dict__.pop("cmd_class")(args)
        ret = command.run()
    finally:
        if tf_profiler:
            tf_profiler_result = tf_profiler.stop()
            tf_profiler.save(tf_profile_file, tf_profiler_result)
        if profile:
            profile.disable()
            with open(profile_file, "w") as out_f:
                pstats.Stats(profile, stream=out_f).sort_stats("tottime").print_stats()
    if ret:
        sys.exit(ret)
Exemplo n.º 7
0
 def benchmark_keras_model_functional_fit_run_model_eagerly_with_profiler(
     self):
   profiler.start()
   model = make_keras_model(initializer="glorot_uniform")
   self._benchmark_keras_model_fit(model, run_eagerly=True)
   result = profiler.stop()
   assert result is not None
Exemplo n.º 8
0
def _start_profiler():
    from tensorflow.python.eager import profiler  # pylint: disable=g-import-not-at-top

    try:
        logging.info('Starting profiler')
        profiler.start()
    except Exception:  # pylint: disable=W0703
        logging.error('Profiler failed to start due to error:\n %s',
                      traceback.format_exc())
  def test_profile_exceptions(self):
    logdir = self.get_temp_dir()
    profiler.start(logdir)
    with self.assertRaises(errors.AlreadyExistsError):
      profiler.start(logdir)

    profiler.stop()
    with self.assertRaises(errors.UnavailableError):
      profiler.stop()
def _start_profiler():
    from tensorflow.python.eager import profiler  # pylint: disable=g-import-not-at-top

    try:
        profiler.start()
        logging.info('Started TensorFlow profiler')
    except Exception:  # pylint: disable=broad-except
        logging.error('TensorFlow profiler failed to start due to error:\n %s',
                      traceback.format_exc())
Exemplo n.º 11
0
 def test_profile(self):
   profiler.start()
   three = constant_op.constant(3)
   five = constant_op.constant(5)
   product = three * five
   self.assertAllEqual(15, product)
   profile_result = profiler.stop()
   profile_pb = trace_events_pb2.Trace()
   profile_pb.ParseFromString(profile_result)
   profile_pb_str = '%s' % profile_pb
   self.assertTrue('Mul' in profile_pb_str)
Exemplo n.º 12
0
 def test_profile(self):
     profiler.start()
     three = constant_op.constant(3)
     five = constant_op.constant(5)
     product = three * five
     self.assertAllEqual(15, product)
     profile_result = profiler.stop()
     profile_pb = trace_events_pb2.Trace()
     profile_pb.ParseFromString(profile_result)
     profile_pb_str = '%s' % profile_pb
     self.assertTrue('Mul' in profile_pb_str)
Exemplo n.º 13
0
    def test_save_profile(self):
        logdir = self.get_temp_dir()
        profiler.start(logdir)
        with traceme.TraceMe('three_times_five'):
            three = constant_op.constant(3)
            five = constant_op.constant(5)
            product = three * five
        self.assertAllEqual(15, product)

        profiler.stop()
        file_list = gfile.ListDirectory(logdir)
        self.assertEqual(len(file_list), 2)
        for file_name in gfile.ListDirectory(logdir):
            if gfile.IsDirectory(os.path.join(logdir, file_name)):
                self.assertEqual(file_name, 'plugins')
            else:
                self.assertTrue(file_name.endswith('.profile-empty'))
        profile_dir = os.path.join(logdir, 'plugins', 'profile')
        run = gfile.ListDirectory(profile_dir)[0]
        hostname = socket.gethostname()
        overview_page = os.path.join(profile_dir, run,
                                     hostname + '.overview_page.pb')
        self.assertTrue(gfile.Exists(overview_page))
        input_pipeline = os.path.join(profile_dir, run,
                                      hostname + '.input_pipeline.pb')
        self.assertTrue(gfile.Exists(input_pipeline))
        tensorflow_stats = os.path.join(profile_dir, run,
                                        hostname + '.tensorflow_stats.pb')
        self.assertTrue(gfile.Exists(tensorflow_stats))

        trace_file = os.path.join(profile_dir, run, hostname + '.trace')
        self.assertTrue(gfile.Exists(trace_file))
        with gfile.Open(trace_file, 'rb') as f:
            profile_pb = trace_events_pb2.Trace()
            profile_pb.ParseFromString(f.read())
        devices = frozenset(device.name
                            for device in profile_pb.devices.values())
        self.assertIn('/host:CPU', devices)
        if config.list_physical_devices('GPU'):
            self.assertIn('/device:GPU:0', devices)
        events = frozenset(event.name for event in profile_pb.trace_events)
        self.assertIn('three_times_five', events)
        self.assertIn('Mul:Mul', events)
Exemplo n.º 14
0
def trace_on(graph=True, profiler=False):  # pylint: disable=redefined-outer-name
    """Starts a trace to record computation graphs and profiling information.

  Must be invoked in eager mode.

  When enabled, TensorFlow runtime will collection information that can later be
  exported and consumed by TensorBoard. The trace is activated across the entire
  TensorFlow runtime and affects all threads of execution.

  To stop the trace and export the collected information, use
  `tf.summary.trace_export`. To stop the trace without exporting, use
  `tf.summary.trace_off`.

  Args:
    graph: If True, enables collection of executed graphs. It includes ones from
        tf.function invocation and ones from the legacy graph mode. The default
        is True.
    profiler: If True, enables the advanced profiler. Enabling profiler
        implicitly enables the graph collection. The profiler may incur a high
        memory overhead. The default is False.

  """
    if ops.inside_function():
        logging.warn("Cannot enable trace inside a tf.function.")
        return
    if not context.executing_eagerly():
        logging.warn("Must enable trace in eager mode.")
        return

    global _current_trace_context
    with _current_trace_context_lock:
        if _current_trace_context:
            logging.warn("Trace already enabled")
            return

        if graph and not profiler:
            context.context().enable_graph_collection()
        if profiler:
            context.context().enable_run_metadata()
            _profiler.start()

        _current_trace_context = _TraceContext(graph=graph, profiler=profiler)
Exemplo n.º 15
0
def trace_on(graph=True, profiler=False):  # pylint: disable=redefined-outer-name
  """Starts a trace to record computation graphs and profiling information.

  Must be invoked in eager mode.

  When enabled, TensorFlow runtime will collection information that can later be
  exported and consumed by TensorBoard. The trace is activated across the entire
  TensorFlow runtime and affects all threads of execution.

  To stop the trace and export the collected information, use
  `tf.summary.trace_export`. To stop the trace without exporting, use
  `tf.summary.trace_off`.

  Args:
    graph: If True, enables collection of executed graphs. It includes ones from
        tf.function invocation and ones from the legacy graph mode. The default
        is True.
    profiler: If True, enables the advanced profiler. Enabling profiler
        implicitly enables the graph collection. The profiler may incur a high
        memory overhead. The default is False.

  """
  if ops.inside_function():
    logging.warn("Cannot enable trace inside a tf.function.")
    return
  if not context.context().executing_eagerly():
    logging.warn("Must enable trace in eager mode.")
    return

  global _current_trace_context
  with _current_trace_context_lock:
    if _current_trace_context:
      logging.warn("Trace already enabled")
      return

    if graph and not profiler:
      context.context().enable_graph_collection()
    if profiler:
      context.context().enable_run_metadata()
      _profiler.start()

    _current_trace_context = _TraceContext(graph=graph, profiler=profiler)
Exemplo n.º 16
0
    def test_profile(self):
        profiler.start()
        three = constant_op.constant(3)
        five = constant_op.constant(5)
        product = three * five
        self.assertAllEqual(15, product)
        with self.assertRaises(profiler.ProfilerAlreadyRunningError):
            profiler.start()

        profile_result = profiler.stop()
        profile_pb = trace_events_pb2.Trace()
        profile_pb.ParseFromString(profile_result)
        devices = frozenset(device.name
                            for device in profile_pb.devices.values())
        self.assertIn('/host:CPU', devices)
        if config.list_physical_devices('GPU'):
            self.assertIn('/device:GPU:0', devices)
        events = frozenset(event.name for event in profile_pb.trace_events)
        self.assertIn('Mul:Mul', events)
        with self.assertRaises(profiler.ProfilerNotRunningError):
            profiler.stop()
Exemplo n.º 17
0
def custom_training(waveGlow, hparams, optimizer, checkpoint,
                    manager_checkpoint):
    step = tf.cast(checkpoint.step, tf.int64)

    for epoch in tf.range(1):
        tf.summary.text(name='epoch',
                        data='Start epoch {}'.format(epoch.numpy()) +\
                        'at ' + datetime.now().strftime("%Y%m%d-%H%M%S"),
                        step=step)

        for int_step, (step,
                       x_train) in zip(range(50),
                                       training_dataset.enumerate(start=step)):
            if int_step == 2:
                profiler.start()

            train_step(step=step,
                       x_train=x_train,
                       waveGlow=waveGlow,
                       hparams=hparams,
                       optimizer=optimizer)

            if tf.equal(step % hparams['save_model_every'], 0):
                save_path = manager_checkpoint.save()
                tf.summary.text(name='save_checkpoint',
                                data="Saved checkpoint in" + save_path,
                                step=step)

            if tf.equal(step % hparams['save_audio_every'], 0):
                utils.eval_step(eval_dataset=validation_dataset,
                                waveGlow=waveGlow,
                                hparams=hparams,
                                step=step)
            if int_step == 50:
                profiler_result = profiler.stop()
                profiler.save(hparams['log_dir'], profiler_result)
                break

            checkpoint.step.assign_add(1)
Exemplo n.º 18
0
    def on_batch_end(self, batch, logs=None):
        """Writes scalar summaries for metrics on every training batch.

    Performs profiling if current batch is in profiler_batches.
    """
        # Don't output batch_size and batch number as TensorBoard summaries
        logs = logs or {}
        self._samples_seen += logs.get('size', 1)
        samples_seen_since = self._samples_seen - self._samples_seen_at_last_write
        if self.update_freq != 'epoch' and samples_seen_since >= self.update_freq:
            batch_logs = {('batch_' + k): v
                          for k, v in logs.items()
                          if k not in ['batch', 'size', 'num_steps']}
            self._write_custom_summaries(self._total_batches_seen, batch_logs)
            self._samples_seen_at_last_write = self._samples_seen
        self._total_batches_seen += 1
        if self._is_profiling:
            profiler.save(self.log_dir, profiler.stop())
            self._is_profiling = False
        elif (not self._is_profiling
              and self._total_batches_seen == self._profile_batch - 1):
            profiler.start()
            self._is_profiling = True
Exemplo n.º 19
0
  def on_batch_end(self, batch, logs=None):
    """Writes scalar summaries for metrics on every training batch.

    Performs profiling if current batch is in profiler_batches.
    """
    # Don't output batch_size and batch number as TensorBoard summaries
    logs = logs or {}
    self._samples_seen += logs.get('size', 1)
    samples_seen_since = self._samples_seen - self._samples_seen_at_last_write
    if self.update_freq != 'epoch' and samples_seen_since >= self.update_freq:
      batch_logs = {('batch_' + k): v
                    for k, v in logs.items()
                    if k not in ['batch', 'size', 'num_steps']}
      self._write_custom_summaries(self._total_batches_seen, batch_logs)
      self._samples_seen_at_last_write = self._samples_seen
    self._total_batches_seen += 1
    if self._is_profiling:
      profiler.save(self.log_dir, profiler.stop())
      self._is_profiling = False
    elif (not self._is_profiling and
          self._total_batches_seen == self._profile_batch - 1):
      profiler.start()
      self._is_profiling = True
Exemplo n.º 20
0
def run_profile(net,
                train=False,
                n_steps=150,
                do_profile=True,
                reps=1,
                dtype="float32",
                **kwargs):
    """
    Run profiler on a benchmark network.

    Parameters
    ----------
    net : `~nengo.Network`
        The nengo Network to be profiled.
    train : bool
        If True, profile the `.Simulator.fit` function. Otherwise, profile the
        `.Simulator.run` function.
    n_steps : int
        The number of timesteps to run the simulation.
    do_profile : bool
        Whether or not to run profiling
    reps : int
        Repeat the run this many times (only profile data from the last
        run will be kept).
    dtype : str
        Simulation dtype (e.g. "float32")

    Returns
    -------
    exec_time : float
        Time (in seconds) taken to run the benchmark, taking the minimum over
        ``reps``.

    Notes
    -----
    kwargs will be passed on to `.Simulator`
    """

    exec_time = 1e10
    n_batches = 1

    with net:
        nengo_dl.configure_settings(inference_only=not train, dtype=dtype)

    with nengo_dl.Simulator(net, **kwargs) as sim:
        if hasattr(net, "inp"):
            x = {
                net.inp:
                np.random.randn(sim.minibatch_size * n_batches, n_steps,
                                net.inp.size_out)
            }
        elif hasattr(net, "inp_a"):
            x = {
                net.inp_a:
                np.random.randn(sim.minibatch_size * n_batches, n_steps,
                                net.inp_a.size_out),
                net.inp_b:
                np.random.randn(sim.minibatch_size * n_batches, n_steps,
                                net.inp_b.size_out),
            }
        else:
            x = None

        if train:
            y = {
                net.p:
                np.random.randn(sim.minibatch_size * n_batches, n_steps,
                                net.p.size_in)
            }

            sim.compile(tf.optimizers.SGD(0.001), loss=tf.losses.mse)

            # run once to eliminate startup overhead
            start = timeit.default_timer()
            sim.fit(x, y, epochs=1, n_steps=n_steps)
            print("Warmup time:", timeit.default_timer() - start)

            for _ in range(reps):
                if do_profile:
                    profiler.start()
                start = timeit.default_timer()
                sim.fit(x, y, epochs=1, n_steps=n_steps)
                exec_time = min(timeit.default_timer() - start, exec_time)
                if do_profile:
                    profiler.save("profile", profiler.stop())

        else:
            # run once to eliminate startup overhead
            start = timeit.default_timer()
            sim.predict(x, n_steps=n_steps)
            print("Warmup time:", timeit.default_timer() - start)

            for _ in range(reps):
                if do_profile:
                    profiler.start()
                start = timeit.default_timer()
                sim.predict(x, n_steps=n_steps)
                exec_time = min(timeit.default_timer() - start, exec_time)
                if do_profile:
                    profiler.save("profile", profiler.stop())

    exec_time /= n_batches

    print("Execution time:", exec_time)

    return exec_time
Exemplo n.º 21
0
def run(flags_obj):
  """Run ResNet ImageNet training and eval loop using native Keras APIs.

  Args:
    flags_obj: An object containing parsed flag values.

  Raises:
    ValueError: If fp16 is passed as it is not currently supported.

  Returns:
    Dictionary of training and eval stats.
  """
  # TODO(tobyboyd): Remove eager flag when tf 1.0 testing ends.
  # Eager is default in tf 2.0 and should not be toggled
  if keras_common.is_v2_0():
    keras_common.set_config_v2()
  else:
    config = keras_common.get_config_proto_v1()
    if flags_obj.enable_eager:
      tf.compat.v1.enable_eager_execution(config=config)
    else:
      sess = tf.Session(config=config)
      tf.keras.backend.set_session(sess)

  # Execute flag override logic for better model performance
  if flags_obj.tf_gpu_thread_mode:
    keras_common.set_gpu_thread_mode_and_count(flags_obj)

  dtype = flags_core.get_tf_dtype(flags_obj)
  if dtype == 'float16':
    policy = tf.keras.mixed_precision.experimental.Policy('infer_float32_vars')
    tf.keras.mixed_precision.experimental.set_policy(policy)

  data_format = flags_obj.data_format
  if data_format is None:
    data_format = ('channels_first'
                   if tf.test.is_built_with_cuda() else 'channels_last')
  tf.keras.backend.set_image_data_format(data_format)

  strategy = distribution_utils.get_distribution_strategy(
      distribution_strategy=flags_obj.distribution_strategy,
      num_gpus=flags_obj.num_gpus,
      num_workers=distribution_utils.configure_cluster())

  strategy_scope = keras_common.get_strategy_scope(strategy)

  # pylint: disable=protected-access
  if flags_obj.use_synthetic_data:
    distribution_utils.set_up_synthetic_data()
    input_fn = keras_common.get_synth_input_fn(
        height=imagenet_main.DEFAULT_IMAGE_SIZE,
        width=imagenet_main.DEFAULT_IMAGE_SIZE,
        num_channels=imagenet_main.NUM_CHANNELS,
        num_classes=imagenet_main.NUM_CLASSES,
        dtype=dtype)
  else:
    distribution_utils.undo_set_up_synthetic_data()
    input_fn = imagenet_main.input_fn

  train_input_dataset = input_fn(
      is_training=True,
      data_dir=flags_obj.data_dir,
      batch_size=flags_obj.batch_size,
      num_epochs=flags_obj.train_epochs,
      parse_record_fn=parse_record_keras,
      datasets_num_private_threads=flags_obj.datasets_num_private_threads,
      dtype=dtype)

  eval_input_dataset = None
  if not flags_obj.skip_eval:
    eval_input_dataset = input_fn(
        is_training=False,
        data_dir=flags_obj.data_dir,
        batch_size=flags_obj.batch_size,
        num_epochs=flags_obj.train_epochs,
        parse_record_fn=parse_record_keras,
        dtype=dtype)

  with strategy_scope:
    optimizer = keras_common.get_optimizer()
    if dtype == 'float16':
      # TODO(reedwm): Remove manually wrapping optimizer once mixed precision
      # can be enabled with a single line of code.
      optimizer = tf.keras.mixed_precision.experimental.LossScaleOptimizer(
          optimizer, loss_scale=flags_core.get_loss_scale(flags_obj))

    if flags_obj.use_trivial_model:
      model = trivial_model.trivial_model(imagenet_main.NUM_CLASSES)
    else:
      model = resnet_model.resnet50(num_classes=imagenet_main.NUM_CLASSES,
                                    dtype=dtype)

    model.compile(loss='sparse_categorical_crossentropy',
                  optimizer=optimizer,
                  metrics=['sparse_categorical_accuracy'])

  time_callback, tensorboard_callback, lr_callback = keras_common.get_callbacks(
      learning_rate_schedule, imagenet_main.NUM_IMAGES['train'])

  train_steps = imagenet_main.NUM_IMAGES['train'] // flags_obj.batch_size
  train_epochs = flags_obj.train_epochs

  if flags_obj.train_steps:
    train_steps = min(flags_obj.train_steps, train_steps)
    train_epochs = 1

  num_eval_steps = (imagenet_main.NUM_IMAGES['validation'] //
                    flags_obj.batch_size)

  validation_data = eval_input_dataset
  if flags_obj.skip_eval:
    # Only build the training graph. This reduces memory usage introduced by
    # control flow ops in layers that have different implementations for
    # training and inference (e.g., batch norm).
    tf.keras.backend.set_learning_phase(1)
    num_eval_steps = None
    validation_data = None

  callbacks = [time_callback, lr_callback]
  if flags_obj.enable_tensorboard:
    callbacks.append(tensorboard_callback)
  if flags_obj.enable_e2e_xprof:
    profiler.start()

  history = model.fit(train_input_dataset,
                      epochs=train_epochs,
                      steps_per_epoch=train_steps,
                      callbacks=callbacks,
                      validation_steps=num_eval_steps,
                      validation_data=validation_data,
                      validation_freq=flags_obj.epochs_between_evals,
                      verbose=2)

  if flags_obj.enable_e2e_xprof:
    results = profiler.stop()
    profiler.save(flags_obj.model_dir, results)

  eval_output = None
  if not flags_obj.skip_eval:
    eval_output = model.evaluate(eval_input_dataset,
                                 steps=num_eval_steps,
                                 verbose=2)
  stats = keras_common.build_stats(history, eval_output, time_callback)
  return stats
Exemplo n.º 22
0
 def on_batch_begin(self, batch, logs=None):
   if batch == self.start_step:
     profiler.start()
     tf.compat.v1.logging.info('Profiler started at Step %s', self.start_step)
Exemplo n.º 23
0
 def on_batch_begin(self, batch, logs=None):
   if batch == self.start_step:
     profiler.start()
     tf.compat.v1.logging.info('Profiler started at Step %s', self.start_step)
Exemplo n.º 24
0
 def on_train_begin(self, logs=None):
   if self._profile_batch == 1:
     profiler.start()
     self._is_profiling = True
Exemplo n.º 25
0
 def on_train_begin(self, logs=None):
     if self._profile_batch == 1:
         profiler.start()
         self._is_profiling = True
Exemplo n.º 26
0
 def on_batch_begin(self, batch, logs=None):
     if batch == self.start_step_in_epoch and self.should_start:
         self.should_start = False
         profiler.start()
         tf.compat.v1.logging.info('Profiler started at Step %s',
                                   self.start_step)
Exemplo n.º 27
0
    value_grad = tf.random.normal(value.shape, dtype=value.dtype)
    v = tf.reduce_sum(value * value_grad)
    grad, = tf.gradients(v, feature)
    return grad, value_grad


@tf.function
def get_gradient_gt(feature, coordinate, value_grad):
    value = resampler.resampler(feature, coordinate)
    v = tf.reduce_sum(value * value_grad)
    grad, = tf.gradients(v, feature)

    return grad


mask, value = resample_image(feature, coordinate, tfnative=TFNATIVE)
gt = resampler.resampler(feature, coordinate)
grad, value_grad = get_gradient(feature, coordinate, TFNATIVE)
value_grad = tf.where(mask[..., None], value_grad, 0)
grad_gt = get_gradient_gt(feature, coordinate, value_grad)

profiler.start()

for i in range(3):
    # feature_grad, _ = get_gradient(feature, coordinate, tfnative=TFNATIVE)
    # print(feature_grad.shape)
    mask, value = resample_image(feature, coordinate, tfnative=TFNATIVE)

profiler_result = profiler.stop()
profiler.save('./profile.log', profiler_result)