Example #1
def to_bfloat16_unbiased(x, quantization_noise=None):
  """Convert a float32 to a bfloat16 using randomized roundoff.

  The current implementation uses quantization_noise_from_step_num to generate
  quantization_noise, which requires global_step, and is not deterministic.
  To use it for inference, it might be feasible to replace the noise generation
  function with a constant, e.g., 0.5.

  Args:
    x: A float32 Tensor.
    quantization_noise: A float, specifying the quantization noise.

  Returns:
    A bfloat16 Tensor, with the same shape as x.
  """
  if quantization_noise is None:
    quantization_noise = quantization_noise_from_step_num()
  x_sign = tf.sign(x)
  # Make sure x is positive.  If it is zero, the two candidates are identical.
  x = x * x_sign + 1e-30
  cand1 = tf.to_bfloat16(x)
  cand1_f = tf.to_float(cand1)
  # This relies on the fact that for a positive bfloat16 b,
  # b * 1.005 gives you the next higher bfloat16 and b*0.995 gives you the
  # next lower one. Both 1.005 and 0.995 are ballpark estimates.
  cand2 = tf.to_bfloat16(
      tf.where(tf.greater(x, cand1_f), cand1_f * 1.005, cand1_f * 0.995))
  ret = randomized_roundoff_to_bfloat16(x, quantization_noise, cand1, cand2)
  return ret * tf.to_bfloat16(x_sign)
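
The helper randomized_roundoff_to_bfloat16 is not shown in this example. A minimal sketch of what it plausibly does, inferred from the docstring and the call site (treat the exact form as an assumption, not the project's actual code): round to the farther candidate with probability equal to how far x has travelled toward it, which makes the rounding unbiased in expectation.

def randomized_roundoff_to_bfloat16(x, noise, cand1, cand2):
  # Hypothetical reconstruction: stochastic rounding between the two
  # neighboring bfloat16 candidates.
  cand1_f = tf.to_float(cand1)
  cand2_f = tf.to_float(cand2)
  # Fraction of the way from cand1 to cand2; lies in [0, 1) for x between them.
  frac = (x - cand1_f) / (cand2_f - cand1_f)
  # P(noise < frac) == frac for uniform noise, so E[result] == x.
  return tf.where(tf.greater(frac, noise), cand2, cand1)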
Example #2
    def alltoall(self, x, mesh_axis, split_axis, concat_axis):
        """Grouped alltoall (like MPI alltoall with splitting and concatenation).

        Args:
          x: a LaidOutTensor
          mesh_axis: an integer (the mesh axis along which to group)
          split_axis: an integer (the Tensor axis along which to split)
          concat_axis: an integer (the Tensor axis along which to concatenate)

        Returns:
          a LaidOutTensor
        """
        x = x.to_laid_out_tensor()
        t = x.one_slice
        group_assignment = self._create_group_assignment([mesh_axis])
        dtype = t.dtype
        if dtype == tf.float32:
            # There seems to be a bug with float32 alltoall.
            # Do it in bfloat16 until the bug is fixed.
            # TODO(noam): file a bug
            t = tf.to_bfloat16(t)
        t = tpu_ops.all_to_all(t,
                               concat_dimension=concat_axis,
                               split_dimension=split_axis,
                               split_count=len(group_assignment[0]),
                               group_assignment=group_assignment)
        t = tf.cast(t, dtype)
        x = self.LaidOutTensor([t])
        return x
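
For intuition, the exchange performed by tpu_ops.all_to_all can be simulated on the host (an illustrative numpy sketch, not the library API): each of the n group members splits its slice into n pieces along split_axis, piece j goes to member j, and every member concatenates what it receives along concat_axis.

import numpy as np

def alltoall_sim(slices, split_axis, concat_axis):
    # slices: one array per group member; all the same shape.
    n = len(slices)
    pieces = [np.split(s, n, axis=split_axis) for s in slices]
    # Member j receives piece j from every source, in source order.
    return [np.concatenate([pieces[i][j] for i in range(n)], axis=concat_axis)
            for j in range(n)]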
Example #3
        def _step(source_replica, target_replica, x_split, op="plus_eq"):
            """Helper function - one step of summing or copying.

            If op == "plus_eq", then adds source_replica into target_replica
            If op == "copy", then copies source_replica onto target_replica

            These operations happen for all shards.  The replica numbers are offset
            by the shard numbers to keep all physical links busy.

            Args:
              source_replica: an integer
              target_replica: an integer
              x_split: a list of lists of tensors
              op: a string
            """
            for shard in range(parallelism.n):
                source_device = (shard + source_replica) % parallelism.n
                target_device = (shard + target_replica) % parallelism.n
                source = x_split[source_device][shard]
                if use_bfloat16:
                    with tf.device(parallelism.devices[source_device]):
                        source = tf.to_bfloat16(source)
                with tf.device(parallelism.devices[target_device]):
                    source = tf.to_float(source)
                    if op == "plus_eq":
                        x_split[target_device][shard] += source
                    else:
                        assert op == "copy"
                        x_split[target_device][shard] = tf.identity(source)
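
The shard offset in _step is what balances traffic: within a single call, every shard uses a distinct (source_device, target_device) pair, so no physical link carries two transfers at once. A standalone illustration (assuming four devices):

n = 4  # number of devices, chosen for illustration
source_replica, target_replica = 0, 1
for shard in range(n):
    src = (shard + source_replica) % n
    dst = (shard + target_replica) % n
    print("shard %d: device %d -> device %d" % (shard, src, dst))
# shard 0: device 0 -> device 1
# shard 1: device 1 -> device 2
# shard 2: device 2 -> device 3
# shard 3: device 3 -> device 0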
Example #4
def _to_bfloat16_unbiased(x, noise):
    """Convert a float32 to a bfloat16 using randomized roundoff.

    Args:
      x: A float32 Tensor.
      noise: a float32 Tensor with values in [0, 1), broadcastable to tf.shape(x)
    Returns:
      A float32 Tensor.
    """
    x_sign = tf.sign(x)
    # Make sure x is positive.  If it is zero, the two candidates are identical.
    x = x * x_sign + 1e-30
    cand1 = tf.to_bfloat16(x)
    cand1_f = tf.to_float(cand1)
    # This relies on the fact that for a positive bfloat16 b,
    # b * 1.005 gives you the next higher bfloat16 and b*0.995 gives you the
    # next lower one. Both 1.005 and 0.995 are ballpark estimates.
    cand2 = tf.to_bfloat16(tf.where(tf.greater(x, cand1_f), cand1_f * 1.005, cand1_f * 0.995))
    ret = _randomized_roundoff_to_bfloat16(x, noise, cand1, cand2)
    return ret * tf.to_bfloat16(x_sign)
Example #5
def _to_bfloat16_unbiased(x):
  """Convert a float32 to a bfloat16 using randomized roundoff.

  Note: If this ever produces worse results than using float32 all the way
  through, we should try to diagnose and fix it.  There are several things
  to try:

  1. Encode parameter x for storage purposes as
     _to_bfloat16_unbiased(tf.pow(x, 5)) .  This gives 5x the
     resolution while incurring overflow and underflow at 10^9 and 10^-9
     instead of 10^37 and 10^-37.  Comes at a cost of extracting fifth roots
     to decode parameters.  Or use some other such scheme.

  2. In this function, use actual random numbers, different for each parameter
     as opposed to the same for every parameter in the graph.

  3. Look for bugs in this function.

  Args:
    x: A float32 Tensor.
  Returns:
    A float32 Tensor.
  """
  # Not using random_uniform here due to a problem on TPU in that random seeds
  # are not respected, which may cause the parameters on different replicas
  # to go out-of-sync.
  quantization_noise = _quantization_noise_from_step_num()
  x_sign = tf.sign(x)
  # Make sure x is positive.  If it is zero, the two candidates are identical.
  x = x * x_sign + 1e-30
  cand1 = tf.to_bfloat16(x)
  cand1_f = tf.to_float(cand1)
  # This relies on the fact that for a positive bfloat16 b,
  # b * 1.005 gives you the next higher bfloat16 and b*0.995 gives you the
  # next lower one. Both 1.005 and 0.995 are ballpark estimates.
  cand2 = tf.to_bfloat16(
      tf.where(tf.greater(x, cand1_f), cand1_f * 1.005, cand1_f * 0.995))
  ret = _randomized_roundoff_to_bfloat16(x, quantization_noise, cand1, cand2)
  return ret * tf.to_bfloat16(x_sign)
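
The helper _quantization_noise_from_step_num is not defined in this example. A minimal sketch of the idea it describes (an assumption, and one that ignores float32 precision loss at very large step counts): derive the same pseudo-random value in [0, 1) on every replica from the global step, e.g. via the golden-ratio sequence, which is equidistributed in [0, 1).

def _quantization_noise_from_step_num():
  # Hypothetical sketch: depends only on global_step, so all replicas
  # compute an identical value and stay in sync.
  step = tf.to_float(tf.train.get_or_create_global_step()) + 1.0
  phi = (5.0 ** 0.5 - 1.0) / 2.0  # golden-ratio conjugate
  return tf.mod(step * phi, 1.0)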
Example #6
def _blocked_and_dtype_transformations(tensor):
  """Yields variants of a tensor, for standard blocking/dtype variants.

  Args:
    tensor (tf.Tensor): Input tensor.

  Yields:
    (modified_tensor, suffix) pairs, where `modified_tensor` is a transformed
    version of the input, and `suffix` is a string like "/blocked32".
  """
  for blocking_level in (32, 48):
    blocked = make_padded_blocked_matrix(tensor, blocking_level)
    bfloat16_blocked = tf.to_bfloat16(bfloat16_permutation(blocked))
    yield blocked, '/blocked{}'.format(blocking_level)
    yield bfloat16_blocked, '/blocked{}/bfloat16'.format(blocking_level)
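
Neither make_padded_blocked_matrix nor bfloat16_permutation is defined here; both names come from the surrounding project. As a rough sketch of the blocking step only (the layout below is an assumption, not the project's actual implementation), one could pad the columns up to a multiple of block_size and reshape into blocks:

def make_padded_blocked_matrix(matrix, block_size):
  # Sketch: zero-pad columns to a multiple of block_size, then split the
  # matrix into [num_blocks, num_rows, block_size] blocks.
  num_rows, num_cols = matrix.shape.as_list()
  padded_cols = ((num_cols + block_size - 1) // block_size) * block_size
  padded = tf.pad(matrix, [[0, 0], [0, padded_cols - num_cols]])
  blocked = tf.reshape(padded, [num_rows, padded_cols // block_size, block_size])
  return tf.transpose(blocked, [1, 0, 2])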
Example #7
def train_model(train=None,
                test=None,
                row_id=None,
                gpu_device='/gpu:0',
                cpu_device='/cpu:0',
                z=18,
                version='3d',
                build_model=None,
                experiment_params=None,
                tf_records=False,
                checkpoint=None,
                weight_loss=True,
                overwrite_training_params=False,
                force_jk=False,
                use_bfloat16=False,
                wd=False,
                use_lms=False):
    """Run an experiment with hGRUs."""
    # Set up tensors
    (config, exp_label, prediction_dir, checkpoint_dir, summary_dir,
     test_data_meta, test_dataset_module, train_dataset_module,
     train_data_meta) = configure_model(train=train,
                                        test=test,
                                        row_id=row_id,
                                        gpu_device=gpu_device,
                                        z=z,
                                        version=version,
                                        build_model=build_model,
                                        experiment_params=experiment_params,
                                        evaluate=False)
    if overwrite_training_params:
        config = tf_fun.update_config(overwrite_training_params, config)
    config.ds_name = {'train': train, 'test': test}

    (test_images, test_labels, train_images,
     train_labels) = prepare_data(config=config,
                                  tf_records=tf_records,
                                  device=cpu_device,
                                  test_dataset_module=test_dataset_module,
                                  train_dataset_module=train_dataset_module,
                                  force_jk=force_jk,
                                  evaluate=False)
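    # Mixed-precision pattern: cast the inputs down to bfloat16 for the
    # forward pass; the logits are cast back to float32 further below so the
    # losses are computed at full precision.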
    if use_bfloat16:
        train_images = tf.to_bfloat16(train_images)
        test_images = tf.to_bfloat16(test_images)

    # Build training and test models
    with tf.device(gpu_device):
        train_logits = build_model(
            data_tensor=train_images,
            reuse=None,
            training=True,
            output_channels=config.train_label_shape[-1])
        test_logits = build_model(data_tensor=test_images,
                                  reuse=tf.AUTO_REUSE,
                                  training=False,
                                  output_channels=config.test_label_shape[-1])

    if use_bfloat16:
        train_logits = tf.cast(train_logits, experiment_params.tf_dtype)
        test_logits = tf.cast(test_logits, experiment_params.tf_dtype)

    # Derive loss
    if weight_loss:
        assert train_data_meta is not None, 'Could not find a train_data_meta'
        pos_weight = train_data_meta['weights']
        train_loss = tf.reduce_mean(
            tf.nn.weighted_cross_entropy_with_logits(targets=train_labels,
                                                     logits=train_logits,
                                                     pos_weight=pos_weight))
    else:
        train_loss = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(labels=train_labels,
                                                    logits=train_logits))
    test_loss = tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(labels=test_labels,
                                                logits=test_logits))

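    # Optional L2 weight decay over all trainable variables except
    # batch-normalization parameters, which are conventionally left
    # unregularized.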
    if wd:
        WEIGHT_DECAY = 1e-4
        train_loss += (WEIGHT_DECAY * tf.add_n([
            tf.nn.l2_loss(v) for v in tf.trainable_variables()
            if 'batch_normalization' not in v.name
        ]))

    # Derive metrics
    train_scores = tf.reduce_mean(tf.sigmoid(train_logits[:, :, :, :, :3]),
                                  axis=-1)  # config['gt_idx']])
    test_scores = tf.reduce_mean(tf.sigmoid(test_logits[:, :, :, :, :3]),
                                 axis=-1)  # config['gt_idx']])
    train_gt = tf.cast(
        tf.greater(tf.reduce_mean(train_labels[:, :, :, :, :3], axis=-1), 0.5),
        tf.int32)  # config['gt_idx']]
    test_gt = tf.cast(
        tf.greater(tf.reduce_mean(test_labels[:, :, :, :, :3], axis=-1), 0.5),
        tf.int32)  # config['gt_idx']]
    try:
        train_pr, train_pr_update, train_pr_init = calculate_pr(
            labels=train_gt,
            predictions=train_scores,
            summation_method='careful_interpolation',
            name='train_pr')
        test_pr, test_pr_update, test_pr_init = calculate_pr(
            labels=test_gt,
            predictions=test_scores,
            summation_method='careful_interpolation',
            name='test_pr')
    except Exception:
        print('Failed to use careful_interpolation')
        train_pr, train_pr_update, train_pr_init = calculate_pr(
            labels=train_gt,
            predictions=train_scores,
            summation_method='trapezoidal',
            name='train_pr')
        test_pr, test_pr_update, test_pr_init = calculate_pr(
            labels=test_gt,
            predictions=test_scores,
            summation_method='trapezoidal',
            name='test_pr')
    train_metrics = {'train_pr': train_pr, 'train_cce': train_loss}
    test_metrics = {'test_pr': test_pr, 'test_cce': test_loss}
    for k, v in train_metrics.items():
        if 'update' not in k:
            tf.summary.scalar(k, v)
    for k, v in test_metrics.items():
        if 'update' not in k:
            tf.summary.scalar(k, v)

    # Build optimizer
    lr = tf.placeholder(tf.float32, shape=[])
    train_op = optimizers.get_optimizer(loss=train_loss,
                                        lr=lr,
                                        optimizer=config['optimizer'])

    # Create dictionaries of important training and test information
    train_dict = {
        'train_loss': train_loss,
        'train_images': train_images,
        'train_labels': train_labels,
        'train_op': train_op,
        'train_pr_update': train_pr_update,
        'train_logits': train_scores
    }

    test_dict = {
        'test_loss': test_loss,
        'test_images': test_images,
        'test_labels': test_labels,
        'test_pr_update': test_pr_update,
        'test_logits': test_scores
    }
    train_metrics = {
        'train_pr': train_pr,
    }
    test_metrics = {
        'test_pr': test_pr,
    }
    reset_metrics = {
        'train_pr_init': train_pr_init,
        'test_pr_init': test_pr_init,
    }

    # Count model parameters
    parameter_count = tf_fun.count_parameters(tf.trainable_variables())
    print('Number of parameters in model: %s' % parameter_count)

    # Create datastructure for saving data
    ds = data_structure.data(train_batch_size=config.train_batch_size,
                             test_batch_size=config.test_batch_size,
                             test_iters=config.test_iters,
                             shuffle_train=config.shuffle_train,
                             shuffle_test=config.shuffle_test,
                             lr=config.lr,
                             training_routine=config.training_routine,
                             loss_function=config.loss_function,
                             optimizer=config.optimizer,
                             model_name=config.exp_label,
                             train_dataset=config.train_dataset,
                             test_dataset=config.test_dataset,
                             output_directory=config.results,
                             prediction_directory=prediction_dir,
                             summary_dir=summary_dir,
                             checkpoint_dir=checkpoint_dir,
                             parameter_count=parameter_count,
                             exp_label=exp_label)
    sess, summary_op, summary_writer, saver, adabn_init = initialize_tf(
        config=config, summary_dir=summary_dir)

    # Start training loop
    if use_lms:
        from tensorflow.contrib.lms import LMS
        lms_model = LMS({'cnn'}, lb=3)  # Hardcoded model scope for now...
        lms_model.run(tf.get_default_graph())
    if tf_records:
        # Coordinate for tfrecords
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        training_tf.training_loop(config=config,
                                  sess=sess,
                                  summary_op=summary_op,
                                  summary_writer=summary_writer,
                                  saver=saver,
                                  summary_dir=summary_dir,
                                  checkpoint_dir=checkpoint_dir,
                                  prediction_dir=prediction_dir,
                                  train_dict=train_dict,
                                  test_dict=test_dict,
                                  exp_label=config.exp_label,
                                  lr=lr,
                                  row_id=row_id,
                                  data_structure=ds,
                                  coord=coord,
                                  threads=threads,
                                  reset_metrics=reset_metrics,
                                  train_metrics=train_metrics,
                                  test_metrics=test_metrics,
                                  checkpoint=checkpoint,
                                  top_test=config['top_test'])
    else:
        training.training_loop(config=config,
                               sess=sess,
                               summary_op=summary_op,
                               summary_writer=summary_writer,
                               saver=saver,
                               summary_dir=summary_dir,
                               checkpoint_dir=checkpoint_dir,
                               prediction_dir=prediction_dir,
                               train_dict=train_dict,
                               test_dict=test_dict,
                               train_dataset_module=train_dataset_module,
                               test_dataset_module=test_dataset_module,
                               exp_label=config.exp_label,
                               lr=lr,
                               row_id=row_id,
                               data_structure=ds,
                               train_metrics=train_metrics,
                               test_metrics=test_metrics,
                               reset_metrics=reset_metrics,
                               checkpoint=checkpoint,
                               top_test=config['top_test'])
Example #8
"""tf.to_bfloat16(x, name = 'ToBFloat16')
解释:这个函数是将一个Tensor的数据类型转换成bfloat16。
译者注:这个API的作用不是很理解,但我测试了一下,输入的x必须是浮点型的,别的类型都不行。
使用例子:"""

import tensorflow as tf

sess = tf.Session()
data = tf.constant([x for x in range(20)], tf.float32)
print(sess.run(data))
d = tf.to_bfloat16(data)
print(sess.run(d))
"""输入参数:
  ● x: 一个Tensor或者是SparseTensor。
  ● name:(可选)为这个操作取一个名字。
输出参数:
  ● 一个Tensor或者SparseTensor,数据类型是bfloat16,数据维度和x相同。
提示:
  ● 错误: 如果x是不能被转换成bfloat16类型的,那么将报错。"""
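
To make the format concrete: bfloat16 keeps float32's 8-bit exponent but truncates the mantissa to 7 bits, so values carry only about 2-3 significant decimal digits. A quick round-trip demonstration (illustrative; the exact rounded values depend on the rounding mode):

import tensorflow as tf

sess = tf.Session()
x = tf.constant([1.0, 1.01, 300.0, 301.0], tf.float32)
# Round-trip through bfloat16 to expose the precision loss.
y = tf.to_float(tf.to_bfloat16(x))
print(sess.run(y))  # 1.01 and 301.0 land on nearby representable values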
Example #9
import numpy as np
import tensorflow as tf

x = np.empty(10, dtype="|S10")
for i, e in enumerate(x):
    x[i] = "{}".format(i)  # numeric strings; tf.string_to_number fails on non-numeric input
z_string_to_number = tf.string_to_number(x, out_type=tf.int32)

# tf.to_double
x = np.random.rand(3, 5)
z_to_double = tf.to_double(x)

# tf.to_float
x = np.random.rand(3, 5)
z_to_float = tf.to_float(x)

# tf.to_bfloat16
x = np.random.rand(3, 5).astype(np.float32)
z_to_bfloat16 = tf.to_bfloat16(x)

# tf.to_int32
x = np.random.rand(3, 5) * 10
z_to_int32 = tf.to_int32(x)

# tf.to_int64
x = np.random.rand(3, 5) * 10
z_to_int64 = tf.to_int64(x)

# tf.cast
x = np.random.rand(3, 5) * 10
z_cast = tf.cast(x, dtype=tf.int16)

with tf.Session() as sess:
    for z in (z_string_to_number, z_to_double, z_to_float,
              z_to_bfloat16, z_to_int32, z_to_int64, z_cast):
        print(sess.run(z))
Example #10
tf.scan()
tf.scatter_add()
tf.scatter_div()
tf.scatter_mul()
tf.scatter_nd()
tf.scatter_nd_add()
tf.scatter_nd_non_aliasing_add()
tf.scatter_nd_sub()
tf.scatter_nd_update()
tf.scatter

tf.tables_initializer()
tf.tensordot()
tf.tf_logging
tf.tile()
tf.to_bfloat16()
tf.to_double()
tf.to_float()
tf.to_int32()
tf.to_int64()

tf.trace()
tf.trainable_variables()
tf.transpose()
tf.truncated_normal()
tf.truediv()
tf.sparse_transpose()
tf.sparse_tensor_dense_matmul()
tf.sparse_accumulator_apply_gradient()
tf.sparse_accumulator_take_gradient()
tf.sparse_add()
Example #11
import tensorflow as tf

sess = tf.InteractiveSession()
s = tf.constant(['123', '257'])
print(s.eval(), s.dtype)
# string to number
num = tf.string_to_number(s)
print('number : ', num.eval(), num.dtype)
# number to double
d_num = tf.to_double(num)
print('double : ', d_num.eval(), d_num.dtype)
# number to float
f_num = tf.to_float(num)
print('float : ', f_num.eval(), f_num.dtype)
# number to bfloat16 (brain float16: 1 sign bit, 8 exponent bits, 7 mantissa bits)
f16_num = tf.to_bfloat16(num)
print('bfloat16 : ', f16_num.eval(), f16_num.dtype)
# number to int32
i32_num = tf.to_int32(num)
print('int32 : ', i32_num.eval(), i32_num.dtype)
# number to int64
i64_num = tf.to_int64(num)
print('int64 : ', i64_num.eval(), i64_num.dtype)
# cast to a specified type
cast_num = tf.cast(i64_num, tf.int8)
print('cast : ', cast_num.eval(), cast_num.dtype)
# bitcast: reinterpret the bytes (each int64 becomes eight int8 values)
bit_num = tf.bitcast(i64_num, tf.int8)
print('bitcast : ', bit_num.eval(), bit_num.dtype)
# saturate_cast: cast with clamping at the target type's range
saturate_cast = tf.saturate_cast(i64_num, tf.int32)
print('saturate_cast : ', saturate_cast.eval(), saturate_cast.dtype)
Example #12
#!/usr/bin/python
# -*- coding: utf-8 -*-

import tensorflow as tf
import tfutil

const1 = tf.constant(1, dtype=tf.float32)
tfutil.print_constant(const1)
print(const1)
bfloat1 = tf.to_bfloat16(const1)
tfutil.print_operation_value(bfloat1)
print(bfloat1)

const2 = tf.constant([2, 3], dtype=tf.float32)
tfutil.print_constant(const2)
print(const2)
bfloat2 = tf.to_bfloat16(const2)
tfutil.print_operation_value(bfloat2)
print(bfloat2)

var1 = tf.Variable(4, dtype=tf.float32)
tfutil.print_variable(var1)
print(var1)
bfloat3 = tf.to_bfloat16(var1)
tfutil.print_operation_value(bfloat3)
print(bfloat3)

var2 = tf.Variable([5, 6], dtype=tf.float32)
tfutil.print_variable(var2)
print(var2)
bfloat4 = tf.to_bfloat16(var2)
tfutil.print_operation_value(bfloat4)
print(bfloat4)