Example #1
 def testSetHParamListNonListMismatch(self):
     hparams = hparam.HParams(a=1, b=[2.0, 3.0])
     with self.assertRaisesRegexp(ValueError, r'Must not pass a list'):
         hparams.set_hparam('a', [1.0])
     with self.assertRaisesRegexp(ValueError, r'Must pass a list'):
         hparams.set_hparam('b', 1.0)
Example #2
 def testBoolParsingFail(self):
     hparams = hparam.HParams(use_gpu=True)
     with self.assertRaisesRegexp(ValueError, r'Could not parse.*use_gpu'):
         hparams.parse('use_gpu=yep')
Example #3
 def testContains(self):
     hparams = hparam.HParams(foo=1)
     self.assertTrue('foo' in hparams)
     self.assertFalse('bar' in hparams)
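For contrast with the failure cases in Examples #1 and #2, a minimal sketch (not taken from the collected examples; parameter names are illustrative) of values that HParams.parse does accept:

# TF 1.x location of the HParams class used throughout these examples.
from tensorflow.contrib.training.python.training import hparam

hparams = hparam.HParams(use_gpu=True, rates=[1.0, 2.0])
# Booleans parse from 'true'/'false'; list-valued params take bracketed lists.
hparams.parse('use_gpu=false')
hparams.parse('rates=[0.1,0.2,0.3]')
assert hparams.use_gpu is False
assert hparams.rates == [0.1, 0.2, 0.3]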
Example #4
def get_pruning_hparams():
    """Get a tf.HParams object with the default values for the hyperparameters.

    name: string
      name of the pruning specification. Used for adding summaries and ops under
      a common tensorflow name_scope
    begin_pruning_step: integer
      the global step at which to begin pruning
    end_pruning_step: integer
      the global step at which to terminate pruning. Defaults to -1, implying
      that pruning continues until training stops
    weight_sparsity_map: list of strings
      comma-separated list of weight variable name:target sparsity pairs.
      For layers/weights not in this list, the sparsity specified by the
      target_sparsity hyperparameter is used.
      E.g. [conv1:0.9,conv2/kernel:0.8]
    threshold_decay: float
      the decay factor to use for exponential decay of the thresholds
    pruning_frequency: integer
      How often should the masks be updated? (in # of global_steps)
    nbins: integer
      number of bins to use for histogram computation
    block_height: integer
      number of rows in a block (defaults to 1)
    block_width: integer
      number of cols in a block (defaults to 1)
    block_pooling_function: string
      Whether to perform average (AVG) or max (MAX) pooling in the block
      (default: AVG)
    initial_sparsity: float
      initial sparsity value
    target_sparsity: float
      target sparsity value
    sparsity_function_begin_step: integer
      the global step at which the gradual sparsity function begins to
      take effect
    sparsity_function_end_step: integer
      the global step used as the end point for the gradual sparsity function
    sparsity_function_exponent: float
      exponent = 1 is linearly varying sparsity between initial and final.
      exponent > 1 varies more slowly towards the end than the beginning
    use_tpu: boolean
      Indicates whether to use TPU

    We use the following sparsity function:

    num_steps = (sparsity_function_end_step -
                 sparsity_function_begin_step)/pruning_frequency
    sparsity(step) = (initial_sparsity - target_sparsity)*
                     [1-step/(num_steps -1)]**exponent + target_sparsity

    Args:
      None

    Returns:
      tf.HParams object initialized to default values

    """
    return hparam.HParams(name='model_pruning',
                          begin_pruning_step=0,
                          end_pruning_step=-1,
                          weight_sparsity_map=[''],
                          threshold_decay=0.0,
                          pruning_frequency=10,
                          nbins=256,
                          block_height=1,
                          block_width=1,
                          block_pooling_function='AVG',
                          initial_sparsity=0.0,
                          target_sparsity=0.5,
                          sparsity_function_begin_step=0,
                          sparsity_function_end_step=100,
                          sparsity_function_exponent=3,
                          use_tpu=False)
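The docstring above gives the gradual sparsity schedule only as a formula; here is a minimal, illustrative sketch (not part of the pruning library) that evaluates it using the defaults returned by get_pruning_hparams():

def gradual_sparsity(step, hparams=get_pruning_hparams()):
    """Evaluates the gradual sparsity schedule from the docstring above."""
    num_steps = float(hparams.sparsity_function_end_step -
                      hparams.sparsity_function_begin_step) / hparams.pruning_frequency
    # Clip the schedule index so the sparsity saturates at target_sparsity.
    k = min(max(float(step - hparams.sparsity_function_begin_step) /
                hparams.pruning_frequency, 0.0), num_steps - 1)
    return ((hparams.initial_sparsity - hparams.target_sparsity) *
            (1 - k / (num_steps - 1)) ** hparams.sparsity_function_exponent +
            hparams.target_sparsity)

# With the defaults above: gradual_sparsity(0) == 0.0, gradual_sparsity(100) == 0.5.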
Example #5
    parser.add_argument(
        '--eval-steps',
        help='Number of steps to run evaluation for at each checkpoint',
        default=100,
        type=int)
    args = parser.parse_args()

    # Set python level verbosity
    tf.logging.set_verbosity(args.verbosity)
    # Set C++ Graph Execution level verbosity
    # Integer-divide so the env var is e.g. '2' rather than '2.0' under Python 3.
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = str(
        tf.logging.__dict__[args.verbosity] // 10)

    train_files = []
    tflist = file_io.list_directory(args.train_files_dir)
    for x in tflist:
        if args.train_files_prefix in x:
            train_files.append(os.path.join(args.train_files_dir, x))
    print("train files list: %s" % train_files)

    eval_files = []
    eflist = file_io.list_directory(args.eval_files_dir)
    for x in eflist:
        if args.eval_files_prefix in x:
            eval_files.append(os.path.join(args.eval_files_dir, x))
    print("eval files list: %s" % eval_files)

    # Run the training job
    hparams = hparam.HParams(**args.__dict__)
    run_experiment(train_files, eval_files, hparams)
Example #6
 def testSomeValues(self):
     hparams = hparam.HParams(aaa=1, b=2.0, c_c='relu6', d='/a/b=c/d')
     self.assertDictEqual(
         {
             'aaa': 1,
             'b': 2.0,
             'c_c': 'relu6',
             'd': '/a/b=c/d'
         }, hparams.values())
     expected_str = ('[(\'aaa\', 1), (\'b\', 2.0), (\'c_c\', \'relu6\'), '
                     '(\'d\', \'/a/b=c/d\')]')
     self.assertEqual(expected_str, str(hparams.__str__()))
     self.assertEqual(expected_str, str(hparams))
     self.assertEqual(1, hparams.aaa)
     self.assertEqual(2.0, hparams.b)
     self.assertEqual('relu6', hparams.c_c)
     self.assertEqual('/a/b=c/d', hparams.d)
     hparams.parse('aaa=12')
     self.assertDictEqual(
         {
             'aaa': 12,
             'b': 2.0,
             'c_c': 'relu6',
             'd': '/a/b=c/d'
         }, hparams.values())
     self.assertEqual(12, hparams.aaa)
     self.assertEqual(2.0, hparams.b)
     self.assertEqual('relu6', hparams.c_c)
     self.assertEqual('/a/b=c/d', hparams.d)
     hparams.parse('c_c=relu4, b=-2.0e10')
     self.assertDictEqual(
         {
             'aaa': 12,
             'b': -2.0e10,
             'c_c': 'relu4',
             'd': '/a/b=c/d'
         }, hparams.values())
     self.assertEqual(12, hparams.aaa)
     self.assertEqual(-2.0e10, hparams.b)
     self.assertEqual('relu4', hparams.c_c)
     self.assertEqual('/a/b=c/d', hparams.d)
     hparams.parse('c_c=,b=0,')
     self.assertDictEqual({
         'aaa': 12,
         'b': 0,
         'c_c': '',
         'd': '/a/b=c/d'
     }, hparams.values())
     self.assertEqual(12, hparams.aaa)
     self.assertEqual(0.0, hparams.b)
     self.assertEqual('', hparams.c_c)
     self.assertEqual('/a/b=c/d', hparams.d)
     hparams.parse('c_c=2.3",b=+2,')
     self.assertEqual(2.0, hparams.b)
     self.assertEqual('2.3"', hparams.c_c)
     hparams.parse('d=/a/b/c/d,aaa=11,')
     self.assertEqual(11, hparams.aaa)
     self.assertEqual(2.0, hparams.b)
     self.assertEqual('2.3"', hparams.c_c)
     self.assertEqual('/a/b/c/d', hparams.d)
     hparams.parse('b=1.5,d=/a=b/c/d,aaa=10,')
     self.assertEqual(10, hparams.aaa)
     self.assertEqual(1.5, hparams.b)
     self.assertEqual('2.3"', hparams.c_c)
     self.assertEqual('/a=b/c/d', hparams.d)
     with self.assertRaisesRegexp(ValueError, 'Unknown hyperparameter'):
         hparams.parse('x=123')
     with self.assertRaisesRegexp(ValueError, 'Could not parse'):
         hparams.parse('aaa=poipoi')
     with self.assertRaisesRegexp(ValueError, 'Could not parse'):
         hparams.parse('aaa=1.0')
     with self.assertRaisesRegexp(ValueError, 'Could not parse'):
         hparams.parse('b=12x')
     with self.assertRaisesRegexp(ValueError, 'Could not parse'):
         hparams.parse('b=relu')
     with self.assertRaisesRegexp(ValueError, 'Must not pass a list'):
         hparams.parse('aaa=[123]')
     self.assertEqual(10, hparams.aaa)
     self.assertEqual(1.5, hparams.b)
     self.assertEqual('2.3"', hparams.c_c)
     self.assertEqual('/a=b/c/d', hparams.d)
     # Exports to proto.
     hparam_def = hparams.to_proto()
     # Imports from proto.
     hparams2 = hparam.HParams(hparam_def=hparam_def)
     # Verifies that all hparams are restored.
     self.assertEqual(10, hparams2.aaa)
     self.assertEqual(1.5, hparams2.b)
     self.assertEqual('2.3"', hparams2.c_c)
     self.assertEqual('/a=b/c/d', hparams2.d)
Example #7
        x={"x": eval_data},
        y=eval_labels,
        num_epochs=1,
        shuffle=False)

    estimator = tf.estimator.Estimator(model_fn=model.solution)

    steps_per_eval = int(model.get_training_steps() / params.eval_steps)

    for _ in range(params.eval_steps):
        estimator.train(train_input_fn, steps=steps_per_eval)
        estimator.evaluate(eval_input_fn)


if __name__ == "__main__":
    PARSER = argparse.ArgumentParser()
    PARSER.add_argument(
        '--eval-steps',
        help='Number of steps to run evaluation for at each checkpoint',
        default=1,
        type=int
    )

    ARGS = PARSER.parse_args()
    tf.logging.set_verbosity('INFO')
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = "0"
    #os.environ['TF_CPP_MIN_LOG_LEVEL'] = str(tf.logging.__dict__['INFO'] / 10)

    HPARAMS = hparam.HParams(**ARGS.__dict__)
    train_model(HPARAMS)
Example #8
    parser.add_argument('-b',
                        '--batch-size',
                        help='Training batch size',
                        default=200,
                        type=int)
    parser.add_argument('-t',
                        '--step-rate',
                        help='Step rate',
                        default=1e-3,
                        type=float)
    parser.add_argument('-x',
                        '--max-steps',
                        help='Max training steps',
                        default=20000,
                        type=int)
    parser.add_argument('--configure',
                        default=None,
                        help="Model structure configuration JSON file.")
    parser.add_argument('-k',
                        '--kmer',
                        help='K-mer length',
                        default=1,
                        type=int)
    parser.add_argument('-r',
                        '--retrain',
                        help='Flag to retrain the model',
                        # argparse's type=bool treats any non-empty string
                        # (including "False") as True; use a store_true flag.
                        action='store_true')
    args = parser.parse_args()
    run(hparam.HParams(**args.__dict__))
Example #9
def generate_experiment_fn(**experiment_args):
  """Create an experiment function.
  See command line help text for description of args.
  Args:
    experiment_args: keyword arguments to be passed through to experiment
      See `tf.contrib.learn.Experiment` for full args.
  Returns:
    A function:
      (tf.contrib.learn.RunConfig, tf.contrib.training.HParams) -> Experiment
    This function is used by learn_runner to create an Experiment which
    executes model code provided in the form of an Estimator and
    input functions.
  """
  def _experiment_fn(run_config, hparams):
    # num_epochs can control duration if train_steps isn't
    # passed to Experiment
    train_input = lambda: model.generate_input_fn(
        hparams.train_files,
        num_epochs=hparams.num_epochs,
        batch_size=hparams.train_batch_size,
    )
    # Don't shuffle evaluation data
    eval_input = lambda: model.generate_input_fn(
        hparams.eval_files,
        batch_size=hparams.eval_batch_size,
        shuffle=False
    )
    return tf.contrib.learn.Experiment(
        model.build_estimator(
            embedding_size=hparams.embedding_size,
            # Construct layer sizes with exponential decay
            hidden_units=[
                max(2, int(hparams.first_layer_size *
                           hparams.scale_factor**i))
                for i in range(hparams.num_layers)
            ],
            config=run_config
        ),
        train_input_fn=train_input,
        eval_input_fn=eval_input,
        **experiment_args
    )
  return _experiment_fn

  # Set python level verbosity
 # tf.logging.set_verbosity(args.verbosity)
  # Set C++ Graph Execution level verbosity
  # os.environ['TF_CPP_MIN_LOG_LEVEL'] = str(
  #     tf.logging.__dict__[args.verbosity] / 10)

  # If job_dir_reuse is False then remove the job_dir if it exists
  # if not args.reuse_job_dir:
  #   if tf.gfile.Exists(args.job_dir):
  #     tf.gfile.DeleteRecursively(args.job_dir)
  #     tf.logging.info("Deleted job_dir {} to avoid re-use".format(args.job_dir))
  #   else:
  #     tf.logging.info("No job_dir available to delete")
  # else:
  #   tf.logging.info("Reusing job_dir {} if it exists".format(args.job_dir))

  # Run the training job
  # learn_runner pulls configuration information from environment
  # variables using tf.learn.RunConfig and uses this configuration
  # to conditionally execute Experiment, or param server code
  learn_runner.run(
      generate_experiment_fn(
          train_steps=FLAGS.max_steps,
          eval_steps=FLAGS.eval_steps,
      ),
      run_config=run_config.RunConfig(model_dir=FLAGS.job_dir),
      # HParams takes keyword arguments; unpack the parsed flags (an
      # argparse-style namespace is assumed here) rather than passing the
      # namespace object itself.
      hparams=hparam.HParams(**vars(FLAGS))
  )
Example #10
class SpectrumAugmenter():
    """Performs data augmentation as according to the SpecAug paper.

    https://arxiv.org/pdf/1904.08779.pdf
    """

    params = hparam.HParams(
        # Maximum number of frequency bins of frequency masking.
        freq_mask_max_bins=15,
        # Number of times we apply masking on the frequency axis.
        freq_mask_count=1,

        # Maximum number of frames of time masking. Overridden when
        # use_dynamic_time_mask_max_frames = True.
        time_mask_max_frames=50,
        # Number of times we apply masking on the time axis. Acts as an
        # upper bound when time_masks_per_frame > 0.
        time_mask_count=1,

        # If true, time_mask_max_frames is determined by
        # time_mask_max_ratio * utterance_length.
        use_dynamic_time_mask_max_frames=False,
        # Maximum portion allowed for time masking.
        time_mask_max_ratio=1.0,
        # Ratio of the number of time masks to the number of frames. If > 0,
        # the multiplicity of the time mask is determined by
        # min(time_masks_per_frame * utterance_length, time_mask_count).
        time_masks_per_frame=0.0,

        # To be set to either `dynamic` or `static`. If `dynamic`, the time
        # warp bound is determined by time_warp_max_ratio * utterance_length.
        # If `static`, it is determined by
        # min(time_warp_max_frames, time_warp_max_ratio * utterance_length).
        time_warp_bound='static',
        # Maximum number of frames for shifting in time warping.
        time_warp_max_frames=0,
        # Maximum portion of frames for shifting in time warping.
        time_warp_max_ratio=0.0,
        use_noise=False,  # Whether to noisify the time masked region.
        gaussian_noise=False,  # Use Gaussian distribution for noise.
        # Whether to unstack features before applying SpecAugment.
        unstack=False,
        stack_height=3,  # Number of frames stacked on top of each other.
        # Whether to use stateless random TensorFlow ops, with seeds determined
        # by the input features. This is necessary for applications including
        # federated learning.
        use_input_dependent_random_seed=False,
        dtype=tf.float32,  # Datatype to use.
        fprop_dtype=None,  # Activations datatype to use.
        random_seed=None,  # Random seed for deterministic unittests.
    )

    def __init__(self, config=None):
        if config is not None:
            self.params.override_from_dict(config)

    def EinsumBBmBm(self, a, b, name=None):
        return tf.einsum('b,bm->bm', a, b, name=name)

    def EinsumBmtBmBt(self, a, b, name=None):
        return tf.einsum('bmt,bm->bt', a, b, name=name)

    def EinsumBxycByBxyc(self, a, b, name=None):
        return tf.einsum('bxyc,by->bxyc', a, b, name=name)

    def EinsumBxycBxBxyc(self, a, b, name=None):
        return tf.einsum('bxyc,bx->bxyc', a, b, name=name)

    def EinsumBxyBxBxy(self, a, b, name=None):
        return tf.einsum('bxy,bx->bxy', a, b, name=name)

    def EinsumBxycBzxBzyc(self, a, b, name=None):
        return tf.einsum('bxyc,bzx->bzyc', a, b, name=name)

    def _GetMask(self,
                 batch_size,
                 choose_range,
                 mask_size,
                 global_seed,
                 max_length=None,
                 masks_per_frame=0.0,
                 multiplicity=1,
                 dtype=tf.float32,
                 max_ratio=1.0):
        """Returns fixed size multi-masks starting from random positions.

        A multi-mask is a mask obtained by applying multiple masks.

        When max_length is given, this function:
          1) Sample random mask lengths less than max_length with shape
             (batch_size, multiplicity).
          2) Truncate lengths to a max of (choose_range * max_ratio),
             so that each mask is fully contained within the corresponding sequence.
          3) Randomly sample start points of shape (batch_size, multiplicity)
             within [0, choose_range - lengths).
          4) For each batch, multiple masks (whose number is given by the
             multiplicity) are constructed.
          5) Return a mask of shape (batch_size, mask_size) where masks are
             obtained by composing the masks constructed in step 4).
             If masks_per_frame > 0, the number is given by
             min(masks_per_frame * choose_range, multiplicity).
             If not, all the masks are composed. The masked regions are set to zero.

        When max_length is not given, this function:
          1) Sample random mask lengths less than (choose_range * max_ratio)
             with shape (batch_size, multiplicity).
          2) Proceed to steps 3), 4) and 5) of the above.

        Args:
          batch_size: Batch size. Integer number.
          choose_range: Range within which the masked entries must lie. Tensor of
            shape (batch_size,).
          mask_size: Size of the mask. Integer number.
          global_seed: an integer seed tensor for stateless random ops.
          max_length: Maximum number of allowed consecutive masked entries. Integer
            number or None.
          masks_per_frame: Number of masks per frame. Float number. If > 0, the
            multiplicity of the mask is set to be masks_per_frame * choose_range.
          multiplicity: Maximum number of total masks. Integer number.
          dtype: Data type.
          max_ratio: Maximum portion of the entire range allowed to be masked. Float
            number.

        Returns:
          mask: a fixed size multi-mask starting from a random position with shape
          (batch_size, mask_size).
        """
        p = self.params
        # Non-empty random seed values are only used for testing or when using
        # stateless random ops. seed_1 and seed_2 are set separately to avoid
        # correlation of mask size and mask position.
        if p.use_input_dependent_random_seed:
            seed_1 = global_seed + 1
            seed_2 = global_seed + 2
        elif p.random_seed:
            seed_1 = p.random_seed + 1
            seed_2 = 2 * p.random_seed
        else:
            seed_1 = p.random_seed
            seed_2 = p.random_seed
        # Sample lengths for multiple masks.
        if max_length and max_length > 0:
            max_length = tf.broadcast_to(tf.cast(max_length, dtype),
                                         (batch_size, ))
        else:
            max_length = tf.cast(choose_range, dtype=dtype) * max_ratio
        random_uniform = _random_uniform_op(p.use_input_dependent_random_seed)
        masked_portion = random_uniform(shape=(batch_size, multiplicity),
                                        minval=0.0,
                                        maxval=1.0,
                                        dtype=dtype,
                                        seed=seed_1)
        masked_frame_size = self.EinsumBBmBm(max_length, masked_portion)
        masked_frame_size = tf.cast(masked_frame_size, dtype=tf.int32)
        # Make sure the sampled length is smaller than max_ratio * length_bound.
        # Note that sampling in this way is biased
        # (shorter sequences may be over-masked).
        choose_range = tf.expand_dims(choose_range, -1)
        choose_range = tf.tile(choose_range, [1, multiplicity])
        length_bound = tf.cast(choose_range, dtype=dtype)
        length_bound = tf.cast(max_ratio * length_bound, dtype=tf.int32)
        length = tf.minimum(masked_frame_size, tf.maximum(length_bound, 1))

        # Choose starting point.
        random_start = random_uniform(shape=(batch_size, multiplicity),
                                      maxval=1.0,
                                      seed=seed_2)
        start_with_in_valid_range = random_start * tf.cast(
            (choose_range - length + 1), dtype=dtype)
        start = tf.cast(start_with_in_valid_range, tf.int32)
        end = start + length - 1

        # Shift starting and end point by small value.
        delta = tf.constant(0.1)
        start = tf.expand_dims(tf.cast(start, dtype) - delta, -1)
        start = tf.tile(start, [1, 1, mask_size])
        end = tf.expand_dims(tf.cast(end, dtype) + delta, -1)
        end = tf.tile(end, [1, 1, mask_size])

        # Construct pre-mask of shape (batch_size, multiplicity, mask_size).
        diagonal = tf.expand_dims(
            tf.expand_dims(tf.cast(tf.range(mask_size), dtype=dtype), 0), 0)
        diagonal = tf.tile(diagonal, [batch_size, multiplicity, 1])
        pre_mask = tf.cast(tf.math.logical_and(diagonal < end,
                                               diagonal > start),
                           dtype=dtype)

        # Sum masks with appropriate multiplicity.
        if masks_per_frame > 0:
            multiplicity_weights = tf.tile(
                tf.expand_dims(tf.range(multiplicity, dtype=dtype), 0),
                [batch_size, 1])
            multiplicity_tensor = masks_per_frame * \
                tf.cast(choose_range, dtype=dtype)
            multiplicity_weights = tf.cast(
                multiplicity_weights < multiplicity_tensor, dtype=dtype)
            pre_mask = self.EinsumBmtBmBt(pre_mask, multiplicity_weights)
        else:
            pre_mask = tf.reduce_sum(pre_mask, 1)
        mask = tf.cast(1.0 - tf.cast(pre_mask > 0, dtype=dtype), dtype=dtype)

        if p.fprop_dtype is not None and p.fprop_dtype != p.dtype:
            mask = tf.cast(mask, p.fprop_dtype)

        return mask

    def _GetWarpMatrix(self,
                       batch_size,
                       choose_range,
                       matrix_size,
                       global_seed,
                       max_warp_frames=None,
                       dtype=tf.float32,
                       max_ratio=1.0):
        """Returns warp matrices starting from random positions.

        When max_warp_frames is not None, this function:
          1) Sample random warp displacements from the interval
             [-max_warp_frames, max_warp_frames) to yield shift tensor
             with shape (batch_size,).
          2) Truncate lengths to a maximum magnitude of (choose_range * max_ratio),
             so that each shift is fully contained within the
             corresponding sequence.
          3) Randomly sample origin points of shape (batch_size,) within
             [shift, choose_range - shift).
          4) Return a batch of 1-D linear maps that fix the boundary points and
             shift the origin point by the shift.

        When max_warp_frames is None:
          1) Sample random warp displacements with magnitudes less than
             (choose_range * max_ratio) to yield shift tensor with
             shape (batch_size,).
          2) Proceed through steps 3), 4).

        Args:
          batch_size: Batch size. Integer number.
          choose_range: Range within which the warp reference points must lie.
            Tensor of shape (batch_size,).
          matrix_size: Dimension of vector space warp matrix is applied to. Integer
            number.
          global_seed: an integer seed tensor for stateless random ops.
          max_warp_frames: Upper-bound on the warp distance. Integer or None.
          dtype: Data type.
          max_ratio: Maximum ratio between the shift distance and choose_range.
            Float number.

        Returns:
          warp_matrix: An array of fixed size warp matrices with shape
          (batch_size, matrix_size, matrix_size).
        """
        p = self.params
        # Non-empty random seed values are only used for testing or when using
        # stateless random ops. seed_3, seed_4, and seed_5 are set separately to
        # avoid correlation of warp magnitude and origin position.
        if p.use_input_dependent_random_seed:
            seed_3 = global_seed + 3
            seed_4 = global_seed + 4
            seed_5 = global_seed + 5
        elif p.random_seed:
            seed_3 = p.random_seed - 1
            seed_4 = p.random_seed - 1
            seed_5 = 2 * p.random_seed + 1
        else:
            seed_3 = p.random_seed
            seed_4 = p.random_seed
            seed_5 = p.random_seed

        choose_range_dtype = tf.cast(choose_range, dtype=dtype)
        length_upper_bound = tf.cast(max_ratio * choose_range_dtype,
                                     dtype=tf.int32)
        # Set shift length.

        random_uniform = _random_uniform_op(p.use_input_dependent_random_seed)

        if max_warp_frames and max_warp_frames > 0:
            shift = random_uniform(shape=(batch_size, ),
                                   minval=-1 * max_warp_frames,
                                   maxval=max_warp_frames + 1,
                                   dtype=tf.int32,
                                   seed=seed_3)
        else:
            random_ratio = random_uniform(shape=(batch_size, ),
                                          minval=-1.0,
                                          maxval=1.0,
                                          dtype=dtype,
                                          seed=seed_4)
            shift = tf.cast(
                random_ratio * tf.cast(length_upper_bound, dtype=dtype),
                tf.int32)
        # Make sure the sampled length is smaller than max_ratio * length_bound.
        # Note that sampling in this way is biased.
        # (Shorter sequences may be over-masked.)
        final_shift = tf.maximum(-length_upper_bound,
                                 tf.minimum(shift, length_upper_bound))
        # Choose origin anchor point.
        mid_range = tf.cast(choose_range, dtype=tf.int32)
        mid_range = tf.maximum(choose_range - 2, 0)
        random_origin = random_uniform(shape=(batch_size, ),
                                       maxval=1.0,
                                       seed=seed_5)
        origin_with_in_valid_range = random_origin * \
            tf.cast(mid_range, dtype=dtype)
        origin = tf.cast(origin_with_in_valid_range, tf.int32) + 1
        # Set destination point of the origin anchor point under the warp map.
        destination = origin + final_shift
        # Cast origin and destination.
        origin = tf.cast(origin, dtype=dtype)
        destination = tf.cast(destination, dtype=dtype)

        return self._ConstructWarpMatrix(batch_size=batch_size,
                                         matrix_size=matrix_size,
                                         origin=origin,
                                         destination=destination,
                                         choose_range=choose_range_dtype,
                                         dtype=dtype)

    def _ConstructWarpMatrix(self, batch_size, matrix_size, origin,
                             destination, choose_range, dtype):
        """Returns warp matrices according to origin, destination and choose_range.

        This function constructs a batch of warp matrices which maps the batch
        of origin points to the batch of destination points with fixed boundary
        coordinates at 0 and choose_range.

        The warping function, defined by the origin anchor point `origin`,
        the destination of the origin anchor point `destination` and the
        length of the domain in the warping axis `choose_range` is a piecewise
        linear map that fixes the points 0 and `choose_range` and maps
        `origin` to `destination`.

        For the warping matrix to be non-singular, destination must lie in the
        range 1 <= destination <= choose_range - 1, so a destination
        out of this range is adjusted to be in this range before the warping
        matrix is constructed.

        The warping map can be explicitly written by first defining the slopes:
          1) slope_0 = origin / destination.
          2) slope_1 = (choose_range - origin) / (choose_range - destination).
          3) slope_2 = 1.0.

        Then the origin point orig_i of the mapped coordinate i is given by:
          1) i < destination: orig_i = slope_0 * i.
          2) destination <= i < choose_range:
             orig_i = slope_1 * i - (slope_1 - slope_0) * destination.
          3) i >= choose_range: orig_i = i.

        Denoting n_i = ceil(orig_i), the warp matrix element warp[i][j] is given by:
          1) j = n_i: 1 - n_i + orig_i.
          2) j = n_i - 1: n_i - orig_i.
          3) Otherwise: 0.

        Applying the warp matrix to an array of pixels, i.e.,
        warped_pixel[i] = sum_j warp[i][j] * pixel[j], one would get
        warped_pixel[i] = (n_i-orig_i) pixel[n_i-1] + (1-n_i+orig_i) pixel[n_i].

        Args:
          batch_size: Batch size. Integer number.
          matrix_size: Dimension of the vector space the warp matrix is applied to.
            Integer number.
          origin: Origin anchor point for warping. Tensor of shape (batch_size,) and
            data type dtype.
          destination: Destination of the origin anchor point upon warping. Tensor
            of shape (batch_size,) and data type dtype.
          choose_range: Range within which the warp reference points must lie.
            Tensor of shape (batch_size,) data type dtype.
          dtype: Data type of origin, destination, choose_range and the output warp
            matrix.

        Returns:
          warp_matrix: An array of fixed size warp matrices with shape
          (batch_size, matrix_size, matrix_size).
        """
        p = self.params

        # Entries of destination must be in the range
        # 1 <= destination <= choose_range - 1
        # for warp matrix to have non-singular values.
        destination = tf.minimum(tf.maximum(destination, 1.0),
                                 choose_range - 1.0)

        # Construct piece-wise linear function fixing boundary points
        # specified by zero, choose_range and matrix size and maps
        # the origin anchor point to the destination.
        destination_bc = tf.broadcast_to(destination,
                                         (matrix_size, batch_size))
        destination_bc = tf.transpose(destination_bc)
        choose_range_bc = tf.broadcast_to(choose_range,
                                          (matrix_size, batch_size))
        choose_range_bc = tf.transpose(choose_range_bc)

        # Slopes of piece-wise linear function.
        slope_0 = origin / destination
        slope_1 = (choose_range - origin) / (choose_range - destination)
        slope_2 = 1.0

        # x is a batch of origin matrices.
        # The origin matrix is the matrix such that
        # origin[i][j] = Origin coordinate of coordinate i for the warp map.
        # Denoting the destination of the origin anchor point in the
        # warp map as "dest," the origin coordinate of point i is given by:
        # 1) i < dest: slope_0 * i.
        # 2) dest <= i < choose_range: slope_1 * i - (slope_1 - slope_0) * dest.
        # 3) i >= choose_range: i.
        x = tf.broadcast_to(tf.cast(tf.range(matrix_size), dtype=dtype),
                            (batch_size, matrix_size))
        x = (self.EinsumBBmBm(slope_0, x) + self.EinsumBBmBm(
            slope_1 - slope_0, tf.nn.relu(x - destination_bc)) +
             self.EinsumBBmBm(slope_2 - slope_1,
                              tf.nn.relu(x - choose_range_bc)))
        x = tf.broadcast_to(x, (matrix_size, batch_size, matrix_size))
        x = tf.transpose(x, perm=[1, 2, 0])

        # y is a batch of coordinate matrices.
        # A coordinate matrix is a matrix such that
        # coordinate[i][j] = j.
        y = tf.broadcast_to(tf.cast(tf.range(matrix_size), dtype=dtype),
                            (batch_size, matrix_size, matrix_size))
        # Warp matrix is obtained by applying hat function element-wise to (x-y).
        # Denoting the origin point of i under the warp map as orig_i,
        # and n_i = ceil(orig_i), the warp matrix element warp[i][j] is given by:
        # 1) j = n_i: 1 - n_i + orig_i.
        # 2) j = n_i - 1: n_i - orig_i.
        # 3) Otherwise: 0.
        # Applying the warp matrix to pixels, i.e.,
        # warped_pixel[i] = sum_j warp[i][j] * original_pixel[j], one would get
        # warped_pixel[i] = (n_i - orig_i) * original_pixel[n_i-1]
        #                   + (1 - n_i + orig_i) * original_pixel[n_i].
        warp_matrix = x - y
        warp_matrix = _hat(warp_matrix)
        if p.fprop_dtype is not None and p.fprop_dtype != dtype:
            warp_matrix = tf.cast(warp_matrix, p.fprop_dtype)

        return warp_matrix

    def _FrequencyMask(self, inputs, global_seed, dtype=tf.float32):
        """Applies frequency masking with given degree to inputs.

        Args:
          inputs: Batch of input features of shape (batch_size, time_length,
            num_freq, channels).
          global_seed: an integer seed tensor for stateless random ops.
          dtype: Data type.

        Returns:
          Inputs with random frequency masking applied.
        """
        p = self.params

        # Mask parameters.
        freq_mask_max_bins = p.freq_mask_max_bins
        multiplicity = p.freq_mask_count

        # If masking length or count is zero, do nothing.
        if freq_mask_max_bins == 0 or multiplicity == 0:
            return inputs

        # Arguments to pass to mask generator.
        batch_size, _, num_freq, _ = GetShape(inputs)
        choose_range = tf.cast(tf.broadcast_to(num_freq, (batch_size, )),
                               dtype=tf.int32)
        # Create masks in frequency direction and apply.
        block_arrays = self._GetMask(tf.shape(inputs)[0],
                                     choose_range=choose_range,
                                     mask_size=num_freq,
                                     global_seed=global_seed,
                                     max_length=freq_mask_max_bins,
                                     masks_per_frame=0.0,
                                     multiplicity=multiplicity,
                                     dtype=dtype,
                                     max_ratio=1.0)
        return self.EinsumBxycByBxyc(inputs, block_arrays)

    def _TimeMask(self,
                  inputs,
                  seq_lengths,
                  global_seed,
                  noisify=False,
                  gaussian_noise=False,
                  dtype=tf.float32):
        """Applies time masking with given degree to inputs.

        Args:
          inputs: Batch of input features of shape (batch_size, time_length,
            num_freq, channels).
          seq_lengths: The actual sequence lengths, of shape (batch_size,), used
            to bound the sampled masks.
          global_seed: an integer seed tensor for stateless random ops.
          noisify: Whether to noisify the masked out regions.
          gaussian_noise: Whether to use gaussian noise when noisifying.
          dtype: Data type.

        Returns:
          Inputs with random time masking applied.
        """
        p = self.params

        # Get time masking parameters.
        time_mask_max_frames = p.time_mask_max_frames
        time_masks_per_frame = p.time_masks_per_frame
        use_dynamic_time_mask_max_frames = \
            p.use_dynamic_time_mask_max_frames
        multiplicity = p.time_mask_count
        max_ratio = p.time_mask_max_ratio

        # If maximum mask length is zero, do nothing.
        if ((time_mask_max_frames == 0
             and not use_dynamic_time_mask_max_frames) or max_ratio <= 0.0):
            return inputs
        if multiplicity == 0:
            return inputs
        seq_lengths = tf.cast(seq_lengths, tf.int32)
        batch_size, time_length, _, _ = GetShape(inputs)

        # When using dynamic time mask size, discard upper-bound on
        # maximum allowed frames for time mask.
        if use_dynamic_time_mask_max_frames:
            time_mask_max_frames = None
        # Create masks in time direction and apply.
        block_arrays = self._GetMask(batch_size,
                                     choose_range=seq_lengths,
                                     mask_size=time_length,
                                     global_seed=global_seed,
                                     max_length=time_mask_max_frames,
                                     masks_per_frame=time_masks_per_frame,
                                     multiplicity=multiplicity,
                                     dtype=dtype,
                                     max_ratio=max_ratio)

        # Non-empty random seed values are only used for testing or when using
        # stateless random ops. seed_6 and seed_7 are set separately to avoid
        # correlation of warp magnitude and origin position.
        if p.use_input_dependent_random_seed:
            seed_6 = global_seed + 6
            seed_7 = global_seed + 7
        else:
            seed_6 = p.random_seed
            seed_7 = p.random_seed

        outputs = self.EinsumBxycBxBxyc(inputs,
                                        block_arrays,
                                        name='einsum_formasking')
        if noisify:
            # Sample noise with standard deviation factor * 0.1 + 0.0001.
            # TODO(ngyuzh): Make sure this won't affect EOS.
            if gaussian_noise:
                stddev = 1.0
            else:
                random_uniform = _random_uniform_op(
                    p.use_input_dependent_random_seed)
                factor = random_uniform(shape=(),
                                        minval=1.0,
                                        maxval=2.0,
                                        dtype=dtype,
                                        seed=seed_6)
                stddev = factor * 0.1 + 0.0001
            random_normal = _random_normal_op(
                p.use_input_dependent_random_seed)
            noise = random_normal(shape=[
                tf.shape(inputs)[0],
                tf.shape(inputs)[1],
                tf.shape(inputs)[2]
            ],
                                  stddev=stddev,
                                  seed=seed_7)
            if p.fprop_dtype is not None and p.fprop_dtype != p.dtype:
                noise = tf.cast(noise, p.fprop_dtype)
            outputs_mask = self.EinsumBxyBxBxy(noise,
                                               1.0 - block_arrays,
                                               name='einsum_fornoisymasking')
            outputs = outputs + tf.expand_dims(outputs_mask, -1)

        return outputs

    def _TimeWarp(self, inputs, seq_lengths, global_seed, dtype=tf.float32):
        """Applies time warping with given degree to inputs.

        Args:
          inputs: Batch of input features of shape (batch_size, time_length,
            num_freq, channels).
          seq_lengths: The actual sequence lengths, of shape (batch_size,), used
            to bound the sampled warps.
          global_seed: an integer seed tensor for stateless random ops.
          dtype: Data type.

        Returns:
          Inputs with random time warping applied.
        """
        p = self.params
        batch_size, time_length, _, _ = GetShape(inputs)

        # Get parameters for warping.
        time_warp_max_frames = p.time_warp_max_frames
        max_ratio = p.time_warp_max_ratio
        time_warp_bound = p.time_warp_bound
        assert time_warp_bound in ('static', 'dynamic')

        # If maximum warp length is zero, do nothing.
        if ((time_warp_max_frames == 0 and time_warp_bound == 'static')
                or max_ratio <= 0.0):
            return inputs
        seq_lengths = tf.cast(seq_lengths, tf.int32)

        # Discard upper-bound on time-warp frames when
        # dynamic time warping is used.
        if time_warp_bound == 'dynamic':
            time_warp_max_frames = None

        # Create warping matrix in time direction and apply
        warp_matrix = self._GetWarpMatrix(batch_size,
                                          choose_range=seq_lengths,
                                          matrix_size=time_length,
                                          global_seed=global_seed,
                                          max_warp_frames=time_warp_max_frames,
                                          dtype=dtype,
                                          max_ratio=max_ratio)

        return self.EinsumBxycBzxBzyc(inputs,
                                      warp_matrix,
                                      name='einsum_forwarping')

    def UnstackFeatures(self, src_inputs, src_paddings):
        """Unstacks src_input and src_paddings based off stack height."""
        sh = self.params.stack_height
        bs, old_series_length, _, channels = GetShape(src_inputs)
        unstacked_series_length = old_series_length * sh
        src_inputs = tf.reshape(src_inputs,
                                [bs, unstacked_series_length, -1, channels])
        content = 1 - src_paddings
        lengths = tf.cast(sh * tf.reduce_sum(content, axis=1), tf.int32)
        mask = tf.sequence_mask(lengths, maxlen=unstacked_series_length)
        src_paddings = 1 - tf.cast(mask, tf.int32)
        return src_inputs, src_paddings

    def _AugmentationNetwork(self, series_length, inputs, paddings,
                             global_seed):
        """Returns augmented features.

        Args:
          series_length: Total length of time series.
          inputs: Batch of input features of shape (batch_size, time_length,
            num_freq, channels).
          paddings: Batch of padding vectors of shape (batch_size, time_length).
          global_seed: an integer seed tensor for stateless random ops.

        Returns:
          Batch of output features of shape (batch_size, time_length, num_freq,
          channels) obtained by applying random augmentations to inputs.
        """
        p = self.params
        dtype = p.dtype

        # Unstack the features.
        if p.unstack:
            inputs, paddings = self.UnstackFeatures(inputs, paddings)

        lengths = tf.reduce_sum(1 - paddings, 1)

        inputs = self._TimeWarp(inputs,
                                lengths,
                                global_seed=global_seed,
                                dtype=dtype)
        inputs = self._TimeMask(inputs,
                                lengths,
                                global_seed=global_seed,
                                noisify=p.use_noise,
                                gaussian_noise=p.gaussian_noise,
                                dtype=dtype)
        inputs = self._FrequencyMask(inputs,
                                     global_seed=global_seed,
                                     dtype=dtype)

        # Restack the features after applying specaugment.
        if p.unstack:
            inputs = tf.reshape(
                inputs,
                [tf.shape(inputs)[0], series_length, -1,
                 tf.shape(inputs)[3]])

        return inputs

    def __call__(self, inputs, seq_len):
        """Applies data augmentation by randomly mask spectrum in inputs.

        Args:
          inputs: A tensor of shape [batch, time, freq, num_channels].
          paddings: A 0/1 tensor of shape [batch, time].

        Returns:
          A pair of 2 tensors:

          - augmented_inputs: A tensor of shape [batch, time, freq, num_channels].
          - paddings: A 0/1 tensor of shape [batch, time].
        """
        p = self.params

        paddings = 1 - tf.sequence_mask(
            seq_len, tf.shape(inputs)[1], dtype=tf.float32)

        inputs = tf.expand_dims(inputs, -1)

        # A tensor seed in case stateless random ops are needed.
        global_seed = None
        if p.use_input_dependent_random_seed:
            global_seed = _global_seed_from_inputs(inputs)

        batch_size, series_length, _, _ = GetShape(inputs)
        augmented_inputs = self._AugmentationNetwork(series_length,
                                                     inputs,
                                                     paddings,
                                                     global_seed=global_seed)

        return tf.reshape(augmented_inputs, [batch_size, series_length, -1])
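A hedged usage sketch for the class above; the shapes and overridden values are illustrative, and it assumes the surrounding module provides the helpers the class references (GetShape, _hat, _global_seed_from_inputs and the _random_*_op factories):

import tensorflow as tf

# Batch of 4 log-mel feature sequences of shape [batch, time, freq] plus lengths.
features = tf.random.uniform([4, 120, 80])
seq_len = tf.constant([120, 96, 80, 60])

# Override a couple of defaults via HParams.override_from_dict (see __init__).
augmenter = SpectrumAugmenter(config={
    'freq_mask_max_bins': 27,
    'time_mask_max_frames': 100,
})
augmented = augmenter(features, seq_len)  # shape [batch, time, freq]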
Example #11
class QTest(test.TestCase):

  hparams = hparam.HParams(
      learning_rate=1.25e-3,
      hidden_layers=[16, 16],
      initial_exploration=.5,
      discount=.99,
      exploration_decay_steps=256 // 16 * 25,
      exploration_decay_rate=.99,
      max_sequence_length=1,
      num_episodes=256,
      batch_size=16,
      num_iterations=100,
      assign_target_steps=10 * 16,
      huber_loss_delta=1.,
      num_quantiles=51)

  @test_util.skip_if(True)
  def test_q_ops_dqn(self):
    ops.reset_default_graph()
    np.random.seed(42)
    random_seed.set_random_seed(42)
    env = gym.make('CartPole-v0')
    env.seed(42)

    # Setup the policy and model
    global_step = training_util.get_or_create_global_step()
    deterministic_ph = array_ops.placeholder(
        dtypes.bool, [], name='deterministic')
    exploration_op = learning_rate_decay.exponential_decay(
        QTest.hparams.initial_exploration,
        global_step,
        QTest.hparams.exploration_decay_steps,
        QTest.hparams.exploration_decay_rate)


    state_distribution, state_ph = gym_ops.distribution_from_gym_space(
        env.observation_space, name='state_space')
    with variable_scope.variable_scope('logits'):
      action_value_op = mlp(state_ph, QTest.hparams.hidden_layers)
      action_distribution, action_value_op = gym_ops.distribution_from_gym_space(
          env.action_space, logits=[action_value_op], name='action_space')
      action_op = array_ops.squeeze(sampling_ops.epsilon_greedy(
          action_distribution, exploration_op, deterministic_ph))
    policy_variables = variables.trainable_variables(scope='logits')


    next_state_ph = shortcuts.placeholder_like(state_ph, name='next_state_space')
    with variable_scope.variable_scope('logits', reuse=True):
      next_action_value_op = mlp(next_state_ph, QTest.hparams.hidden_layers)
      next_action_distribution, next_action_value_op = gym_ops.distribution_from_gym_space(
          env.action_space, logits=[next_action_value_op], name='action_space')
      next_action_op = array_ops.squeeze(sampling_ops.epsilon_greedy(
          next_action_distribution, exploration_op, deterministic_ph))


    # Setup the dataset
    stream = streams.Uniform.from_distributions(
        state_distribution, action_distribution)
    replay_dataset = dataset.ReplayDataset(
        stream, max_sequence_length=QTest.hparams.max_sequence_length)
    replay_dataset = replay_dataset.batch(QTest.hparams.batch_size)
    replay_op = replay_dataset.make_one_shot_iterator().get_next()

    action_ph = array_ops.placeholder(
        stream.action_dtype, [None, None] + stream.action_shape, name='action')
    reward_ph = array_ops.placeholder(
        stream.reward_dtype, [None, None] + stream.reward_shape, name='reward')
    terminal_ph = array_ops.placeholder(
        dtypes.bool, [None, None], name='terminal')
    sequence_length_ph = array_ops.placeholder(
        dtypes.int32, [None, 1], name='sequence_length')
    sequence_length = array_ops.squeeze(sequence_length_ph, -1)

    q_value_op, expected_q_value_op = q_ops.expected_q_value(
        reward_ph,
        action_ph,
        action_value_op,
        next_action_value_op,
        weights=(1 - math_ops.cast(terminal_ph, reward_ph.dtype)),
        discount=QTest.hparams.discount)

    # mean_squared_error
    loss_op = math_ops.square(q_value_op - expected_q_value_op)

    loss_op = math_ops.reduce_mean(
        math_ops.reduce_sum(loss_op, axis=-1) / math_ops.cast(
            sequence_length, loss_op.dtype))
    optimizer = adam.AdamOptimizer(
        learning_rate=QTest.hparams.learning_rate)
    train_op = optimizer.minimize(loss_op, var_list=policy_variables)

    with self.test_session() as sess:
      sess.run(variables.global_variables_initializer())
      for iteration in range(QTest.hparams.num_iterations):
        rewards = gym_test_utils.rollout_on_gym_env(
            sess, env, state_ph, deterministic_ph,
            action_value_op, action_op,
            num_episodes=QTest.hparams.num_episodes,
            stream=stream)

        while True:
          try:
            replay = sess.run(replay_op)
          except (errors_impl.InvalidArgumentError, errors_impl.OutOfRangeError):
            break
          _, loss = sess.run(
              (train_op, loss_op),
              feed_dict={
                state_ph: replay.state,
                next_state_ph: replay.next_state,
                action_ph: replay.action,
                reward_ph: replay.reward,
                terminal_ph: replay.terminal,
                sequence_length_ph: replay.sequence_length,
              })

        rewards = gym_test_utils.rollout_on_gym_env(
            sess, env, state_ph, deterministic_ph,
            action_value_op, action_op,
            num_episodes=QTest.hparams.num_episodes,
            deterministic=True, save_replay=False)
        print('average_rewards = {}'.format(rewards / QTest.hparams.num_episodes))

  # @test_util.skip_if(True)
  def test_q_ops_double_dqn(self):
    env = gym.make('CartPole-v0')
    ops.reset_default_graph()
    np.random.seed(42)
    random_seed.set_random_seed(42)
    env.seed(42)

    # Setup the policy and model
    global_step = training_util.get_or_create_global_step()
    deterministic_ph = array_ops.placeholder(
        dtypes.bool, [], name='deterministic')
    exploration_op = learning_rate_decay.exponential_decay(
        QTest.hparams.initial_exploration,
        global_step,
        QTest.hparams.exploration_decay_steps,
        QTest.hparams.exploration_decay_rate)


    state_distribution, state_ph = gym_ops.distribution_from_gym_space(
        env.observation_space, name='state_space')
    with variable_scope.variable_scope('logits'):
      action_value_op = mlp(state_ph, QTest.hparams.hidden_layers)
      action_distribution, action_value_op = gym_ops.distribution_from_gym_space(
          env.action_space, logits=[action_value_op], name='action_space')
      action_op = array_ops.squeeze(sampling_ops.epsilon_greedy(
          action_distribution, exploration_op, deterministic_ph))
    policy_variables = variables.trainable_variables(scope='logits')


    next_state_ph = shortcuts.placeholder_like(state_ph, name='next_state_space')
    with variable_scope.variable_scope('logits', reuse=True):
      next_action_value_op = mlp(next_state_ph, QTest.hparams.hidden_layers)
      next_action_distribution, next_action_value_op = gym_ops.distribution_from_gym_space(
          env.action_space, logits=[next_action_value_op], name='action_space')
      next_action_op = array_ops.squeeze(sampling_ops.epsilon_greedy(
          next_action_distribution, exploration_op, deterministic_ph))

    with variable_scope.variable_scope('target_logits'):
      target_next_action_value_op = mlp(next_state_ph, QTest.hparams.hidden_layers)
      target_next_action_distribution, target_next_action_value_op = gym_ops.distribution_from_gym_space(
          env.action_space, logits=[target_next_action_value_op], name='action_space')
      target_next_action_op = array_ops.squeeze(sampling_ops.epsilon_greedy(
          target_next_action_distribution, exploration_op, deterministic_ph))
    assign_target_op = shortcuts.assign_scope('logits', 'target_logits')


    # Setup the dataset
    stream = streams.Uniform.from_distributions(
        state_distribution, action_distribution)
    replay_dataset = dataset.ReplayDataset(
        stream, max_sequence_length=QTest.hparams.max_sequence_length)
    replay_dataset = replay_dataset.batch(QTest.hparams.batch_size)
    replay_op = replay_dataset.make_one_shot_iterator().get_next()

    action_ph = array_ops.placeholder(
        stream.action_dtype, [None, None] + stream.action_shape, name='action')
    reward_ph = array_ops.placeholder(
        stream.reward_dtype, [None, None] + stream.reward_shape, name='reward')
    terminal_ph = array_ops.placeholder(
        dtypes.bool, [None, None], name='terminal')
    sequence_length_ph = array_ops.placeholder(
        dtypes.int32, [None, 1], name='sequence_length')
    sequence_length = array_ops.squeeze(sequence_length_ph, -1)
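
    # Double DQN: unlike test_q_ops_dqn above, both the online and the target
    # next-action value estimates are passed (as a tuple) to expected_q_value;
    # in the standard double-DQN estimator the online network selects the next
    # action and the target network evaluates it.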

    q_value_op, expected_q_value_op = q_ops.expected_q_value(
        reward_ph,
        action_ph,
        action_value_op,
        (next_action_value_op, target_next_action_value_op),
        weights=(1 - math_ops.cast(terminal_ph, reward_ph.dtype)),
        discount=QTest.hparams.discount)

    # mean_squared_error
    loss_op = math_ops.square(q_value_op - expected_q_value_op)
    loss_op = math_ops.reduce_mean(
        math_ops.reduce_sum(loss_op, axis=-1) / math_ops.cast(
            sequence_length, loss_op.dtype))
    optimizer = adam.AdamOptimizer(
        learning_rate=QTest.hparams.learning_rate)
    train_op = optimizer.minimize(loss_op, var_list=policy_variables)
    train_op = control_flow_ops.cond(
        gen_math_ops.equal(
            gen_math_ops.mod(
                ops.convert_to_tensor(
                    QTest.hparams.assign_target_steps, dtype=dtypes.int64),
                (global_step + 1)), 0),
        lambda: control_flow_ops.group(*[train_op, assign_target_op]),
        lambda: train_op)

    with self.test_session() as sess:
      sess.run(variables.global_variables_initializer())
      sess.run(assign_target_op)

      for iteration in range(QTest.hparams.num_iterations):
        rewards = gym_test_utils.rollout_on_gym_env(
            sess, env, state_ph, deterministic_ph,
            action_value_op, action_op,
            num_episodes=QTest.hparams.num_episodes,
            stream=stream)

        while True:
          try:
            replay = sess.run(replay_op)
          except (errors_impl.InvalidArgumentError, errors_impl.OutOfRangeError):
            break
          _, loss = sess.run(
              (train_op, loss_op),
              feed_dict={
                state_ph: replay.state,
                next_state_ph: replay.next_state,
                action_ph: replay.action,
                reward_ph: replay.reward,
                terminal_ph: replay.terminal,
                sequence_length_ph: replay.sequence_length,
              })

        rewards = gym_test_utils.rollout_on_gym_env(
            sess, env, state_ph, deterministic_ph,
            action_value_op, action_op,
            num_episodes=QTest.hparams.num_episodes,
            deterministic=True, save_replay=False)
        print('average_rewards = {}'.format(rewards / QTest.hparams.num_episodes))

  @test_util.skip_if(True)
  def test_q_ops_quantile_dqn(self):
    env = gym.make('CartPole-v0')
    ops.reset_default_graph()
    np.random.seed(42)
    random_seed.set_random_seed(42)
    env.seed(42)

    # Setup the policy and model
    global_step = training_util.get_or_create_global_step()
    deterministic_ph = array_ops.placeholder(
        dtypes.bool, [], name='deterministic')
    exploration_op = learning_rate_decay.exponential_decay(
        QTest.hparams.initial_exploration,
        global_step,
        QTest.hparams.exploration_decay_steps,
        QTest.hparams.exploration_decay_rate)

    state_distribution, state_ph = gym_ops.distribution_from_gym_space(
        env.observation_space, name='state_space')
    action_distribution, _ = gym_ops.distribution_from_gym_space(
        env.action_space, name='action_space')

    # Setup the dataset
    stream = streams.Uniform.from_distributions(
        state_distribution, action_distribution)

    with variable_scope.variable_scope('logits'):
      action_value_op = mlp(state_ph, QTest.hparams.hidden_layers)
      action_value_op = core.dense(
          action_value_op,
          stream.action_value_shape[-1] * QTest.hparams.num_quantiles,
          use_bias=False)
      action_value_op_shape = array_ops.shape(action_value_op)
      action_value_shape = [
          action_value_op_shape[0],
          action_value_op_shape[1],
          stream.action_value_shape[-1],
          QTest.hparams.num_quantiles]
      action_value_op = gen_array_ops.reshape(action_value_op, action_value_shape)
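      # Greedy action taken from the mean over the quantile estimates.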
      mean_action_value_op = math_ops.reduce_mean(action_value_op, axis=-1)
      action_op = math_ops.argmax(mean_action_value_op, axis=-1)
      action_op = array_ops.squeeze(action_op)
    policy_variables = variables.trainable_variables(scope='logits')

    next_state_ph = shortcuts.placeholder_like(state_ph, name='next_state_space')
    with variable_scope.variable_scope('target_logits'):
      target_next_action_value_op = mlp(next_state_ph, QTest.hparams.hidden_layers)
      target_next_action_value_op = core.dense(
          target_next_action_value_op,
          stream.action_value_shape[-1] * QTest.hparams.num_quantiles,
          use_bias=False)
      target_next_action_value_op_shape = array_ops.shape(target_next_action_value_op)
      target_next_action_value_shape = [
          target_next_action_value_op_shape[0],
          target_next_action_value_op_shape[1],
          stream.action_value_shape[-1],
          QTest.hparams.num_quantiles]
      target_next_action_value_op = gen_array_ops.reshape(
          target_next_action_value_op, target_next_action_value_shape)
      mean_target_next_action_value_op = math_ops.reduce_mean(
          target_next_action_value_op, axis=-1)
    # Copy the online 'logits' parameters into the 'target_logits' scope above.
    assign_target_op = shortcuts.assign_scope('logits', 'target_logits')

    # Batched replay sequences, read through a one-shot iterator.
    replay_dataset = dataset.ReplayDataset(
        stream, max_sequence_length=QTest.hparams.max_sequence_length)
    replay_dataset = replay_dataset.batch(QTest.hparams.batch_size)
    replay_op = replay_dataset.make_one_shot_iterator().get_next()

    action_ph = array_ops.placeholder(
        stream.action_dtype, [None, None] + stream.action_shape, name='action')
    reward_ph = array_ops.placeholder(
        stream.reward_dtype, [None, None] + stream.reward_shape, name='reward')
    terminal_ph = array_ops.placeholder(
        dtypes.bool, [None, None], name='terminal')
    sequence_length_ph = array_ops.placeholder(
        dtypes.int32, [None, 1], name='sequence_length')
    sequence_length = array_ops.squeeze(sequence_length_ph, -1)

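    # Distributional TD target from the target network's quantiles; the mean
    # over quantiles is used for greedy action selection, and terminal steps
    # are masked out through the weights.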
    q_value_op, expected_q_value_op = q_ops.expected_q_value(
        array_ops.expand_dims(reward_ph, -1),
        action_ph,
        action_value_op,
        (target_next_action_value_op, mean_target_next_action_value_op),
        weights=array_ops.expand_dims(
            1 - math_ops.cast(terminal_ph, reward_ph.dtype), -1),
        discount=QTest.hparams.discount)

    u = expected_q_value_op - q_value_op
    loss_op = losses_impl.huber_loss(u, delta=QTest.hparams.huber_loss_delta)

    # Quantile midpoints tau_i = (2i + 1) / (2N) for the N quantile estimates.
    tau_op = (2. * math_ops.range(
        0, QTest.hparams.num_quantiles, dtype=u.dtype) + 1) / (
            2. * QTest.hparams.num_quantiles)

    # Asymmetric quantile weighting |tau - 1{u < 0}| applied to the Huber loss,
    # then averaged over the quantile dimension.
    loss_op *= math_ops.abs(tau_op - math_ops.cast(u < 0, tau_op.dtype))
    loss_op = math_ops.reduce_mean(loss_op, axis=-1)

    loss_op = math_ops.reduce_mean(
        math_ops.reduce_sum(loss_op, axis=-1) / math_ops.cast(
            sequence_length, loss_op.dtype))
    optimizer = adam.AdamOptimizer(
        learning_rate=QTest.hparams.learning_rate)
    train_op = optimizer.minimize(loss_op, var_list=policy_variables)
    # Sync the target network every `assign_target_steps` training steps.
    train_op = control_flow_ops.cond(
        gen_math_ops.equal(
            gen_math_ops.mod(
                (global_step + 1),
                ops.convert_to_tensor(
                    QTest.hparams.assign_target_steps, dtype=dtypes.int64)), 0),
        lambda: control_flow_ops.group(*[train_op, assign_target_op]),
        lambda: train_op)

    with self.test_session() as sess:
      sess.run(variables.global_variables_initializer())
      sess.run(assign_target_op)

      for iteration in range(QTest.hparams.num_iterations):
        rewards = gym_test_utils.rollout_on_gym_env(
            sess, env, state_ph, deterministic_ph,
            mean_action_value_op, action_op,
            num_episodes=QTest.hparams.num_episodes,
            stream=stream)

        while True:
          try:
            replay = sess.run(replay_op)
          except (errors_impl.InvalidArgumentError, errors_impl.OutOfRangeError):
            break
          loss, _ = sess.run(
              (loss_op, train_op),
              feed_dict={
                state_ph: replay.state,
                next_state_ph: replay.next_state,
                action_ph: replay.action,
                reward_ph: replay.reward,
                terminal_ph: replay.terminal,
                sequence_length_ph: replay.sequence_length,
              })

        rewards = gym_test_utils.rollout_on_gym_env(
            sess, env, state_ph, deterministic_ph,
            mean_action_value_op, action_op,
            num_episodes=QTest.hparams.num_episodes,
            deterministic=True, save_replay=False)
        print('average_rewards = {}'.format(rewards / QTest.hparams.num_episodes))
Example #12
0
import os

from tensorflow.contrib.learn.python.learn import evaluable  # pylint: disable=g-import-not-at-top
from tensorflow.contrib.learn.python.learn import experiment
from tensorflow.contrib.learn.python.learn import learn_runner
from tensorflow.contrib.learn.python.learn import trainable

from tensorflow.contrib.learn.python.learn.estimators import run_config as run_config_lib
from tensorflow.contrib.training.python.training import hparam as hparam_lib
from tensorflow.python.platform import test
from tensorflow.python.platform import tf_logging

patch = test.mock.patch

_MODIR_DIR = "/tmp"
_HPARAMS = hparam_lib.HParams(learning_rate=0.01)
_MUST_SPECIFY_OUTPUT_DIR_MSG = "Must specify an output directory"
_MISSING_MODEL_DIR_ERR_MSG = "Must specify a model directory in `run_config`."
_EXP_NOT_CALLABLE_MSG = "Experiment builder .* is not callable"
_INVALID_HPARAMS_ERR_MSG = "`hparams` must be `HParams` instance"
_NOT_EXP_TYPE_MSG = "Experiment builder did not return an Experiment"
_NON_EXIST_TASK_MSG = "Schedule references non-existent task"
_NON_CALLABLE_MSG = "Schedule references non-callable member"
_MUST_SPECIFY_OUTPUT_DIR_OR_CONFIG_MSG = (
    "Must set value for `output_dir` or `run_config`")
_HPARAMS_CANNOT_BE_SET_FOR_OUTPUT_DIR_MSG = (
    "Must set `hparams` as None for `experiment_fn` with `output_dir`.")
_CANNOT_SET_BOTH_OUTPUT_DIR_AND_CONFIG_MSG = (
    "Cannot provide both `output_dir` and `run_config`")
_INVALID_RUN_CONFIG_TYPE_MSG = "`run_config` must be `RunConfig` instance"
_RUN_CONFIG_UID_CHECK_ERR_MSG = (
Example #13
0
class XlaDecoratorTest(test.TestCase, parameterized.TestCase):

  @parameterized.named_parameters(
      ('test_use_as_decorator', decorated_model_fn, None),
      ('test_use_as_function', xla.estimator_model_fn(_test_train_model_fn),
       None),
      ('test_use_tpu_false_hparams', decorated_model_fn,
       hparam.HParams(use_tpu=False)),
      ('test_use_tpu_false_dict_params', decorated_model_fn, {
          'use_tpu': False
      }),
  )
  def test_compile(self, model_fn, params):
    """Calls model_fn and verifies it is compiled."""
    with test.mock.patch.object(xla, 'compile') as mock_xla_compile:
      loss = constant_op.constant(_EXPECTED_LOSS)
      mock_xla_compile.return_value = [loss]

      features, labels = make_dummy_features_labels()
      estimator_spec = model_fn(
          features=features, labels=labels, mode=_TRAIN, params=params or {})

      mock_xla_compile.assert_called_once()
      self.assertEqual(estimator_spec.mode, _TRAIN)

      with self.test_session() as sess:
        self.assertEqual(sess.run(estimator_spec.loss), sess.run(loss))
        self.assertEqual(sess.run(estimator_spec.train_op), sess.run(loss))

  @parameterized.named_parameters(
      ('test_use_tpu_true_hparams', decorated_model_fn,
       hparam.HParams(use_tpu=True)),
      ('test_use_tpu_true_dict_params', decorated_model_fn, {
          'use_tpu': True
      }),
  )
  def test_not_compile(self, model_fn, params):
    """Calls model_fn and verifies it is NOT compiled."""
    with test.mock.patch.object(xla, 'compile') as mock_xla_compile:
      loss = constant_op.constant(_EXPECTED_LOSS)
      mock_xla_compile.return_value = [loss]

      features, labels = make_dummy_features_labels()
      estimator_spec = model_fn(
          features=features, labels=labels, mode=_TRAIN, params=params or {})

      mock_xla_compile.assert_not_called()
      self.assertEqual(estimator_spec.mode, _TRAIN)

      with self.test_session() as sess:
        self.assertEqual(sess.run(estimator_spec.loss), sess.run(loss))
        self.assertEqual(sess.run(estimator_spec.train_op), sess.run(loss))

  def test_model_with_summary(self):
    """Tests that summary ops are disabled."""

    @xla.estimator_model_fn
    def model_fn_with_summary(features, labels, mode, params):
      del features, labels, params
      loss = constant_op.constant(_EXPECTED_LOSS)
      summary.scalar('loss_scalar_summary', loss)
      summary.histogram('loss_histogram_summary', loss)
      summary.image('loss_image_summary', loss)
      return model_fn_lib.EstimatorSpec(
          mode=mode, loss=loss, train_op=array_ops.identity(loss))

    features, labels = make_dummy_features_labels()
    estimator_spec = model_fn_with_summary(
        features=features, labels=labels, mode=_TRAIN, params={})

    with self.test_session() as sess:
      self.assertEqual(sess.run(estimator_spec.loss), _EXPECTED_LOSS)
Example #14
0
      default=5)
  parser.add_argument(
      '--agent',
      help='type of agent, one of [DDPG|TD3|C2A2]',
      default='DDPG')
  parser.add_argument(
      '--job-dir',
      help='dir to save logs and videos',
      default='./results')
  parser.add_argument(
      '--record-video',
      help='whether to record video when testing',
      action='store_true')
  parser.add_argument(
      '--verbosity',
      choices=['DEBUG', 'ERROR', 'FATAL', 'INFO', 'WARN'],
      default='INFO')

  args, _ = parser.parse_known_args()
  # Set python level verbosity
  tf.logging.set_verbosity(args.verbosity)
  # Set C++ Graph Execution level verbosity
  os.environ['TF_CPP_MIN_LOG_LEVEL'] = str(
      tf.logging.__dict__[args.verbosity] // 10)

  for k, v in args.__dict__.items():
    tf.logging.info('{}: {}'.format(k, v))

  config = hparam.HParams(**args.__dict__)
  train(config)
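
  # A hypothetical invocation of this script (script name and paths are
  # illustrative only):
  #   python trainer.py --agent TD3 --job-dir /tmp/td3_results --record-video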