def iterative_body(self, x, indices, remaining, current_x, current_internals):
    """Run a single step of the iterative temporal loop (body of ``tf.while_loop``).

    Gathers the current input of every batch entry from ``x``, feeds it through
    ``self.iterative_apply`` and updates the per-entry counters. Entries whose
    ``remaining`` counter is already zero keep their previous internals; the
    step output ``next_x`` is returned unmasked.

    Args:
        x: Tensor from which the step inputs are gathered along axis 0.
        indices: Per-entry integer positions in ``x`` to read in this step.
        remaining: Per-entry integer counters of steps still to be processed.
        current_x: Output of the previous step. Only its leading dimension is
            read (to size the helper vectors) before the name is rebound to the
            gathered input.
        current_internals: Internals of the previous step, either a dict-like
            collection providing ``zip_items`` or a single tensor.

    Returns:
        Tuple ``(x, indices, remaining, next_x, next_internals)``, matching the
        order of the arguments so it can be used as loop variables.
    """
    # Keep the leading dimension as a rank-1 tensor so it can serve as shape argument
    batch_shape = tf_util.cast(x=tf.shape(input=current_x)[:1], dtype='int')
    zeros = tf_util.zeros(shape=batch_shape, dtype='int')
    ones = tf_util.ones(shape=batch_shape, dtype='int')

    current_x = tf.gather(params=x, indices=indices)

    next_x, next_internals = self.iterative_apply(x=current_x, internals=current_internals)

    with tf.control_dependencies(control_inputs=(current_x, next_x)):
        is_finished = tf.math.equal(x=remaining, y=zeros)

        def keep_if_finished(previous, updated):
            # Append singleton axes so the per-entry mask broadcasts over the value
            mask = is_finished
            for _ in range(tf_util.rank(x=previous) - 1):
                mask = tf.expand_dims(input=mask, axis=1)
            return tf.where(condition=mask, x=previous, y=updated)

        if isinstance(next_internals, dict):
            for name, current_internal, next_internal in current_internals.zip_items(
                next_internals
            ):
                next_internals[name] = keep_if_finished(current_internal, next_internal)
        else:
            next_internals = keep_if_finished(current_internals, next_internals)

        # Count down only entries which still had steps left
        remaining -= tf.where(condition=is_finished, x=zeros, y=ones)
        # Advance the read position only if further steps remain after this one
        indices += tf.where(condition=tf.math.equal(x=remaining, y=zeros), x=zeros, y=ones)

    return x, indices, remaining, next_x, next_internals
def variable(self, *, name, spec, initializer, is_trainable, is_saved, initialization_scale=None):
    """Validate the arguments and create a ``tf.Variable``.

    Must be called while ``self.is_initialized`` is ``False`` (checked via assert).

    Args:
        name: Variable name, must be a string.
        spec: ``TensorSpec`` describing type and shape; must not be underspecified.
        initializer: Either a concrete initial value (Python value of the spec
            type, ``np.ndarray`` or ``tf.Tensor`` matching the spec) or one of
            the names ``'constant'``, ``'normal'``, ``'normal-relu'``, ``'ones'``,
            ``'orthogonal'``, ``'orthogonal-relu'``, ``'zeros'``.
        is_trainable: Whether the variable is trainable; only allowed for float specs.
        is_saved: Stored as ``is_saved`` attribute on the returned variable.
        initialization_scale: Fill value for ``'constant'`` (required in that
            case) or positive scaling factor for the orthogonal initializers;
            must be ``None`` for all other initializers.

    Returns:
        The created ``tf.Variable`` with the additional ``is_saved`` attribute.

    Raises:
        TensorforceError: If an argument has an invalid type or value, or if
            initializer and spec do not match.
    """
    assert self.is_initialized is False

    # name
    if not isinstance(name, str):
        raise TensorforceError.type(name='variable', argument='name', dtype=type(name))

    # spec
    if not isinstance(spec, TensorSpec):
        raise TensorforceError.dtype(name='variable', argument='spec', dtype=type(spec))
    if spec.is_underspecified():
        raise TensorforceError.value(
            name='variable', argument='spec', value=spec, hint='underspecified'
        )

    # initializer
    initializer_names = (
        'constant', 'normal', 'normal-relu', 'ones', 'orthogonal', 'orthogonal-relu', 'zeros'
    )
    # Concrete value (as opposed to the name of an initialization scheme)
    is_concrete_value = isinstance(initializer, (spec.py_type(), np.ndarray, tf.Tensor))
    if not is_concrete_value and initializer not in initializer_names:
        raise TensorforceError.value(name='variable', argument='initializer', value=initializer)
    elif isinstance(initializer, np.ndarray) and initializer.dtype != spec.np_type():
        raise TensorforceError.type(
            name='variable', argument='initializer', dtype=initializer.dtype
        )
    elif isinstance(initializer, tf.Tensor) and \
            tf_util.dtype(x=initializer) != spec.tf_type():
        raise TensorforceError.type(
            name='variable', argument='initializer', dtype=tf_util.dtype(x=initializer)
        )

    # initialization_scale
    if initialization_scale is not None:
        if is_concrete_value or \
                initializer not in ('constant', 'orthogonal', 'orthogonal-relu'):
            # 'constant' is accepted as well, so the message names both options
            raise TensorforceError.invalid(
                name='variable', argument='initialization_scale',
                condition='initializer not constant/orthogonal'
            )
        elif not isinstance(initialization_scale, spec.py_type()):
            raise TensorforceError.type(
                name='variable', argument='initialization_scale',
                dtype=type(initialization_scale), hint='!= float'
            )

    # is_trainable
    if not isinstance(is_trainable, bool):
        raise TensorforceError.type(
            name='variable', argument='is_trainable', dtype=type(is_trainable)
        )
    elif is_trainable and spec.type != 'float':
        raise TensorforceError.value(
            name='variable', argument='is_trainable', value=is_trainable,
            condition='spec.type != float'
        )

    # is_saved
    if not isinstance(is_saved, bool):
        raise TensorforceError.type(name='variable', argument='is_saved', dtype=type(is_saved))

    # Variable initializer
    if isinstance(initializer, spec.py_type()):
        initializer = tf_util.constant(value=initializer, dtype=spec.type, shape=spec.shape)

    elif isinstance(initializer, np.ndarray):
        if initializer.shape != spec.shape:
            raise TensorforceError.mismatch(
                name='Module.variable', value1='shape', value2='initializer'
            )
        initializer = tf_util.constant(value=initializer, dtype=spec.type)

    elif isinstance(initializer, tf.Tensor):
        # A tensor is used as initial value as is, once its shape is confirmed
        if tf_util.shape(x=initializer) != spec.shape:
            raise TensorforceError.mismatch(
                name='Module.variable', value1='shape', value2='initializer'
            )

    elif not isinstance(initializer, str):
        raise TensorforceError("Invalid variable initializer: {}".format(initializer))

    elif initializer.startswith('normal'):
        if spec.type != 'float':
            raise TensorforceError(
                message="Invalid variable initializer value for non-float variable: {}.".format(
                    initializer
                )
            )
        # Standard deviation derived from the shape, capped at 0.1
        if initializer.endswith('-relu'):
            stddev = min(0.1, np.sqrt(2.0 / util.product(xs=spec.shape[:-1])))
        else:
            stddev = min(
                0.1, np.sqrt(2.0 / (util.product(xs=spec.shape[:-1]) + spec.shape[-1]))
            )
        initializer = tf.random.normal(shape=spec.shape, stddev=stddev, dtype=spec.tf_type())

    elif initializer.startswith('orthogonal'):
        if spec.type != 'float':
            raise TensorforceError(
                message="Invalid variable initializer value for non-float variable: {}.".format(
                    initializer
                )
            )
        if spec.rank < 2:
            raise TensorforceError(
                message="Invalid variable initializer value for 0/1-rank variable: {}.".format(
                    initializer
                )
            )
        # Flatten all leading axes into one, then take the SVD factor of matching shape
        normal = np.random.normal(size=(util.product(xs=spec.shape[:-1]), spec.shape[-1]))
        u, _, v = np.linalg.svd(a=normal, full_matrices=False)
        orthogonal = u if u.shape[1] == spec.shape[-1] else v
        if initializer.endswith('-relu'):
            orthogonal = orthogonal * np.sqrt(2.0)
        if initialization_scale is not None and initialization_scale != 1.0:
            if initialization_scale <= 0.0:
                raise TensorforceError.value(
                    name='variable', argument='initialization_scale',
                    value=initialization_scale, hint='<= 0.0'
                )
            orthogonal = orthogonal * initialization_scale
        initializer = tf_util.constant(value=orthogonal.reshape(spec.shape), dtype=spec.type)

    elif initializer == 'zeros':
        initializer = tf_util.zeros(shape=spec.shape, dtype=spec.type)

    elif initializer == 'ones':
        initializer = tf_util.ones(shape=spec.shape, dtype=spec.type)

    elif initializer == 'constant':
        # The fill value comes from initialization_scale, so it must be given
        if initialization_scale is None:
            raise TensorforceError.value(
                name='variable', argument='initialization_scale', value=initialization_scale,
                hint='required for constant initializer'
            )
        initializer = tf.fill(
            dims=spec.shape, value=tf_util.constant(value=initialization_scale, dtype=spec.type)
        )

    # Variable
    variable = tf.Variable(
        initial_value=initializer, trainable=is_trainable, validate_shape=True, name=name,
        dtype=spec.tf_type(), shape=spec.shape
    )
    variable.is_saved = is_saved

    return variable
def apply(self, *, x, horizons, internals):
    """Apply the layer over the temporal horizon of every batch entry.

    Depending on ``self.temporal_processing``, the inputs of each horizon are
    either collected and passed to ``cumulative_apply`` at once
    (``'cumulative'``) or processed step by step via ``iterative_body``
    (``'iterative'``). The result is then passed on to the parent ``apply``.

    Args:
        x: Input tensor from which the per-step inputs are gathered along axis 0.
        horizons: Integer tensor indexed as ``horizons[:, 0]`` (start position
            in ``x``) and ``horizons[:, 1]`` (number of steps) per batch entry.
        internals: Internals passed to the iterative processing; not read in
            the ``'cumulative'`` case.

    Returns:
        For ``'cumulative'`` the output tensor, for ``'iterative'`` a tuple of
        output tensor and updated internals.
    """
    zero = tf_util.constant(value=0, dtype='int')
    one = tf_util.constant(value=1, dtype='int')
    batch_size = tf_util.cast(x=tf.shape(input=horizons)[0], dtype='int')
    zeros = tf_util.zeros(shape=(batch_size,), dtype='int')
    ones = tf_util.ones(shape=(batch_size,), dtype='int')

    # including 0th step
    horizon = self.horizon.value() + one
    # in case of longer horizon than necessary (e.g. main vs baseline policy)
    excess = tf.maximum(x=(horizons[:, 1] - horizon), y=zeros)
    starts = horizons[:, 0] + excess
    lengths = horizons[:, 1] - excess
    horizon = tf.minimum(x=horizon, y=tf.math.reduce_max(input_tensor=lengths, axis=0))
    output_spec = self.output_spec()

    # Only bound by the while loops below; remain None if no loop is required (horizon 0)
    final_indices = None
    final_remaining = None

    if self.temporal_processing == 'cumulative':
        if self.horizon.is_constant(value=0):
            # NOTE(review): every other call site uses iterative_apply(x=, internals=) and
            # cumulative_apply(xs=, lengths=); this call looks inconsistent -- confirm.
            x = self.iterative_apply(xs=x, lengths=ones)

        else:
            def body(x, indices, remaining, xs):
                # Append the current step of every entry along the time axis
                current_x = tf.gather(params=x, indices=indices)
                current_x = tf.expand_dims(input=current_x, axis=1)
                xs = tf.concat(values=(xs, current_x), axis=1)
                remaining -= tf.where(
                    condition=tf.math.equal(x=remaining, y=zeros), x=zeros, y=ones
                )
                indices += tf.where(
                    condition=tf.math.equal(x=remaining, y=zeros), x=zeros, y=ones
                )
                return x, indices, remaining, xs

            # Empty time axis, extended by one step per loop iteration
            initial_xs = tf_util.zeros(
                shape=((batch_size, 0) + output_spec.shape), dtype=output_spec.type
            )

            _, final_indices, final_remaining, xs = tf.while_loop(
                cond=tf_util.always_true, body=body,
                loop_vars=(x, starts, lengths, initial_xs),
                maximum_iterations=tf_util.int64(x=horizon)
            )

            x = self.cumulative_apply(xs=xs, lengths=lengths)

    elif self.temporal_processing == 'iterative':
        if self.horizon.is_constant(value=0):
            # Single step: return the updated internals instead of the stale inputs
            x, internals = self.iterative_apply(x=x, internals=internals)

        else:
            initial_x = tf_util.zeros(
                shape=((batch_size,) + output_spec.shape), dtype=output_spec.type
            )

            # Loop variables are passed in args form and converted back afterwards
            signature = self.input_signature(function='iterative_body')
            internals = signature['current_internals'].kwargs_to_args(kwargs=internals)
            _, final_indices, final_remaining, x, final_internals = tf.while_loop(
                cond=tf_util.always_true, body=self.iterative_body,
                loop_vars=(x, starts, lengths, initial_x, internals),
                maximum_iterations=tf_util.int32(x=horizon)
            )
            internals = signature['current_internals'].args_to_kwargs(args=final_internals)

    assertions = list()
    # The loop-state checks only apply if a while loop was actually executed
    if self.config.create_tf_assertions and final_indices is not None:
        assertions.append(tf.debugging.assert_equal(
            x=final_indices, y=(tf.math.cumsum(x=lengths) - ones)
        ))
        assertions.append(tf.debugging.assert_equal(
            x=tf.math.reduce_sum(input_tensor=final_remaining), y=zero
        ))

    with tf.control_dependencies(control_inputs=assertions):
        if self.temporal_processing == 'cumulative':
            return tf_util.identity(input=super().apply(x=x))
        elif self.temporal_processing == 'iterative':
            return tf_util.identity(input=super().apply(x=x)), internals