def add_module(self, *args, **kwargs):
    """Registers a sub-module (layer or parameter) with this network.

    If the caller supplies an explicit ``input_spec``, it is forwarded
    unchanged; otherwise the module's input spec is taken from the running
    ``self.output_spec``, which is initialized from ``self.inputs_spec`` on
    first use and updated from each added module's ``output_spec``.

    Raises:
        TensorforceError: if the created module is neither a Layer nor a
            Parameter.
    """
    if 'input_spec' in kwargs:
        # Explicit input spec given by the caller: pass kwargs through as-is.
        module = super().add_module(*args, modules=layer_modules, **kwargs)
        self.output_spec = module.output_spec
    else:
        if self.output_spec is None:
            # First module added: derive its input spec from the network inputs.
            if util.is_atomic_values_spec(values_spec=self.inputs_spec):
                self.output_spec = self.inputs_spec
            elif len(self.inputs_spec) == 1:
                # Single named input: unwrap it to its atomic spec.
                self.output_spec = next(iter(self.inputs_spec.values()))
            else:
                # Multiple inputs: type and shape cannot be determined here.
                self.output_spec = dict(type=None, shape=None)
        module = super().add_module(
            *args, modules=layer_modules, input_spec=self.output_spec, **kwargs
        )
        self.output_spec = module.output_spec
    if not isinstance(module, (Layer, Parameter)):
        raise TensorforceError.type(
            name='layer-based network', argument='sub-module', value=module
        )
    return module
def validated_tf_function(x, internals, return_internals=False):
    # Validation wrapper around the enclosing scope's `tf_function`: checks the
    # input `x` against `self.inputs_spec` (atomic spec, or dict of named specs)
    # and `internals` against the network's internals spec, then calls
    # `tf_function` and checks its output(s) against the corresponding specs.
    if util.is_atomic_values_spec(values_spec=self.inputs_spec):
        # Atomic input spec: `x` is a single value.
        if not util.is_consistent_with_value_spec(value_spec=self.inputs_spec, x=x):
            raise TensorforceError("Invalid input arguments for tf_apply.")
    else:
        # Dict input spec: every named input must match its spec.
        if not all(
            util.is_consistent_with_value_spec(value_spec=spec, x=x[name])
            for name, spec in self.inputs_spec.items()
        ):
            raise TensorforceError("Invalid input arguments for tf_apply.")
    # Validate the incoming internal states against the network's internals spec.
    if not all(
        util.is_consistent_with_value_spec(value_spec=spec, x=internals[name])
        for name, spec in self.__class__.internals_spec(network=self).items()
    ):
        raise TensorforceError("Invalid input arguments for tf_apply.")
    if return_internals:
        # Caller wants the updated internal states back as well.
        x, internals = tf_function(x=x, internals=internals, return_internals=True)
    else:
        x = tf_function(x=x, internals=internals, return_internals=False)
    # Validate the network output against the (inferred) output spec.
    if not util.is_consistent_with_value_spec(value_spec=self.get_output_spec(), x=x):
        raise TensorforceError("Invalid output arguments for tf_apply.")
    # If internals were returned, they must match the internals spec too.
    if return_internals and not all(
        util.is_consistent_with_value_spec(value_spec=spec, x=internals[name])
        for name, spec in self.__class__.internals_spec(network=self).items()
    ):
        raise TensorforceError("Invalid output arguments for tf_apply.")
    if return_internals:
        return x, internals
    else:
        return x
def add_module(self, *args, **kwargs):
    """Registers a sub-module (layer or parameter) with this network.

    If the caller supplies an explicit ``input_spec``, it is forwarded
    unchanged; otherwise the module's input spec is taken from the running
    ``self.output_spec`` (initialized from ``self.inputs_spec`` on first use,
    updated from each added module's ``output_spec``).

    Raises:
        TensorforceError: if the created module is neither a Layer nor a
            Parameter.
    """
    if 'input_spec' in kwargs:
        layer = super().add_module(*args, **kwargs)
        self.output_spec = layer.output_spec
    else:
        if self.output_spec is None:
            # First module added: derive its input spec from the network inputs.
            if util.is_atomic_values_spec(values_spec=self.inputs_spec):
                self.output_spec = self.inputs_spec
            elif len(self.inputs_spec) == 1:
                self.output_spec = next(iter(self.inputs_spec.values()))
            else:
                # Multiple inputs: input spec cannot be determined here.
                self.output_spec = None
        # Fix: the original re-checked `'input_spec' in kwargs` at this point
        # and called util.unify_value_specs, but inside this else-clause that
        # key is necessarily absent, so the unify branch was unreachable dead
        # code; only the plain assignment below can ever execute.
        if self.output_spec is not None:
            kwargs['input_spec'] = self.output_spec
        layer = super().add_module(*args, **kwargs)
        self.output_spec = layer.output_spec
    if not isinstance(layer, (Layer, Parameter)):
        raise TensorforceError.type(name='layer-based network', argument='sub-module', value=layer)
    return layer
def is_valid_actions_function(cls, actions_spec):
    """Builds a validator callable for actions conforming to ``actions_spec``.

    For an atomic spec, delegates to ``cls.is_valid_action_function``; for a
    dict spec, returns a callable that checks every named action against its
    own (possibly nested) spec.
    """
    if util.is_atomic_values_spec(values_spec=actions_spec):
        return cls.is_valid_action_function(action_spec=actions_spec)
    else:
        # Fix: the recursive call may return either the atomic validator or,
        # for nested dict specs, a `lambda actions: ...`. The original invoked
        # it with the keyword `action=...`, which raises TypeError on the
        # nested lambda (its parameter is named `actions`). Calling positionally
        # works for both cases.
        return (lambda actions: all(
            cls.is_valid_actions_function(actions_spec=action_spec)(actions[name])
            for name, action_spec in actions_spec.items()))
def random_states_function(cls, states_spec):
    """Builds a zero-argument sampler of random states for ``states_spec``.

    An atomic spec delegates to ``cls.random_state_function``; a dict spec
    yields a callable returning a dict with one sampled value per named
    (possibly nested) sub-spec.
    """
    if util.is_atomic_values_spec(values_spec=states_spec):
        # Single atomic spec: the per-state sampler is the result itself.
        return cls.random_state_function(state_spec=states_spec)
    # Dict of specs: sample each entry recursively on every invocation.
    def sample_all():
        return {
            name: cls.random_states_function(states_spec=sub_spec)()
            for name, sub_spec in states_spec.items()
        }
    return sample_all
def is_valid_actions_function(cls, actions_spec):
    """Builds a validator ``callable(actions, states) -> bool`` for the spec.

    An atomic spec validates the single action under the fixed name 'action';
    a dict spec requires every named action to validate against its own spec.
    """
    if util.is_atomic_values_spec(values_spec=actions_spec):
        def validate(actions, states):
            # Single action: positional call with the conventional name.
            validator = cls.is_valid_action_function(action_spec=actions_spec)
            return validator(actions, 'action', states)
        return validate
    def validate(actions, states):
        # Every named action must satisfy its own spec.
        return all(
            cls.is_valid_action_function(action_spec=sub_spec)(
                action=actions[name], name=name, states=states
            )
            for name, sub_spec in actions_spec.items()
        )
    return validate
def __init__(self, level, visualize=False, max_episode_steps=None, terminal_reward=0.0, reward_threshold=None, drop_states_indices=None, visualize_directory=None, **kwargs):
    """Wraps an OpenAI Gym environment.

    Args:
        level: Gym id string, a gym.Env instance, or a gym.Env subclass.
        visualize: Whether to render the environment.
        max_episode_steps: Episode step limit (taken from the registry when
            `level` is an id string).
        terminal_reward: Additional reward granted on terminal steps.
        reward_threshold: Passed through to level creation for id strings.
        drop_states_indices: Indices to drop from a flat (rank-1) state vector.
        visualize_directory: If given, record episodes via gym.wrappers.Monitor.
        **kwargs: Forwarded to environment construction.
    """
    super().__init__()

    import gym
    import gym.wrappers

    self.level = level
    self.visualize = visualize
    self.terminal_reward = terminal_reward

    if isinstance(level, gym.Env):
        # Pre-instantiated environment: record its class name as the level id.
        self.environment = self.level
        self.level = self.level.__class__.__name__
        self.max_episode_steps = max_episode_steps
    elif isinstance(level, type) and issubclass(level, gym.Env):
        # Environment class: instantiate it ourselves.
        self.environment = self.level(**kwargs)
        # Fix: `self.level` is the class itself here, so
        # `self.level.__class__.__name__` would yield 'type' (the metaclass
        # name) rather than the environment class name; use `__name__`.
        self.level = self.level.__name__
        self.max_episode_steps = max_episode_steps
    else:
        # Level id string: resolve via the Gym registry.
        self.environment, self.max_episode_steps = self.__class__.create_level(
            level=self.level, max_episode_steps=max_episode_steps,
            reward_threshold=reward_threshold, **kwargs)

    if visualize_directory is not None:
        self.environment = gym.wrappers.Monitor(
            env=self.environment, directory=visualize_directory)

    self.states_spec = OpenAIGym.specs_from_gym_space(
        space=self.environment.observation_space, ignore_value_bounds=True  # TODO: not ignore?
    )
    if drop_states_indices is None:
        self.drop_states_indices = None
    else:
        # Dropping indices only makes sense for a single flat state vector.
        assert util.is_atomic_values_spec(values_spec=self.states_spec)
        self.drop_states_indices = sorted(drop_states_indices)
        assert len(self.states_spec['shape']) == 1
        num_dropped = len(self.drop_states_indices)
        self.states_spec['shape'] = (self.states_spec['shape'][0] - num_dropped, )

    self.actions_spec = OpenAIGym.specs_from_gym_space(
        space=self.environment.action_space, ignore_value_bounds=False)
def validated_tf_function(x):
    """Wraps the enclosing `tf_function`, validating input and output specs."""
    # Check the input against self.inputs_spec (atomic spec or dict of specs).
    if util.is_atomic_values_spec(values_spec=self.inputs_spec):
        input_ok = util.is_consistent_with_value_spec(value_spec=self.inputs_spec, x=x)
    else:
        input_ok = all(
            util.is_consistent_with_value_spec(value_spec=spec, x=x[name])
            for name, spec in self.inputs_spec.items()
        )
    if not input_ok:
        raise TensorforceError("Invalid input arguments for tf_apply.")
    x = tf_function(x=x)
    # Check the result against the layer's output spec.
    if not util.is_consistent_with_value_spec(value_spec=self.get_output_spec(), x=x):
        raise TensorforceError("Invalid output arguments for tf_apply.")
    return x
def random_states_function(cls, states_spec, actions_spec=None):
    # Builds a zero-argument sampler of random states for `states_spec`.
    # When `actions_spec` is given, every int-typed action additionally gets a
    # random action mask injected into the returned states dict (key
    # 'action_mask' for an atomic action spec, '<name>_mask' per named action
    # otherwise). Four cases: {atomic, dict} states x {atomic, dict} actions.
    if actions_spec is None:
        # No masks needed: plain state sampling, atomic or per-name.
        if util.is_atomic_values_spec(values_spec=states_spec):
            return (
                lambda: cls.random_state_function(state_spec=states_spec)
                ())
        else:
            return (lambda: {
                name: cls.random_state_function(state_spec=state_spec)()
                for name, state_spec in states_spec.items()
            })
    elif util.is_atomic_values_spec(values_spec=states_spec):
        if util.is_atomic_values_spec(values_spec=actions_spec):
            def fn():
                random_states = cls.random_state_function(
                    state_spec=states_spec)()
                if actions_spec['type'] == 'int':
                    # A mask can only be attached to a dict, so wrap the bare
                    # sampled state under the key 'state' first if necessary.
                    if not isinstance(random_states, dict):
                        random_states = dict(state=random_states)
                    mask = cls.random_mask(action_spec=actions_spec)
                    random_states['action_mask'] = mask
                return random_states
        else:
            def fn():
                random_states = cls.random_state_function(
                    state_spec=states_spec)()
                for name, action_spec in actions_spec.items():
                    # One mask per int-typed named action.
                    if action_spec['type'] == 'int':
                        if not isinstance(random_states, dict):
                            random_states = dict(state=random_states)
                        mask = cls.random_mask(action_spec=action_spec)
                        random_states[name + '_mask'] = mask
                return random_states
    else:
        if util.is_atomic_values_spec(values_spec=actions_spec):
            def fn():
                # States are already a dict; masks can be added directly.
                random_states = {
                    name: cls.random_state_function(state_spec=state_spec)()
                    for name, state_spec in states_spec.items()
                }
                if actions_spec['type'] == 'int':
                    mask = cls.random_mask(action_spec=actions_spec)
                    random_states['action_mask'] = mask
                return random_states
        else:
            def fn():
                random_states = {
                    name: cls.random_state_function(state_spec=state_spec)()
                    for name, state_spec in states_spec.items()
                }
                for name, action_spec in actions_spec.items():
                    if action_spec['type'] == 'int':
                        mask = cls.random_mask(action_spec=action_spec)
                        random_states[name + '_mask'] = mask
                return random_states
    return fn