def __init__(self, config):
    """Initializes the reinforcement learning agent.

    A single state (recognised by a `type` key directly in `config.states`)
    or a single action (recognised by a `continuous` key directly in
    `config.actions`) is wrapped into a one-entry mapping named `state` /
    `action`, together with its preprocessing / exploration config, so the
    per-name loops below handle both layouts the same way.

    Args:
        config (Configuration): configuration object containing at least
            `states`, `actions`, `preprocessing` and `exploration`.
    """
    assert self.__class__.name is not None and self.__class__.model is not None
    config.default(Agent.default_config)

    # states config and preprocessing
    self.preprocessing = dict()
    if 'type' in config.states:
        # only one state
        config.states = dict(state=config.states)
        self.unique_state = True
        if config.preprocessing is not None:
            config.preprocessing = dict(state=config.preprocessing)
    else:
        self.unique_state = False
    for name, state in config.states:
        if config.preprocessing is not None and name in config.preprocessing:
            preprocessing = Preprocessing.from_config(config=config.preprocessing[name])
            self.preprocessing[name] = preprocessing
            # Overwrite the configured shape so that everything reading the
            # config afterwards (e.g. the model built below) sees the shape
            # produced by the preprocessing stack.
            state.shape = preprocessing.processed_shape(shape=state.shape)

    # actions config and exploration
    self.continuous_actions = list()
    self.exploration = dict()
    if 'continuous' in config.actions:
        # only one action
        config.actions = dict(action=config.actions)
        if config.exploration is not None:
            config.exploration = dict(action=config.exploration)
        self.unique_action = True
    else:
        self.unique_action = False
    # This loop runs for both layouts. The single-action branch above does
    # not append to `continuous_actions` itself, so a lone continuous action
    # is registered exactly once and still receives its exploration object.
    for name, action in config.actions:
        if action.continuous:
            self.continuous_actions.append(name)
        if config.exploration is not None and name in config.exploration:
            self.exploration[name] = Exploration.from_config(config=config.exploration[name])

    self.states_config = config.states
    self.actions_config = config.actions

    self.model = self.__class__.model(config)

    self.episode = 0
    self.timestep = 0

    # Reset internal state - needs to be called after every episode
    self.next_internal = self.current_internal = self.model.reset()
    for preprocessing in self.preprocessing.values():
        preprocessing.reset()
def __init__(self, config):
    """Set up the reinforcement learning agent from its configuration.

    A lone state (a `type` key directly in `config.states`) or a lone action
    (a `continuous` key directly in `config.actions`) is wrapped into a
    one-entry mapping named `state` / `action`; the matching preprocessing /
    exploration config is wrapped the same way. Afterwards every state gets
    a default `type` of 'float', every action a default `shape` of `()`
    (continuous actions additionally default `min_value` / `max_value` to
    None), and integer shapes are converted to one-element tuples.

    Args:
        config (Configuration): configuration object containing at least
            `states`, `actions`, `preprocessing` and `exploration`.
    """
    agent_cls = self.__class__
    assert agent_cls.name is not None and agent_cls.model is not None
    config.default(Agent.default_config)

    # ----- states and their preprocessing -----
    self.preprocessing = dict()
    self.unique_state = 'type' in config.states
    if self.unique_state:
        # A single state was given: wrap it under the name 'state'.
        config.states = dict(state=config.states)
        if config.preprocessing is not None:
            config.preprocessing = dict(state=config.preprocessing)

    for state_name, state_spec in config.states:
        state_spec.default({'type': 'float'})
        if isinstance(state_spec.shape, int):
            state_spec.shape = (state_spec.shape, )
        if config.preprocessing is None or state_name not in config.preprocessing:
            continue
        preprocessor = Preprocessing.from_config(config=config.preprocessing[state_name])
        self.preprocessing[state_name] = preprocessor
        # Store the shape as it looks after preprocessing.
        state_spec.shape = preprocessor.processed_shape(shape=state_spec.shape)

    # ----- actions and their exploration -----
    self.exploration = dict()
    self.unique_action = 'continuous' in config.actions
    if self.unique_action:
        # A single action was given: wrap it under the name 'action'.
        config.actions = dict(action=config.actions)
        if config.exploration is not None:
            config.exploration = dict(action=config.exploration)

    for action_name, action_spec in config.actions:
        action_defaults = dict(shape=())
        if action_spec.continuous:
            action_defaults.update(min_value=None, max_value=None)
        action_spec.default(action_defaults)
        if isinstance(action_spec.shape, int):
            action_spec.shape = (action_spec.shape, )
        if config.exploration is None or action_name not in config.exploration:
            continue
        self.exploration[action_name] = Exploration.from_config(config=config.exploration[action_name])

    self.states_config = config.states
    self.actions_config = config.actions

    self.model = agent_cls.model(config)

    # NOTE(review): the counter starts at -1, presumably because reset()
    # advances it to 0 for the first episode - confirm against reset().
    self.episode = -1
    self.timestep = 0
    self.reset()
def __init__(self, config):
    """Initializes the reinforcement learning agent.

    A single state (recognised by a `type` key directly in `config.states`)
    or a single action (recognised by a `continuous` key directly in
    `config.actions`) is wrapped into a one-entry mapping named `state` /
    `action`, together with its preprocessing / exploration config, so the
    per-name loops below handle both layouts the same way.

    Args:
        config (Configuration): configuration object containing at least
            `states`, `actions`, `preprocessing` and `exploration`.
    """
    assert self.__class__.name is not None and self.__class__.model is not None
    config.default(Agent.default_config)

    # states config and preprocessing
    self.preprocessing = dict()
    if 'type' in config.states:
        # only one state
        config.states = dict(state=config.states)
        self.unique_state = True
        if config.preprocessing is not None:
            config.preprocessing = dict(state=config.preprocessing)
    else:
        self.unique_state = False
    for name, state in config.states:
        if config.preprocessing is not None and name in config.preprocessing:
            preprocessing = Preprocessing.from_config(config=config.preprocessing[name])
            self.preprocessing[name] = preprocessing
            # Overwrite the configured shape so that everything reading the
            # config afterwards (e.g. the model built below) sees the shape
            # produced by the preprocessing stack.
            state.shape = preprocessing.processed_shape(shape=state.shape)

    # actions config and exploration
    self.continuous_actions = list()
    self.exploration = dict()
    if 'continuous' in config.actions:
        # only one action
        config.actions = dict(action=config.actions)
        if config.exploration is not None:
            config.exploration = dict(action=config.exploration)
        self.unique_action = True
    else:
        self.unique_action = False
    # This loop runs for both layouts. The single-action branch above does
    # not append to `continuous_actions` itself, so a lone continuous action
    # is registered exactly once and still receives its exploration object.
    for name, action in config.actions:
        if action.continuous:
            self.continuous_actions.append(name)
        if config.exploration is not None and name in config.exploration:
            self.exploration[name] = Exploration.from_config(config=config.exploration[name])

    self.states_config = config.states
    self.actions_config = config.actions

    self.model = self.__class__.model(config)

    self.episode = 0
    self.timestep = 0

    # Reset internal state - needs to be called after every episode
    self.next_internal = self.current_internal = self.model.reset()
    for preprocessing in self.preprocessing.values():
        preprocessing.reset()
def __init__(self, config, model=None):
    """Set up the reinforcement learning agent from its configuration.

    A lone state (a `type` key directly in `config.states`) or a lone action
    (a `continuous` key directly in `config.actions`) is wrapped into a
    one-entry mapping named `state` / `action`; the matching preprocessing /
    exploration config is wrapped the same way. Afterwards every state gets
    a default `type` of 'float', every action a default `shape` of `()`
    (continuous actions additionally default `min_value` / `max_value` to
    None), and integer shapes are converted to one-element tuples.

    Args:
        config (Configuration): configuration object containing at least
            `states`, `actions`, `preprocessing` and `exploration`.
        model (Model): optional model instance. If not supplied, a new model
            is created from `config`.

    Raises:
        TensorForceError: if `model` is supplied but is not an instance of
            the model class declared on the agent class.
    """
    agent_cls = self.__class__
    assert agent_cls.name is not None and agent_cls.model is not None
    config.default(Agent.default_config)

    self.logger = logging.getLogger(__name__)
    self.logger.setLevel(util.log_levels[config.log_level])

    # ----- states and their preprocessing -----
    self.preprocessing = dict()
    self.unique_state = 'type' in config.states
    if self.unique_state:
        # A single state was given: wrap it under the name 'state'.
        config.states = dict(state=config.states)
        if config.preprocessing is not None:
            config.preprocessing = dict(state=config.preprocessing)

    for state_name, state_spec in config.states:
        state_spec.default({'type': 'float'})
        if isinstance(state_spec.shape, int):
            state_spec.shape = (state_spec.shape, )
        if config.preprocessing is None or state_name not in config.preprocessing:
            continue
        preprocessor = Preprocessing.from_config(config=config.preprocessing[state_name])
        self.preprocessing[state_name] = preprocessor
        # Store the shape as it looks after preprocessing.
        state_spec.shape = preprocessor.processed_shape(shape=state_spec.shape)

    # ----- actions and their exploration -----
    self.exploration = dict()
    self.unique_action = 'continuous' in config.actions
    if self.unique_action:
        # A single action was given: wrap it under the name 'action'.
        config.actions = dict(action=config.actions)
        if config.exploration is not None:
            config.exploration = dict(action=config.exploration)

    for action_name, action_spec in config.actions:
        action_defaults = dict(shape=())
        if action_spec.continuous:
            action_defaults.update(min_value=None, max_value=None)
        action_spec.default(action_defaults)
        if isinstance(action_spec.shape, int):
            action_spec.shape = (action_spec.shape, )
        if config.exploration is None or action_name not in config.exploration:
            continue
        self.exploration[action_name] = Exploration.from_config(config=config.exploration[action_name])

    self.states_config = config.states
    self.actions_config = config.actions

    # ----- model: build a fresh one or adopt the supplied instance -----
    if model is not None:
        if not isinstance(model, agent_cls.model):
            raise TensorForceError(
                "Supplied model class `{}` does not match expected agent model class `{}`".format(
                    type(model).__name__, agent_cls.model.__name__
                )
            )
        self.model = model
    else:
        self.model = agent_cls.model(config)

    # Report config entries that were never read up to this point.
    unused_keys = config.not_accessed()
    if unused_keys:
        self.logger.warning("Configuration values not accessed: {}".format(', '.join(unused_keys)))

    # NOTE(review): the counter starts at -1, presumably because reset()
    # advances it to 0 for the first episode - confirm against reset().
    self.episode = -1
    self.timestep = 0
    self.reset()