def __init__(
        self,
        agent_params: AgentParameters,
        env_params: EnvironmentParameters,
        schedule_params: ScheduleParameters,
        vis_params: VisualizationParameters = VisualizationParameters(),
        preset_validation_params: PresetValidationParameters = PresetValidationParameters(),
        name='simple_rl_graph'):
    """
    Set up a graph for a single agent, optionally without an environment.

    :param agent_params: parameters of the agent; its ``visualization`` attribute is overwritten with
                         ``vis_params`` and missing input/output filters are filled in here
    :param env_params: parameters of the environment, or None when there is no environment
    :param schedule_params: forwarded to the base class initializer
    :param vis_params: forwarded to the base class initializer and attached to the agent parameters
    :param preset_validation_params: stored on the instance as-is
    :param name: forwarded to the base class initializer
    """
    # NOTE(review): the default `vis_params` / `preset_validation_params` objects are created once, when
    # the function is defined, so callers relying on the defaults all share the same instances.
    super().__init__(name, schedule_params, vis_params)

    self.agent_params = agent_params
    self.env_params = env_params
    self.preset_validation_params = preset_validation_params

    params = self.agent_params
    params.visualization = vis_params

    # In cases where there is no environment (e.g. batch-rl and imitation learning), there is nowhere to get
    # a default filter from, so a default no-filter is used instead. When there is no environment, the user
    # is expected to define input/output filters (if required) using the preset.
    has_env = env_params is not None

    if params.input_filter is None:
        params.input_filter = env_params.default_input_filter() if has_env else NoInputFilter()

    if params.output_filter is None:
        params.output_filter = env_params.default_output_filter() if has_env else NoOutputFilter()
def __init__(
        self,
        agents_params: List[AgentParameters],
        env_params: EnvironmentParameters,
        schedule_params: ScheduleParameters,
        vis_params: VisualizationParameters = VisualizationParameters(),
        preset_validation_params: PresetValidationParameters = PresetValidationParameters()):
    """
    Set up the bookkeeping state of a graph that holds several agents.

    Every entry of ``agents_params`` is modified in place: it is renamed to ``"agent"`` (when it is the
    only agent) or ``"agent_<index>"``, receives a shallow copy of ``vis_params`` as its ``visualization``
    and, if its input/output filter is None, a shallow copy of the environment's default filter.

    :param agents_params: parameters of the agents; must contain at least one entry, since the first one
                          is also stored as ``self.agent_params``
    :param env_params: parameters of the environment; its ``default_input_filter()`` /
                       ``default_output_filter()`` are called for agents that have no filter set
    :param schedule_params: passed to ``self.set_schedule_params``
    :param vis_params: stored as ``self.visualization_parameters`` and shallow-copied into every agent
    :param preset_validation_params: stored on the instance as-is
    """
    # Give the agents their final names BEFORE building any name-keyed dictionary. Previously `sess` and
    # `checkpoint_saver` were keyed by the incoming names, which are overwritten below, so the keys could
    # be stale (or collapse into a single entry when several agents came in with the same name).
    single_agent = len(agents_params) == 1
    for agent_index, agent_params in enumerate(agents_params):
        agent_params.name = "agent" if single_agent else "agent_{}".format(agent_index)

    self.sess = {agent_params.name: None for agent_params in agents_params}
    self.level_managers = []  # type: List[MultiAgentLevelManager]
    self.top_level_manager = None
    self.environments = []
    self.set_schedule_params(schedule_params)
    self.visualization_parameters = vis_params
    self.name = 'multi_agent_graph'
    self.task_parameters = None
    self._phase = self.phase = RunPhase.UNDEFINED
    self.preset_validation_params = preset_validation_params
    self.reset_required = False
    self.num_checkpoints_to_keep = 4  # TODO: make this a parameter

    # timers
    self.graph_creation_time = None
    self.last_checkpoint_saving_time = time.time()

    # counters
    self.total_steps_counters = {
        RunPhase.HEATUP: TotalStepsCounter(),
        RunPhase.TRAIN: TotalStepsCounter(),
        RunPhase.TEST: TotalStepsCounter()
    }
    self.checkpoint_id = 0

    self.checkpoint_saver = {agent_params.name: None for agent_params in agents_params}
    self.checkpoint_state_updater = None
    self.graph_logger = Logger()
    self.data_store = None
    self.is_batch_rl = False
    self.time_metric = TimeTypes.EpisodeNumber

    self.env_params = env_params
    self.agents_params = agents_params
    self.agent_params = agents_params[0]  # ...(find a better way)...

    for agent_params in agents_params:
        # every agent gets its own shallow copy so the objects are not shared between agents
        agent_params.visualization = copy.copy(vis_params)
        if agent_params.input_filter is None:
            agent_params.input_filter = copy.copy(env_params.default_input_filter())
        if agent_params.output_filter is None:
            agent_params.output_filter = copy.copy(env_params.default_output_filter())
def __init__(self, agent_params: AgentParameters, env_params: EnvironmentParameters,
             schedule_params: ScheduleParameters, vis_params: VisualizationParameters=VisualizationParameters(),
             preset_validation_params: PresetValidationParameters = PresetValidationParameters()):
    """
    Set up a graph for a single agent acting in a single environment.

    :param agent_params: parameters of the agent; its ``visualization`` attribute is overwritten with
                         ``vis_params`` and missing input/output filters are taken from the environment
    :param env_params: parameters of the environment; used as the source of default filters
    :param schedule_params: forwarded to the base class initializer
    :param vis_params: forwarded to the base class initializer and attached to the agent parameters
    :param preset_validation_params: stored on the instance as-is
    """
    # NOTE(review): the default `vis_params` / `preset_validation_params` objects are created once, when
    # the function is defined, so callers relying on the defaults all share the same instances.
    super().__init__('simple_rl_graph', schedule_params, vis_params)

    self.agent_params = agent_params
    self.env_params = env_params
    self.preset_validation_params = preset_validation_params

    params = self.agent_params
    params.visualization = vis_params

    # Fill in whichever filter the agent did not define with the environment's default for it
    # (`default_input_filter()` / `default_output_filter()`); input is handled before output.
    for slot in ('input_filter', 'output_filter'):
        if getattr(params, slot) is None:
            make_default = getattr(env_params, 'default_' + slot)
            setattr(params, slot, make_default())