def __init__(self, name="", pubsub_params=None):
    """Initialize the running-stats holder.

    :param name: identifier used to derive the pub/sub channel name
    :param pubsub_params: optional memory-backend parameters; when given, a
        pub/sub backend is created and a daemon subscriber thread is started
        to receive stats updates on ``channel-srs-<name>``
    """
    self.name = name
    self.pubsub = None
    if not pubsub_params:
        return
    self.channel = "channel-srs-{}".format(self.name)
    # Function-scope import — presumably to avoid a circular import with the
    # memory backend module; confirm before hoisting to module level.
    from rl_coach.memories.backend.memory_impl import get_memory_backend
    self.pubsub = get_memory_backend(pubsub_params)
    listener = SharedRunningStatsSubscribe(self)
    listener.daemon = True  # don't block interpreter shutdown
    listener.start()
def __init__(self, params: KubernetesParameters):
    """
    Set up the Kubernetes orchestrator: load cluster credentials, resolve the
    target namespace, configure an HTTP proxy if one is set in the
    environment, and construct the memory backend and data store used by the
    Coach components. For S3 data stores, also resolve the access/secret key
    pair from a credentials file or from environment variables.

    :param params: The Kubernetes parameters which are used for deploying the components in Coach. These parameters
                   include namespace and kubeconfig.
    """
    super().__init__(params)
    self.params = params
    # Load credentials from a local kubeconfig when one is provided,
    # otherwise assume we are running inside the cluster itself.
    if self.params.kubeconfig:
        k8sconfig.load_kube_config()
    else:
        k8sconfig.load_incluster_config()

    # No explicit namespace: fall back to the namespace of the currently
    # active kubeconfig context. NOTE(review): this assumes the active
    # context defines a 'namespace' key — a KeyError otherwise; confirm.
    if not self.params.namespace:
        _, current_context = k8sconfig.list_kube_config_contexts()
        self.params.namespace = current_context['context']['namespace']

    # Propagate an ambient HTTP proxy to the kubernetes client's shared
    # default configuration (process-wide side effect on a private member).
    if os.environ.get('http_proxy'):
        k8sclient.Configuration._default.proxy = os.environ.get(
            'http_proxy'
        )

    # Both the memory backend and the data store need to know which
    # namespace to deploy their components into.
    self.params.memory_backend_parameters.orchestrator_params = {
        'namespace': self.params.namespace
    }
    self.memory_backend = get_memory_backend(
        self.params.memory_backend_parameters)

    self.params.data_store_params.orchestrator_params = {
        'namespace': self.params.namespace
    }
    self.params.data_store_params.namespace = self.params.namespace
    self.data_store = get_data_store(self.params.data_store_params)

    if self.params.data_store_params.store_type == "s3":
        self.s3_access_key = None
        self.s3_secret_key = None
        if self.params.data_store_params.creds_file:
            # Read AWS-style credentials from an INI file ([default] section).
            s3config = ConfigParser()
            s3config.read(self.params.data_store_params.creds_file)
            try:
                self.s3_access_key = s3config.get('default', 'aws_access_key_id')
                self.s3_secret_key = s3config.get('default', 'aws_secret_access_key')
            except Error as e:
                # Best-effort: report the parse failure and continue with
                # whatever keys (possibly None) were resolved so far.
                screen.print("Error when reading S3 credentials file: %s", e)
        else:
            # No credentials file — fall back to environment variables.
            self.s3_access_key = os.environ.get('ACCESS_KEY_ID')
            self.s3_secret_key = os.environ.get('SECRET_ACCESS_KEY')
def __init__(self, replicated_device=None, epsilon=1e-2, name="", create_ops=True, pubsub_params=None):
    """Initialize the TF-backed shared running stats.

    :param replicated_device: device on which the TF ops are placed
    :param epsilon: small constant kept for numerical stability
    :param name: identifier used to derive the pub/sub channel name
    :param create_ops: when True, build the TF ops immediately under
        ``tf.device(replicated_device)``
    :param pubsub_params: optional memory-backend parameters; when given, a
        pub/sub backend is created and a daemon subscriber thread is started
    """
    self.sess = None
    self.name = name
    self.replicated_device = replicated_device
    self.epsilon = epsilon
    self.ops_were_created = False
    if create_ops:
        # Pin op creation to the replicated (parameter-server) device.
        with tf.device(replicated_device):
            self.create_ops()

    self.pubsub = None
    if not pubsub_params:
        return
    self.channel = "channel-srs-{}".format(self.name)
    self.pubsub = get_memory_backend(pubsub_params)
    listener = SharedRunningStatsSubscribe(self)
    listener.daemon = True  # don't block interpreter shutdown
    listener.start()
def setup_memory_backend(self) -> None:
    """Instantiate the memory backend for this agent, when one is configured.

    If the agent's memory parameters carry a ``memory_backend_params``
    attribute, build the backend via ``get_memory_backend`` and store it on
    ``self.memory_backend``; otherwise this is a no-op.
    """
    memory_params = self.agent_params.memory
    if hasattr(memory_params, 'memory_backend_params'):
        self.memory_backend = get_memory_backend(memory_params.memory_backend_params)
def __init__(self, agent_parameters: AgentParameters, parent: Union['LevelManager', 'CompositeAgent']=None):
    """
    Build the agent: resolve distributed-training settings, create (or fetch)
    the replay memory, choose TF devices, wire up the observation/reward
    filters, reset all episode bookkeeping, register the standard signals,
    and seed the RNGs.

    :param agent_parameters: A AgentParameters class instance with all the agent parameters
    :param parent: the level manager or composite agent that owns this agent, if any
    """
    super().__init__()
    self.ap = agent_parameters
    self.task_id = self.ap.task_parameters.task_index
    # task 0 is the chief worker in distributed runs
    self.is_chief = self.task_id == 0
    # shared memory is only meaningful for distributed training
    self.shared_memory = type(agent_parameters.task_parameters) == DistributedTaskParameters \
                         and self.ap.memory.shared_memory
    if self.shared_memory:
        self.shared_memory_scratchpad = self.ap.task_parameters.shared_memory_scratchpad
    self.name = agent_parameters.name
    self.parent = parent
    self.parent_level_manager = None
    self.full_name_id = agent_parameters.full_name_id = self.name

    if type(agent_parameters.task_parameters) == DistributedTaskParameters:
        screen.log_title("Creating agent - name: {} task id: {} (may take up to 30 seconds due to "
                         "tensorflow wake up time)".format(self.full_name_id, self.task_id))
    else:
        screen.log_title("Creating agent - name: {}".format(self.full_name_id))
    self.imitation = False
    self.agent_logger = Logger()
    self.agent_episode_logger = EpisodeLogger()

    # get the memory
    # - distributed training + shared memory:
    #   * is chief? -> create the memory and add it to the scratchpad
    #   * not chief? -> wait for the chief to create the memory and then fetch it
    # - non distributed training / not shared memory:
    #   * create memory
    memory_name = self.ap.memory.path.split(':')[1]
    self.memory_lookup_name = self.full_name_id + '.' + memory_name
    if self.shared_memory and not self.is_chief:
        # non-chief workers reuse the memory the chief published
        self.memory = self.shared_memory_scratchpad.get(self.memory_lookup_name)
    else:
        # modules
        self.memory = dynamic_import_and_instantiate_module_from_params(self.ap.memory)

        if hasattr(self.ap.memory, 'memory_backend_params'):
            self.memory_backend = get_memory_backend(self.ap.memory.memory_backend_params)

            # the trainer consumes from the backend rather than writing to it,
            # so only non-trainer runs attach the backend to the memory
            if self.ap.memory.memory_backend_params.run_type != 'trainer':
                self.memory.set_memory_backend(self.memory_backend)

        if agent_parameters.memory.load_memory_from_file_path:
            screen.log_title("Loading replay buffer from pickle. Pickle path: {}"
                             .format(agent_parameters.memory.load_memory_from_file_path))
            self.memory.load(agent_parameters.memory.load_memory_from_file_path)

        if self.shared_memory and self.is_chief:
            # publish the freshly created memory for the other workers
            self.shared_memory_scratchpad.add(self.memory_lookup_name, self.memory)

    # set devices
    if type(agent_parameters.task_parameters) == DistributedTaskParameters:
        self.has_global = True
        self.replicated_device = agent_parameters.task_parameters.device
        self.worker_device = "/job:worker/task:{}".format(self.task_id)
    else:
        self.has_global = False
        self.replicated_device = None
        self.worker_device = ""
    # the device suffix is appended to the (possibly empty) worker prefix
    if agent_parameters.task_parameters.use_cpu:
        self.worker_device += "/cpu:0"
    else:
        self.worker_device += "/device:GPU:0"

    # filters
    self.input_filter = self.ap.input_filter
    self.output_filter = self.ap.output_filter
    self.pre_network_filter = self.ap.pre_network_filter
    # prefer the replicated (parameter-server) device when one exists
    device = self.replicated_device if self.replicated_device else self.worker_device

    # filters need the memory backend params when running distributed Coach
    # with a synchronization scheme, so their state can be shared
    if hasattr(self.ap.memory, 'memory_backend_params') and self.ap.algorithm.distributed_coach_synchronization_type:
        self.input_filter.set_device(device, memory_backend_params=self.ap.memory.memory_backend_params)
        self.output_filter.set_device(device, memory_backend_params=self.ap.memory.memory_backend_params)
        self.pre_network_filter.set_device(device, memory_backend_params=self.ap.memory.memory_backend_params)
    else:
        self.input_filter.set_device(device)
        self.output_filter.set_device(device)
        self.pre_network_filter.set_device(device)

    # initialize all internal variables
    self._phase = RunPhase.HEATUP
    self.total_shaped_reward_in_current_episode = 0
    self.total_reward_in_current_episode = 0
    self.total_steps_counter = 0
    self.running_reward = None
    self.training_iteration = 0
    self.last_target_network_update_step = 0
    self.last_training_phase_step = 0
    self.current_episode = self.ap.current_episode = 0
    self.curr_state = {}
    self.current_hrl_goal = None
    self.current_episode_steps_counter = 0
    self.episode_running_info = {}
    self.last_episode_evaluation_ran = 0
    self.running_observations = []
    self.agent_logger.set_current_time(self.current_episode)
    self.exploration_policy = None
    self.networks = {}
    self.last_action_info = None
    self.running_observation_stats = None
    self.running_reward_stats = None
    self.accumulated_rewards_across_evaluation_episodes = 0
    self.accumulated_shaped_rewards_across_evaluation_episodes = 0
    self.num_successes_across_evaluation_episodes = 0
    self.num_evaluation_episodes_completed = 0
    self.current_episode_buffer = Episode(discount=self.ap.algorithm.discount,
                                          n_step=self.ap.algorithm.n_step)
    # TODO: add agents observation rendering for debugging purposes (not the same as the environment rendering)

    # environment parameters
    self.spaces = None
    self.in_action_space = self.ap.algorithm.in_action_space

    # signals
    self.episode_signals = []
    self.step_signals = []
    self.loss = self.register_signal('Loss')
    self.curr_learning_rate = self.register_signal('Learning Rate')
    self.unclipped_grads = self.register_signal('Grads (unclipped)')
    self.reward = self.register_signal('Reward', dump_one_value_per_episode=False,
                                       dump_one_value_per_step=True)
    self.shaped_reward = self.register_signal('Shaped Reward', dump_one_value_per_episode=False,
                                              dump_one_value_per_step=True)
    self.discounted_return = self.register_signal('Discounted Return')
    # goal-conditioned agents also track how far they are from the goal
    if isinstance(self.in_action_space, GoalsSpace):
        self.distance_from_goal = self.register_signal('Distance From Goal', dump_one_value_per_step=True)

    # use seed
    if self.ap.task_parameters.seed is not None:
        random.seed(self.ap.task_parameters.seed)
        np.random.seed(self.ap.task_parameters.seed)
    else:
        # we need to seed the RNG since the different processes are initialized with the same parent seed
        random.seed()
        np.random.seed()