def create_agent(self):
    """Instantiate the RL agent that interacts with the environment.

    Returns:
        RL agent
    """
    change_print_color.change('CYAN')
    print("\nCreating Agent...")

    policy_params = {
        'network_model': tf_network,  # tf_network, multi_modal_network, multi_modal_network_fp
        'network_params': {
            'n_layers': 2,  # Number of hidden layers
            'dim_hidden': [32, 32],  # Hidden-layer sizes, one entry per layer
            'obs_names': self.env.get_obs_info()['names'],
            'obs_dof': self.env.get_obs_info()['dimensions'],  # DoF for observation data tensor
        },
        # Initialization.
        'init_var': 0.1,  # Initial policy variance.
        'ent_reg': 0.0,  # Entropy regularizer (used to update the policy variance).
        # Solver hyperparameters.
        'iterations': self.task_params['tf_iterations'],  # Number of iterations per inner iteration (default: 5000).
        'batch_size': 15,
        'lr': 0.001,  # Base learning rate (fixed by default).
        'lr_policy': 'fixed',  # Learning rate policy.
        'momentum': 0.9,  # Momentum.
        'weight_decay': 0.005,  # Weight decay to prevent overfitting.
        'solver_type': 'Adam',  # Solver type (e.g. 'SGD', 'Adam', 'RMSPROP', 'MOMENTUM', 'ADAGRAD').
        # GPU usage.
        'use_gpu': self.task_params['use_gpu'],  # Whether or not to use the GPU for training.
        'gpu_id': 0,
        'gpu_mem_percentage': self.task_params['gpu_mem_percentage'],
        # Training data.
        'fc_only_iterations': 0,  # Iterations of FC-only training before normal training.
        # Others.
        'random_seed': (self.hyperparams['seed']
                        if self.task_params['tf_seed'] == -1
                        else self.task_params['tf_seed']),  # TF random seed
        'log_dir': self.hyperparams['log_dir'],
        # 'weights_file_prefix': EXP_DIR + 'policy',
    }

    policy_opt = {'type': PolicyOptTf, 'hyperparams': policy_params}

    agent = GPSAgent(act_dim=self.action_dim, obs_dim=self.obs_dim,
                     state_dim=self.state_dim, policy_opt=policy_opt,
                     agent_name='agent%02d' % self.hyperparams['run_num'])
    print("Agent:%s OK\n" % type(agent).__name__)

    return agent
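# The 'random_seed' entry above falls back to the experiment-wide seed whenever the task
# config leaves 'tf_seed' at the sentinel value -1. Minimal self-contained sketch of that
# rule (resolve_tf_seed is an illustrative helper, not part of this repository):
def resolve_tf_seed(experiment_seed, tf_seed):
    """Return tf_seed unless it is -1, in which case reuse the experiment seed."""
    return experiment_seed if tf_seed == -1 else tf_seed

assert resolve_tf_seed(experiment_seed=47, tf_seed=-1) == 47    # sentinel: reuse experiment seed
assert resolve_tf_seed(experiment_seed=47, tf_seed=123) == 123  # explicit TF seed wins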
    'lr_policy': 'fixed',  # Learning rate policy.
    'momentum': 0.9,  # Momentum.
    'weight_decay': 0.005,  # Weight decay.
    'solver_type': 'Adam',  # Solver type (e.g. 'SGD', 'Adam', etc.).
    # Set GPU usage.
    'use_gpu': 1,  # Whether or not to use the GPU for training.
    'gpu_id': 0,
    'random_seed': 1,
    'fc_only_iterations': 0,  # TODO: Fully-connected-only iterations? How is this used when the network is a CNN?
    'gpu_mem_percentage': 0.2,
    # 'weights_file_prefix': EXP_DIR + 'policy',
}

policy_opt = {'type': PolicyOptTf, 'hyperparams': policy_params}

bigman_agent = GPSAgent(act_dim=action_dim, obs_dim=observation_dim,
                        state_dim=state_dim, policy_opt=policy_opt)
print("Bigman Agent:%s OK\n" % type(bigman_agent).__name__)

# ################# #
# ################# #
# ##### COSTS ##### #
# ################# #
# ################# #

# Action Cost
act_cost = {
    'type': CostAction,
    'wu': np.ones(action_dim) * 1e-4,
    'target': None,  # Target action value
}
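# Sketch of what a quadratic action cost with this 'wu' weighting penalizes per timestep.
# Assumption: CostAction follows the usual GPS-style form 0.5 * sum(wu * u**2);
# evaluate_action_cost below is an illustrative stand-in, not the repository's API.
import numpy as np

def evaluate_action_cost(u, wu):
    """Quadratic penalty on the commanded actions u, weighted per dimension by wu."""
    return 0.5 * np.sum(wu * u ** 2, axis=-1)

u_traj = 0.1 * np.ones((100, 7))      # dummy T x dU action trajectory
wu_weights = np.ones(7) * 1e-4        # same weighting as 'wu' above
per_step_cost = evaluate_action_cost(u_traj, wu_weights)  # shape (100,)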
    'solver_type': 'Adam',  # Solver type (e.g. 'SGD', 'Adam', etc.).
    # Set GPU usage.
    'use_gpu': 1,  # Whether or not to use the GPU for training.
    'gpu_id': 0,
    'random_seed': 1,
    'fc_only_iterations': 0,  # TODO: Fully-connected-only iterations? How is this used when the network is a CNN?
    'gpu_mem_percentage': 0.2,
    # 'weights_file_prefix': EXP_DIR + 'policy',
}

policy_opt = {
    'type': PolicyOptTf,
    'hyperparams': policy_params,
}

manipulator2d_agent = GPSAgent(act_dim=action_dim, obs_dim=observation_dim,
                               state_dim=state_dim, policy_opt=policy_opt,
                               agent_name="manipulator2d_agent")

# ################# #
# ################# #
# ##### COSTS ##### #
# ################# #
# ################# #

# Action Cost
act_cost = {
    'type': CostAction,
    'wu': np.ones(action_dim) * 1e-4,
    'target': None,  # Target action value
}

# State Cost
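# State costs in GPS-style setups typically penalize the deviation of selected state entries
# from a target. Generic quadratic sketch only (illustrative helper, not this repository's
# CostState configuration):
import numpy as np

def evaluate_state_cost(x, x_target, wx):
    """Quadratic penalty on the distance between the state x and a target state."""
    return 0.5 * np.sum(wx * (x - x_target) ** 2, axis=-1)

x_traj = np.zeros((100, 6))   # dummy T x dX state trajectory
x_goal = np.ones(6)           # dummy target state
state_costs = evaluate_state_cost(x_traj, x_goal, wx=np.ones(6))  # shape (100,)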
# 'use_gpu': 1,  # Whether or not to use the GPU for training.
# 'gpu_id': 0,
# 'random_seed': 1,
# 'fc_only_iterations': 0,  # TODO: Fully-connected-only iterations? How is this used when the network is a CNN?
# 'gpu_mem_percentage': 0.2,
# # 'weights_file_prefix': EXP_DIR + 'policy',
# },
]

bigman_agents = list()
for pp, pol_param in enumerate(policy_params):
    policy_opt = {
        'type': PolicyOptTf,
        'hyperparams': pol_param,
    }
    bigman_agents.append(GPSAgent(act_dim=action_dim, obs_dim=observation_dim,
                                  state_dim=state_dim, policy_opt=policy_opt,
                                  agent_name="bigman_agent" + str(pp)))
    print("Bigman Agent:%s OK\n" % type(bigman_agents[-1]).__name__)

print("TOTAL BIGMAN AGENTS: %d" % len(bigman_agents))

# ################# #
# ################# #
# ##### COSTS ##### #
# ################# #
# ################# #

# Action Cost
act_cost = {
    'type': CostAction,
    'wu': np.ones(action_dim) * 1e-4,
    'target': None,  # Target action value
}
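# Why keep a list of policy_params? A common pattern is to sweep one hyperparameter across
# otherwise identical agents, one GPSAgent per entry fed through the enumerate loop above.
# Hypothetical values for illustration only:
base_params = {'lr': 0.001, 'batch_size': 15, 'solver_type': 'Adam'}
policy_params_sweep = [dict(base_params, lr=lr) for lr in (1e-3, 5e-4, 1e-4)]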
        'obs_names': bigman_env.get_obs_info()['names'],
        'obs_dof': bigman_env.get_obs_info()['dimensions'],  # DoF for observation data tensor
        'batch_size': 15,  # TODO: Check if this value is OK (same as num_samples)
        # 'num_filters': [5, 10],
        # 'obs_include': [JOINT_ANGLES, JOINT_VELOCITIES, RGB_IMAGE],  # Deprecated from original GPS code
        # 'obs_vector_data': [JOINT_ANGLES, JOINT_VELOCITIES],  # Deprecated from original GPS code
        # 'obs_image_data': [RGB_IMAGE],  # Deprecated from original GPS code
        # 'sensor_dims': SENSOR_DIMS,  # Deprecated from original GPS code
        # 'image_width': IMAGE_WIDTH (80),  # For multi_modal_network
        # 'image_height': IMAGE_HEIGHT (64),  # For multi_modal_network
        # 'image_channels': IMAGE_CHANNELS (3),  # For multi_modal_network
    }
}

policy = PolicyOptTf(policy_params, observation_dim, action_dim)
# policy = None

bigman_agent = GPSAgent(act_dim=action_dim, obs_dim=observation_dim,
                        state_dim=state_dim, policy=policy)
# Load previously learned variables
# bigman_agent.load(file_save_restore)
print("Bigman Agent:%s OK\n" % type(bigman_agent).__name__)

# ################# #
# ################# #
# ##### COSTS ##### #
# ################# #
# ################# #

# Action Cost
# TODO: I think this doesn't make sense if the control is joint position
act_cost = {
    'type': CostAction,
    'wu': np.ones(action_dim) * 1e-4,
    'target': None,  # Target action value
}
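# If one of the multi_modal_network variants were enabled, the commented-out image settings
# above would imply a flattened RGB observation of width * height * channels values
# (assuming the dimensions noted in those comments, 80 x 64 x 3):
IMAGE_WIDTH, IMAGE_HEIGHT, IMAGE_CHANNELS = 80, 64, 3
rgb_obs_size = IMAGE_WIDTH * IMAGE_HEIGHT * IMAGE_CHANNELS  # 15360 values per frame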