def __init__(self, state_dim, action_dim, max_action, memory, args):
    """Multi-actor TD3 learner: ``args.n_actor`` actors share one twin critic.

    Builds the actor population, their target copies and optimizers, the
    shared TD3 critic pair, moves everything to GPU when available, and
    shares all parameters across processes (torch multiprocessing).
    """
    # bookkeeping
    self.criterion = nn.MSELoss()
    self.state_dim = state_dim
    self.action_dim = action_dim
    self.max_action = max_action
    self.memory = memory
    self.n = args.n_actor

    # actor population: online nets first, then targets, then optimizers
    # (construction order preserved so random init matches the original)
    self.actors = []
    for _ in range(self.n):
        self.actors.append(
            Actor(state_dim, action_dim, max_action, layer_norm=args.layer_norm))
    self.actors_target = []
    for _ in range(self.n):
        self.actors_target.append(
            Actor(state_dim, action_dim, max_action, layer_norm=args.layer_norm))
    self.actors_optimizer = []
    for idx in range(self.n):
        self.actors_optimizer.append(
            torch.optim.Adam(self.actors[idx].parameters(), lr=args.actor_lr))
    # each target starts as an exact copy of its online actor
    for idx in range(self.n):
        self.actors_target[idx].load_state_dict(self.actors[idx].state_dict())

    # shared twin critic (TD3) and its target copy
    self.critic = CriticTD3(state_dim, action_dim, layer_norm=args.layer_norm)
    self.critic_target = CriticTD3(state_dim, action_dim, layer_norm=args.layer_norm)
    self.critic_target.load_state_dict(self.critic.state_dict())
    self.critic_optimizer = torch.optim.Adam(
        self.critic.parameters(), lr=args.critic_lr)

    # move every network onto the GPU when one is present
    if torch.cuda.is_available():
        for idx in range(self.n):
            self.actors[idx] = self.actors[idx].cuda()
            self.actors_target[idx] = self.actors_target[idx].cuda()
        self.critic = self.critic.cuda()
        self.critic_target = self.critic_target.cuda()

    # expose parameters to sibling processes
    for idx in range(self.n):
        self.actors[idx].share_memory()
        self.actors_target[idx].share_memory()
    self.critic.share_memory()
    self.critic_target.share_memory()

    # TD3 hyper-parameters
    self.tau = args.tau
    self.discount = args.discount
    self.batch_size = args.batch_size
    self.policy_noise = args.policy_noise
    self.noise_clip = args.noise_clip
    self.policy_freq = args.policy_freq
def __init__(self, state_dim, action_dim, max_action, memory, args):
    """Single-actor TD3 learner: one actor and one twin critic, each with
    a target copy initialised to the online weights.
    """
    # actor network, its target copy, and its optimizer
    self.actor = Actor(state_dim, action_dim, max_action,
                       layer_norm=args.layer_norm)
    self.actor_target = Actor(state_dim, action_dim, max_action,
                              layer_norm=args.layer_norm)
    self.actor_target.load_state_dict(self.actor.state_dict())
    self.actor_optimizer = torch.optim.Adam(self.actor.parameters(),
                                            lr=args.actor_lr)

    # twin critic (TD3), its target copy, and its optimizer
    self.critic = CriticTD3(state_dim, action_dim, layer_norm=args.layer_norm)
    self.critic_target = CriticTD3(state_dim, action_dim,
                                   layer_norm=args.layer_norm)
    self.critic_target.load_state_dict(self.critic.state_dict())
    self.critic_optimizer = torch.optim.Adam(self.critic.parameters(),
                                             lr=args.critic_lr)

    # move all four networks onto the GPU when one is present
    if torch.cuda.is_available():
        self.actor, self.actor_target = (self.actor.cuda(),
                                         self.actor_target.cuda())
        self.critic, self.critic_target = (self.critic.cuda(),
                                           self.critic_target.cuda())

    # bookkeeping
    self.criterion = nn.MSELoss()
    self.state_dim = state_dim
    self.action_dim = action_dim
    self.max_action = max_action
    self.memory = memory

    # TD3 hyper-parameters
    self.tau = args.tau
    self.discount = args.discount
    self.batch_size = args.batch_size
    self.policy_noise = args.policy_noise
    self.noise_clip = args.noise_clip
    self.policy_freq = args.policy_freq
#création d'un environnement Swimmer : print("Creating environment") env = gym.make('Swimmer-v2') #itialisation de l'environnement : env.reset() state_dim = env.observation_space.shape[0] action_dim = env.action_space.shape[0] max_action = int(env.action_space.high[0]) for filename in filenames: #chargement de l'acteur : print("Loading actor") actor = Actor(state_dim, action_dim, max_action, args) actor.load_model(actor_directory, "actor" + filename) critic = CriticTD3(state_dim, action_dim, layer_norm=args.layer_norm) critic.load_model(actor_directory, "critic" + filename) paramsA = np.array(actor.get_params()) print("min : " + str(np.min(paramsA))) print("max : " + str(np.max(paramsA))) picture = np.reshape( paramsA[1:-1], (249, 500)) #on reshape les paramètres (on perd 2 valeurs, tant pis) plt.imsave("actor" + filename, picture, vmin=-3, vmax=3, format='png') paramsC = np.array(critic.get_params()) print("min : " + str(np.min(paramsC))) print("max : " + str(np.max(paramsC))) picture2 = np.reshape( paramsC[1:-1],