def __init__(self, state_dim, action_dim, max_action,
             actor_lr=1e-4, critic_weight_decay=1e-2,
             sigma_init=1e-3, damp=1e-3, damp_limit=1e-5,
             pop_size=10, parents=4, elitism=False):
    """Build the actor/critic network pairs, their optimizers, and the CEM searcher.

    The previously hard-coded hyper-parameters are now keyword arguments whose
    defaults reproduce the original behavior exactly, so existing callers are
    unaffected.

    Args:
        state_dim: dimensionality of the observation vector.
        action_dim: dimensionality of the action vector.
        max_action: action-magnitude bound forwarded to ``Actor``.
        actor_lr: Adam learning rate for the actor.
        critic_weight_decay: L2 penalty for the critic's Adam optimizer
            (its learning rate stays at Adam's default, as before).
        sigma_init, damp, damp_limit, pop_size, parents, elitism:
            ``sepCEM`` hyper-parameters; defaults keep the prior values.
    """
    # Actor and its target start from identical weights.
    self.actor = Actor(state_dim, action_dim, max_action).to(device)
    self.actor_target = Actor(state_dim, action_dim, max_action).to(device)
    self.actor_target.load_state_dict(self.actor.state_dict())
    self.actor_optimizer = torch.optim.Adam(self.actor.parameters(), lr=actor_lr)

    # Critic pair, also synchronized at construction time.
    self.critic = Critic(state_dim, action_dim).to(device)
    self.critic_target = Critic(state_dim, action_dim).to(device)
    self.critic_target.load_state_dict(self.critic.state_dict())
    self.critic_optimizer = torch.optim.Adam(self.critic.parameters(),
                                             weight_decay=critic_weight_decay)

    # Evolutionary search over the actor's flat parameter vector.
    # Antithetic (mirrored-pair) sampling is only enabled for even population
    # sizes — identical to the old hard-coded True for the default pop_size=10,
    # and consistent with how the rest of this file configures sepCEM.
    self.es = sepCEM(self.actor.get_size(),
                     mu_init=self.actor.get_params(),
                     sigma_init=sigma_init,
                     damp=damp,
                     damp_limit=damp_limit,
                     pop_size=pop_size,
                     antithetic=not pop_size % 2,
                     parents=parents,
                     elitism=elitism)
def __init__(self, config):
    """Set up the task, networks, optimizers, replay machinery, and CEM searcher.

    Args:
        config: project configuration object exposing factory callables
            (``task_fn``, ``network_fn``, ``actor_optimizer_fn``,
            ``critic_optimizer_fn``, ``replay_fn``, ``random_process_fn``).
    """
    self.config = config
    self.task = config.task_fn()

    # Worker network is the one trained; the target network tracks it and
    # starts from identical weights.
    self.worker_network = config.network_fn()
    self.target_network = config.network_fn()
    self.target_network.load_state_dict(self.worker_network.state_dict())
    self.actor_opt = config.actor_optimizer_fn(
        self.worker_network.actor.parameters())
    self.critic_opt = config.critic_optimizer_fn(
        self.worker_network.critic.parameters())

    self.replay = config.replay_fn()
    self.random_process = config.random_process_fn()
    self.criterion = nn.MSELoss()
    self.total_steps = 0

    # CEM hyper-parameters.
    self.sigma_init = 1e-3
    self.damp = 1e-3
    self.damp_limit = 1e-5
    self.pop_size = 10
    # BUG FIX: the original assigned the *string* 'elitism' here, which is
    # merely truthy. Booleanized to True so the effective behavior (elitism
    # enabled inside sepCEM) is unchanged while the type is correct.
    # NOTE(review): confirm elitism was actually meant to be on — the sibling
    # agent in this file constructs sepCEM with elitism=False.
    self.elitism = True
    self.n_grad = 5
    self.start_steps = 1000  # 10000
    self.n_episodes = 1
    self.n_noisy = 0

    self.state_normalizer = Normalizer(self.task.state_dim)  # null_normaliser

    # Evolutionary search over the actor's flat parameter vector; antithetic
    # (mirrored-pair) sampling is enabled only when pop_size is even, and half
    # the population is kept as parents.
    self.es = sepCEM(self.worker_network.actor.get_size(),
                     mu_init=self.worker_network.actor.get_params(),
                     sigma_init=self.sigma_init,
                     damp=self.damp,
                     damp_limit=self.damp_limit,
                     pop_size=self.pop_size,
                     antithetic=not self.pop_size % 2,
                     parents=self.pop_size // 2,
                     elitism=self.elitism)
theta=args.ou_theta, sigma=args.ou_sigma)
# Move all four networks to the GPU when one is available.
if USE_CUDA:
    critic.cuda()
    critic_t.cuda()
    actor.cuda()
    actor_t.cuda()
print("OK 4")
# CEM
# Separable CEM searcher over the actor's flat parameter vector. The
# `antithetic` flag is True only when the population size is even, and half
# of the population is selected as parents. (sepCEM is project-local —
# exact update rule not visible from here.)
es = sepCEM(actor.get_size(),
            mu_init=actor.get_params(),
            sigma_init=args.sigma_init,
            damp=args.damp,
            damp_limit=args.damp_limit,
            pop_size=args.pop_size,
            antithetic=not args.pop_size % 2,
            parents=args.pop_size // 2,
            elitism=args.elitism)
# es = Control(actor.get_size(), pop_size=args.pop_size, mu_init=actor.get_params())

# training
# Counters for environment interaction / checkpointing.
step_cpt = 0
total_steps = 0
actor_steps = 0
# Accumulator for per-evaluation results (presumably dumped to CSV later —
# writer not visible in this chunk).
df = pd.DataFrame(columns=[
    "total_steps", "average_score", "average_score_rl", "average_score_ea",
    "best_score"
])
print("OK 5")