    def __init__(self,
                 env_name,
                 env,
                 actor_lr=3e-4,
                 critic_lr=3e-3,
                 gamma=0.99,
                 batch_size=32,
                 replay_memory_size=1e6,
                 is_test=False,
                 save_model_frequency=200,
                 eval_frequency=10):
        self.env_name = env_name
        self.env = env
        self.actor_lr = actor_lr
        self.critic_lr = critic_lr
        self.gamma = gamma
        self.batch_size = batch_size
        self.replay_memory_size = replay_memory_size
        self.save_model_frequency = save_model_frequency
        self.eval_frequency = eval_frequency

        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        print('Train on device:', self.device)
        if not is_test:
            self.writer = SummaryWriter('./logs/DDPG_{}'.format(self.env_name))
        self.loss_fn = F.mse_loss
        self.memory = Memory(int(replay_memory_size), batch_size)

        n_state, n_action = env.observation_space.shape[0], env.action_space.shape[0]
        self.noise = OUNoise(n_action)

        self.actor = DeterministicActor(n_state, n_action,
                                        action_scale=int(env.action_space.high[0])).to(self.device)
        self.target_actor = DeterministicActor(n_state, n_action,
                                               action_scale=int(env.action_space.high[0])).to(self.device)
        update_model(self.target_actor, self.actor)
        self.actor_opt = optim.Adam(self.actor.parameters(), lr=self.actor_lr)

        self.critic = Critic(n_state + n_action).to(self.device)
        self.target_critic = Critic(n_state + n_action).to(self.device)
        update_model(self.target_critic, self.critic)
        self.critic_opt = optim.Adam(self.critic.parameters(), lr=self.critic_lr)

        print(self.actor)
        print(self.critic)
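
update_model is called above but not defined in this snippet. A minimal sketch of what it is assumed to do here, namely a hard copy of the online network's parameters into the freshly created target network (soft, tau-weighted updates would normally follow later in training):

def update_model(target, source):
    """Copy every parameter of `source` into `target` (hard update)."""
    target.load_state_dict(source.state_dict())
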
Example #2
    def __init__(self,
                 env_name,
                 env,
                 critic_lr=3e-4,
                 train_iters=20,
                 backtrack_coeff=1,
                 backtrack_damp_coeff=0.5,
                 backtrack_alpha=0.5,
                 delta=0.01,
                 sample_size=2048,
                 gamma=0.99,
                 lam=0.97,
                 is_test=False,
                 save_model_frequency=200,
                 eval_frequency=20):
        self.env_name = env_name
        self.env = env
        self.critic_lr = critic_lr
        self.train_iters = train_iters
        self.backtrack_coeff = backtrack_coeff
        self.backtrack_damp_coeff = backtrack_damp_coeff
        self.backtrack_alpha = backtrack_alpha
        self.sample_size = sample_size
        self.delta = delta
        self.gamma = gamma
        self.lam = lam
        self.save_model_frequency = save_model_frequency
        self.eval_frequency = eval_frequency

        self.total_step = 0
        self.device = torch.device(
            'cuda' if torch.cuda.is_available() else 'cpu')
        # self.device = torch.device('cpu')
        print('Train on device:', self.device)
        if not is_test:
            self.writer = SummaryWriter('./logs/TRPO_{}'.format(self.env_name))
        self.loss_fn = F.mse_loss

        n_state = env.observation_space.shape[0]
        n_action = env.action_space.shape[0]
        self.old_policy = GaussianActor(
            n_state, n_action, 128,
            action_scale=int(env.action_space.high[0])).to(self.device)
        self.new_policy = GaussianActor(
            n_state, n_action, 128,
            action_scale=int(env.action_space.high[0])).to(self.device)
        update_model(self.old_policy, self.new_policy)
        self.critic = Critic(n_state, 128).to(self.device)
        self.critic_opt = optim.Adam(self.critic.parameters(),
                                     lr=self.critic_lr)
        self.trace = Trace()

        print(self.new_policy)
        print(self.critic)
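
The gamma and lam fields point to Generalized Advantage Estimation, but the estimation code itself is not part of this snippet. A stand-alone sketch of GAE(lambda) over one finished episode, assuming plain Python lists of rewards and value estimates (names and signature are illustrative only):

def compute_gae(rewards, values, gamma=0.99, lam=0.97):
    """Return advantages A_t = sum_k (gamma * lam)^k * delta_{t+k}."""
    advantages = [0.0] * len(rewards)
    gae = 0.0
    for t in reversed(range(len(rewards))):
        # bootstrap with the next value estimate, or 0 at the end of the episode
        next_value = values[t + 1] if t + 1 < len(values) else 0.0
        delta = rewards[t] + gamma * next_value - values[t]
        gae = delta + gamma * lam * gae
        advantages[t] = gae
    return advantages
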
Example #3
    def __init__(self,
                 env_name,
                 env,
                 actor_lr=3e-4,
                 critic_lr=3e-4,
                 sample_size=2048,
                 gamma=0.99,
                 lam=0.95,
                 is_test=False,
                 save_model_frequency=200,
                 eval_frequency=10):
        self.env_name = env_name
        self.env = env
        self.actor_lr = actor_lr
        self.critic_lr = critic_lr
        self.sample_size = sample_size
        self.gamma = gamma
        self.lam = lam
        self.save_model_frequency = save_model_frequency
        self.eval_frequency = eval_frequency

        self.total_step = 0
        self.state_normalize = ZFilter(env.observation_space.shape[0])
        self.device = torch.device(
            'cuda' if torch.cuda.is_available() else 'cpu')
        print('Train on device:', self.device)
        if not is_test:
            self.writer = SummaryWriter('./logs_epoch_update/A2C_{}'.format(
                self.env_name))
        self.loss_fn = F.smooth_l1_loss

        self.trace = Trace()
        self.actor = GaussianActor(
            env.observation_space.shape[0],
            env.action_space.shape[0],
            action_scale=int(env.action_space.high[0])).to(self.device)
        self.actor_opt = optim.Adam(self.actor.parameters(), lr=self.actor_lr)
        self.critic = Critic(env.observation_space.shape[0]).to(self.device)
        self.critic_opt = optim.Adam(self.critic.parameters(),
                                     lr=self.critic_lr)
        print(self.actor)
        print(self.critic)
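
ZFilter is used for online state normalization but its implementation is not shown. A rough, self-contained sketch of the running mean / standard deviation filter it is assumed to be (Welford-style update; the real ZFilter may also clip the normalized values):

import numpy as np

class RunningStateFilter:
    """Normalize observations with running estimates of mean and variance."""

    def __init__(self, shape):
        self.n = 0
        self.mean = np.zeros(shape)
        self.m2 = np.zeros(shape)

    def __call__(self, x):
        x = np.asarray(x, dtype=np.float64)
        self.n += 1
        delta = x - self.mean
        self.mean += delta / self.n
        self.m2 += delta * (x - self.mean)
        std = np.sqrt(self.m2 / max(self.n - 1, 1)) + 1e-8
        return (x - self.mean) / std
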
Example #4
    def __init__(self,
                 env_name,
                 env,
                 actor_lr=3e-4,
                 critic_lr=3e-3,
                 gamma=0.99,
                 is_continue_action_space=False,
                 reward_shaping_func=lambda x: x[1],
                 is_test=False,
                 save_model_frequency=200,
                 eval_frequency=10):
        self.env_name = env_name
        self.env = env
        self.actor_lr = actor_lr
        self.critic_lr = critic_lr
        self.gamma = gamma
        self.reward_shaping_func = reward_shaping_func
        self.save_model_frequency = save_model_frequency
        self.eval_frequency = eval_frequency

        self.device = torch.device(
            'cuda' if torch.cuda.is_available() else 'cpu')
        print('Train on device:', self.device)
        if not is_test:
            self.writer = SummaryWriter('./logs_step_update/A2C_{}'.format(
                self.env_name))
        self.loss_fn = F.mse_loss

        if is_continue_action_space:
            self.actor = GaussianActor(
                env.observation_space.shape[0],
                env.action_space.shape[0],
                action_scale=int(env.action_space.high[0])).to(self.device)
        else:
            self.actor = Actor(env.observation_space.shape[0],
                               env.action_space.n).to(self.device)
        self.actor_opt = optim.Adam(self.actor.parameters(), lr=self.actor_lr)
        self.critic = Critic(env.observation_space.shape[0]).to(self.device)
        self.critic_opt = optim.Adam(self.critic.parameters(),
                                     lr=self.critic_lr)
        print(self.actor)
        print(self.critic)
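
The default reward_shaping_func=lambda x: x[1] suggests the function receives a transition tuple and, by default, simply returns the raw reward unchanged. A hypothetical construction of this agent for a discrete-action environment; the class name A2CAgent is an assumption, not taken from the snippet:

import gym

env = gym.make('CartPole-v1')  # discrete action space
# 'A2CAgent' is an assumed name for the class whose __init__ is shown above
agent = A2CAgent('CartPole-v1', env, is_continue_action_space=False)

Example #5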
    def __init__(self,
                 env_name,
                 env,
                 actor_lr=3e-4,
                 critic_lr=3e-4,
                 sample_size=2048,
                 batch_size=64,
                 sample_reuse=1,
                 train_iters=5,
                 clip=0.2,
                 gamma=0.99,
                 lam=0.95,
                 is_test=False,
                 save_model_frequency=200,
                 eval_frequency=5,
                 save_log_frequency=1):
        self.env_name = env_name
        self.env = env
        self.actor_lr = actor_lr
        self.critic_lr = critic_lr
        self.sample_size = sample_size
        self.batch_size = batch_size
        self.sample_reuse = sample_reuse
        self.train_iters = train_iters
        self.clip = clip
        self.gamma = gamma
        self.lam = lam
        self.save_model_frequency = save_model_frequency
        self.eval_frequency = eval_frequency
        self.save_log_frequency = save_log_frequency

        self.total_step = 0
        self.device = torch.device(
            'cuda' if torch.cuda.is_available() else 'cpu')
        print('Train on device:', self.device)
        if not is_test:
            self.writer = SummaryWriter('./logs/PPO_{}'.format(self.env_name))
        self.loss_fn = F.mse_loss

        n_state = env.observation_space.shape[0]
        n_action = env.action_space.shape[0]
        self.state_normalize = ZFilter(n_state)
        self.actor = GaussianActor(n_state,
                                   n_action,
                                   128,
                                   action_scale=int(env.action_space.high[0]),
                                   weights_init_=orthogonal_weights_init_).to(
                                       self.device)
        self.critic = Critic(n_state, 128,
                             orthogonal_weights_init_).to(self.device)

        # self.optimizer = optim.Adam([
        #     {'params': self.critic.parameters(), 'lr': self.critic_lr},
        #     {'params': self.actor.parameters(), 'lr': self.actor_lr}
        # ])
        self.actor_opt = optim.Adam(self.actor.parameters(), lr=self.actor_lr)
        self.critic_opt = optim.Adam(self.critic.parameters(),
                                     lr=self.critic_lr)
        self.trace = Trace()

        print(self.actor)
        print(self.critic)
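
orthogonal_weights_init_ is passed to the actor and critic constructors but is not defined in this snippet. A minimal sketch of the orthogonal initializer it presumably is, applied per nn.Linear layer (the gain value is an assumption):

import torch.nn as nn

def orthogonal_weights_init_(m):
    """Orthogonally initialize Linear weights and zero the biases."""
    if isinstance(m, nn.Linear):
        nn.init.orthogonal_(m.weight, gain=2 ** 0.5)
        nn.init.zeros_(m.bias)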