Example #1
0
    def __init__(self,
                 state_size,
                 action_size,
                 random_seed,
                 memory,
                 hyper_param=None):
        """Initialize an Agent object.

        Params
        ======
            state_size (int): dimension of each state
            action_size (int): dimension of each action
            random_seed (int): random seed
            memory: externally-owned replay buffer shared with this agent
            hyper_param (HyperParam): optional hyper-parameter bundle;
                when None, a default one is built below
        """

        # Build default hyper-parameters when the caller supplied none.
        if hyper_param is None:
            hyper_param = HyperParam()
            hyper_param.actor_fc1 = 128
            hyper_param.actor_fc2 = 128
            hyper_param.critic_fc1 = 128
            hyper_param.critic_fc2 = 128
            hyper_param.lr_actor = LR_ACTOR
            hyper_param.lr_critic = LR_CRITIC
            hyper_param.tau = TAU

        self.hyper_param = hyper_param

        self.state_size = state_size
        self.action_size = action_size
        # NOTE(review): random.seed() returns None, so self.seed is always
        # None; the call still seeds the global RNG as intended.
        self.seed = random.seed(random_seed)

        # Actor Network (w/ Target Network)
        self.actor_local = Actor(state_size, action_size, random_seed,
                                 hyper_param.actor_fc1,
                                 hyper_param.actor_fc2).to(device)
        self.actor_target = Actor(state_size, action_size, random_seed,
                                  hyper_param.actor_fc1,
                                  hyper_param.actor_fc2).to(device)
        self.actor_optimizer = optim.Adam(self.actor_local.parameters(),
                                          lr=hyper_param.lr_actor)

        # Critic Network (w/ Target Network)
        self.critic_local = Critic(state_size, action_size,
                                   random_seed).to(device)
        self.critic_target = Critic(state_size, action_size,
                                    random_seed).to(device)
        # Fix: honor hyper_param.lr_critic (was hard-coded to the module
        # constant LR_CRITIC, silently ignoring a caller-supplied rate;
        # identical in the default-hyper_param path, which sets
        # lr_critic = LR_CRITIC above).
        self.critic_optimizer = optim.Adam(self.critic_local.parameters(),
                                           lr=hyper_param.lr_critic,
                                           weight_decay=WEIGHT_DECAY)

        # Noise process for exploration.
        self.noise = OUNoise(action_size, random_seed)
        # Replay buffer is injected, not owned, so several agents can share it.
        self.memory = memory
Example #2
0
    def __init__(self, state_size, action_size, random_seed):
        """Initialize an Agent object.

        Params
        ======
            state_size (int): dimension of each state
            action_size (int): dimension of each action
            random_seed (int): random seed
        """
        self.state_size = state_size
        self.action_size = action_size
        self.seed = random.seed(random_seed)

        # Actor network plus its slowly-tracking target copy.
        self.actor_local = Actor(state_size, action_size, random_seed).to(device)
        self.actor_target = Actor(state_size, action_size, random_seed).to(device)
        self.actor_optimizer = optim.Adam(
            self.actor_local.parameters(), lr=LR_ACTOR)

        # Critic network plus its target copy.
        self.critic_local = Critic(state_size, action_size, random_seed).to(device)
        self.critic_target = Critic(state_size, action_size, random_seed).to(device)
        self.critic_optimizer = optim.Adam(
            self.critic_local.parameters(), lr=LR_CRITIC)

        # Ornstein-Uhlenbeck process for exploration noise.
        self.noise = OUNoise(action_size, random_seed)

        # Experience replay storage.
        self.memory = ReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE, random_seed)
Example #3
0
    def __init__(self,
                 state_size,
                 action_size,
                 num_agents,
                 random_seed,
                 hyper_param=None):
        """Initialize an Agent object.

        Params
        ======
            state_size (int): dimension of each state
            action_size (int): dimension of each action
            num_agents (int): number of agents sharing this setup
            random_seed (int): random seed
            hyper_param (HyperParam): optional hyper-parameter bundle;
                when None, the defaults below are used
        """
        self.state_size = state_size
        self.action_size = action_size
        self.seed = random.seed(random_seed)
        self.num_agents = num_agents

        # Fall back to a default hyper-parameter set when none was given.
        if hyper_param is None:
            hyper_param = HyperParam()
            defaults = {
                'epsilon': True,
                'epsilon_decay': EXPLORE_EXPLOIT_DECAY,
                'epsilon_spaced_init': 100,
                'epsilon_spaced_decay': 1.5,
                'actor_fc1': 128,
                'actor_fc2': 128,
                'critic_fc1': 128,
                'critic_fc2': 128,
                'lr_actor': 1e-3,
                'lr_critic': 1e-3,
                'tau': 1e-4,
                'batch_size': 128,
                'n_learn_updates': 10,
                'n_time_steps': 20,
            }
            for attr, value in defaults.items():
                setattr(hyper_param, attr, value)

        self.hyper_param = hyper_param

        self.device = device
        # Replay buffer must exist before the exploration-decay model,
        # which reads from it.
        self.memory = ReplayBuffer(action_size, BUFFER_SIZE,
                                   hyper_param.batch_size, random_seed)

        # Actor Network (w/ Target Network)
        self.actor_local = Actor(state_size, action_size, random_seed,
                                 hyper_param.actor_fc1,
                                 hyper_param.actor_fc2).to(device)
        self.actor_target = Actor(state_size, action_size, random_seed,
                                  hyper_param.actor_fc1,
                                  hyper_param.actor_fc2).to(device)
        self.actor_optimizer = optim.Adam(self.actor_local.parameters(),
                                          lr=hyper_param.lr_actor)

        # Critic Network (w/ Target Network); the critic observes the joint
        # state/action of all agents, hence the num_agents multipliers.
        self.critic_local = Critic(state_size * num_agents,
                                   action_size * num_agents,
                                   random_seed).to(device)
        self.critic_target = Critic(state_size * num_agents,
                                    action_size * num_agents,
                                    random_seed).to(device)
        self.critic_optimizer = optim.Adam(self.critic_local.parameters(),
                                           lr=hyper_param.lr_critic,
                                           weight_decay=WEIGHT_DECAY)

        # Exploration decay driven by positive memories in the replay buffer.
        self.epsilon = PositiveMemoriesFactorExplorationDecay(
            0.5, 0, 0.0002, 0.12, self.memory)

        # Ornstein-Uhlenbeck exploration noise.
        self.noise = OUNoise(action_size, random_seed)

        self.train_mode = True

        # Per-learning-step loss histories for diagnostics.
        self.actor_loss = []
        self.critic_loss = []
Example #4
0
    def __init__(self,
                 state_size,
                 action_size,
                 num_agents,
                 random_seed,
                 hyper_param=None):
        """Initialize an Agent object.

        Params
        ======
            state_size (int): dimension of each state
            action_size (int): dimension of each action
            num_agents (int): number of agents sharing this setup
            random_seed (int): random seed
            hyper_param (HyperParam): optional hyper-parameter bundle;
                when None, the defaults below are used
        """
        self.state_size = state_size
        self.action_size = action_size
        # NOTE(review): random.seed() returns None, so self.seed is always
        # None; the call still seeds the global RNG as intended.
        self.seed = random.seed(random_seed)
        self.num_agents = num_agents

        # Fall back to a default hyper-parameter set when none was given.
        if hyper_param is None:
            hyper_param = HyperParam()
            hyper_param.epsilon = False
            hyper_param.actor_fc1 = 128
            hyper_param.actor_fc2 = 128
            hyper_param.critic_fc1 = 128
            hyper_param.critic_fc2 = 128
            hyper_param.lr_actor = 1e-3
            hyper_param.lr_critic = 1e-3
            hyper_param.eps_actor = 1e-7
            hyper_param.eps_critic = 1e-7
            hyper_param.tau = 1e-4
            hyper_param.buffer_size = int(1e6)
            hyper_param.batch_size = 128
            hyper_param.n_learn_updates = 10
            hyper_param.n_time_steps = 20
            hyper_param.gamma = 0.99

        self.hyper_param = hyper_param

        self.device = device
        self.memory = ReplayBuffer(action_size, self.hyper_param.buffer_size,
                                   self.hyper_param.batch_size, random_seed)

        # Actor Network (w/ Target Network)
        self.actor_local = Actor(state_size, action_size, random_seed,
                                 hyper_param.actor_fc1,
                                 hyper_param.actor_fc2).to(device)
        self.actor_target = Actor(state_size, action_size, random_seed,
                                  hyper_param.actor_fc1,
                                  hyper_param.actor_fc2).to(device)
        self.actor_optimizer = optim.Adam(self.actor_local.parameters(),
                                          lr=hyper_param.lr_actor,
                                          eps=hyper_param.eps_actor)

        # Critic Network (w/ Target Network)
        self.critic_local = Critic(state_size, action_size,
                                   random_seed).to(device)
        self.critic_target = Critic(state_size, action_size,
                                    random_seed).to(device)
        self.critic_optimizer = optim.Adam(self.critic_local.parameters(),
                                           lr=hyper_param.lr_critic,
                                           eps=hyper_param.eps_critic)

        # Start targets as exact copies of the local networks.
        self.hard_update(self.actor_target, self.actor_local)
        self.hard_update(self.critic_target, self.critic_local)

        # Zero-mean Ornstein-Uhlenbeck exploration noise.
        self.noise = OUNoise(action_size, random_seed, mu=0.0)

        self.train_mode = True

        # Per-learning-step loss histories for diagnostics.
        self.actor_loss = []
        self.critic_loss = []

        # Fix: was hard-coded to [[0.0, 0.0], [0.0, 0.0]] (2 agents x 2 action
        # dims); build from num_agents/action_size instead. Identical value
        # for the original 2x2 case.
        self.orig_actions = [[0.0] * action_size for _ in range(num_agents)]

        # Caller-supplied hyper_param with epsilon=True must also provide
        # epsilon_model; the default bundle disables epsilon entirely.
        if hyper_param.epsilon:
            self.epsilon = hyper_param.epsilon_model(self.memory)