Example #1
    def __init__(self, state_size, action_size, seed, mode="train"):
        """Initialize an Agent object.
        
        Params
        ======
            state_size (int): dimension of each state
            action_size (int): dimension of each action
            seed (int): random seed
            mode (str): if "eval", the agent will neither learn nor collect experiences
        """
        self.state_size = state_size
        self.action_size = action_size
        random.seed(seed)
        self.seed = seed

        # Q-Network
        self.qnetwork_local = QNetwork(state_size, action_size,
                                       seed).to(device)
        self.qnetwork_target = QNetwork(state_size, action_size,
                                        seed).to(device)
        self.optimizer = optim.Adam(self.qnetwork_local.parameters(), lr=LR)

        # Replay memory
        self.memory = PrioritizedBuffer(action_size, BUFFER_SIZE, BATCH_SIZE,
                                        seed)
        # Initialize time step (for updating every UPDATE_EVERY steps)
        self.t_step = 0
        # Caches the expected action value of the most recent action
        self.last_action_value = None

        self.set_mode(mode)
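All of these examples rely on module-level constants and a `device` handle defined elsewhere in their repositories. A minimal sketch of typical definitions follows; the concrete values are assumptions, not taken from any of the listed projects.

import torch

# Hypothetical hyperparameter values; each original repository defines its own.
BUFFER_SIZE = int(1e5)   # replay buffer size
BATCH_SIZE = 64          # minibatch size
GAMMA = 0.99             # discount factor
TAU = 1e-3               # interpolation factor for soft target updates
LR = 5e-4                # learning rate
UPDATE_EVERY = 4         # how many steps between learning updates

# Run on GPU when available, otherwise fall back to CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")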
Example #2
    def __init__(self, state_size, action_size, seed):
        """Initialize an Agent object.

        Params
        ======
            state_size (int): dimension of each state
            action_size (int): dimension of each action
            seed (int): random seed
        """
        self.state_size = state_size
        self.action_size = action_size
        random.seed(seed)
        self.seed = seed

        # Q-Network
        self.qnetwork_local = QNetwork(state_size, action_size,
                                       seed).to(device)
        self.qnetwork_target = QNetwork(state_size, action_size,
                                        seed).to(device)
        self.q_optimizer = optim.Adam(self.qnetwork_local.parameters(), lr=LR)

        # Policy Network
        self.policy_network_local = PolicyNetwork(state_size, action_size,
                                                  seed).to(device)
        self.policy_network_target = PolicyNetwork(state_size, action_size,
                                                   seed).to(device)
        self.policy_optimizer = optim.Adam(
            self.policy_network_local.parameters(), lr=LR)

        # Replay memory
        self.memory = ReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE, seed)
        # Initialize time step (for updating every UPDATE_EVERY steps)
        self.t_step = 0

        # Action selection
        self.noise_scale = START_NOISE_SCALE
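Only the initial value `START_NOISE_SCALE` appears in this example; how the scale is used is not shown. As a rough illustration, assuming a continuous action space, exploration noise of this kind is often added to the greedy action and decayed over time. All names and values below are hypothetical.

import numpy as np

START_NOISE_SCALE = 1.0   # hypothetical starting scale
NOISE_DECAY = 0.995       # hypothetical per-episode decay
MIN_NOISE_SCALE = 0.01    # hypothetical floor

def add_exploration_noise(action, noise_scale):
    """Perturb a continuous action with Gaussian noise (illustrative only)."""
    noisy = action + noise_scale * np.random.standard_normal(np.shape(action))
    return np.clip(noisy, -1.0, 1.0)

# Decay the scale as training progresses.
noise_scale = START_NOISE_SCALE
noise_scale = max(MIN_NOISE_SCALE, noise_scale * NOISE_DECAY)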
Example #3
    def __init__(self, state_size, action_size, seed,
                 local_filename=None, target_filename=None):
        """Initialize an Agent object.
        
        Params
        ======
            state_size (int): dimension of each state
            action_size (int): dimension of each action
            seed (int): random seed
        """
        self.state_size = state_size
        self.action_size = action_size
        random.seed(seed)
        self.seed = seed

        # Q-Network
        self.qnetwork_local = QNetwork(state_size, action_size, seed).to(device)
        self.qnetwork_target = QNetwork(state_size, action_size, seed).to(device)
        
        # If checkpoint filenames are given, load the saved weights
        if local_filename is not None:
            self.qnetwork_local.load_state_dict(torch.load(local_filename))
        if target_filename is not None:
            self.qnetwork_target.load_state_dict(torch.load(target_filename))
            
        self.optimizer = optim.Adam(self.qnetwork_local.parameters(), lr=LR)

        # Replay memory
        self.memory = ReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE, seed)
        # Initialize time step (for updating every UPDATE_EVERY steps)
        self.t_step = 0
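Example #3 optionally restores pre-trained weights. A short usage sketch follows; the class name `Agent`, the environment dimensions, and the file names are hypothetical.

import torch

# Save trained weights at the end of training (hypothetical file names).
torch.save(agent.qnetwork_local.state_dict(), "checkpoint_local.pth")
torch.save(agent.qnetwork_target.state_dict(), "checkpoint_target.pth")

# Later, rebuild an agent that starts from those checkpoints instead of
# randomly initialized networks.
agent = Agent(state_size=37, action_size=4, seed=0,
              local_filename="checkpoint_local.pth",
              target_filename="checkpoint_target.pth")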
Example #4
    def __init__(self, state_size, action_size, seed, learning_rate):
        """Initialize an Agent object.
        
        Params
        ======
            state_size (int): dimension of each state
            action_size (int): dimension of each action
            seed (int): random seed
            learning_rate (float): learning rate for the Adam optimizer
        """
        self.state_size = state_size
        self.action_size = action_size
        random.seed(seed)

        # Q-Network
        self.qnetwork_local = QNetwork(state_size, action_size,
                                       seed).to(device)
        self.qnetwork_target = QNetwork(state_size, action_size,
                                        seed).to(device)
        self.optimizer = optim.Adam(self.qnetwork_local.parameters(),
                                    lr=learning_rate)

        # Replay memory
        self.buffer_size = int(1e5)
        self.batch_size = 64

        self.memory = ReplayBuffer(action_size, self.buffer_size,
                                   self.batch_size, seed)

        self.t_step = 0
Example #5
    def __init__(self, state_size, action_size, gamma,
                 hidden_layers, drop_p, batch_size, learning_rate,
                 soft_upd_param, update_every, buffer_size, seed):
        """Initialize a Double-DQN agent; hyperparameters are forwarded to the base class."""
        super(AgentDoubleDQ, self).__init__(
            state_size, action_size, gamma, hidden_layers, drop_p,
            batch_size, learning_rate, soft_upd_param, update_every,
            buffer_size, seed)

        # Q-Network Architecture: Dueling Q-Nets
        self.qnetwork_local = QNetwork(
            self.state_size, self.action_size, self.seed, self.hidden_layers, self.drop_p).to(device)
        self.qnetwork_target = QNetwork(
            self.state_size, self.action_size, self.seed, self.hidden_layers, self.drop_p).to(device)
        self.optimizer = optim.Adam(self.qnetwork_local.parameters(), lr=self.learning_rate)
        # Experience Replay
        self.memory = ReplayBuffer(action_size, buffer_size, batch_size, seed)
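A construction sketch for this subclass; every hyperparameter value below is purely hypothetical.

agent = AgentDoubleDQ(state_size=37, action_size=4, gamma=0.99,
                      hidden_layers=[64, 64], drop_p=0.1,
                      batch_size=64, learning_rate=5e-4, soft_upd_param=1e-3,
                      update_every=4, buffer_size=int(1e5), seed=0)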
Example #6
    def __init__(self,
                 state_size,
                 action_size,
                 seed,
                 buffer_size=BUFFER_SIZE,
                 batch_size=BATCH_SIZE,
                 gamma=GAMMA,
                 tau=TAU,
                 lr=LR,
                 update_every=4):
        '''Initialize the agent.

        Params
        ======
            state_size (int)  : dimension of state space
            action_size (int) : dimension of action space
            seed (int)        : random seed
            buffer_size (int) : maximum size of the replay buffer
            batch_size (int)  : size of each training batch
            gamma (float)     : discount factor
            tau (float)       : interpolation factor for soft update of target parameters
            lr (float)        : learning rate
            update_every (int): how often to update the network
        '''

        # Initialize important parameters
        self.state_size = state_size
        self.action_size = action_size
        random.seed(seed)
        self.seed = seed
        self.gamma = gamma
        self.tau = tau
        self.update_every = update_every

        # initialize Q-Network
        self.qnetwork_local = QNetwork(state_size, action_size,
                                       seed).to(device)
        self.qnetwork_target = QNetwork(state_size, action_size,
                                        seed).to(device)
        self.optimizer = optim.Adam(self.qnetwork_local.parameters(), lr=lr)

        # Initialize replay memory buffer
        self.memory = ReplayBuffer(action_size, buffer_size, batch_size, seed)
        # Initialize time step (for updating every UPDATE_EVERY steps)
        self.t_step = 0
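The docstring above describes `tau` as the factor for soft updates of the target parameters, i.e. θ_target ← τ·θ_local + (1 − τ)·θ_target. A minimal sketch of that update, assuming standard PyTorch modules (not copied from this repository):

def soft_update(local_model, target_model, tau):
    """Blend local parameters into the target network:
    θ_target ← τ·θ_local + (1 − τ)·θ_target."""
    for target_param, local_param in zip(target_model.parameters(),
                                         local_model.parameters()):
        target_param.data.copy_(tau * local_param.data +
                                (1.0 - tau) * target_param.data)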
Example #7
    def __init__(self, state_size, action_size, seed, td_target_type="DQN"):
        """Initialize an Agent object.

        Params
        ======
            state_size (int): dimension of each state
            action_size (int): dimension of each action
            seed (int): random seed
            td_target_type (str): either "DQN" or "Double DQN"
        """
        self.state_size = state_size
        self.action_size = action_size
        random.seed(seed)
        self.seed = seed

        # Q-Network
        self.qnetwork_local = QNetwork(state_size, action_size, seed).to(device)
        self.qnetwork_target = QNetwork(state_size, action_size, seed).to(device)
        self.optimizer = optim.Adam(self.qnetwork_local.parameters(), lr=LR)
        assert td_target_type in {"DQN", "Double DQN"}
        self.td_target_type = td_target_type

        # Replay memory
        self.memory = ReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE, seed)
        # Initialize time step (for updating every UPDATE_EVERY steps)
        self.t_step = 0
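Example #7 only stores `td_target_type`; the targets themselves are computed in the (unshown) learning step. A hedged sketch of how the two settings usually differ, assuming batch tensors of shape (batch_size, 1):

import torch

def td_targets(rewards, next_states, dones, gamma,
               qnetwork_local, qnetwork_target, td_target_type="DQN"):
    """Compute TD targets for a batch (illustrative, not this repository's code)."""
    with torch.no_grad():
        if td_target_type == "DQN":
            # The target network both selects and evaluates the next action.
            q_next = qnetwork_target(next_states).max(dim=1, keepdim=True)[0]
        else:  # "Double DQN"
            # The local network selects the action, the target network evaluates it.
            best_actions = qnetwork_local(next_states).argmax(dim=1, keepdim=True)
            q_next = qnetwork_target(next_states).gather(1, best_actions)
    return rewards + gamma * q_next * (1 - dones)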
Example #8
    def __init__(self, network_type, state_size, action_size, double_dqn=True):
        """Initialize an Agent object.

        Params
        ======
            network_type (str): 'fc' for a fully connected Q-network, anything else for a convolutional one
            state_size (int): dimension of each state (number of input channels for the convolutional variant)
            action_size (int): dimension of each action
            double_dqn (bool): whether to use Double DQN targets
        """
        self.state_size = state_size
        self.action_size = action_size
        self.double_dqn = double_dqn
        # Q-Network
        if network_type == 'fc': # Fully connected
            self.qnetwork_local = QNetwork(state_size, action_size).to(device)
        else: # Convolutional
            self.qnetwork_local = ConvQNetwork(state_size, action_size).to(device) # state_size == in_channels
        self.qnetwork_target = copy.deepcopy(self.qnetwork_local)

        self.optimizer = optim.Adam(self.qnetwork_local.parameters(), lr=LR)

        # Replay memory
        self.memory = ReplayBuffer(network_type, action_size, BUFFER_SIZE, BATCH_SIZE)
        # Initialize time step (for updating every UPDATE_EVERY steps)
        self.t_step = 0
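A usage sketch for the two network types; the class name `Agent` and the dimensions are hypothetical.

# Vector observations: a fully connected Q-network over a 37-dimensional state.
agent_fc = Agent(network_type='fc', state_size=37, action_size=4)

# Pixel observations: a convolutional Q-network; here state_size is interpreted
# as the number of input channels.
agent_conv = Agent(network_type='conv', state_size=4, action_size=4,
                   double_dqn=False)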