def __init__(self, state_size, action_size, seed, mode="train"):
    """Initialize an Agent object.

    Params
    ======
        state_size (int): dimension of each state
        action_size (int): dimension of each action
        seed (int): random seed
        mode (str): if eval, the agent will not learn and collect experiences
    """
    self.state_size = state_size
    self.action_size = action_size
    # Store the seed value itself. random.seed() returns None, so the
    # original `self.seed = random.seed(seed)` silently left self.seed = None.
    self.seed = seed
    random.seed(seed)

    # Q-Network: local network is trained; target network provides stable TD targets.
    self.qnetwork_local = QNetwork(state_size, action_size, seed).to(device)
    self.qnetwork_target = QNetwork(state_size, action_size, seed).to(device)
    self.optimizer = optim.Adam(self.qnetwork_local.parameters(), lr=LR)

    # Replay memory (prioritized experience replay)
    self.memory = PrioritizedBuffer(action_size, BUFFER_SIZE, BATCH_SIZE, seed)
    # Time step counter (learning happens every UPDATE_EVERY steps)
    self.t_step = 0
    # Caches the expected action value of the last act() call
    self.last_action_value = None
    self.set_mode(mode)
def __init__(self, state_size, action_size, seed):
    """Initialize an Agent object.

    Params
    ======
        state_size (int): dimension of each state
        action_size (int): dimension of each action
        seed (int): random seed
    """
    self.state_size = state_size
    self.action_size = action_size
    # Store the seed value itself. random.seed() returns None, so the
    # original `self.seed = random.seed(seed)` silently left self.seed = None.
    self.seed = seed
    random.seed(seed)

    # Q-Network (critic): local is trained, target provides stable TD targets.
    self.qnetwork_local = QNetwork(state_size, action_size, seed).to(device)
    self.qnetwork_target = QNetwork(state_size, action_size, seed).to(device)
    self.q_optimizer = optim.Adam(self.qnetwork_local.parameters(), lr=LR)

    # Policy Network (actor): local/target pair, same pattern as the critic.
    self.policy_network_local = PolicyNetwork(state_size, action_size, seed).to(device)
    self.policy_network_target = PolicyNetwork(state_size, action_size, seed).to(device)
    self.policy_optimizer = optim.Adam(
        self.policy_network_local.parameters(), lr=LR)

    # Replay memory
    self.memory = ReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE, seed)
    # Time step counter (learning happens every UPDATE_EVERY steps)
    self.t_step = 0

    # Action selection: exploration noise scale, annealed during training.
    self.noise_scale = START_NOISE_SCALE
def __init__(self, state_size, action_size, seed,
             local_filename=None, target_filename=None):
    """Initialize an Agent object.

    Params
    ======
        state_size (int): dimension of each state
        action_size (int): dimension of each action
        seed (int): random seed
        local_filename (str): optional checkpoint to restore the local network
        target_filename (str): optional checkpoint to restore the target network
    """
    self.state_size = state_size
    self.action_size = action_size
    # Store the seed value itself. random.seed() returns None, so the
    # original `self.seed = random.seed(seed)` silently left self.seed = None.
    self.seed = seed
    random.seed(seed)

    # Q-Network: local is trained, target provides stable TD targets.
    self.qnetwork_local = QNetwork(state_size, action_size, seed).to(device)
    self.qnetwork_target = QNetwork(state_size, action_size, seed).to(device)

    # If checkpoint filenames are given, restore the weights.
    # map_location=device lets checkpoints saved on GPU load on CPU-only hosts
    # (and vice versa) instead of raising a deserialization error.
    if local_filename is not None:
        self.qnetwork_local.load_state_dict(
            torch.load(local_filename, map_location=device))
    if target_filename is not None:
        self.qnetwork_target.load_state_dict(
            torch.load(target_filename, map_location=device))

    self.optimizer = optim.Adam(self.qnetwork_local.parameters(), lr=LR)

    # Replay memory
    self.memory = ReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE, seed)
    # Time step counter (learning happens every UPDATE_EVERY steps)
    self.t_step = 0
def __init__(self, state_size, action_size, seed, learning_rate,
             buffer_size=int(1e5), batch_size=64):
    """Initialize an Agent object.

    Params
    ======
        state_size (int): dimension of each state
        action_size (int): dimension of each action
        seed (int): random seed
        learning_rate (float): Adam learning rate for the local Q-network
        buffer_size (int): maximum replay-buffer capacity (default 1e5,
            matching the previously hard-coded value)
        batch_size (int): sampling batch size (default 64, matching the
            previously hard-coded value)
    """
    self.state_size = state_size
    self.action_size = action_size
    random.seed(seed)

    # Q-Network: local is trained, target provides stable TD targets.
    self.qnetwork_local = QNetwork(state_size, action_size, seed).to(device)
    self.qnetwork_target = QNetwork(state_size, action_size, seed).to(device)
    self.optimizer = optim.Adam(self.qnetwork_local.parameters(), lr=learning_rate)

    # Replay memory — sizes are now configurable instead of hard-coded,
    # with defaults preserving the original behavior.
    self.buffer_size = buffer_size
    self.batch_size = batch_size
    self.memory = ReplayBuffer(action_size, self.buffer_size, self.batch_size, seed)
    # Time step counter for periodic learning updates
    self.t_step = 0
def __init__(self, state_size, action_size, gamma, hidden_layers, drop_p,
             batch_size, learning_rate, soft_upd_param, update_every,
             buffer_size, seed):
    """Initialize a double-DQN agent.

    Delegates hyperparameter storage to the base class, then replaces the
    Q-networks with the dueling architecture used by this variant.

    Params
    ======
        state_size (int): dimension of each state
        action_size (int): dimension of each action
        gamma (float): discount factor
        hidden_layers (list): sizes of the hidden layers
        drop_p (float): dropout probability
        batch_size (int): size of each training batch
        learning_rate (float): Adam learning rate
        soft_upd_param (float): soft-update interpolation parameter (tau)
        update_every (int): how often to update the network
        buffer_size (int): maximum replay-buffer capacity
        seed (int): random seed
    """
    # Python-3 zero-argument super() replaces the legacy
    # super(AgentDoubleDQ, self) form — same resolution, less repetition.
    super().__init__(
        state_size, action_size, gamma, hidden_layers, drop_p, batch_size,
        learning_rate, soft_upd_param, update_every, buffer_size, seed)

    # Q-Network Architecture: Dueling Q-Nets (overrides the base class's networks)
    self.qnetwork_local = QNetwork(
        self.state_size, self.action_size, self.seed,
        self.hidden_layers, self.drop_p).to(device)
    self.qnetwork_target = QNetwork(
        self.state_size, self.action_size, self.seed,
        self.hidden_layers, self.drop_p).to(device)
    self.optimizer = optim.Adam(self.qnetwork_local.parameters(),
                                lr=self.learning_rate)

    # Experience Replay
    self.memory = ReplayBuffer(action_size, buffer_size, batch_size, seed)
def __init__(self, state_size, action_size, seed, buffer_size=BUFFER_SIZE,
             batch_size=BATCH_SIZE, gamma=GAMMA, tau=TAU, lr=LR,
             update_every=4):
    '''
    Initialize the agent

    Params
    ======
        state_size (int) : dimension of state space
        action_size (int): dimension of action space
        seed (int) : number of seed
        buffer_size (int): maximum size of buffer
        batch_size (int): size of each training batch
        gamma (float): discount factor
        tau (float) : for soft update of target parameters
        lr (float) : the learning rate
        update_every (int) : how often to update the network
    '''
    # Initialize important parameters.
    self.state_size = state_size
    self.action_size = action_size
    # Store the seed value itself. random.seed() returns None, so the
    # original `self.seed = random.seed(seed)` silently left self.seed = None.
    self.seed = seed
    random.seed(seed)
    # BUG FIX: the constructor accepted these hyperparameters but ignored
    # them, always using the module-level constants. Store and use the
    # arguments so callers who override the defaults actually get them.
    self.buffer_size = buffer_size
    self.batch_size = batch_size
    self.gamma = gamma
    self.tau = tau
    self.lr = lr
    self.update_every = update_every

    # Initialize Q-Network: local is trained, target provides stable TD targets.
    self.qnetwork_local = QNetwork(state_size, action_size, seed).to(device)
    self.qnetwork_target = QNetwork(state_size, action_size, seed).to(device)
    self.optimizer = optim.Adam(self.qnetwork_local.parameters(), lr=lr)

    # Initialize replay memory buffer with the (possibly overridden) sizes.
    self.memory = ReplayBuffer(action_size, buffer_size, batch_size, seed)
    # Initialize time step (for updating every update_every steps)
    self.t_step = 0
def __init__(self, state_size, action_size, seed, td_target_type="DQN"):
    """Initialize an Agent object.

    Params
    ======
        state_size (int): dimension of each state
        action_size (int): dimension of each action
        seed (int): random seed
        td_target_type (str): "DQN" or "Double DQN" — how TD targets are built

    Raises
    ======
        ValueError: if td_target_type is not one of the supported values
    """
    # Validate eagerly, before any expensive network construction.
    # A ValueError (not assert) so the check survives `python -O`.
    if td_target_type not in {"DQN", "Double DQN"}:
        raise ValueError(
            f"td_target_type must be 'DQN' or 'Double DQN', got {td_target_type!r}")
    self.td_target_type = td_target_type

    self.state_size = state_size
    self.action_size = action_size
    # Store the seed value itself. random.seed() returns None, so the
    # original `self.seed = random.seed(seed)` silently left self.seed = None.
    self.seed = seed
    random.seed(seed)

    # Q-Network: local is trained, target provides stable TD targets.
    self.qnetwork_local = QNetwork(state_size, action_size, seed).to(device)
    self.qnetwork_target = QNetwork(state_size, action_size, seed).to(device)
    self.optimizer = optim.Adam(self.qnetwork_local.parameters(), lr=LR)

    # Replay memory
    self.memory = ReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE, seed)
    # Time step counter (learning happens every UPDATE_EVERY steps)
    self.t_step = 0
def __init__(self, network_type, state_size, action_size, double_dqn=True):
    """Initialize an Agent object.

    Params
    ======
        network_type (str): 'fc' for a fully connected Q-network; any other
            value selects the convolutional variant (state_size is then
            interpreted as the input channel count)
        state_size (int): dimension of each state
        action_size (int): dimension of each action
        double_dqn (bool): whether to compute Double-DQN targets
    """
    self.state_size = state_size
    self.action_size = action_size
    self.double_dqn = double_dqn

    # Pick the network architecture, then build the local Q-network from it.
    net_cls = QNetwork if network_type == 'fc' else ConvQNetwork
    self.qnetwork_local = net_cls(state_size, action_size).to(device)
    # The target network starts as an exact copy of the local one.
    self.qnetwork_target = copy.deepcopy(self.qnetwork_local)

    self.optimizer = optim.Adam(self.qnetwork_local.parameters(), lr=LR)

    # Replay memory (the buffer needs network_type to know how to store states)
    self.memory = ReplayBuffer(network_type, action_size, BUFFER_SIZE, BATCH_SIZE)

    # Time step counter (learning happens every UPDATE_EVERY steps)
    self.t_step = 0