class BCAgent:

    def __init__(self, history_length=1):
        # Network, loss function, optimizer
        self.net = CNN(history_length).to(device)
        self.optimizer = torch.optim.Adam(self.net.parameters(), lr=1e-4)
        # self.optimizer = torch.optim.SGD(self.net.parameters(), lr=conf.lr, momentum=0.9)
        self.loss_func = torch.nn.CrossEntropyLoss().to(device)

    def update(self, X_batch, y_batch):
        # Transform inputs to tensors: NHWC numpy arrays -> NCHW float32 tensors on the device
        X_batch = torch.tensor(X_batch, dtype=torch.float32).permute(0, 3, 1, 2).to(device)
        y_batch = torch.LongTensor(y_batch).view(-1).to(device)
        # Forward + backward + optimize
        y_pred = self.predict(X_batch)
        self.optimizer.zero_grad()
        loss = self.loss_func(y_pred, y_batch)
        loss.backward()
        self.optimizer.step()
        return loss

    def predict(self, X):
        # Forward pass (raw logits)
        outputs = self.net(X)
        return outputs

    def load(self, file_name):
        self.net.load_state_dict(torch.load(file_name))

    def save(self, file_name):
        torch.save(self.net.state_dict(), file_name)
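The agents in this file construct their policy network via CNN(history_length, n_classes), which is defined in a separate networks module that is not shown here. Purely as a hedged illustration of what such a network could look like for 96x96 grayscale inputs with history_length stacked frames, a minimal sketch (the layer sizes are assumptions, not the actual architecture):

# Hypothetical sketch only: the real CNN lives in the (unshown) networks module;
# the filter counts and layer sizes below are assumptions.
import torch
import torch.nn as nn

class CNN(nn.Module):
    def __init__(self, history_length=1, n_classes=5):
        super().__init__()
        # history_length stacked 96x96 grayscale frames as input channels
        self.features = nn.Sequential(
            nn.Conv2d(history_length, 16, kernel_size=5, stride=2), nn.ReLU(),
            nn.Conv2d(16, 32, kernel_size=5, stride=2), nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=3, stride=2), nn.ReLU(),
        )
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(64 * 10 * 10, 256), nn.ReLU(),   # 96x96 input -> 10x10 feature map
            nn.Linear(256, n_classes),                 # one logit per discrete action
        )

    def forward(self, x):
        return self.classifier(self.features(x))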
class BCAgent:

    def __init__(self, lr=1e-4, history_length=1):
        # Network, loss function, optimizer
        self.net = CNN(history_length=history_length, n_classes=5).cuda()
        self.history_length = history_length
        self.criterion = nn.CrossEntropyLoss().cuda()
        self.optimizer = torch.optim.Adam(self.net.parameters(), lr=lr)

    def update(self, X_batch, y_batch):
        # Transform inputs to tensors and reshape to (N, history_length + 1, 96, 96)
        X_batch = torch.Tensor(X_batch).cuda()
        X_batch = X_batch.view((-1, self.history_length + 1, 96, 96))
        y_batch = torch.LongTensor(y_batch).cuda()
        # Forward + backward + optimize (single optimizer step per batch)
        preds = self.predict(X_batch)
        loss = self.criterion(preds, y_batch)
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()
        return loss, preds

    def predict(self, X):
        # Forward pass (raw logits)
        outputs = self.net(X)
        return outputs

    def save(self, file_name):
        torch.save(self.net.state_dict(), file_name)

    def load(self, file_name):
        self.net.load_state_dict(torch.load(file_name))
class BCAgent:

    def __init__(self, device='cpu', history_length=1, lr=1e-4, n_classes=5):
        # Network, loss function, optimizer
        self.device = torch.device(device)
        self.net = CNN(history_length=history_length, n_classes=n_classes)
        self.net.to(self.device)
        self.lossfn = torch.nn.CrossEntropyLoss()
        self.optimizer = torch.optim.Adam(self.net.parameters(), lr=lr)

    def update(self, X_batch, y_batch):
        # Transform inputs to tensors on the target device
        X_batch = X_batch.float().to(self.device)
        y_batch = y_batch.long().to(self.device)
        self.net.train()
        # Forward + backward + optimize
        pred = self.net(X_batch)
        loss = self.lossfn(pred, y_batch)
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()
        return loss

    def predict(self, X, prob=False):
        self.net.eval()
        # Forward pass; return class probabilities or argmax labels as a numpy array
        X = X.float().to(self.device)
        outputs = self.net(X)
        if prob:
            output = torch.nn.functional.softmax(outputs, dim=1)
        else:
            output = torch.argmax(outputs, dim=1)
        return output.cpu().detach().numpy()

    def save(self, file_name):
        torch.save(self.net.state_dict(), file_name)

    def load(self, file_name):
        self.net.load_state_dict(torch.load(file_name))
class BCAgent:

    def __init__(self, history_size, n_actions=5, lr=0.0004):
        # Network, loss function, optimizer
        self.history_size = history_size
        self.num_actions = n_actions
        self.net = CNN(self.history_size, n_actions).cuda()
        self.lr = lr
        self.criterion = torch.nn.CrossEntropyLoss()
        self.optimizer = torch.optim.Adam(self.net.parameters(), lr=lr)

    def update(self, X_batch, y_batch):
        # Transform inputs to tensors: NHWC -> NCHW, move to GPU
        X_batch = torch.FloatTensor(X_batch).permute(0, 3, 1, 2).cuda()
        y_batch = torch.LongTensor(y_batch).cuda()
        # Forward + backward + optimize (switch to training mode before the forward pass)
        self.net.train()
        y_predicted = self.net(X_batch)
        self.optimizer.zero_grad()
        loss = self.criterion(y_predicted, y_batch)
        loss.backward()
        self.optimizer.step()
        return loss.item()

    def predict(self, X):
        # Forward pass in evaluation mode, without tracking gradients
        X = torch.FloatTensor(X).permute(0, 3, 1, 2).cuda()
        self.net.eval()
        with torch.no_grad():
            outputs = self.net(X)
        return outputs

    def save(self, file_name):
        torch.save(self.net.state_dict(), file_name)

    def load(self, file_name):
        self.net.load_state_dict(torch.load(file_name))
class BCAgentCNN(BaseBCAgent):

    def __init__(self, history_length, learning_rate, weights_classes):
        # Optional per-class weights for the loss, moved to the training device
        weights_classes = None if weights_classes is None else weights_classes.to(DEVICE)
        self.net = CNN(history_length=history_length, n_classes=4)
        # self.net = Resnet18(history_length=history_length, n_classes=4)
        self.criterion = nn.CrossEntropyLoss(weight=weights_classes)
        self.optimizer = torch.optim.Adam(params=self.net.parameters(), lr=learning_rate)
        # self.lr_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(self.optimizer, threshold=0.00001)
        self.lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(
            self.optimizer, T_0=1, T_mult=3)

    def update(self, X_batch, y_batch):
        # Forward + backward + optimize (inputs converted via the _x/_y helpers)
        self.optimizer.zero_grad()
        y_batch_pred = self.net(_x(X_batch))
        loss = self.criterion(y_batch_pred, _y(y_batch))
        loss.backward()
        self.optimizer.step()
        return loss

    def predict(self, X):
        outputs = self.net(_x(X)).detach()
        return outputs
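The CosineAnnealingWarmRestarts scheduler above is only instantiated in __init__; it changes the learning rate only if it is stepped during training, which happens outside this class. A minimal usage sketch, assuming an epoch-level training loop supplied by the caller (n_epochs and train_loader are placeholders, not part of the original code):

# Hedged sketch: external epoch loop stepping the warm-restart schedule once per epoch.
agent = BCAgentCNN(history_length=1, learning_rate=1e-4, weights_classes=None)
for epoch in range(n_epochs):                  # n_epochs defined by the caller
    for X_batch, y_batch in train_loader:      # train_loader defined by the caller
        loss = agent.update(X_batch, y_batch)
    agent.lr_scheduler.step()                  # advance the cosine warm-restart schedule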
class BCAgent:

    def __init__(self, network_type, lr, hidden_layers):
        # Network (fully connected or convolutional), loss function, optimizer
        if network_type == "FCN":
            self.net = FCN(hidden_layers).to(device)
        else:
            self.net = CNN().to(device)
        self.loss_fcn = nn.CrossEntropyLoss()
        self.optimizer = optim.Adam(self.net.parameters(), lr)

    def update(self, X_batch, y_batch):
        # Transform inputs to tensors on the target device
        X_batch = torch.tensor(X_batch, dtype=torch.float32).to(device)
        y_batch = torch.LongTensor(y_batch).view(-1).to(device)
        # Forward + backward + optimize
        self.net.zero_grad()
        output = self.net(X_batch)
        loss = self.loss_fcn(output, y_batch)
        loss.backward()
        self.optimizer.step()
        return loss

    def predict(self, X):
        # Forward pass (raw logits)
        X = X.to(device)
        outputs = self.net(X)
        return outputs

    def save(self, file_name):
        torch.save(self.net.state_dict(), file_name)

    def load(self, file_name):
        self.net.load_state_dict(torch.load(file_name, map_location=device))
class BCAgent:

    def __init__(self, history_length=1):
        # Network, loss function, optimizer
        self.learning_rate = 1e-4
        self.net = CNN(history_length=history_length).cuda()
        self.loss = torch.nn.CrossEntropyLoss()
        self.optimizer = torch.optim.Adam(self.net.parameters(), lr=self.learning_rate)

    def update(self, X_batch, y_batch):
        # Transform inputs to tensors: cast to float32 (numpy float64 would not match
        # the float32 weights), reorder NHWC -> NCHW and move to GPU
        X_batch = torch.FloatTensor(X_batch).permute(0, 3, 1, 2).cuda()
        y_batch = torch.FloatTensor(y_batch).cuda()
        # Forward + backward + optimize (switch to training mode before the forward pass)
        self.net.train()
        outputs = self.net(X_batch)
        self.optimizer.zero_grad()
        loss = self.loss(outputs, y_batch.squeeze(1).long())
        loss.backward()
        # Gradient clipping
        clip = 1
        torch.nn.utils.clip_grad_norm_(self.net.parameters(), clip)
        self.optimizer.step()
        return loss

    def predict(self, X):
        # Forward pass in evaluation mode, without tracking gradients
        self.net.eval()
        with torch.no_grad():
            X = torch.FloatTensor(X).permute(0, 3, 1, 2).cuda()
            outputs = self.net(X)
        self.net.train()
        return outputs

    def save(self, file_name):
        torch.save(self.net.state_dict(), file_name)

    def load(self, file_name):
        self.net.load_state_dict(torch.load(file_name))
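As a hedged usage sketch of the update/predict interface shared by the BCAgent variants above (shapes follow the NHWC 96x96 inputs they permute; the batch is random dummy data and the file name is a placeholder, not the real driving dataset or checkpoint):

# Hypothetical smoke test of the agent interface (dummy data, CUDA assumed available).
import numpy as np

agent = BCAgent(history_length=1)
X_batch = np.random.rand(32, 96, 96, 1).astype(np.float32)   # 32 frames, NHWC layout
y_batch = np.random.randint(0, 5, size=(32, 1))              # discrete action ids
loss = agent.update(X_batch, y_batch)                        # one gradient step
logits = agent.predict(X_batch)                              # (32, n_classes) logits
agent.save("bc_agent_smoke.pt")                              # placeholder checkpoint name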
                    default=500, required=False)
parser.add_argument("-r", "--render", action='store_true',
                    help="render during training and evaluation",
                    default=False, required=False)
args = parser.parse_args()
print(args)

env = gym.make('CarRacing-v0').unwrapped

# Define Q network, target network and DQN agent
Q_network = CNN(history_length=5, n_classes=5)
Q_target = CNN(history_length=5, n_classes=5)
agent = DQNAgent(Q=Q_network, Q_target=Q_target, num_actions=5,
                 buffer_size=1e5, lr=1e-4)

train_online(env, agent, num_episodes=args.episodes, history_length=5,
             model_dir="./models_carracing", eval_cycle=20,
             num_eval_episodes=5, skip_frames=5,
        agent.save(os.path.join(model_dir, "dqn_agent_1.pt"))
        print(f"episode: {i+1}, total reward: {episode_reward}")
        max_timesteps = min(max_timesteps + 20, 1500)

    tensorboard.close_session()
    return training, validation


if __name__ == "__main__":

    num_eval_episodes = 5
    eval_cycle = 20
    num_actions = 5

    env = gym.make('CarRacing-v0').unwrapped

    Q = CNN(n_classes=5)
    Q_target = CNN(n_classes=5)
    agent = DQNAgentCar(Q, Q_target, num_actions, gamma=0.9, batch_size=20,
                        epsilon=0.9, tau=0.01, lr=0.001, history_length=0)

    training, validation = train_online(env, agent, num_episodes=420, history_length=0,
    return rgb2gray(state).reshape(96, 96) / 255.0


if __name__ == "__main__":

    num_eval_episodes = 5
    eval_cycle = 20
    hist = 3
    num_actions = 5

    env = gym.make('CarRacing-v0').unwrapped

    # 1. Define Q network and target network
    Q_target = CNN(hist + 1, num_actions)
    Q = CNN(hist + 1, num_actions)

    # 2. init DQNAgent (see dqn/dqn_agent.py)
    agent = DQNAgent(Q, Q_target, num_actions, double=False, history_length=1e6)
    # agent = DQNAgent(Q, Q_target, num_actions, double=False, epsilon=0.99, eps_decay=True, history_length=1e6)

    # 3. train DQN agent with train_online(...)
    train_online(env, agent, num_episodes=1000, history_length=hist,
                 model_dir="./models_carracing")
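The DQN setups above construct CNN(hist + 1, num_actions), i.e. the network expects hist + 1 stacked preprocessed frames as input channels. A hedged sketch of how such a state history could be assembled from the rgb2gray preprocessing shown above (the deque-based buffer and the reset handling are assumptions, not the training loop actually used):

# Hedged sketch: stacking hist + 1 preprocessed 96x96 frames into one CNN input.
from collections import deque
import numpy as np

hist = 3
frame_buffer = deque(maxlen=hist + 1)

def stacked_state(frame_buffer):
    # (hist + 1, 96, 96) array, oldest frame first; add a batch axis for the CNN
    return np.expand_dims(np.stack(frame_buffer, axis=0), axis=0)

state = env.reset()
gray = rgb2gray(state).reshape(96, 96) / 255.0
for _ in range(hist + 1):            # fill the buffer with the initial frame
    frame_buffer.append(gray)
x = stacked_state(frame_buffer)      # shape (1, hist + 1, 96, 96)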