import torch
import torch.nn as nn
import torch.optim as optim

# CNN (and FCN in the last variant) are assumed to be defined elsewhere,
# e.g. in the networks module shipped with the exercise.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


# Variant 1
class BCAgent:
    def __init__(self, history_length=1):
        # Network, loss function, and optimizer.
        self.net = CNN(history_length).to(device)
        self.optimizer = torch.optim.Adam(self.net.parameters(), lr=1e-4)
        self.loss_func = torch.nn.CrossEntropyLoss()

    def update(self, X_batch, y_batch):
        # Transform input to tensors: NHWC numpy batch -> NCHW float tensor.
        X_batch = torch.tensor(X_batch).float().permute(0, 3, 1, 2).to(device)
        y_batch = torch.LongTensor(y_batch).view(-1).to(device)
        # Forward + backward + optimize.
        y_pred = self.predict(X_batch)
        self.optimizer.zero_grad()
        loss = self.loss_func(y_pred, y_batch)
        loss.backward()
        self.optimizer.step()
        return loss

    def predict(self, X):
        # Forward pass; returns raw logits.
        return self.net(X)

    def load(self, file_name):
        self.net.load_state_dict(torch.load(file_name))

    def save(self, file_name):
        torch.save(self.net.state_dict(), file_name)
# Variant 2: adds gradient clipping and eval/train mode switching.
class BCAgent:
    def __init__(self, history_length=1):
        # Network, loss function, and optimizer.
        self.learning_rate = 1e-4
        self.net = CNN(history_length=history_length).cuda()
        self.loss = torch.nn.CrossEntropyLoss()
        self.optimizer = torch.optim.Adam(self.net.parameters(), lr=self.learning_rate)

    def update(self, X_batch, y_batch):
        # Transform input to tensors; FloatTensor casts numpy float64 to float32.
        X_batch = torch.FloatTensor(X_batch).permute(0, 3, 1, 2).cuda()
        y_batch = torch.FloatTensor(y_batch).cuda()
        # Forward + backward + optimize.
        self.net.train()
        outputs = self.net(X_batch)
        self.optimizer.zero_grad()
        loss = self.loss(outputs, y_batch.squeeze(1).long())
        loss.backward()
        # Gradient clipping keeps the size of the update step bounded.
        clip = 1
        torch.nn.utils.clip_grad_norm_(self.net.parameters(), clip)
        self.optimizer.step()
        return loss

    def predict(self, X):
        # Forward pass in eval mode, without building a graph.
        self.net.eval()
        with torch.no_grad():
            X = torch.FloatTensor(X).permute(0, 3, 1, 2).cuda()
            outputs = self.net(X)
        self.net.train()
        return outputs

    def save(self, file_name):
        torch.save(self.net.state_dict(), file_name)

    def load(self, file_name):
        self.net.load_state_dict(torch.load(file_name))
# Variant 3: device-configurable; predict() returns class indices or softmax
# probabilities as numpy arrays.
class BCAgent:
    def __init__(self, device='cpu', history_length=1, lr=1e-4, n_classes=5):
        # Network, loss function, and optimizer.
        self.device = torch.device(device)
        self.net = CNN(history_length=history_length, n_classes=n_classes)
        self.net.to(self.device)
        self.lossfn = torch.nn.CrossEntropyLoss()
        self.optimizer = torch.optim.Adam(self.net.parameters(), lr=lr)

    def update(self, X_batch, y_batch):
        # Transform input to tensors.
        X_batch = X_batch.float().to(self.device)
        y_batch = y_batch.long().to(self.device)
        self.net.train()
        # Forward + backward + optimize.
        pred = self.net(X_batch)
        loss = self.lossfn(pred, y_batch)
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()
        return loss

    def predict(self, X, prob=False):
        self.net.eval()
        # Forward pass.
        X = X.float().to(self.device)
        outputs = self.net(X)
        if prob:
            output = torch.nn.functional.softmax(outputs, dim=1)
        else:
            output = torch.argmax(outputs, dim=1)
        return output.cpu().detach().numpy()

    def save(self, file_name):
        torch.save(self.net.state_dict(), file_name)

    def load(self, file_name):
        self.net.load_state_dict(torch.load(file_name))
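# Usage sketch for the prob flag of the variant above. This is an illustration,
# not part of the original solutions; the 96x96 frame size, batch size of 32,
# and five classes are assumptions borrowed from the other variants.
def demo_predict(agent):
    X = torch.rand(32, 1, 96, 96)           # NCHW float batch, history_length=1
    actions = agent.predict(X)              # numpy class indices, shape (32,)
    probs = agent.predict(X, prob=True)     # numpy softmax scores, shape (32, 5)
    return actions, probs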
# Variant 4
class BCAgent:
    def __init__(self, history_size, n_actions=5, lr=0.0004):
        # Network, loss function, and optimizer.
        self.history_size = history_size
        self.num_actions = n_actions
        self.net = CNN(self.history_size, n_actions).cuda()
        self.lr = lr
        self.criterion = torch.nn.CrossEntropyLoss()
        self.optimizer = torch.optim.Adam(self.net.parameters(), lr=lr)

    def update(self, X_batch, y_batch):
        # Transform input to tensors: NHWC numpy batch -> NCHW float tensor.
        X_batch = torch.FloatTensor(X_batch).permute(0, 3, 1, 2).cuda()
        y_batch = torch.LongTensor(y_batch).cuda()
        # Forward + backward + optimize (train mode set before the forward pass).
        self.net.train()
        y_predicted = self.net(X_batch)
        self.optimizer.zero_grad()
        loss = self.criterion(y_predicted, y_batch)
        loss.backward()
        self.optimizer.step()
        return loss.item()

    def predict(self, X):
        # Forward pass in eval mode, without building a graph.
        X = torch.FloatTensor(X).permute(0, 3, 1, 2).cuda()
        self.net.eval()
        with torch.no_grad():
            outputs = self.net(X)
        return outputs

    def save(self, file_name):
        torch.save(self.net.state_dict(), file_name)

    def load(self, file_name):
        self.net.load_state_dict(torch.load(file_name))
# Variant 5
class BCAgent:
    def __init__(self, lr=1e-4, history_length=1):
        # Network, loss function, and optimizer.
        self.net = CNN(history_length=history_length, n_classes=5).cuda()
        self.history_length = history_length
        self.criterion = nn.CrossEntropyLoss().cuda()
        self.optimizer = torch.optim.Adam(self.net.parameters(), lr=lr)

    def update(self, X_batch, y_batch):
        # Transform input to tensors; this variant reshapes to stacked
        # (history_length + 1)-channel frames instead of permuting axes.
        X_batch = torch.Tensor(X_batch).cuda()
        X_batch = X_batch.view((-1, self.history_length + 1, 96, 96))
        y_batch = torch.LongTensor(y_batch).cuda()
        # Forward + backward + optimize.
        preds = self.predict(X_batch)
        loss = self.criterion(preds, y_batch)
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()
        return loss, preds

    def predict(self, X):
        # Forward pass; returns raw logits.
        return self.net(X)

    def save(self, file_name):
        torch.save(self.net.state_dict(), file_name)

    def load(self, file_name):
        self.net.load_state_dict(torch.load(file_name))
# Variant 6: supports a fully connected network as an alternative to the CNN.
class BCAgent:
    def __init__(self, network_type, lr, hidden_layers):
        # Network (FCN or CNN), loss function, and optimizer.
        if network_type == "FCN":
            self.net = FCN(hidden_layers).to(device)
        else:
            self.net = CNN().to(device)
        self.loss_fcn = nn.CrossEntropyLoss()
        self.optimizer = optim.Adam(self.net.parameters(), lr)

    def update(self, X_batch, y_batch):
        # Transform input to tensors.
        X_batch = torch.tensor(X_batch).float().to(device)
        y_batch = torch.LongTensor(y_batch).view(-1).to(device)
        # Forward + backward + optimize.
        self.net.zero_grad()
        output = self.net(X_batch)
        loss = self.loss_fcn(output, y_batch)
        loss.backward()
        self.optimizer.step()
        return loss

    def predict(self, X):
        # Forward pass.
        X = X.to(device)
        return self.net(X)

    def save(self, file_name):
        torch.save(self.net.state_dict(), file_name)

    def load(self, file_name):
        self.net.load_state_dict(torch.load(file_name, map_location=device))
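# A minimal training-loop sketch showing how the agents above would be driven;
# an assumption-based illustration, not part of the original solutions.
# train_bc and train_batches are hypothetical names: train_batches is assumed
# to yield (states, actions) numpy pairs, with states in NHWC layout as the
# permute(0, 3, 1, 2) calls above expect, and update() is assumed to return
# the loss (variant 5 returns (loss, preds) instead).
def train_bc(agent, train_batches, n_epochs=10, model_path="bc_agent.pt"):
    for epoch in range(n_epochs):
        for X_batch, y_batch in train_batches:
            loss = agent.update(X_batch, y_batch)
        print(f"epoch {epoch}: last loss {float(loss):.4f}")
    agent.save(model_path)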