Exemplo n.º 1
0
 def __init__(self, history_length=1):
     """Create the network, its loss function and its optimizer.

     Args:
         history_length: Passed unchanged as the only argument to ``CNN``.
     """
     # Cross-entropy criterion, placed on the module-level ``device``.
     criterion = torch.nn.CrossEntropyLoss()
     self.loss_func = criterion.to(device)

     # Move the model to ``device`` before its parameters are handed to
     # the optimizer.
     model = CNN(history_length)
     self.net = model.to(device)
     self.optimizer = torch.optim.Adam(params=self.net.parameters(), lr=1e-4)
Exemplo n.º 2
0
 def __init__(self, lr=1e-4, history_length=1):
     """Set up the CNN classifier, its criterion and its Adam optimizer.

     Args:
         lr: Learning rate handed to ``torch.optim.Adam``.
         history_length: Stored on the instance and forwarded to ``CNN``.
     """
     self.history_length = history_length

     # Network is built with ``n_classes=5`` and placed on the GPU.
     model = CNN(history_length=history_length, n_classes=5)
     self.net = model.cuda()

     self.criterion = nn.CrossEntropyLoss().cuda()
     self.optimizer = torch.optim.Adam(params=self.net.parameters(), lr=lr)
Exemplo n.º 3
0
class BCAgent:
    """Agent that trains a CNN classifier with cross-entropy and Adam."""

    def __init__(self, history_length=1):
        """Create the network, its loss function and its optimizer.

        Args:
            history_length: Passed unchanged as the only argument to ``CNN``.
        """
        # Cross-entropy criterion, placed on the module-level ``device``.
        criterion = torch.nn.CrossEntropyLoss()
        self.loss_func = criterion.to(device)

        # Move the model to ``device`` before its parameters are handed to
        # the optimizer.
        model = CNN(history_length)
        self.net = model.to(device)
        self.optimizer = torch.optim.Adam(params=self.net.parameters(), lr=1e-4)

    def update(self, X_batch, y_batch):
        """Run one forward/backward/optimizer step on a batch.

        Args:
            X_batch: Batch of inputs; converted with ``torch.tensor`` and its
                axes permuted with ``(0, 3, 1, 2)`` (presumably NHWC -> NCHW;
                confirm against the data pipeline).
            y_batch: Labels; converted to a flat ``LongTensor``.

        Returns:
            The loss tensor computed for this batch.
        """
        inputs = torch.tensor(X_batch)
        inputs = inputs.permute(0, 3, 1, 2).to(device)
        targets = torch.LongTensor(y_batch)
        targets = targets.view(-1).to(device)

        # Clear stale gradients, then forward + backward + optimize.
        self.optimizer.zero_grad()
        logits = self.predict(inputs).to(device)
        batch_loss = self.loss_func(logits, targets).to(device)
        batch_loss.backward()
        self.optimizer.step()

        return batch_loss

    def predict(self, X):
        """Return the raw network outputs for ``X`` (no conversion applied)."""
        return self.net(X)

    def load(self, file_name):
        """Load network weights from a state dict stored at ``file_name``."""
        state = torch.load(file_name)
        self.net.load_state_dict(state)

    def save(self, file_name):
        """Write the network's state dict to ``file_name``."""
        state = self.net.state_dict()
        torch.save(state, file_name)
Exemplo n.º 4
0
 def __init__(self, history_length=1):
     """Build the CNN, the cross-entropy loss and the Adam optimizer.

     Args:
         history_length: Forwarded to ``CNN`` as ``history_length``.
     """
     # Fixed learning rate, kept on the instance and reused below.
     self.learning_rate = 1e-4

     self.loss = torch.nn.CrossEntropyLoss()

     # The network lives on the GPU; the optimizer is created afterwards.
     model = CNN(history_length=history_length)
     self.net = model.cuda()
     self.optimizer = torch.optim.Adam(
         params=self.net.parameters(),
         lr=self.learning_rate,
     )
Exemplo n.º 5
0
    def __init__(self, device='cpu', history_length=1, lr=1e-4, n_classes=5):
        """Create the network on ``device`` together with loss and optimizer.

        Args:
            device: Device spec accepted by ``torch.device``.
            history_length: Forwarded to ``CNN``.
            lr: Learning rate for the Adam optimizer.
            n_classes: Forwarded to ``CNN``.
        """
        self.device = torch.device(device)
        self.lossfn = torch.nn.CrossEntropyLoss()

        # Build the model and move it to the requested device in one go.
        self.net = CNN(history_length=history_length, n_classes=n_classes).to(self.device)

        self.optimizer = torch.optim.Adam(params=self.net.parameters(), lr=lr)
Exemplo n.º 6
0
 def __init__(self, history_size, n_actions=5, lr=0.0004):
     """Store the hyper-parameters and build network, loss and optimizer.

     Args:
         history_size: Stored and passed as the first argument to ``CNN``.
         n_actions: Stored as ``num_actions`` and passed to ``CNN``.
         lr: Stored and used as the Adam learning rate.
     """
     # Plain hyper-parameters first.
     self.history_size = history_size
     self.num_actions = n_actions
     self.lr = lr

     # Model on the GPU, then criterion and optimizer.
     model = CNN(self.history_size, n_actions)
     self.net = model.cuda()
     self.criterion = torch.nn.CrossEntropyLoss()
     self.optimizer = torch.optim.Adam(params=self.net.parameters(), lr=lr)
Exemplo n.º 7
0
 def __init__(self, history_length, learning_rate, weights_classes):
     """Build the CNN, weighted loss, optimizer and LR scheduler.

     Args:
         history_length: Forwarded to ``CNN``.
         learning_rate: Learning rate for the Adam optimizer.
         weights_classes: Optional tensor of per-class loss weights, or
             ``None`` for an unweighted loss. When given it is moved to
             ``DEVICE``.
     """
     if weights_classes is not None:
         weights_classes = weights_classes.to(DEVICE)

     # Keep the network on the same device as the class weights: the
     # weighted loss needs logits and weights on one device. ``.to`` is a
     # no-op if a caller moves the module to ``DEVICE`` again later.
     self.net = CNN(history_length=history_length, n_classes=4).to(DEVICE)

     self.criterion = nn.CrossEntropyLoss(weight=weights_classes)
     self.optimizer = torch.optim.Adam(params=self.net.parameters(),
                                       lr=learning_rate)
     # Cosine annealing with warm restarts; it is only created here and
     # must be stepped by whoever drives training.
     self.lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(
         self.optimizer, T_0=1, T_mult=3)
Exemplo n.º 8
0
    def __init__(self, network_type, lr, hidden_layers):
        """Build the selected network plus its loss and optimizer.

        Args:
            network_type: ``"FCN"`` selects ``FCN(hidden_layers)``; any other
                value selects ``CNN()``.
            lr: Learning rate passed positionally to ``optim.Adam``.
            hidden_layers: Only used when ``network_type == "FCN"``.
        """
        # Pick the architecture, then move it to the module-level ``device``.
        model = FCN(hidden_layers) if network_type == "FCN" else CNN()
        self.net = model.to(device)

        self.loss_fcn = nn.CrossEntropyLoss()
        self.optimizer = optim.Adam(self.net.parameters(), lr)
Exemplo n.º 9
0
class BCAgent:
    """Agent that fits a CNN classifier to batches of inputs and labels.

    The network, the loss and every batch tensor are placed on the GPU via
    ``.cuda()``, so constructing and updating the agent needs a CUDA device.
    """

    def __init__(self, lr=1e-4, history_length=1):
        """Set up the CNN classifier, its criterion and its Adam optimizer.

        Args:
            lr: Learning rate handed to ``torch.optim.Adam``.
            history_length: Stored on the instance and forwarded to ``CNN``.
        """
        self.net = CNN(history_length=history_length, n_classes=5).cuda()
        self.history_length = history_length
        self.criterion = nn.CrossEntropyLoss().cuda()
        self.optimizer = torch.optim.Adam(self.net.parameters(), lr=lr)

    def update(self, X_batch, y_batch):
        """Perform exactly one optimisation step on a batch.

        Args:
            X_batch: Batch of inputs convertible by ``torch.Tensor``; it is
                reshaped to ``(-1, history_length + 1, 96, 96)``.
            y_batch: Class labels convertible by ``torch.LongTensor``.

        Returns:
            Tuple ``(loss, preds)``: the loss tensor and the network outputs
            for this batch.
        """
        # Transform the inputs to GPU tensors in the layout the net expects.
        X_batch = torch.Tensor(X_batch).cuda()
        X_batch = X_batch.view((-1, self.history_length + 1, 96, 96))
        y_batch = torch.LongTensor(y_batch).cuda()

        # Forward pass and loss.
        preds = self.predict(X_batch)
        loss = self.criterion(preds, y_batch)

        # Backward pass and a single optimizer step. Stepping a second time
        # would re-apply the same gradients and double the update.
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()
        return loss, preds

    def predict(self, X):
        """Return the raw network outputs for ``X``."""
        outputs = self.net(X)
        return outputs

    def save(self, file_name):
        """Write the network's state dict to ``file_name``."""
        torch.save(self.net.state_dict(), file_name)

    def load(self, file_name):
        """Load network weights from a state dict stored at ``file_name``."""
        self.net.load_state_dict(torch.load(file_name))
Exemplo n.º 10
0
class BCAgent:
    """Agent wrapping a CNN classifier on a configurable torch device."""

    def __init__(self, device='cpu', history_length=1, lr=1e-4, n_classes=5):
        """Create the network on ``device`` together with loss and optimizer.

        Args:
            device: Device spec accepted by ``torch.device``.
            history_length: Forwarded to ``CNN``.
            lr: Learning rate for the Adam optimizer.
            n_classes: Forwarded to ``CNN``.
        """
        self.device = torch.device(device)
        self.lossfn = torch.nn.CrossEntropyLoss()

        # Build the model and move it to the requested device in one go.
        self.net = CNN(history_length=history_length, n_classes=n_classes).to(self.device)

        self.optimizer = torch.optim.Adam(params=self.net.parameters(), lr=lr)

    def update(self, X_batch, y_batch):
        """Run one training step and return the loss tensor.

        Args:
            X_batch: Input tensor; cast to float and moved to ``self.device``.
            y_batch: Label tensor; cast to long and moved to ``self.device``.
        """
        inputs = X_batch.float().to(self.device)
        targets = y_batch.long().to(self.device)

        # Training mode must be active before the forward pass.
        self.net.train()

        self.optimizer.zero_grad()
        batch_loss = self.lossfn(self.net(inputs), targets)
        batch_loss.backward()
        self.optimizer.step()

        return batch_loss

    def predict(self, X, prob=False):
        """Evaluate the network on ``X`` and return a NumPy array.

        Args:
            X: Input tensor; cast to float and moved to ``self.device``.
            prob: If true, return the softmax over dimension 1; otherwise
                return the argmax over dimension 1.
        """
        self.net.eval()

        logits = self.net(X.float().to(self.device))
        if prob:
            result = torch.nn.functional.softmax(logits, dim=1)
        else:
            result = torch.argmax(logits, dim=1)
        return result.cpu().detach().numpy()

    def save(self, file_name):
        """Write the network's state dict to ``file_name``."""
        state = self.net.state_dict()
        torch.save(state, file_name)

    def load(self, file_name):
        """Load network weights from a state dict stored at ``file_name``."""
        state = torch.load(file_name)
        self.net.load_state_dict(state)
Exemplo n.º 11
0
class BCAgent:
    """Agent that trains a GPU-resident CNN classifier with cross-entropy."""

    def __init__(self, history_size, n_actions=5, lr=0.0004):
        """Store the hyper-parameters and build network, loss and optimizer.

        Args:
            history_size: Stored and passed as the first argument to ``CNN``.
            n_actions: Stored as ``num_actions`` and passed to ``CNN``.
            lr: Stored and used as the Adam learning rate.
        """
        self.history_size = history_size
        self.num_actions = n_actions
        self.net = CNN(self.history_size, n_actions).cuda()
        self.lr = lr
        self.criterion = torch.nn.CrossEntropyLoss()
        self.optimizer = torch.optim.Adam(self.net.parameters(), lr=lr)

    def update(self, X_batch, y_batch):
        """Run one training step on a batch.

        Args:
            X_batch: Batch of inputs; converted to a float tensor and its
                axes permuted with ``(0, 3, 1, 2)`` (presumably NHWC -> NCHW;
                confirm against the data pipeline).
            y_batch: Class labels convertible by ``torch.LongTensor``.

        Returns:
            The batch loss as a Python float.
        """
        X_batch = torch.FloatTensor(X_batch).permute(0, 3, 1, 2).cuda()
        y_batch = torch.LongTensor(y_batch).cuda()

        # Switch to training mode BEFORE the forward pass: ``predict`` leaves
        # the network in eval mode, and the training forward pass must not
        # run with eval-mode layer behaviour.
        self.net.train()
        self.optimizer.zero_grad()
        y_predicted = self.net(X_batch)
        loss = self.criterion(y_predicted, y_batch)
        loss.backward()
        self.optimizer.step()
        return loss.item()

    def predict(self, X):
        """Return network outputs for ``X`` in eval mode, without gradients.

        ``X`` is converted and permuted exactly as in ``update``. The network
        is left in eval mode afterwards.
        """
        X = torch.FloatTensor(X).permute(0, 3, 1, 2).cuda()
        self.net.eval()
        with torch.no_grad():
            outputs = self.net(X)
        return outputs

    def save(self, file_name):
        """Write the network's state dict to ``file_name``."""
        torch.save(self.net.state_dict(), file_name)

    def load(self, file_name):
        """Load network weights from a state dict stored at ``file_name``."""
        self.net.load_state_dict(torch.load(file_name))
Exemplo n.º 12
0
class BCAgentCNN(BaseBCAgent):
    """CNN-based agent trained with optionally class-weighted cross-entropy."""

    def __init__(self, history_length, learning_rate, weights_classes):
        """Build the CNN, weighted loss, optimizer and LR scheduler.

        Args:
            history_length: Forwarded to ``CNN``.
            learning_rate: Learning rate for the Adam optimizer.
            weights_classes: Optional tensor of per-class loss weights, or
                ``None`` for an unweighted loss. When given it is moved to
                ``DEVICE``.
        """
        if weights_classes is not None:
            weights_classes = weights_classes.to(DEVICE)

        # Keep the network on the same device as the class weights: the
        # weighted loss needs logits and weights on one device. ``.to`` is a
        # no-op if a caller moves the module to ``DEVICE`` again later.
        self.net = CNN(history_length=history_length, n_classes=4).to(DEVICE)

        self.criterion = nn.CrossEntropyLoss(weight=weights_classes)
        self.optimizer = torch.optim.Adam(params=self.net.parameters(),
                                          lr=learning_rate)
        # Cosine annealing with warm restarts; this class only creates the
        # scheduler, it never steps it.
        self.lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(
            self.optimizer, T_0=1, T_mult=3)

    def update(self, X_batch, y_batch):
        """Run one optimisation step and return the loss tensor.

        ``X_batch`` and ``y_batch`` are passed through the module-level
        helpers ``_x`` and ``_y`` before being used.
        """
        self.optimizer.zero_grad()
        y_batch_pred = self.net(_x(X_batch))
        loss = self.criterion(y_batch_pred, _y(y_batch))
        loss.backward()
        self.optimizer.step()
        return loss

    def predict(self, X):
        """Return the network outputs for ``_x(X)``, detached from the graph."""
        outputs = self.net(_x(X)).detach()
        return outputs
Exemplo n.º 13
0
class BCAgent:
    """Agent that trains either an FCN or a CNN classifier."""

    def __init__(self, network_type, lr, hidden_layers):
        """Build the selected network plus its loss and optimizer.

        Args:
            network_type: ``"FCN"`` selects ``FCN(hidden_layers)``; any other
                value selects ``CNN()``.
            lr: Learning rate passed positionally to ``optim.Adam``.
            hidden_layers: Only used when ``network_type == "FCN"``.
        """
        # Pick the architecture, then move it to the module-level ``device``.
        model = FCN(hidden_layers) if network_type == "FCN" else CNN()
        self.net = model.to(device)

        self.loss_fcn = nn.CrossEntropyLoss()
        self.optimizer = optim.Adam(self.net.parameters(), lr)

    def update(self, X_batch, y_batch):
        """Run one training step on a batch and return the loss tensor.

        Args:
            X_batch: Batch of inputs convertible by ``torch.tensor``.
            y_batch: Labels convertible by ``torch.FloatTensor``; flattened
                to one entry per sample and cast to long for the loss.
        """
        inputs = torch.tensor(X_batch).to(device)
        targets = torch.FloatTensor(y_batch).to(device)

        # Reset gradients on the network, then forward + backward + optimize.
        self.net.zero_grad()
        logits = self.net(inputs)
        targets = targets.view(targets.size(0)).long()
        batch_loss = self.loss_fcn(logits, targets)
        batch_loss.backward()
        self.optimizer.step()
        return batch_loss

    def predict(self, X):
        """Move ``X`` to ``device`` and return the raw network outputs."""
        return self.net(X.to(device))

    def save(self, file_name):
        """Write the network's state dict to ``file_name``."""
        state = self.net.state_dict()
        torch.save(state, file_name)

    def load(self, file_name):
        """Load network weights from ``file_name``, mapped onto ``device``."""
        state = torch.load(file_name, map_location=device)
        self.net.load_state_dict(state)
Exemplo n.º 14
0
class BCAgent:
    """Agent that trains a GPU-resident CNN with gradient clipping."""

    def __init__(self, history_length=1):
        """Build the CNN, the cross-entropy loss and the Adam optimizer.

        Args:
            history_length: Forwarded to ``CNN`` as ``history_length``.
        """
        self.learning_rate = 1e-4
        self.net = CNN(history_length=history_length).cuda()
        self.loss = torch.nn.CrossEntropyLoss()
        self.optimizer = torch.optim.Adam(self.net.parameters(), lr=self.learning_rate)

    def update(self, X_batch, y_batch):
        """Run one training step on a batch and return the loss tensor.

        Args:
            X_batch: Batch of inputs; converted to a float tensor and its
                axes permuted with ``(0, 3, 1, 2)`` (presumably NHWC -> NCHW;
                confirm against the data pipeline).
            y_batch: Labels with a size-1 second dimension; it is squeezed
                away and the values are cast to long for the loss.
        """
        X_batch = torch.FloatTensor(X_batch)  # it may not work if numpy float64
        X_batch = X_batch.permute(0, 3, 1, 2).cuda()
        y_batch = torch.FloatTensor(y_batch).cuda()

        # Enter training mode BEFORE the forward pass so the loss is never
        # computed from eval-mode outputs if a caller left the net in eval.
        self.net.train()
        self.optimizer.zero_grad()
        outputs = self.net(X_batch)
        loss = self.loss(outputs, y_batch.squeeze(1).long())
        loss.backward()

        # Gradient clipping: cap the total gradient norm at 1.
        torch.nn.utils.clip_grad_norm_(self.net.parameters(), 1)
        self.optimizer.step()

        return loss

    def predict(self, X):
        """Return network outputs for ``X`` in eval mode, without gradients.

        ``X`` is converted and permuted exactly as in ``update``. The network
        is switched back to training mode before returning.
        """
        self.net.eval()
        with torch.no_grad():
            X = torch.FloatTensor(X)
            X = X.permute(0, 3, 1, 2).cuda()
            outputs = self.net(X)
        self.net.train()
        return outputs

    def save(self, file_name):
        """Write the network's state dict to ``file_name``."""
        torch.save(self.net.state_dict(), file_name)

    def load(self, file_name):
        """Load network weights from a state dict stored at ``file_name``."""
        self.net.load_state_dict(torch.load(file_name))
Exemplo n.º 15
0
                        default=500,
                        required=False)
    parser.add_argument("-r",
                        "--render",
                        action='store_true',
                        help="render during training and evaluation",
                        default=False,
                        required=False)
    args = parser.parse_args()
    print(args)

    env = gym.make('CarRacing-v0').unwrapped

    # TODO: Define Q network, target network and DQN agent
    # ...
    Q_network = CNN(history_length=5, n_classes=5)
    Q_target = CNN(history_length=5, n_classes=5)
    agent = DQNAgent(Q=Q_network,
                     Q_target=Q_target,
                     num_actions=5,
                     buffer_size=1e5,
                     lr=1e-4)

    train_online(env,
                 agent,
                 num_episodes=args.episodes,
                 history_length=5,
                 model_dir="./models_carracing",
                 eval_cycle=20,
                 num_eval_episodes=5,
                 skip_frames=5,
Exemplo n.º 16
0
            agent.save(os.path.join(model_dir, f"dqn_agent_1.pt"))

        print(f"episode: {i+1}, total reward: {episode_reward}")

        max_timesteps = min(max_timesteps + 20, 1500)
    tensorboard.close_session()
    return training, validation


if __name__ == "__main__":
    num_eval_episodes = 5
    eval_cycle = 20
    num_actions = 5

    env = gym.make('CarRacing-v0').unwrapped
    Q = CNN(n_classes=5)
    Q_target = CNN(n_classes=5)
    agent = DQNAgentCar(Q,
                        Q_target,
                        num_actions,
                        gamma=0.9,
                        batch_size=20,
                        epsilon=0.9,
                        tau=0.01,
                        lr=0.001,
                        history_length=0)

    training, validation = train_online(env,
                                        agent,
                                        num_episodes=420,
                                        history_length=0,
    return rgb2gray(state).reshape(96, 96) / 255.0


if __name__ == "__main__":
    # Evaluation settings. They are not passed to any call in this block;
    # kept as module-level names in case other code reads them.
    num_eval_episodes = 5
    eval_cycle = 20

    # Frame-history setting and number of actions.
    hist = 3
    num_actions = 5

    env = gym.make('CarRacing-v0').unwrapped

    # 1. Build the target network first, then the online Q network; both
    #    receive ``hist + 1`` as their first argument.
    Q_target = CNN(hist + 1, num_actions)
    Q = CNN(hist + 1, num_actions)

    # 2. Init DQNAgent (see dqn/dqn_agent.py).
    # NOTE(review): history_length=1e6 looks like a buffer capacity rather
    # than a frame-history length; confirm against DQNAgent's signature.
    agent = DQNAgent(
        Q,
        Q_target,
        num_actions,
        double=False,
        history_length=1e6,
    )

    # 3. Train the DQN agent with train_online(...).
    train_online(
        env,
        agent,
        num_episodes=1000,
        history_length=hist,
        model_dir="./models_carracing",
    )