Example #1
 def __init__(self, history_length=1):
     # TODO: Define network, loss function, optimizer
     # self.net = CNN(...)
     self.net = CNN(history_length).to(device)
     self.optimizer = torch.optim.Adam(self.net.parameters(), lr=1e-4)
     # self.optimizer = torch.optim.SGD(self.net.parameters(), lr=conf.lr, momentum=0.9)
     self.loss_func = torch.nn.CrossEntropyLoss().to(device)
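All of these constructors lean on two things the snippets never define: a CNN module that takes history_length stacked 96x96 grayscale frames and returns n_classes action logits, and (in Examples #1 and #7) a module-level device. The following is a minimal sketch of both under those assumptions; the layer sizes are illustrative, not taken from any of the examples.

import torch
import torch.nn as nn

# Assumed by Examples #1 and #7, which reference a module-level `device`.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

class CNN(nn.Module):
    # Sketch of the interface the constructors rely on: history_length
    # stacked 96x96 grayscale frames in, n_classes action logits out.
    # Layer sizes are illustrative assumptions.
    def __init__(self, history_length=1, n_classes=5):
        super().__init__()
        self.features = nn.Sequential(
            nn.Conv2d(history_length, 16, kernel_size=5, stride=2), nn.ReLU(),
            nn.Conv2d(16, 32, kernel_size=5, stride=2), nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=3, stride=2), nn.ReLU(),
        )
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(64 * 10 * 10, 128), nn.ReLU(),  # 96x96 input -> 10x10 feature map
            nn.Linear(128, n_classes),
        )

    def forward(self, x):
        return self.classifier(self.features(x))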
Example #2
 def __init__(self, lr=1e-4, history_length=1):
     # TODO: Define network, loss function, optimizer
     # self.net = CNN(...)
     self.net = CNN(history_length=history_length, n_classes=5).cuda()
     self.history_length = history_length
     self.criterion = nn.CrossEntropyLoss().cuda()
     self.optimizer = torch.optim.Adam(self.net.parameters(), lr=lr)
Example #3
 def __init__(self, history_length=1):
     # TODO: Define network, loss function, optimizer
     # self.net = CNN(...)
     self.learning_rate = 1e-4
     self.net = CNN(history_length=history_length).cuda()
     self.loss = torch.nn.CrossEntropyLoss()
     self.optimizer = torch.optim.Adam(self.net.parameters(), lr=self.learning_rate)
Example #4
 def __init__(self, history_size, n_actions=5, lr=0.0004):
     # TODO: Define network, loss function, optimizer
     # self.net = CNN(...)
     self.history_size = history_size
     self.num_actions = n_actions
     self.net = CNN(self.history_size, n_actions).cuda()
     self.lr = lr
     self.criterion = torch.nn.CrossEntropyLoss()
     self.optimizer = torch.optim.Adam(self.net.parameters(), lr=lr)
Example #5
    def __init__(self, device='cpu', history_length=1, lr=1e-4, n_classes=5):
        # TODO: Define network, loss function, optimizer
        self.device = torch.device(device)

        self.net = CNN(history_length=history_length, n_classes=n_classes)
        self.net.to(self.device)

        self.lossfn = torch.nn.CrossEntropyLoss()
        self.optimizer = torch.optim.Adam(self.net.parameters(), lr=lr)
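A constructor in this style is normally paired with a method that runs one cross-entropy gradient step. Here is a hedged sketch using Example #5's attribute names; the method itself, and the batch shapes in the comments, are assumptions rather than code from the source.

    def update(self, X_batch, y_batch):
        # One behavioral-cloning step: forward pass, cross-entropy loss,
        # backward pass, optimizer step.
        self.net.train()
        X_batch = X_batch.to(self.device)   # (B, history_length, 96, 96)
        y_batch = y_batch.to(self.device)   # (B,) integer action labels
        loss = self.lossfn(self.net(X_batch), y_batch)
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()
        return loss.item()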
Example #6
 def __init__(self, history_length, learning_rate, weights_classes):
     weights_classes = None if weights_classes is None else weights_classes.to(
         DEVICE)
     self.net = CNN(history_length=history_length, n_classes=4)
     # self.net = Resnet18(history_length=history_length, n_classes=4)
     self.criterion = nn.CrossEntropyLoss(weight=weights_classes)
     self.optimizer = torch.optim.Adam(params=self.net.parameters(),
                                       lr=learning_rate)
     # self.lr_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(self.optimizer, threshold=0.00001)
     self.lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(
         self.optimizer, T_0=1, T_mult=3)
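Example #6 is the only variant that weights the loss and attaches a scheduler. The class weights counter label imbalance (straight driving dominates CarRacing demonstrations), and CosineAnnealingWarmRestarts(T_0=1, T_mult=3) restarts the cosine cycle after 1, then 3, then 9 epochs, so it should be stepped once per epoch. A usage sketch follows; the agent class name BCAgent, the loader, the label tensor, and the constructor arguments are assumptions.

# Inverse-frequency class weights from the integer training labels (assumed tensor).
weights_classes = 1.0 / torch.bincount(train_labels).float()
agent = BCAgent(history_length=1, learning_rate=1e-4, weights_classes=weights_classes)

for epoch in range(n_epochs):
    for X_batch, y_batch in train_loader:
        agent.optimizer.zero_grad()
        loss = agent.criterion(agent.net(X_batch), y_batch)
        loss.backward()
        agent.optimizer.step()
    agent.lr_scheduler.step()  # advance the cosine/warm-restart schedule once per epoch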
Example #7
    def __init__(self, network_type, lr, hidden_layers):
        # TODO: Define network, loss function, optimizer
        # self.net = FCN(...) or CNN(...)
        if network_type == "FCN":
            self.net = FCN(hidden_layers).to(device)
        else:
            self.net = CNN().to(device)

        self.loss_fcn = nn.CrossEntropyLoss()

        self.optimizer = optim.Adam(self.net.parameters(), lr)
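At evaluation time these agents need the inverse of the loss setup: a greedy argmax over the class logits. A minimal sketch; the method name and the preprocessing contract are assumptions.

    @torch.no_grad()
    def predict(self, state):
        # Greedy action id from the logits; `state` is assumed to be an
        # already-preprocessed (history_length, 96, 96) grayscale array.
        self.net.eval()
        x = torch.from_numpy(state).float().unsqueeze(0).to(device)
        return int(self.net(x).argmax(dim=1).item())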
Example #8
            agent.save(os.path.join(model_dir, "dqn_agent_1.pt"))

        print(f"episode: {i+1}, total reward: {episode_reward}")

        max_timesteps = min(max_timesteps + 20, 1500)
    tensorboard.close_session()
    return training, validation


if __name__ == "__main__":
    num_eval_episodes = 5
    eval_cycle = 20
    num_actions = 5

    env = gym.make('CarRacing-v0').unwrapped
    Q = CNN(n_classes=5)
    Q_target = CNN(n_classes=5)
    agent = DQNAgentCar(Q,
                        Q_target,
                        num_actions,
                        gamma=0.9,
                        batch_size=20,
                        epsilon=0.9,
                        tau=0.01,
                        lr=0.001,
                        history_length=0)

    training, validation = train_online(env,
                                        agent,
                                        num_episodes=420,
                                        history_length=0,
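The tau=0.01 passed to DQNAgentCar implies a Polyak (soft) target-network update inside the agent. The rule below is the standard one; the helper itself is a sketch, not the repository's code.

def soft_update(Q, Q_target, tau):
    # theta_target <- tau * theta + (1 - tau) * theta_target, per parameter
    for p, p_t in zip(Q.parameters(), Q_target.parameters()):
        p_t.data.copy_(tau * p.data + (1.0 - tau) * p_t.data)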
Example #9
                        default=500,
                        required=False)
    parser.add_argument("-r",
                        "--render",
                        action='store_true',
                        help="render during training and evaluation",
                        default=False,
                        required=False)
    args = parser.parse_args()
    print(args)

    env = gym.make('CarRacing-v0').unwrapped

    # TODO: Define Q network, target network and DQN agent
    # ...
    Q_network = CNN(history_length=5, n_classes=5)
    Q_target = CNN(history_length=5, n_classes=5)
    agent = DQNAgent(Q=Q_network,
                     Q_target=Q_target,
                     num_actions=5,
                     buffer_size=1e5,
                     lr=1e-4)

    train_online(env,
                 agent,
                 num_episodes=args.episodes,
                 history_length=5,
                 model_dir="./models_carracing",
                 eval_cycle=20,
                 num_eval_episodes=5,
                 skip_frames=5,
Example #10
def state_preprocessing(state):  # function name assumed; the snippet begins mid-file
    return rgb2gray(state).reshape(96, 96) / 255.0
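rgb2gray is imported from the exercise utilities in these snippets. As an assumption about what it computes, here is the standard luminance-weighted conversion (the weights match skimage.color.rgb2gray):

import numpy as np

def rgb2gray(rgb):
    # Luminance-weighted RGB -> gray; assumed to match the utils helper.
    return np.dot(rgb[..., :3], np.array([0.2125, 0.7154, 0.0721]))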


if __name__ == "__main__":

    num_eval_episodes = 5
    eval_cycle = 20
    hist = 3
    num_actions = 5
    env = gym.make('CarRacing-v0').unwrapped

    # TODO: Define Q network, target network and DQN agent
    # ...
    Q_target = CNN(hist + 1, num_actions)
    Q = CNN(hist + 1, num_actions)
    # 2. init DQNAgent (see dqn/dqn_agent.py)
    agent = DQNAgent(Q,
                     Q_target,
                     num_actions,
                     double=False,
                     history_length=1e6)
    # agent = DQNAgent(Q, Q_target, num_actions, double=False, epsilon = 0.99, eps_decay = True, history_length=1e6)
    # 3. train DQN agent with train_online(...)
    train_online(env,
                 agent,
                 num_episodes=1000,
                 history_length=hist,
                 model_dir="./models_carracing")