def __init__(self, history_length=1):
    # TODO: Define network, loss function, optimizer
    # self.net = CNN(...)
    self.net = CNN(history_length).to(device)
    self.optimizer = torch.optim.Adam(self.net.parameters(), lr=1e-4)
    # self.optimizer = torch.optim.SGD(self.net.parameters(), lr=conf.lr, momentum=0.9)
    self.loss_func = torch.nn.CrossEntropyLoss().to(device)
def __init__(self, lr=1e-4, history_length=1):
    # TODO: Define network, loss function, optimizer
    # self.net = CNN(...)
    self.net = CNN(history_length=history_length, n_classes=5).cuda()
    self.history_length = history_length
    self.criterion = nn.CrossEntropyLoss().cuda()
    self.optimizer = torch.optim.Adam(self.net.parameters(), lr=lr)
def __init__(self, history_length=1):
    # TODO: Define network, loss function, optimizer
    # self.net = CNN(...)
    self.learning_rate = 1e-4
    self.net = CNN(history_length=history_length).cuda()
    self.loss = torch.nn.CrossEntropyLoss()
    self.optimizer = torch.optim.Adam(self.net.parameters(), lr=self.learning_rate)
def __init__(self, history_size, n_actions=5, lr=0.0004):
    # TODO: Define network, loss function, optimizer
    # self.net = CNN(...)
    self.history_size = history_size
    self.num_actions = n_actions
    self.net = CNN(self.history_size, n_actions).cuda()
    self.lr = lr
    self.criterion = torch.nn.CrossEntropyLoss()
    self.optimizer = torch.optim.Adam(self.net.parameters(), lr=lr)
def __init__(self, device='cpu', history_length=1, lr=1e-4, n_classes=5):
    # TODO: Define network, loss function, optimizer
    self.device = torch.device(device)
    self.net = CNN(history_length=history_length, n_classes=n_classes)
    self.net.to(self.device)
    self.lossfn = torch.nn.CrossEntropyLoss()
    self.optimizer = torch.optim.Adam(self.net.parameters(), lr=lr)
def __init__(self, history_length, learning_rate, weights_classes):
    weights_classes = None if weights_classes is None else weights_classes.to(DEVICE)
    self.net = CNN(history_length=history_length, n_classes=4)
    # self.net = Resnet18(history_length=history_length, n_classes=4)
    self.criterion = nn.CrossEntropyLoss(weight=weights_classes)
    self.optimizer = torch.optim.Adam(params=self.net.parameters(), lr=learning_rate)
    # self.lr_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(self.optimizer, threshold=0.00001)
    self.lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(self.optimizer, T_0=1, T_mult=3)
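# Usage sketch (assumption, not taken from the snippet above): CosineAnnealingWarmRestarts
# with T_0=1, T_mult=3 runs cosine cycles of length 1, 3, 9, ... epochs, so the learning
# rate restarts after epochs 1, 4, and 13. The schedule only advances when step() is
# called, typically once per epoch after optimizer.step(); the tiny model below exists
# only to make the example self-contained.
import torch

net = torch.nn.Linear(4, 2)
optimizer = torch.optim.Adam(net.parameters(), lr=1e-3)
scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer, T_0=1, T_mult=3)
for epoch in range(13):
    optimizer.step()    # stand-in for one epoch of training
    scheduler.step()    # advance the warm-restart schedule once per epoch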
def __init__(self, network_type, lr, hidden_layers):
    # TODO: Define network, loss function, optimizer
    # self.net = FCN(...) or CNN(...)
    if network_type == "FCN":
        self.net = FCN(hidden_layers).to(device)
    else:
        self.net = CNN().to(device)
    self.loss_fcn = nn.CrossEntropyLoss()
    self.optimizer = optim.Adam(self.net.parameters(), lr)
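# Illustrative sketch of the CNN interface the constructors above assume: history_length
# stacked 96x96 grayscale frames as input channels and n_classes output logits suitable
# for CrossEntropyLoss. The layer sizes here are assumptions for illustration, not the
# original architecture from any of these repositories.
import torch
import torch.nn as nn

class CNN(nn.Module):
    def __init__(self, history_length=1, n_classes=5):
        super().__init__()
        self.features = nn.Sequential(
            nn.Conv2d(history_length, 16, kernel_size=5, stride=2), nn.ReLU(),
            nn.Conv2d(16, 32, kernel_size=5, stride=2), nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=3, stride=2), nn.ReLU(),
        )
        self.head = nn.Sequential(
            nn.Flatten(),
            nn.LazyLinear(128), nn.ReLU(),
            nn.Linear(128, n_classes),
        )

    def forward(self, x):
        # x: (batch, history_length, 96, 96) stacked grayscale frames
        return self.head(self.features(x))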
        agent.save(os.path.join(model_dir, f"dqn_agent_1.pt"))
        print(f"episode: {i+1}, total reward: {episode_reward}")
        max_timesteps = min(max_timesteps + 20, 1500)

    tensorboard.close_session()
    return training, validation


if __name__ == "__main__":
    num_eval_episodes = 5
    eval_cycle = 20
    num_actions = 5

    env = gym.make('CarRacing-v0').unwrapped

    Q = CNN(n_classes=5)
    Q_target = CNN(n_classes=5)
    agent = DQNAgentCar(Q, Q_target, num_actions, gamma=0.9, batch_size=20,
                        epsilon=0.9, tau=0.01, lr=0.001, history_length=0)
    training, validation = train_online(env, agent, num_episodes=420, history_length=0,
                        default=500, required=False)
    parser.add_argument("-r", "--render", action='store_true',
                        help="render during training and evaluation",
                        default=False, required=False)
    args = parser.parse_args()
    print(args)

    env = gym.make('CarRacing-v0').unwrapped

    # TODO: Define Q network, target network and DQN agent
    # ...
    Q_network = CNN(history_length=5, n_classes=5)
    Q_target = CNN(history_length=5, n_classes=5)
    agent = DQNAgent(Q=Q_network, Q_target=Q_target, num_actions=5,
                     buffer_size=1e5, lr=1e-4)

    train_online(env, agent, num_episodes=args.episodes, history_length=5,
                 model_dir="./models_carracing", eval_cycle=20,
                 num_eval_episodes=5, skip_frames=5,
    return rgb2gray(state).reshape(96, 96) / 255.0


if __name__ == "__main__":
    num_eval_episodes = 5
    eval_cycle = 20
    hist = 3
    num_actions = 5

    env = gym.make('CarRacing-v0').unwrapped

    # TODO: Define Q network, target network and DQN agent
    # ...
    hist = 3
    num_actions = 5
    Q_target = CNN(hist + 1, num_actions)
    Q = CNN(hist + 1, num_actions)

    # 2. init DQNAgent (see dqn/dqn_agent.py)
    agent = DQNAgent(Q, Q_target, num_actions, double=False, history_length=1e6)
    # agent = DQNAgent(Q, Q_target, num_actions, double=False, epsilon=0.99, eps_decay=True, history_length=1e6)

    # 3. train DQN agent with train_online(...)
    train_online(env, agent, num_episodes=1000, history_length=hist,
                 model_dir="./models_carracing")
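# Hedged sketch of two pieces the DQNAgent constructors above presumably rely on:
# epsilon-greedy action selection over the Q network's outputs and a soft target-network
# update controlled by tau (both `epsilon` and `tau` appear as constructor arguments in
# the snippets). Function names, signatures, and the assumed state shape are illustrative
# assumptions, not the actual dqn/dqn_agent.py implementation from any of these repos.
import numpy as np
import torch

def act_epsilon_greedy(Q, state, num_actions, epsilon):
    # With probability epsilon pick a random action, otherwise the greedy action.
    # state is assumed to be a (history_length, 96, 96) array of stacked frames.
    if np.random.rand() < epsilon:
        return np.random.randint(num_actions)
    with torch.no_grad():
        q_values = Q(torch.as_tensor(state, dtype=torch.float32).unsqueeze(0))
    return int(q_values.argmax(dim=1).item())

def soft_update(Q_target, Q, tau):
    # Polyak-average the online weights into the target network: theta_target <-
    # tau * theta + (1 - tau) * theta_target.
    for target_param, param in zip(Q_target.parameters(), Q.parameters()):
        target_param.data.copy_(tau * param.data + (1.0 - tau) * target_param.data)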