def __init__(self, path, beta, prior_token):
    self.info = 'prior'
    self.params = parse_model_config("{}params.conf".format(path))
    self.batch_size = int(self.params['batch_size'])
    self.lr = float(self.params['lr'])
    self.epsilon = float(self.params['epsilon_init'])
    self.epsilon_incre = float(self.params['epsilon_incre'])
    self.epsilon_target = float(self.params['epsilon_target'])
    self.gamma = float(self.params['gamma'])
    self.target_replace_iter = int(self.params['target_replace_iter'])
    self.memory_capacity = int(self.params['memory_capacity'])
    self.n_actions = int(self.params['n_actions'])
    self.n_states = int(self.params['n_states'])
    self.prior_batch_size = int(self.params['prior_batch_size'])
    self.eval_net = Net(self.n_states, self.n_actions).to('cuda')
    self.target_net = Net(self.n_states, self.n_actions).to('cuda')
    self.learn_step_counter = 0  # for target updating
    self.memory_counter = 0  # for storing memory
    # One transition per row: s (n_states), a, r, s_ (n_states).
    self.memory = np.zeros(
        (self.memory_capacity, self.n_states * 2 + 2))  # initialize memory
    self.optimizer = torch.optim.Adam(self.eval_net.parameters(), lr=self.lr)
    self.loss_dqn_func = nn.MSELoss()
    self.loss_prior_func = nn.MSELoss()
    self.prior_beta = beta
    self.prior = BoostPrior("{}{}/".format(path, prior_token),
                            self.prior_batch_size)
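# The learn() method is not shown in this excerpt. Below is a minimal sketch of
# what it plausibly does, inferred from the attributes above and from train()
# further down, which expects it to return (loss, loss_dqn, loss_prior). The
# replay sampling is the standard DQN recipe; the prior interface
# (self.prior.sample() returning states and target Q-values already on the GPU)
# is an assumption, not the author's confirmed API.
def learn(self):
    # Periodically sync the frozen target network with the online eval network.
    if self.learn_step_counter % self.target_replace_iter == 0:
        self.target_net.load_state_dict(self.eval_net.state_dict())
    self.learn_step_counter += 1

    # Sample a minibatch of (s, a, r, s_) rows from the replay memory.
    idx = np.random.choice(min(self.memory_counter, self.memory_capacity),
                           self.batch_size)
    batch = self.memory[idx]
    b_s = torch.FloatTensor(batch[:, :self.n_states]).to('cuda')
    b_a = torch.LongTensor(
        batch[:, self.n_states:self.n_states + 1].astype(int)).to('cuda')
    b_r = torch.FloatTensor(
        batch[:, self.n_states + 1:self.n_states + 2]).to('cuda')
    b_s_ = torch.FloatTensor(batch[:, -self.n_states:]).to('cuda')

    q_eval = self.eval_net(b_s).gather(1, b_a)                  # Q(s, a) of taken actions
    q_next = self.target_net(b_s_).detach()                     # no gradient through target
    q_target = b_r + self.gamma * q_next.max(1)[0].view(-1, 1)  # one-step TD target
    loss_dqn = self.loss_dqn_func(q_eval, q_target)

    # Regress the eval net toward the boosted prior's Q-values (assumed API).
    prior_s, prior_q = self.prior.sample()
    loss_prior = self.loss_prior_func(self.eval_net(prior_s), prior_q)

    # prior_beta trades off TD learning against imitating the prior.
    loss = loss_dqn + self.prior_beta * loss_prior
    self.optimizer.zero_grad()
    loss.backward()
    self.optimizer.step()
    return loss, loss_dqn, loss_prior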
def __init__(self, config_path, img_size=416):
    super(Darknet, self).__init__()
    self.module_defs = parse_model_config(config_path)
    self.hyperparams, self.module_list = create_modules(self.module_defs)
    # YOLO detection layers are the only modules exposing a `metrics` dict.
    self.yolo_layers = [layer[0] for layer in self.module_list
                        if hasattr(layer[0], "metrics")]
    self.img_size = img_size
    self.seen = 0  # images seen during training, stored in the weights header
    self.header_info = np.array([0, 0, 0, self.seen, 0], dtype=np.int32)
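# Why header_info has five int32 entries: a darknet .weights file begins with a
# short header (version fields, then the images-seen counter at index 3, as in
# the array above), followed by a flat float32 array of layer parameters. A
# minimal sketch of reading that header back; the function name is ours, not
# this repo's API.
import numpy as np

def read_darknet_header(weights_path):
    with open(weights_path, "rb") as f:
        header = np.fromfile(f, dtype=np.int32, count=5)  # version info + seen counter
        weights = np.fromfile(f, dtype=np.float32)        # remaining layer parameters
    return header, weights  # header[3] corresponds to self.seen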
def __init__(self, config_path, img_size=416):
    super(Darknet, self).__init__()
    self.module_defs = parse_model_config(config_path)
    self.hyperparams, self.module_list = create_modules(self.module_defs)
    self.img_size = img_size
    self.mse_loss = nn.MSELoss()
    self.bce_loss = nn.BCELoss()
    # self.no_object_loss_scale = 0.1
    # Trade-off weights used when computing the loss terms.
    self.obj_scale = 1      # weight on the object term of loss_conf
    self.noobj_scale = 0.5  # weight on the no-object term of loss_conf
    self.conf_scale = 1     # weight on loss_conf in the total loss
    self.cls_scale = 1      # weight on the classification loss in the total loss
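# How these weights typically enter a YOLOv3-style confidence/class loss; a
# self-contained sketch in which the masks and targets (obj_mask, tconf, ...)
# are assumed to come from target assignment in the forward pass. It shows the
# combination rule only, not this repo's exact loss code.
import torch.nn as nn

def sketch_conf_cls_loss(pred_conf, pred_cls, tconf, tcls, obj_mask, noobj_mask,
                         obj_scale=1.0, noobj_scale=0.5,
                         conf_scale=1.0, cls_scale=1.0):
    bce = nn.BCELoss()
    loss_conf_obj = bce(pred_conf[obj_mask], tconf[obj_mask])        # cells holding an object
    loss_conf_noobj = bce(pred_conf[noobj_mask], tconf[noobj_mask])  # background cells
    loss_conf = obj_scale * loss_conf_obj + noobj_scale * loss_conf_noobj
    loss_cls = bce(pred_cls[obj_mask], tcls[obj_mask])               # per-class probabilities
    return conf_scale * loss_conf + cls_scale * loss_cls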
def train(env, prior=False, save_ind=None, beta=None, prior_token=None):
    if prior:
        assert beta is not None, "Please provide beta!"
        assert prior_token is not None, "Please provide prior_token!"
        dqn = BoostDQN("./data/{}/".format(env.map_info), beta, prior_token)
    else:
        dqn = DQN("./data/{}/".format(env.map_info))
    # init_pos = [[4, 0], [0, 4], [9, 9]]  # map3
    init_pos = [[7, 0], [0, 6], [15, 13]]  # map4
    if beta is not None:
        writer = SummaryWriter(
            exist_or_create_folder("./logs/{}/{}_{}".format(
                env.map_info, dqn.info, beta)))
    else:
        writer = SummaryWriter(
            exist_or_create_folder("./logs/{}/{}".format(
                env.map_info, dqn.info)))
    ep_rs = []
    ep_r_steps = []
    step_rs = []
    steps_counter = 0
    params = parse_model_config("./data/{}/params.conf".format(env.map_info))
    max_ep = int(float(params["max_ep"]))
    max_steps = int(float(params["max_steps"]))
    # Log the network graph once, with a dummy state matching the model input.
    writer.add_graph(dqn.eval_net,
                     torch.rand(dqn.n_states).unsqueeze(0).to('cuda'))
    for i_episode in range(max_ep):
        if steps_counter > max_steps:
            break
        # Pick a start point randomly.
        s = env.reset([init_pos[np.random.randint(0, 3)]])
        ep_r = 0
        while True:
            env.render(0.001)
            a = dqn.choose_action(s)
            # Take action.
            s_, r, done, info = env.step(a)
            dqn.store_transition(s, a, r, s_)
            ep_r += r
            s = s_
            steps_counter += 1
            writer.add_scalar("reward/step_r", r, steps_counter)
            step_rs.append(r)
            # Start learning once the memory holds more than one batch.
            if dqn.memory_counter > (dqn.batch_size + 1):
                if dqn.info == 'dqn':
                    loss = dqn.learn()
                    writer.add_scalar("loss/loss", loss.item(), steps_counter)
                if dqn.info == 'prior':
                    loss, loss_dqn, loss_prior = dqn.learn()
                    writer.add_scalar("loss/loss", loss.item(), steps_counter)
                    writer.add_scalar("loss/loss_dqn", loss_dqn.item(),
                                      steps_counter)
                    writer.add_scalar("loss/loss_prior", loss_prior.item(),
                                      steps_counter)
            if done:
                dqn.update_epsilon()
                env.render(0.001)
                print("Ep: {} | Ep_r: {} | steps_counter: {}".format(
                    i_episode, ep_r, steps_counter))
                writer.add_scalar("reward/ep_r", ep_r, steps_counter)
                writer.add_scalar("params/epsilon", dqn.epsilon, steps_counter)
                ep_rs.append(ep_r)
                ep_r_steps.append(steps_counter)
                break
    # Resolve the log directory and optional run-index suffix, then save the
    # reward curves.
    log_dir = ("./logs/{}/{}_{}".format(env.map_info, dqn.info, beta)
               if beta is not None
               else "./logs/{}/{}".format(env.map_info, dqn.info))
    suffix = "_{}".format(save_ind) if save_ind is not None else ""
    np.savetxt(exist_or_create_folder(
        "{}/ep_rs{}.npy".format(log_dir, suffix)), np.array(ep_rs))
    np.savetxt(exist_or_create_folder(
        "{}/ep_rs_step{}.npy".format(log_dir, suffix)), np.array(ep_r_steps))
    np.savetxt(exist_or_create_folder(
        "{}/step_rs{}.npy".format(log_dir, suffix)), np.array(step_rs))
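# Hypothetical driver showing how train() is meant to be invoked. GridEnv and
# the argument values are illustrative assumptions; the environment only needs
# a map_info attribute plus the reset/step/render interface used above.
if __name__ == "__main__":
    env = GridEnv(map_info="map4")
    train(env, save_ind=0)                  # plain DQN baseline
    train(env, prior=True, save_ind=0,
          beta=0.5, prior_token="boost")    # prior-regularized run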