Example #1
    def __init__(self, path, beta, prior_token):
        self.info = 'prior'
        self.params = parse_model_config("{}params.conf".format(path))
        self.batch_size = int(self.params['batch_size'])
        self.lr = float(self.params['lr'])
        self.epsilon = float(self.params['epsilon_init'])
        self.epsilon_incre = float(self.params['epsilon_incre'])
        self.epsilon_target = float(self.params['epsilon_target'])
        self.gamma = float(self.params['gamma'])
        self.target_replace_iter = int(self.params['target_replace_iter'])
        self.memory_capacity = int(self.params['memory_capacity'])
        self.n_actions = int(self.params['n_actions'])
        self.n_states = int(self.params['n_states'])
        self.prior_batch_size = int(self.params['prior_batch_size'])

        self.eval_net = Net(self.n_states, self.n_actions).to('cuda')
        self.target_net = Net(self.n_states, self.n_actions).to('cuda')

        self.learn_step_counter = 0  # for target updating
        self.memory_counter = 0  # for storing memory
        self.memory = np.zeros(
            (self.memory_capacity, self.n_states * 2 + 2))  # each row stores one transition: (s, a, r, s_)
        self.optimizer = torch.optim.Adam(self.eval_net.parameters(),
                                          lr=self.lr)
        self.loss_dqn_func = nn.MSELoss()
        self.loss_prior_func = nn.MSELoss()

        self.prior_beta = beta
        self.prior = BoostPrior("{}{}/".format(path, prior_token),
                                self.prior_batch_size)
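The constructor above reads every field from params.conf with a plain key lookup, so parse_model_config is assumed here to return a flat key/value dict. A minimal sketch of such a parser, with a hypothetical params.conf, could look like the following (the real parse_model_config in this project may differ; note that Examples #2 and #3 instead feed the parsed result to create_modules, which expects per-layer blocks rather than a flat dict):

def parse_model_config(path):
    # Sketch only: read key=value lines from a config file into a dict.
    params = {}
    with open(path) as f:
        for line in f:
            line = line.strip()
            if not line or line.startswith('#'):
                continue  # skip blank lines and comments
            key, value = line.split('=', 1)
            params[key.strip()] = value.strip()
    return params

# Hypothetical params.conf contents:
# batch_size=32
# lr=0.001
# epsilon_init=0.1
# n_states=256
# n_actions=4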
Example #2
    def __init__(self, config_path, img_size=416):
        super(Darknet, self).__init__()
        self.module_defs = parse_model_config(config_path)
        self.hyperparams, self.module_list = create_modules(self.module_defs)
        self.yolo_layers = [layer[0] for layer in self.module_list if hasattr(layer[0], "metrics")]
        self.img_size = img_size
        self.seen = 0
        self.header_info = np.array([0, 0, 0, self.seen, 0], dtype=np.int32)
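As a usage note, this constructor only needs a path to a Darknet-style .cfg file. A minimal instantiation sketch (the config path below is an assumption, not part of this example):

# Hypothetical cfg path; any Darknet-style model config should work here.
model = Darknet("config/yolov3.cfg", img_size=416)
print(len(model.module_list))  # one module per block in the .cfg
print(len(model.yolo_layers))  # detection layers, found via their "metrics" attribute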
Example #3
    def __init__(self, config_path, img_size=416):
        super(Darknet, self).__init__()
        self.module_defs = parse_model_config(config_path)
        self.hyperparams, self.module_list = create_modules(self.module_defs)
        self.img_size = img_size

        self.mse_loss = nn.MSELoss()
        self.bce_loss = nn.BCELoss()
        # self.no_object_loss_scale = 0.1
        # Trade-off weights used in the loss calculation.
        self.obj_scale = 1  # weight on the object term of loss_conf
        self.noobj_scale = 0.5  # weight on the no-object term of loss_conf
        self.conf_scale = 1  # weight on the confidence loss
        self.cls_scale = 1  # weight on the classification loss
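The four scale attributes are weights on individual loss terms. A sketch of how such weights are commonly combined in a YOLO-style loss (the masks and predictions below come from a target-building step that this example does not show, so treat this as illustrative rather than this repository's exact forward pass):

# Sketch only: obj_mask, noobj_mask, pred_conf, pred_cls, tconf and tcls are
# assumed to be produced by a separate build_targets step.
loss_conf_obj = self.bce_loss(pred_conf[obj_mask], tconf[obj_mask])
loss_conf_noobj = self.bce_loss(pred_conf[noobj_mask], tconf[noobj_mask])
loss_conf = self.obj_scale * loss_conf_obj + self.noobj_scale * loss_conf_noobj
loss_cls = self.bce_loss(pred_cls[obj_mask], tcls[obj_mask])
loss = self.conf_scale * loss_conf + self.cls_scale * loss_cls  # box-regression terms omitted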
Example #4
def train(env, prior=False, save_ind=None, beta=None, prior_token=None):
    if prior:
        assert beta is not None, "Please provide beta!"
        assert prior_token is not None, "Please provide prior_token!"
        dqn = BoostDQN("./data/{}/".format(env.map_info), beta, prior_token)
    else:
        dqn = DQN("./data/{}/".format(env.map_info))

    # init_pos = [[4, 0], [0, 4], [9, 9]]  # map3
    init_pos = [[7, 0], [0, 6], [15, 13]]  # map4
    if beta is not None:
        writer = SummaryWriter(
            exist_or_create_folder("./logs/{}/{}_{}".format(
                env.map_info, dqn.info, beta)))
    else:
        writer = SummaryWriter(
            exist_or_create_folder("./logs/{}/{}".format(
                env.map_info, dqn.info)))

    ep_rs = []
    ep_r_steps = []
    step_rs = []
    steps_counter = 0

    params = parse_model_config("./data/{}/params.conf".format(env.map_info))
    max_ep = int(float(params["max_ep"]))
    max_steps = int(float(params["max_steps"]))

    # Trace the evaluation net with a dummy state vector so TensorBoard can display the graph.
    writer.add_graph(dqn.eval_net, torch.rand(256).unsqueeze(0).to('cuda'))

    for i_episode in range(max_ep):
        if steps_counter > max_steps:
            break
        # Pick a start point randomly.
        s = env.reset([init_pos[np.random.randint(0, 3)]])
        ep_r = 0
        while True:
            env.render(0.001)
            a = dqn.choose_action(s)

            # take action
            s_, r, done, info = env.step(a)
            dqn.store_transition(s, a, r, s_)
            ep_r += r
            s = s_

            steps_counter += 1
            writer.add_scalar("reward/step_r", r, steps_counter)
            step_rs.append(r)

            if dqn.memory_counter > (dqn.batch_size + 1):
                if dqn.info == 'dqn':
                    loss = dqn.learn()
                    writer.add_scalar("loss/loss", loss.item(), steps_counter)
                if dqn.info == 'prior':
                    loss, loss_dqn, loss_prior = dqn.learn()
                    writer.add_scalar("loss/loss", loss.item(), steps_counter)
                    writer.add_scalar("loss/loss_dqn", loss_dqn.item(),
                                      steps_counter)
                    writer.add_scalar("loss/loss_prior", loss_prior.item(),
                                      steps_counter)

            if done:
                dqn.update_epsilon()
                env.render(0.001)
                print("Ep: {} | Ep_r: {} | steps_counter: {}".format(
                    i_episode, ep_r, steps_counter))
                writer.add_scalar("reward/ep_r", ep_r, steps_counter)
                writer.add_scalar("params/epsilon", dqn.epsilon, steps_counter)
                ep_rs.append(ep_r)
                ep_r_steps.append(steps_counter)
                break

    # Save the learning curves; the log folder depends on whether a prior
    # (beta) was used, and the file name suffix on save_ind.
    if beta is None:
        log_dir = "./logs/{}/{}".format(env.map_info, dqn.info)
    else:
        log_dir = "./logs/{}/{}_{}".format(env.map_info, dqn.info, beta)
    suffix = "" if save_ind is None else "_{}".format(save_ind)

    np.savetxt(
        exist_or_create_folder("{}/ep_rs{}.npy".format(log_dir, suffix)),
        np.array(ep_rs))
    np.savetxt(
        exist_or_create_folder("{}/ep_rs_step{}.npy".format(log_dir, suffix)),
        np.array(ep_r_steps))
    np.savetxt(
        exist_or_create_folder("{}/step_rs{}.npy".format(log_dir, suffix)),
        np.array(step_rs))
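Every path in train() goes through exist_or_create_folder before use; judging from the call sites, it ensures the enclosing directory exists and returns the path unchanged. A minimal sketch of such a helper (an assumption about, not a copy of, this project's implementation):

import os

def exist_or_create_folder(path):
    # Assumed behaviour inferred from the call sites: create the directory
    # portion of `path` if it is missing, then hand the path back.
    folder = path if path.endswith('/') else os.path.dirname(path)
    if folder:
        os.makedirs(folder, exist_ok=True)
    return path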