Example #1
import os
from datetime import datetime

def main(config):

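    # NOTE: network_device, memory_device, and args are assumed to be
    # module-level globals defined elsewhere (e.g., from CLI parsing), as are
    # the project helpers log, VRPReward, make_environ, make_params_dict,
    # and DQN_Agent.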
    reward_rule = VRPReward(**config['reward_params'])
    log("Using Network Device {}".format(network_device))

    M = make_environ(reward_rule=reward_rule,
                     device=network_device,
                     verbose=False,
                     **config['env_params'])
    # M.print_events()
    config['use_neptune'] = config['use_neptune'] and config['mode'] == 'train'

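    # Build a filesystem-safe, timestamped directory name for this run's logs.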
    log_name = str(datetime.now()).replace(" ", "-").replace(":", "-").replace(".", "-")
    config['agent_params']['log_path'] = os.path.join(config['agent_params']['save_dir'], log_name)
    os.makedirs(config['agent_params']['log_path'], exist_ok=True)
    
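    # Experiment tracking: the API token is read from the NEPTUNE_API_TOKEN
    # environment variable rather than from the config file.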
    if config['use_neptune']:
        import neptune
        nep_args = config['neptune_params']
        token = os.environ['NEPTUNE_API_TOKEN']
        proj_name = nep_args['proj_name']
        neptune.init(project_qualified_name=proj_name, api_token=token)
        neptune.create_experiment(params=make_params_dict(config), upload_source_files=[args.config])

    agent = DQN_Agent(M, config['agent_params'], network_device, memory_device, config['use_neptune'])
    if config['mode'] == 'train':
        agent.train()
Example #2
import os

import cv2
import imageio
from tqdm import trange

def main(config):

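    # NOTE: animation_path, model_path, img_info, T, and action_rng are assumed
    # to be module-level globals, along with the project helpers vis_map,
    # get_size, UniRandAgent, and DQN_Agent.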
    os.makedirs(animation_path, exist_ok=True)

    reward_rule = VRPReward(**config['reward_params'])
    log("Using Network Device {}".format('cpu'))
    M = make_environ(reward_rule=reward_rule,
                     device='cpu',
                     verbose=False,
                     **config['env_params'])
    # M.print_events()
    A = UniRandAgent(M)
    agent = DQN_Agent(M, config['agent_params'], 'cpu', 'cpu',
                      config['use_neptune'])
    agent.load(model_path)

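    # Render one throwaway frame first: the video writer needs the frame size
    # up front, and get_size presumably reads it from the dummy image.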
    vis_map(M,
            M.vec,
            background=False,
            margin=1.3,
            info=img_info,
            im_name='./dummy.jpg')
    img_shape = get_size()
    M.assign_auto_agent(A)
    print(img_shape)
    M.reset()

    video_name = os.path.join(animation_path,
                              "{}.mp4".format(img_info.replace(' ', '')))
    gif_name = os.path.join(animation_path,
                            "{}.gif".format(img_info.replace(' ', '')))
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    video = cv2.VideoWriter(video_name, fourcc, 8,
                            (img_shape[1], img_shape[0]))
    with imageio.get_writer(gif_name, mode='I') as gif_writer:
        for t in trange(T):
            im_name = os.path.join(animation_path, "{}.jpg".format(t))
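            # The 0 passed to act() is presumably the exploration epsilon,
            # i.e. the loaded policy acts fully greedily here.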
            action = agent.act(M.vec_flatten, 0, action_rng)
            M.feedback_step(action)
            # M.step_auto()
            vis_map(M,
                    M.vec,
                    background=False,
                    margin=1.3,
                    info=img_info,
                    im_name=im_name)
            im_vec = cv2.imread(im_name)
            video.write(im_vec)
            # cv2.imread returns BGR, while imageio expects RGB; convert for the GIF.
            gif_writer.append_data(cv2.cvtColor(im_vec, cv2.COLOR_BGR2RGB))
            os.remove(im_name)

    video.release()
    cv2.destroyAllWindows()
Example #3
    def __init__(self,
                 queue_reward=0,
                 wait_reward=0,
                 opr_reward=0,
                 time_penalty_power=1,
                 efficiency_reward=0,
                 invalid_route_reward=0):
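        """Store the reward-shaping coefficients used by the VRP environment."""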
        self.queue_reward = queue_reward
        self.wait_reward = wait_reward
        self.opr_reward = opr_reward
        self.time_penalty_power = time_penalty_power
        self.efficiency_reward = efficiency_reward
        self.invalid_route_reward = invalid_route_reward

        log("Initialize reward function | QueueCost:{} | WaitCost:{} | OprCost:{} | TimePenaltyPower:{} | BusEfficiency:{} | InvalidRouteCost:{}"
            .format(self.queue_reward, self.wait_reward, self.opr_reward,
                    self.time_penalty_power, self.efficiency_reward,
                    self.invalid_route_reward))
Example #4
    def __init__(self, capacity, device, state_size, n_action, hist_len):
        self.capacity = capacity
        self.device = device

        log("Initializing Episode Memory | Capacity {:.3g} | Requesting {:.4g}GB"
            .format(capacity,
                    8 * capacity * (2 * state_size + n_action + 1) / (2**30)))
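        # The estimate assumes 8 bytes per element; the float32 tensors below
        # take 4 bytes each, so the logged figure is an upper bound.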

        self.states = torch.zeros(capacity, state_size, device=self.device)
        self.actions = torch.zeros(capacity, n_action, dtype=torch.long,
                                   device=self.device)
        self.rewards = torch.zeros(capacity, device=self.device)
        self.next_states = torch.zeros(capacity, state_size,
                                       device=self.device)

        self.hist_len = hist_len
        self.position = 0
        self.size = 0
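        # position and size presumably implement a ring buffer: position is the
        # next write index and size saturates at capacity.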
        log("Episode Memory Initialized")
Example #5
import os

def main(config):

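    # As in Example #1, network_device, memory_device, and args are assumed to
    # be module-level globals defined elsewhere.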
    reward_rule = VRPReward(**config['reward_params'])
    log("Using Network Device {}".format(network_device))

    M = make_environ(reward_rule=reward_rule,
                     device=network_device,
                     verbose=False,
                     **config['env_params'])
    # M.print_events()
    config['use_neptune'] = config['use_neptune'] and config['mode'] == 'train'

    if config['use_neptune']:
        import neptune
        nep_args = config['neptune_params']
        token = os.environ['NEPTUNE_API_TOKEN']
        proj_name = nep_args['proj_name']
        neptune.init(project_qualified_name=proj_name, api_token=token)
        neptune.create_experiment(params=make_params_dict(config), upload_source_files=[args.config])

    agent = DQN_Agent(M, config['agent_params'], network_device, memory_device, config['use_neptune'])
    if config['mode'] == 'train':
        agent.train()