def main(config):
    """Build the environment and DQN agent described by ``config``; train if asked.

    Steps, in order:
      1. Create the reward rule from ``config['reward_params']`` and the
         environment from ``config['env_params']`` on ``network_device``.
      2. Restrict Neptune usage to training runs (``config['use_neptune']`` is
         overwritten in place).
      3. Create a timestamped directory under ``agent_params['save_dir']`` and
         store its path in ``agent_params['log_path']``.
      4. If Neptune is enabled, initialise it with the token taken from the
         ``NEPTUNE_API_TOKEN`` environment variable and open an experiment.
      5. Construct the agent and call ``agent.train()`` when
         ``config['mode'] == 'train'``.

    Args:
        config: Nested configuration mapping. Keys read here:
            ``reward_params``, ``env_params``, ``use_neptune``, ``mode``,
            ``agent_params`` (with ``save_dir``) and, when Neptune is on,
            ``neptune_params`` (with ``proj_name``). The mapping is mutated.

    Raises:
        KeyError: If a required config key or ``NEPTUNE_API_TOKEN`` is missing.
    """
    rule = VRPReward(**config['reward_params'])
    log("Using Network Device {}".format(network_device))
    env = make_environ(
        reward_rule=rule,
        device=network_device,
        verbose=False,
        **config['env_params'],
    )
    # env.print_events()

    # Experiment tracking is only meaningful while training.
    config['use_neptune'] = config['use_neptune'] and config['mode'] == 'train'

    # Timestamp with every ' ', ':' and '.' turned into '-' so it is a safe
    # directory name.
    stamp = str(datetime.now())
    run_name = stamp.translate(str.maketrans({" ": "-", ":": "-", ".": "-"}))
    agent_cfg = config['agent_params']
    agent_cfg['log_path'] = os.path.join(agent_cfg['save_dir'], run_name)
    os.makedirs(agent_cfg['log_path'], exist_ok=True)

    if config['use_neptune']:
        # Imported lazily so runs without tracking do not need the package.
        import neptune

        neptune_cfg = config['neptune_params']
        api_token = os.environ['NEPTUNE_API_TOKEN']
        project = neptune_cfg['proj_name']
        neptune.init(project_qualified_name=project, api_token=api_token)
        neptune.create_experiment(
            params=make_params_dict(config),
            upload_source_files=[args.config],
        )

    agent = DQN_Agent(
        env,
        config['agent_params'],
        network_device,
        memory_device,
        config['use_neptune'],
    )
    if config['mode'] == 'train':
        agent.train()
def main(config):
    """Roll out a loaded DQN agent on CPU and record the run as MP4 and GIF.

    The environment is built from ``config``, the agent weights are loaded
    from ``model_path``, and ``T`` steps are simulated. After every step the
    map is rendered to a temporary JPEG in ``animation_path``, appended to
    both the video and the GIF, and then deleted.

    Args:
        config: Configuration mapping. Keys read here: ``reward_params``,
            ``env_params``, ``agent_params`` and ``use_neptune``.

    Raises:
        IOError: If a rendered frame cannot be read back from disk.
    """
    os.makedirs(animation_path, exist_ok=True)

    reward_rule = VRPReward(**config['reward_params'])
    log("Using Network Device {}".format('cpu'))
    env = make_environ(reward_rule=reward_rule, device='cpu', verbose=False,
                       **config['env_params'])
    # env.print_events()
    rand_agent = UniRandAgent(env)
    agent = DQN_Agent(env, config['agent_params'], 'cpu', 'cpu',
                      config['use_neptune'])
    agent.load(model_path)

    # Render one throwaway frame before sizing the video.
    # NOTE(review): get_size() takes no arguments; presumably it reports the
    # size of the frame just written to ./dummy.jpg -- confirm.
    vis_map(env, env.vec, background=False, margin=1.3, info=img_info,
            im_name='./dummy.jpg')
    img_shape = get_size()
    env.assign_auto_agent(rand_agent)
    print(img_shape)
    env.reset()

    clip_stem = img_info.replace(' ', '')
    video_name = os.path.join(animation_path, "{}.mp4".format(clip_stem))
    gif_name = os.path.join(animation_path, "{}.gif".format(clip_stem))

    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    # cv2.VideoWriter takes frameSize as (width, height) and 8 is the frame
    # rate, so img_shape is used here as (height, width, ...).
    video = cv2.VideoWriter(video_name, fourcc, 8,
                            (img_shape[1], img_shape[0]))
    try:
        with imageio.get_writer(gif_name, mode='I') as gif_writer:
            for t in trange(T):
                im_name = os.path.join(animation_path, "{}.jpg".format(t))
                # NOTE(review): the literal 0 is presumably the exploration
                # rate (greedy policy) -- confirm against DQN_Agent.act.
                action = agent.act(env.vec_flatten, 0, action_rng)
                env.feedback_step(action)
                # env.step_auto()
                vis_map(env, env.vec, background=False, margin=1.3,
                        info=img_info, im_name=im_name)

                frame_bgr = cv2.imread(im_name)
                if frame_bgr is None:
                    # cv2.imread signals failure by returning None.
                    raise IOError(
                        "Could not read rendered frame {}".format(im_name))

                # OpenCV decodes to BGR, which is what VideoWriter expects...
                video.write(frame_bgr)
                # ...but imageio interprets arrays as RGB, so convert first;
                # otherwise red and blue are swapped in the GIF.
                gif_writer.append_data(
                    cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB))
                os.remove(im_name)
    finally:
        # Always finalize the video container, even if a step or render fails.
        cv2.destroyAllWindows()
        video.release()
def __init__(self, queue_reward=0, wait_reward=0, opr_reward=0,
             time_penalty_power=1, efficiency_reward=0,
             invalid_route_reward=0):
    """Store the reward coefficients on the instance and log them.

    Args:
        queue_reward: Coefficient logged as ``QueueCost``.
        wait_reward: Coefficient logged as ``WaitCost``.
        opr_reward: Coefficient logged as ``OprCost``.
        time_penalty_power: Value logged as ``TimePenaltyPower``.
        efficiency_reward: Coefficient logged as ``BusEfficiency``.
        invalid_route_reward: Coefficient logged as ``InvalidRouteCost``.
    """
    self.queue_reward = queue_reward
    self.wait_reward = wait_reward
    self.opr_reward = opr_reward
    self.time_penalty_power = time_penalty_power
    self.efficiency_reward = efficiency_reward
    self.invalid_route_reward = invalid_route_reward

    # Report the values as stored, in the same order as the labels below.
    stored = (
        self.queue_reward,
        self.wait_reward,
        self.opr_reward,
        self.time_penalty_power,
        self.efficiency_reward,
        self.invalid_route_reward,
    )
    template = "Initialize reward function | QueueCost:{} | WaitCost:{} | OprCost:{} | TimePenaltyPower:{} | BusEfficiency:{} | InvalidRouteCost:{}"
    log(template.format(*stored))
def __init__(self, capacity, device, state_size, n_action, hist_len):
    """Pre-allocate zero-filled transition buffers directly on ``device``.

    Four tensors are created, each with ``capacity`` rows:
    ``states`` and ``next_states`` of width ``state_size``, ``actions`` of
    width ``n_action`` (``torch.long``), and a 1-D ``rewards`` tensor.
    ``position`` and ``size`` both start at 0.

    Args:
        capacity: Number of rows in every buffer.
        device: Torch device (or device string) the buffers live on.
        state_size: Width of one state row.
        n_action: Width of one action row.
        hist_len: Stored as ``self.hist_len``; not otherwise used here.
    """
    self.capacity = capacity
    self.device = device

    # Size the estimate from the dtypes actually allocated below: states,
    # next_states and rewards use torch's default float dtype, actions use
    # int64. (Assuming 8 bytes everywhere over-reports with float32.)
    float_bytes = torch.empty((), dtype=torch.get_default_dtype()).element_size()
    long_bytes = torch.empty((), dtype=torch.long).element_size()
    requested_bytes = capacity * (
        float_bytes * (2 * state_size + 1) + long_bytes * n_action
    )
    log("Initializing Episode Memory | Capacity {:.3g} | Requesting {:.4g}GB"
        .format(capacity, requested_bytes / (2**30)))

    # Allocate on the target device directly; `torch.zeros(...).to(device)`
    # would first build the whole buffer on the host and then copy it.
    self.states = torch.zeros((capacity, state_size), device=self.device)
    self.actions = torch.zeros((capacity, n_action), dtype=torch.long,
                               device=self.device)
    self.rewards = torch.zeros((capacity,), device=self.device)
    self.next_states = torch.zeros((capacity, state_size),
                                   device=self.device)

    self.hist_len = hist_len
    self.position = 0
    self.size = 0
    log("Episode Memory Initialized")
def main(config):
    """Create the environment and DQN agent from ``config`` and train on request.

    The reward rule and environment are built first. Neptune tracking is then
    limited to training runs (``config['use_neptune']`` is overwritten in
    place) and, if still enabled, initialised with the token from the
    ``NEPTUNE_API_TOKEN`` environment variable. Finally the agent is
    constructed and ``agent.train()`` is called when
    ``config['mode'] == 'train'``.

    Args:
        config: Nested configuration mapping. Keys read here:
            ``reward_params``, ``env_params``, ``use_neptune``, ``mode``,
            ``agent_params`` and, when Neptune is on, ``neptune_params``
            (with ``proj_name``). The mapping is mutated.

    Raises:
        KeyError: If a required config key or ``NEPTUNE_API_TOKEN`` is missing.
    """
    reward = VRPReward(**config['reward_params'])
    log("Using Network Device {}".format(network_device))
    environment = make_environ(
        reward_rule=reward,
        device=network_device,
        verbose=False,
        **config['env_params'],
    )
    # environment.print_events()

    # Only track experiments for training runs.
    config['use_neptune'] = config['use_neptune'] and config['mode'] == 'train'
    if config['use_neptune']:
        tracking_cfg = config['neptune_params']
        api_token = os.environ['NEPTUNE_API_TOKEN']
        project_name = tracking_cfg['proj_name']
        neptune.init(project_qualified_name=project_name, api_token=api_token)
        neptune.create_experiment(
            params=make_params_dict(config),
            upload_source_files=[args.config],
        )

    agent = DQN_Agent(
        environment,
        config['agent_params'],
        network_device,
        memory_device,
        config['use_neptune'],
    )
    if config['mode'] == 'train':
        agent.train()