c.model_save_int = 100  # in episodes
c.profile_int = 50  # in episodes


def policy_noise(action):
    """Return ``action`` with clipped normal noise added, clamped to [-1, 1].

    The noise is applied by ``add_clipped_normal_noise_to_action`` using
    ``c.policy_noise_params``; the noisy result is then passed through
    ``t.clamp`` with bounds -1 and 1.
    """
    noisy_action = add_clipped_normal_noise_to_action(
        action, c.policy_noise_params
    )
    return t.clamp(noisy_action, -1, 1)


if __name__ == "__main__":
    # Create the save environment from the configured root directory and
    # restart trial, then hand it to prep_args together with the config.
    save_env = SaveEnv(c.root_dir, restart_use_trial=c.restart_from_trial)
    prep_args(c, save_env)

    # save_env.remove_trials_older_than(diff_hour=1)
    global_board.init(save_env.get_trial_train_log_dir())
    writer = global_board.writer
    logger.info("Directories prepared.")

    # Networks are constructed in the order actor, actor_t, critic,
    # critic_t, critic2, critic2_t. Each one is moved to c.device and then
    # wrapped in MW with c.device passed for both device arguments.
    # NOTE(review): the "_t" suffix presumably marks target networks; confirm.
    actor, actor_t = (
        MW(Actor(observe_dim, action_dim, 1).to(c.device), c.device, c.device)
        for _ in range(2)
    )
    critic, critic_t, critic2, critic2_t = (
        MW(Critic(observe_dim, action_dim).to(c.device), c.device, c.device)
        for _ in range(4)
    )
if is_warm_up: # generate random actions act_dim = env.get_action_space(group_handle)[0] actions = np.random.randint(0, act_dim, agent_num, dtype=np.int32) env.set_action(group_handle, actions) if __name__ == "__main__": total_steps = max_epochs * max_episodes * max_steps # preparations prep_dirs_default(root_dir) logger.info("Directories prepared.") global_board.init(log_dir + "train_log") writer = global_board.writer env = magent.GridWorld(generate_combat_config(map_size), map_size=map_size) agent_num = int(np.sqrt(map_size * map_size * agent_ratio))**2 group1_handle, group2_handle = env.get_handles() # shape: (act,) action_dim = env.get_action_space(group1_handle)[0] # shape: (view_width, view_height, n_channel) view_space = env.get_view_space(group1_handle) view_dim = np.prod(view_space) # shape: (ID embedding + last action + last reward + relative pos) feature_dim = env.get_feature_space(group1_handle)[0] base_actor = SwarmActor(view_dim, action_dim, history_depth, neighbor_num,
# Second framework set-up: point the c2 config at the trial to load, build
# the models, and register both frameworks for the run below.
logger.info("Framework 1 initialized")

c2.restart_from_trial = load_trial2
framework2 = create_models2()
logger.info("Framework 2 initialized")

# One tuple per framework.
# NOTE(review): presumably (framework, agent-run function, load function);
# confirm against the code that consumes `operators`.
operators = [(framework1, run_agents1, load_framework1),
             (framework2, run_agents2, load_framework2)]

# testing
# preparations
config = generate_combat_config(map_size)
env = magent.GridWorld(config, map_size=map_size)
env.reset()

global_board.init(test_root_dir)
writer = global_board.writer
logger.info("Directories prepared.")

# begin training
# NOTE(review): this section is labelled "testing" above and writes to
# test_root_dir, so the "training" wording may be stale; confirm.
episode = Counter()
episode_finished = False
# Two-element tally, initialised to zero.
# NOTE(review): presumably one win count per framework; confirm.
wins = [0, 0]

# Runs while the episode counter is below max_episodes; the counter is
# advanced at the top of every iteration.
while episode < max_episodes:
    episode.count()
    logger.info("Begin episode {} at {}".format(episode, dt.now().strftime("%m/%d-%H:%M:%S")))

    # environment initialization
    env.reset()
    env.set_render_dir(test_root_dir)