# warm up should be less than one epoch
c.ddpg_update_batch_size = 100
c.ddpg_warmup_steps = 200
c.model_save_int = 100  # in episodes
c.profile_int = 50  # in episodes


def policy_noise(action):
    # add clipped normal noise to the action, then clamp the result
    # to the valid action range [-1, 1]
    return t.clamp(
        add_clipped_normal_noise_to_action(action, c.policy_noise_params),
        -1, 1)


if __name__ == "__main__":
    save_env = SaveEnv(c.root_dir, restart_use_trial=c.restart_from_trial)
    prep_args(c, save_env)

    # save_env.remove_trials_older_than(diff_hour=1)
    global_board.init(save_env.get_trial_train_log_dir())
    writer = global_board.writer
    logger.info("Directories prepared.")

    # actor / target actor and critic / target critic networks
    actor = MW(Actor(observe_dim, action_dim, 1).to(c.device),
               c.device, c.device)
    actor_t = MW(Actor(observe_dim, action_dim, 1).to(c.device),
                 c.device, c.device)
    critic = MW(Critic(observe_dim, action_dim).to(c.device),
                c.device, c.device)
    critic_t = MW(Critic(observe_dim, action_dim).to(c.device),
                  c.device, c.device)
    critic2 = MW(
load_framework2 = "naive_ppo_parallel"
load_trial1 = "2020_05_06_21_50_57"
load_trial2 = "2020_05_06_21_50_57"
test_root_dir = ""


def load_framework(name):
    # dynamically import the requested framework module by name
    module = imp.import_module(".magent_" + name)
    return module.c, module.create_models, module.run_agents


if __name__ == "__main__":
    c1, create_models1, run_agents1 = load_framework(load_framework1)
    c2, create_models2, run_agents2 = load_framework(load_framework2)

    save_env1 = SaveEnv(c1.root_dir, restart_use_trial=load_trial1)
    prep_args(c1, save_env1)
    save_env2 = SaveEnv(c2.root_dir, restart_use_trial=load_trial2)
    prep_args(c2, save_env2)

    c1.restart_from_trial = load_trial1
    framework1 = create_models1()
    logger.info("Framework 1 initialized")

    c2.restart_from_trial = load_trial2
    framework2 = create_models2()
    logger.info("Framework 2 initialized")

    # pair each restored framework with its agent-running routine and name
    operators = [(framework1, run_agents1, load_framework1),
                 (framework2, run_agents2, load_framework2)]

    # testing
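    # A hypothetical sketch of how the two operators could be driven
    # head-to-head (assumption only: the concrete signature of each
    # run_agents callable is defined in the magent_* modules loaded above
    # and may differ from this):
    #
    #     for framework, run_agents, name in operators:
    #         logger.info("Running test episode with " + name)
    #         run_agents(framework)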