] # handle invalid dir char for i in range(len(model_names)): model_names[i] = model_names[i].replace('[', '').replace(']', '').replace( ' ', '').replace(',', '_') # handle standard arg, i.e., {} model_names = ['standard' if name == '' else name for name in model_names] # model loop for i in trange(len(args), desc='model', leave=True): model_dir = '{}/{}'.format(root, model_names[i]) os.mkdir(model_dir) # log cmd with open('{}/cmd_config.txt'.format(model_dir), 'w') as f: for k, v in control_args.items(): f.write(str(k) + ': ' + str(v) + '\n') arg = args[i] # repeat loop for n in trange(control_args['repeat'], desc='repeat', leave=True): dir = '{}/{}'.format(model_dir, n) os.mkdir(dir) maddpg = MADDPG(env, **arg) if control_args.has_key('load'): model_path = control_args['load'] maddpg.load_actor(model_path) maddpg.load_critic(model_path) if control_args['train']: maddpg.train(dir, control_args['save_interval']) maddpg.save(dir) maddpg.test(dir, n=control_args['n_test'])
if __name__ == "__main__":
    # Script entry point: build the Unity Tennis environment, train a MADDPG
    # model on it, then save a plot of the returned score series to
    # 'train_scores.png'.

    # Configuration
    n_episodes = 5000

    # Set seed
    seed_everything(42)

    # Unity environment
    env = UnityEnvironment("./Tennis_Linux/Tennis.x86_64")

    try:
        # Agent
        agent = TennisMultiAgent(state_size=24, action_size=2, n_agents=2)

        # MADDPG
        maddpg = MADDPG(env=env, agent=agent)
        scores, avg_scores = maddpg.train(n_episodes=n_episodes)
    finally:
        # Close the environment even when agent construction or training
        # raises, so the environment is never left open.
        env.close()

    # Plot scores
    fig, ax = plt.subplots(figsize=(10, 6))
    # Use integer episode indices 1..len(series) for the x-axis, derived from
    # each series' own length. The previous
    # np.linspace(1, n_episodes + 1, n_episodes) produced fractional episode
    # numbers ending at n_episodes + 1, and failed with a shape mismatch
    # whenever train() returned fewer than n_episodes entries.
    ax.plot(np.arange(1, len(scores) + 1), scores)
    ax.plot(np.arange(1, len(avg_scores) + 1), avg_scores)
    ax.set_xlabel("Episodes")
    ax.set_ylabel("Score per Episode")
    ax.set_title("Training progress of MADDPG model")
    fig.savefig('train_scores.png')