def main(w_count, success_count):
    """ Run one episode with a uniformly random policy and update the result counters. """
    env = MyEnv({})

    while True:
        # print(f'step {step}')

        # Select a random action
        # action_index = env.action_space.sample()
        a = np.array([0, 1, 2, 3])
        p = np.array([1, 1, 1, 1])
        """
        a = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8])
        p = np.array([1, 1, 1, 1, 1, 1, 1, 1, 1])
        """
        p = p / np.sum(p)
        action_index = np.random.choice(a, p=p)

        # Advance the environment by one step
        observation, reward, done, _ = env.step(action_index)

        if args.render_mode == 'human':
            print(f'\naction is selected at {env.steps}')
            status_print(env, observation, reward, done)

        # Render the environment
        shot = env.render(mode=args.render_mode)

        # Pause with the Space key (for debugging)
        pause_for_debug()

        # End-of-episode handling
        if done:
            # print('done')
            w_count, success_count = conunt_results(env, w_count, success_count)
            break

    return w_count, success_count
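# NOTE (assumption): pause_for_debug() is called above but not defined in this
# listing. A minimal sketch of what such a helper might look like, assuming a
# pygame window is already open: pressing the Space key pauses the loop,
# pressing it again resumes. Illustrative only, not the original implementation.
import pygame

def pause_for_debug():
    for event in pygame.event.get():
        if event.type == pygame.KEYDOWN and event.key == pygame.K_SPACE:
            paused = True
            while paused:
                for e in pygame.event.get():
                    if e.type == pygame.KEYDOWN and e.key == pygame.K_SPACE:
                        paused = False
                pygame.time.wait(50)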
def main():
    """ Roll out the trained PPO agent, render each episode, and save it as a video. """
    # Initialize ray
    ray.init(ignore_reinit_error=True, log_to_driver=False)

    # Generate & check the environment
    env = MyEnv({})

    # Define the trainer agent and restore the trained model
    model_name = MODEL_NAME

    config = ppo.DEFAULT_CONFIG.copy()
    config['env_config'] = {}
    config['num_gpus'] = 0
    config['framework'] = 'tfe'
    config['eager_tracing'] = True

    agent = ppo.PPOTrainer(config=config, env=MyEnv)
    agent.restore(model_name)

    for idx in range(90):
        """ Initialization """
        observation = env.reset()
        frames = []

        """ Save some initial values """
        fighter_0 = env.fighter.ingress
        jammer_0 = env.jammer.ingress

        while True:
            action_index = agent.compute_action(observation)

            # Advance the environment by one step
            observation, reward, done, _ = env.step(action_index)

            # Render the environment and collect frames for the video
            # shot = env.render(mode=args.render_mode)
            frames.append(env.render(mode=args.render_mode))

            # Pause with the Space key (for debugging)
            pause_for_debug()

            # Slow down rendering
            pygame.time.wait(10)

            # End-of-episode handling
            if done:
                status_print(env, observation, reward, done, fighter_0, jammer_0)

                video_name = ALGORITHM + '_' + env.mission_condition + '-' + str(idx)
                make_video(video_name, frames)
                make_jason(env, video_name, fighter_0, jammer_0)
                break
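# NOTE (assumption): make_video() and make_jason() are helpers defined elsewhere
# in the project. A minimal sketch of a make_video-style helper, assuming the
# frames are RGB numpy arrays from env.render(mode='rgb_array') and that imageio
# with its ffmpeg plugin is installed; the real helper may differ.
import imageio

def make_video(video_name, frames, fps=30):
    # Encode the collected frames into an mp4 file named <video_name>.mp4
    imageio.mimsave(video_name + '.mp4', frames, fps=fps)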
def main():
    """ Evaluate the trained PPO agent over N_EVAL_EPISODES episodes and summarize the results. """
    # Initialize ray
    ray.init(ignore_reinit_error=True, log_to_driver=False)

    # Generate & check the environment
    env = MyEnv({})

    # Define the trainer agent and restore the trained model
    model_name = MODEL_NAME

    config = ppo.DEFAULT_CONFIG.copy()
    config['env_config'] = {}
    config['num_workers'] = NUM_WORKERS
    config['num_gpus'] = 0
    config['framework'] = 'tfe'
    config['eager_tracing'] = True

    agent = ppo.PPOTrainer(config=config, env=MyEnv)
    agent.restore(model_name)

    success_history = []
    success_count = 0

    for idx in range(N_EVAL_EPISODES):
        """ Initialization """
        observation = env.reset()
        frames = []

        """ Save some initial values """
        fighter_0 = env.fighter.ingress
        jammer_0 = env.jammer.ingress

        while True:
            action_index = agent.compute_action(observation)

            # Advance the environment by one step
            observation, reward, done, info = env.step(action_index)

            # Render the environment and collect frames
            # shot = env.render(mode=args.render_mode)
            frames.append(env.render(mode=args.render_mode))

            # Slow down rendering
            # pygame.time.wait(10)

            # End-of-episode handling
            if done:
                success_history.append(info['success'])
                if info['success'] > .5:
                    success_count += 1
                break

    n_success = success_count
    n_fail = N_EVAL_EPISODES - n_success

    if np.sum(success_history) != success_count:
        raise Exception('Something is wrong!')

    """ Summarize results """
    print('==================== Summary of the results ====================')
    print(f'Mission conditions = w1 : w2 : w3 = '
          f'{env.mission_probability[0]:.3f} : '
          f'{env.mission_probability[1]:.3f} : '
          f'{env.mission_probability[2]:.3f}')
    print(f'    Model is < {MODEL_NAME} >')
    print(f'    Number of successful missions: {round(n_success)} / {N_EVAL_EPISODES}, '
          f'    Number of failed missions: {round(n_fail)} / {N_EVAL_EPISODES}')
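# A typical entry point for this evaluation script (a sketch; the original entry
# point is not shown in this listing):
if __name__ == '__main__':
    main()
    # Release ray resources once the evaluation has finished
    ray.shutdown()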