import numpy as np

# MyEnv, status_print, pause_for_debug, conunt_results, and args are
# assumed to be defined elsewhere in the surrounding project.


def main(w_count, success_count):
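    """Run one episode with uniformly random actions and return the
    updated result counters."""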
    env = MyEnv({})

    while True:
        # print(f'step {step}')
        # Select a random action
        # action_index = env.action_space.sample()
        a = np.array([0, 1, 2, 3])
        p = np.array([1, 1, 1, 1])
        """
        a = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8])
        p = np.array([1, 1, 1, 1, 1, 1, 1, 1, 1])
        """
        p = p / np.sum(p)
        action_index = np.random.choice(a, p=p)

        # Step the environment once
        observation, reward, done, _ = env.step(action_index)
        if args.render_mode == 'human':
            print(f'\nAction selected at step {env.steps}')
            status_print(env, observation, reward, done)

        # Render the environment
        shot = env.render(mode=args.render_mode)

        # Pause on the space key (for debugging)
        pause_for_debug()

        # Handle the end of the episode
        if done:
            # print('done')
            w_count, success_count = conunt_results(env, w_count,
                                                    success_count)
            break
    return w_count, success_count
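

# Hypothetical driver (names and episode count assumed, not from this
# source): accumulate results over repeated random-policy episodes.
#
#     w_count, success_count = 0, 0
#     for _ in range(100):
#         w_count, success_count = main(w_count, success_count)
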

import ray
import pygame
from ray.rllib.agents import ppo

# MyEnv, MODEL_NAME, ALGORITHM, status_print, pause_for_debug, make_video,
# make_jason, and args are assumed to be defined elsewhere in the project.


def main():
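    """Roll out a restored PPO agent, rendering every episode and saving a
    video plus a JSON-style summary for each."""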
    # Initialize ray
    ray.init(ignore_reinit_error=True, log_to_driver=False)

    # Generate & Check environment
    env = MyEnv({})

    # Define trainer agent
    model_name = MODEL_NAME

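    # PPO config: CPU only, TensorFlow eager ('tfe') with eager tracing
    # enabled for faster execution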
    config = ppo.DEFAULT_CONFIG.copy()
    config['env_config'] = {}
    config['num_gpus'] = 0
    config['framework'] = 'tfe'
    config['eager_tracing'] = True

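    # Build the trainer and load weights from a checkpoint previously
    # written by agent.save(); MODEL_NAME should be a checkpoint path.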
    agent = ppo.PPOTrainer(config=config, env=MyEnv)
    agent.restore(model_name)

    for idx in range(90):
        """ Initialization """
        observation = env.reset()
        frames = []
        """ Save some initial values """
        fighter_0 = env.fighter.ingress
        jammer_0 = env.jammer.ingress

        while True:
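            # Query the restored policy for an action for the current
            # single observation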
            action_index = agent.compute_action(observation)

            # Step the environment once
            observation, reward, done, _ = env.step(action_index)

            # Render the environment and record the frame for the video
            # shot = env.render(mode=args.render_mode)
            frames.append(env.render(mode=args.render_mode))

            # Pause on the space key (for debugging)
            pause_for_debug()

            # Slow down rendering
            pygame.time.wait(10)

            # Handle the end of the episode
            if done:
                status_print(env, observation, reward, done, fighter_0,
                             jammer_0)
                video_name = f'{ALGORITHM}_{env.mission_condition}-{idx}'
                make_video(video_name, frames)
                make_jason(env, video_name, fighter_0, jammer_0)
                break
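

# The make_video helper used above is project-local and not shown here. A
# minimal sketch of one possible implementation (an assumption, not the
# project's actual code), for frames returned as RGB numpy arrays:
import imageio


def make_video(video_name, frames, fps=30):
    # Encode the collected RGB frames into an MP4 file via imageio/ffmpeg.
    imageio.mimsave(f'{video_name}.mp4', frames, fps=fps)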

import numpy as np
import ray
from ray.rllib.agents import ppo

# MyEnv, MODEL_NAME, NUM_WORKERS, N_EVAL_EPISODES, and args are assumed to
# be defined elsewhere in the project.


def main():
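    """Evaluate a restored PPO agent for N_EVAL_EPISODES episodes and
    print a success-rate summary."""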
    # Initialize ray
    ray.init(ignore_reinit_error=True, log_to_driver=False)

    # Generate & Check environment
    env = MyEnv({})

    # Define trainer agent
    model_name = MODEL_NAME

    config = ppo.DEFAULT_CONFIG.copy()
    config['env_config'] = {}
    config['num_workers'] = NUM_WORKERS
    config['num_gpus'] = 0
    config['framework'] = 'tfe'
    config['eager_tracing'] = True

    agent = ppo.PPOTrainer(config=config, env=MyEnv)
    agent.restore(model_name)

    success_history = []
    success_count = 0
    for idx in range(N_EVAL_EPISODES):
        """ Initialization """
        observation = env.reset()
        frames = []
        """ Save some initial values """
        fighter_0 = env.fighter.ingress
        jammer_0 = env.jammer.ingress

        while True:
            action_index = agent.compute_action(observation)

            # Step the environment once
            observation, reward, done, info = env.step(action_index)

            # Render the environment and record the frame for the video
            # shot = env.render(mode=args.render_mode)
            frames.append(env.render(mode=args.render_mode))

            # Slow down rendering
            # pygame.time.wait(10)

            # Handle the end of the episode
            if done:
                success_history.append(info['success'])
                if info['success'] > .5:
                    success_count += 1
                break

    n_success = success_count
    n_fail = N_EVAL_EPISODES - n_success
    if np.sum(success_history) != success_count:
        raise RuntimeError('success_history is inconsistent with success_count')
    # Summarize the results
    print('==================== Summary of the results ====================')
    print(
        f'Mission conditions = w1 : w2 : w3 = '
        f'{env.mission_probability[0]:.3f} : {env.mission_probability[1]:.3f} : {env.mission_probability[2]:.3f}'
    )
    print(f'   Model is < {MODEL_NAME} >')
    print(
        f'   Number of successful missions: {n_success} / {N_EVAL_EPISODES},  '
        f'   Number of failed missions: {n_fail} / {N_EVAL_EPISODES}')