Пример #1
0
def learner2gif(lnr: DDPGLearner, s_format,
                group_interp: GroupAgentInterpretation, name: str, extra: str):
    """Plot a learner's rewards, pickle the grouped results and write run gifs.

    Args:
        lnr: Learner to interpret. Only ``lnr.memory`` (for its class name)
            and ``lnr.model.name`` are read directly here.
            NOTE(review): annotated as ``DDPGLearner`` but also called from
            the DQN tests in this file; confirm the annotation is not too narrow.
        s_format: Feed type of the run; compared against ``FEED_TYPE_STATE``
            to choose the label embedded in the group name.
        group_interp: Accumulator that receives this run's interpretation and
            is pickled on every call.
        name: Environment tag used in the pickle directory and gif path.
        extra: Suffix appended to the gif output path.
    """
    memory_name = lnr.memory.__class__.__name__
    feed_label = "FEED_TYPE_STATE" if s_format == FEED_TYPE_STATE else "FEED_TYPE_IMAGE"
    meta = f'{memory_name}_{feed_label}'

    interp = AgentInterpretation(lnr, ds_type=DatasetType.Train)
    interp.plot_rewards(cumulative=True, per_episode=True, group_name=meta)
    group_interp.add_interpretation(interp)

    model_name = lnr.model.name.lower()
    group_interp.to_pickle(f'../docs_src/data/{name}_{model_name}/',
                           f'{model_name}_{meta}')

    # Plain loop: the writes are side effects, so no result list is needed.
    for gif in interp.generate_gif():
        gif.write(f'../res/run_gifs/{name}_{extra}')
Пример #2
0
def test_ddpg_models_ant(model_cls, s_format, experience):
    """Run five Ant rounds and record each one through ``learner2gif``.

    Each round obtains a learner from ``trained_learner`` for
    ``'AntPyBulletEnv-v0'`` (presumably already fitted -- confirm against
    ``trained_learner``) and passes it to ``learner2gif`` under the ``'ant'`` tag.

    Args:
        model_cls: Model class forwarded to ``trained_learner``; its
            ``__name__`` becomes part of the gif suffix.
        s_format: Feed type forwarded to ``trained_learner`` and ``learner2gif``.
        experience: Memory class forwarded to ``trained_learner``; its
            ``__name__`` becomes part of the gif suffix.
    """
    group_interp = GroupAgentInterpretation()
    extra_s = f'{experience.__name__}_{model_cls.__name__}_{s_format}'
    for _ in range(5):
        # Emit blank lines between rounds in the console output.
        print('\n')
        learner = trained_learner(model_cls,
                                  'AntPyBulletEnv-v0',
                                  s_format,
                                  experience,
                                  decay=0.0001,
                                  render='rgb_array',
                                  epochs=1000)
        learner2gif(learner, s_format, group_interp, 'ant', extra_s)
        # Drop this reference before the next round builds a new learner.
        del learner
Пример #3
0
def test_ddpg_models_acrobot(model_cls, s_format, experience):
    """Run five Acrobot rounds and record each one through ``learner2gif``.

    Args:
        model_cls: Model class forwarded to ``trained_learner``; its
            ``__name__`` becomes part of the gif suffix.
        s_format: Feed type forwarded to ``trained_learner`` and ``learner2gif``.
        experience: Memory class forwarded to ``trained_learner``; its
            ``__name__`` becomes part of the gif suffix.
    """
    results = GroupAgentInterpretation()
    gif_suffix = f'{experience.__name__}_{model_cls.__name__}_{s_format}'
    for _ in range(5):
        print('\n')
        # Keyword options are rebuilt on every round, as in a literal call.
        options = dict(decay=0.0001, render='rgb_array')
        agent = trained_learner(model_cls, 'Acrobot-v1', s_format, experience,
                                **options)
        learner2gif(agent, s_format, results, 'acrobot', gif_suffix)
        del agent
def test_dqn_models_cartpole(model_cls, s_format, experience):
    """Run five CartPole rounds and record each one through ``learner2gif``.

    Args:
        model_cls: Model class forwarded to ``trained_learner``; its
            ``__name__`` becomes part of the gif suffix.
        s_format: Feed type forwarded to ``trained_learner`` and ``learner2gif``.
        experience: Memory class forwarded to ``trained_learner``; its
            ``__name__`` becomes part of the gif suffix.
    """
    group_interp = GroupAgentInterpretation()
    extra_s = f'{experience.__name__}_{model_cls.__name__}_{s_format}'
    for _ in range(5):
        learn = trained_learner(model_cls,
                                'CartPole-v1',
                                s_format,
                                experience,
                                bs=32,
                                layers=[64, 64],
                                memory_size=1000000,
                                decay=0.001)

        learner2gif(learn, s_format, group_interp, 'cartpole', extra_s)
        # Match the sibling tests: drop this reference before the next round
        # builds a new learner, so two are not referenced here at once.
        del learn
def test_dqn_fit_maze_env(model_cls, s_format, mem):
    """Run five random 5x5 maze rounds and record each through ``learner2gif``.

    Args:
        model_cls: Model class forwarded to ``trained_learner``; its
            ``__name__`` becomes part of the gif suffix.
        s_format: Feed type forwarded to ``trained_learner`` and ``learner2gif``.
        mem: Memory class forwarded to ``trained_learner``; its ``__name__``
            becomes part of the gif suffix.
    """
    group_interp = GroupAgentInterpretation()
    extra_s = f'{mem.__name__}_{model_cls.__name__}_{s_format}'
    for _ in range(5):
        learn = trained_learner(model_cls,
                                'maze-random-5x5-v0',
                                s_format,
                                mem,
                                bs=32,
                                layers=[32, 32],
                                memory_size=1000000,
                                decay=0.00001,
                                res_wrap=partial(ResolutionWrapper,
                                                 w_step=3,
                                                 h_step=3))

        learner2gif(learn, s_format, group_interp, 'maze_5x5', extra_s)
        # Match the sibling tests: drop this reference before the next round
        # builds a new learner, so two are not referenced here at once.
        del learn
def test_dqn_models_lunarlander(model_cls, s_format, experience):
    """Run five LunarLander rounds and record each one through ``learner2gif``.

    After every round the learner reference is deleted and a garbage
    collection pass is requested explicitly.

    Args:
        model_cls: Model class forwarded to ``trained_learner``; its
            ``__name__`` becomes part of the gif suffix.
        s_format: Feed type forwarded to ``trained_learner`` and ``learner2gif``.
        experience: Memory class forwarded to ``trained_learner``; its
            ``__name__`` becomes part of the gif suffix.
    """
    results = GroupAgentInterpretation()
    gif_suffix = f'{experience.__name__}_{model_cls.__name__}_{s_format}'
    for _ in range(5):
        # Rebuilt each round so the list-valued options are fresh objects.
        options = dict(
            bs=32,
            layers=[128, 64],
            memory_size=1000000,
            decay=0.00001,
            copy_over_frequency=600,
            lr=[0.001, 0.00025],
            epochs=1000,
        )
        agent = trained_learner(model_cls, 'LunarLander-v2', s_format,
                                experience, **options)
        learner2gif(agent, s_format, results, 'lunarlander', gif_suffix)
        del agent
        gc.collect()
Пример #7
0
def test_ddpg_models_mountain_car_continuous(model_cls, s_format, experience):
    """Fit DDPG on MountainCarContinuous five times and record every run.

    Each round builds the data bunch, exploration method, memory, model and
    learner, fits for 450 epochs, then plots rewards, pickles the grouped
    interpretation and writes the run gifs.

    Args:
        model_cls: Base architecture passed to ``create_ddpg_model``.
        s_format: Feed type passed to ``MDPDataBunch.from_env``; compared
            against ``FEED_TYPE_STATE`` to choose the group-name label.
        experience: Memory class instantiated for the learner; its
            ``__name__`` is part of the group name.
    """
    group_interp = GroupAgentInterpretation()
    for _ in range(5):
        # Emit blank lines between rounds in the console output.
        print('\n')
        data = MDPDataBunch.from_env(
            'MountainCarContinuous-v0',
            render='rgb_array',
            bs=40,
            add_valid=False,
            keep_env_open=False,
            feed_type=s_format,
            memory_management_strategy='k_partitions_top',
            k=3,
            res_wrap=partial(ResolutionWrapper, w_step=2, h_step=2))
        try:
            exploration_method = OrnsteinUhlenbeck(
                size=data.action.taken_action.shape,
                epsilon_start=1,
                epsilon_end=0.1,
                decay=0.0001)
            memory = experience(memory_size=1000000, reduce_ram=True)
            model = create_ddpg_model(data=data, base_arch=model_cls)
            learner = ddpg_learner(data=data,
                                   model=model,
                                   memory=memory,
                                   exploration_method=exploration_method,
                                   callback_fns=[RewardMetric, EpsilonMetric])
            learner.fit(450)

            meta = f'{experience.__name__}_{"FEED_TYPE_STATE" if s_format==FEED_TYPE_STATE else "FEED_TYPE_IMAGE"}'
            interp = AgentInterpretation(learner, ds_type=DatasetType.Train)
            interp.plot_rewards(cumulative=True, per_episode=True, group_name=meta)
            group_interp.add_interpretation(interp)
            group_interp.to_pickle(
                f'../docs_src/data/mountaincarcontinuous_{model.name.lower()}/',
                f'{model.name.lower()}_{meta}')
            # Plain loop: the writes are side effects, so no result list is needed.
            for gif in interp.generate_gif():
                gif.write('../res/run_gifs/mountaincarcontinuous')
        finally:
            # Close the data bunch even when training or reporting raises.
            data.close()
        # Drop this round's references before the next round rebuilds them.
        del learner
        del model
        del data
def test_dqn_models_mountaincar(model_cls, s_format, experience):
    """Run five MountainCar rounds, plotting, pickling and writing gifs for each.

    Args:
        model_cls: Model class forwarded to ``trained_learner``.
        s_format: Feed type forwarded to ``trained_learner``; compared against
            ``FEED_TYPE_STATE`` to choose the group-name label.
        experience: Memory class forwarded to ``trained_learner``; its
            ``__name__`` is part of the group name.
    """
    group_interp = GroupAgentInterpretation()
    for _ in range(5):
        learn = trained_learner(model_cls,
                                'MountainCar-v0',
                                s_format,
                                experience,
                                bs=32,
                                layers=[24, 12],
                                memory_size=1000000,
                                decay=0.00001,
                                copy_over_frequency=1000)
        meta = f'{experience.__name__}_{"FEED_TYPE_STATE" if s_format == FEED_TYPE_STATE else "FEED_TYPE_IMAGE"}'
        interp = AgentInterpretation(learn, ds_type=DatasetType.Train)
        interp.plot_rewards(cumulative=True, per_episode=True, group_name=meta)
        group_interp.add_interpretation(interp)
        filename = f'{learn.model.name.lower()}_{meta}'
        group_interp.to_pickle(
            f'../docs_src/data/mountaincar_{learn.model.name.lower()}/',
            filename)
        # Plain loop: the writes are side effects, so no result list is needed.
        for gif in interp.generate_gif():
            gif.write('../res/run_gifs/mountaincar')
        # Drop this reference before the next round builds a new learner.
        del learn