Example #1
0
def test_ddpg_create_ddpg_model(model_cls, s_format, env):
    """Smoke-test DDPG model construction.

    Builds an MDP databunch from *env*, instantiates the DDPG model with
    the given base architecture, runs one forward pass in eval mode on the
    current state, checks tensor shapes, and closes the databunch.
    """
    bunch = MDPDataBunch.from_env(env, render='rgb_array', bs=32,
                                  add_valid=False, feed_type=s_format)
    ddpg = create_ddpg_model(bunch, model_cls)
    # Eval mode: forward pass only, no training-time behavior.
    ddpg.eval()
    ddpg(bunch.state.s.float())
    check_shape(env, bunch, s_format)
    bunch.close()
Example #2
0
def test_ddpg_ddpglearner(model_cls, s_format, mem, env):
    """Smoke-test ddpg_learner construction.

    Wires together a databunch, a DDPG model, an experience-replay memory
    of the given class, and an Ornstein-Uhlenbeck exploration process,
    then verifies state shapes and closes the databunch.
    """
    bunch = MDPDataBunch.from_env(env, render='rgb_array', bs=32,
                                  add_valid=False, feed_type=s_format)
    explore = OrnsteinUhlenbeck(size=bunch.action.taken_action.shape,
                                epsilon_start=1,
                                epsilon_end=0.1,
                                decay=0.001)
    ddpg_learner(data=bunch,
                 model=create_ddpg_model(bunch, model_cls),
                 memory=mem(memory_size=1000, reduce_ram=True),
                 exploration_method=explore)
    check_shape(env, bunch, s_format)
    bunch.close()
Example #3
0
def test_ddpg_models_mountain_car_continuous(model_cls, s_format, experience):
    """Train DDPG on MountainCarContinuous-v0 across 5 independent runs.

    Each run builds a fresh databunch, model, replay memory, and
    Ornstein-Uhlenbeck exploration schedule, fits for 450 epochs, records
    a per-run reward interpretation into a shared GroupAgentInterpretation,
    pickles the grouped results, and writes episode GIFs to disk.

    Fix: the GIF-writing step previously used a list comprehension purely
    for its side effects, building a throwaway list; replaced with an
    explicit for-loop.
    """
    group_interp = GroupAgentInterpretation()
    for i in range(5):
        print('\n')
        data = MDPDataBunch.from_env(
            'MountainCarContinuous-v0',
            render='rgb_array',
            bs=40,
            add_valid=False,
            keep_env_open=False,
            feed_type=s_format,
            memory_management_strategy='k_partitions_top',
            k=3,
            res_wrap=partial(ResolutionWrapper, w_step=2, h_step=2))
        exploration_method = OrnsteinUhlenbeck(
            size=data.action.taken_action.shape,
            epsilon_start=1,
            epsilon_end=0.1,
            decay=0.0001)
        memory = experience(memory_size=1000000, reduce_ram=True)
        model = create_ddpg_model(data=data, base_arch=model_cls)
        learner = ddpg_learner(data=data,
                               model=model,
                               memory=memory,
                               exploration_method=exploration_method,
                               callback_fns=[RewardMetric, EpsilonMetric])
        learner.fit(450)

        # Tag results with the experience class and feed type for grouping.
        meta = f'{experience.__name__}_{"FEED_TYPE_STATE" if s_format==FEED_TYPE_STATE else "FEED_TYPE_IMAGE"}'
        interp = AgentInterpretation(learner, ds_type=DatasetType.Train)
        interp.plot_rewards(cumulative=True, per_episode=True, group_name=meta)
        group_interp.add_interpretation(interp)
        group_interp.to_pickle(
            f'../docs_src/data/mountaincarcontinuous_{model.name.lower()}/',
            f'{model.name.lower()}_{meta}')
        # Side-effect loop: write each generated GIF to the run directory.
        for g in interp.generate_gif():
            g.write('../res/run_gifs/mountaincarcontinuous')
        data.close()
        # Free the large objects before the next run's allocations.
        del learner
        del model
        del data
Example #4
0
def trained_learner(model_cls,
                    env,
                    s_format,
                    experience,
                    bs=64,
                    layers=None,
                    render='rgb_array',
                    memory_size=1000000,
                    decay=0.0001,
                    lr=None,
                    actor_lr=None,
                    epochs=450,
                    opt=torch.optim.RMSprop,
                    **kwargs):
    """Build, train, and return a fitted DDPG learner for *env*.

    Assembles the databunch, Ornstein-Uhlenbeck exploration, replay
    memory, and model, fits for *epochs*, and returns the learner.
    Extra **kwargs are forwarded to MDPDataBunch.from_env.
    """
    # Fall back to the library defaults when no learning rates are given.
    critic_lr = ifnone(lr, 1e-3)
    policy_lr = ifnone(actor_lr, 1e-4)
    bunch = MDPDataBunch.from_env(env,
                                  render=render,
                                  bs=bs,
                                  add_valid=False,
                                  keep_env_open=False,
                                  feed_type=s_format,
                                  memory_management_strategy='k_partitions_top',
                                  k=3,
                                  **kwargs)
    explore = OrnsteinUhlenbeck(size=bunch.action.taken_action.shape,
                                epsilon_start=1,
                                epsilon_end=0.1,
                                decay=decay)
    agent = create_ddpg_model(data=bunch,
                              base_arch=model_cls,
                              lr=critic_lr,
                              actor_lr=policy_lr,
                              layers=layers,
                              opt=opt)
    learn = ddpg_learner(data=bunch,
                         model=agent,
                         memory=experience(memory_size=memory_size,
                                           reduce_ram=True),
                         exploration_method=explore,
                         callback_fns=[RewardMetric, EpsilonMetric])
    learn.fit(epochs)
    return learn