示例#1
0
def get_dqn_v0_args(str_):
    """Build the DQN (v0) hyper-parameter dictionary from an argument string.

    The string is handed to ``utils.DefArgs.process_string`` together with
    the default values listed below; the resulting attributes are then
    converted from strings to ``int``, ``bool`` or float-like values.

    Args:
        str_: Argument string understood by ``utils.DefArgs.process_string``.

    Returns:
        The ``vars()`` dictionary of the parsed arguments, with every entry
        converted in place.
    """
    # NOTE(review): defaults such as '5en4', '1en1', '0x02' and '1x0' are
    # presumably a filename-safe number encoding decoded by
    # utils.str_to_float -- confirm against that helper.
    defaults = [
        ('lr', '5en4'),
        ('max_timesteps', '1e5'),
        ('buffer_size', '5e4'),
        ('exploration_fraction', '1en1'),
        ('exploration_final_eps', '0x02'),
        ('train_freq', '100.'),
        ('batch_size', '32'),
        ('learning_starts', '1000'),
        ('gamma', '1x0'),
        ('target_network_update_freq', '1e5'),
        ('prioritized_replay', '0'),
        ('cnn_model', '0'),
        ('print_freq', '1000'),
    ]
    integer_keys = (
        'batch_size',
        'buffer_size',
        'learning_starts',
        'max_timesteps',
        'target_network_update_freq',
        'prioritized_replay',
        'cnn_model',
        'print_freq',
    )
    float_keys = (
        'lr',
        'exploration_fraction',
        'exploration_final_eps',
        'gamma',
        'train_freq',
    )

    parser = utils.DefArgs(defaults)
    values = vars(parser.process_string(str_))

    # Integer-valued options go through str_to_float first so that
    # scientific notation such as '1e5' is accepted.
    for name in integer_keys:
        values[name] = int(utils.str_to_float(values[name]))

    # The flag was just converted to an int; any positive value enables it.
    values['prioritized_replay'] = values['prioritized_replay'] > 0

    for name in float_keys:
        values[name] = utils.str_to_float(values[name])

    return values
示例#2
0
def get_greedy_v0_args(str_):
    """Build the greedy (v0) baseline arguments from an argument string.

    Args:
        str_: Argument string understood by ``utils.DefArgs.process_string``.

    Returns:
        The ``vars()`` dictionary of the parsed arguments, with
        ``max_episodes`` converted to ``int``.
    """
    parser = utils.DefArgs([('max_episodes', '5e3')])
    values = vars(parser.process_string(str_))
    # Parse via str_to_float so that values such as '5e3' are accepted
    # before truncating to an integer.
    values['max_episodes'] = int(utils.str_to_float(values['max_episodes']))
    return values
示例#3
0
def get_other_args(str_):
    """Build the miscellaneous run arguments from an argument string.

    Args:
        str_: Argument string understood by ``utils.DefArgs.process_string``.

    Returns:
        The ``vars()`` dictionary of the parsed arguments, with ``seed`` and
        ``num_valid`` converted to ``int`` and ``social_network_graph_env``
        replaced by the environment name it selects.

    Raises:
        KeyError: If ``social_network_graph_env`` is not one of the known
            selector strings (currently only ``'0'``).
    """
    # Selector string -> environment name.
    env_names = {'0': 'SocialNetworkGraphEnv-v1'}

    defaults = [
        ('seed', '0'),
        ('social_network_graph_env', '0'),
        ('num_valid', '50'),
    ]
    values = vars(utils.DefArgs(defaults).process_string(str_))

    for name in ('seed', 'num_valid'):
        values[name] = int(utils.str_to_float(values[name]))

    selector = values['social_network_graph_env']
    values['social_network_graph_env'] = env_names[selector]
    return values
示例#4
0
    def get_env_args(str_):
        """Build the environment arguments from an argument string.

        Args:
            str_: Argument string understood by
                ``utils.DefArgs.process_string``.

        Returns:
            The ``vars()`` dictionary of the parsed arguments.  The count,
            version and seed options are converted to ``int``;
            ``threshold_q`` is converted with ``utils.str_to_float``;
            ``method_name`` is left as parsed.

        Raises:
            AssertionError: If ``threshold_q`` is outside ``[0, 1]``.
        """
        defaults = [
            ('method_name', 'ours'),
            ('graph_n', '64'),
            ('edges_n', '200'),
            ('n_init_a', '10'),
            ('threshold_q', '.5'),
            ('num_seeds', '4'),
            ('reward_version', '0'),
            ('seed', '0'),
            ('graph_obs_version', '0'),
        ]
        integer_keys = (
            'graph_n',
            'edges_n',
            'n_init_a',
            'num_seeds',
            'reward_version',
            'seed',
            'graph_obs_version',
        )

        values = vars(utils.DefArgs(defaults).process_string(str_))

        for name in integer_keys:
            values[name] = int(utils.str_to_float(values[name]))

        threshold_q = utils.str_to_float(values['threshold_q'])
        values['threshold_q'] = threshold_q

        # Validate explicitly rather than with an ``assert`` statement:
        # asserts are stripped under ``python -O``, which would let an
        # out-of-range value through silently.  AssertionError is kept so
        # callers observe the same exception type as before.
        if not 0 <= threshold_q <= 1:
            raise AssertionError(
                'threshold_q must be within [0, 1], got %r' % (threshold_q,))
        return values