def get_dqn_v0_args(str_):
    """Parse the DQN-v0 option string into a dict of typed settings.

    Args:
        str_: Option string handed to ``utils.DefArgs.process_string``; its
            exact format is defined by that helper.

    Returns:
        The ``vars()`` dict of the parsed args object, updated in place so
        that count-like entries are ``int``, ``prioritized_replay`` is a
        ``bool`` and the remaining numeric entries are whatever
        ``utils.str_to_float`` returns.
    """
    # (name, string value) pairs handed to utils.DefArgs -- presumably the
    # defaults used when str_ does not override them.
    # NOTE(review): values such as '5en4', '0x02' and '1x0' look like an
    # encoding understood by utils.str_to_float ('n' for minus, 'x' for the
    # decimal point?) -- confirm against utils.
    spec = [
        ('lr', '5en4'),
        ('max_timesteps', '1e5'),
        ('buffer_size', '5e4'),
        ('exploration_fraction', '1en1'),
        ('exploration_final_eps', '0x02'),
        ('train_freq', '100.'),
        ('batch_size', '32'),
        ('learning_starts', '1000'),
        ('gamma', '1x0'),
        ('target_network_update_freq', '1e5'),
        ('prioritized_replay', '0'),
        ('cnn_model', '0'),
        ('print_freq', '1000'),
    ]
    parsed = utils.DefArgs(spec).process_string(str_)
    settings = vars(parsed)

    # Entries that are first decoded with str_to_float and then truncated
    # to int.
    integer_names = (
        'batch_size',
        'buffer_size',
        'learning_starts',
        'max_timesteps',
        'target_network_update_freq',
        'prioritized_replay',
        'cnn_model',
        'print_freq',
    )
    for name in integer_names:
        settings[name] = int(utils.str_to_float(settings[name]))

    # The flag was converted to int above; any positive value means "on".
    settings['prioritized_replay'] = settings['prioritized_replay'] > 0

    # Entries kept as the raw result of str_to_float.
    float_names = (
        'lr',
        'exploration_fraction',
        'exploration_final_eps',
        'gamma',
        'train_freq',
    )
    for name in float_names:
        settings[name] = utils.str_to_float(settings[name])

    return settings
def get_greedy_v0_args(str_):
    """Parse the greedy-v0 option string into a dict of typed settings.

    Args:
        str_: Option string handed to ``utils.DefArgs.process_string``; its
            exact format is defined by that helper.

    Returns:
        The ``vars()`` dict of the parsed args object, with
        ``max_episodes`` converted to ``int`` in place.
    """
    # (name, string value) pairs handed to utils.DefArgs -- presumably the
    # defaults used when str_ does not override them.
    spec = [('max_episodes', '5e3')]
    parsed = utils.DefArgs(spec).process_string(str_)
    settings = vars(parsed)

    # Decode via str_to_float first, then truncate to int.
    settings['max_episodes'] = int(
        utils.str_to_float(settings['max_episodes']))
    return settings
def get_other_args(str_):
    """Parse the miscellaneous option string into a dict of typed settings.

    Args:
        str_: Option string handed to ``utils.DefArgs.process_string``; its
            exact format is defined by that helper.

    Returns:
        The ``vars()`` dict of the parsed args object, updated in place:
        ``seed`` and ``num_valid`` become ``int`` and
        ``social_network_graph_env`` is replaced by the environment id
        string it selects.

    Raises:
        KeyError: If ``social_network_graph_env`` is not a known selector
            (only ``'0'`` is defined).
    """
    # (name, string value) pairs handed to utils.DefArgs -- presumably the
    # defaults used when str_ does not override them.
    spec = [
        ('seed', '0'),
        ('social_network_graph_env', '0'),
        ('num_valid', '50'),
    ]
    parsed = utils.DefArgs(spec).process_string(str_)
    settings = vars(parsed)

    for name in ('seed', 'num_valid'):
        settings[name] = int(utils.str_to_float(settings[name]))

    # Map the selector string onto the environment id it stands for.
    env_ids = {'0': 'SocialNetworkGraphEnv-v1'}
    selector = settings['social_network_graph_env']
    settings['social_network_graph_env'] = env_ids[selector]
    return settings
def get_env_args(str_):
    """Parse the environment option string into a dict of typed settings.

    Args:
        str_: Option string handed to ``utils.DefArgs.process_string``; its
            exact format is defined by that helper.

    Returns:
        The ``vars()`` dict of the parsed args object, updated in place:
        the graph/seed/version entries become ``int`` and ``threshold_q``
        becomes whatever ``utils.str_to_float`` returns. ``method_name`` is
        left untouched.

    Raises:
        AssertionError: If ``threshold_q`` is not within ``[0, 1]``.
    """
    # (name, string value) pairs handed to utils.DefArgs -- presumably the
    # defaults used when str_ does not override them.
    spec = [
        ('method_name', 'ours'),
        ('graph_n', '64'),
        ('edges_n', '200'),
        ('n_init_a', '10'),
        ('threshold_q', '.5'),
        ('num_seeds', '4'),
        ('reward_version', '0'),
        ('seed', '0'),
        ('graph_obs_version', '0'),
    ]
    parsed = utils.DefArgs(spec).process_string(str_)
    settings = vars(parsed)

    # Entries that are first decoded with str_to_float and then truncated
    # to int.
    integer_names = (
        'graph_n',
        'edges_n',
        'n_init_a',
        'num_seeds',
        'reward_version',
        'seed',
        'graph_obs_version',
    )
    for name in integer_names:
        settings[name] = int(utils.str_to_float(settings[name]))

    settings['threshold_q'] = utils.str_to_float(settings['threshold_q'])

    # threshold_q must be a valid fraction. This is an explicit raise rather
    # than an `assert` statement so the check still runs under `python -O`;
    # AssertionError is kept so callers relying on that type are unaffected.
    if not (0 <= settings['threshold_q'] <= 1):
        raise AssertionError(
            'threshold_q must be within [0, 1], got %r'
            % (settings['threshold_q'],))
    return settings