"Server.log_every_n": 10, "PoloWrappedReplayBuffer.batch_size": 32, "PoloOutOfGraphReplayBuffer.solved_unsolved_ratio": 0.5, "training_steps": 200000, "train_every_num_steps": 10, "game_buffer_size": 10, "run_eval_worker": False, } params_grid = { "get_env_creator.N": [40], # Here set problem size "MCTS.episode_max_steps": [ 51, ], # should be no less than N for ChainEnvironment "EnsembleValueAccumulatorMeanStdMaxUCB.kappa_fn": [ ConstKappa(0), ], "ValueBase.model_name": [ "linear_multi_head", ], } experiments_list = create_experiments_helper( experiment_name='Ensemble ChainEnv', python_path='.:./deps/gym-sokoban:./deps/ourlib:' './deps/baselines:./deps/dopamine:./deps/gym-sokoban-fast:./deps/chainenv:', paths_to_dump='', exclude=[], base_config=base_config, params_grid=params_grid)
"MCTS.num_sampling_moves": 0, "MCTS.value_annealing": 1.0, "MCTS.avoid_loops": True, "MCTS.gamma": 0.99, "MCTS.node_value_mode": "bootstrap", "MCTS.episode_max_steps": 50, "Server.min_replay_history": 1000, "PoloWrappedReplayBuffer.batch_size": 32, "PoloOutOfGraphReplayBuffer.solved_unsolved_ratio": 0.5, "curriculum": False, "training_steps": 500000, "train_every_num_steps": 100, "game_buffer_size": 25, "log_every_n": 50, "run_eval_worker": False, "Server.save_checkpoint_every_train_steps": 500, } params_grid = { "MCTS.avoid_loops": [True, False], "ValueBase.model_name": ["convnet_mnist", "kc_parametrized_cnn_v0_2"] } experiments_list = create_experiments_helper( experiment_name='Mcts sanity experiment', python_path='.:./deps/gym-sokoban:./deps/ourlib:' './deps/baselines:./deps/dopamine:./deps/gym-sokoban-fast', paths_to_dump='', base_config=base_config, params_grid=params_grid)
], "KC_MCTS.num_ensembles_per_game": [ 1, ], "ValueBase.learning_rate_fn": [ 0.00025, ], "EnsembleValueAccumulatorMeanStdMaxUCB.ucb_coeff": [ 0.0, ], "EnsembleValueAccumulatorMeanStdMaxUCB.exploration_target": [ False, ], "MCTS.num_mcts_passes": [ 10, ], "ValueBase.model_name": [ "multiple_mlps", ], "ValueEnsemble2.prior_scale": [None], } experiments_list = create_experiments_helper( experiment_name='Sokoban single-board', python_path='.:./deps/gym-sokoban:./deps/ourlib:' './deps/baselines:./deps/dopamine:./deps/gym-sokoban-fast:./deps/chainenv:./deps/toy-mr:', paths_to_dump='', callbacks=(), base_config=base_config, params_grid=params_grid)
"EnsembleValueTraits.dead_end_value": -2.0, "MCTSWithVotingTwoModels.num_mcts_passes": 10, "MCTSWithVotingTwoModels.num_sampling_moves": 0, "MCTSWithVotingTwoModels.avoid_loops": True, "MCTSWithVotingTwoModels.gamma": 0.99, "MCTSWithVotingTwoModels.episode_max_steps": 200, "MCTSWithVotingTwoModels.avoid_history_coeff": -2., "Server.min_replay_history": 1000, "PoloOutOfGraphReplayBuffer.solved_unsolved_ratio": 0.5, "training_steps": 500000, "train_every_num_steps": 100, "game_buffer_size": 25, "run_eval_worker": False, "Server.log_every_n": 50, "MCTSWithVotingTwoModels.node_value_mode": "bootstrap", } params_grid = { "use_perfect_env.value": [False,], "SimulatedSokobanEnvModel.model_path": ["checkpoints/epoch.0003.hdf5"], "PoloWrappedReplayBuffer.batch_size": [96,], } experiments_list = create_experiments_helper(experiment_name='sokoban with learned model', python_path='.:./deps/gym-sokoban:./deps/ourlib:' './deps/baselines:./deps/dopamine:./deps/gym-sokoban-fast:./deps/chainenv:./polo_plus/kc:./deps/toy-mr:', paths_to_dump='', base_config=base_config, params_grid=params_grid)