reconstruction_weight=1.0, kl_weight=1.0, math_weight=0.0, train_math=False, math_A=None, noisy=True, build_background_encoder=lambda scope: MLP(n_units=[10, 10], scope=scope), build_background_decoder=IdentityFunction, max_possible_objects=None)


# Settings for the "simple" algorithm, produced by calling alg_config.copy()
# with the keyword arguments below.
# Note that render_hook is an *instance* (SimpleVAE_RenderHook is called right
# here, at definition time), whereas build_network / build_encoder /
# build_decoder are passed uncalled.
simple_config = alg_config.copy(
    alg_name="simple",
    build_network=simple.SimpleVAE,
    render_hook=simple.SimpleVAE_RenderHook(),
    build_encoder=networks.Backbone,
    build_decoder=networks.InverseBackbone,
    n_channels=128,
    n_final_layers=3,
    kernel_size=1,
    pixels_per_cell=(12, 12),
)


# Settings for the "baseline" algorithm, produced by calling alg_config.copy()
# with the keyword arguments below.
# The object encoder/decoder builders are lambdas that take a `scope` and
# forward it to MLP; their layer widths mirror each other ([512, 256] for the
# encoder, [256, 512] for the decoder).
# NOTE(review): cc_threshold looks like a connected-components threshold --
# inferred from the name only; confirm against baseline.Baseline_Network.
baseline_config = alg_config.copy(
    alg_name="baseline",
    build_network=baseline.Baseline_Network,
    render_hook=baseline.Baseline_RenderHook(),
    build_object_encoder=lambda scope: MLP(n_units=[512, 256], scope=scope),
    build_object_decoder=lambda scope: MLP(n_units=[256, 512], scope=scope),
    cc_threshold=1e-3,
    object_shape=(21, 21),
)
def test_simple_add(test_config):
    """Train A2C on the simple-addition env, then reload the saved weights twice.

    The test runs in three phases, each of which must finish with a
    ``best_01_loss`` below the threshold:

    1. Train through a three-stage curriculum (``width`` 1, 2, 3).
    2. Reload the ``best_of_stage_2`` weights and train on the last
       curriculum stage only.
    3. Reload the same weights with ``do_train = False``.

    Args:
        test_config: Overrides applied on top of the locally specified config
            through ``config.update``.

    Raises:
        Exception: If the repeated phase-1 runs produce more than one distinct
            deterministic output.
    """
    # Fully specify the config here so that this test is not affected by config changes external to this file.
    config = Config(
        env_name="test_simple_add_a2c",
        name="test_simple_add_a2c",
        get_updater=a2c.A2C,
        n_controller_units=32,
        batch_size=16,
        optimizer_spec="adam",
        opt_steps_per_update=20,
        sub_batch_size=0,
        epsilon=0.2,
        lr_schedule=1e-4,
        max_steps=501,

        build_policy=BuildEpsilonSoftmaxPolicy(),
        build_controller=BuildLstmController(),

        exploration_schedule=0.1,
        val_exploration_schedule=0.0,
        actor_exploration_schedule=None,

        policy_weight=1.0,
        value_weight=0.0,
        value_reg_weight=0.0,
        entropy_weight=0.01,

        split=False,
        q_lmbda=1.0,
        v_lmbda=1.0,
        policy_importance_c=0,
        q_importance_c=None,
        v_importance_c=None,
        max_grad_norm=None,
        gamma=1.0,

        use_differentiable_loss=False,

        use_gpu=False,
        display_step=500,
        seed=0,

        # env-specific
        build_env=simple_addition.build_env,
        T=30,
        curriculum=[
            dict(width=1),
            dict(width=2),
            dict(width=3),
        ],
        base=10,
        final_reward=True,
    )
    config.update(test_config)

    n_repeats = 1  # Haven't made it completely deterministic yet, so keep it at 1.
    threshold = 0.15
    results = defaultdict(int)

    def run_and_check(cfg):
        """Run a copy of ``cfg``, tally its deterministic output, check the final loss.

        Returns the copied config together with the run output so the caller
        can keep building on both.
        """
        run_cfg = cfg.copy()
        run_output = _raw_run(run_cfg)
        outcome = _get_deterministic_output(run_output.path_for('stdout'))
        results[outcome] += 1
        assert run_output.history[-1]['best_01_loss'] < threshold
        return run_cfg, run_output

    # Phase 1: train through the full curriculum.
    for _ in range(n_repeats):
        config, output = run_and_check(config)

    if len(results) != 1:
        for outcome in sorted(results):
            print("\n" + "*" * 80)
            print("The following occurred {} times:\n".format(results[outcome]))
            print(outcome)
        raise Exception("Results were not deterministic.")

    assert len(output.config.curriculum) == 3
    config.load_path = output.path_for('weights/best_of_stage_2')
    for suffix in (".index", ".meta"):
        assert os.path.exists(config.load_path + suffix)

    # Load one of the hypotheses, train it for a bit, make sure the accuracy is still high.
    config.curriculum = [output.config.curriculum[-1]]
    config, output = run_and_check(config)

    # Load one of the hypotheses, don't train it at all, make sure the accuracy is still high.
    config.do_train = False
    config, output = run_and_check(config)
# Configuration for the "nips_2018_grid" environment: env_config.copy() called
# with the settings below; the environment itself is built by Nips2018Grid.
grid_config = env_config.copy(
    env_name="nips_2018_grid",
    build_env=Nips2018Grid,

    # dataset params
    min_chars=16,
    max_chars=25,
    n_patch_examples=0,
    # Evaluates to (84, 84); written as a product of the same 6 and 14 that
    # appear in grid_shape and patch_shape below.
    image_shape=(6 * 14, 6 * 14),
    patch_shape=(14, 14),
    characters=list(range(10)),
    patch_size_std=0.0,
    colours="white",
    grid_shape=(6, 6),
    spacing=(-3, -3),
    random_offset_range=(15, 15),
    n_distractors_per_image=0,
    backgrounds="",
    backgrounds_sample_every=False,
    background_colours="",
    background_cfg={"mode": "colour", "colour": "black"},
    object_shape=(14, 14),
    postprocessing="",
    preserve_env=False,

    n_train=25000,
    # NOTE(review): 1e3 is a float (1000.0) while n_train is an int -- confirm
    # that whatever consumes n_val accepts a float.
    n_val=1e3,

    eval_step=1000,
    display_step=1000,
    render_step=5000,
    patience=1000000,
    max_steps=110000,
)
split=False, q_lmbda=1.0, v_lmbda=1.0, policy_importance_c=0, q_importance_c=None, v_importance_c=None, max_grad_norm=None, gamma=1.0, use_differentiable_loss=False, render_n_rollouts=4, )


# "ActorCritic" experiment: config.copy() called with split=True.
# NOTE(review): the argument list just above sets split=False; presumably that
# call defines the base `config`, making this the only difference -- confirm.
actor_critic_config = config.copy(
    exp_name="ActorCritic",
    split=True
)


# "PPO" experiment: config.copy() called with 10 optimisation steps per
# update, epsilon=0.2 and value_weight=0.0.
ppo_config = config.copy(
    exp_name="PPO",
    opt_steps_per_update=10,
    epsilon=0.2,
    value_weight=0.0,
)


# Same config that is used in the test.
test_config = config.copy(
    exp_name="TestA2C",
    opt_steps_per_update=20,