Example #1
def make_config():
  return config_lib.Config(
      data=config_lib.OneOf(
          [config_lib.Config(task=1, a='hello'),
           config_lib.Config(
               task=2,
               a=config_lib.OneOf(
                   [config_lib.Config(x=1, y=2),
                    config_lib.Config(x=-1, y=1000, z=4)],
                   x=1)),
           config_lib.Config(task=3, c=1234)],
          task=2),
      model=config_lib.Config(stuff=1))
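
Reading the selectors above: the outer OneOf sets task=2, so its second choice is active, and that choice's a value is itself a OneOf whose x=1 selector picks the first inner choice. Under that reading (an assumption about how OneOf resolves its selector, not something the snippet itself confirms), the fully resolved config is equivalent to the plain nested literal below:

# Hypothetical resolved view of make_config(), assuming each OneOf collapses
# to the choice whose fields match its selector keyword:
resolved = {
    'data': {          # outer OneOf: selector task=2 -> second choice
        'task': 2,
        'a': {         # inner OneOf: selector x=1 -> first choice
            'x': 1,
            'y': 2,
        },
    },
    'model': {'stuff': 1},
}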
Example #2
def default_config():
    return config_lib.Config(
        agent=config_lib.OneOf(
            [
                config_lib.Config(
                    algorithm='pg',
                    policy_lstm_sizes=[35, 35],
                    # Set value_lstm_sizes to None to share weights with policy.
                    value_lstm_sizes=[35, 35],
                    obs_embedding_size=10,
                    grad_clip_threshold=10.0,
                    param_init_factor=1.0,
                    lr=5e-5,
                    pi_loss_hparam=1.0,
                    vf_loss_hparam=0.5,
                    entropy_beta=1e-2,
                    regularizer=0.0,
                    softmax_tr=1.0,  # Reciprocal temperature.
                    optimizer='rmsprop',  # 'adam', 'sgd', 'rmsprop'
                    topk=0,  # Top-k unique codes will be stored.
                    topk_loss_hparam=0.0,  # Off-policy loss multiplier.
                    # Uniformly sample this many episodes from topk buffer per batch.
                    # If topk is 0, this has no effect.
                    topk_batch_size=1,
                    # Exponential moving average baseline for REINFORCE.
                    # If zero, A2C is used.
                    # If non-zero, should be close to 1, like .99, .999, etc.
                    ema_baseline_decay=0.99,
                    # Whether the agent can emit an EOS token, which ends the
                    # episode early (ends the sequence). If false, the agent
                    # must emit tokens until the timestep limit is reached,
                    # i.e. True means variable-length code, False means
                    # fixed-length code.
                    # WARNING: Making this false slows things down.
                    eos_token=False,
                    replay_temperature=1.0,
                    # Replay probability. 1 = always replay, 0 = always on policy.
                    alpha=0.0,
                    # Whether to normalize importance weights in each minibatch.
                    iw_normalize=True),
                config_lib.Config(
                    algorithm='ga', crossover_rate=0.99, mutation_rate=0.086),
                config_lib.Config(algorithm='rand')
            ],
            algorithm='pg',
        ),
        env=config_lib.Config(
            # If True, task-specific settings are not needed.
            task='',  # 'print', 'echo', 'reverse', 'remove', ...
            task_cycle=[],  # If non-empty, repetitions will cycle through tasks.
            task_kwargs='{}',  # Python dict literal.
            task_manager_config=config_lib.Config(
                # Reward received per test case. These bonuses will be scaled
                # based on how many test cases there are.
                correct_bonus=2.0,  # Bonus for code getting correct answer.
                code_length_bonus=1.0),  # Maximum bonus for short code.
            correct_syntax=False,
        ),
        batch_size=64,
        timestep_limit=32)
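
A caller would typically start from default_config() and override a few fields, for example switching the agent branch from 'pg' to 'ga'. The sketch below shows the assumed deep-merge semantics with plain dicts; deep_update is a hypothetical helper, not part of config_lib:

def deep_update(base, new):
    # Recursively merge new into base; leaves of new win (assumed semantics).
    for key, value in new.items():
        if isinstance(value, dict) and isinstance(base.get(key), dict):
            deep_update(base[key], value)
        else:
            base[key] = value
    return base

defaults = {'agent': {'algorithm': 'pg', 'lr': 5e-5}, 'batch_size': 64}
override = {'agent': {'algorithm': 'ga'}}  # select the GA branch instead
config = deep_update(defaults, override)
assert config['agent']['algorithm'] == 'ga'
assert config['batch_size'] == 64  # untouched defaults remain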
Example #3
def make_config():
  return config_lib.Config(
      data=config_lib.OneOf(
          [config_lib.Config(task=1, a='hello'),
           config_lib.Config(task=2, a='world', b='stuff'),
           config_lib.Config(task=3, c=1234)],
          task=2),
      model=config_lib.Config(stuff=1))
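
To make the selection behavior concrete, here is a minimal self-contained stand-in. Treating Config as an attribute-accessible dict and OneOf as resolving to the choice that matches its selector keyword are assumptions about the intended semantics, not config_lib's actual implementation:

class AttrConfig(dict):
    # Hypothetical stand-in for config_lib.Config: a dict with attribute access.
    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.__dict__ = self

def one_of(choices, **selector):
    # Hypothetical stand-in for config_lib.OneOf: return the choice whose
    # fields match the single selector keyword (e.g. task=2).
    (key, value), = selector.items()
    for choice in choices:
        if choice.get(key) == value:
            return choice
    raise ValueError('no choice with %s=%r' % (key, value))

config = AttrConfig(
    data=one_of(
        [AttrConfig(task=1, a='hello'),
         AttrConfig(task=2, a='world', b='stuff'),
         AttrConfig(task=3, c=1234)],
        task=2),
    model=AttrConfig(stuff=1))
assert config.data.a == 'world'  # the task=2 choice was selected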