Example #1
    def _get_agent(self, action_spec):
        config = ConfigDict()
        config.model = ConfigDict()
        config.model.class_path = "liaison.agents.models.mlp"
        config.model.hidden_layer_sizes = [64, 64]

        # Learning-rate decay schedule.
        config.lr_init = 1e-3
        config.lr_min = 1e-4
        config.lr_start_dec_step = 1000
        config.lr_dec_steps = 1000
        config.lr_dec_val = .1
        config.lr_dec_approach = 'linear'

        # Entropy-regularization decay schedule.
        config.ent_dec_init = 1
        config.ent_dec_min = 0
        config.ent_dec_steps = 1000
        config.ent_start_dec_step = 1000
        config.ent_dec_val = .1
        config.ent_dec_approach = 'linear'

        config.grad_clip = 1.0
        config.discount_factor = 0.99
        # V-trace importance-weight clipping thresholds.
        config.clip_rho_threshold = 1.0
        config.clip_pg_rho_threshold = 1.0

        config.loss = ConfigDict()
        config.loss.vf_loss_coeff = 1.0
        with tf.variable_scope('mlp', reuse=tf.AUTO_REUSE):
            return MLPAgent(action_spec=action_spec,
                            name='test',
                            seed=42,
                            **config)
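
The snippet above assumes a ConfigDict that supports attribute-style assignment and can be unpacked with ** into keyword arguments. The stand-in below is a minimal sketch with just those two properties, so the example can be read outside the liaison codebase; it is not the actual liaison ConfigDict implementation.

# Minimal illustrative stand-in, NOT the real liaison ConfigDict:
# a dict subclass with attribute-style reads and writes, so that
# `config.lr_init = 1e-3` stores a key and `**config` unpacks as kwargs.
class ConfigDict(dict):

    def __getattr__(self, name):
        try:
            return self[name]
        except KeyError:
            raise AttributeError(name)

    def __setattr__(self, name, value):
        self[name] = value


config = ConfigDict()
config.lr_init = 1e-3
assert config['lr_init'] == 1e-3
assert dict(**config) == {'lr_init': 1e-3}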
Example #2
  def _get_agent_instance(self):
    action_spec = BoundedArraySpec((10, 20),
                                   np.int32,
                                   0,
                                   N_NODES - 1,
                                   name='test_spec')

    config = ConfigDict()
    config.model = self._get_model_config()

    # Learning-rate decay schedule.
    config.lr_init = 1e-3
    config.lr_min = 1e-4
    config.lr_start_dec_step = 1000
    config.lr_dec_steps = 1000
    config.lr_dec_val = .1
    config.lr_dec_approach = 'linear'

    # Entropy-regularization decay schedule.
    config.ent_dec_init = 1
    config.ent_dec_min = 0
    config.ent_dec_steps = 1000
    config.ent_start_dec_step = 1000
    config.ent_dec_val = .1
    config.ent_dec_approach = 'linear'

    config.grad_clip = 1.0
    config.discount_factor = 0.99
    config.clip_rho_threshold = 1.0
    config.clip_pg_rho_threshold = 1.0

    config.loss = ConfigDict()
    config.loss.vf_loss_coeff = 1.0

    with tf.variable_scope('gcn_rins', reuse=tf.AUTO_REUSE):
      return MLPAgent(action_spec=action_spec, name='test', seed=42, **config)
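
The action spec above describes a (10, 20) array of integer decisions bounded by N_NODES - 1. The sketch below draws a random action consistent with those bounds; it is purely illustrative, assumes a value for N_NODES, and does not rely on any BoundedArraySpec method.

# Illustrative only: sample a random action matching the spec above
# (shape (10, 20), dtype int32, values in [0, N_NODES - 1]).
import numpy as np

N_NODES = 32  # assumed here; the real value comes from the test module
rng = np.random.default_rng(42)
action = rng.integers(low=0, high=N_NODES, size=(10, 20), dtype=np.int32)
assert action.shape == (10, 20)
assert action.min() >= 0 and action.max() <= N_NODES - 1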
Example #3
def get_agent_config():
    config = ConfigDict()

    # Learning-rate decay schedule.
    config.lr_init = 1e-4
    config.lr_min = 1e-7
    config.lr_start_dec_step = 1000
    config.lr_dec_steps = 1000
    config.lr_dec_val = .1
    config.lr_dec_approach = 'linear'

    # Entropy-regularization decay schedule.
    config.ent_dec_init = 1e-2
    config.ent_dec_min = 0.0
    config.ent_dec_steps = 1000
    config.ent_start_dec_step = 1000
    # dec_val not used for linear scheme
    config.ent_dec_val = .1
    config.ent_dec_approach = 'linear'

    # specify <= 0 here to disable grad clip
    config.grad_clip = 1.0
    config.discount_factor = 1.0

    config.optimizer = ConfigDict()
    # Options: Adam or RMSProp.
    config.optimizer.name = 'Adam'
    # hyperparams for RMSProp
    config.optimizer.decay = .9
    config.optimizer.momentum = 0.0
    config.optimizer.epsilon = 1e-7
    # required fields.
    config.class_path = "liaison.agents.gcn_multi_actions"
    config.class_name = "Agent"

    config.model = ConfigDict()
    config.model.class_path = 'liaison.agents.models.transformer_auto_regressive'
    config.model.num_blocks = 4
    config.model.d_ff = 32
    config.model.num_heads = 4
    config.model.d_model = 64
    config.model.dropout_rate = 0.
    config.model.use_mlp_value_func = False

    # The following block is duplicated in gcn_rins.py as well;
    # propagate any changes made here as needed.
    config.model.model_kwargs = ConfigDict()
    config.model.model_kwargs.class_path = "liaison.agents.models.bipartite_gcn_rins"
    config.model.model_kwargs.n_prop_layers = 4
    config.model.model_kwargs.edge_embed_dim = 16
    config.model.model_kwargs.node_embed_dim = 32
    config.model.model_kwargs.global_embed_dim = 32
    config.model.model_kwargs.node_hidden_layer_sizes = [16, 16]
    config.model.model_kwargs.edge_hidden_layer_sizes = [16, 16]
    config.model.model_kwargs.policy_torso_hidden_layer_sizes = [16, 16]
    config.model.model_kwargs.value_torso_hidden_layer_sizes = [16, 16]
    config.model.model_kwargs.policy_summarize_hidden_layer_sizes = [16]
    config.model.model_kwargs.value_summarize_hidden_layer_sizes = [16]
    config.model.model_kwargs.supervised_prediction_torso_hidden_layer_sizes = [
        16, 16
    ]
    config.model.model_kwargs.sum_aggregation = False
    config.model.model_kwargs.use_layer_norm = True
    config.model.model_kwargs.apply_gradient_to_graphnet_every = 1
    config.model.model_kwargs.memory_hack = False

    config.clip_rho_threshold = 1.0
    config.clip_pg_rho_threshold = 1.0

    config.loss = ConfigDict()
    config.loss.vf_loss_coeff = 1.0

    config.loss.al_coeff = ConfigDict()
    config.loss.al_coeff.init_val = 0.
    config.loss.al_coeff.min_val = 0.
    config.loss.al_coeff.start_decay_step = int(1e10)
    config.loss.al_coeff.decay_steps = 5000
    # dec_val not used for linear scheme
    config.loss.al_coeff.dec_val = .1
    config.loss.al_coeff.dec_approach = 'linear'

    # applicable for agent 'liaison.agents.gcn_large_batch'
    config.apply_grads_every = 1
    config.log_features_every = 0

    config.freeze_graphnet_weights_step = 50 + 10

    return config
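
get_agent_config() names the agent class only indirectly, through config.class_path and config.class_name. The helper below is an illustrative sketch (not part of liaison) showing how such a config could be resolved into a class using only the standard library.

# Illustrative helper, not part of liaison: resolve the agent class named by
# config.class_path / config.class_name via importlib.
import importlib


def resolve_agent_class(config):
    module = importlib.import_module(config.class_path)  # e.g. "liaison.agents.gcn_multi_actions"
    return getattr(module, config.class_name)            # e.g. "Agent"

# Usage, assuming the liaison package is importable:
#   agent_cls = resolve_agent_class(get_agent_config())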