Пример #1
0
        name="baseline_conv_network",
        input_shape=env_spec.observation_space.shape,
        output_dim=env_spec.action_space.flat_dim,
        conv_filters=(64, 64, 64, 32),
        conv_filter_sizes=((5, 5), (3, 3), (3, 3), (3, 3)),
        conv_strides=(3, 3, 3, 2),
        conv_pads=('SAME', 'SAME', 'SAME', 'SAME'),
        hidden_sizes=(256, ),
        hidden_nonlinearity=tf.nn.relu,
        output_nonlinearity=None,
    )

    # Build the policy on top of `policy_cnn` (defined outside this excerpt).
    # hidden_sizes=(128, 64) is presumably the MLP head stacked on the feature
    # network's output -- confirm against GaussianConvFeaturePolicy.
    policy = GaussianConvFeaturePolicy(
        "conv_feature_policy",
        env_spec=env_spec,
        feature_network=policy_cnn,
        hidden_sizes=(128, 64),
        # NOTE(review): presumably disables clipping of sampled actions to the
        # action-space bounds -- confirm.
        clip_action=False,
    )

    # The baseline gets its own feature network (`baseline_cnn`) rather than
    # reusing the policy's one, so the two objects passed here are distinct names.
    baseline = NNBaseline(
        env_spec=env_spec,
        feature_network=baseline_cnn,
        hidden_sizes=(128, 64),
        hidden_nonlinearity=tf.nn.relu,
        # Presumably the initial learning rate and the number of fitting
        # iterations per baseline update -- TODO confirm against NNBaseline.
        init_lr=1e-4,
        n_itr=5,
        # NOTE(review): presumably lets baseline fitting also update the
        # weights of `baseline_cnn` -- confirm.
        train_feature_network=True,
    )

    # Consumed further down, outside this excerpt; presumably the number of
    # environment samples collected per training iteration -- TODO confirm.
    batch_size = 9600
Пример #2
0
        name="conv_feature_network",
        input_shape=env_spec.observation_space.shape,
        output_dim=env_spec.action_space.flat_dim,
        conv_filters=(32, 32, 32, 32, 32),
        conv_filter_sizes=((3,3),(3,3),(3,3),(3,3), (3,3)),
        conv_strides=(2, 2, 2, 2, 2),
        conv_pads=('SAME', 'SAME', 'SAME', 'SAME', 'SAME'),
        hidden_sizes=(256,),
        hidden_nonlinearity=tf.nn.relu,
        output_nonlinearity=None,
    )

    # Build the policy on top of `cnn` (defined outside this block).
    # hidden_sizes=(128,64) is presumably the MLP head stacked on the feature
    # network's output -- confirm against GaussianConvFeaturePolicy.
    policy = GaussianConvFeaturePolicy(
        "conv_feature_policy",
        env_spec=env_spec,
        feature_network=cnn,
        hidden_sizes=(128,64),
        # tanh is passed as the policy's output nonlinearity; presumably this
        # bounds the action mean to (-1, 1) -- confirm how the policy applies it.
        output_nonlinearity=tf.nn.tanh,
    )

    # NOTE(review): the baseline receives the very same `cnn` object as the
    # policy, and `train_feature_network` is not passed, so NNBaseline's default
    # applies. Confirm that sharing the feature network is intended and whether
    # baseline fitting updates the shared features.
    baseline = NNBaseline(
        env_spec=env_spec,
        feature_network=cnn,
        hidden_sizes=(128,64),
        hidden_nonlinearity=tf.nn.relu,
        # Presumably the initial learning rate and the number of fitting
        # iterations per baseline update -- TODO confirm against NNBaseline.
        init_lr=0.001,
        n_itr=5,
    )

    # Presumably the number of environment samples per training iteration,
    # consumed by the algorithm constructed after this block -- TODO confirm.
    batch_size = 2400
    algo = TRPO(