# Tail of a network-constructor call whose opening line is outside this view
# (presumably the network bound to `baseline_cnn` used below -- TODO confirm).
# Every conv_* tuple has four entries, presumably one per convolutional layer.
    name="baseline_conv_network",
    input_shape=env_spec.observation_space.shape,
    output_dim=env_spec.action_space.flat_dim,
    conv_filters=(64, 64, 64, 32),
    conv_filter_sizes=((5, 5), (3, 3), (3, 3), (3, 3)),
    conv_strides=(3, 3, 3, 2),
    conv_pads=('SAME', 'SAME', 'SAME', 'SAME'),
    hidden_sizes=(256, ),
    hidden_nonlinearity=tf.nn.relu,
    output_nonlinearity=None,
)

# Policy built on `policy_cnn` as its feature network. `policy_cnn` is defined
# outside this view; the policy adds hidden_sizes (128, 64) on top of it.
policy = GaussianConvFeaturePolicy(
    "conv_feature_policy",
    env_spec=env_spec,
    feature_network=policy_cnn,
    hidden_sizes=(128, 64),
    clip_action=False,
)

# Baseline uses its own feature network (`baseline_cnn`), separate from the
# policy's `policy_cnn`, and is constructed with train_feature_network=True.
# NOTE(review): `init_lr` / `n_itr` look like the baseline optimizer's initial
# learning rate and iteration count -- confirm against NNBaseline.
baseline = NNBaseline(
    env_spec=env_spec,
    feature_network=baseline_cnn,
    hidden_sizes=(128, 64),
    hidden_nonlinearity=tf.nn.relu,
    init_lr=1e-4,
    n_itr=5,
    train_feature_network=True,
)

# Consumed by code outside this view (presumably the training algorithm).
batch_size = 9600
# Tail of a network-constructor call whose opening line is outside this view
# (presumably the network bound to `cnn` used below -- TODO confirm).
# Every conv_* tuple has five entries, presumably one per convolutional layer.
    name="conv_feature_network",
    input_shape=env_spec.observation_space.shape,
    output_dim=env_spec.action_space.flat_dim,
    conv_filters=(32, 32, 32, 32, 32),
    conv_filter_sizes=((3,3),(3,3),(3,3),(3,3), (3,3)),
    conv_strides=(2, 2, 2, 2, 2),
    conv_pads=('SAME', 'SAME', 'SAME', 'SAME', 'SAME'),
    hidden_sizes=(256,),
    hidden_nonlinearity=tf.nn.relu,
    output_nonlinearity=None,
)

# Policy built on `cnn` as its feature network, with tanh passed as the
# policy's output_nonlinearity.
policy = GaussianConvFeaturePolicy(
    "conv_feature_policy",
    env_spec=env_spec,
    feature_network=cnn,
    hidden_sizes=(128,64),
    output_nonlinearity=tf.nn.tanh,
)

# NOTE(review): the baseline receives the same `cnn` object as the policy and
# no train_feature_network argument is passed here. Whether the feature
# network is shared/updated by baseline fitting depends on NNBaseline's
# defaults -- confirm this sharing is intended.
baseline = NNBaseline(
    env_spec=env_spec,
    feature_network=cnn,
    hidden_sizes=(128,64),
    hidden_nonlinearity=tf.nn.relu,
    init_lr=0.001,
    n_itr=5,
)

# Consumed by code outside this view (presumably the TRPO call below).
batch_size = 2400

# The arguments of this TRPO call continue beyond the visible source.
algo = TRPO(