def get_updater(env):
    # Policy pairing a GoToPoint controller with deterministic action selection.
    policy = Policy(GoToPoint(), Deterministic(2), env.obs_shape)

    # controller = FeedforwardCell(lambda inp, output_size: MLP([128, 128])(inp, output_size), 1)

    # Single linear layer (no activation) wrapped as a feedforward "cell",
    # producing a scalar value estimate.
    controller = FeedforwardCell(
        lambda inp, output_size: fully_connected(inp, output_size, activation_fn=None),
        1)

    estimator = NeuralValueEstimator(controller, env.obs_shape)

    # The critic-training algorithm is selected via the active config.
    alg = cfg.alg_class(estimator, name="critic")

    updater = RLUpdater(env, policy, alg)
    return updater
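# A minimal wiring sketch (assumption: this factory is registered on a
# dps-style config under a `get_updater` key, mirroring how `cfg.alg_class`
# is resolved above; the key name is not confirmed by this snippet).
from dps.config import DEFAULT_CONFIG

config = DEFAULT_CONFIG.copy()
config.update(get_updater=get_updater)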
def build_object_network_controller(output_size, name):
    from dps.utils.tf import ObjectNetwork
    ff = ObjectNetwork(n_repeats=1, scope="collection_controller")
    return FeedforwardCell(ff, output_size, name=name)
def build_attentional_relation_network(output_size, name):
    from dps.utils.tf import AttentionalRelationNetwork
    ff = AttentionalRelationNetwork(n_repeats=2, scope="collection_controller")
    return FeedforwardCell(ff, output_size, name=name)
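# Hypothetical usage sketch: both builders share the (output_size, name)
# signature, so either can stand in wherever a controller factory is
# expected; the output size and name below are made-up illustration values.
build_controller = build_attentional_relation_network
cell = build_controller(output_size=64, name="collection_cell")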
"Exponential(start=10000.0, end=0.000000001, decay_rate=0.1, decay_steps=3000, log=True)", z_pres_temperature=1.0, run_all_time_steps=False, stopping_threshold=0.99, per_process_gpu_memory_fraction=0.3, training_wheels=0.0, scale_prior_mean=-1.0, scale_prior_std=np.sqrt(0.05), shift_prior_mean=0.0, shift_prior_std=3.0, complete_rnn_input=False, ) dair_config = air_config.copy( difference_air=True, build_cell=lambda scope: FeedforwardCell(MLP(n_units=[256, 256, 256, 256]), cfg.rnn_n_units), ) nem_config = alg_config.copy( alg_name="nem", build_network=nem.NEM_Network, batch_size=16, lr_schedule=0.001, max_grad_norm=None, threshold=-np.inf, max_experiments=None, render_hook=nem.NeuralEM_RenderHook(4), render_step=5000, # ------- from nem.py -------- noise_prob=0.2, # probability of annihilating the pixel
def __call__(self, param_shape, name=None):
    return FeedforwardCell(MLP(), param_shape, name=name)
def __call__(self, param_shape, name=None):
    return FeedforwardCell(MLP(*self.args, **self.kwargs), param_shape, name=name)
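# The two __call__ methods above read like controller-builder classes: one
# constructing a default MLP, the other forwarding stored constructor
# arguments. A sketch of the parameterized variant follows; the class name
# and __init__ are assumptions, since only __call__ appears in the source.
class BuildMLPCell:
    def __init__(self, *args, **kwargs):
        # Stored arguments are forwarded verbatim to MLP at build time.
        self.args = args
        self.kwargs = kwargs

    def __call__(self, param_shape, name=None):
        return FeedforwardCell(MLP(*self.args, **self.kwargs), param_shape, name=name)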
from dps import cfg
from dps.config import DEFAULT_CONFIG
from dps.env import BatchGymEnv
from dps.utils.tf import MLP, FeedforwardCell
from dps.rl import rl_render_hook, BuildSoftmaxPolicy, BuildMlpController


def build_env():
    return BatchGymEnv(gym_env='MountainCar-v0')


controller = lambda params_dim, name: FeedforwardCell(
    lambda inp, output_size: MLP(
        [cfg.n_controller_units, cfg.n_controller_units])(inp, output_size),
    params_dim, name=name)

config = DEFAULT_CONFIG.copy()

# So far we have not been able to solve this with a policy-gradient method;
# the exploration problem is quite hard.
config.update(
    env_name="mountain_car",
    build_env=build_env,
    build_controller=BuildMlpController(),
    build_policy=BuildSoftmaxPolicy(one_hot=False),
    exploration_schedule="1.0",