args.dyn_components) dynE = (dynE + 1) * args.dyn_components + 1 else: output_density = models.DiagGaussianDensity(dynE / 2) dyn_model = models.mlp( D + U, dynE, args.dyn_shape, dropout_layers=[ models.modules.CDropout(args.dyn_drop_rate * np.ones(hid)) if args.dyn_drop_rate > 0 else None for hid in args.dyn_shape ], nonlin=torch.nn.ReLU) dyn = models.DynamicsModel(dyn_model, reward_func=reward_func, output_density=output_density).float() # initalize policy pol_model = models.mlp(D, 2 * U, args.pol_shape, dropout_layers=[ models.modules.BDropout(args.pol_drop_rate) if args.pol_drop_rate > 0 else None for hid in args.pol_shape ], nonlin=torch.nn.ReLU, output_nonlin=partial(models.DiagGaussianDensity, U))
# init reward/cost function.
# When the reward is learned, the dynamics model predicts one extra output
# dimension alongside the D state dims (dynE counts 2 values per predicted
# dim — presumably mean + variance for the heteroscedastic head; confirm
# against models.DynamicsModel).
if learn_reward:
    dynE = 2 * (D + 1)
    reward_func = None
else:
    dynE = 2 * D
    # Known reward: bind the task-specific parameters into the callable so
    # downstream code can invoke it with states only.
    reward_func = partial(reward_func, target=target, Q=Q, angle_dims=angle_dims)

# init dynamics model (heteroscedastic noise).
# FIX: build one dropout module per hidden layer with a comprehension.
# The original `[models.modules.CDropout(0.1)] * 2` aliases a SINGLE module
# instance into both layers, so the layers share one dropout object (and,
# for concrete dropout, its learnable rate) instead of having independent
# regularizers. Same fix applied to the policy's BDropout layers below.
dyn = models.DynamicsModel(
    models.dropout_mlp(
        D + U,
        dynE,
        [200] * 2,  # two hidden layers of 200 units ([int] * n is safe: ints are immutable)
        dropout_layers=[models.modules.CDropout(0.1) for _ in range(2)],
        nonlin=torch.nn.ReLU),
    reward_func=reward_func).float()

# Convenience closure: forward(...) with the dynamics model pre-bound.
forward_fn = partial(forward, dynamics=dyn)

# init policy: MLP squashed by tanh, scaled by maxU inside models.Policy.
pol = models.Policy(
    models.dropout_mlp(
        D,
        U,
        output_nonlin=torch.nn.Tanh,
        dropout_layers=[models.modules.BDropout(0.1) for _ in range(2)]),
    maxU).float()

# Uniform random policy used for initial exploration rollouts.
randpol = RandPolicy(maxU)

# init experience dataset (rollout storage for model training).
exp = ExperienceDataset()