class Policy(nn.Module):
    """Actor-critic policy: wraps a feature-extracting network `nn` with an
    action-distribution head chosen from the environment's action space.

    The wrapped network must return `(value, actor_features, rnn_hxs)` from
    its forward pass and expose `output_size`, `is_recurrent`, and
    `recurrent_hidden_state_size`.
    """

    def __init__(self, nn, action_space, noisy_net=False):
        """
        Args:
            nn: torch.nn.Module producing (value, features, rnn_hxs).
            action_space: gym-style space; Discrete or Box supported.
            noisy_net: enable NoisyNet exploration in the Categorical head.
        """
        super(Policy, self).__init__()
        assert isinstance(nn, torch.nn.Module)
        self.nn = nn
        if action_space.__class__.__name__ == "Discrete":
            num_outputs = action_space.n
            self.dist = Categorical(self.nn.output_size, num_outputs,
                                    noisy=noisy_net)
        elif action_space.__class__.__name__ == "Box":
            num_outputs = action_space.shape[0]
            self.dist = DiagGaussian(self.nn.output_size, num_outputs)
        else:
            raise NotImplementedError

    @property
    def is_recurrent(self):
        return self.nn.is_recurrent

    @property
    def recurrent_hidden_state_size(self):
        """Size of rnn_hx."""
        return self.nn.recurrent_hidden_state_size

    def forward(self, inputs, rnn_hxs, masks):
        raise NotImplementedError

    def act(self, inputs, rnn_hxs, masks, deterministic=False):
        """Select an action (mode if deterministic, otherwise a sample).

        Returns:
            (value, action, action_log_probs, rnn_hxs)
        """
        value, actor_features, rnn_hxs = self.nn(inputs, rnn_hxs, masks)
        dist = self.dist(actor_features)
        if deterministic:
            action = dist.mode()
        else:
            action = dist.sample()
        action_log_probs = dist.log_probs(action)
        # Fix: removed `_ = dist.entropy().mean()` — the result was
        # discarded, wasting compute on every action selection.
        return value, action, action_log_probs, rnn_hxs

    def get_value(self, inputs, rnn_hxs, masks):
        """Return only the critic's value estimate for the inputs."""
        value, _, _ = self.nn(inputs, rnn_hxs, masks)
        return value

    def evaluate_actions(self, inputs, rnn_hxs, masks, action):
        """Score given actions: value, log-probs, and mean entropy."""
        value, actor_features, rnn_hxs = self.nn(inputs, rnn_hxs, masks)
        dist = self.dist(actor_features)
        action_log_probs = dist.log_probs(action)
        dist_entropy = dist.entropy().mean()
        return value, action_log_probs, dist_entropy, rnn_hxs

    def reset_noise(self):
        """Resample NoisyNet noise in both the base network and the head."""
        self.nn.reset_noise()
        self.dist.reset_noise()
def __init__(self, num_inputs, action_space):
    """Build a Nature-DQN style CNN trunk with a plain critic head plus
    separate V(s) and Q(s, a) heads, and an action-distribution head.

    Args:
        num_inputs: number of input image channels (stacked frames).
        action_space: gym-style space; Discrete or Box.
    """
    super(CNNPolicy, self).__init__()
    # Conv geometry implies an 84x84 input (-> 7x7 feature maps for linear1).
    self.conv1 = nn.Conv2d(num_inputs, 32, 8, stride=4)
    self.conv2 = nn.Conv2d(32, 64, 4, stride=2)
    self.conv3 = nn.Conv2d(64, 32, 3, stride=1)
    self.linear1 = nn.Linear(32 * 7 * 7, 512)
    self.critic_linear = nn.Linear(512, 1)
    # State-value head V(s).
    self.V_linear_1 = nn.Linear(512, 20)
    self.V_linear_2 = nn.Linear(20, 1)
    # Action-value head Q(s, a): features concatenated with the action.
    # NOTE(review): `action_space.n` is read unconditionally here, so this
    # constructor requires a Discrete space despite the Box branch below.
    self.Q_linear_1 = nn.Linear(512 + action_space.n, 20)
    self.Q_linear_2 = nn.Linear(20, 1)
    if action_space.__class__.__name__ == "Discrete":
        num_outputs = action_space.n
        self.dist = Categorical(512, num_outputs)
    elif action_space.__class__.__name__ == "Box":
        num_outputs = action_space.shape[0]
        self.dist = DiagGaussian(512, num_outputs)
    else:
        raise NotImplementedError
    self.train()
    self.reset_parameters()
def __init__(self, num_inputs, action_space):
    """Construct the convolutional actor-critic trunk and its heads."""
    super(CNNPolicy, self).__init__()
    # Nature-DQN style feature extractor (84x84 input -> 7x7 maps).
    self.conv1 = nn.Conv2d(num_inputs, 32, 8, stride=4)
    self.conv2 = nn.Conv2d(32, 64, 4, stride=2)
    self.conv3 = nn.Conv2d(64, 32, 3, stride=1)
    # Shared nonlinearity used by the forward pass.
    self.act_func = F.leaky_relu
    self.linear1 = nn.Linear(32 * 7 * 7, 512)
    self.critic_linear = nn.Linear(512, 1)
    space_kind = action_space.__class__.__name__
    if space_kind == "Discrete":
        self.dist = Categorical(512, action_space.n)
    elif space_kind == "Box":
        self.dist = DiagGaussian(512, action_space.shape[0])
    else:
        raise NotImplementedError
    self.train()
    self.reset_parameters()
def __init__(self, nn, action_space, noisy_net=False):
    """Wrap a feature network (exposing `output_size`) with a distribution
    head that matches the given action space."""
    super(Policy, self).__init__()
    assert isinstance(nn, torch.nn.Module)
    self.nn = nn
    space_kind = action_space.__class__.__name__
    if space_kind == "Discrete":
        self.dist = Categorical(self.nn.output_size, action_space.n,
                                noisy=noisy_net)
    elif space_kind == "Box":
        self.dist = DiagGaussian(self.nn.output_size, action_space.shape[0])
    else:
        raise NotImplementedError
def __init__(self, obs_shape, action_space, recurrent_policy, hidden_size, args):
    """Pick a CNN or MLP base from the observation shape and attach the
    matching action-distribution head.

    Args:
        obs_shape: 3-D (C, H, W) -> CNNBase; 1-D -> MLPBase.
        action_space: gym-style space; Discrete or Box.
        recurrent_policy: enable recurrence (CNN base only).
        hidden_size: hidden width for the MLP base.
        args: run configuration; `args.leaky` is stored on the policy.
    """
    super(Policy, self).__init__()
    if len(obs_shape) == 3:
        self.base = CNNBase(obs_shape[0], recurrent_policy)
    elif len(obs_shape) == 1:
        assert not recurrent_policy, \
            "Recurrent policy is not implemented for the MLP controller"
        self.base = MLPBase(obs_shape[0], hidden_size, args)
    else:
        raise NotImplementedError
    if action_space.__class__.__name__ == "Discrete":
        num_outputs = action_space.n
        self.dist = Categorical(self.base.output_size, num_outputs)
    elif action_space.__class__.__name__ == "Box":
        num_outputs = action_space.shape[0]
        self.dist = DiagGaussian(self.base.output_size, num_outputs)
    else:
        raise NotImplementedError
    self.state_size = self.base.state_size
    self.leaky = args.leaky
    # Neutral initial scale — presumably consumed elsewhere; confirm usage.
    self.scale = 1.
def __init__(
        self,
        observability: str,
        option_dims: int,
        option_space: str,
        # obs_spaces: collections.OrderedDict,
        hidden_size: int,
        base_model: str,
        base_kwargs: Dict,
        # policy_base_kwargs: Dict,
):
    """Option-level policy: delegates base construction to the parent class
    and, for continuous option spaces, replaces the head with DiagGaussian.

    Args:
        observability: forwarded to the parent policy constructor.
        option_dims: dimensionality of the option (treated as the action).
        option_space: 'continuous' or 'discrete'.
        hidden_size: accepted but not used in this body — presumably
            consumed by subclasses or the commented-out layer; confirm.
        base_model: base-network identifier, forwarded to the parent.
        base_kwargs: keyword arguments for the base network.
    """
    super().__init__(
        observability=observability,
        action_dims=option_dims,
        base_model=base_model,
        base_kwargs=base_kwargs,
    )
    assert option_space in ['continuous', 'discrete']
    self.option_space = option_space
    if self.option_space == 'continuous':
        # Overwrite Policy class attributes
        # self.fc12 = init_(nn.Linear(hidden_size,
        #                             2 * omega_option_dims))
        self.dist = DiagGaussian(self.base.output_size, option_dims)
def __init__(self, obs_shape, action_space, one_hot, hid_size, recurrent_policy, label):
    """Hierarchical-RL policy: builds a CNN or MLP base with a one-hot
    conditioning input and attaches the matching distribution head.

    Args:
        obs_shape: 3-D -> CNNBase; 1-D -> MLPBase.
        action_space: gym-style space; Discrete or Box.
        one_hot: conditioning argument forwarded to the base — presumably
            a one-hot sub-policy selector size; confirm against the base.
        hid_size: hidden width of the base network.
        recurrent_policy: enable recurrence (CNN base only).
        label: identifier stored on the policy.
    """
    super(EHRL_Policy, self).__init__()
    self.hid_size = hid_size
    self.label = label
    # self.num_hid_layers = num_hid_layers
    # self.num_subpolicies = num_subpolicies
    # self.gaussian_fixed_var = gaussian_fixed_var
    if len(obs_shape) == 3:
        self.base = CNNBase(obs_shape[0], one_hot, self.hid_size, recurrent_policy)
    elif len(obs_shape) == 1:
        assert not recurrent_policy, \
            "Recurrent policy is not implemented for the MLP controller"
        self.base = MLPBase(obs_shape[0], one_hot, self.hid_size)
    else:
        raise NotImplementedError
    if action_space.__class__.__name__ == "Discrete":
        num_outputs = action_space.n
        self.dist = Categorical(self.base.output_size, num_outputs)
    elif action_space.__class__.__name__ == "Box":
        num_outputs = action_space.shape[0]
        self.dist = DiagGaussian(self.base.output_size, num_outputs)
    else:
        raise NotImplementedError
    self.state_size = self.base.state_size
def __init__(self, num_inputs, action_space):
    """Two-branch MLP actor-critic over Welford-normalized observations."""
    super(MLPPolicy, self).__init__()
    self.action_space = action_space
    # Online input normalization.
    self.input_norm = WelfordNormalization(num_inputs)
    # Actor branch: two 64-wide layers feeding the distribution head.
    self.a_fc1 = nn.Linear(num_inputs, 64)
    self.a_fc2 = nn.Linear(64, 64)
    # Critic branch: two 64-wide layers plus a scalar value output.
    self.v_fc1 = nn.Linear(num_inputs, 64)
    self.v_fc2 = nn.Linear(64, 64)
    self.v_fc3 = nn.Linear(64, 1)
    space_kind = action_space.__class__.__name__
    if space_kind == "Discrete":
        self.dist = Categorical(64, action_space.n)
    elif space_kind == "Box":
        self.dist = DiagGaussian(64, action_space.shape[0])
    else:
        raise NotImplementedError
    self.train()
    self.reset_parameters()
def __init__(self, num_inputs, action_space, num_heads=1, hidden_size=512):
    """CNN policy with `num_heads` independent (representation, critic)
    pairs sharing one action-distribution head.

    Args:
        num_inputs: input channels for each representation network.
        action_space: gym-style space; Discrete or Box.
        num_heads: number of representation/critic pairs to build.
        hidden_size: feature width produced by each representation.
    """
    super(CNNPolicy, self).__init__()
    self.num_heads = num_heads
    self.representations = []
    self.critics = []
    for _ in range(num_heads):
        self.representations.append(
            self.build_representation(num_inputs, hidden_size=hidden_size))
        self.critics.append(self.build_critic(hidden_size, 1))
    if action_space.__class__.__name__ == "Discrete":
        num_outputs = action_space.n
        self.dist = Categorical(hidden_size, num_outputs)
    elif action_space.__class__.__name__ == "Box":
        num_outputs = action_space.shape[0]
        self.dist = DiagGaussian(hidden_size, num_outputs)
    else:
        raise NotImplementedError
    # Wrap in ModuleList so per-head parameters are registered/tracked.
    self.critics = nn.ModuleList(self.critics)
    self.representations = nn.ModuleList(self.representations)
    # Single optimizer group covering all parameters.
    self.param_groups = [list(self.parameters())]
    self.train()
    self.reset_parameters()
def __init__(self, obs_shape, action_space, base=None, base_kwargs=None):
    """Generic actor-critic: choose a base network from the observation
    shape (unless `base` is given) and attach a distribution head.

    Args:
        obs_shape: 3-D -> CNNBase, 1-D -> MLPBase (when `base` is None).
        action_space: Discrete, Box, or MultiBinary.
        base: optional base-network class overriding the shape-based choice.
        base_kwargs: keyword arguments forwarded to the base constructor.
    """
    super(Policy, self).__init__()
    if base_kwargs is None:
        base_kwargs = {}
    if base is None:
        if len(obs_shape) == 3:
            base = CNNBase
        elif len(obs_shape) == 1:
            base = MLPBase
        else:
            raise NotImplementedError
    self.base = base(obs_shape[0], **base_kwargs)
    if action_space.__class__.__name__ == "Discrete":
        num_outputs = action_space.n
        # NOTE(review): hard-codes 512 instead of self.base.output_size,
        # unlike the Box/MultiBinary branches below — this breaks if the
        # base's feature width is not 512; confirm intent.
        num_action_outputs = 512
        self.dist = Categorical(num_action_outputs, num_outputs)
    elif action_space.__class__.__name__ == "Box":
        num_outputs = action_space.shape[0]
        self.dist = DiagGaussian(self.base.output_size, num_outputs)
    elif action_space.__class__.__name__ == "MultiBinary":
        num_outputs = action_space.shape[0]
        self.dist = Bernoulli(self.base.output_size, num_outputs)
    else:
        raise NotImplementedError
def __init__(self, num_inputs, action_space, use_gru): super(CNNPolicy, self).__init__() #print('num_inputs=%s' % str(num_inputs)) self.conv1 = nn.Conv2d(num_inputs, 32, 8, stride=2) self.conv2 = nn.Conv2d(32, 32, 4, stride=2) self.conv3 = nn.Conv2d(32, 32, 4, stride=2) self.conv4 = nn.Conv2d(32, 32, 4, stride=1) self.linear1 = nn.Linear(32 * 2 * 2, 256) if use_gru: self.gru = nn.GRUCell(512, 512) self.critic_linear = nn.Linear(256, 1) if action_space.__class__.__name__ == "Discrete": num_outputs = action_space.n self.dist = Categorical(256, num_outputs) elif action_space.__class__.__name__ == "Box": num_outputs = action_space.shape[0] self.dist = DiagGaussian(256, num_outputs) else: raise NotImplementedError self.train() self.reset_parameters()
def __init__(self, obs_shape, action_space, recurrent_policy=False, dataset=None, resnet=False, pretrained=False):
    """Policy/classifier hybrid: actor-critic heads plus, for mnist/cifar10,
    an auxiliary two-way classification head.

    Args:
        obs_shape: 3-D (image) -> model.CNNBase; 1-D -> MLPBase.
        action_space: gym-style space; Discrete (the expected case) or Box.
        recurrent_policy: recurrence flag for the CNN base.
        dataset: dataset name; 'mnist' or 'cifar10' enables the classifier.
        resnet: accepted but unused in this body — presumably consumed
            elsewhere; confirm.
        pretrained: accepted but unused in this body — presumably consumed
            elsewhere; confirm.
    """
    super(myNet, self).__init__()
    self.dataset = dataset
    if len(obs_shape) == 3:  # our mnist case
        self.base = model.CNNBase(obs_shape[0], recurrent_policy, dataset=dataset)
    elif len(obs_shape) == 1:
        assert not recurrent_policy, \
            "Recurrent policy is not implemented for the MLP controller"
        self.base = MLPBase(obs_shape[0])
    else:
        raise NotImplementedError
    if action_space.__class__.__name__ == "Discrete":  # our case
        num_outputs = action_space.n
        self.dist = Categorical(self.base.output_size, num_outputs)
    elif action_space.__class__.__name__ == "Box":
        num_outputs = action_space.shape[0]
        self.dist = DiagGaussian(self.base.output_size, num_outputs)
    else:
        raise NotImplementedError
    if dataset in ['mnist', 'cifar10']:
        # Two-class auxiliary classifier (not the full 10 classes).
        self.clf = Categorical(self.base.output_size, 2)  # 10)
    self.state_size = self.base.state_size
def __init__(self, num_inputs, action_space):
    """Deep MLP actor-critic: a 256-256-128-128 trunk with LeakyReLU(0.1)
    activations, a scalar value head, and a 64-wide policy head."""
    super(BPW_MLPPolicy, self).__init__()
    self.action_space = action_space
    # Trunk layers with their paired activations.
    self.fc1 = nn.Linear(num_inputs, 256)
    self.lrelu1 = nn.LeakyReLU(0.1)
    self.fc2 = nn.Linear(256, 256)
    self.lrelu2 = nn.LeakyReLU(0.1)
    self.fc3 = nn.Linear(256, 128)
    self.lrelu3 = nn.LeakyReLU(0.1)
    self.fc4 = nn.Linear(128, 128)
    self.lrelu4 = nn.LeakyReLU(0.1)
    # Output heads.
    self.value = nn.Linear(128, 1)
    self.policy = nn.Linear(128, 64)
    self.lrelu_policy = nn.LeakyReLU(0.1)
    space_kind = action_space.__class__.__name__
    if space_kind == "Discrete":
        self.dist = Categorical(64, action_space.n)
    elif space_kind == "Box":
        self.dist = DiagGaussian(64, action_space.shape[0])
    else:
        raise NotImplementedError
    self.train()
    self.reset_parameters()
def __init__(self, num_inputs, action_space, use_gru):
    """Small-kernel CNN policy with an optional GRU recurrence."""
    super(CNNPolicy, self).__init__()
    # Three 2x2 stride-1 convolutions feeding a 512-wide feature layer.
    self.conv1 = nn.Conv2d(num_inputs, 32, 2, stride=1)
    self.conv2 = nn.Conv2d(32, 32, 2, stride=1)
    self.conv3 = nn.Conv2d(32, 32, 2, stride=1)
    self.linear1 = nn.Linear(32 * 4 * 4, 512)
    if use_gru:
        self.gru = nn.GRUCell(512, 512)
    self.critic_linear = nn.Linear(512, 1)
    space_kind = action_space.__class__.__name__
    if space_kind == "Discrete":
        self.dist = Categorical(512, action_space.n)
    elif space_kind == "Box":
        self.dist = DiagGaussian(512, action_space.shape[0])
    else:
        raise NotImplementedError
    self.train()
    self.reset_parameters()
def __init__(self, num_inputs, hidden_size, num_outputs):
    """Gaussian policy: Linear+ReLU then Linear+Tanh trunk feeding a
    diagonal-Gaussian head."""
    super(Policy, self).__init__()
    trunk_layers = [
        init_relu(nn.Linear(num_inputs, hidden_size)),
        nn.ReLU(),
        init_tanh(nn.Linear(hidden_size, hidden_size)),
        nn.Tanh(),
    ]
    self.base = nn.Sequential(*trunk_layers)
    self.dist = DiagGaussian(hidden_size, num_outputs)
def __init__(self, num_inputs, action_space, num_heads=1, reward_predictor=False, use_s=True, use_s_a=False, use_s_a_sprime=False):
    """MLP actor with a multi-head critic and an optional reward predictor.

    The reward predictor's input width depends on which conditioning flag
    is set: state only (use_s), state+action (use_s_a), or
    state+action+next-state (otherwise). At most one flag may be set.

    Args:
        num_inputs: observation dimensionality.
        action_space: gym-style space; Discrete or Box.
        num_heads: number of critic output heads.
        reward_predictor: build the auxiliary reward model `self.rp`.
        use_s / use_s_a / use_s_a_sprime: reward-predictor conditioning.
    """
    assert use_s + use_s_a + use_s_a_sprime <= 1
    super(MLPPolicy, self).__init__()
    self.use_s = use_s
    self.use_s_a = use_s_a
    self.use_s_a_sprime = use_s_a_sprime
    self.num_heads = num_heads
    self.action_space = action_space
    self.a_fc1 = nn.Linear(num_inputs, 64)
    self.a_fc2 = nn.Linear(64, 64)
    if action_space.__class__.__name__ == "Discrete":
        num_outputs = action_space.n
        self.dist = Categorical(64, num_outputs)
    elif action_space.__class__.__name__ == "Box":
        num_outputs = action_space.shape[0]
        self.dist = DiagGaussian(64, num_outputs)
    else:
        raise NotImplementedError
    self.critics = []
    # NOTE(review): this first group is captured before the critic and
    # reward predictor exist, so it holds only the actor/dist parameters.
    self.param_groups = [list(self.parameters())]
    cur_critic = self.build_critic(num_inputs, num_outputs=num_heads, hidden_size=64)
    self.critics.append(cur_critic)
    self.critics = nn.ModuleList(self.critics)
    for critic in list(self.critics):
        self.param_groups.append(list(critic.parameters()))
    if reward_predictor:
        if self.use_s:
            r_hat_input_size = num_inputs
        elif self.use_s_a:
            r_hat_input_size = num_inputs + num_outputs
        else:
            r_hat_input_size = num_inputs * 2 + num_outputs
        self.rp = self.build_critic(r_hat_input_size, num_outputs=1, hidden_size=64)
        self.param_groups.append(list(self.rp.parameters()))
    self.train()
    self.reset_parameters()
def __init__(
        self,
        name,
        ob_dim,
        latent_dim,
        in_layer=None,
        out_activation=None,
        hidden_dims=(64, 64, 64),
        hidden_activation=tf.nn.tanh,
        weight_init=tf.contrib.layers.xavier_initializer,
        bias_init=tf.zeros_initializer,
        reuse_scope=False,
):
    """Gaussian encoder: two MLPs map observations to the mean and the log
    variance of a diagonal Gaussian over a latent code, then sample `zs`.

    Args:
        name: variable-scope name for all created variables.
        ob_dim: observation dimensionality (placeholder width).
        latent_dim: dimensionality of the latent code.
        in_layer: optional existing tensor used as input; when None a new
            float32 placeholder of shape [None, ob_dim] is created.
        out_activation: output-layer activation for both MLPs.
        hidden_dims: hidden-layer widths for both MLPs. Fix: changed from
            a mutable list default (`[64, 64, 64]`) to a tuple — mutable
            defaults are shared across calls; behavior otherwise identical.
        hidden_activation: hidden-layer activation.
        weight_init: weight-initializer factory.
        bias_init: bias-initializer factory.
        reuse_scope: reuse existing variables in the scope.
    """
    with tf.variable_scope(name, reuse=reuse_scope):
        if in_layer is None:
            self.obs = tf.placeholder(tf.float32, shape=[None, ob_dim],
                                      name='obs')
        else:
            self.obs = in_layer
        self.mean_network = MLP('means', ob_dim, latent_dim,
                                out_activation=out_activation,
                                hidden_dims=hidden_dims,
                                hidden_activation=hidden_activation,
                                weight_init=weight_init,
                                bias_init=bias_init,
                                in_layer=self.obs)
        self.means = self.mean_network.layers['out']
        self.log_var_network = MLP('log_vars', ob_dim, latent_dim,
                                   out_activation=out_activation,
                                   hidden_dims=hidden_dims,
                                   hidden_activation=hidden_activation,
                                   weight_init=weight_init,
                                   bias_init=bias_init,
                                   in_layer=self.obs)
        self.log_vars = self.log_var_network.layers['out']
        self.distribution = DiagGaussian(self.means, self.log_vars)
        self.zs = self.distribution.sample()
def build_dist(self, action_space):
    """Create the distribution head matching `action_space`.

    Returns a Categorical head for Discrete spaces and a DiagGaussian head
    for Box spaces; any other space type is unsupported.
    """
    feature_size = self.recurrent_module.output_size
    if isinstance(action_space, Discrete):
        return Categorical(feature_size, action_space.n)
    if isinstance(action_space, Box):
        return DiagGaussian(feature_size, action_space.shape[0])
    raise NotImplementedError
def __init__(self, obs_shape, action_space, base_kwargs=None, activation=1, modulation=False):
    """Actor-critic with configurable CNN activation/modulation options.

    Args:
        obs_shape: 3-D -> CNNBase (with activation/modulation); 1-D -> MLPBase.
        action_space: gym-style space; Discrete or Box.
        base_kwargs: extra keyword arguments for the base network.
        activation: activation selector forwarded to CNNBase (integer code;
            semantics defined by CNNBase — confirm there).
        modulation: CNNBase modulation flag.
    """
    super(Policy, self).__init__()
    if base_kwargs is None:
        base_kwargs = {}
    if len(obs_shape) == 3:
        self.base = CNNBase(obs_shape[0], activation=activation,
                            modulation=modulation, **base_kwargs)
    elif len(obs_shape) == 1:
        self.base = MLPBase(obs_shape[0], **base_kwargs)
    else:
        raise NotImplementedError
    if action_space.__class__.__name__ == "Discrete":
        num_outputs = action_space.n
        self.dist = Categorical(self.base.output_size, num_outputs)
    elif action_space.__class__.__name__ == "Box":
        num_outputs = action_space.shape[0]
        self.dist = DiagGaussian(self.base.output_size, num_outputs)
    else:
        raise NotImplementedError
def __init__(self, num_inputs, action_space):
    """Nature-DQN style CNN actor-critic."""
    super(CNNPolicy, self).__init__()
    # Distribution head first (registration order preserved).
    space_kind = action_space.__class__.__name__
    if space_kind == "Discrete":
        num_outputs = action_space.n
        self.dist = Categorical(512, num_outputs)
    elif space_kind == "Box":
        num_outputs = action_space.shape[0]
        self.dist = DiagGaussian(512, num_outputs)
    else:
        raise NotImplementedError
    # Bookkeeping: stacked-frame count and action size.
    self.num_inputs = num_inputs
    self.num_outputs = num_outputs
    # Convolutional trunk and heads.
    self.conv1 = nn.Conv2d(num_inputs, 32, 8, stride=4)
    self.conv2 = nn.Conv2d(32, 64, 4, stride=2)
    self.conv3 = nn.Conv2d(64, 32, 3, stride=1)
    self.linear1 = nn.Linear(32 * 7 * 7, 512)
    self.critic_linear = nn.Linear(512, 1)
def __init__(self, obs_shape, action_space, base_kwargs=None):
    """Continuous-control policy: MLP base plus a diagonal-Gaussian head.

    Only flat (1-D) observations are supported.
    """
    super(Policy, self).__init__()
    if base_kwargs is None:
        base_kwargs = {}
    if len(obs_shape) != 1:
        raise NotImplementedError
    self.base = MLPBase(obs_shape[0], **base_kwargs)
    self.dist = DiagGaussian(self.base.output_size, action_space.shape[0])
def __init__(self, num_inputs, action_space, use_gru, act_func):
    """CNN policy with a selectable activation (relu/maxout/lwta), widened
    conv layers for channel-reducing activations, and optional GRU.

    Args:
        num_inputs: input image channels.
        action_space: gym-style space; Discrete or Box.
        use_gru: add a GRUCell for recurrence.
        act_func: one of 'relu', 'maxout', 'lwta'.

    Raises:
        ValueError: if `act_func` is not a supported name.
    """
    super(CNNPolicy, self).__init__()
    self.act_func = act_func
    self.acti = None
    # C widens layers for activations that keep only one of every C units
    # (maxout); relu/lwta keep the width unchanged.
    if act_func == 'relu':
        C = 1
        print(">> ||| USING RELU ACTIVATION FUNCTION ||| <<")
    elif act_func == 'maxout':
        C = 2
        self.acti = maxout
        print(">> ||| USING maxout ACTIVATION FUNCTION ||| <<")
    elif act_func == 'lwta':
        C = 1
        self.acti = lwta
        print(">> ||| USING LWTA ACTIVATION FUNCTION ||| <<")
    else:
        # Fix: an unknown act_func previously left C unbound and crashed
        # below with a NameError; fail fast with a clear error instead.
        raise ValueError("unknown act_func: %r" % (act_func,))
    print(C)
    self.conv1 = nn.Conv2d(num_inputs, 32*C, 8, stride=4)
    self.conv2 = nn.Conv2d(32, 64*C, 4, stride=2)
    self.conv3 = nn.Conv2d(64, 32*C, 3, stride=1)
    self.linear1 = nn.Linear(32 * 7 * 7, 512)
    if use_gru:
        self.gru = nn.GRUCell(512, 256)
    self.critic_linear = nn.Linear(256, 1)
    if action_space.__class__.__name__ == "Discrete":
        num_outputs = action_space.n
        self.dist = Categorical(256, num_outputs)
    elif action_space.__class__.__name__ == "Box":
        num_outputs = action_space.shape[0]
        self.dist = DiagGaussian(256, num_outputs)
    else:
        raise NotImplementedError
    self.train()
    self.reset_parameters()
def __init__(self, num_inputs, action_space, args):
    """Small Pommerman CNN: four padded 3x3 conv+BN layers, two FC layers
    with BN, and separate actor/critic linear heads.

    Args:
        num_inputs: input planes of the board observation.
        action_space: gym-style space; Discrete or Box.
        args: config providing `num_channels` and `board_size`.
    """
    super(PommeCNNPolicySmall, self).__init__()
    self.args = args
    # 3x3 kernels with padding 1 keep the board's spatial size constant.
    self.conv1 = nn.Conv2d(num_inputs, args.num_channels, 3, stride=1, padding=1)
    self.conv2 = nn.Conv2d(args.num_channels, args.num_channels, 3, stride=1, padding=1)
    self.conv3 = nn.Conv2d(args.num_channels, args.num_channels, 3, stride=1, padding=1)
    self.conv4 = nn.Conv2d(args.num_channels, args.num_channels, 3, stride=1, padding=1)
    self.bn1 = nn.BatchNorm2d(args.num_channels)
    self.bn2 = nn.BatchNorm2d(args.num_channels)
    self.bn3 = nn.BatchNorm2d(args.num_channels)
    self.bn4 = nn.BatchNorm2d(args.num_channels)
    # XXX: or should it go straight to 512?
    self.fc1 = nn.Linear(
        args.num_channels * (args.board_size) * (args.board_size), 1024)
    self.fc_bn1 = nn.BatchNorm1d(1024)
    self.fc2 = nn.Linear(1024, 512)
    self.fc_bn2 = nn.BatchNorm1d(512)
    self.critic_linear = nn.Linear(512, 1)
    self.actor_linear = nn.Linear(512, 1)
    if action_space.__class__.__name__ == "Discrete":
        num_outputs = action_space.n
        self.dist = Categorical(512, num_outputs)
    elif action_space.__class__.__name__ == "Box":
        num_outputs = action_space.shape[0]
        self.dist = DiagGaussian(512, num_outputs)
    else:
        raise NotImplementedError
    self.train()
    self.reset_parameters()
def __init__(self, obs_shape, action_shape, sensor_type, atom_num_inputs_o_env, dim, num_agents, unordered, independent, sigmoid, share, no_rnn): super(Policy, self).__init__() # Base network assert len(obs_shape) == 1, "We only handle flattened input." self.base = RNNBase(obs_shape[0], sensor_type, atom_num_inputs_o_env, dim, num_agents, unordered, independent, share, no_rnn) # Actor's final layer num_outputs = action_shape[0] if independent: self.dist = DiagGaussian(self.base.output_size, num_outputs, 1, sigmoid) else: self.dist = DiagGaussian(self.base.output_size, num_outputs, num_agents, sigmoid) self.state_size = self.base.state_size self.sigmoid = sigmoid
def __init__(self, action_space, architecture):
    """Policy over a pre-built encoder: attaches a distribution head sized
    by the encoder's output width.

    Args:
        action_space: gym-style space; Discrete or Box.
        architecture: dict providing 'encoder_output_size'.
    """
    super(Policy, self).__init__()
    self.encoder_output_size = architecture['encoder_output_size']
    if action_space.__class__.__name__ == "Discrete":
        self.num_outputs = action_space.n
        self.dist = Categorical(self.encoder_output_size, self.num_outputs)
    elif action_space.__class__.__name__ == "Box":
        # Fix: this branch previously assigned only a local `num_outputs`,
        # leaving `self.num_outputs` undefined for Box spaces while the
        # Discrete branch set it — now both branches set the attribute.
        self.num_outputs = action_space.shape[0]
        self.dist = DiagGaussian(self.encoder_output_size, self.num_outputs)
    else:
        raise NotImplementedError
    self.train()
def __init__(self, obs_shape, action_space, base_kwargs=None):
    """MLP actor-critic over flat observations with a Discrete/Box head."""
    super(Policy, self).__init__()
    if base_kwargs is None:
        base_kwargs = {}
    # NOTE: base_kwargs is accepted but not forwarded to MLPBase.
    self.base = MLPBase(obs_shape[0])
    space_kind = action_space.__class__.__name__
    if space_kind == "Discrete":
        self.dist = Categorical(self.base.hidden_size, action_space.n)
    elif space_kind == "Box":
        self.dist = DiagGaussian(self.base.hidden_size, action_space.shape[0])
    else:
        raise NotImplementedError
def __init__(self, obs_size, ac_size, inner_size):
    """Gaussian actor-critic: one tanh layer feeding a DiagGaussian actor
    head, and a three-layer tanh critic; zero-bias initialization."""
    super().__init__()
    init_ = lambda m: init(m, init_normc_, lambda x: nn.init.constant_(
        x, 0))
    self.actor_base = nn.Sequential(
        init_(nn.Linear(obs_size, inner_size)),
        nn.Tanh(),
    )
    self.actor_out = DiagGaussian(inner_size, ac_size)
    self.critic = nn.Sequential(
        init_(nn.Linear(obs_size, inner_size)),
        nn.Tanh(),
        init_(nn.Linear(inner_size, inner_size)),
        nn.Tanh(),
        init_(nn.Linear(inner_size, 1)),
    )
def __init__(self, num_inputs, action_space, use_gru, use_vae, use_batch_norm=False, use_residual=False, distribution='DiagGaussian'):
    """CNN actor-critic with optional GRU recurrence and an optional VAE
    branch whose decoder mirrors the encoder.

    Args:
        num_inputs: input image channels.
        action_space: gym-style space; Discrete or Box.
        use_gru: add a GRUCell over the 256-d features.
        use_vae: add mean/log-var projections and a deconvolutional decoder.
        use_batch_norm: enable batch norm inside the CNN blocks.
        use_residual: enable residual connections inside the CNN blocks.
        distribution: for Box spaces, 'DiagGaussian' or 'MixedDistribution'.
    """
    super(CNNPolicy, self).__init__()
    print('num_inputs=%s' % str(num_inputs))
    # Encoder: five CNN blocks; linear1 implies a 10x8 final feature map.
    self.conv1 = CNNBlock(num_inputs, 32, 7, 2, 3, use_batch_norm, False)
    self.conv2 = CNNBlock(32, 32, 3, 2, 1, use_batch_norm, use_residual)
    self.conv3 = CNNBlock(32, 32, 3, 2, 1, use_batch_norm, use_residual)
    self.conv4 = CNNBlock(32, 32, 3, 2, 1, use_batch_norm, use_residual)
    self.conv5 = CNNBlock(32, 32, 3, 1, 1, use_batch_norm, use_residual)
    self.linear1_drop = nn.Dropout(p=0.3)
    self.linear1 = nn.Linear(32 * 10 * 8, 256)
    self.gruhdim = 256
    if use_gru:
        self.gru = nn.GRUCell(self.gruhdim, self.gruhdim)
    self.use_vae = use_vae
    if use_vae:
        # Latent projections plus a decoder mirroring the conv stack.
        self.linearmean = nn.Linear(256, 256)
        self.linearvar = nn.Linear(256, 256)
        self.unlinearlatent = nn.Linear(256, 256)
        self.unlinear1 = nn.Linear(256, 32 * 10 * 8)
        self.unconv5 = CNNUnBlock(32, 32, 3, 1, 1, 0, use_batch_norm, use_residual)
        self.unconv4 = CNNUnBlock(32, 32, 3, 2, (1, 1), (1, 0), use_batch_norm, use_residual)
        self.unconv3 = CNNUnBlock(32, 32, 3, 2, 1, 1, use_batch_norm, use_residual)
        self.unconv2 = CNNUnBlock(32, 32, 3, 2, 1, 1, use_batch_norm, use_residual)
        self.unconv1 = CNNUnBlock(num_inputs, 32, 7, 2, 3, 1, False, False)
    self.critic_linear = nn.Linear(256, 1)
    if action_space.__class__.__name__ == "Discrete":
        num_outputs = action_space.n
        self.dist = Categorical(256, num_outputs)
    elif action_space.__class__.__name__ == "Box":
        num_outputs = action_space.shape[0]
        # NOTE(review): an unrecognized `distribution` string silently
        # leaves self.dist unset for Box spaces — confirm intent.
        if distribution == 'DiagGaussian':
            self.dist = DiagGaussian(256, num_outputs)
        elif distribution == 'MixedDistribution':
            self.dist = MixedDistribution(256, num_outputs)
    else:
        raise NotImplementedError
    self.train()
    self.reset_parameters()
def __init__(self, obs_shape, action_space, base_kwargs=None):
    """Learned transition model: maps a flat (state, action) concatenation
    to a Gaussian distribution over next states."""
    super(StateGen, self).__init__()
    if base_kwargs is None:
        base_kwargs = {}
    if len(obs_shape) != 1:
        raise NotImplementedError
    # Input width covers the state plus the (Box) action.
    self.base = StateMLPBase(obs_shape[0] + action_space.shape[0],
                             **base_kwargs)
    if action_space.__class__.__name__ != "Box":
        raise NotImplementedError
    # Output distribution is over next states, hence obs_shape[0] dims.
    self.dist = DiagGaussian(self.base.hidden_size, obs_shape[0])
def __init__(self, num_inputs, action_space, act_func, drop, num_updates):
    """MLP actor-critic with a selectable activation (tanh/maxout/lwta/relu),
    widened hidden layers for channel-reducing activations, and dropout/
    update-count bookkeeping.

    Args:
        num_inputs: observation dimensionality.
        action_space: gym-style space; Discrete or Box.
        act_func: one of 'tanh', 'maxout', 'lwta', 'relu'.
        drop: dropout setting stored for the forward pass.
        num_updates: total updates; also initializes a countdown counter.

    Raises:
        ValueError: if `act_func` is not one of the supported names.
    """
    super(MLPPolicy, self).__init__()
    self.drop = drop
    self.act_func = act_func
    self.num_updates = num_updates
    self.counter = num_updates
    # C widens the hidden layers for activations that keep only one of
    # every C units (maxout).
    # NOTE(review): the 'tanh' branch does not set self.acti — confirm the
    # forward pass handles that case separately.
    if act_func == 'tanh':
        C = 1
        print(">> ||| USING tanh ACTIVATION FUNCTION ||| <<")
    elif act_func == 'maxout':
        C = 2
        self.acti = maxout
        print(">> ||| USING maxout ACTIVATION FUNCTION ||| <<")
    elif act_func == 'lwta':
        self.acti = lwta
        C = 1
        print(">> ||| USING LWTA ACTIVATION FUNCTION ||| <<")
    elif act_func == 'relu':
        self.acti = F.relu
        C = 1
        print(">> ||| USING RELU ACTIVATION FUNCTION ||| <<")
    else:
        # Fix: an unknown act_func previously left C unbound and crashed
        # below with a NameError; fail fast with a clear error instead.
        raise ValueError("unknown act_func: %r" % (act_func,))
    print(C)
    self.action_space = action_space
    self.a_fc1 = nn.Linear(num_inputs, 64*C)
    self.a_fc2 = nn.Linear(64, 64*C)
    self.v_fc1 = nn.Linear(num_inputs, 64*C)
    self.v_fc2 = nn.Linear(64, 64*C)
    self.v_fc3 = nn.Linear(64, 1)
    if action_space.__class__.__name__ == "Discrete":
        num_outputs = action_space.n
        self.dist = Categorical(64, num_outputs)
    elif action_space.__class__.__name__ == "Box":
        num_outputs = action_space.shape[0]
        self.dist = DiagGaussian(64, num_outputs)
    else:
        raise NotImplementedError
    self.train()
    self.reset_parameters()