def __init__(self, img_params, model_params, latent_params):
    """Assemble the priors, the encoder and the decoder of the model.

    Args:
        img_params: mapping that provides ``'image_dim'`` and
            ``'image_size'``.
        model_params: mapping that provides ``'n_downsample'``, ``'dim'``,
            ``'n_res'``, ``'norm'``, ``'activ'``, ``'pad_type'``,
            ``'n_mlp'`` and ``'mlp_dim'``.
        latent_params: mapping that provides ``'continious'`` and
            ``'categorical'``; it is also handed to the encoder unchanged.
    """
    super(CatVAE, self).__init__()

    image_dim = img_params['image_dim']
    image_size = img_params['image_size']
    (n_downsample, dim, n_res, norm,
     activ, pad_type, n_mlp, mlp_dim) = (
        model_params[key]
        for key in ('n_downsample', 'dim', 'n_res', 'norm',
                    'activ', 'pad_type', 'n_mlp', 'mlp_dim')
    )

    # Continuous part of the latent code and its prior.
    self.continious_dim = latent_params['continious']
    self.prior_cont = Gaussian(self.continious_dim)

    # Categorical part of the latent code, its prior and the Gumbel helper.
    self.categorical_dim = latent_params['categorical']
    self.prior_catg = Categorical(self.categorical_dim)
    self.gumbel = Gumbel(self.categorical_dim)

    self.encoder = CatEncoder(
        n_downsample, n_res, n_mlp, image_size, image_dim, dim, mlp_dim,
        latent_params, norm, activ, pad_type)

    # The decoder consumes both latent parts side by side; its convolutional
    # input size is the image size halved once per downsampling step.
    self.decoder = Decoder(
        n_downsample, n_res, n_mlp,
        self.continious_dim + self.categorical_dim,
        mlp_dim,
        image_size // (2 ** n_downsample),
        dim, image_dim, norm, activ, pad_type)
def test_categorical():
    """Compare ``Categorical(3)`` KL and log-likelihood with NumPy references."""
    dist = Categorical(3)

    new_prob = np.array([random_softmax(3), random_softmax(3)])
    old_prob = np.array([random_softmax(3), random_softmax(3)])
    # One-hot samples: class 1 for the first row, class 2 for the second.
    x = np.array([[0, 1, 0],
                  [0, 0, 1]], dtype=np.float32)

    new_info_sym = dict(prob=tf.constant(new_prob))
    old_info_sym = dict(prob=tf.constant(old_prob))
    x_sym = tf.constant(x)

    # KL of a distribution with itself is zero.
    self_kl = dist.kl(new_info_sym, new_info_sym).eval(session=sess)
    np.testing.assert_allclose(self_kl, np.array([0., 0.]))

    # KL(old || new) against the closed-form expression.
    cross_kl = dist.kl(old_info_sym, new_info_sym).eval(session=sess)
    reference_kl = np.sum(
        old_prob * (np.log(old_prob + 1e-8) - np.log(new_prob + 1e-8)),
        axis=-1)
    np.testing.assert_allclose(cross_kl, reference_kl)

    # Log-likelihood of each one-hot sample under the old probabilities.
    logli = dist.logli(x_sym, old_info_sym).eval(session=sess)
    reference_logli = [np.log(old_prob[0][1] + 1e-8),
                       np.log(old_prob[1][2] + 1e-8)]
    np.testing.assert_allclose(logli, reference_logli, rtol=1e-5)
def __init__(self, num_inputs, action_space):
    """Create the conv encoder, the critic head and the action distribution.

    Args:
        num_inputs: number of input channels of the first convolution.
        action_space: either an object whose class is named ``Discrete``
            (``.n`` is used) or ``Box`` (``.shape[0]`` is used); anything
            else is passed to ``Categorical`` directly as the output count.
    """
    super(CNNPolicy, self).__init__()

    self.conv1 = nn.Conv2d(num_inputs, 32, 8, stride=4)
    self.conv2 = nn.Conv2d(32, 64, 4, stride=2)
    self.conv3 = nn.Conv2d(64, 32, 3, stride=1)

    # Activation shared by every layer of the network.
    self.act_func = F.leaky_relu

    self.linear1 = nn.Linear(32 * 7 * 7, 512)
    self.critic_linear = nn.Linear(512, 1)

    space_kind = action_space.__class__.__name__
    if space_kind == "Discrete":
        self.dist = Categorical(512, action_space.n)
    elif space_kind == "Box":
        self.dist = DiagGaussian(512, action_space.shape[0])
    else:
        # Fallback: ``action_space`` itself is used as the number of outputs.
        self.dist = Categorical(512, action_space)

    self.train()
    self.reset_parameters()
def __init__(self, obs_shape, action_space, recurrent_policy=False,
             dataset=None, resnet=False, pretrained=False):
    """Pick a base network by observation rank and attach the output heads.

    Args:
        obs_shape: observation shape; length 3 selects ``model.CNNBase``,
            length 1 selects ``MLPBase``.
        action_space: object whose class is named ``Discrete`` or ``Box``.
        recurrent_policy: forwarded to the CNN base; must be falsy for the
            MLP base.
        dataset: stored on the instance and forwarded to the CNN base; the
            values ``'mnist'`` and ``'cifar10'`` add an extra ``clf`` head.
        resnet: not used by this constructor.
        pretrained: not used by this constructor.

    Raises:
        NotImplementedError: for other observation ranks or action spaces.
    """
    super(myNet, self).__init__()
    self.dataset = dataset

    rank = len(obs_shape)
    if rank == 3:
        self.base = model.CNNBase(obs_shape[0], recurrent_policy,
                                  dataset=dataset)
    elif rank == 1:
        assert not recurrent_policy, \
            "Recurrent policy is not implemented for the MLP controller"
        self.base = MLPBase(obs_shape[0])
    else:
        raise NotImplementedError

    space_kind = action_space.__class__.__name__
    if space_kind == "Discrete":
        self.dist = Categorical(self.base.output_size, action_space.n)
    elif space_kind == "Box":
        self.dist = DiagGaussian(self.base.output_size,
                                 action_space.shape[0])
    else:
        raise NotImplementedError

    if dataset in ('mnist', 'cifar10'):
        # Additional two-way head on top of the base features.
        self.clf = Categorical(self.base.output_size, 2)

    self.state_size = self.base.state_size
def __init__(self, obs_shape, action_space, recurrent_policy, hidden_size,
             args):
    """Build the base network and the action distribution head.

    Args:
        obs_shape: observation shape; length 3 selects ``CNNBase``, length 1
            selects ``MLPBase``.
        action_space: object whose class is named ``Discrete`` or ``Box``.
        recurrent_policy: forwarded to the CNN base; must be falsy for the
            MLP base.
        hidden_size: forwarded to ``MLPBase``.
        args: forwarded to ``MLPBase``; ``args.leaky`` is stored as
            ``self.leaky``.

    Raises:
        NotImplementedError: for other observation ranks or action spaces.
    """
    super(Policy, self).__init__()

    rank = len(obs_shape)
    if rank == 3:
        self.base = CNNBase(obs_shape[0], recurrent_policy)
    elif rank == 1:
        assert not recurrent_policy, \
            "Recurrent policy is not implemented for the MLP controller"
        self.base = MLPBase(obs_shape[0], hidden_size, args)
    else:
        raise NotImplementedError

    space_kind = action_space.__class__.__name__
    if space_kind == "Discrete":
        self.dist = Categorical(self.base.output_size, action_space.n)
    elif space_kind == "Box":
        self.dist = DiagGaussian(self.base.output_size,
                                 action_space.shape[0])
    else:
        raise NotImplementedError

    self.state_size = self.base.state_size
    self.leaky = args.leaky
    self.scale = 1.
def __init__(self, num_inputs, action_space, num_heads=1, hidden_size=512):
    """Build ``num_heads`` representation/critic pairs and one action head.

    Args:
        num_inputs: forwarded to ``self.build_representation``.
        action_space: object whose class is named ``Discrete`` or ``Box``.
        num_heads: number of representation/critic pairs to create.
        hidden_size: feature width shared by representations, critics and
            the action distribution.

    Raises:
        NotImplementedError: for any other action space.
    """
    super(CNNPolicy, self).__init__()
    self.num_heads = num_heads

    # Plain lists while building; wrapped in ModuleList further down.
    self.representations = []
    self.critics = []
    for _head in range(num_heads):
        representation = self.build_representation(num_inputs,
                                                   hidden_size=hidden_size)
        self.representations.append(representation)
        critic = self.build_critic(hidden_size, 1)
        self.critics.append(critic)

    space_kind = action_space.__class__.__name__
    if space_kind == "Discrete":
        self.dist = Categorical(hidden_size, action_space.n)
    elif space_kind == "Box":
        self.dist = DiagGaussian(hidden_size, action_space.shape[0])
    else:
        raise NotImplementedError

    # Registration order (critics before representations) is kept as is,
    # because it determines the order of ``self.parameters()``.
    self.critics = nn.ModuleList(self.critics)
    self.representations = nn.ModuleList(self.representations)

    self.param_groups = [list(self.parameters())]

    self.train()
    self.reset_parameters()
def main():
    """Solve a 2x2 ``MouselabEnv`` and evaluate ``V`` at its initial state."""
    from distributions import Categorical
    from meta import MouselabEnv

    reward = Categorical([0, 1])
    env = MouselabEnv(2, 2, reward=reward)
    # Only the value function is used below; the other results are discarded.
    _q, value_fn, _policy, _info = solve(env)
    value_fn(env._state)
def actor(self, states, name='actor', reuse=False, trainable=True):
    """Build the actor head and return its action distribution.

    Args:
        states: input passed to ``self.actor_net``.
        name: variable scope name.
        reuse: forwarded to ``tf.variable_scope``.
        trainable: forwarded to ``self.actor_net`` and the dense layers.

    Returns:
        ``Categorical(logits)`` when ``self.act_space`` is a
        ``gym.spaces.Discrete``; otherwise ``Normal(mean=..., logstd=...)``.
    """
    with tf.variable_scope(name, reuse=reuse):
        features = self.actor_net(states, self.drop_rate,
                                  trainable=trainable)

        if isinstance(self.act_space, gym.spaces.Discrete):
            logits_layer = Dense(self.act_space.n, None,
                                 trainable=trainable, name="layer_logits")
            return Categorical(logits_layer(features))

        mean_layer = Dense(self.act_space.shape[0], None,
                           trainable=trainable, name='mean')
        mean = mean_layer(features)
        # The log standard deviation is a free variable initialised to -0.5;
        # it is not computed from the features.
        logstd = tf.get_variable(
            'logstd',
            initializer=-0.5 * np.ones(self.act_space.shape[0],
                                       dtype=np.float32))
        return Normal(mean=mean, logstd=logstd)
def __init__(self, obs_shape, action_space, one_hot, hid_size,
             recurrent_policy, label):
    """Build the base network and the action distribution head.

    Args:
        obs_shape: observation shape; length 3 selects ``CNNBase``, length 1
            selects ``MLPBase``.
        action_space: object whose class is named ``Discrete`` or ``Box``.
        one_hot: forwarded to the base network.
        hid_size: stored as ``self.hid_size`` and forwarded to the base.
        recurrent_policy: forwarded to the CNN base; must be falsy for the
            MLP base.
        label: stored as ``self.label``.

    Raises:
        NotImplementedError: for other observation ranks or action spaces.
    """
    super(EHRL_Policy, self).__init__()
    self.hid_size = hid_size
    self.label = label

    rank = len(obs_shape)
    if rank == 3:
        self.base = CNNBase(obs_shape[0], one_hot, self.hid_size,
                            recurrent_policy)
    elif rank == 1:
        assert not recurrent_policy, \
            "Recurrent policy is not implemented for the MLP controller"
        self.base = MLPBase(obs_shape[0], one_hot, self.hid_size)
    else:
        raise NotImplementedError

    space_kind = action_space.__class__.__name__
    if space_kind == "Discrete":
        self.dist = Categorical(self.base.output_size, action_space.n)
    elif space_kind == "Box":
        self.dist = DiagGaussian(self.base.output_size,
                                 action_space.shape[0])
    else:
        raise NotImplementedError

    self.state_size = self.base.state_size
def __init__(self, num_inputs, action_space, use_rp=False, num_heads=1):
    """Build the main network and, optionally, a separate "extra" network.

    Args:
        num_inputs: number of input channels of the first convolutions.
        action_space: object providing ``.n``, the number of actions.
        use_rp: when true, a second conv stack with its own hidden layer and
            critic is created and gets its own parameter group.
        num_heads: number of outputs of the main critic layer.
    """
    super(CNNPolicy, self).__init__()
    self.use_rp = use_rp

    self.conv1 = nn.Conv2d(num_inputs, 32, 8, stride=4)
    self.conv2 = nn.Conv2d(32, 64, 4, stride=2)
    self.conv3 = nn.Conv2d(64, 32, 3, stride=1)
    self.linear1 = nn.Linear(32 * 7 * 7, 512)
    self.critic_linear = nn.Linear(512, num_heads)

    self.dist = Categorical(512, action_space.n)

    if not use_rp:
        self.param_groups = [list(self.parameters())]
    else:
        self.extra_conv1 = nn.Conv2d(num_inputs, 32, 8, stride=4)
        self.extra_conv2 = nn.Conv2d(32, 64, 4, stride=2)
        self.extra_conv3 = nn.Conv2d(64, 32, 3, stride=1)
        self.extra_hidden = nn.Linear(32 * 7 * 7, 512)
        self.extra_critics = nn.Linear(512, 1)

        extra_modules = (self.extra_conv1, self.extra_conv2,
                         self.extra_conv3, self.extra_hidden,
                         self.extra_critics)
        len_params = sum(len(list(module.parameters()))
                         for module in extra_modules)

        # The extra modules were registered last, so their parameters are
        # the tail of ``self.parameters()``; they form the first group.
        all_params = list(self.parameters())
        self.param_groups = [all_params[-len_params:],
                             all_params[:-len_params]]

    self.train()
    self.reset_parameters()
def __init__(self, num_inputs, num_actions, use_gru, input_shape):
    """Build the conv head, hidden layer, optional GRU, critic and policy.

    Args:
        num_inputs: number of input channels of the first convolution.
        num_actions: number of outputs of the categorical action head.
        use_gru: when true, a ``GRUCell`` is added as ``self.gru``.
        input_shape: tuple describing one input sample without the batch
            axis; used to probe the conv head's output size.
    """
    super(CNNPolicy, self).__init__()
    self.h = None

    conv_layers = [
        nn.Conv2d(num_inputs, 32, 8, stride=4), nn.ReLU(True),
        nn.Conv2d(32, 64, 4, stride=2), nn.ReLU(True),
        nn.Conv2d(64, 32, 3, stride=1), nn.ReLU(),
    ]
    self.conv_head = nn.Sequential(*conv_layers)

    # Push one random sample through the conv head to learn how many
    # elements it produces.
    probe = torch.autograd.Variable(torch.randn((1, ) + input_shape))
    self.conv_out_size = self.conv_head(probe).nelement()

    self.hidden_size = 512
    self.linear1 = nn.Linear(self.conv_out_size, self.hidden_size)

    if use_gru:
        self.gru = nn.GRUCell(512, 512)

    self.critic_linear = nn.Linear(512, 1)
    self.dist = Categorical(512, num_actions)

    # This variant starts in evaluation mode.
    self.eval()
    self.reset_parameters()
def __init__(self, num_inputs, input_shape, params):
    """Build the conv head, hidden layer, optional GRU, critic and policy.

    Args:
        num_inputs: number of input channels of the first convolution.
        input_shape: tuple describing one input sample without the batch
            axis; used to probe the conv head's output size.
        params: configuration object; ``conv1_size``, ``conv2_size``,
            ``conv3_size``, ``hidden_size``, ``recurrent_policy`` and
            ``num_actions`` are read here, and the object is stored as
            ``self.params``.
    """
    super(CNNPolicy, self).__init__()

    self.conv_head = nn.Sequential(
        nn.Conv2d(num_inputs, params.conv1_size, 8, stride=4),
        nn.ReLU(True),
        nn.Conv2d(params.conv1_size, params.conv2_size, 4, stride=2),
        nn.ReLU(True),
        nn.Conv2d(params.conv2_size, params.conv3_size, 3, stride=1),
        nn.ReLU(True))

    # Probe the conv head with a single random sample. The forward pass is
    # done once and its result reused for both the diagnostic print and the
    # size measurement (previously it was recomputed three times).
    conv_input = torch.randn((1, ) + input_shape)
    conv_output = self.conv_head(conv_input)
    print(conv_input.size(), conv_output.size(), conv_output.size())
    self.conv_out_size = conv_output.nelement()

    self.hidden_size = params.hidden_size
    self.linear1 = nn.Linear(self.conv_out_size, self.hidden_size)

    if params.recurrent_policy:
        # TODO: check speedup with masked GRU optimization
        self.gru = nn.GRUCell(self.hidden_size, self.hidden_size)

    self.critic_linear = nn.Linear(self.hidden_size, 1)
    self.dist = Categorical(self.hidden_size, params.num_actions)

    self.params = params
    self.train()
    self.reset_parameters()
def __init__(self, num_inputs, action_space, n_contexts):
    """Build a conv encoder with context-augmented action and critic heads.

    Args:
        num_inputs: number of input channels (stored as ``self.num_inputs``).
        action_space: object providing ``.n``, the number of actions
            (stored as ``self.num_outputs``).
        n_contexts: number of extra context features appended to the
            encoder output before the action and critic heads. This value
            is now honoured; it used to be overwritten with a hard-coded 2.
    """
    super(CNNPolicy, self).__init__()

    num_outputs = action_space.n

    # This variant constructs the distribution helper without arguments.
    self.dist = Categorical()

    self.num_inputs = num_inputs  # num of stacked frames
    self.num_outputs = num_outputs  # action size

    self.conv1 = nn.Conv2d(num_inputs, 32, 8, stride=4)
    self.conv2 = nn.Conv2d(32, 64, 4, stride=2)
    self.conv3 = nn.Conv2d(64, 32, 3, stride=1)

    l_size = 10  # deliberately small bottleneck (512 in sibling policies)
    self.linear1 = nn.Linear(32 * 7 * 7, l_size)

    # NOTE(review): the heads take ``l_size + n_contexts`` inputs; callers
    # must pass the same ``n_contexts`` as the width of the context vector
    # they supply at forward time (forward pass not visible here — confirm).
    self.action_linear = nn.Linear(l_size + n_contexts, 4)
    self.action_linear2 = nn.Linear(4, num_outputs)

    self.critic_linear = nn.Linear(l_size + n_contexts, 1)
def __init__(self, num_inputs, action_space):
    """Build the shared MLP trunk, the value head and the policy head.

    Args:
        num_inputs: width of the input feature vector.
        action_space: object whose class is named ``Discrete`` or ``Box``;
            stored as ``self.action_space``.

    Raises:
        NotImplementedError: for any other action space.
    """
    super(BPW_MLPPolicy, self).__init__()
    self.action_space = action_space

    # Trunk: four linear layers, each paired with its own LeakyReLU(0.1).
    self.fc1 = nn.Linear(num_inputs, 256)
    self.lrelu1 = nn.LeakyReLU(0.1)
    self.fc2 = nn.Linear(256, 256)
    self.lrelu2 = nn.LeakyReLU(0.1)
    self.fc3 = nn.Linear(256, 128)
    self.lrelu3 = nn.LeakyReLU(0.1)
    self.fc4 = nn.Linear(128, 128)
    self.lrelu4 = nn.LeakyReLU(0.1)

    self.value = nn.Linear(128, 1)

    self.policy = nn.Linear(128, 64)
    self.lrelu_policy = nn.LeakyReLU(0.1)

    space_kind = action_space.__class__.__name__
    if space_kind == "Discrete":
        self.dist = Categorical(64, action_space.n)
    elif space_kind == "Box":
        self.dist = DiagGaussian(64, action_space.shape[0])
    else:
        raise NotImplementedError

    self.train()
    self.reset_parameters()
def __init__(self, components, alpha_0=None, a_0=None, b_0=None,
             weights=None, weights_obj=None):
    """Store the components and create (or adopt) the weights object.

    Exactly one of the following must be supplied:

    * ``alpha_0`` — a ``Categorical`` is created;
    * ``a_0`` together with ``b_0`` — a ``CategoricalAndConcentration`` is
      created;
    * ``weights_obj`` — used as the weights object as-is.

    Args:
        components: non-empty sequence of component objects.
        alpha_0: argument for ``Categorical``.
        a_0: argument for ``CategoricalAndConcentration``.
        b_0: argument for ``CategoricalAndConcentration``.
        weights: optional initial weights forwarded to the created object.
        weights_obj: ready-made weights object.

    Raises:
        AssertionError: if ``components`` is empty or if the number of
            supplied specifications is not exactly one.
    """
    assert len(components) > 0

    # A chained three-way XOR is also true when all three operands are true,
    # so count the supplied specifications instead.
    specs_given = (
        (alpha_0 is not None)
        + (a_0 is not None and b_0 is not None)
        + (weights_obj is not None)
    )
    assert specs_given == 1

    self.components = components

    if alpha_0 is not None:
        self.weights = Categorical(alpha_0=alpha_0, K=len(components),
                                   weights=weights)
    elif weights_obj is not None:
        self.weights = weights_obj
    else:
        self.weights = CategoricalAndConcentration(
            a_0=a_0, b_0=b_0, K=len(components), weights=weights)

    self.labels_list = []
def __init__(self, num_inputs, action_space):
    """Build separate actor and value MLPs plus the action distribution.

    Args:
        num_inputs: width of the input feature vector.
        action_space: object whose class is named ``Discrete`` or ``Box``;
            stored as ``self.action_space``.

    Raises:
        NotImplementedError: for any other action space.
    """
    super(MLPPolicy, self).__init__()
    self.action_space = action_space

    self.input_norm = WelfordNormalization(num_inputs)

    # Actor branch.
    self.a_fc1 = nn.Linear(num_inputs, 64)
    self.a_fc2 = nn.Linear(64, 64)

    # Value branch.
    self.v_fc1 = nn.Linear(num_inputs, 64)
    self.v_fc2 = nn.Linear(64, 64)
    self.v_fc3 = nn.Linear(64, 1)

    space_kind = action_space.__class__.__name__
    if space_kind == "Discrete":
        self.dist = Categorical(64, action_space.n)
    elif space_kind == "Box":
        self.dist = DiagGaussian(64, action_space.shape[0])
    else:
        raise NotImplementedError

    self.train()
    self.reset_parameters()
def __init__(self, num_inputs, action_space, use_gru):
    """Build a four-layer conv encoder, optional GRU, critic and policy.

    Args:
        num_inputs: number of input channels of the first convolution.
        action_space: object whose class is named ``Discrete`` or ``Box``.
        use_gru: when true, a ``GRUCell`` is added as ``self.gru``.

    Raises:
        NotImplementedError: for any other action space.
    """
    super(CNNPolicy, self).__init__()

    # Width of the features produced by ``linear1`` and consumed by the
    # critic and the action distribution.
    hidden_size = 256

    self.conv1 = nn.Conv2d(num_inputs, 32, 8, stride=2)
    self.conv2 = nn.Conv2d(32, 32, 4, stride=2)
    self.conv3 = nn.Conv2d(32, 32, 4, stride=2)
    self.conv4 = nn.Conv2d(32, 32, 4, stride=1)

    self.linear1 = nn.Linear(32 * 2 * 2, hidden_size)

    if use_gru:
        # The cell was previously sized 512x512, which does not match the
        # 256-wide features every other layer here uses.
        # NOTE(review): any code that sizes the recurrent state (not visible
        # here) must agree with ``hidden_size`` — confirm.
        self.gru = nn.GRUCell(hidden_size, hidden_size)

    self.critic_linear = nn.Linear(hidden_size, 1)

    space_kind = action_space.__class__.__name__
    if space_kind == "Discrete":
        self.dist = Categorical(hidden_size, action_space.n)
    elif space_kind == "Box":
        self.dist = DiagGaussian(hidden_size, action_space.shape[0])
    else:
        raise NotImplementedError

    self.train()
    self.reset_parameters()
def __init__(self, num_inputs, action_space):
    """Build the conv encoder with two-layer critic and actor heads.

    Args:
        num_inputs: number of input channels of the first convolution.
        action_space: object whose class is named ``Discrete`` or ``Box``.

    Raises:
        NotImplementedError: for any other action space.
    """
    super(CNNPolicy2, self).__init__()

    self.conv1 = nn.Conv2d(num_inputs, 32, 8, stride=4)
    self.conv2 = nn.Conv2d(32, 64, 4, stride=2)
    self.conv3 = nn.Conv2d(64, 32, 3, stride=1)
    self.linear1 = nn.Linear(32 * 7 * 7, 512)

    # Critic: 512 -> 200 -> 1.
    self.critic_linear1 = nn.Linear(512, 200)
    self.critic_linear2 = nn.Linear(200, 1)

    # Actor: 512 -> 200, followed by the distribution head below.
    self.actor_linear1 = nn.Linear(512, 200)

    space_kind = action_space.__class__.__name__
    if space_kind == "Discrete":
        self.dist = Categorical(200, action_space.n)
    elif space_kind == "Box":
        self.dist = DiagGaussian(200, action_space.shape[0])
    else:
        raise NotImplementedError

    self.train()
    self.reset_parameters()
def __init__(self, num_inputs, action_space, use_gru):
    """Build a small-kernel conv encoder, optional GRU, critic and policy.

    Args:
        num_inputs: number of input channels of the first convolution.
        action_space: object whose class is named ``Discrete`` or ``Box``.
        use_gru: when true, a ``GRUCell`` is added as ``self.gru``.

    Raises:
        NotImplementedError: for any other action space.
    """
    super(CNNPolicy, self).__init__()

    # Three 2x2 convolutions with stride 1.
    self.conv1 = nn.Conv2d(num_inputs, 32, 2, stride=1)
    self.conv2 = nn.Conv2d(32, 32, 2, stride=1)
    self.conv3 = nn.Conv2d(32, 32, 2, stride=1)

    self.linear1 = nn.Linear(32 * 4 * 4, 512)

    if use_gru:
        self.gru = nn.GRUCell(512, 512)

    self.critic_linear = nn.Linear(512, 1)

    space_kind = action_space.__class__.__name__
    if space_kind == "Discrete":
        self.dist = Categorical(512, action_space.n)
    elif space_kind == "Box":
        self.dist = DiagGaussian(512, action_space.shape[0])
    else:
        raise NotImplementedError

    self.train()
    self.reset_parameters()
def __init__(self, obs_shape, action_space, base=None, base_kwargs=None):
    """Instantiate the base network and the matching action distribution.

    Args:
        obs_shape: observation shape; when ``base`` is not given, length 3
            selects ``CNNBase`` and length 1 selects ``MLPBase``.
        action_space: object whose class is named ``Discrete``, ``Box`` or
            ``MultiBinary``.
        base: optional callable used to build the base network.
        base_kwargs: optional keyword arguments for ``base``.

    Raises:
        NotImplementedError: for other observation ranks (when ``base`` is
            not given) or other action spaces.
    """
    super(Policy, self).__init__()

    base_kwargs = {} if base_kwargs is None else base_kwargs

    if base is None:
        rank = len(obs_shape)
        if rank == 3:
            base = CNNBase
        elif rank == 1:
            base = MLPBase
        else:
            raise NotImplementedError

    self.base = base(obs_shape[0], **base_kwargs)

    space_kind = action_space.__class__.__name__
    if space_kind == "Discrete":
        # NOTE(review): the input width is fixed at 512 here, while the
        # other branches use ``self.base.output_size`` — confirm intended.
        num_action_outputs = 512
        self.dist = Categorical(num_action_outputs, action_space.n)
    elif space_kind == "Box":
        self.dist = DiagGaussian(self.base.output_size,
                                 action_space.shape[0])
    elif space_kind == "MultiBinary":
        self.dist = Bernoulli(self.base.output_size, action_space.shape[0])
    else:
        raise NotImplementedError
def __init__(self, num_inputs, num_actions, use_gru, input_shape):
    """Build the conv head, depth head, optional GRU, critic and policy.

    Args:
        num_inputs: number of input channels of the first convolution.
        num_actions: number of outputs of the categorical action head.
        use_gru: when true, a ``GRUCell`` is added as ``self.gru``.
        input_shape: tuple describing one input sample without the batch
            axis; used to probe the conv head's output size.
    """
    super(CNNDepthPolicy, self).__init__()

    self.conv_head = nn.Sequential(
        nn.Conv2d(num_inputs, 32, 8, stride=4),
        nn.ReLU(True),
        nn.Conv2d(32, 64, 4, stride=2),
        nn.ReLU(True),
        nn.Conv2d(64, 32, 3, stride=1),
        nn.ReLU())

    # 1x1 convolution applied to the 32-channel conv features.
    self.depth_head = nn.Conv2d(32, 8, 1, 1)

    # Probe the conv head with one random sample. The forward pass runs
    # once and is reused for the diagnostic print and the size measurement
    # (it used to be computed twice).
    conv_input = torch.autograd.Variable(torch.randn((1, ) + input_shape))
    conv_output = self.conv_head(conv_input)
    print(conv_input.size(), conv_output.size())
    self.conv_out_size = conv_output.nelement()

    self.linear1 = nn.Linear(self.conv_out_size, 512)

    if use_gru:
        self.gru = nn.GRUCell(512, 512)

    self.critic_linear = nn.Linear(512, 1)
    self.dist = Categorical(512, num_actions)

    self.train()
    self.reset_parameters()
def test_kl_sym():
    """Check that ``dist1.kl`` equals the sum of the two categorical KLs."""
    old_id_0_prob = np.array([random_softmax(5)])
    old_id_1_prob = np.array([random_softmax(3)])
    new_id_0_prob = np.array([random_softmax(5)])
    new_id_1_prob = np.array([random_softmax(3)])

    old_dist_info_vars = {
        'id_0_prob': tf.constant(old_id_0_prob),
        'id_1_prob': tf.constant(old_id_1_prob),
    }
    new_dist_info_vars = {
        'id_0_prob': tf.constant(new_id_0_prob),
        'id_1_prob': tf.constant(new_id_1_prob),
    }

    actual = dist1.kl(old_dist_info_vars,
                      new_dist_info_vars).eval(session=sess)

    # Reference: KL of each part computed on its own, then added.
    kl_part_0 = Categorical(5).kl(
        dict(prob=old_id_0_prob), dict(prob=new_id_0_prob)
    ).eval(session=sess)
    kl_part_1 = Categorical(3).kl(
        dict(prob=old_id_1_prob), dict(prob=new_id_1_prob)
    ).eval(session=sess)

    np.testing.assert_allclose(actual, kl_part_0 + kl_part_1)
def __init__(self, num_inputs, action_space, num_heads=1,
             reward_predictor=False, use_s=True, use_s_a=False,
             use_s_a_sprime=False):
    """Build the actor, a multi-head critic and an optional reward predictor.

    Args:
        num_inputs: width of the input feature vector.
        action_space: object whose class is named ``Discrete`` or ``Box``;
            stored as ``self.action_space``.
        num_heads: number of critic outputs.
        reward_predictor: when true, ``self.rp`` is built with
            ``self.build_critic``.
        use_s: reward predictor input is ``num_inputs`` wide.
        use_s_a: reward predictor input is ``num_inputs + num_outputs`` wide.
        use_s_a_sprime: at most one of the three ``use_*`` flags may be set;
            when neither ``use_s`` nor ``use_s_a`` is set the input is
            ``num_inputs * 2 + num_outputs`` wide.

    Raises:
        AssertionError: if more than one ``use_*`` flag is set.
        NotImplementedError: for any other action space.
    """
    assert use_s + use_s_a + use_s_a_sprime <= 1
    super(MLPPolicy, self).__init__()

    self.use_s = use_s
    self.use_s_a = use_s_a
    self.use_s_a_sprime = use_s_a_sprime
    self.num_heads = num_heads
    self.action_space = action_space

    self.a_fc1 = nn.Linear(num_inputs, 64)
    self.a_fc2 = nn.Linear(64, 64)

    space_kind = action_space.__class__.__name__
    if space_kind == "Discrete":
        num_outputs = action_space.n
        self.dist = Categorical(64, num_outputs)
    elif space_kind == "Box":
        num_outputs = action_space.shape[0]
        self.dist = DiagGaussian(64, num_outputs)
    else:
        raise NotImplementedError

    # First parameter group: everything registered so far (the actor side).
    self.param_groups = [list(self.parameters())]

    critic_heads = [self.build_critic(num_inputs, num_outputs=num_heads,
                                      hidden_size=64)]
    self.critics = nn.ModuleList(critic_heads)
    # One further parameter group per critic.
    self.param_groups.extend(
        list(critic.parameters()) for critic in list(self.critics))

    if reward_predictor:
        if self.use_s:
            r_hat_input_size = num_inputs
        elif self.use_s_a:
            r_hat_input_size = num_inputs + num_outputs
        else:
            r_hat_input_size = num_inputs * 2 + num_outputs
        self.rp = self.build_critic(r_hat_input_size, num_outputs=1,
                                    hidden_size=64)
        self.param_groups.append(list(self.rp.parameters()))

    self.train()
    self.reset_parameters()
def __init__(self, num_inputs, num_outputs, action_space, use_gru):
    """Build the option policy: one conv layer, a hidden layer and heads.

    Args:
        num_inputs: number of input channels of the convolution.
        num_outputs: number of categorical outputs; when ``None`` the size
            and the distribution type are derived from ``action_space``.
        action_space: object whose class is named ``Discrete`` or ``Box``;
            only consulted when ``num_outputs`` is ``None``.
        use_gru: not used by this constructor.

    Raises:
        NotImplementedError: if ``num_outputs`` is ``None`` and the action
            space is neither ``Discrete`` nor ``Box``.
    """
    super(OptionPolicy, self).__init__()

    # Identity comparison: ``== None`` would call a custom ``__eq__`` and is
    # ambiguous for array-like values.
    if num_outputs is None:
        space_kind = action_space.__class__.__name__
        if space_kind == "Discrete":
            num_outputs = action_space.n
            self.dist = Categorical(512, num_outputs)
        elif space_kind == "Box":
            num_outputs = action_space.shape[0]
            self.dist = DiagGaussian(512, num_outputs)
        else:
            raise NotImplementedError
    else:
        self.dist = Categorical(512, num_outputs)

    self.conv1 = nn.Conv2d(num_inputs, 16, 3, stride=1, padding=1)
    self.linear1 = nn.Linear(400, 512)
    self.linear_critic = nn.Linear(512, 1)

    self.train()
    self.reset_parameters()
def build_dist(self, action_space):
    """Return the action distribution head matching ``action_space``.

    Args:
        action_space: a ``Discrete`` (``.n`` outputs) or ``Box``
            (``.shape[0]`` outputs) instance.

    Returns:
        ``Categorical`` for ``Discrete`` and ``DiagGaussian`` for ``Box``,
        both fed by ``self.recurrent_module.output_size`` features.

    Raises:
        NotImplementedError: for any other action space type.
    """
    if isinstance(action_space, Discrete):
        return Categorical(self.recurrent_module.output_size,
                           action_space.n)
    if isinstance(action_space, Box):
        return DiagGaussian(self.recurrent_module.output_size,
                            action_space.shape[0])
    raise NotImplementedError
def log_likelihood(self, x, K_extra=1):
    """Sample-based estimate of the log likelihood of ``x``.

    A truncated mixture is formed from one resampled observation
    distribution per occupied component plus ``K_extra`` distributions
    resampled without data, together with freshly resampled weights.

    Args:
        x: data to score; converted with ``np.asarray``.
        K_extra: number of additional, unpopulated components to draw.

    Returns:
        Sum over rows of ``x`` of the log of the mixture density, combined
        with ``np.logaddexp.reduce``.
    """
    x = np.asarray(x)
    num_occupied = len(self._get_occupied())
    num_components = num_occupied + K_extra

    def _sampled_obs(label=None):
        # Copy the template distribution and resample it, conditioned on the
        # data carrying ``label`` when one is given.
        distn = copy.deepcopy(self.obs_distn)
        if label is None:
            distn.resample()
        else:
            distn.resample(data=self._get_data_withlabel(label))
        return distn

    # Occupied components first, then the extra ones.
    obs_distns = [_sampled_obs(label) for label in range(num_occupied)]
    obs_distns.extend(_sampled_obs() for _ in range(K_extra))

    # Mixture weights resampled from the single label sequence.
    weights = Categorical(alpha_0=self.alpha_0, K=num_components,
                          weights=None)
    assert len(self.labels_list) == 1
    weights.resample(data=self.labels_list[0].z)

    # Column j holds the log likelihood of x under component j.
    vals = np.empty((x.shape[0], num_components))
    for column, distn in enumerate(obs_distns):
        vals[:, column] = distn.log_likelihood(x)
    vals += weights.log_likelihood(np.arange(num_components))

    assert not np.isnan(vals).any()
    return np.logaddexp.reduce(vals, axis=1).sum()
def __init__(self, num_action, input_shape=(120, 160, 3), batch_size=64,
             training=True, model_path=None, k=4, clip=0.2,
             use_clipped=True, entropy_coef=0.01, max_grad_norm=0.5,
             value_loss_coef=0.5, *args, **kwargs):
    """Set up networks, optimisers, memory and PPO settings of the agent.

    Args:
        num_action: not used by this constructor.
        input_shape: image shape handed to ``Memory``.
        batch_size: handed to ``Memory`` and stored as ``self.batch_size``.
        training: not used by this constructor.
        model_path: stored as ``self.model_path``.
        k: stored as ``self.k``.
        clip: stored as ``self.clip``.
        use_clipped: stored as ``self.use_clipped``.
        entropy_coef: stored as ``self.entropy_coef``.
        max_grad_norm: stored as ``self.max_grad_norm``.
        value_loss_coef: stored as ``self.value_loss_coef``.
        *args: forwarded to the parent constructor.
        **kwargs: forwarded to the parent constructor.
    """
    super(PPOAgent, self).__init__(*args, **kwargs)

    # Discrete choices available for each control.
    self.steer = [-0.3, -0.15, 0, 0.15, 0.3]
    self.throttle = [0, 0.2, 0.4, 0.6, 0.8]

    # Networks; the target copy starts from the online weights.
    self.perception = ImpalaPerception()
    self.actor_critic = ImpalaActorCritic(1216, 128)
    self.actor_critic_target = ImpalaActorCritic(1216, 128)
    self.actor_critic_target.load_state_dict(self.actor_critic.state_dict())
    self.tau = 1e-3

    # Optimisers share one learning rate.
    self.lr = 0.001
    self.actor_critic_optim = optim.Adam(self.actor_critic.parameters(),
                                         lr=self.lr)
    self.perc_optim = optim.Adam(self.perception.parameters(), lr=self.lr)

    # Common settings and bookkeeping.
    self.gamma = 0.99
    self.memory = Memory(batch_size=batch_size, img_shape=input_shape)
    self.model_path = model_path
    self.n = 1
    self.train_step = 0
    self.r_sum = 0
    self.last_state = None
    self.last_actions = None
    self.batch_size = batch_size

    # PPO hyper-parameters.
    self.k = k
    self.clip = clip
    self.entropy_coef = entropy_coef
    self.use_clipped = use_clipped
    self.max_grad_norm = max_grad_norm
    self.value_loss_coef = value_loss_coef

    # One categorical head per control, sized by its list of choices.
    hidden = self.actor_critic.num_hidden
    self.dist1 = Categorical(hidden, len(self.steer))
    self.dist2 = Categorical(self.actor_critic.num_hidden,
                             len(self.throttle))
def log_likelihood(self, x, K_extra=1):
    """Sample-based estimate of the log likelihood of ``x``.

    Observation distributions are resampled for the occupied components,
    ``K_extra`` further ones are resampled without data, and a new set of
    weights is resampled; together they form a truncated mixture under which
    ``x`` is scored.

    Args:
        x: data to score; converted with ``np.asarray``.
        K_extra: number of additional, unpopulated components to draw.

    Returns:
        Sum over rows of ``x`` of ``logsumexp`` across components.
    """
    x = np.asarray(x)
    occupied = self._get_occupied()
    n_occupied = len(occupied)
    K_total = n_occupied + K_extra

    obs_distns = []

    # Components that currently own data: resample given that data.
    for label in range(n_occupied):
        component = copy.deepcopy(self.obs_distn)
        component.resample(data=self._get_data_withlabel(label))
        obs_distns.append(component)

    # Unpopulated components: resample without data.
    for _ in range(K_extra):
        component = copy.deepcopy(self.obs_distn)
        component.resample()
        obs_distns.append(component)

    # Mixture weights resampled from the single label sequence.
    weights = Categorical(alpha_0=self.alpha_0, K=K_total, weights=None)
    assert len(self.labels_list) == 1
    weights.resample(data=self.labels_list[0].z)

    # Per-component log likelihoods, then add the log weights.
    vals = np.empty((x.shape[0], K_total))
    for idx, component in enumerate(obs_distns):
        vals[:, idx] = component.log_likelihood(x)
    vals += weights.log_likelihood(np.arange(K_total))

    assert not np.isnan(vals).any()
    return logsumexp(vals, axis=1).sum()
def __init__(self, num_inputs, action_space):
    """Build the conv encoder, the critic head and a categorical policy.

    Args:
        num_inputs: number of input channels of the first convolution.
        action_space: object providing ``.n``, the number of actions.
    """
    super(CNNPolicy, self).__init__()

    hidden = 512

    self.conv1 = nn.Conv2d(num_inputs, 32, 8, stride=4)
    self.conv2 = nn.Conv2d(32, 64, 4, stride=2)
    self.conv3 = nn.Conv2d(64, 32, 3, stride=1)
    self.linear1 = nn.Linear(32 * 7 * 7, hidden)

    self.critic_linear = nn.Linear(hidden, 1)
    self.dist = Categorical(hidden, action_space.n)

    self.train()
    self.reset_parameters()
def __init__(self, obs_space, action_space, base=None, base_kwargs=None):
    """Build an ``MLPBase`` and a categorical action head.

    Args:
        obs_space: object providing ``.shape``; its first entry is the base
            network's input size.
        action_space: object providing ``.n``, the number of actions.
        base: not used by this constructor (``MLPBase`` is always built).
        base_kwargs: optional keyword arguments for ``MLPBase``.
    """
    super(Policy, self).__init__()

    obs_shape = obs_space.shape
    base_kwargs = {} if base_kwargs is None else base_kwargs

    self.base = MLPBase(obs_shape[0], **base_kwargs)
    self.dist = Categorical(self.base.output_size, action_space.n)
def __init__(self, min_tactus, max_tactus):
    """Build one categorical distribution per interval and pass them up.

    For every interval ``first`` in ``range(min_tactus, max_tactus)`` each
    interval ``second`` gets the weight
    ``exp(-(0.5 * |first - second|) ** 2)``; the weights are normalised to
    sum to one.

    Args:
        min_tactus: first interval (inclusive).
        max_tactus: end of the interval range (exclusive).
    """
    intervals = list(range(min_tactus, max_tactus))

    distributions = []
    for first in intervals:
        weights = [np.exp(-(.5 * abs(first - second)) ** 2)
                   for second in intervals]
        # Normalising constant computed once per row instead of once per
        # element.
        total = sum(weights)
        normalised = [weight / total for weight in weights]
        # The last probability is left out.
        # NOTE(review): presumably ``Categorical`` derives it from the
        # others — confirm against its constructor.
        distributions.append(Categorical(intervals, normalised[:-1]))

    # The intervals double as the conditioning values.
    super().__init__(intervals, distributions)
def __init__(self, num_inputs, action_size):
    """Build the conv encoder, the critic head and a categorical policy.

    Args:
        num_inputs: number of input channels of the first convolution; the
            value 6 selects a flattened conv size of 11264, any other value
            selects ``32 * 7 * 7``.
        action_size: number of outputs of the categorical action head.
    """
    super(CNNPolicy, self).__init__()

    self.conv1 = nn.Conv2d(num_inputs, 32, 8, stride=4)
    self.conv2 = nn.Conv2d(32, 64, 4, stride=2)
    self.conv3 = nn.Conv2d(64, 32, 3, stride=1)

    # Activation shared by every layer of the network.
    self.act_func = F.leaky_relu

    # Flattened size of the conv output that feeds ``linear1``.
    self.intermediate_size = 11264 if num_inputs == 6 else 32*7*7

    self.linear1 = nn.Linear(self.intermediate_size, 512)
    self.critic_linear = nn.Linear(512, 1)

    self.dist = Categorical(512, action_size)

    self.train()
    self.reset_parameters()
class CNNPolicy(nn.Module):
    """Conv actor-critic with a categorical policy sized by ``action_size``."""

    def __init__(self, num_inputs, action_size):
        """Create the layers, switch to train mode and initialise weights.

        Args:
            num_inputs: number of input channels of the first convolution;
                the value 6 selects a flattened conv size of 11264, any
                other value selects ``32 * 7 * 7``.
            action_size: number of outputs of the categorical action head.
        """
        super(CNNPolicy, self).__init__()

        self.conv1 = nn.Conv2d(num_inputs, 32, 8, stride=4)
        self.conv2 = nn.Conv2d(32, 64, 4, stride=2)
        self.conv3 = nn.Conv2d(64, 32, 3, stride=1)

        # Activation shared by every layer of the network.
        self.act_func = F.leaky_relu

        # Flattened size of the conv output that feeds ``linear1``.
        self.intermediate_size = 11264 if num_inputs == 6 else 32*7*7

        self.linear1 = nn.Linear(self.intermediate_size, 512)
        self.critic_linear = nn.Linear(512, 1)

        self.dist = Categorical(512, action_size)

        self.train()
        self.reset_parameters()

    def reset_parameters(self):
        """Apply ``weights_init`` and rescale the encoder weights."""
        self.apply(weights_init)

        relu_gain = nn.init.calculate_gain('relu')
        for layer in (self.conv1, self.conv2, self.conv3, self.linear1):
            layer.weight.data.mul_(relu_gain)

        if self.dist.__class__.__name__ == "DiagGaussian":
            self.dist.fc_mean.weight.data.mul_(0.01)

    def encode(self, inputs):
        """Run the conv stack and ``linear1``; no activation after the latter."""
        hidden = inputs
        for conv in (self.conv1, self.conv2, self.conv3):
            hidden = self.act_func(conv(hidden))
        flat = hidden.view(-1, self.intermediate_size)
        return self.linear1(flat)

    def predict_for_action(self, inputs):
        """Return the activated features that feed the action distribution."""
        return self.act_func(inputs)

    def predict_for_value(self, inputs):
        """Return the critic output for encoded features ``inputs``."""
        activated = self.act_func(inputs)
        return self.critic_linear(activated)

    def forward(self, inputs):
        """Return ``(value, action_features)`` for a batch of inputs."""
        encoded = self.encode(inputs)
        for_action = self.predict_for_action(encoded)
        for_value = self.predict_for_value(encoded)
        return for_value, for_action

    def action_dist(self, inputs):
        """Return ``self.dist.action_probs`` of the action features."""
        encoded = self.encode(inputs)
        return self.dist.action_probs(self.predict_for_action(encoded))

    def action_logdist(self, inputs):
        """Return ``self.dist.action_logprobs`` of the action features."""
        encoded = self.encode(inputs)
        return self.dist.action_logprobs(self.predict_for_action(encoded))

    def act(self, inputs, deterministic=False):
        """Return ``(value, action, action_log_probs, dist_entropy)``.

        Args:
            inputs: batch of inputs for the network.
            deterministic: forwarded to ``self.dist.sample2``.
        """
        value, x_action = self(inputs)
        sampled = self.dist.sample2(x_action, deterministic=deterministic)
        action, action_log_probs, dist_entropy = sampled
        return value, action, action_log_probs, dist_entropy
class CNNPolicy(nn.Module):
    """Conv actor-critic whose action head is chosen from ``action_space``."""

    def __init__(self, num_inputs, action_space):
        """Create the layers, switch to train mode and initialise weights.

        Args:
            num_inputs: number of input channels of the first convolution.
            action_space: either an object whose class is named ``Discrete``
                (``.n`` is used) or ``Box`` (``.shape[0]`` is used);
                anything else is passed to ``Categorical`` directly as the
                output count.
        """
        super(CNNPolicy, self).__init__()

        self.conv1 = nn.Conv2d(num_inputs, 32, 8, stride=4)
        self.conv2 = nn.Conv2d(32, 64, 4, stride=2)
        self.conv3 = nn.Conv2d(64, 32, 3, stride=1)

        # Activation shared by every layer of the network.
        self.act_func = F.leaky_relu

        self.linear1 = nn.Linear(32 * 7 * 7, 512)
        self.critic_linear = nn.Linear(512, 1)

        space_kind = action_space.__class__.__name__
        if space_kind == "Discrete":
            self.dist = Categorical(512, action_space.n)
        elif space_kind == "Box":
            self.dist = DiagGaussian(512, action_space.shape[0])
        else:
            # Fallback: ``action_space`` itself is the number of outputs.
            self.dist = Categorical(512, action_space)

        self.train()
        self.reset_parameters()

    def reset_parameters(self):
        """Apply ``weights_init`` and rescale the encoder weights."""
        self.apply(weights_init)

        relu_gain = nn.init.calculate_gain('relu')
        for layer in (self.conv1, self.conv2, self.conv3, self.linear1):
            layer.weight.data.mul_(relu_gain)

        if self.dist.__class__.__name__ == "DiagGaussian":
            self.dist.fc_mean.weight.data.mul_(0.01)

    def encode(self, inputs):
        """Run the conv stack and ``linear1``; inputs are used unscaled."""
        hidden = inputs
        for conv in (self.conv1, self.conv2, self.conv3):
            hidden = self.act_func(conv(hidden))
        flat = hidden.view(-1, 32 * 7 * 7)
        return self.linear1(flat)

    def predict_for_action(self, inputs):
        """Return the activated features that feed the action distribution."""
        return self.act_func(inputs)

    def predict_for_value(self, inputs):
        """Return the critic output for encoded features ``inputs``."""
        activated = self.act_func(inputs)
        return self.critic_linear(activated)

    def forward(self, inputs):
        """Return ``(value, action_features)`` for a batch of inputs."""
        encoded = self.encode(inputs)
        for_action = self.predict_for_action(encoded)
        for_value = self.predict_for_value(encoded)
        return for_value, for_action

    def action_dist(self, inputs):
        """Return ``self.dist.action_probs`` of the action features."""
        encoded = self.encode(inputs)
        return self.dist.action_probs(self.predict_for_action(encoded))

    def action_logdist(self, inputs):
        """Return ``self.dist.action_logprobs`` of the action features."""
        encoded = self.encode(inputs)
        return self.dist.action_logprobs(self.predict_for_action(encoded))

    def act(self, inputs, deterministic=False):
        """Return ``(value, action, action_log_probs, dist_entropy)``.

        Args:
            inputs: batch of inputs for the network.
            deterministic: forwarded to ``self.dist.sample2``.
        """
        value, x_action = self(inputs)
        sampled = self.dist.sample2(x_action, deterministic=deterministic)
        action, action_log_probs, dist_entropy = sampled
        return value, action, action_log_probs, dist_entropy