def _build_net(self):
    with tf.name_scope('inputs'):
        self.obs = tf.placeholder(tf.float32, [None, self.n_features], name="observations")
        self.actions = tf.placeholder(tf.int32, [None, ], name="actions_num")
        self.dis_return = tf.placeholder(tf.float32, [None], name="return")
        # self.prediction = tf.placeholder(tf.float32, [None, ], name="actions_value")

    with tf.name_scope('Actor'):
        self.w_u = tf.Variable(tf.random_uniform([self.n_features, self.n_actions]),
                               dtype=tf.float32, name="w_u")
        self.action = tf.matmul(self.obs, self.w_u)

    with tf.name_scope('Critic'):
        self.w_v = tf.Variable(tf.random_uniform([self.n_features, 1]),
                               dtype=tf.float32, name="w_v")
        self.prediction = tf.matmul(self.obs, self.w_v)

    # # fc1
    # layer = tf.layers.dense(
    #     inputs=self.tf_obs,
    #     units=self.n_features,
    #     activation=None,  # tanh activation
    #     kernel_initializer=tf.random_normal_initializer(mean=0, stddev=0.),
    #     bias_initializer=tf.constant_initializer(0.),
    #     name='fc1'
    # )
    # # fc2
    # all_act = tf.layers.dense(
    #     inputs=layer,
    #     units=self.n_actions,
    #     activation=None,
    #     kernel_initializer=tf.random_normal_initializer(mean=0, stddev=0.),
    #     bias_initializer=tf.constant_initializer(0.),
    #     name='fc2'
    # )

    self.all_act_prob = tf.nn.softmax(self.action, name='act_prob')

    with tf.name_scope('loss'):
        # To maximize total reward (log_p * R) we minimize -(log_p * R),
        # since TF optimizers only minimize a loss.
        self.neg_log_prob = tf.reduce_sum(
            -tf.log(self.all_act_prob) * tf.one_hot(self.actions, self.n_actions),
            axis=1)  # negative log-probability of the chosen action
        # squeeze the critic output so delta has shape [None], like dis_return
        delta = self.dis_return - tf.squeeze(self.prediction, axis=1)
        loss_v = tf.reduce_mean(tf.square(delta))
        loss_u = tf.reduce_mean(self.neg_log_prob * delta)  # reward-guided loss

    with tf.name_scope('update'):
        # Manual gradient-descent updates. Note the API is tf.gradients (plural),
        # and it returns a list with one tensor per variable.
        self.w_u = tf.assign_add(
            self.w_u, -self.lr_actor * tf.gradients(loss_u, self.w_u)[0])
        self.w_v = tf.assign_add(
            self.w_v, -self.lr_critic * tf.gradients(loss_v, self.w_v)[0])
def test_gradient():
    x = 2
    z = 1
    with tf.GradientTape() as t:
        k = tf.constant(x - 1, dtype=tf.float32)
        t.watch(k)  # k is not a Variable, so it must be watched explicitly
        y = func_y(k, z)
    # use the tape's gradient method; tf.gradient does not exist
    grad = t.gradient(y, k)
    return grad
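# Illustrative check for test_gradient above. func_y is only assumed to be some
# differentiable function of (k, z); the quadratic below is a made-up stand-in.
import tensorflow as tf

def func_y(k, z):
    return z * k ** 2  # dy/dk = 2 * z * k

# With x = 2 and z = 1, k = x - 1 = 1.0, so the expected gradient is 2.0
# print(test_gradient())  # -> tf.Tensor(2.0, shape=(), dtype=float32)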
def CG(b, x0, TOLERANCE=1.0e-10, MAX_ITERATIONS=100):
    """
    Solve the linear system [A]{x} = {b} with the conjugate gradient method.
    More at: http://en.wikipedia.org/wiki/Conjugate_gradient_method

    Parameters
    ----------
    A : array
        A real symmetric positive-definite matrix; in our case the Hessian.
        We want to avoid forming it explicitly, so it is never passed in.
        Instead, the product H*d is approximated with finite differences by
        calling finite_differences(x, d), where x is the current point and d is
        the direction of movement.
    b : vector
        The right-hand side (RHS) of the system; in our case the gradient at a
        specific point. Because gradients are needed at more than just that one
        point, gradient(x) is called as a function wherever a fresh gradient is
        required.
    x0 : vector
        The starting guess for the solution. Anything will do.
    MAX_ITERATIONS : integer
        Maximum number of iterations. Iteration stops after MAX_ITERATIONS steps
        even if the specified tolerance has not been achieved.
    TOLERANCE : float
        Tolerance to achieve. The algorithm terminates when the norm of the
        residual falls below TOLERANCE.
    """
    # Initializations
    x = x0
    d = -gradient(x)                       # initial search direction (steepest descent)
    r0 = b - finite_differences(x, d)      # initial residual, using H*d from finite differences

    # Start iterations
    for i in range(MAX_ITERATIONS):
        Hd = finite_differences(x, d)      # Hessian-vector product H*d
        a = float(np.dot(d.T, r0) / np.dot(d.T, Hd))
        x = x + a * d                      # step along the search direction
        ri = r0 - a * Hd                   # updated residual
        # print(i, np.linalg.norm(ri))

        # Check stopping condition
        if np.linalg.norm(ri) < TOLERANCE:
            return x

        # Otherwise find a new conjugate direction
        beta = float(np.dot(gradient(x).T, Hd) / np.dot(d.T, Hd))
        d = -gradient(x) + beta * d
        r0 = ri

    return x
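# Sketch of the finite-difference Hessian-vector product assumed by CG above.
# This is an illustrative helper, not the author's implementation; gradient()
# stands in for whatever computes the gradient at a point, and eps is a guess.
import numpy as np

def finite_differences(x, d, eps=1e-6):
    # H*d is approximated as (grad(x + eps*d) - grad(x)) / eps,
    # so the Hessian never has to be formed explicitly.
    return (gradient(x + eps * d) - gradient(x)) / eps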
def mle_loss(B, W, mu, tau):
    """
    Calculate the negative log-likelihood (minimizing it maximizes the
    log-likelihood).

    Parameters
    ----------
    B : N-by-1 vector, dtype tf.float32
    W : N-by-N matrix (?), dtype tf.float32
    mu : scalar, dtype tf.float32
    tau : scalar, plays the role of sigma in a Gaussian distribution, dtype tf.float32
    """
    J = tf.gradients(B, [W])[0]  # tf.gradients (plural) returns a list
    _B = tf.math.square(B[1:] - B[:-1] - mu) - tf.math.log(tau)
    # TODO: multiply by the Jacobian matrix, unclear
    log_prob = _B * tf.linalg.det(J)
    neg_log_likelihood = tf.reduce_sum(log_prob)
    return neg_log_likelihood
def wasserstein_loss(real_scores, fake_scores, real, generated, discriminator, gp_weight=10.0):
    # real, generated, discriminator and gp_weight are assumed to be passed in
    # so the gradient penalty can be computed on interpolated images.
    batch_size = real_scores.shape[0]
    avg_real_scores = tf.math.reduce_mean(real_scores)
    avg_fake_scores = tf.math.reduce_mean(fake_scores)
    gen_loss = -avg_fake_scores

    # Gradient penalty on random interpolations between real and generated images
    alpha = tf.random.uniform([batch_size, 1, 1, 1])
    interpolated = (alpha * generated) + ((1 - alpha) * real)
    with tf.GradientTape() as tape:
        tape.watch(interpolated)
        critic_interpolated = discriminator(interpolated)
    critic_gradient = tape.gradient(critic_interpolated, interpolated)
    norm_critic_gradient = tf.math.sqrt(
        tf.reduce_sum(tf.math.square(critic_gradient), [1, 2, 3]))
    norm_critic_center = norm_critic_gradient - 1
    gradient_penalty = tf.reduce_mean(tf.math.square(norm_critic_center))

    discrim_loss = -avg_real_scores + avg_fake_scores + (gp_weight * gradient_penalty)
    return gen_loss, discrim_loss
def wgangp_loss(logits_real, logits_fake, batch_size, x, G_sample):
    """Compute the WGAN-GP loss.

    Inputs:
    - logits_real: Tensor, shape [batch_size, 1], output of discriminator
      Log probability that the image is real for each real image
    - logits_fake: Tensor, shape [batch_size, 1], output of discriminator
      Log probability that the image is real for each fake image
    - batch_size: The number of examples in this batch
    - x: the input (real) images for this batch
    - G_sample: the generated (fake) images for this batch, [batch_size, 784]

    Returns:
    - D_loss: discriminator loss scalar
    - G_loss: generator loss scalar
    """
    # TODO: compute D_loss and G_loss
    D_loss = tf.reduce_mean(logits_fake - logits_real)
    G_loss = -tf.reduce_mean(logits_fake)

    # lambda from the paper
    lam = 10

    # random interpolation coefficients, one per example (tf.random_uniform)
    eps = tf.random_uniform([batch_size, 1])
    x_hat = eps * x + (1 - eps) * G_sample

    # Gradients of gradients is kind of tricky!
    with tf.variable_scope('', reuse=True) as scope:
        D_x_hat = discriminator(x_hat)
        grad_D_x_hat = tf.gradients(D_x_hat, x_hat)[0]  # tf.gradients, not tf.gradient

    grad_norm = tf.norm(grad_D_x_hat, axis=1)
    grad_pen = lam * tf.reduce_mean((grad_norm - 1) ** 2)

    D_loss += grad_pen
    return D_loss, G_loss
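# Hedged usage sketch for wgangp_loss in a TF1 graph-mode training script.
# discriminator(), D_vars and G_vars are assumptions about the surrounding code,
# not part of this function.
# D_loss, G_loss = wgangp_loss(logits_real, logits_fake, batch_size, x, G_sample)
# D_train_op = tf.train.AdamOptimizer(1e-4, beta1=0.5).minimize(D_loss, var_list=D_vars)
# G_train_op = tf.train.AdamOptimizer(1e-4, beta1=0.5).minimize(G_loss, var_list=G_vars)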
def network_mnist(images, labels, mode):
    # features=images, labels, mode=TEST or TRAIN
    # Input Layer
    input_layer = tf.reshape(images["x"], [-1, 28, 28, 1])

    # Convolutional Layer #1
    conv1 = tf.layers.conv2d(inputs=input_layer, filters=32, kernel_size=[5, 5],
                             padding="same", activation=tf.nn.relu)

    # Pooling Layer #1
    pool1 = tf.layers.max_pooling2d(inputs=conv1, pool_size=[2, 2], strides=2)

    # Convolutional Layer #2 and Pooling Layer #2
    conv2 = tf.layers.conv2d(inputs=pool1, filters=64, kernel_size=[5, 5],
                             padding="same", activation=tf.nn.relu)
    pool2 = tf.layers.max_pooling2d(inputs=conv2, pool_size=[2, 2], strides=2)

    # Dense Layer
    pool2_flat = tf.reshape(pool2, [-1, 7 * 7 * 64])
    dense = tf.layers.dense(inputs=pool2_flat, units=128, activation=tf.nn.relu)
    dropout = tf.layers.dropout(inputs=dense, rate=0.4,
                                training=mode == tf.estimator.ModeKeys.TRAIN)

    # Logits Layer
    logits = tf.layers.dense(inputs=dropout, units=10)

    # Return logits and the representer: gradient of the dense features w.r.t. the input
    return logits, tf.gradients(dense, input_layer)[0]
def eval_deriv(self, bhat, b):
    # tf.gradients (plural) returns a list of gradient tensors
    grads = tf.gradients(self.eval(bhat, b), bhat)
    return grads[0]
def __init__(self, scope, globalAC):
    # In __init__ we define some important variables and the tensor graph,
    # such as the losses and the train ops.
    if scope == GLOBAL_NET_SCOPE:
        # let us make a global net
        with tf.variable_scope(scope):
            # give me some placeholders, come on!
            self.s = tf.placeholder(tf.float32, [None, N_S], 'S')
            # the network returns the parameters of the actor net and critic net
            # according to self.s
            self.a_para, self.c_para = self._build_net(scope)[-2:]
    else:
        # let us make a local worker network
        with tf.variable_scope(scope):
            # input of the net
            self.s = tf.placeholder(tf.float32, [None, N_S], 'S')
            # the action taken, from memory
            self.a_memory = tf.placeholder(tf.float32, [None, A_S], 'A')
            # the target of the state value
            self.v_target = tf.placeholder(tf.float32, [None, 1], 'v_target')

            # mu and sigma are the parameters of the normal distribution the
            # actor net outputs for the chosen action; self.v is the value of
            # this state
            mu, sigma, self.v, self.a_para, self.c_para = self._build_net(scope)

            # we need self.v_target and self.v to get c_loss
            td = tf.subtract(self.v_target, self.v, name='td_error')

            # TD loss for the critic net's train operation
            with tf.variable_scope('c_loss'):
                self.c_loss = tf.reduce_mean(tf.square(td))

            with tf.variable_scope('get_action_distribution'):
                mu = mu * A_BOUND[1]
                sigma += 1e-4
                normal_dist = tf.distributions.Normal(mu, sigma)

            with tf.variable_scope('a_loss'):
                # we need the action from memory to get a_loss
                log_prob = normal_dist.log_prob(self.a_memory)
                error = log_prob * td
                entropy = normal_dist.entropy()  # encourage exploration
                error = ENTROPY_BETA * entropy + error
                # minimize the negative of the objective in order to maximize it
                self.a_loss = tf.reduce_mean(-error)

            with tf.variable_scope('chosen_action'):
                # use the local actor net to choose an action
                self.a = tf.clip_by_value(
                    tf.squeeze(normal_dist.sample(1), axis=0),
                    A_BOUND[0], A_BOUND[1])

            with tf.variable_scope('local_gradient'):
                # gradients of the local net, used to update the global network
                # (tf.gradients, not tf.gradient)
                self.a_grad = tf.gradients(self.a_loss, self.a_para)
                self.c_grad = tf.gradients(self.c_loss, self.c_para)

            with tf.variable_scope('sync'):
                with tf.variable_scope('pull'):
                    # pull the parameters of the global actor net into the local actor net
                    self.pull_a_para_op = [
                        local_para.assign(global_para)
                        for local_para, global_para in zip(self.a_para, globalAC.a_para)]
                    # pull the parameters of the global critic net into the local critic net
                    self.pull_c_para_op = [
                        local_para.assign(global_para)
                        for local_para, global_para in zip(self.c_para, globalAC.c_para)]
                with tf.variable_scope('push'):
                    # push the gradients calculated by the local net to train the global net
                    self.update_gradient_action_op = optimizer_action.apply_gradients(
                        zip(self.a_grad, globalAC.a_para))
                    self.update_gradient_critic_op = optimizer_critic.apply_gradients(
                        zip(self.c_grad, globalAC.c_para))

def _build_net(self, scope):
    # define the network structure for the actor net and critic net,
    # in both the global and the local networks
    w_init = tf.random_normal_initializer(0.0, 0.1)
    with tf.variable_scope('actor'):
        # the actor outputs one normal distribution per action dimension (N_A of them)
        output_a = tf.layers.dense(self.s, 20, tf.nn.relu6,
                                   kernel_initializer=w_init, name='output_a')
        # mu of the action distribution; dim of mu is N_A
        mu = tf.layers.dense(output_a, N_A, tf.nn.tanh,
                             kernel_initializer=w_init, name='mu')
        # sigma of the action distribution; dim of sigma is N_A
        sigma = tf.layers.dense(output_a, N_A, tf.nn.softplus,
                                kernel_initializer=w_init, name='sigma')
    with tf.variable_scope('critic'):
        output_c = tf.layers.dense(self.s, 20, tf.nn.relu6,
                                   kernel_initializer=w_init, name='output_c')
        # the value of the state self.s
        v = tf.layers.dense(output_c, 1, kernel_initializer=w_init, name='v')
    a_para = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=scope + '/actor')
    c_para = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=scope + '/critic')
    return mu, sigma, v, a_para, c_para

def update_global(self, feed_dict):
    # push the gradients calculated by the local net to train the global net;
    # the data comes in through the placeholders
    SESS.run([self.update_gradient_action_op, self.update_gradient_critic_op], feed_dict)

def pull_global(self):
    # pull the new parameters from the global net into the local net
    SESS.run([self.pull_a_para_op, self.pull_c_para_op])

def choose_action(self, s):
    # choose an action given the current state
    s = s[np.newaxis, :]
    return SESS.run(self.a, {self.s: s})[0]
            tf.ones([self.batch_size, self.num_steps], dtype=tf.float32),
        )
        self.cost = tf.reduce_sum(loss)

        # self.relu_out = tf.nn.relu(tf.reshape(logits, [-1, coord_size]))
        # # self.softmax_out = tf.nn.softmax(tf.reshape(logits, [-1, coord_size]))
        # self.predict = tf.cast(self.relu_out, tf.int32)
        correct_prediction = tf.equal(self.dense, tf.reshape(self.input_obj.targets, [-1]))
        self.accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

        if not is_training:
            return
        self.learning_rate = tf.Variable(0.0, trainable=False)

        tvars = tf.trainable_variables()
        # tf.gradients (plural); clip the global gradient norm to 5
        grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars), 5)
        optimizer = tf.train.AdamOptimizer(self.learning_rate)
        self.train_op = optimizer.apply_gradients(
            zip(grads, tvars),
            global_step=tf.contrib.framework.get_or_create_global_step())

        self.new_lr = tf.placeholder(tf.float32, shape=[])
        self.lr_update = tf.assign(self.learning_rate, self.new_lr)

    def assign_lr(self, session, lr_value):
        # feed the new learning-rate value into the assign op
        session.run(self.lr_update, feed_dict={self.new_lr: lr_value})


def train(train_data, num_epochs, num_layer, batch_size, model_save_name,
          learning_rate=1.0, max_lr_epoch=10, lr_decay=0.93):
score = tf.matmul(layer1, W2)
probability = tf.nn.sigmoid(score)

tvars = tf.trainable_variables()
input_y = tf.placeholder(name='input_y', shape=[None, 1], dtype=tf.float32)
advantages = tf.placeholder(name='reward_signal', dtype=tf.float32)

W1grad = tf.placeholder(name='batch_grad1', dtype=tf.float32)
W2grad = tf.placeholder(name='batch_grad2', dtype=tf.float32)
batchGrad = [W1grad, W2grad]

loglik = tf.log(input_y * (input_y - probability) + (1 - input_y) * (input_y + probability))
adam = tf.train.AdamOptimizer(learning_rate=lr)
loss = -tf.reduce_mean(loglik * advantages)
newGrads = tf.gradients(loss, tvars)  # tf.gradients, not tf.gradient
updateGrads = adam.apply_gradients(zip(batchGrad, tvars))

""" Model Network """
h_model = 256

input_data = tf.placeholder(name='input_data', shape=[None, 5], dtype=tf.float32)
with tf.variable_scope('rnnlm'):
    # weights and biases
    softmax_w = tf.get_variable('softmax_w', shape=[h_model, 50])
    softmax_b = tf.get_variable('softmax_b', shape=[50])

previous_state = tf.placeholder(name='previous_state', shape=[None, 5], dtype=tf.float32)
W1M = tf.get_variable(name='W1M', shape=[5, h_model],
                      initializer=tf.contrib.layers.xavier_initializer())
def trainModel(self, on_policy=False, target=False):
    # sampling
    if on_policy:
        mini_batch = [self.memory[-1]]
    else:
        mini_batch = random.sample(self.memory, self.batch_size)

    states = [x[0] for x in mini_batch]
    actions = [[x[1]] for x in mini_batch]
    rewards = [x[2] for x in mini_batch]
    # next_states = [x[3] for x in mini_batch]
    mus = [x[3] for x in mini_batch]
    dones = [int(x[4]) for x in mini_batch]

    states = np.asarray(states)
    actions = np.asarray(actions)
    mus = np.asarray(mus)

    states = tf.convert_to_tensor(states, dtype=tf.float32)
    actions = tf.convert_to_tensor(actions, dtype=tf.int32)
    mus = tf.convert_to_tensor(mus, dtype=tf.float32)

    q = self.critic(states)
    pi = self.actor(states)
    pi_avg = self.polyak(states)

    # q_a = get_by_index(q, actions)  # get_by_index might not work;
    # pi_a = get_by_index(pi, actions)  # gather by action index instead
    a_index = tf.stack([tf.range(tf.shape(actions)[0]), actions[:, 0]], axis=-1)
    q_a = tf.gather_nd(q, a_index)
    pi_a = tf.gather_nd(pi, a_index)

    v = tf.reduce_sum(q * pi, axis=-1)

    # importance weights relative to the behaviour policy mus
    rho = pi / (mus + 1e-6)
    rho_a = tf.gather_nd(rho, a_index)
    rho_bar = tf.minimum(1.0, rho_a)

    # Retrace targets, computed backwards through the sampled transitions
    q_ret = v[-1] * dones[-1]
    q_rets = []
    for i in reversed(range(len(rewards))):
        q_ret = rewards[i] + self.gamma * q_ret
        q_rets.append(q_ret)
        q_ret = (rho_bar[i] * (q_ret - q_a[i])) + v[i]
        # (edit1?) need correction for when a new sequence is beginning ??
    q_rets.reverse()
    # (edit1) in reference to seq_to_batch in OpenAI baselines a2c.utils
    q_ret = tf.expand_dims(tf.convert_to_tensor(q_rets, dtype=tf.float32), axis=1)

    # adv = q_ret - v
    loss_f = -rho_bar * tf.math.log(pi_a + 1e-6) * (q_ret - v)
    # loss_f = tf.reduce_mean(loss_f)
    # c is the ACER truncation constant, assumed to be defined elsewhere
    loss_bc = -tf.maximum((1 - c / rho), 0.0) * pi * tf.math.log(pi) * (q - v)
    # might need to reshape either q or v
    # loss_bc = tf.reduce_mean(loss_bc)
    loss_q = tf.reduce_mean(tf.square(tf.stop_gradient(q_ret) - q_a) * 0.5)

    # trust-region correction; tf.gradients requires graph mode (e.g. inside tf.function)
    g = tf.gradients(-(loss_f + loss_bc), pi)[0]
    k = pi_avg / (pi + 1e-6)
    # k_dot_g = tf.reduce_sum(k * g, axis=-1)
    grad_pi = tf.maximum(0.0, (tf.reduce_sum(k * g, axis=-1) - self.delta) /
                         (tf.reduce_sum(tf.square(k), axis=-1) + 1e-6))
    grad_pi = tf.gradients(grad_pi, self.actor.trainable_variables)
    grad_v = tf.gradients(loss_q, self.critic.trainable_variables)

    trainer_pi = tf.keras.optimizers.Adam(learning_rate=self.learning_rate)
    trainer_v = tf.keras.optimizers.Adam(learning_rate=self.learning_rate)
    trainer_pi.apply_gradients(zip(grad_pi, self.actor.trainable_variables))
    trainer_v.apply_gradients(zip(grad_v, self.critic.trainable_variables))

    self.update_polyak()
def backward(self, input, *args, **kwargs):
    if self.identity:
        return input
    if self.mask is not None:
        return input * tf.cast(self.mask, tf.float32)
    else:
        # tf.gradients (plural); `input` is passed as the upstream gradient (grad_ys),
        # and the first argument is presumably this layer's output tensor
        return tf.gradients(self, self.input, input)[0]
def backward(self, input):
    # tf.gradients (plural); `input` is passed as the upstream gradient (grad_ys)
    return tf.gradients(self, self.input, input)[0]
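# Hedged TF2 sketch of the same idea: in eager mode tf.gradients is unavailable,
# so the vector-Jacobian product is taken with GradientTape.gradient and its
# output_gradients argument. forward() and self.input are assumptions here.
#
# with tf.GradientTape() as tape:
#     tape.watch(self.input)
#     out = self.forward(self.input)
# grad = tape.gradient(out, self.input, output_gradients=input)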
logits = tf.matmul(h_fc1, W_fc2) + b_fc2
# var = [noise]
var = [x_noise]

with tf.name_scope("cross_entropy"):
    # cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=y))
    cost = tf.reduce_sum(tf.multiply(logits, y))
    print(cost)
    tf.summary.scalar('cross entropy', cost)

with tf.name_scope("train"):
    # k = tf.Variable()
    grad = tf.gradients(cost, x_noise)[0]  # tf.gradients returns a list
    # use a separate name for the variance so var (the var_list) is not clobbered
    mean, variance = tf.nn.moments(grad, axes=[0])
    learning_rate = tf.reciprocal(tf.sqrt(variance))
    train_step = tf.train.AdamOptimizer(learning_rate).minimize(-cost, var_list=var)

with tf.name_scope("accuracy"):
    correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    tf.summary.scalar('accuracy', accuracy)

# with tf.name_scope("test_accuracy"):
#     correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(y, 1))
#     test_accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
#     tf.summary.scalar('test_accuracy', test_accuracy)
def mle_gradient(loss, W, mu, tau, P0):
    # tf.gradients (plural) returns one gradient tensor per listed variable
    return tf.gradients(loss, [W, mu, tau, P0])
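# Hedged usage sketch tying mle_loss and mle_gradient together in TF1 graph mode.
# The pairing of gradients with variables and the plain SGD step are illustrative
# assumptions, not part of the original code.
# loss = mle_loss(B, W, mu, tau)
# dW, dmu, dtau, dP0 = mle_gradient(loss, W, mu, tau, P0)
# train_op = tf.train.GradientDescentOptimizer(1e-3).apply_gradients(
#     [(dW, W), (dmu, mu), (dtau, tau), (dP0, P0)])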