def weights(shape, initializer='variance_scaling', reg_norm=True,
            reg_coef=0.001, name='variable'):
    """Create a weight variable and, optionally, its L2 penalty term.

    Args:
        shape: Shape handed to the initializer.
        initializer: ``'variance_scaling'`` selects
            ``tf.variance_scaling_initializer``; any other value selects
            ``tf.glorot_normal_initializer``.
        reg_norm: When true, the L2 regularization term is returned as well.
        reg_coef: Multiplier applied to the sum of squared weights.
        name: Name given to the created ``tf.Variable``.

    Returns:
        ``(weight, l2_norm)`` when ``reg_norm`` is true, otherwise only
        ``weight``.
    """
    if initializer == 'variance_scaling':
        init = tf.variance_scaling_initializer()
    else:
        init = tf.glorot_normal_initializer()

    weight = tf.Variable(init(shape), name=name)

    if reg_norm:
        # tf.square is the element-wise square; TensorFlow has no tf.squared.
        l2_norm = reg_coef * tf.reduce_sum(tf.square(weight))
        return weight, l2_norm
    return weight
return output model = tf.keras.Sequential([ tf.keras.layers.Dense(inputs_dim), tf.keras.layers.Dense( number_of_hidden_neurons), # this row can be applied multiple times tf.keras.layers.Dense(output_dim) ]) #softmax binary cross entropy loss loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(y, predicted)) #mean squared error loss loss = tf.reduce_mean(tf.squared(tf.subtract(y, predicted))) loss = tf.keras.losses.MSE(y, predicted) #Gradient Descent weights = tf.Variable([tf.random.normal()]) optimizer = tf.keras.optimizers.SGD() while True: prediction = model(x) with tf.GradientTape() as g: loss = compute_loss(weights) gradient = g.gradient(loss, model.trainable_variables) optimizer.apply_gradients(zip(grads, model.trainable_variables))
def __init__(self, scope, globalAC):
    """Build the graph for the global network or for a local worker network.

    Args:
        scope: Variable scope name. When it equals ``GLOBAL_NET_SCOPE`` only
            the state placeholder and the actor/critic parameter lists are
            created; otherwise the full worker graph (losses, action
            sampling, local gradients and pull/push ops) is built.
        globalAC: Network whose ``a_para`` / ``c_para`` the worker pulls from
            and pushes gradients to. Not read when building the global net.
    """
    if scope == GLOBAL_NET_SCOPE:
        # Global net: it only needs the state input and the parameter lists.
        with tf.variable_scope(scope):
            self.s = tf.placeholder(tf.float32, [None, N_S], 'S')
            self.a_para, self.c_para = self._build_net(scope)[-2:]
    else:
        # Local worker net.
        with tf.variable_scope(scope):
            # Network input (state).
            self.s = tf.placeholder(tf.float32, [None, N_S], 'S')
            # Actions taken, replayed from memory.
            # NOTE(review): the width here is A_S while _build_net sizes
            # mu/sigma with N_A — confirm both constants agree.
            self.a_memory = tf.placeholder(tf.float32, [None, A_S], 'A')
            # Target value for the critic.
            self.v_target = tf.placeholder(tf.float32, [None, 1], 'v_target')

            # mu and sigma parameterize a normal distribution over actions;
            # self.v is the critic's value output for self.s.
            mu, sigma, self.v, self.a_para, self.c_para = self._build_net(scope)

            td = tf.subtract(self.v_target, self.v, name='td_error')

            # Critic loss: mean squared TD error.
            with tf.variable_scope('c_loss'):
                self.c_loss = tf.reduce_mean(tf.square(td))

            with tf.variable_scope('get_action_distribution'):
                mu = mu * A_BOUND[1]
                sigma += 1e-4  # keep the scale strictly positive
                normal_dist = tf.distributions.Normal(mu, sigma)

            with tf.variable_scope('a_loss'):
                log_prob = normal_dist.log_prob(self.a_memory)
                error = log_prob * td
                entropy = normal_dist.entropy()  # encourage exploration
                error = ENTROPY_BETA * entropy + error
                # The optimizer minimizes, so the objective (which includes
                # the exploration bonus) has to be negated to be maximized.
                self.a_loss = tf.reduce_mean(-error)

            with tf.variable_scope('chosen_action'):
                # Sample one action from the local actor and clip to bounds.
                self.a = tf.clip_by_value(
                    tf.squeeze(normal_dist.sample(1), axis=0),
                    A_BOUND[0],
                    A_BOUND[1],
                )

            with tf.variable_scope('local_gradient'):
                # Gradients of the local losses w.r.t. the local parameters;
                # tf.gradients is the graph-mode API (there is no
                # tf.gradient).
                self.a_grad = tf.gradients(self.a_loss, self.a_para)
                self.c_grad = tf.gradients(self.c_loss, self.c_para)

        with tf.variable_scope('sync'):
            with tf.variable_scope('pull'):
                # Copy the global actor/critic parameters into the local net.
                self.pull_a_para_op = [
                    local_para.assign(global_para)
                    for local_para, global_para in zip(self.a_para, globalAC.a_para)
                ]
                self.pull_c_para_op = [
                    local_para.assign(global_para)
                    for local_para, global_para in zip(self.c_para, globalAC.c_para)
                ]
            with tf.variable_scope('push'):
                # Apply the locally computed gradients to the global
                # parameters. The critic must push its gradients
                # (self.c_grad), not its variables.
                self.update_gradient_action_op = optimizer_action.apply_gradients(
                    zip(self.a_grad, globalAC.a_para))
                self.update_gradient_critic_op = optimizer_critic.apply_gradients(
                    zip(self.c_grad, globalAC.c_para))


def _build_net(self, scope):
    """Create the actor and critic sub-networks on top of ``self.s``.

    Args:
        scope: Enclosing variable scope name, used to collect the trainable
            variables under ``scope + '/actor'`` and ``scope + '/critic'``.

    Returns:
        ``(mu, sigma, v, a_para, c_para)``: the actor's two N_A-wide outputs,
        the critic's single-unit output, and the actor and critic trainable
        variable lists.
    """
    w_init = tf.random_normal_initializer(0.0, 0.1)

    with tf.variable_scope('actor'):
        output_a = tf.layers.dense(
            self.s, 20, tf.nn.relu6,
            kernel_initializer=w_init, name='output_a')
        # Mean of the action distribution (tanh output).
        mu = tf.layers.dense(
            output_a, N_A, tf.nn.tanh,
            kernel_initializer=w_init, name='mu')
        # Scale of the action distribution (softplus output).
        sigma = tf.layers.dense(
            output_a, N_A, tf.nn.softplus,
            kernel_initializer=w_init, name='sigma')

    with tf.variable_scope('critic'):
        output_c = tf.layers.dense(
            self.s, 20, tf.nn.relu6,
            kernel_initializer=w_init, name='output_c')
        # Value estimate for the state self.s.
        v = tf.layers.dense(
            output_c, 1, kernel_initializer=w_init, name='v')

    a_para = tf.get_collection(
        tf.GraphKeys.TRAINABLE_VARIABLES, scope=scope + '/actor')
    c_para = tf.get_collection(
        tf.GraphKeys.TRAINABLE_VARIABLES, scope=scope + '/critic')
    return mu, sigma, v, a_para, c_para


def update_global(self, feed_dict):
    """Run both push ops, applying local gradients to the global net.

    Args:
        feed_dict: Values for the placeholders the gradient ops depend on.
    """
    SESS.run(
        [self.update_gradient_action_op, self.update_gradient_critic_op],
        feed_dict)


def pull_global(self):
    """Run both pull ops, copying global parameters into the local net."""
    SESS.run([self.pull_a_para_op, self.pull_c_para_op])


def choose_action(self, s):
    """Sample an action for a single state.

    Args:
        s: One state; a leading batch axis is added before it is fed to
            ``self.s``.

    Returns:
        The first (only) row of the sampled, clipped action batch.
    """
    s = s[np.newaxis, :]  # np.newaxis adds the batch dimension
    return SESS.run(self.a, {self.s: s})[0]
# Softmax cross-entropy between the network output y and the labels y_.
cost_function = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits=y, labels=y_))
training_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost_function)

# Build the accuracy op once. Creating it inside the training loop would add
# new nodes to the graph on every epoch.
correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
accuracy_op = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

sess = tf.Session()
sess.run(init)

mse_history = []
accuracy_history = []

for epochs in range(training_epochs):
    sess.run(training_step, feed_dict={x: train_x, y_: train_y})
    cost = sess.run(cost_function, feed_dict={x: train_x, y_: train_y})
    # NOTE(review): cost_history is not initialised in this section —
    # presumably defined earlier in the file; confirm.
    cost_history = np.append(cost_history, cost)

    # Test-set MSE. pred_y is already a NumPy array, so the mean of squares
    # is computed directly instead of building a new TF op per epoch.
    pred_y = sess.run(y, feed_dict={x: test_x})
    mse_ = np.mean(np.square(np.asarray(pred_y - test_y)))
    mse_history.append(mse_)

    accuracy = sess.run(accuracy_op, feed_dict={x: train_x, y_: train_y})
    accuracy_history.append(accuracy)

    print('epoch : ', epochs, ' - cost: ', cost, ' - Mse: ', mse_, '- train accuracy', accuracy)

save_path = saver.save(sess, model_path)
print('Model saved in file: %s' % save_path)

plt.plot(mse_history, 'r')
plt.show()
plt.plot(accuracy_history)
def __init__(self, scope, globalAC):
    """Build the graph for the global network or for a local worker network.

    Args:
        scope: Variable scope name. When it equals ``GLOBAL_NET_SCOPE`` only
            the state placeholder and the actor/critic parameter lists are
            created; otherwise the full worker graph (losses, action
            sampling, local gradients and pull/push ops) is built.
        globalAC: Network whose ``a_para`` / ``c_para`` the worker pulls from
            and pushes gradients to. Not read when building the global net.
    """
    if scope == GLOBAL_NET_SCOPE:
        # Global net: it only needs the state input and the parameter lists.
        with tf.variable_scope(scope):
            self.s = tf.placeholder(tf.float32, [None, N_S], 'S')
            self.a_para, self.c_para = self._build_net(scope)[-2:]
    else:
        # Local worker net.
        with tf.variable_scope(scope):
            # Network input (state).
            self.s = tf.placeholder(tf.float32, [None, N_S], 'S')
            # Actions taken, replayed from memory.
            # NOTE(review): confirm that A_S matches the action width of the
            # mu/sigma tensors returned by _build_net.
            self.a_memory = tf.placeholder(tf.float32, [None, A_S], 'A')
            # Target value for the critic.
            self.v_target = tf.placeholder(tf.float32, [None, 1], 'v_target')

            # mu and sigma parameterize a normal distribution over actions;
            # self.v is the critic's value output for self.s.
            mu, sigma, self.v, self.a_para, self.c_para = self._build_net(scope)

            td = tf.subtract(self.v_target, self.v, name='td_error')

            # Critic loss: mean squared TD error (tf.square, not tf.squared).
            with tf.variable_scope('c_loss'):
                self.c_loss = tf.reduce_mean(tf.square(td))

            with tf.variable_scope('get_action_distribution'):
                mu = mu * A_BOUND[1]
                sigma += 1e-4  # keep the scale strictly positive
                normal_dist = tf.distributions.Normal(mu, sigma)

            with tf.variable_scope('a_loss'):
                log_prob = normal_dist.log_prob(self.a_memory)
                error = log_prob * td
                entropy = normal_dist.entropy()  # encourage exploration
                error = ENTROPY_BETA * entropy + error
                # The optimizer minimizes, so the objective (which includes
                # the exploration bonus) has to be negated to be maximized.
                self.a_loss = tf.reduce_mean(-error)

            with tf.variable_scope('chosen_action'):
                # Sample one action from the local actor and clip to bounds.
                self.a = tf.clip_by_value(
                    tf.squeeze(normal_dist.sample(1), axis=0),
                    A_BOUND[0],
                    A_BOUND[1],
                )

            with tf.variable_scope('local_gradient'):
                # Gradients of the local losses w.r.t. the local parameters;
                # tf.gradients is the graph-mode API (there is no
                # tf.gradient).
                self.a_grad = tf.gradients(self.a_loss, self.a_para)
                self.c_grad = tf.gradients(self.c_loss, self.c_para)

        with tf.variable_scope('sync'):
            with tf.variable_scope('pull'):
                # Copy the global actor/critic parameters into the local net.
                self.pull_a_para_op = [
                    local_para.assign(global_para)
                    for local_para, global_para in zip(self.a_para, globalAC.a_para)
                ]
                self.pull_c_para_op = [
                    local_para.assign(global_para)
                    for local_para, global_para in zip(self.c_para, globalAC.c_para)
                ]
            with tf.variable_scope('push'):
                # Apply the locally computed gradients to the global
                # parameters. The critic must push its gradients
                # (self.c_grad), not its variables.
                self.update_gradient_action_op = optimizer_action.apply_gradients(
                    zip(self.a_grad, globalAC.a_para))
                self.update_gradient_critic_op = optimizer_critic.apply_gradients(
                    zip(self.c_grad, globalAC.c_para))
def f1():
    """Return the mean of the squared differences between ``x`` and ``y``.

    ``x`` and ``y`` are read from the enclosing scope.
    """
    # tf.square is the element-wise square; TensorFlow has no tf.squared.
    return tf.reduce_mean(tf.square(x - y))


def f2():
    """Return the sum of the absolute differences between ``x`` and ``y``.

    ``x`` and ``y`` are read from the enclosing scope.
    """
    return tf.reduce_sum(tf.abs(x - y))