Example #1
def weights(shape, initializer='variance_scaling', reg_norm=True, reg_coef=0.001, name='variable'):
    if initializer == 'variance_scaling':
        init = tf.variance_scaling_initializer()
    else:
        init = tf.glorot_normal_initializer()
    weight = tf.Variable(init(shape), name=name)

    if reg_norm:
        l2_norm = reg_coef * tf.reduce_sum(tf.square(weight))
        return weight, l2_norm

    return weight
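
A quick usage sketch for this helper (hypothetical shapes and names; note that the return type depends on reg_norm, which is easy to trip over):

# Hypothetical usage; the shapes, names, and task_loss are illustrative only.
w, w_l2 = weights([784, 128], name='dense1_w')       # tuple when reg_norm=True
b = weights([128], reg_norm=False, name='dense1_b')  # bare variable otherwise
total_loss = task_loss + w_l2                        # add the L2 term to the task loss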
Example #2


model = tf.keras.Sequential([
    tf.keras.layers.Dense(inputs_dim),
    tf.keras.layers.Dense(
        number_of_hidden_neurons),  # this row can be applied multiple times
    tf.keras.layers.Dense(output_dim)
])

# softmax cross entropy loss
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=predicted))

# mean squared error loss
loss = tf.reduce_mean(tf.square(tf.subtract(y, predicted)))
loss = tf.keras.losses.MSE(y, predicted)

# Gradient Descent
weights = tf.Variable(tf.random.normal([1]))  # tf.random.normal needs an explicit shape; [1] here is illustrative
optimizer = tf.keras.optimizers.SGD()

while True:  # training loop (the original snippet gives no stopping condition)

    with tf.GradientTape() as g:
        prediction = model(x)               # the forward pass must run inside the tape
        loss = compute_loss(y, prediction)

    gradients = g.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))
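
The loop above calls compute_loss, which the snippet never defines; a minimal sketch consistent with the MSE loss shown earlier:

# Hypothetical definition of the compute_loss assumed by the training loop.
def compute_loss(y_true, y_pred):
    return tf.reduce_mean(tf.square(y_true - y_pred))  # plain mean squared error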
Example #3
	def __init__(self, scope, globalAC):

		# in the __init__ we define some important variables and functions,
		# such as the losses and train ops,
		# and the other important pieces of the tensor graph ...

		if scope == GLOBAL_NET_SCOPE: # let us make a global net

			with tf.variable_scope(scope):

				# give me some placeholders, come on !
				self.s = tf.placeholder(tf.float32, [None, N_S],'S')

				# the network will return para according to self.s
				# para of action net and critic net
				self.a_para, self.c_para = self._build_net(scope)[-2:]

		else: # let us make a local worker network
			with tf.variable_scope(scope):

				# give me some placeholder to give the net

				# this is the input of net
				self.s = tf.placeholder(tf.float32, [None, N_S], 'S')

				# this is the action from memory
				self.a_memory = tf.placeholder(tf.float32, [None, N_A], 'A')  # dim must match the N_A used in _build_net

				# this is the value target of q_value
				self.v_target = tf.placeholder(tf.float32, [None, 1],'v_target')

				# the network will return para according to self.s
				# para of action net and critic net
				# mu and sigma are the outputs for the chosen action from action_net
				# mu and sigma are the parameters of a normal distribution
				# self.v is the value of this state
				mu, sigma, self.v, self.a_para, self.c_para = self._build_net(scope)

				# we need self.v_target and self.v to get c_loss
				td = tf.subtract(self.v_target, self.v, name='td_error')
				# this is the loss for q_learning, for the train operation of critic_net

				with tf.variable_scope('c_loss'):
					self.c_loss = tf.reduce_mean(tf.square(td))


				with tf.variable_scope('get_action_distribution'):
					mu = mu*A_BOUND[1]
					sigma += 1e-4
					normal_dist = tf.distributions.Normal(mu, sigma)


				with tf.variable_scope('a_loss'):
					# we need the action from memory to get a_loss
					log_prob = normal_dist.log_prob(self.a_memory)

					error = log_prob * tf.stop_gradient(td)  # don't backprop the actor loss into the critic

					entropy = normal_dist.entropy() # encourage exploration

					error = ENTROPY_BETA * entropy + error

					# we maximize the expected return, i.e. minimize its negative
					self.a_loss = tf.reduce_mean(-error)

				with tf.variable_scope('chosen_action'):
					# use the action_net of local net to choose action
					self.a = tf.clip_by_value(
											tf.squeeze(
														normal_dist.sample(1),
														axis = 0
														),
											A_BOUND[0],
											A_BOUND[1]
											)

				with tf.variable_scope('local_gradient'):
					# get the gradient of local net
					# to train local network and update global network
					self.a_grad = tf.gradients(self.a_loss, self.a_para)
					self.c_grad = tf.gradients(self.c_loss, self.c_para)

				with tf.variable_scope('sync'):
					# todo
					pass


				with tf.variable_scope('pull'):
					# pull the para of global action_net to the local action_net
					self.pull_a_para_op = [local_para.assign(global_para) for local_para, global_para in zip(self.a_para, globalAC.a_para)]

					# pull the para of global critic_net to the local critic_net
					self.pull_c_para_op = [local_para.assign(global_para) for local_para, global_para in zip(self.c_para, globalAC.c_para)]


				with tf.variable_scope('push'):
					# push the gradients of training to the global net
					# use the gradients calculated from the local net to train the global net

					self.update_gradient_action_op = optimizer_action.apply_gradients(zip(self.a_grad, globalAC.a_para))
					self.update_gradient_critic_op = optimizer_critic.apply_gradients(zip(self.c_grad, globalAC.c_para))



	def _build_net(self, scope):
		# defines the network structure of action_net and critic_net for the global and local networks
		w_init = tf.random_normal_initializer(0.0, 0.1)

		with tf.variable_scope('actor'):
			# we will get some normal_distributions of action, number of distributions is N_A
			output_a = tf.layers.dense(
										self.s,
										20,
										tf.nn.relu6,
										kernel_initializer = w_init,
										name = 'output_a'
										)

			mu = tf.layers.dense(  # get the mu of a normal distribution of action, dim of mu is N_A
									output_a,
									N_A,
									tf.nn.tanh,
									kernel_initializer = w_init,
									name = 'mu'
								)

			sigma = tf.layers.dense( # get the sigma of a normal distribution of action, dim of sigma is N_A
									output_a,
									N_A,
									tf.nn.softplus,
									kernel_initializer = w_init,
									name = 'sigma'
								)

		with tf.variable_scope('critic'):
			output_c = tf.layers.dense(
										self.s,
										20,
										tf.nn.relu6,
										kernel_initializer = w_init,
										name = 'output_c'
										)

			v = tf.layers.dense(  # we get the value of the state self.s
								output_c,
								1,
								kernel_initializer = w_init,
								name = 'v'
								)

		a_para = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope = scope+'/actor')
		c_para = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope = scope+'/critic')

		return mu, sigma, v, a_para, c_para

 
	def update_global(self, feed_dict): # push the gradients to the global net to train it
		# train the global net using the gradients calculated from the local net

		SESS.run([self.update_gradient_action_op, self.update_gradient_critic_op], feed_dict)
		# the feed_dict supplies the data for the placeholders

	def pull_global(self): #pull the new para from global net to local net
		SESS.run([self.pull_a_para_op, self.pull_c_para_op])

	def choose_action(self, s):
		# we need the state at this moment to calculate an action
		s = s[np.newaxis, :]

		return SESS.run(self.a, {self.s:s})[0]
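
For context, this is roughly how a worker would drive the class above. This is a sketch only: the class name ACNet, the environment env, and the buffer bookkeeping are assumptions, as are the globals (SESS, optimizer_action, optimizer_critic) the class itself relies on.

# Hypothetical worker loop showing how the methods above fit together.
global_net = ACNet(GLOBAL_NET_SCOPE, None)   # holds the shared parameters
local_net = ACNet('W_0', global_net)         # one worker's private copy

s = env.reset()
while True:
    a = local_net.choose_action(s)           # sample an action from the local policy
    s_, r, done, info = env.step(a)
    # ... store (s, a, r) and compute the discounted v_target for the batch ...
    local_net.update_global({local_net.s: buffer_s,
                             local_net.a_memory: buffer_a,
                             local_net.v_target: buffer_v_target})
    local_net.pull_global()                  # fetch the fresh global parameters
    s = s_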
Example #4
cost_function = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=y,labels=y_))
training_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost_function)
sess = tf.Session()
sess.run(init)
cost_history = np.array([])  # used below with np.append; was never initialized in the snippet
mse_history = []
accuracy_history = []

for epochs in range(training_epochs):
    sess.run(training_step,feed_dict = {x:train_x,y_:train_y})
    cost = sess.run(cost_function,feed_dict={x:train_x,y_:train_y})
    cost_history = np.append(cost_history,cost)
    correct_prediction = tf.equal(tf.argmax(y,1),tf.argmax(y_,1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction,tf.float32))
    #print('Accuracy: ', (sess.run(accuracy, feed_dict={x: test_x, y_: test_y})))
    pred_y = sess.run(y,feed_dict={x:test_x})
    mse = tf.reduce_mean(tf.square(pred_y - test_y))
    mse_=sess.run(mse)
    mse_history.append(mse_)
    accuracy = (sess.run(accuracy,feed_dict={x:train_x,y_:train_y}))
    accuracy_history.append(accuracy)
    print('epoch : ',epochs,' - cost: ',cost,' - Mse: ',mse_,'- train accuracy',accuracy)
    
save_path = saver.save(sess,model_path)
print('Model saved in file: %s'%save_path)
plt.plot(mse_history,'r')
plt.show()
plt.plot(accuracy_history)
plt.show()
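
The snippet assumes saver (a tf.train.Saver) and init were created earlier in the script; restoring the saved checkpoint later would look roughly like this sketch:

# Hypothetical restore step for the checkpoint saved above.
saver = tf.train.Saver()
with tf.Session() as sess:
    saver.restore(sess, model_path)              # load the trained parameters
    pred_y = sess.run(y, feed_dict={x: test_x})  # inference with the restored graph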


Example #5
def f1(): return tf.reduce_mean(tf.square(x - y))
def f2(): return tf.reduce_sum(tf.abs(x - y))
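
These two argument-free functions look like branch callables for tf.cond; a hedged sketch of the wiring (the boolean predicate is an assumption):

# Hypothetical dispatch between the two losses above, chosen at run time.
use_mse = tf.placeholder(tf.bool, name='use_mse')
loss = tf.cond(use_mse, f1, f2)  # MSE when use_mse is True, L1 otherwise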