def __init__(self, state_dim, action_dim, name="critic"):
    """Set up the online critic, its target copy and the training ops.

    Args:
        state_dim: dimension of the input (state) space; per the original
            note, a length-one value is treated as a low-dimensional input.
        action_dim: dimension of the action space.
        name: name forwarded to the base-class constructor.
    """
    super(CriticNetwork, self).__init__(state_dim, action_dim, name=name)
    self.update_op = self.create_update_op()

    # Online critic: `self.network` initially holds an
    # (output, state input, action input) triple -- presumably prepared by
    # the base class -- which is split into separate attributes here.
    online_output, online_state, online_action = self.network
    self.network = online_output
    self.state = online_state
    self.action = online_action

    # Target critic, unpacked the same way.
    target_output, target_state, target_action = self.target_network
    self.target_network = target_output
    self.target_state = target_state
    self.target_action = target_action

    # The critic needs one extra input, the regression target y, fed as
    #   r + gamma * target(s_{t+1}, action(s_{t+1}))
    self.y = tf.placeholder(tf.float32, shape=None, name="target_q")

    # Scalar loss (batch mean) and the element-wise loss.
    squared_error_for_mean = tf.squared_difference(self.y, self.network)
    self.mean_loss = tf.reduce_mean(squared_error_for_mean)
    self.loss = tf.squared_difference(self.y, self.network)

    # Gradients w.r.t. parameters and w.r.t. the action input.
    self.gradients = self.compute_gradient()
    self.action_gradient = self.compute_action_gradient()

    self.train = self.create_train_op()
def embedding_lookup(self, x, means):
    """Quantize `x` to its nearest embeddings and build the two VQ losses.

    Args:
        x: Batch of encoder continuous latent states sliced/projected into
            shape [-1, num_blocks, block_dim].
        means: Embedding means.

    Returns:
        A 4-tuple `(x_means_hot, x_means, q_loss, e_loss)`:
        the nearest neighbor in one-hot form, the nearest neighbor itself,
        the loss that moves the selected embeddings towards `x` (gradient
        of `x` stopped), and the loss that moves `x` towards the selected
        embeddings (gradient of the embeddings stopped).
    """
    hparams = self.hparams
    x_means_hot = self.nearest_neighbor(x, means)

    # Regroup the one-hot codes per block, then put the block axis first so
    # the batched matmul pairs each block with its own slice of `means`.
    one_hot_per_block = tf.reshape(
        x_means_hot, [-1, hparams.num_blocks, hparams.block_v_size])
    block_major = tf.transpose(one_hot_per_block, perm=[1, 0, 2])
    selected = tf.matmul(block_major, means)
    x_means = tf.transpose(selected, [1, 0, 2])

    q_loss = tf.reduce_mean(
        tf.squared_difference(tf.stop_gradient(x), x_means))
    e_loss = tf.reduce_mean(
        tf.squared_difference(x, tf.stop_gradient(x_means)))
    return x_means_hot, x_means, q_loss, e_loss
def mean_squared_error(output, target, is_mean=False):
    """Return the TensorFlow expression of mean-square-error of two distributions.

    The squared difference is first reduced over every non-batch axis of
    each sample and the per-sample values are then averaged over the batch.

    Parameters
    ----------
    output : tensor of rank >= 2 (typically 2D ``[batch_size, n_feature]``
        or 4D ``[batch_size, w, h, c]``).
    target : tensor with the same layout as ``output``.
    is_mean : boolean, if True, use ``tf.reduce_mean`` to compute the loss of
        one data, otherwise, use ``tf.reduce_sum`` (default).

    Raises
    ------
    ValueError
        If the static rank of ``output`` is unknown or smaller than 2.
        (Unsupported ranks previously failed with an unbound local
        variable.)

    References
    ------------
    - `Wiki Mean Squared Error <https://en.wikipedia.org/wiki/Mean_squared_error>`_
    """
    ndims = output.get_shape().ndims
    if ndims is None or ndims < 2:
        raise ValueError(
            "mean_squared_error expects a tensor of known rank >= 2, "
            "got rank %r" % (ndims,))

    # Every axis except the leading batch axis: [1] for 2D, [1, 2, 3] for 4D.
    sample_axes = list(range(1, ndims))
    with tf.name_scope("mean_squared_error_loss"):
        squared_error = tf.squared_difference(output, target)
        if is_mean:
            per_sample = tf.reduce_mean(squared_error, sample_axes)
        else:
            per_sample = tf.reduce_sum(squared_error, sample_axes)
        mse = tf.reduce_mean(per_sample)
    return mse
def _build_net(self):
    """Build the evaluation network, its losses/train ops and the target network.

    Each network produces four Q heads (11, 12, 21, 22).  Every head of the
    evaluation network gets its own target placeholder, mean-squared-error
    loss and Adam train op.
    """
    def q_target_input(action_index, placeholder_name):
        # Placeholder for the Q targets of a single head.
        return tf.placeholder(
            tf.float32, [None, self._n_actions[action_index]],
            name=placeholder_name)

    def head_loss(q_target, q_eval):
        return tf.reduce_mean(tf.squared_difference(q_target, q_eval))

    def adam_step(loss):
        # A separate optimizer instance per head.
        return tf.train.AdamOptimizer(self._lr).minimize(loss)

    # ------------------ build evaluate_net ------------------
    self._s = tf.placeholder(
        tf.float32, [None, self._n_features], name='s')  # input
    self._q_target11 = q_target_input(0, 'Q_target_11')
    self._q_target12 = q_target_input(1, 'Q_target_12')
    self._q_target21 = q_target_input(2, 'Q_target_21')
    self._q_target22 = q_target_input(3, 'Q_target_22')

    # Widths of layers 1..9, passed positionally to build_sub_network.
    layer_sizes = (180, 360, 720, 910, 720, 288, 72, 36, 18)

    with tf.variable_scope('eval_net'):
        # c_names (collection names) are the collections to store variables
        c_names = ['eval_net_params', tf.GraphKeys.GLOBAL_VARIABLES]
        w_initializer = tf.random_normal_initializer(0., 0.3)
        b_initializer = tf.constant_initializer(0.1)

        branches = self.build_sub_network(
            w_initializer, b_initializer, c_names, *layer_sizes)
        l11_8, l12_8, l21_8, l22_8 = branches
        (self._q_eval11, self._q_eval12,
         self._q_eval21, self._q_eval22) = self.build_output_net(
            w_initializer, b_initializer, c_names,
            l11_8, l12_8, l21_8, l22_8, layer_sizes[-1])

    with tf.variable_scope('loss'):
        self._loss11 = head_loss(self._q_target11, self._q_eval11)
        self._loss12 = head_loss(self._q_target12, self._q_eval12)
        self._loss21 = head_loss(self._q_target21, self._q_eval21)
        self._loss22 = head_loss(self._q_target22, self._q_eval22)
        if self._output_tensorboard:
            tf.summary.scalar('loss11', self._loss11)
            tf.summary.scalar('loss12', self._loss12)
            tf.summary.scalar('loss21', self._loss21)
            tf.summary.scalar('loss22', self._loss22)

    # NOTE: the train ops are created before the target network is built;
    # keep this ordering when editing.
    with tf.variable_scope('train'):
        self._train_op11 = adam_step(self._loss11)
        self._train_op12 = adam_step(self._loss12)
        self._train_op21 = adam_step(self._loss21)
        self._train_op22 = adam_step(self._loss22)

    # ------------------ build target_net ------------------
    self._s_ = tf.placeholder(
        tf.float32, [None, self._n_features], name='s_')  # input
    with tf.variable_scope('target_net'):
        # Same architecture and initializers, stored in its own collection.
        c_names = ['target_net_params', tf.GraphKeys.GLOBAL_VARIABLES]

        branches = self.build_sub_network(
            w_initializer, b_initializer, c_names, *layer_sizes)
        l11_8, l12_8, l21_8, l22_8 = branches
        (self._q_next11, self._q_next12,
         self._q_next21, self._q_next22) = self.build_output_net(
            w_initializer, b_initializer, c_names,
            l11_8, l12_8, l21_8, l22_8, layer_sizes[-1])
def _build_net(self):
    """Build the evaluation and target Q-networks plus loss and train op.

    Both networks share one architecture (a single ReLU hidden layer
    followed by a linear Q layer) but keep their variables in separate
    collections.  When `self.prioritized` is true, an importance-sampling
    weight placeholder scales the loss and `self.abs_errors` exposes the
    per-sample absolute error.

    The target network is built on `self.s_` (the next observation).
    """
    def build_layers(s, c_names, n_l1, w_initializer, b_initializer):
        """Return Q values for input `s`; variables are stored in `c_names`."""
        with tf.variable_scope('l1'):
            w1 = tf.get_variable('w1', [self.n_features, n_l1],
                                 initializer=w_initializer,
                                 collections=c_names)
            b1 = tf.get_variable('b1', [1, n_l1],
                                 initializer=b_initializer,
                                 collections=c_names)
            l1 = tf.nn.relu(tf.matmul(s, w1) + b1)

        with tf.variable_scope('Q'):
            w2 = tf.get_variable('w2', [n_l1, self.n_actions],
                                 initializer=w_initializer,
                                 collections=c_names)
            b2 = tf.get_variable('b2', [1, self.n_actions],
                                 initializer=b_initializer,
                                 collections=c_names)
            out = tf.matmul(l1, w2) + b2
        return out

    # ---------- evaluation network: the one that is trained ----------
    # Receives the current observation.
    self.s = tf.placeholder(tf.float32, [None, self.n_features], name='s')
    # Receives the Q targets, which are computed outside the graph.
    self.q_target = tf.placeholder(
        tf.float32, [None, self.n_actions], name='Q_target')

    if self.prioritized:
        # Importance-sampling weights, one per sample.
        self.ISWeights = tf.placeholder(
            tf.float32, [None, 1], name='IS_weights')

    with tf.variable_scope('eval_net'):
        # c_names (collection names) are used when updating the target
        # network parameters; the rest configures layer size and the
        # initial values of weights and biases.
        c_names, n_l1, w_initializer, b_initializer = \
            ['eval_net_params', tf.GraphKeys.GLOBAL_VARIABLES], 10, \
            tf.random_normal_initializer(0., 0.3), \
            tf.constant_initializer(0.1)
        self.q_eval = build_layers(
            self.s, c_names, n_l1, w_initializer, b_initializer)

    with tf.variable_scope('loss'):
        if self.prioritized:
            # Per-sample absolute error, used for updating the SumTree.
            self.abs_errors = tf.reduce_sum(
                tf.abs(self.q_target - self.q_eval), axis=1)
            # Scale each sample's squared error by its IS weight.
            self.loss = tf.reduce_mean(
                self.ISWeights
                * tf.squared_difference(self.q_target, self.q_eval))
        else:
            self.loss = tf.reduce_mean(
                tf.squared_difference(self.q_target, self.q_eval))

    with tf.variable_scope('train'):
        self._train_op = tf.train.RMSPropOptimizer(self.lr).minimize(
            self.loss)

    # ---------- target network: provides the target Q values ----------
    # Receives the next observation.
    self.s_ = tf.placeholder(
        tf.float32, [None, self.n_features], name='s_')
    with tf.variable_scope('target_net'):
        c_names = ['target_net_params', tf.GraphKeys.GLOBAL_VARIABLES]
        # Fixed: this used to be built on `self.s`, which left the `s_`
        # placeholder disconnected from the graph.
        self.q_next = build_layers(
            self.s_, c_names, n_l1, w_initializer, b_initializer)
def GMM_M_Step(X, Gama, ClusterNo, name='GMM_Statistics', **kwargs):
    """Compute per-cluster Gaussian statistics and the mixture loss.

    Args:
        X: tensor that is split into three single-channel slices along
            axis 3 (so axis 3 must have size 3).
        Gama: responsibilities; split into `ClusterNo` slices along axis 3.
        ClusterNo: number of mixture components.
        name: unused; kept for interface compatibility.
        **kwargs: unused; kept for interface compatibility.

    Returns:
        A tuple `(Log_Likelihood, Mu, Std)` where `Log_Likelihood` is the
        mean of the *negative* log of the (clipped) mixture probability,
        and `Mu` / `Std` are lists with one tensor per cluster holding the
        weighted means / standard deviations of the three channels.

    NOTE(review): the reshapes to `[1, ClusterNo]` look like they assume a
    batch size of 1 -- confirm against the caller.
    """
    def spatial_sum(t):
        # Sum over axes 1 and 2 (axis 2 becomes axis 1 after the first sum).
        return tf.reduce_sum(tf.reduce_sum(t, axis=1), axis=1)

    eps = tensorflow.keras.backend.epsilon()

    D, h, s = tf.split(X, [1, 1, 1], axis=3)

    # Responsibility-weighted channel values.
    WXd = tf.multiply(Gama, tf.tile(D, [1, 1, 1, ClusterNo]))
    WXa = tf.multiply(Gama, tf.tile(h, [1, 1, 1, ClusterNo]))
    WXb = tf.multiply(Gama, tf.tile(s, [1, 1, 1, ClusterNo]))

    # Total responsibility per cluster; eps guards the divisions below.
    S = spatial_sum(Gama)
    S = tf.add(S, eps)
    S = tf.reshape(S, [1, ClusterNo])

    # Weighted means.
    M_d = tf.div(spatial_sum(WXd), S)
    M_a = tf.div(spatial_sum(WXa), S)
    M_b = tf.div(spatial_sum(WXb), S)
    Mu = tf.split(tf.concat([M_d, M_a, M_b], axis=0), ClusterNo, 1)

    # Weighted standard deviations.
    Norm_d = tf.squared_difference(D, tf.reshape(M_d, [1, ClusterNo]))
    Norm_h = tf.squared_difference(h, tf.reshape(M_a, [1, ClusterNo]))
    Norm_s = tf.squared_difference(s, tf.reshape(M_b, [1, ClusterNo]))

    WSd = tf.multiply(Gama, Norm_d)
    WSh = tf.multiply(Gama, Norm_h)
    WSs = tf.multiply(Gama, Norm_s)

    S_d = tf.sqrt(tf.div(spatial_sum(WSd), S))
    S_h = tf.sqrt(tf.div(spatial_sum(WSh), S))
    S_s = tf.sqrt(tf.div(spatial_sum(WSs), S))
    Std = tf.split(tf.concat([S_d, S_h, S_s], axis=0), ClusterNo, 1)

    # One diagonal Gaussian per cluster.  Fixed: the loop used to rebind a
    # single `dist` name, so every component was evaluated with the last
    # cluster's distribution.
    dist = [
        tfp.distributions.MultivariateNormalDiag(
            tf.reshape(Mu[k], [1, 3]), tf.reshape(Std[k], [1, 3]))
        for k in range(ClusterNo)
    ]

    PI = tf.split(Gama, ClusterNo, axis=3)
    ds = tf.expand_dims(dataset_tf(X), -2)
    Prob0 = [
        tf.multiply(tf.squeeze(dist[k].prob(ds[:, 0, :])),
                    tf.squeeze(PI[k]))
        for k in range(ClusterNo)
    ]

    # Mixture probability, kept within (0, 1] before taking the log.
    Prob = tf.convert_to_tensor(Prob0, dtype=tf.float32)
    Prob = tf.minimum(tf.add(tf.reduce_sum(Prob, axis=0), eps),
                      tf.constant(1.0, tf.float32))
    Log_Prob = tf.negative(tf.log(Prob))
    Log_Likelihood = tf.reduce_mean(Log_Prob)

    return Log_Likelihood, Mu, Std
def tfmodel(x, y):
    """Linear model ``W * x + b`` with an element-wise squared-error cost.

    Both parameters are created as variables initialised to 5.0.

    Returns:
        A ``(pred, cost)`` pair: the model output and the (unreduced)
        squared difference between that output and ``y``.
    """
    weight = tf.Variable(5.)
    bias = tf.Variable(5.)
    prediction = weight * x + bias
    return prediction, tf.squared_difference(prediction, y)
def discriminator_loss(type, real, fake):
    """Sum the discriminator loss over all scales.

    Args:
        type: loss flavour; ``'lsgan'`` (least squares against 1.0 for real
            and 0 for fake) or ``'gan'`` (sigmoid cross-entropy against
            ones for real and zeros for fake).
        real: per-scale discriminator outputs on real samples.
        fake: per-scale discriminator outputs on fake samples.

    Returns:
        The sum over scales of ``real_loss + fake_loss``.  For any other
        ``type`` both terms stay at their initial value of 0.
    """
    per_scale = []
    # Initialised once, outside the loop, so an unrecognised `type`
    # contributes zeros for every scale.
    real_loss = 0
    fake_loss = 0

    for scale in range(len(real)):
        if type == 'lsgan':
            real_loss = tf.reduce_mean(
                tf.squared_difference(real[scale], 1.0))
            fake_loss = tf.reduce_mean(tf.square(fake[scale]))
        elif type == 'gan':
            real_logits = real[scale]
            fake_logits = fake[scale]
            real_loss = tf.reduce_mean(
                tf.nn.sigmoid_cross_entropy_with_logits(
                    labels=tf.ones_like(real_logits), logits=real_logits))
            fake_loss = tf.reduce_mean(
                tf.nn.sigmoid_cross_entropy_with_logits(
                    labels=tf.zeros_like(fake_logits), logits=fake_logits))
        per_scale.append(real_loss + fake_loss)

    return sum(per_scale)
def _build_model(self):
    """Build the stacked-LSTM regression graph.

    Inputs and targets are fixed-size ``[batch_size, num_steps, 1]``
    placeholders.  The LSTM outputs pass through a sigmoid hidden layer
    and a linear output layer applied to every time step.  When
    `self.is_training` is true, a mean-squared-error loss and an Adam
    train op are added as well.
    """
    self.x_holder = tf.placeholder(
        tf.float32, [self.batch_size, self.num_steps, 1], name="x")
    self.y_holder = tf.placeholder(
        tf.float32, [self.batch_size, self.num_steps, 1], name="y")

    # Recurrent stack.
    lstm_cells = [tf.nn.rnn_cell.LSTMCell(self.rnn_size)
                  for _ in range(self.num_layers)]
    multi_rnn_cell = tf.nn.rnn_cell.MultiRNNCell(
        lstm_cells, state_is_tuple=True)
    self.init_state = multi_rnn_cell.zero_state(self.batch_size, tf.float32)
    rnn_outputs, self.final_state = tf.nn.dynamic_rnn(
        cell=multi_rnn_cell,
        inputs=self.x_holder,
        initial_state=self.init_state)

    # Hidden layer, applied to all time steps at once by flattening
    # (batch, time) into one axis.
    weight_init = tf.truncated_normal_initializer(stddev=0.2)
    w1 = tf.get_variable('w1', shape=[self.rnn_size, self.hidden_size],
                         dtype=tf.float32, initializer=weight_init)
    b1 = tf.get_variable('b1', shape=[self.hidden_size], dtype=tf.float32,
                         initializer=tf.constant_initializer())
    flat_outputs = tf.reshape(rnn_outputs, [-1, self.rnn_size])
    h1 = tf.nn.sigmoid(tf.matmul(flat_outputs, w1) + b1)

    # Linear read-out to a single value per time step.
    w2 = tf.get_variable(
        'w2', shape=[self.hidden_size, 1], dtype=tf.float32,
        initializer=tf.truncated_normal_initializer(stddev=0.2))
    b2 = tf.get_variable('b2', shape=[1], dtype=tf.float32,
                         initializer=tf.constant_initializer())
    output_fc = tf.matmul(h1, w2) + b2
    self.preds = tf.reshape(
        output_fc, [self.batch_size, self.num_steps, 1])

    if not self.is_training:
        return

    self.optimizer = tf.train.AdamOptimizer()
    self.loss = tf.reduce_mean(
        tf.squared_difference(self.preds, self.y_holder))
    self.train_op = self.optimizer.minimize(self.loss)
def __init__(self):
    """Build the main/target Q-networks, the training op and a session.

    Reads the module-level names `state_dim`, `action_num`, `gamma` and
    `lr`.

    Attributes created:
        states_ph, actions_ph, rewards_ph, next_states_ph,
        next_q_values_ph: input placeholders.
        q_values: Q values of the main network for `states_ph`.
        next_q_values: Q values of the target network for `next_states_ph`.
        optimizer: the Adam train op (result of `minimize`).
        target_update: list of assign ops copying the main-network
            variables into the target network.
        sess: a session with all variables initialised.
    """
    self.states_ph = tf.placeholder(tf.float32, (None, state_dim))
    self.actions_ph = tf.placeholder(tf.int32, (None, ))
    self.rewards_ph = tf.placeholder(tf.float32, (None, 1))
    self.next_states_ph = tf.placeholder(tf.float32, (None, state_dim))
    # The done flag is applied to the next-state Q values outside the
    # graph, so the processed values are fed back in through a placeholder.
    self.next_q_values_ph = tf.placeholder(tf.float32, (None, action_num))

    # ---------------- network definition ----------------
    with tf.variable_scope('main'):  # q = f(s)
        layer = tf.layers.dense(self.states_ph, 20, tf.nn.relu)
        self.q_values = tf.layers.dense(layer, action_num, None)
    with tf.variable_scope('target'):  # computes the next-state Q values
        layer = tf.layers.dense(self.next_states_ph, 20, tf.nn.relu)
        self.next_q_values = tf.layers.dense(layer, action_num, None)

    # Collect the network variables before the optimizer adds its own
    # bookkeeping variables, so the two lists pair up one-to-one.
    main_vars = [var for var in tf.global_variables()
                 if 'main' in var.name]
    target_vars = [var for var in tf.global_variables()
                   if 'target' in var.name]

    # ---------------- training definition ----------------
    # q_target = r + gamma * max_a'(q(s', a'))
    # NOTE: only the first sample of the fed batch enters the loss.
    q_target = tf.stop_gradient(
        self.rewards_ph[0]
        + gamma * tf.reduce_max(self.next_q_values_ph[0], axis=0))
    loss = tf.reduce_mean(tf.squared_difference(
        q_target, self.q_values[0][self.actions_ph[0]]))
    self.optimizer = tf.train.AdamOptimizer(lr).minimize(loss)

    # Fixed: the assignment used to be tf.assign(main_var, target_var),
    # which overwrote the trained main network with the target weights.
    # A target update must copy main -> target.
    self.target_update = [
        tf.assign(target_var, main_var)
        for main_var, target_var in zip(main_vars, target_vars)
    ]

    self.sess = tf.Session()
    self.sess.run(tf.global_variables_initializer())
def __init__(self, state_size, learning_rate, name='critic'):
    """Build a one-hidden-layer state-value network and its train op.

    Args:
        state_size: number of features in a state vector.
        learning_rate: stored first, then replaced by a placeholder of the
            same attribute name, so the rate used by the optimizer has to
            be fed at run time.
        name: variable scope that holds the network.
    """
    self.state_size = state_size
    self.learning_rate = learning_rate

    with tf.variable_scope(name):
        # Inputs.
        self.state = tf.placeholder(
            tf.float32, [None, self.state_size], name="state")
        self.R_t = tf.placeholder(tf.float32, name="total_rewards")
        # Overrides the constructor argument stored above.
        self.learning_rate = tf.placeholder(
            tf.float32, name="learning_rate")

        # Parameters: state_size -> 12 -> 1.
        self.W1 = tf.get_variable(
            "W1", [self.state_size, 12],
            initializer=tensorflow.initializers.variance_scaling(seed=0))
        self.b1 = tf.get_variable(
            "b1", [12], initializer=tf.zeros_initializer())
        self.W2 = tf.get_variable(
            "W2", [12, 1],
            initializer=tensorflow.initializers.variance_scaling(seed=0))
        self.b2 = tf.get_variable(
            "b2", [1], initializer=tf.zeros_initializer())

        # Forward pass.
        self.Z1 = tf.add(tf.matmul(self.state, self.W1), self.b1)
        self.A1 = tf.nn.relu(self.Z1)
        self.output = tf.add(tf.matmul(self.A1, self.W2), self.b2)

        # Unreduced squared error between the value estimate and R_t.
        self.square_loss = tf.squared_difference(
            tf.squeeze(self.output), self.R_t)

        # Every trainable variable in the graph at this point is passed
        # to the optimizer, not just the ones created in this scope.
        trainable_vars = tf.trainable_variables()
        self.optimizer = tf.train.AdamOptimizer(
            learning_rate=self.learning_rate).minimize(
                self.square_loss, var_list=trainable_vars)
def training_losses(self, denoise_fn, x_start, t, noise=None):
    """Compute the per-example training loss at timesteps `t`.

    Args:
        denoise_fn: model; called as `denoise_fn(x_t, t)` for the MSE loss
            or handed to `_vb_terms_bpd` for the KL loss.
        x_start: clean data batch.
        t: one timestep per example; its shape must be `[batch]`.
        noise: optional noise with the shape and dtype of `x_start`;
            sampled from a standard normal when omitted.

    Returns:
        Losses with the same shape as `t`.

    Raises:
        NotImplementedError: if `self.loss_type` is neither 'kl' nor 'mse'.
    """
    assert t.shape == [x_start.shape[0]]

    # Diffuse the data to timestep t.
    if noise is None:
        noise = tf.random_normal(shape=x_start.shape, dtype=x_start.dtype)
    assert noise.shape == x_start.shape and noise.dtype == x_start.dtype
    x_t = self.q_sample(x_start=x_start, t=t, noise=noise)

    if self.loss_type == 'kl':
        # The variational bound.
        losses = self._vb_terms_bpd(
            denoise_fn=denoise_fn, x_start=x_start, x_t=x_t, t=t,
            clip_denoised=False, return_pred_xstart=False)
    elif self.loss_type == 'mse':
        # Unweighted MSE against whatever quantity the model predicts.
        assert self.model_var_type != 'learned'
        posterior_mean = self.q_posterior_mean_variance(
            x_start=x_start, x_t=x_t, t=t)[0]
        candidate_targets = {
            'xprev': posterior_mean,
            'xstart': x_start,
            'eps': noise,
        }
        target = candidate_targets[self.model_mean_type]
        model_output = denoise_fn(x_t, t)
        assert model_output.shape == target.shape == x_start.shape
        losses = nn.meanflat(tf.squared_difference(target, model_output))
    else:
        raise NotImplementedError(self.loss_type)

    assert losses.shape == t.shape
    return losses
def log_prob_fn(params):
    """Joint log-density of the kernel hyper-parameters and the data `y`.

    `params` is split along its last axis into `rho` (`num_features`
    length scales), `alpha` (amplitude) and `sigma` (noise scale).  The
    covariance is

        alpha**2 * exp(-0.5 * sum_d ((x_i - x_j) / rho)_d ** 2)
            + diag(sigma**2 + 1e-6)

    and the result is the sum of the three prior log-probabilities and the
    multivariate-normal log-probability of `y`.

    Reads `num_features`, `x`, `y` and `tfd` from the enclosing scope.
    NOTE(review): the reshapes assume `params` has rank 2 -- confirm.
    """
    rho, alpha, sigma = tf.split(params, [num_features, 1, 1], -1)
    one = tf.ones(num_features)

    def as_event(distribution):
        # Treat the last batch axis as the event axis.
        return tfd.Independent(distribution, 1)

    p_rho = as_event(tfd.InverseGamma(5. * one, 5. * one))
    p_alpha = as_event(tfd.HalfNormal([1.]))
    p_sigma = as_event(tfd.HalfNormal([1.]))

    rho_shape = tf.shape(rho)
    alpha_shape = tf.shape(alpha)

    # Pairwise squared differences between all rows of x.
    x_rows = tf.expand_dims(x, -2)
    x_cols = tf.expand_dims(x, -3)
    log_kernel = -0.5 * tf.squared_difference(x_rows, x_cols)

    # Insert two unit axes after the leading axis so rho / alpha broadcast
    # against the pairwise grid.
    rho_grid_shape = tf.concat([rho_shape[:1], [1, 1], rho_shape[1:]], 0)
    log_kernel /= tf.reshape(tf.square(rho), rho_grid_shape)
    log_kernel = tf.reduce_sum(log_kernel, -1, keep_dims=True)

    alpha_grid_shape = tf.concat(
        [alpha_shape[:1], [1, 1], alpha_shape[1:]], 0)
    log_kernel += 2. * tf.reshape(tf.log(alpha), alpha_grid_shape)

    covariance = tf.exp(log_kernel[Ellipsis, 0])
    # Observation noise plus a small jitter on the diagonal.
    covariance += tf.matrix_diag(
        tf.tile(tf.square(sigma), [1, int(x.shape[0])]) + 1e-6)
    covariance = tf.check_numerics(covariance, "exp 2 has NaNs")

    # Debug output: print the first covariance matrix whenever it is used.
    with tf.control_dependencies(
            [tf.print(covariance[0], summarize=99999)]):
        covariance = tf.identity(covariance)

    p_y = tfd.MultivariateNormalFullCovariance(
        covariance_matrix=covariance)

    log_prob = (
        p_rho.log_prob(rho)
        + p_alpha.log_prob(alpha)
        + p_sigma.log_prob(sigma)
        + p_y.log_prob(y))
    return log_prob
def _build_net(self):
    """Build evaluation/target networks, the TD target, loss and train op.

    The TD target ``r + gamma * max_a' Q_target(s', a')`` is computed in
    the graph with its gradient stopped; the loss is the mean squared
    difference between it and the evaluation Q value of the taken action.
    """
    # ------------------ all inputs ------------------------
    tf.compat.v1.disable_eager_execution()
    self.s = tf.placeholder(
        tf.float32, [None, self.n_features], name='s')    # input State
    self.s_ = tf.placeholder(
        tf.float32, [None, self.n_features], name='s_')   # input Next State
    self.r = tf.placeholder(tf.float32, [None, ], name='r')  # input Reward
    self.a = tf.placeholder(tf.int32, [None, ], name='a')    # input Action

    w_initializer = tf.random_normal_initializer(0., 0.3)
    b_initializer = tf.constant_initializer(0.1)

    def dense(inputs, units, layer_name, activation=None):
        # Fully connected layer using the shared initializers.
        return tf.layers.dense(
            inputs, units, activation,
            kernel_initializer=w_initializer,
            bias_initializer=b_initializer,
            name=layer_name)

    # ------------------ build evaluate_net ------------------
    with tf.variable_scope('eval_net'):
        e1 = dense(self.s, 20, 'e1', tf.nn.relu)
        self.q_eval = dense(e1, self.n_actions, 'q')

    # ------------------ build target_net ------------------
    with tf.variable_scope('target_net'):
        t1 = dense(self.s_, 20, 't1', tf.nn.relu)
        self.q_next = dense(t1, self.n_actions, 't2')

    with tf.variable_scope('q_target'):
        best_next_q = tf.reduce_max(self.q_next, axis=1, name='Qmax_s_')
        q_target = self.r + self.gamma * best_next_q   # shape=(None, )
        self.q_target = tf.stop_gradient(q_target)

    with tf.variable_scope('q_eval'):
        # Pair every row index with its action to pick Q(s, a) per sample.
        row_ids = tf.range(tf.shape(self.a)[0], dtype=tf.int32)
        a_indices = tf.stack([row_ids, self.a], axis=1)
        self.q_eval_wrt_a = tf.gather_nd(
            params=self.q_eval, indices=a_indices)     # shape=(None, )

    with tf.variable_scope('loss'):
        td_error = tf.squared_difference(
            self.q_target, self.q_eval_wrt_a, name='TD_error')
        self.loss = tf.reduce_mean(td_error)

    with tf.variable_scope('train'):
        self._train_op = tf.train.RMSPropOptimizer(self.lr).minimize(
            self.loss)
def tower_loss(self, x, y_, z_):
    """Build the combined policy/value/regularisation loss for one tower.

    Args:
        x: network input.
        y_: policy targets (labels for the softmax cross-entropy).
        z_: value targets.

    Returns:
        ``(loss, policy_loss, mse_loss, reg_term, y_conv)`` where `y_conv`
        holds the policy logits (cast to float32 if necessary).
    """
    y_conv, z_conv = self.construct_net(x)

    # Cast the nn result back to fp32 to avoid loss overflow/underflow.
    if self.model_dtype != tf.float32:
        y_conv = tf.cast(y_conv, tf.float32)
        z_conv = tf.cast(z_conv, tf.float32)

    # Policy head: softmax cross-entropy averaged over the batch.
    policy_loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y_conv))

    # Value head: mean squared error.
    mse_loss = tf.reduce_mean(tf.squared_difference(z_, z_conv))

    # L2 regulariser over everything in the WEIGHTS collection.
    l2_terms = [
        tf.cast(tf.nn.l2_loss(weight), tf.float32)
        for weight in tf.get_collection(tf.GraphKeys.WEIGHTS)
    ]
    reg_term = self.l2_scale * tf.add_n(l2_terms)

    # For training from a (smaller) dataset of strong players, you will
    # want to reduce the factor in front of mse_loss here.
    loss = 1.0 * policy_loss + 1.0 * mse_loss + reg_term

    return loss, policy_loss, mse_loss, reg_term, y_conv
def neuralNetwork(self):
    """Build the main (trained) network and the old (target) network.

    Both are two-layer perceptrons with a 10-unit ReLU hidden layer; they
    differ only in their input placeholder, variable scope and variable
    collection.
    """
    def two_layer_net(inputs, collections, hidden_units, w_init, b_init):
        # hidden ReLU layer followed by a linear output layer
        with tf.variable_scope('layer1'):
            w1 = tf.get_variable('w1', [self.features, hidden_units],
                                 initializer=w_init,
                                 collections=collections)
            b1 = tf.get_variable('b1', [1, hidden_units],
                                 initializer=b_init,
                                 collections=collections)
            hidden = tf.nn.relu(tf.matmul(inputs, w1) + b1)
        with tf.variable_scope('layer2'):
            w2 = tf.get_variable('w2', [hidden_units, self.actions],
                                 initializer=w_init,
                                 collections=collections)
            b2 = tf.get_variable('b2', [1, self.actions],
                                 initializer=b_init,
                                 collections=collections)
            return tf.matmul(hidden, w2) + b2

    # ---- main network ----
    self.s = tf.placeholder(
        tf.float32, [None, self.features], name='state')
    self.t = tf.placeholder(
        tf.float32, [None, self.actions], name='Qtarget')

    with tf.variable_scope('mainnet'):
        # Parameter collections, hidden width and initializers.
        cNames = ['main net parameters', tf.GraphKeys.GLOBAL_VARIABLES]
        unitNum = 10
        wInit = tf.random_normal_initializer(0.0, 0.3)
        bInit = tf.constant_initializer(0.1)
        self.Qpredict = two_layer_net(
            self.s, cNames, unitNum, wInit, bInit)

    with tf.variable_scope('loss'):
        self.loss = tf.reduce_mean(
            tf.squared_difference(self.t, self.Qpredict))

    with tf.variable_scope('train'):
        self.train = tf.train.RMSPropOptimizer(self.lr).minimize(self.loss)

    # ---- old network ----
    self.ns = tf.placeholder(
        tf.float32, [None, self.features], name='newState')

    with tf.variable_scope('oldnet'):
        cNames = ['old net parameters', tf.GraphKeys.GLOBAL_VARIABLES]
        self.QnextStatePredict = two_layer_net(
            self.ns, cNames, unitNum, wInit, bInit)
def loss_som_s(self):
    """Computes the SOM loss of standard SOM for initialization.

    Mean squared difference between `sample_z_e` (gradient stopped, with
    an extra axis inserted at position 1) and `z_q_neighbors`.  The value
    is also written to the "loss_som_s" scalar summary.
    """
    frozen_encodings = tf.stop_gradient(self.sample_z_e)
    # The added axis broadcasts each encoding against its neighbours.
    encodings = tf.expand_dims(frozen_encodings, axis=1)
    loss_som = tf.reduce_mean(
        tf.squared_difference(encodings, self.z_q_neighbors))
    tf.summary.scalar("loss_som_s", loss_som)
    return loss_som
def _get_lr_tensor(self):
    """Get lr minimizing the surrogate.

    Computed as ``(1 - sqrt(mu)) ** 2 / h_min``.

    Returns:
        The lr_t.
    """
    one_minus_sqrt_mu_squared = tf.squared_difference(
        1.0, tf.sqrt(self._mu))
    return one_minus_sqrt_mu_squared / self._h_min
def loss_som_old(self):
    """Computes the SOM loss.

    Mean squared difference between `z_e_sample` (gradient stopped, with
    an extra axis inserted at position 1) and `z_q_neighbors`.  The value
    is also written to the "loss_som_old" scalar summary.
    """
    frozen_encodings = tf.stop_gradient(self.z_e_sample)
    # The added axis broadcasts each encoding against its neighbours.
    encodings = tf.expand_dims(frozen_encodings, axis=1)
    loss_som = tf.reduce_mean(
        tf.squared_difference(encodings, self.z_q_neighbors))
    tf.summary.scalar("loss_som_old", loss_som)
    return loss_som
def compute_noise_and_variance(wx, center, vote_conf, masses):
    """Return the squared deviation from `center` and its weighted variance.

    Args:
        wx: values compared against `center`.
        center: reference values.
        vote_conf: weights applied to the squared deviations.
        masses: normaliser for the weighted sum.

    Returns:
        ``(noise, variance)`` where `noise` is the element-wise squared
        difference and `variance` is ``min_var`` plus the weighted sum of
        `noise` over axes 1, -1 and -2 (kept as size-1 axes) divided by
        `masses`.  `min_var` comes from the enclosing scope.
    """
    noise = tf.squared_difference(wx, center)
    weighted_total = tf.reduce_sum(
        vote_conf * noise,
        axis=[1, -1, -2],
        keepdims=True,
        name='variance_calculation')
    variance = min_var + weighted_total / masses
    return noise, variance
def normalized_mean_square_error(output, target):
    """Return the TensorFlow expression of normalized mean-square-error of two distributions.

    For every sample, the L2 norm of ``output - target`` is divided by the
    L2 norm of ``target`` (both taken over all non-batch axes); the ratios
    are then averaged over the batch.

    Parameters
    ----------
    output : tensor of rank >= 2 (typically 2D ``[batch_size, n_feature]``
        or 4D ``[batch_size, w, h, c]``).
    target : tensor with the same layout as ``output``.

    Raises
    ------
    ValueError
        If the static rank of ``output`` is unknown or smaller than 2.
        (Unsupported ranks previously failed with an unbound local
        variable.)
    """
    ndims = output.get_shape().ndims
    if ndims is None or ndims < 2:
        raise ValueError(
            "normalized_mean_square_error expects a tensor of known "
            "rank >= 2, got rank %r" % (ndims,))

    # Every axis except the leading batch axis: [1] for 2D, [1, 2, 3] for 4D.
    sample_axes = list(range(1, ndims))
    # The scope name is kept as-is so existing op names do not change.
    with tf.name_scope("mean_squared_error_loss"):
        nmse_a = tf.sqrt(tf.reduce_sum(
            tf.squared_difference(output, target), axis=sample_axes))
        nmse_b = tf.sqrt(tf.reduce_sum(tf.square(target), axis=sample_axes))
        nmse = tf.reduce_mean(nmse_a / nmse_b)
    return nmse
def memAutoEnc(self, new_memory, info, control, name="", reuse=None):
    """Auto-encoder loss that reconstructs the control from the memory.

    The input (`info` or `new_memory`, chosen by `cfg.autoEncMemInputs`)
    is projected to the control dimension.  `cfg.autoEncMemLoss` selects
    what is reconstructed:

    * "CONT": the control vector itself (mean squared error);
    * "PROB": the last question word attention (softmax cross-entropy);
    * anything else: the control, via a summary of the question words
      under the reconstructed attention (mean squared error).

    Returns:
        The scalar loss.
    """
    with tf.variable_scope("memAutoEnc" + name, reuse=reuse):
        # inputs to auto encoder
        if cfg.autoEncMemInputs == "INFO":
            features = info
        else:
            features = new_memory
        features = ops.linear(
            features, self.memory_dim, self.control_dim,
            act=cfg.autoEncMemAct, name="aeMem")

        # reconstruct control directly
        if cfg.autoEncMemLoss == "CONT":
            return tf.reduce_mean(tf.squared_difference(control, features))

        # attention logits over the question words
        interactions, dim = ops.mul(
            self.question_contextual_word_embeddings, features,
            self.control_dim,
            concat={"x": cfg.autoEncMemCnct},
            mulBias=cfg.mulBias,
            name="aeMem")
        logits = ops.linear(interactions, dim, 1, dropout=0., name="logits")
        logits = self.expMask(logits, self.question_lengths)

        # reconstruct word attentions
        if cfg.autoEncMemLoss == "PROB":
            return tf.reduce_mean(
                tf.nn.softmax_cross_entropy_with_logits(
                    labels=self.attentions["question"][-1],
                    logits=logits))

        # reconstruct control through words attentions
        attention = tf.nn.softmax(logits)
        summary = ops.att2Smry(
            attention, self.question_contextual_word_embeddings)
        return tf.reduce_mean(tf.squared_difference(control, summary))
def z_dist_flat(self):
    """Computes the distances between the centroids and the embeddings.

    Returns:
        Squared distances summed over the last axis and reshaped to
        ``[-1, som_dim[0] * som_dim[1]]``.
    """
    # Two unit axes after the leading axis so each encoding broadcasts
    # against the whole embedding grid.
    encodings = tf.expand_dims(tf.expand_dims(self.sample_z_e, 1), 1)
    grid = tf.expand_dims(self.embeddings, 0)

    squared_distance = tf.reduce_sum(
        tf.squared_difference(encodings, grid), axis=-1)
    n_nodes = self.som_dim[0] * self.som_dim[1]
    return tf.reshape(squared_distance, [-1, n_nodes])
def build_model(x, lmbda, mode='training', layers=None, msssim_loss=False):
    """Builds the compression model.

    Args:
        x: input image batch.
        lmbda: weight of the distortion term in the loss.
        mode: 'training' enables training behaviour of the bottlenecks;
            'testing' additionally produces the compressed strings.
        layers: optional 5-tuple (analysis, hyper-analysis, entropy
            bottleneck, hyper-synthesis, synthesis) to reuse; fresh layers
            with 192 filters are created when omitted.
        msssim_loss: use ``1 - MS-SSIM`` instead of MSE as the distortion.

    Returns:
        ``(loss, bpp, mse, msssim, x_tilde_hat, y_tilde_hat, z_tilde_hat,
        y, z, string, side_string, layers)``; `string` and `side_string`
        are None unless ``mode == "testing"``.
    """
    is_training = (mode == 'training')
    num_pixels = tf.to_float(tf.reduce_prod(tf.shape(x)[:-1]))

    if layers is not None:
        (analysis_transform, hyper_analysis_transform, entropy_bottleneck,
         hyper_synthesis_transform, synthesis_transform) = layers
    else:
        num_filters = 192
        analysis_transform = AnalysisTransform(num_filters)
        synthesis_transform = SynthesisTransform(num_filters)
        hyper_analysis_transform = HyperAnalysisTransform(num_filters)
        hyper_synthesis_transform = HyperSynthesisTransform(num_filters)
        entropy_bottleneck = tfc.EntropyBottleneck()
        layers = (analysis_transform, hyper_analysis_transform,
                  entropy_bottleneck, hyper_synthesis_transform,
                  synthesis_transform)

    # Analysis side: image -> latents -> hyper-latents.
    y = analysis_transform(x)
    z = hyper_analysis_transform(y)
    z_tilde_hat, z_likelihoods = entropy_bottleneck(z, training=is_training)

    # The hyper-latents parameterise the conditional model of y.
    mean, sigma = hyper_synthesis_transform(z_tilde_hat)
    scale_table = np.exp(np.linspace(
        np.log(SCALES_MIN), np.log(SCALES_MAX), SCALES_LEVELS))
    conditional_bottleneck = tfc.GaussianConditional(
        sigma, scale_table, mean=mean)
    y_tilde_hat, y_likelihoods = conditional_bottleneck(
        y, training=is_training)
    x_tilde_hat = synthesis_transform(y_tilde_hat)

    string = None
    side_string = None
    if mode == "testing":
        side_string = entropy_bottleneck.compress(z_tilde_hat)
        string = conditional_bottleneck.compress(y_tilde_hat)

    # Rate: total bits of both bottlenecks per pixel.
    total_log_likelihood = (tf.reduce_sum(tf.log(y_likelihoods)) +
                            tf.reduce_sum(tf.log(z_likelihoods)))
    bpp = total_log_likelihood / (-np.log(2) * num_pixels)

    # Distortion: MSE rescaled by 255 ** 2, or 1 - MS-SSIM.
    mse = tf.reduce_mean(tf.squared_difference(x, x_tilde_hat))
    mse *= 255 ** 2
    msssim = tf.reduce_mean(1 - tf.image.ssim_multiscale(x_tilde_hat, x, 1))
    distortion = msssim if msssim_loss else mse

    loss = lmbda * distortion + bpp

    return loss, bpp, mse, msssim, x_tilde_hat, y_tilde_hat, z_tilde_hat, \
        y, z, string, side_string, layers
def generator_loss(self, D, fake_y, use_lsgan=True):
    """Loss that pushes the generator to fool discriminator `D`.

    Args:
        D: discriminator, called on `fake_y`.
        fake_y: generated samples.
        use_lsgan: least-squares loss against REAL_LABEL when true,
            otherwise the heuristic non-saturating log loss.

    Returns:
        The scalar generator loss.
    """
    if not use_lsgan:
        # heuristic, non-saturating loss
        return -tf.reduce_mean(ops.safe_log(D(fake_y))) / 2

    # use mean squared error
    return tf.reduce_mean(tf.squared_difference(D(fake_y), REAL_LABEL))
def z_dist_flat_ng(self):
    """Computes the distances between the centroids and the embeddings
    stopping the gradient of the latent embeddings.

    Returns:
        Squared distances summed over the last axis and reshaped to
        ``[-1, som_dim[0] * som_dim[1]]``.
    """
    # No gradient flows back into the encoder through z_e.
    frozen_z_e = tf.stop_gradient(self.z_e)
    # Two unit axes after the leading axis so each encoding broadcasts
    # against the whole embedding grid.
    encodings = tf.expand_dims(tf.expand_dims(frozen_z_e, 1), 1)
    grid = tf.expand_dims(self.embeddings, 0)

    squared_distance = tf.reduce_sum(
        tf.squared_difference(encodings, grid), axis=-1)
    n_nodes = self.som_dim[0] * self.som_dim[1]
    return tf.reshape(squared_distance, [-1, n_nodes])
def load_test_model_graph(checkpoint_dir):
    """Build the test-mode graph and restore the latest checkpoint.

    Both bottlenecks are run with ``training=False``.

    Args:
        checkpoint_dir: directory searched for the latest checkpoint.

    Returns:
        ``(sess, x, orig_x, [string, side_string], eval_bpp, x_hat, mse,
        psnr, msssim, num_pixels, y, z)``.  `x_hat`, `mse`, `psnr` and
        `msssim` are expressed on the 0..255 scale.
    """
    # inputs
    image_shape = [1, None, None, 3]
    x = tf.placeholder(tf.float32, image_shape)
    orig_x = tf.placeholder(tf.float32, image_shape)

    # Instantiate model.
    num_filters = 192
    analysis_transform = AnalysisTransform(num_filters)
    synthesis_transform = SynthesisTransform(num_filters)
    hyper_analysis_transform = HyperAnalysisTransform(num_filters)
    hyper_synthesis_transform = HyperSynthesisTransform(num_filters)
    entropy_bottleneck = tfc.EntropyBottleneck()

    # Transform and compress the image.
    y = analysis_transform(x)
    y_shape = tf.shape(y)
    z = hyper_analysis_transform(abs(y))
    z_hat, z_likelihoods = entropy_bottleneck(z, training=False)
    sigma = hyper_synthesis_transform(z_hat)
    # Crop sigma to the spatial size of y.
    sigma = sigma[:, :y_shape[1], :y_shape[2], :]
    scale_table = np.exp(
        np.linspace(np.log(SCALES_MIN), np.log(SCALES_MAX), SCALES_LEVELS))
    conditional_bottleneck = tfc.GaussianConditional(sigma, scale_table)
    side_string = entropy_bottleneck.compress(z)
    string = conditional_bottleneck.compress(y)

    # Transform the quantized image back.
    y_hat, y_likelihoods = conditional_bottleneck(y, training=False)
    x_hat = synthesis_transform(y_hat)

    # eval bpp
    num_pixels = tf.cast(
        tf.reduce_prod(tf.shape(x)[:-1]), dtype=tf.float32)
    total_log_likelihood = (tf.reduce_sum(tf.log(y_likelihoods)) +
                            tf.reduce_sum(tf.log(z_likelihoods)))
    eval_bpp = total_log_likelihood / (-np.log(2) * num_pixels)

    # reconstruction metrics
    # Bring both images back to 0..255 range.
    orig_x_255 = orig_x * 255
    x_hat = tf.round(tf.clip_by_value(x_hat, 0, 1) * 255)

    mse = tf.reduce_mean(tf.squared_difference(orig_x_255, x_hat))
    psnr = tf.squeeze(tf.image.psnr(x_hat, orig_x_255, 255))
    msssim = tf.squeeze(tf.image.ssim_multiscale(x_hat, orig_x_255, 255))

    # session
    sess = tf.Session()
    # load the latest checkpoint into the session
    latest = tf.train.latest_checkpoint(checkpoint_dir=checkpoint_dir)
    tf.train.Saver().restore(sess, save_path=latest)

    compressed = [string, side_string]
    return (sess, x, orig_x, compressed, eval_bpp, x_hat, mse, psnr,
            msssim, num_pixels, y, z)
def _build_net(self):
    """Build dueling evaluation/target networks with an IS-weighted loss.

    The hidden stack follows `self.hidden`; on top of it a state-value
    stream V and an advantage stream A are combined as
    ``Q = V + (A - mean(A))``.
    """
    def build_layers(s, c_names, w_initializer, b_initializer):
        """Return Q values for `s`.

        Side effect: stores the streams of the most recently built
        network in `self.V` and `self.A`.
        """
        layer = s
        in_units = self.n_features
        for idx, out_units in enumerate(self.hidden):
            with tf.variable_scope('l%i' % idx):
                w = tf.get_variable('w', [in_units, out_units],
                                    initializer=w_initializer,
                                    collections=c_names)
                b = tf.get_variable('b', [1, out_units],
                                    initializer=b_initializer,
                                    collections=c_names)
                layer = tf.nn.relu(tf.matmul(layer, w) + b)
            in_units = out_units

        with tf.variable_scope('Value'):
            w = tf.get_variable('w', [self.hidden[-1], 1],
                                initializer=w_initializer,
                                collections=c_names)
            b = tf.get_variable('b', [1, 1],
                                initializer=b_initializer,
                                collections=c_names)
            self.V = tf.matmul(layer, w) + b

        with tf.variable_scope('Advantage'):
            w = tf.get_variable('w', [self.hidden[-1], self.n_actions],
                                initializer=w_initializer,
                                collections=c_names)
            b = tf.get_variable('b', [1, self.n_actions],
                                initializer=b_initializer,
                                collections=c_names)
            self.A = tf.matmul(layer, w) + b

        with tf.variable_scope('Q'):
            # Q = V(s) + A(s,a), with the mean advantage subtracted.
            mean_advantage = tf.reduce_mean(
                self.A, axis=1, keep_dims=True)
            out = self.V + (self.A - mean_advantage)

        return out

    # ------------------ build evaluate_net ------------------
    self.s = tf.placeholder(
        tf.float32, [None, self.n_features], name='s')  # input
    self.q_target = tf.placeholder(
        tf.float32, [None, self.n_actions],
        name='Q_target')  # for calculating loss
    self.ISWeights = tf.placeholder(
        tf.float32, [None, 1], name='IS_weights')

    with tf.variable_scope('eval_net'):
        c_names = ['eval_net_params', tf.GraphKeys.GLOBAL_VARIABLES]
        w_initializer = tf.random_normal_initializer(0., 0.01)
        b_initializer = tf.constant_initializer(0.01)
        self.q_eval = build_layers(
            self.s, c_names, w_initializer, b_initializer)

    with tf.variable_scope('loss'):
        # for updating Sumtree
        # NOTE(review): this is |sum(errors)|, not sum(|errors|); the two
        # differ when a row has errors of opposite sign -- confirm intent.
        td_sum = tf.reduce_sum(self.q_target - self.q_eval, axis=1)
        self.abs_errors = tf.abs(td_sum)
        self.loss = tf.reduce_mean(
            self.ISWeights
            * tf.squared_difference(self.q_target, self.q_eval))

    with tf.variable_scope('train'):
        self._train_op = tf.train.AdamOptimizer(self.lr).minimize(self.loss)

    # ------------------ build target_net ------------------
    self.s_ = tf.placeholder(
        tf.float32, [None, self.n_features], name='s_')  # input
    with tf.variable_scope('target_net'):
        c_names = ['target_net_params', tf.GraphKeys.GLOBAL_VARIABLES]
        self.q_next = build_layers(
            self.s_, c_names, w_initializer, b_initializer)
def _build_net(self):
    """Reset the default graph and build evaluation and target networks.

    Both networks are two-layer perceptrons with a 50-unit ReLU hidden
    layer.  The evaluation network also gets a mean-squared-error loss
    (logged as the 'loss' scalar summary) and an Adam train op.
    """
    tf.reset_default_graph()

    def two_layer_net(inputs, c_names, n_l1, w_initializer, b_initializer):
        # `c_names` are the collections the variables are stored in; they
        # are used later when assigning to the target net.
        with tf.variable_scope('l1'):
            w1 = tf.get_variable('w1', [self.n_features, n_l1],
                                 initializer=w_initializer,
                                 collections=c_names)
            b1 = tf.get_variable('b1', [1, n_l1],
                                 initializer=b_initializer,
                                 collections=c_names)
            l1 = tf.nn.relu(tf.matmul(inputs, w1) + b1)
        with tf.variable_scope('l2'):
            w2 = tf.get_variable('w2', [n_l1, self.n_actions],
                                 initializer=w_initializer,
                                 collections=c_names)
            b2 = tf.get_variable('b2', [1, self.n_actions],
                                 initializer=b_initializer,
                                 collections=c_names)
            return tf.matmul(l1, w2) + b2

    # ------------------ build evaluate_net ------------------
    self.s = tf.placeholder(
        tf.float32, [None, self.n_features], name='s')  # input
    self.q_target = tf.placeholder(
        tf.float32, [None, self.n_actions],
        name='Q_target')  # for calculating loss

    with tf.variable_scope('eval_net'):
        # config of layers
        c_names = ['eval_net_params', tf.GraphKeys.GLOBAL_VARIABLES]
        n_l1 = 50
        w_initializer = tf.random_normal_initializer(0., 0.3)
        b_initializer = tf.constant_initializer(0.1)
        self.q_eval = two_layer_net(
            self.s, c_names, n_l1, w_initializer, b_initializer)

    with tf.variable_scope('loss'):
        self.loss = tf.reduce_mean(
            tf.squared_difference(self.q_target, self.q_eval))
        tf.summary.scalar('loss', self.loss)

    with tf.variable_scope('train'):
        self._train_op = tf.train.AdamOptimizer(self.lr).minimize(self.loss)

    # ------------------ build target_net ------------------
    self.s_ = tf.placeholder(
        tf.float32, [None, self.n_features], name='s_')  # input
    with tf.variable_scope('target_net'):
        c_names = ['target_net_params', tf.GraphKeys.GLOBAL_VARIABLES]
        self.q_next = two_layer_net(
            self.s_, c_names, n_l1, w_initializer, b_initializer)
def lossfn(real_input, fake_input, compress, hparams, lsgan, name):
    """Average of the discriminator and generator GAN losses.

    Args:
        real_input: real samples passed to the discriminator.
        fake_input: generated samples passed to the same discriminator
            (variables are reused).
        compress: forwarded to `discriminator`.
        hparams: forwarded to `discriminator`.
        lsgan: use least-squares losses with a real label of 0.9 when
            true, log losses otherwise.
        name: variable scope for the discriminator.

    Returns:
        ``(dloss + gloss) / 2``.
    """
    eps = 1e-12  # keeps the logarithms away from log(0)
    with tf.variable_scope(name):
        real_score = discriminator(
            real_input, compress, hparams, "discriminator")
        fake_score = discriminator(
            fake_input, compress, hparams, "discriminator", reuse=True)

        if lsgan:
            d_real = tf.reduce_mean(tf.squared_difference(real_score, 0.9))
            d_fake = tf.reduce_mean(tf.square(fake_score))
            dloss = d_real + d_fake
            gloss = tf.reduce_mean(tf.squared_difference(fake_score, 0.9))
        else:
            # cross_entropy
            dloss = (-tf.reduce_mean(tf.log(real_score + eps))
                     - tf.reduce_mean(tf.log1p(eps - fake_score)))
            gloss = -tf.reduce_mean(tf.log(fake_score + eps))

        loss = (dloss + gloss) / 2
    return loss