def pose_smoothness(poses, global_only=False):
    """Penalise frame-to-frame change of the per-joint rotations.

    For each pair of consecutive frames the relative rotation
    ``R_t * R_{t+1}^T`` is formed per joint, and the cosine of its angle,
    ``(tr(R) - 1) / 2``, is regressed towards 1 (no relative rotation) with a
    weighted mean squared error. The angle itself is not used; the original
    notes say that taking ``acos`` of this quantity produced NaNs.

    Args:
        poses: rotation matrices, F x 24 x 3 x 3 according to the original
            notes (frames x joints x 3 x 3).
        global_only: when true, the first joint column gets weight 10 and
            the remaining columns weight 1 (a notice is printed); otherwise
            every joint has weight 1.

    Returns:
        The value of ``tf.losses.mean_squared_error`` between 1 and the
        cosine of the relative rotation angle.
    """
    # Relative rotation between each frame and its successor
    # (one fewer entry along the frame axis than `poses`).
    relative_rot = tf.matmul(poses[:-1], poses[1:], transpose_b=True)
    cos_angle = (tf.trace(relative_rot) - 1) / 2.

    if not global_only:
        joint_weights = tf.ones_like(cos_angle)
    else:
        print('Pose smoothness increased on global!')
        first_joint_weight = 10 * tf.expand_dims(
            tf.ones_like(cos_angle[:, 0]), 1)
        other_joint_weights = tf.ones_like(cos_angle[:, 1:])
        joint_weights = tf.concat(
            [first_joint_weight, other_joint_weights], 1)

    return tf.losses.mean_squared_error(
        tf.ones_like(cos_angle), cos_angle, weights=joint_weights)
def check_cam_coherence(path):
    """Check the coherence of a camera path.

    Reads ``path + 'cam0.render'`` and ``path + 'cam0_gt.visim'`` and checks
    that both files describe the same camera trajectory:

    * render file: the first 3 lines are skipped and every second remaining
      line is parsed as 10 space-separated fields
      (timestamp, eye xyz, look-at xyz, up xyz);
    * ground-truth file: the first line is skipped and every remaining line
      is parsed as 8 comma-separated fields
      (timestamp, position xyz, 4 quaternion components).

    Timestamps and eye/position must match exactly, and the rotation built
    from the look-at description must agree with the rotation built from
    the quaternion.

    Args:
        path: string prefix that the two file names are appended to (no
            separator is inserted, so it should end with one).

    Raises:
        AssertionError: if timestamps or positions differ.
        NOTE(review): the ``assert`` statements evaluate tensors as Python
        booleans and the result of ``tf.assert_near`` is not used, so this
        presumably relies on eager execution -- confirm against the caller.
    """
    cam_gt = path + 'cam0_gt.visim'
    cam_render = path + 'cam0.render'

    # --- Rendered camera path -------------------------------------------
    lines = tf.string_split([tf.read_file(cam_render)], '\n').values
    # Drop the 3 header lines, then keep every other line. Plain strided
    # slicing replaces the original `lines.shape_as_list()` call, which is
    # not a Tensor method and would need a statically known length anyway.
    lines = lines[3:][::2]
    fields = tf.reshape(tf.string_split(lines, ' ').values, [-1, 10])
    timestamp_from_render, numbers = tf.split(fields, [1, 9], -1)
    numbers = tf.strings.to_number(numbers)
    eye, lookat, up = tf.split(numbers, [3, 3, 3], -1)
    # Normalise each row on its own; without an axis the norm is taken over
    # the whole tensor, which couples all camera poses together.
    up_vector = tf.nn.l2_normalize(up - eye, axis=-1)
    lookat_vector = tf.nn.l2_normalize(lookat - eye, axis=-1)
    rotation_from_lookat = lookat_matrix(up_vector, lookat_vector)

    # --- Ground-truth camera path ---------------------------------------
    lines = tf.string_split([tf.read_file(cam_gt)], '\n').values
    lines = lines[1:]  # skip the header line
    fields = tf.reshape(tf.string_split(lines, ',').values, [-1, 8])
    timestamp_from_gt, numbers = tf.split(fields, [1, 7], -1)
    numbers = tf.strings.to_number(numbers)
    position, quaternion = tf.split(numbers, [3, 4], -1)
    rotation_from_quaternion = from_quaternion(quaternion)

    # --- Consistency checks ---------------------------------------------
    assert tf.reduce_all(tf.equal(timestamp_from_render, timestamp_from_gt))
    assert tf.reduce_all(tf.equal(eye, position))
    # (tr(R1^T R2) - 1) / 2 is the cosine of the angle between the two
    # rotations; it equals 1 when they coincide.
    so3_diff = (tf.trace(
        tf.matmul(rotation_from_lookat, rotation_from_quaternion,
                  transpose_a=True)) - 1) / 2
    tf.assert_near(so3_diff, tf.ones_like(so3_diff))
def _mmd2(K_XX, K_XY, K_YY, const_diagonal=False, biased=False):
    """Squared maximum mean discrepancy estimate from kernel matrices.

    Args:
        K_XX: kernel matrix of the first sample with itself; its static
            first dimension is used as the sample size ``m``.
        K_XY: cross kernel matrix between the two samples.
        K_YY: kernel matrix of the second sample with itself; its static
            first dimension is used as the sample size ``n``.
        const_diagonal: if not ``False``, the constant value assumed for
            every diagonal entry of ``K_XX`` and ``K_YY``; it replaces the
            explicit trace computation. Only used when ``biased`` is false.
        biased: if true, keep the diagonal terms and divide by ``m * m`` and
            ``n * n``; otherwise remove the diagonals and divide by
            ``m * (m - 1)`` and ``n * (n - 1)``.

    Returns:
        Scalar tensor with the MMD^2 estimate.
    """
    m = tf.cast(K_XX.get_shape()[0], tf.float32)
    n = tf.cast(K_YY.get_shape()[0], tf.float32)

    if biased:
        return (tf.reduce_sum(K_XX) / (m * m)
                + tf.reduce_sum(K_YY) / (n * n)
                - 2 * tf.reduce_sum(K_XY) / (m * n))

    # Unbiased estimate: the diagonal (self-similarity) terms are removed.
    if const_diagonal is False:
        diag_sum_X = tf.trace(K_XX)
        diag_sum_Y = tf.trace(K_YY)
    else:
        diag_sum_X = m * const_diagonal
        diag_sum_Y = n * const_diagonal

    return ((tf.reduce_sum(K_XX) - diag_sum_X) / (m * (m - 1))
            + (tf.reduce_sum(K_YY) - diag_sum_Y) / (n * (n - 1))
            - 2 * tf.reduce_sum(K_XY) / (m * n))
def rotation_geodesic(r1, r2):
    """Return the geodesic distance (angle in radians) between two rotations.

    The angle is recovered from the trace of ``r1 * r2^T`` via
    ``cos(angle) = (trace - 1) / 2``.

    Args:
      r1: [BATCH, 3, 3] rotation matrices.
      r2: [BATCH, 3, 3] rotation matrices.

    Returns:
      [BATCH] radian angular difference between rotation matrices.
    """
    relative_rot = tf.matmul(r1, r2, transpose_b=True)
    cos_angle = (tf.trace(relative_rot) - 1) / 2
    # Clamp to acos' domain before inverting the cosine.
    cos_angle = tf.clip_by_value(cos_angle, -1., 1.)
    return tf.acos(cos_angle)
def init_pose(pred_Rs, init_pose, weights=None):
    """Loss keeping the predicted rotations close to an initial pose.

    The initial pose is converted to rotation matrices with
    ``batch_rodrigues`` and compared with ``pred_Rs`` through the cosine of
    the relative rotation angle, ``(tr(R_init * R_pred^T) - 1) / 2``, which
    is regressed towards 1.

    Args:
        pred_Rs: predicted rotations, N x 24 x 3 x 3 per the original notes.
        init_pose: initial pose, 72-D per the original notes; it is reshaped
            to rows of 3 values before the Rodrigues conversion.
        weights: optional weights forwarded to the mean squared error;
            defaults to all ones.

    Returns:
        The value of ``tf.losses.mean_squared_error``.
    """
    init_rotations = tf.reshape(
        batch_rodrigues(tf.reshape(init_pose, [-1, 3])), [-1, 24, 3, 3])
    relative_rot = tf.matmul(init_rotations, pred_Rs, transpose_b=True)
    cos_angle = (tf.trace(relative_rot) - 1) / 2.

    if weights is None:
        weights = tf.ones_like(cos_angle)
    return tf.losses.mean_squared_error(
        tf.ones_like(cos_angle), cos_angle, weights=weights)
def tf_so8_sugra_potential(t_v70):
    """Returns dict with key tensors from the SUGRA potential's TF graph.

    Args:
        t_v70: real tensor with 70 entries (it is paired with a float64
            zero imaginary part of length 70 and contracted with the first
            70 entries of ``e7.t_a_ij_kl``).

    Returns:
        dict with keys ``v70`` (the input), ``vielbein`` (matrix exponential
        of the generator), ``tee_tensor``, ``a1``, ``a2`` and ``potential``.
    """
    tc_28_8_8 = tf.constant(su8.m_28_8_8)

    # Complexify the input and contract it with the generator constants.
    t_v70_complex = tf.complex(
        t_v70, tf.constant([0.0] * 70, dtype=tf.float64))
    t_generators = tf.constant(e7.t_a_ij_kl[:70, :, :], dtype=tf.complex128)
    t_e7_generator_v70 = tf.einsum('v,vIJ->JI', t_v70_complex, t_generators)
    t_complex_vielbein = tf.linalg.expm(t_e7_generator_v70)

    def expand_ijkl(t_ab):
        # Expand both indices of a block through the 28 -> 8x8 constants.
        t_ijB = tf.einsum('AB,Aij->ijB', t_ab, tc_28_8_8)
        return 0.5 * tf.einsum('ijB,BIJ->ijIJ', t_ijB, tc_28_8_8)

    # The four blocks of the vielbein, split at index 28 on both axes.
    t_u_ijIJ = expand_ijkl(t_complex_vielbein[:28, :28])
    t_u_klKL = expand_ijkl(t_complex_vielbein[28:, 28:])
    t_v_ijKL = expand_ijkl(t_complex_vielbein[:28, 28:])
    t_v_klIJ = expand_ijkl(t_complex_vielbein[28:, :28])

    t_uv = t_u_klKL + t_v_klIJ
    t_uu = tf.einsum('lmJK,kmKI->lkIJ', t_u_ijIJ, t_u_klKL)
    t_vv = tf.einsum('lmJK,kmKI->lkIJ', t_v_ijKL, t_v_klIJ)
    t_uuvv = t_uu - t_vv
    t_T = tf.einsum('ijIJ,lkIJ->lkij', t_uv, t_uuvv)

    t_A1 = (-4.0 / 21.0) * tf.trace(tf.einsum('mijn->ijmn', t_T))
    # Antisymmetrize in last 3 indices, taking into account antisymmetry
    # in last two indices.
    t_T_antisym = (t_T
                   + tf.einsum('lijk->ljki', t_T)
                   + tf.einsum('lijk->lkij', t_T))
    t_A2 = (-4.0 / (3 * 3)) * t_T_antisym

    t_A1_real = tf.real(t_A1)
    t_A1_imag = tf.imag(t_A1)
    t_A2_real = tf.real(t_A2)
    t_A2_imag = tf.imag(t_A2)

    # Squared magnitudes, summed separately over real and imaginary parts.
    t_A1_norm_sq = (tf.einsum('ij,ij->', t_A1_real, t_A1_real)
                    + tf.einsum('ij,ij->', t_A1_imag, t_A1_imag))
    t_A2_norm_sq = (tf.einsum('ijkl,ijkl->', t_A2_real, t_A2_real)
                    + tf.einsum('ijkl,ijkl->', t_A2_imag, t_A2_imag))
    t_A1_potential = (-3.0 / 4) * t_A1_norm_sq
    t_A2_potential = (1.0 / 24) * t_A2_norm_sq
    t_potential = t_A1_potential + t_A2_potential

    return dict(v70=t_v70,
                vielbein=t_complex_vielbein,
                tee_tensor=t_T,
                a1=t_A1,
                a2=t_A2,
                potential=t_potential)
def batch_rot2aa(Rs):
    """Convert a batch of rotation matrices to axis-angle vectors.

    Rs is B x 3 x 3. The angle comes from the trace,
    ``cos(theta) = 0.5 * (tr(R) - 1)`` (clamped to [-1, 1]), and the axis
    from the antisymmetric part of the matrix, normalised to unit length.
    The construction follows this reference routine:

        void cMathUtil::RotMatToAxisAngle(const tMatrix& mat,
                                          tVector& out_axis,
                                          double& out_theta)
        {
            double c = 0.5 * (mat(0, 0) + mat(1, 1) + mat(2, 2) - 1);
            c = cMathUtil::Clamp(c, -1.0, 1.0);
            out_theta = std::acos(c);
            if (std::abs(out_theta) < 0.00001)
            {
                out_axis = tVector(0, 0, 1, 0);
            }
            else
            {
                double m21 = mat(2, 1) - mat(1, 2);
                double m02 = mat(0, 2) - mat(2, 0);
                double m10 = mat(1, 0) - mat(0, 1);
                double denom = std::sqrt(m21 * m21 + m02 * m02 + m10 * m10);
                out_axis[0] = m21 / denom;
                out_axis[1] = m02 / denom;
                out_axis[2] = m10 / denom;
                out_axis[3] = 0;
            }
        }

    Unlike the reference, for angles below 1e-5 the un-normalised
    antisymmetric components are used as the axis instead of (0, 0, 1).

    Returns:
        B x 3 tensor ``theta * axis``.
    """
    cos_theta = tf.clip_by_value(0.5 * (tf.trace(Rs) - 1), -1, 1)
    theta = tf.acos(cos_theta)

    # Antisymmetric part of the rotation, proportional to the axis.
    raw_axis = [
        Rs[:, 2, 1] - Rs[:, 1, 2],
        Rs[:, 0, 2] - Rs[:, 2, 0],
        Rs[:, 1, 0] - Rs[:, 0, 1],
    ]
    m21, m02, m10 = raw_axis
    denom = tf.sqrt(m21 * m21 + m02 * m02 + m10 * m10)

    # Skip the normalisation for (near) zero rotations.
    tiny_angle = tf.abs(theta) < 0.00001
    axis = [tf.where(tiny_angle, comp, comp / denom) for comp in raw_axis]

    return tf.expand_dims(theta, 1) * tf.stack(axis, 1)
def latent_loss(self, prior):
    """
    Analytic expression for latent loss which can be used when posterior and prior are
    Gaussian

    https://en.wikipedia.org/wiki/Multivariate_normal_distribution#Kullback%E2%80%93Leibler_divergence

    The returned quantity is, per vertex,
    ``0.5 * (tr(P^-1 S) + d^T P^-1 d - nparams + logdet(P) - logdet(S))``
    with ``S`` this object's covariance, ``P`` the prior covariance and
    ``d`` the difference of the means.

    :param prior: Vertexwise Prior instance which defines the ``mean`` and ``cov`` vertices
                  attributes
    """
    inv_prior_cov = tf.matrix_inverse(prior.cov)
    delta = tf.subtract(self.mean, prior.mean)

    # Trace term: tr(P^-1 S)
    trace_term = tf.trace(tf.matmul(inv_prior_cov, self.cov))

    # Quadratic term: d^T P^-1 d, evaluated as (row . P^-1) . column
    delta_row = tf.reshape(delta, (self.nvertices, 1, -1))
    delta_col = tf.reshape(delta, (self.nvertices, -1, 1))
    row_times_inv = tf.matmul(delta_row, inv_prior_cov)
    quad_term = tf.reshape(
        tf.matmul(row_times_inv, delta_col), [self.nvertices])

    # Log-determinant terms
    prior_log_det = prior.log_det_cov()
    own_log_det = self.log_det_cov()

    divergence = 0.5 * (trace_term + quad_term - self.nparams
                        + prior_log_det - own_log_det)
    return self.log_tf(
        tf.identity(divergence, name="%s_latent_loss" % self.name))
def vectors_alignment(vectors_list):
    """Computes the degree of alignment of location vectors.

    The per-glimpse locations are concatenated into one row per batch
    element, rows are scaled to unit length, and the absolute cosine
    similarity between every pair of distinct rows is averaged.

    Args:
      vectors_list: List of length number of glimpses (ie length time), where
        each element is a tensor of shape [batch, 2] indicating x, y
        coordinates for glimpse locations. The batch size must be statically
        known.

    Returns:
      alignment_metric: Number indicating how consistent glimpse locations
        are across the batch (0: glimpses are different. 1: glimpse locations
        are the same).
    """
    # One row per batch element holding all of its glimpse coordinates.
    locations = tf.concat(vectors_list, axis=1)
    batch = locations.shape.as_list()[0]

    row_norms = tf.sqrt(tf.reduce_sum(locations**2, axis=1, keepdims=True))
    locations = locations / (row_norms + 1e-8)

    # Absolute value: 0 and 180 degrees difference are not distinguished.
    # The result is batch x batch.
    abs_similarity = tf.abs(tf.matmul(locations, tf.transpose(locations)))

    # Drop the diagonal (self-similarity) and average over ordered pairs.
    off_diagonal_sum = (tf.reduce_sum(abs_similarity)
                        - tf.trace(abs_similarity))
    return off_diagonal_sum / (batch * (batch - 1))
def train_model():
    """Build the review-attention + FM rating model and train it.

    The graph combines (a) review text passed through an attention layer
    (``L_atten``), a convolution (``Conv``), a mutual-attention weighting
    based on ``EuclideanDistances`` and a local pooling layer (``Local``),
    with (b) user/item id embeddings, and feeds the sum of both through a
    factorization-machine layer to predict a rating.

    Every epoch the model is trained on all batches, evaluated with
    ``eval_model`` on the validation and test batches, and one summary line
    is printed. All data and hyper-parameters are read from module-level
    names (``user_input``, ``batch_size``, ``epochs``, ...).

    Returns:
        None.
    """
    # ------------------------------------------------------------------
    # Inputs
    # ------------------------------------------------------------------
    users = tf.placeholder(tf.int32, shape=[None])
    items = tf.placeholder(tf.int32, shape=[None])
    users_inputs = tf.placeholder(tf.int32, shape=[None, max_doc_length])
    items_inputs = tf.placeholder(tf.int32, shape=[None, max_doc_length])
    ratings = tf.placeholder(tf.float32, shape=[None, 1])
    # NOTE(review): fed during training but not consumed by any op built
    # in this function.
    dropout_rate = tf.placeholder(tf.float32)

    # ------------------------------------------------------------------
    # Word embeddings; one extra all-zero row is appended for padding
    # ------------------------------------------------------------------
    text_embedding = tf.Variable(word_embedding_mtrx, dtype=tf.float32,
                                 name="review_text_embeds")
    padding_embedding = tf.Variable(np.zeros([1, word_latent_dim]),
                                    dtype=tf.float32)
    text_mask = tf.constant([1.0] * text_embedding.get_shape()[0] + [0.0])
    word_embeddings = tf.concat([text_embedding, padding_embedding], 0)
    # The mask multiplies the appended padding row by zero.
    word_embeddings = word_embeddings * tf.expand_dims(text_mask, -1)

    # ------------------------------------------------------------------
    # Id embeddings and biases
    # ------------------------------------------------------------------
    user_entity_embedding = tf.Variable(
        tf.random_normal([num_users, latent_dim], mean=0, stddev=0.02),
        name="user_entity_embeddings")
    item_entity_embedding = tf.Variable(
        tf.random_normal([num_items, latent_dim], mean=0, stddev=0.02),
        name="item_entity_embeddings")
    user_bias = tf.Variable(
        tf.random_normal([num_users, 1], mean=0, stddev=0.02),
        name="review_user_bias")
    item_bias = tf.Variable(
        tf.random_normal([num_items, 1], mean=0, stddev=0.02),
        name="review_item_bias")
    user_bs = tf.nn.embedding_lookup(user_bias, users)
    item_bs = tf.nn.embedding_lookup(item_bias, items)
    user_entity_embeds = tf.nn.embedding_lookup(user_entity_embedding, users)
    item_entity_embeds = tf.nn.embedding_lookup(item_entity_embedding, items)

    # Review documents -> [batch, max_doc_length, word_latent_dim, 1]
    # (the batch dimension is None because the placeholder leaves it open).
    user_reviews_representation = tf.nn.embedding_lookup(
        word_embeddings, users_inputs)
    user_reviews_representation_expnd = tf.expand_dims(
        user_reviews_representation, -1)
    item_reviews_representation = tf.nn.embedding_lookup(
        word_embeddings, items_inputs)
    item_reviews_representation_expnd = tf.expand_dims(
        item_reviews_representation, -1)

    # ------------------------------------------------------------------
    # Local attention layer
    # ------------------------------------------------------------------
    W_u = tf.Variable(
        tf.truncated_normal([window_size, word_latent_dim, 1, 1], stddev=0.3),
        name="Latten_W_u")
    W_i = tf.Variable(
        tf.truncated_normal([window_size, word_latent_dim, 1, 1], stddev=0.3),
        name="Latten_W_i")
    u_scores, i_scores = L_atten(user_reviews_representation_expnd,
                                 item_reviews_representation_expnd, W_u, W_i)
    u_scores = tf.squeeze(u_scores, -1)
    i_scores = tf.squeeze(i_scores, -1)
    user_reviews_representation_expnd = tf.squeeze(
        user_reviews_representation_expnd, -1)
    item_reviews_representation_expnd = tf.squeeze(
        item_reviews_representation_expnd, -1)
    # Re-weight every word embedding by its attention score.
    att_user = tf.multiply(user_reviews_representation_expnd, u_scores)
    att_item = tf.multiply(item_reviews_representation_expnd, i_scores)

    # ------------------------------------------------------------------
    # Convolution
    # ------------------------------------------------------------------
    W_u1 = tf.Variable(
        tf.truncated_normal([window_size, word_latent_dim, 1, num_filters],
                            stddev=0.3),
        name="Conv_W_u")
    W_i1 = tf.Variable(
        tf.truncated_normal([window_size, word_latent_dim, 1, num_filters],
                            stddev=0.3),
        name="Conv_W_i")
    att_user = tf.expand_dims(att_user, -1)
    att_item = tf.expand_dims(att_item, -1)
    Conv_user, Conv_item = Conv(num_filters, att_user, att_item, W_u1, W_i1)
    Conv_user = tf.squeeze(Conv_user, 2)
    Conv_item = tf.squeeze(Conv_item, 2)

    # ------------------------------------------------------------------
    # Mutual attention: similarity = 1 / (1 + distance)
    # ------------------------------------------------------------------
    euclidean = EuclideanDistances(Conv_user, Conv_item)
    euclidean = 1 / (1 + euclidean)
    # Row-wise / column-wise aggregation. Per the original author's note the
    # paper uses reduce_sum(), but reduce_mean() was needed to get correct
    # results when the FM layer produces the rating.
    eu_user = tf.expand_dims(tf.reduce_mean(euclidean, axis=2), -1)
    eu_item = tf.expand_dims(tf.reduce_mean(euclidean, axis=1), -1)
    Mul_user = Conv_user * eu_user
    Mul_item = Conv_item * eu_item

    # ------------------------------------------------------------------
    # Local pooling layer
    # ------------------------------------------------------------------
    dim = int(Mul_user.get_shape()[2])
    # NOTE(review): the item-side variables below reuse the user-side names
    # ("Conv_W_u2", "Conv_W_u3"); left unchanged so variable names stay as
    # they were.
    W_u2 = tf.Variable(
        tf.truncated_normal([window_size, 1, 1, 1], stddev=0.3),
        name="Conv_W_u2")
    W_i2 = tf.Variable(
        tf.truncated_normal([window_size, 1, 1, 1], stddev=0.3),
        name="Conv_W_u2")
    W_u3 = tf.Variable(
        tf.truncated_normal([window_size, dim, 1, num_filters], stddev=0.3),
        name="Conv_W_u3")
    W_i3 = tf.Variable(
        tf.truncated_normal([window_size, dim, 1, num_filters], stddev=0.3),
        name="Conv_W_u3")
    Mul_user = tf.expand_dims(Mul_user, -1)
    Mul_item = tf.expand_dims(Mul_item, -1)
    user, item = Local(Mul_user, Mul_item, W_u2, W_i2, W_u3, W_i3)

    # ------------------------------------------------------------------
    # MLP layer: project pooled review features to latent_dim
    # ------------------------------------------------------------------
    W_mlp_u = tf.Variable(
        tf.random_normal([num_filters, latent_dim], mean=0, stddev=0.2),
        name="review_W_mlp_u")
    b_mlp_u = tf.Variable(tf.constant(0., shape=[latent_dim]),
                          name="review_b_mlp_u")
    W_mlp_i = tf.Variable(
        tf.random_normal([num_filters, latent_dim], mean=0, stddev=0.2),
        name="review_W_mlp_i")
    b_mlp_i = tf.Variable(tf.constant(0., shape=[latent_dim]),
                          name="review_b_mlp_i")
    user_embeds = tf.nn.relu(tf.matmul(user, W_mlp_u) + b_mlp_u)
    item_embeds = tf.nn.relu(tf.matmul(item, W_mlp_i) + b_mlp_i)

    # ------------------------------------------------------------------
    # Feature interaction with the id embeddings
    # ------------------------------------------------------------------
    u_map = tf.Variable(
        tf.truncated_normal([latent_dim, latent_dim], stddev=0.3),
        name="Inter_u")
    i_map = tf.Variable(
        tf.truncated_normal([latent_dim, latent_dim], stddev=0.3),
        name="Inter_i")
    Interaction_u = tf.matmul(user_entity_embeds, u_map)
    Interaction_i = tf.matmul(item_entity_embeds, i_map)

    # ------------------------------------------------------------------
    # FM layer
    # ------------------------------------------------------------------
    final_user = user_embeds + Interaction_u
    final_item = item_embeds + Interaction_i
    embeds_sum = tf.concat([final_user, final_item], 1, name="concat_embed")
    w_0 = tf.Variable(tf.zeros(1), name="review_w_0")
    w_1 = tf.Variable(tf.truncated_normal([1, latent_dim * 2], stddev=0.3),
                      name="review_w_1")
    v = tf.Variable(tf.truncated_normal([latent_dim * 2, v_dim], stddev=0.3),
                    name="review_v")

    # Linear part of the FM.
    J_1 = w_0 + tf.matmul(embeds_sum, w_1, transpose_b=True)

    # Pairwise part: x_i x_j <v_i, v_j>, built once and reused for both the
    # full sum (J_2) and the diagonal (J_3).
    embeds_sum_1 = tf.expand_dims(embeds_sum, -1)
    embeds_sum_2 = tf.expand_dims(embeds_sum, 1)
    pairwise = tf.multiply(tf.matmul(embeds_sum_1, embeds_sum_2),
                           tf.matmul(v, v, transpose_b=True))
    J_2 = tf.reduce_sum(tf.reduce_sum(pairwise, 2), 1, keep_dims=True)
    J_3 = tf.trace(pairwise)
    # Half of (all pairs minus the i == j terms); J_3 is expanded to a
    # column so it lines up with J_2.
    fz = 0.5 * (J_2 - tf.expand_dims(J_3, -1))

    predict_rating = J_1 + fz + user_bs + item_bs
    loss1 = tf.reduce_mean(tf.squared_difference(predict_rating, ratings))
    lamda = lambda_1 * (tf.nn.l2_loss(final_user)
                        + tf.nn.l2_loss(final_item)
                        + tf.nn.l2_loss(v))
    loss = loss1 + lamda
    train_step = tf.train.RMSPropOptimizer(learning_rate).minimize(loss)
    # Constructed as in the original; nothing below saves or restores.
    saver = tf.train.Saver(max_to_keep=1)

    # ------------------------------------------------------------------
    # Training / evaluation loop
    # ------------------------------------------------------------------
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        num_batches = int(math.ceil(len(user_input) / float(batch_size)))
        for e in range(epochs):
            t = time()
            loss_total = 0.0
            train_msetotal = 0.0
            count = 0.0
            for i in range(num_batches):
                (user_batch, item_batch, user_input_batch, item_input_batch,
                 rates_batch) = get_train_instance_batch_change(
                     i, batch_size, user_input, item_input, rateings,
                     user_reviews, item_reviews)
                # Fetch only what is accumulated below. The original also
                # pulled the whole embedding matrix and several attention
                # tensors to the host on every step without using them.
                _, loss_val, los1 = sess.run(
                    [train_step, loss, loss1],
                    feed_dict={users: user_batch,
                               items: item_batch,
                               users_inputs: user_input_batch,
                               items_inputs: item_input_batch,
                               ratings: rates_batch,
                               dropout_rate: drop_out})
                loss_total += loss_val
                train_msetotal += los1
                count += 1.0
            t1 = time()

            # Validation: eval_model appends to the lists passed in.
            val_mses, val_maes = [], []
            predict1 = []
            for i in range(len(user_input_val)):
                eval_model(users, items, users_inputs, items_inputs,
                           dropout_rate, predict_rating, sess,
                           user_vals[i], item_vals[i], user_input_val[i],
                           item_input_val[i], rating_input_val[i],
                           val_mses, val_maes, predict1)
            val_mse = np.array(val_mses).mean()
            t2 = time()

            # Test
            mses, maes = [], []
            predict2 = []
            for i in range(len(user_input_test)):
                eval_model(users, items, users_inputs, items_inputs,
                           dropout_rate, predict_rating, sess,
                           user_tests[i], item_tests[i], user_input_test[i],
                           item_input_test[i], rating_input_test[i],
                           mses, maes, predict2)
            mse = np.array(mses).mean()
            mae = np.array(maes).mean()
            t3 = time()

            print("epoch%d train time: %.3fs test time: %.3f loss = %.3f train_mse = %.3f val_mse = %.3f test_mse = %.3f test_mae = %.3f"%(e, (t1 - t), (t3 - t2), loss_total/count, train_msetotal/count, val_mse, mse, mae))
def train_model():
    """Build and train the id-embedding factorization-machine model.

    User and item ids are embedded, the vector
    ``[user * item, user, item]`` is fed through a factorization-machine
    layer, and the result is the predicted rating. Every epoch all training
    batches are run, the test batches are evaluated with ``eval_model``, and
    one summary line is printed. Data and hyper-parameters are read from
    module-level names.

    Returns:
        None.
    """
    users = tf.placeholder(tf.int32, shape=[None])
    items = tf.placeholder(tf.int32, shape=[None])
    ratings = tf.placeholder(tf.float32, shape=[None, 1])

    user_entity_embedding = tf.Variable(
        tf.random_normal([num_users, latent_dim], mean=0, stddev=0.02),
        name="user_entity_embeddings")
    item_entity_embedding = tf.Variable(
        tf.random_normal([num_items, latent_dim], mean=0, stddev=0.02),
        name="item_entity_embeddings")
    user_entity_embeds = tf.nn.embedding_lookup(user_entity_embedding, users)
    item_entity_embeds = tf.nn.embedding_lookup(item_entity_embedding, items)

    entity_embeds_sum = tf.concat([
        tf.multiply(user_entity_embeds, item_entity_embeds),
        user_entity_embeds,
        item_entity_embeds,
    ], 1)

    # FM layer
    w_entity_0 = tf.Variable(tf.zeros(1), name="entity_w_0")
    w_entity_1 = tf.Variable(
        tf.truncated_normal([1, latent_dim * 3], stddev=0.3),
        name="entity_w_1")
    v_entity = tf.Variable(
        tf.truncated_normal([latent_dim * 3, v_dim], stddev=0.3),
        name="entity_v")

    # Linear part, shape [batch, 1].
    J_e_1 = w_entity_0 + tf.matmul(
        entity_embeds_sum, w_entity_1, transpose_b=True)

    # Pairwise part x_i x_j <v_i, v_j>, built once and reused.
    entity_embeds_sum_1 = tf.expand_dims(entity_embeds_sum, -1)
    entity_embeds_sum_2 = tf.expand_dims(entity_embeds_sum, 1)
    pairwise = tf.multiply(
        tf.matmul(entity_embeds_sum_1, entity_embeds_sum_2),
        tf.matmul(v_entity, v_entity, transpose_b=True))
    J_e_2 = tf.reduce_sum(tf.reduce_sum(pairwise, 2), 1, keep_dims=True)
    J_e_3 = tf.trace(pairwise)
    # J_e_3 has shape [batch] while J_e_2 is [batch, 1]; without the extra
    # axis the subtraction broadcasts to [batch, batch] and every example
    # is mixed with every other one.
    J_e_total = J_e_1 + 0.5 * (J_e_2 - tf.expand_dims(J_e_3, -1))

    predict_rating = J_e_total
    loss1 = tf.reduce_mean(tf.squared_difference(predict_rating, ratings))
    lamda = lambda_1 * (tf.nn.l2_loss(user_entity_embedding)
                        + tf.nn.l2_loss(item_entity_embedding)
                        + tf.nn.l2_loss(v_entity))
    loss = loss1 + lamda
    train_step = tf.train.RMSPropOptimizer(learning_rate).minimize(loss)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        num_batches = int(math.ceil(len(user_input) / float(batch_size)))
        for e in range(epochs):
            t = time()
            loss_total = 0.0
            all_trainmse = 0.0
            count = 0.0
            for i in range(num_batches):
                # The review batches are returned too but this model only
                # consumes ids and ratings.
                (user_batch, item_batch, user_input_batch, item_input_batch,
                 rates_batch) = get_train_instance_batch_change(
                     i, batch_size, user_input, item_input, rateings,
                     user_reviews, item_reviews)
                _, loss_val, los1 = sess.run(
                    [train_step, loss, loss1],
                    feed_dict={
                        users: user_batch,
                        items: item_batch,
                        ratings: rates_batch,
                    })
                loss_total += loss_val
                all_trainmse += los1
                count += 1.0
            t1 = time()

            mses, maes = [], []
            for i in range(len(user_input_test)):
                mses, maes = eval_model(users, items, predict_rating,
                                        user_tests[i], item_tests[i],
                                        rating_input_test[i], sess,
                                        mses, maes)
            mse = np.array(mses).mean()
            mae = np.array(maes).mean()
            t2 = time()

            print(
                "epoch%d train time: %.3fs test time: %.3f loss = %.3f train_mse = %.3f test_mse = %.3f test_mae = %.3f"
                % (e, (t1 - t), (t2 - t1), loss_total / count,
                   all_trainmse / count, mse, mae))
def create_model(self, x, y, *args):
    """Build the TensorFlow graph of the model objective.

    Stores the data as constants, creates the kernel hyper-parameters for
    the configured kernel, factorises the training covariance with a
    Cholesky decomposition and assembles ``self.objective`` from the
    likelihood term and log-space priors on the hyper-parameters.

    Args:
        x: input array; ``x.shape[1]`` is used as the number of
            lengthscales.
        y: target array. When ``self.process_y`` is set, a ``Regression``
            fit is subtracted and the residual is divided by its standard
            deviation before use.
        *args: ``args[0]`` holds the initial hyper-parameter values; for the
            'Squared Exponential' kernel it is indexed as
            (signal variance, lengthscale, noise variance).

    Side effects:
        Sets many attributes on ``self`` (``t_X``, ``t_Y``, ``K_xx``,
        ``L_xx``, ``logdet``, ``hprior``, ``term1``, ``objective``, ...).
    """
    if self.process_y:
        # Remove a fitted trend and rescale the residuals.
        self.f_mu = Regression().fit(x, y)
        self.Ymu = self.f_mu(x)
        self.Ys2 = np.std((y - self.Ymu))
        y = (y - self.Ymu) / self.Ys2

    self.t_X = tf.constant(x, dtype=self.dtype)
    self.t_Y = tf.constant(y, dtype=self.dtype)
    self.t_N = tf.shape(self.t_Y)[0]
    self.t_D = tf.shape(self.t_Y)[1]
    self.t_Q = tf.shape(self.t_X)[0]
    self.t_M = tf.shape(self.t_X)[1]
    self.M = x.shape[1]

    if self.kernel == 'Squared Exponential':
        self.kernel_function = self.sq_exp_kernel
        self.signal_var = self.init_variable(args[0][0], positive=True)
        self.lengthscale = self.init_variable([args[0][1]] * self.M,
                                              positive=True,
                                              multi=self.variable_l)
        self.noise_var = self.init_variable(args[0][2], positive=True)
        self.hparamd = ['Signal Variance', 'Lengthscale']
        self.hparams = [self.signal_var, self.lengthscale]

    if self.kernel == 'Periodic':
        # NOTE(review): this branch also uses the squared-exponential
        # kernel function and initialises every parameter from args[0][0];
        # looks like a copy-paste leftover -- confirm the intent.
        self.kernel_function = self.sq_exp_kernel
        self.signal_var = self.init_variable(args[0][0], True)
        self.gamma = self.init_variable(args[0][0], True)
        self.period = self.init_variable(args[0][0], True)
        self.noise_var = self.init_variable(args[0][0], True)
        # Mean and variance parameters used by the Poisson objective below.
        self.p_mu = self.init_variable(tf.log(self.t_Y), False)
        self.p_s2 = self.init_variable(1.0, True)
        self.hparamd = ['Signal Variance', 'Gamma', 'Period']
        self.hparams = [self.signal_var, self.gamma, self.period]

    self.create_kernel = lambda t_x1, t_x2: self.kernel_function(
        t_x1, t_x2, self.hparams)

    ### CREATING THE TRAINING MATRICES ###
    self.K_xx = self.create_kernel(self.t_X, self.t_X) + (
        self.noise_var + self.jitter) * tf.eye(self.t_N, dtype=self.dtype)
    self.L_xx = tf.cholesky(self.K_xx)
    # log|K| from the Cholesky factor: 2 * sum(log(diag(L))).
    self.logdet = 2.0 * tf.reduce_sum(tf.log(tf.diag_part(self.L_xx)))
    # 0.5 * ||L^-1 Y||^2, i.e. 0.5 * tr(Y^T K^-1 Y).
    self.Kinv_YYt = 0.5 * tf.reduce_sum(
        tf.square(
            tf.matrix_triangular_solve(self.L_xx, self.t_Y, lower=True)))

    ### Initialising loose priors ###
    # Each prior term is 0.5 * log(param)^2.
    self.hprior = 0
    if self.variable_l:
        self.hprior += 0.5 * tf.square(tf.log(self.hparams[0]))
        self.hprior += tf.reduce_sum(
            0.5 * tf.square(tf.log(self.hparams[1])))
    else:
        for i in self.hparams:
            self.hprior += 0.5 * tf.square(tf.log(i))
    self.noise_prior = 0.5 * tf.square(tf.log(self.noise_var))

    ### Negative marginal log likelihood under Gaussian assumption ###
    if self.distribution == 'Gaussian':
        pi_term = tf.constant(0.5 * np.log(2.0 * np.pi), dtype=self.dtype)
        self.term1 = pi_term * tf.cast(self.t_D, dtype=self.dtype) * tf.cast(self.t_N, dtype=self.dtype) \
            + 0.5 * tf.cast(self.t_D, dtype=self.dtype) * self.logdet \
            + self.Kinv_YYt

    if self.distribution == 'Poisson' and self.kernel == 'Periodic':
        self.Kinv = tf.cholesky_solve(self.L_xx,
                                      tf.eye(self.t_N, dtype=self.dtype))
        # The outer product p_mu p_mu^T was garbled in the source by an
        # e-mail obfuscator ("[email protected]"); restored here.
        self.term1 = -tf.reduce_sum(self.t_Y * self.p_mu - tf.exp(self.p_mu + self.p_s2 / 2)) \
            + (1 / 2) * (tf.trace(self.Kinv @ (self.p_s2 * tf.eye(self.t_N, dtype=self.dtype) + self.p_mu @ tf.transpose(self.p_mu))) \
            - tf.cast(self.t_N, dtype=self.dtype) + self.logdet - tf.cast(self.t_N, dtype=self.dtype) * tf.log(self.p_s2))

    # NOTE(review): self.term1 is only defined by the two branches above;
    # any other distribution/kernel combination fails here.
    self.objective = self.term1 + self.hprior + self.noise_prior
def inner_cca_objective(y_pred, y_true):
    """
    It is the loss function of CCA as introduced in the original paper. There can be other formulations.
    It is implemented on Tensorflow based on github@VahidooX's cca loss on Theano.
    y_true is just ignored

    The first half of ``y_pred``'s second axis is treated as view 1 and the
    second half as view 2. For each batch instance the two views are
    centred, regularised covariance estimates are formed, and the
    correlation is computed from the matrix
    ``T = Sigma11^(-1/2) Sigma12 Sigma22^(-1/2)``.

    Reads ``use_all_singular_values`` and ``outdim_size`` from the
    enclosing scope.

    NOTE(review): the source formatting was lost, so the nesting of the
    final ``return`` is reconstructed; it is placed after the loop here.
    ``batch_corr`` is initialised but never updated, so only one instance's
    correlation reaches the return value -- confirm whether the per-instance
    values were meant to be accumulated.
    """
    # Regularisation added to the diagonal of each view's covariance.
    r1 = 1e-4
    r2 = 1e-4
    # Eigenvalues at or below this threshold are discarded.
    eps = 1e-12
    o1 = o2 = int(y_pred.shape[1] // 2)
    print(y_pred.shape)
    # Static batch size; must be known for the Python loop below.
    batch_size = y_pred.shape[0]
    batch_corr = 0
    for instance in range(batch_size):
        # unpack (separate) the output of networks for view 1 and view 2
        H1 = tf.transpose(y_pred[instance, 0:o1])
        H2 = tf.transpose(y_pred[instance, o1:o1 + o2])
        print(H1.shape)
        H1 = tf.keras.backend.batch_flatten(H1)
        H2 = tf.keras.backend.batch_flatten(H2)
        print(H1.shape)
        m = tf.shape(H1)[1]
        print(m)
        # Centre each view by subtracting the mean over its second axis.
        H1bar = H1 - tf.cast(tf.divide(1, m), tf.float32) * tf.matmul(
            H1, tf.ones([m, m]))
        H2bar = H2 - tf.cast(tf.divide(1, m), tf.float32) * tf.matmul(
            H2, tf.ones([m, m]))
        # Cross- and auto-covariance estimates (1 / (m - 1) scaling).
        SigmaHat12 = tf.cast(tf.divide(1, m - 1), tf.float32) * tf.matmul(
            H1bar, H2bar, transpose_b=True)  # [dim, dim]
        SigmaHat11 = tf.cast(tf.divide(1, m - 1), tf.float32) * tf.matmul(
            H1bar, H1bar, transpose_b=True) + r1 * tf.eye(o1)
        SigmaHat22 = tf.cast(tf.divide(1, m - 1), tf.float32) * tf.matmul(
            H2bar, H2bar, transpose_b=True) + r2 * tf.eye(o2)
        # Calculating the root inverse of covariance matrices by using eigen decomposition
        [D1, V1] = tf.self_adjoint_eig(SigmaHat11)
        [D2, V2
         ] = tf.self_adjoint_eig(SigmaHat22)  # Added to increase stability
        posInd1 = tf.where(tf.greater(D1, eps))
        D1 = tf.gather_nd(
            D1, posInd1)  # get eigen values that are larger than eps
        # Keep only the eigenvectors (columns) of the retained eigenvalues.
        V1 = tf.transpose(
            tf.nn.embedding_lookup(tf.transpose(V1), tf.squeeze(posInd1)))
        posInd2 = tf.where(tf.greater(D2, eps))
        D2 = tf.gather_nd(D2, posInd2)
        V2 = tf.transpose(
            tf.nn.embedding_lookup(tf.transpose(V2), tf.squeeze(posInd2)))
        # Sigma^(-1/2) = V diag(D^-0.5) V^T
        SigmaHat11RootInv = tf.matmul(tf.matmul(V1, tf.diag(D1**-0.5)),
                                      V1,
                                      transpose_b=True)  # [dim, dim]
        SigmaHat22RootInv = tf.matmul(tf.matmul(V2, tf.diag(D2**-0.5)),
                                      V2,
                                      transpose_b=True)
        Tval = tf.matmul(tf.matmul(SigmaHat11RootInv, SigmaHat12),
                         SigmaHat22RootInv)
        if use_all_singular_values:
            # sqrt(tr(T^T T))
            corr = tf.sqrt(
                tf.trace(tf.matmul(Tval, Tval, transpose_a=True)))
        else:
            # Sum of the square roots of the largest eigenvalues of T^T T
            # (at most outdim_size of them, after dropping those <= eps).
            [U, V] = tf.self_adjoint_eig(
                tf.matmul(Tval, Tval, transpose_a=True))
            U = tf.gather_nd(U, tf.where(tf.greater(U, eps)))
            kk = tf.reshape(tf.cast(tf.shape(U), tf.int32), [])
            K = tf.minimum(kk, outdim_size)
            w, _ = tf.nn.top_k(U, k=K)
            corr = tf.reduce_sum(tf.sqrt(w))
    # Negated so that minimising the loss maximises the correlation.
    return -corr
def train_model():
    """Build and train the review-CNN + factorization-machine model.

    Review documents of the user and the item are embedded, passed through
    ``cnn`` to obtain one feature vector each, concatenated and fed through
    a factorization-machine layer that outputs the predicted rating. Every
    epoch all training batches are run, the test batches are evaluated with
    ``eval_model``, and one summary line is printed. Data and
    hyper-parameters are read from module-level names.

    Returns:
        None.
    """
    # NOTE(review): `users` / `items` are fed and handed to eval_model but
    # no op built in this function reads them.
    users = tf.placeholder(tf.int32, shape=[None])
    items = tf.placeholder(tf.int32, shape=[None])
    users_inputs = tf.placeholder(tf.int32, shape=[None, max_doc_length])
    items_inputs = tf.placeholder(tf.int32, shape=[None, max_doc_length])
    ratings = tf.placeholder(tf.float32, shape=[None, 1])
    dropout_rate = tf.placeholder(tf.float32)

    # Word embeddings with one extra row appended for padding; the mask
    # multiplies that appended row by zero.
    text_embedding = tf.Variable(word_embedding_mtrx, dtype=tf.float32,
                                 name="review_text_embeds")
    padding_embedding = tf.Variable(np.zeros([1, word_latent_dim]),
                                    dtype=tf.float32)
    text_mask = tf.constant([1.0] * text_embedding.get_shape()[0] + [0.0])
    word_embeddings = tf.concat([text_embedding, padding_embedding], 0)
    word_embeddings = word_embeddings * tf.expand_dims(text_mask, -1)

    # Review documents -> [batch, max_doc_length, word_latent_dim, 1]
    # (the batch dimension is None because the placeholder leaves it open).
    user_reviews_representation = tf.nn.embedding_lookup(
        word_embeddings, users_inputs)
    user_reviews_representation_expnd = tf.expand_dims(
        user_reviews_representation, -1)
    item_reviews_representation = tf.nn.embedding_lookup(
        word_embeddings, items_inputs)
    item_reviews_representation_expnd = tf.expand_dims(
        item_reviews_representation, -1)

    W_u = tf.Variable(
        tf.truncated_normal([window_size, word_latent_dim, 1, num_filters],
                            stddev=0.03),
        name="W_u")
    W_i = tf.Variable(
        tf.truncated_normal([window_size, word_latent_dim, 1, num_filters],
                            stddev=0.03),
        name="W_i")
    W_u1 = tf.Variable(
        tf.truncated_normal([num_filters, latent_dim], stddev=0.3),
        name="W_u1")
    W_i1 = tf.Variable(
        tf.truncated_normal([num_filters, latent_dim], stddev=0.3),
        name="W_i1")

    # Pass the placeholder, not the global `drop_rate` constant: the
    # original baked the training value into the graph, so the value fed
    # through `dropout_rate` (including whatever eval_model feeds) had no
    # effect.
    # NOTE(review): assumes `cnn` accepts a tensor for its last argument --
    # confirm against its definition.
    user, item = cnn(user_reviews_representation_expnd,
                     item_reviews_representation_expnd,
                     W_u, W_i, W_u1, W_i1, dropout_rate)
    entity_embeds_sum = tf.concat([user, item], 1)

    # FM layer
    w_entity_0 = tf.Variable(tf.zeros(1), name="entity_w_0")
    w_entity_1 = tf.Variable(
        tf.truncated_normal([1, latent_dim * 2], stddev=0.03),
        name="entity_w_1")
    v_entity = tf.Variable(
        tf.truncated_normal([latent_dim * 2, v_dim], stddev=0.03),
        name="entity_v")

    # Linear part, shape [batch, 1].
    J_e_1 = w_entity_0 + tf.matmul(
        entity_embeds_sum, w_entity_1, transpose_b=True)

    # Pairwise part x_i x_j <v_i, v_j>, built once and reused.
    entity_embeds_sum_1 = tf.expand_dims(entity_embeds_sum, -1)
    entity_embeds_sum_2 = tf.expand_dims(entity_embeds_sum, 1)
    pairwise = tf.multiply(
        tf.matmul(entity_embeds_sum_1, entity_embeds_sum_2),
        tf.matmul(v_entity, v_entity, transpose_b=True))
    J_e_2 = tf.reduce_sum(tf.reduce_sum(pairwise, 2), 1, keep_dims=True)
    J_e_3 = tf.trace(pairwise)
    # J_e_3 is expanded to a column so it lines up with J_e_2.
    J_e_total = J_e_1 + 0.5 * (J_e_2 - tf.expand_dims(J_e_3, -1))

    predict_rating = J_e_total
    loss1 = tf.reduce_mean(tf.squared_difference(predict_rating, ratings))
    lamda = lambda_1 * (tf.nn.l2_loss(user) + tf.nn.l2_loss(item)
                        + tf.nn.l2_loss(v_entity))
    loss = loss1 + lamda
    train_step = tf.train.RMSPropOptimizer(learning_rate).minimize(loss)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        num_batches = int(math.ceil(len(user_input) / float(batch_size)))
        for e in range(epochs):
            t = time()
            loss_total = 0.0
            total_trainmse = 0.0
            count = 0.0
            for i in range(num_batches):
                (user_batch, item_batch, user_input_batch, item_input_batch,
                 rates_batch) = get_train_instance_batch_change(
                     i, batch_size, user_input, item_input, rateings,
                     user_reviews, item_reviews)
                _, loss_val, los1 = sess.run(
                    [train_step, loss, loss1],
                    feed_dict={
                        users: user_batch,
                        items: item_batch,
                        users_inputs: user_input_batch,
                        items_inputs: item_input_batch,
                        ratings: rates_batch,
                        dropout_rate: drop_rate,
                    })
                total_trainmse += los1
                loss_total += loss_val
                count += 1.0
            t1 = time()

            mses, maes = [], []
            for i in range(len(user_input_test)):
                mses, maes = eval_model(users, items, users_inputs,
                                        items_inputs, dropout_rate,
                                        predict_rating, sess, user_tests[i],
                                        item_tests[i], user_input_test[i],
                                        item_input_test[i],
                                        rating_input_test[i], mses, maes)
            mse = np.array(mses).mean()
            mae = np.array(maes).mean()
            t2 = time()

            print(
                "epoch%d train time: %.3fs test time: %.3f loss = %.3f train_mse = %.3f testmse = %.3f testmae = %.3f"
                % (e, (t1 - t), (t2 - t1), loss_total / count,
                   total_trainmse / count, mse, mae))