def do_matops(shape=(100, 100)):
    # exercise dense linear algebra: products, eigendecompositions, SVDs
    A = uniform(shape)
    B = uniform(shape)
    C = matmul(A, B)
    D = matmul(B, A)
    eig(A); eig(B); eig(C); eig(D)
    svd(A); svd(B); svd(C); svd(D)
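# A minimal sketch of how do_matops might be wired up, assuming the bare
# names are aliases for TensorFlow ops (the snippet above does not show its
# imports, so these bindings are an assumption):
import tensorflow as tf

uniform = tf.random.uniform
matmul = tf.linalg.matmul
eig = tf.linalg.eig
svd = tf.linalg.svd

do_matops((64, 64))  # results are discarded; useful only as a compute workload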
def compute_moments(self, x):
    '''
    compute moments of output Gaussian distribution
    INPUTS:
        x - input
    OUTPUTS:
        mu_y - mean of output Gaussian distribution
        log_sig_sq_y - log variance of output Gaussian distribution
    '''
    hidden1_pre = tfm.add(tfl.matmul(x, self.weights['W_x_to_h1']),
                          self.weights['b_x_to_h1'])
    hidden_post = self.nonlinearity(hidden1_pre)
    num_layers_middle = np.shape(self.N_h)[0] - 1
    ni = 1  # keeps the head weight keys valid when there are no middle layers
    for i in range(num_layers_middle):
        ni = i + 2
        hidden_pre = tfm.add(
            tfl.matmul(hidden_post,
                       self.weights['W_h{}_to_h{}'.format(ni - 1, ni)]),
            self.weights['b_h{}_to_h{}'.format(ni - 1, ni)])
        hidden_post = self.nonlinearity(hidden_pre)
    mu_y = tfm.add(
        tfl.matmul(hidden_post, self.weights['W_h{}_to_muy'.format(ni)]),
        self.weights['b_h{}_to_muy'.format(ni)])
    mu_y = tf.nn.sigmoid(mu_y)
    log_sig_sq_y = tfm.add(
        tfl.matmul(hidden_post, self.weights['W_h{}_to_sy'.format(ni)]),
        self.weights['b_h{}_to_sy'.format(ni)])
    # smoothly bound the log variance to (-50, 50)
    log_sig_sq_y = 100 * (tf.nn.sigmoid(log_sig_sq_y / 100) - 0.5)
    return mu_y, log_sig_sq_y
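# The squashing used above maps any real input into (-50, 50) while staying
# roughly linear (slope 1/4) near zero; the same pattern appears below with
# a configurable self.sig_lim bound. A quick numeric check:
import numpy as np

def squash(x, lim):
    return lim * (1 / (1 + np.exp(-x / lim)) - 0.5)

squash(np.array([-1e3, -1.0, 0.0, 1.0, 1e3]), 100)
# -> approximately [-50., -0.25, 0., 0.25, 50.]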
def compute_py(self, x):
    '''
    compute probability for each class
    INPUTS:
        x - input
    OUTPUTS:
        py - histogram of probabilities for each class
    '''
    hidden1_pre = tfm.add(tfl.matmul(x, self.weights['W_x_to_h1']),
                          self.weights['b_x_to_h1'])
    hidden_post = self.nonlinearity(hidden1_pre)
    num_layers_middle = np.shape(self.N_h)[0] - 1
    ni = 1  # keeps the head weight keys valid when there are no middle layers
    for i in range(num_layers_middle):
        ni = i + 2
        hidden_pre = tfm.add(
            tfl.matmul(hidden_post,
                       self.weights['W_h{}_to_h{}'.format(ni - 1, ni)]),
            self.weights['b_h{}_to_h{}'.format(ni - 1, ni)])
        hidden_post = self.nonlinearity(hidden_pre)
    p_un = tfm.add(
        tfl.matmul(hidden_post, self.weights['W_h{}_to_py'.format(ni)]),
        self.weights['b_h{}_to_py'.format(ni)])
    p_un = tf.nn.sigmoid(p_un) + 1e-6
    # normalize the per-class scores so each row sums to one
    py = tfm.divide(
        p_un,
        tf.tile(tf.expand_dims(tfm.reduce_sum(p_un, axis=1), axis=1),
                [1, self.n_y]))
    return py
def compute_moments(self, x):
    '''
    compute moments of latent Gaussian distribution
    INPUTS:
        x - conditional input
    OUTPUTS:
        mu_z - mean of latent Gaussian distribution
        log_sig_sq_z - log variance of latent Gaussian distribution
    '''
    hidden1_pre = tfm.add(tfl.matmul(x, self.weights['W_x_to_h1']),
                          self.weights['b_x_to_h1'])
    hidden_post = self.nonlinearity(hidden1_pre)
    num_layers_middle = np.shape(self.N_h)[0] - 1
    ni = 1  # keeps the head weight keys valid when there are no middle layers
    for i in range(num_layers_middle):
        ni = i + 2
        hidden_pre = tfm.add(
            tfl.matmul(hidden_post,
                       self.weights['W_h{}_to_h{}'.format(ni - 1, ni)]),
            self.weights['b_h{}_to_h{}'.format(ni - 1, ni)])
        hidden_post = self.nonlinearity(hidden_pre)
    mu_z = tfm.add(
        tfl.matmul(hidden_post, self.weights['W_h{}_to_muz'.format(ni)]),
        self.weights['b_h{}_to_muz'.format(ni)])
    log_sig_sq_z = tfm.add(
        tfl.matmul(hidden_post, self.weights['W_h{}_to_sz'.format(ni)]),
        self.weights['b_h{}_to_sz'.format(ni)])
    # smoothly bound the log variance to (-sig_lim/2, sig_lim/2)
    log_sig_sq_z = self.sig_lim * (
        tf.nn.sigmoid(log_sig_sq_z / self.sig_lim) - 0.5)
    return mu_z, log_sig_sq_z
def objective(v):
    # closure: T, K, o, omega0, x0, theta0, the cost buffers, etc. come from
    # the enclosing scope
    x_v, parts_v = v[:T * o.dxA], v[T * o.dxA:]
    omega_v, theta_v = parts_v[:K * o.dxA], parts_v[K * o.dxA:]

    # build omega and compute omega cost
    for k in range(K):
        omega_vk = omega_v[k * o.dxA:(k + 1) * o.dxA]
        omega[k] = matmul(omega0, ex(alg(G, omega_vk)))
        omegaCost[k] = tf.reduce_sum(
            mahalanobis2_tf(tf.expand_dims(omega_vk, 0), Wi))

    # iterate through each time
    xPrev, QPrev_i = x0, Sigma_x0i
    thetaPrev = theta0
    for t in range(T):
        # make x_t
        x_vt = x_v[t * o.dxA:(t + 1) * o.dxA]
        x[t] = matmul(xPrev, ex(alg(G, x_vt)))

        # x_t cost
        xCost[t] = tf.reduce_sum(
            mahalanobis2_tf(tf.expand_dims(x_vt, 0), QPrev_i))

        theta_vt = theta_v[t * K * o.dxA:(t + 1) * K * o.dxA]
        for k in range(K):
            # make theta_tk
            s_tk = theta_vt[k * o.dxA:(k + 1) * o.dxA]
            s_tkRot = s_tk[o.dy:]
            s_tkTrans = s_tk[:o.dy]
            R_thetaPrev_k, d_thetaPrev_k = SED_tf.Rt(np2tf(thetaPrev[k]))
            R_theta_tk = matmul(R_thetaPrev_k, ex(alg(GRot, s_tkRot)))
            theta[t][k] = SED_tf.MakeRd(o, R_theta_tk, s_tkTrans)

            # theta_tk cost
            m_tk = matvec(Bi, s_tkTrans - matvec(A, d_thetaPrev_k))
            val = tf.concat([m_tk, s_tkRot], axis=0)
            thetaCost[t][k] = tf.reduce_sum(
                mahalanobis2_tf(tf.expand_dims(val, 0), Si[k]))

            lhs = SED_tf.inv(o, matmul(matmul(x[t], omega[k]), theta[t][k]))
            yPart = SED_tf.TransformPointsNonHomog(lhs, y_[t])
            negDists[k] = -mahalanobis2_tf(yPart, Ei[k])

        thetaCost_t[t] = tf.reduce_sum(thetaCost[t])
        negDistsStacked = tf.stack(negDists)
        smoothMins = -tf.math.reduce_logsumexp(negDistsStacked, axis=0)
        obsCost[t] = tf.reduce_sum(smoothMins)

        # set prevs
        xPrev = x[t]
        thetaPrev = theta[t]
        QPrev_i = Qi
    # end time t

    totalCost = (tf.reduce_sum(xCost) + tf.reduce_sum(omegaCost) +
                 tf.reduce_sum(thetaCost_t) + tf.reduce_sum(obsCost))
    return totalCost
def compute_py(self, xl):
    '''
    compute probability for each class
    INPUTS:
        xl - input
    OUTPUTS:
        py - histogram of probabilities for each class
    '''
    x, _ = NN_utils.reshape_and_extract(xl, self.sz_im)
    hidden_post = layers.tf_conv_layer(x, self.weights['W_x_to_h1'],
                                       self.weights['b_x_to_h1'], self.St[0],
                                       self.nonlinearity)
    num_layers_1 = np.shape(self.N_h1)[0] - 1
    ni = 1  # keeps the weight keys valid when a block has no extra layers
    for i in range(num_layers_1):
        ni = i + 2
        hidden_post = layers.tf_conv_layer(
            hidden_post, self.weights['W_h{}_to_h{}'.format(ni - 1, ni)],
            self.weights['b_h{}_to_h{}'.format(ni - 1, ni)], self.St[ni - 1],
            self.nonlinearity)
    hidden_post = NN_utils.flatten(hidden_post)
    num_layers_F = np.shape(self.NF_h)[0]
    for i in range(num_layers_F):
        ni = ni + 1
        hidden_pre = tfm.add(
            tfl.matmul(hidden_post,
                       self.weights['W_h{}_to_h{}'.format(ni - 1, ni)]),
            self.weights['b_h{}_to_h{}'.format(ni - 1, ni)])
        hidden_post = self.nonlinearity(hidden_pre)
    p_un = tfm.add(
        tfl.matmul(hidden_post, self.weights['W_h{}_to_py'.format(ni)]),
        self.weights['b_h{}_to_py'.format(ni)])
    p_un = tf.nn.sigmoid(p_un) + 1e-6
    py = tfm.divide(
        p_un,
        tf.tile(tf.expand_dims(tfm.reduce_sum(p_un, axis=1), axis=1),
                [1, self.n_y]))
    return py
def objective(w):
    # closure: omega0, G, o, T, theta_ki, yObj, Ei, Wi come from the
    # enclosing scope
    omega = matmul(omega0, ex(alg(G, w)))
    omega_inv = SED_tf.inv(o, omega)
    lhs = [matmul(theta_ki[t], omega_inv) for t in range(T)]
    yPart = [
        SED_tf.TransformPointsNonHomog(lhs[t], yObj[t])[:-1] for t in range(T)
    ]
    dists2 = [tf.reduce_sum(mahalanobis2_tf(yPart[t], Ei)) for t in range(T)]
    costDyn = tf.reduce_sum(mahalanobis2_tf(tf.expand_dims(w, 0), Wi))
    return tf.reduce_sum(dists2) + costDyn
def call(self, h_it):
    # h_it -> <batch_size>, None, None, 300
    u_it = self.dense0(h_it)  # <batch_size>, None, None, 300
    a_it = tf.math.softmax(linalg.matmul(u_it, self.u_w), axis=-2)
    # <batch_size>, None, None, 300
    s_t = tf.math.reduce_sum(tf.multiply(a_it, h_it), axis=-2)
    # <batch_size>, None, 300
    return s_t
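# A minimal sketch of the attention pooling above, assuming dense0 maps the
# feature axis to itself and u_w is a (300, 1)-shaped context matrix; the
# shapes and stand-in layer here are assumptions, not the layer's real
# configuration:
import tensorflow as tf

h_it = tf.random.normal([2, 5, 7, 300])        # batch, docs, words, features
u_w = tf.Variable(tf.random.normal([300, 1]))  # assumed context vector
u_it = tf.keras.layers.Dense(300)(h_it)        # stand-in for self.dense0
a_it = tf.nn.softmax(tf.linalg.matmul(u_it, u_w), axis=-2)  # weights over words
s_t = tf.reduce_sum(a_it * h_it, axis=-2)      # -> shape (2, 5, 300)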
def batch_product(input_0, input_1):
    # multiply each batch element of input_0 by input_1 and stack the results
    result = None
    for i in range(input_0.shape[0]):
        op = matmul(input_0[i], input_1)
        op = tf.expand_dims(op, 0)
        if result is None:
            result = op
        else:
            result = tf.concat([result, op], axis=0)
    return tf.squeeze(result, axis=2)
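# The loop above concatenates per-example products; a sketch of an
# equivalent vectorized form, assuming input_0 is (B, N, D) and input_1 is
# (D, 1) so the squeeze on axis 2 is valid:
import tensorflow as tf

matmul = tf.linalg.matmul  # assumed module-level alias used by batch_product

B, N, D = 4, 6, 8
input_0 = tf.random.normal([B, N, D])
input_1 = tf.random.normal([D, 1])
looped = batch_product(input_0, input_1)                             # (B, N)
vectorized = tf.squeeze(tf.linalg.matmul(input_0, input_1), axis=2)  # (B, N)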
def optimize_omega(o, yk, x, theta_k, E_k, **kwargs):
    m = getattr(lie, o.lie)
    T = len(yk)

    # y_tn = x_t omega_k theta_tk epsilon_tn
    #   => (x_t omega_k theta_tk)^{-1} y_tn = epsilon_tn
    #   => theta_tk^{-1} omega_k^{-1} x_t^{-1} y_tn = epsilon_tn
    #   => theta_tk^{-1} omega_k^{-1} x_t^{-1} y_tn ~ N(0, E)
    yObj = [
        np2tf(SED.TransformPointsNonHomog(m.inv(x[t]), yk[t]))
        for t in range(T)
    ]
    theta_ki = [np2tf(m.inv(theta_k[t])) for t in range(T)]
    omega0 = np2tf(o.H_omega[1])
    Wi = np2tf(np.linalg.inv(o.H_omega[2]))
    Ei = np2tf(np.linalg.inv(E_k))
    G = generators_tf(o)
    w = tf.Variable(np2tf(kwargs.get('w_t', np.zeros(o.dxA))))

    def objective(w):
        omega = matmul(omega0, ex(alg(G, w)))
        omega_inv = SED_tf.inv(o, omega)
        lhs = [matmul(theta_ki[t], omega_inv) for t in range(T)]
        yPart = [
            SED_tf.TransformPointsNonHomog(lhs[t], yObj[t])[:-1]
            for t in range(T)
        ]
        dists2 = [
            tf.reduce_sum(mahalanobis2_tf(yPart[t], Ei)) for t in range(T)
        ]
        costDyn = tf.reduce_sum(mahalanobis2_tf(tf.expand_dims(w, 0), Wi))
        return tf.reduce_sum(dists2) + costDyn

    def grad(w):
        with tf.GradientTape() as tape:
            cost = objective(w)
        return cost, tape.gradient(cost, w)

    steps = kwargs.get('opt_steps', 10000)
    opt = tf.compat.v1.train.AdamOptimizer(learning_rate=0.1)
    prevCost = 1e6
    for s in range(steps):
        cost, grads = grad(w)
        opt.apply_gradients([(grads, w)])
        # print(f'{s:05}, cost: {cost.numpy():.2f}, w: {w.numpy()}')
        if np.abs(cost.numpy() - prevCost) < 1e-6:
            break
        prevCost = cost.numpy()

    omega_k = matmul(omega0, ex(alg(G, w)))
    return omega_k.numpy()
def call(self, inputs, states):
    # one 4-stage explicit Runge-Kutta update of the state (y, ydot);
    # self.A holds the stage coefficients and self.B the combination weights
    C = self._getCKmatrix(self.kernel)
    y = states[0][:, :2]
    ydot = states[0][:, 2:]
    yddoti = self._fun(self.Minv, self.K, C, inputs, y, ydot)
    yi = y + self.A[0] * ydot * self.dt
    ydoti = ydot + self.A[0] * yddoti * self.dt
    fn = self._fun(self.Minv, self.K, C, inputs, yi, ydoti)
    for j in range(1, 4):
        yn = y + self.A[j] * ydot * self.dt
        ydotn = ydot + self.A[j] * yddoti * self.dt
        ydoti = concat([ydoti, ydotn], axis=0)
        fn = concat([fn, self._fun(self.Minv, self.K, C, inputs, yn, ydotn)],
                    axis=0)
    y = y + linalg.matmul(self.B, ydoti) * self.dt
    ydot = ydot + linalg.matmul(self.B, fn) * self.dt
    return y, [concat([y, ydot], axis=-1)]
def update_weights(self, v0, vk, ph0, phk, lr, momentum_coef, weight_decay,
                   batch_size):
    """Learning step: update parameters

    Uses contrastive divergence algorithm as described in

    Parameters
    ----------
    v0: Tensor
        initial visible state
    vk: Tensor
        final visible state
    ph0: Tensor
        hidden activation probabilities for v0
    phk: Tensor
        hidden activation probabilities for vk
    lr: float
        learning rate
    momentum_coef: float
        coefficient to use for momentum
    weight_decay: float
        coefficient to use for weight decay
    batch_size: int
        size of each batch
    """
    self.W_momentum *= momentum_coef
    self.W_momentum = tf.cast(self.W_momentum, tf.float32)
    self.W_momentum = (self.W_momentum + matmul(tf.transpose(v0), ph0) -
                       matmul(tf.transpose(vk), phk))

    self.h_bias_momentum *= momentum_coef
    self.h_bias_momentum += tf.math.reduce_sum(ph0 - phk, 0)

    self.v_bias_momentum *= momentum_coef
    self.v_bias_momentum += tf.math.reduce_sum(v0 - vk, 0)

    self.W = self.W + lr * self.W_momentum / batch_size
    self.h_bias = self.h_bias + lr * self.h_bias_momentum / batch_size
    self.v_bias = self.v_bias + lr * self.v_bias_momentum / batch_size
    self.W -= self.W * weight_decay  # L2 weight decay
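# A minimal sketch of the CD-1 statistics that feed update_weights; W,
# h_bias, and v_bias here are toy stand-ins, not the class's real state:
import tensorflow as tf

n_v, n_h, batch_size = 6, 4, 8
W = tf.random.normal([n_v, n_h], stddev=0.01)
h_bias = tf.zeros([n_h])
v_bias = tf.zeros([n_v])
v0 = tf.cast(tf.random.uniform([batch_size, n_v]) < 0.5, tf.float32)

ph0 = tf.sigmoid(tf.linalg.matmul(v0, W) + h_bias)            # positive phase
h0 = tf.cast(tf.random.uniform(tf.shape(ph0)) < ph0, tf.float32)
pv = tf.sigmoid(tf.linalg.matmul(h0, tf.transpose(W)) + v_bias)
vk = tf.cast(tf.random.uniform(tf.shape(pv)) < pv, tf.float32)  # one Gibbs step
phk = tf.sigmoid(tf.linalg.matmul(vk, W) + h_bias)            # negative phase
# these four tensors are exactly the (v0, vk, ph0, phk) arguments above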
def compute_moments(self, z, constrain=True):
    '''
    compute moments of input/output Gaussian distribution
    INPUTS:
        z - latent variable
    OPTIONAL INPUTS:
        constrain - whether to force the output mean to be between 0 and 1
    OUTPUTS:
        mu_x - mean of output Gaussian distribution
        log_sig_sq_x - log variance of output Gaussian distribution
    '''
    hidden1_pre = tfm.add(tfl.matmul(z, self.weights['W_z_to_h1']),
                          self.weights['b_z_to_h1'])
    hidden_post = self.nonlinearity(hidden1_pre)
    num_layers_middle = np.shape(self.N_h)[0] - 1
    ni = 1  # keeps the head weight keys valid when there are no middle layers
    for i in range(num_layers_middle):
        ni = i + 2
        hidden_pre = tfm.add(
            tfl.matmul(hidden_post,
                       self.weights['W_h{}_to_h{}'.format(ni - 1, ni)]),
            self.weights['b_h{}_to_h{}'.format(ni - 1, ni)])
        hidden_post = self.nonlinearity(hidden_pre)
    mu_x = tfm.add(
        tfl.matmul(hidden_post, self.weights['W_h{}_to_mux'.format(ni)]),
        self.weights['b_h{}_to_mux'.format(ni)])
    if constrain:
        mu_x = tf.nn.sigmoid(mu_x)
    log_sig_sq_x = tfm.add(
        tfl.matmul(hidden_post, self.weights['W_h{}_to_sx'.format(ni)]),
        self.weights['b_h{}_to_sx'.format(ni)])
    # smoothly bound the log variance to (-sig_lim/2, sig_lim/2)
    log_sig_sq_x = self.sig_lim * (
        tf.nn.sigmoid(log_sig_sq_x / self.sig_lim) - 0.5)
    return mu_x, log_sig_sq_x
def objective(qs_t):
    # closure: o, K, xPrev_, omega_, R_thetaPrev, d_thetaPrev, yt_, Ei, Si,
    # Qi, Bi, A, G, GRot come from the enclosing scope
    q_t, s_t = qs_t[:o.dxA], qs_t[o.dxA:]

    # make x_t, theta_tk for each k
    x_t = matmul(xPrev_, ex(alg(G, q_t)))
    theta_t = [[] for k in range(K)]
    for k in range(K):
        s_tk = s_t[k * o.dxA:(k + 1) * o.dxA]
        s_tkRot = s_tk[o.dy:]
        s_tkTrans = s_tk[:o.dy]
        R_theta_t = matmul(R_thetaPrev[k], ex(alg(GRot, s_tkRot)))
        theta_t[k] = SED_tf.MakeRd(o, R_theta_t, s_tkTrans)

    lhs = [
        SED_tf.inv(o, matmul(matmul(x_t, omega_[k]), theta_t[k]))
        for k in range(K)
    ]
    yPart = [SED_tf.TransformPointsNonHomog(lhs[k], yt_) for k in range(K)]
    negDists = tf.stack([-mahalanobis2_tf(yPart[k], Ei[k]) for k in range(K)])
    smooth_mins = -tf.math.reduce_logsumexp(negDists, axis=0)
    cost = tf.reduce_sum(smooth_mins)

    # x dynamics
    cost_xDyn = tf.reduce_sum(mahalanobis2_tf(tf.expand_dims(q_t, 0), Qi))

    # theta dynamics
    cost_thetaDyn = [[] for k in range(K)]
    for k in range(K):
        s_tk = s_t[k * o.dxA:(k + 1) * o.dxA]
        s_tkRot = s_tk[o.dy:]
        s_tkTrans = s_tk[:o.dy]
        m_tk = matvec(Bi, s_tkTrans - matvec(A, d_thetaPrev[k]))
        val = tf.concat([m_tk, s_tkRot], axis=0)
        cost_thetaDyn[k] = tf.reduce_sum(
            mahalanobis2_tf(tf.expand_dims(val, 0), Si[k]))

    return cost + cost_xDyn + tf.reduce_sum(cost_thetaDyn)
def TransformPointsNonHomog(x, y):
    # apply the rigid transform x (rotation R, offset d) to the points y
    R, d = Rt(x)
    ptsRot = tf.transpose(matmul(R, y, transpose_b=True))
    ptsRotTrans = ptsRot + d
    return ptsRotTrans
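# A quick sanity check of TransformPointsNonHomog, assuming Rt splits a
# homogeneous pose into its rotation block and translation column (the Rt
# below is an assumed stand-in, and matmul an assumed tf.linalg alias); the
# pose rotates 90 degrees and translates by (1, 0):
import tensorflow as tf

matmul = tf.linalg.matmul

def Rt(x):  # assumed behavior: top-left rotation block, last-column offset
    return x[:-1, :-1], x[:-1, -1]

pose = tf.constant([[0.0, -1.0, 1.0],
                    [1.0,  0.0, 0.0],
                    [0.0,  0.0, 1.0]])
pts = tf.constant([[1.0, 0.0]])           # one point at (1, 0)
out = TransformPointsNonHomog(pose, pts)  # -> [[1.0, 1.0]]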
def _fun(self, Minv, K, C, u, y, ydot):
    # acceleration of the second-order system M yddot + C ydot + K y = u,
    # written for row-vector states: (u - ydot C^T - y K^T) Minv^T
    return linalg.matmul(
        u - linalg.matmul(ydot, C, transpose_b=True)
          - linalg.matmul(y, K, transpose_b=True),
        Minv, transpose_b=True)
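# A scalar check of the expression above with toy 1x1 matrices: for m = 2,
# c = 0.1, k = 3, u = 4, y = 1, ydot = 2 the acceleration should be
# (4 - 0.1*2 - 3*1) / 2 = 0.4:
import tensorflow as tf
from tensorflow import linalg

Minv = tf.constant([[0.5]])  # m = 2
K = tf.constant([[3.0]])     # stiffness
C = tf.constant([[0.1]])     # damping
u = tf.constant([[4.0]])
y = tf.constant([[1.0]])
ydot = tf.constant([[2.0]])
acc = linalg.matmul(u - linalg.matmul(ydot, C, transpose_b=True)
                      - linalg.matmul(y, K, transpose_b=True),
                    Minv, transpose_b=True)  # -> [[0.4]]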
def compute_moments(self, z, x, x2, constrain=True):
    '''
    compute moments of output Gaussian distribution
    INPUTS:
        x - conditional input
        x2 - second conditional input
        z - latent variable
    OPTIONAL INPUTS:
        constrain - whether to force the output mean to be between 0 and 1
    OUTPUTS:
        mu_y - mean of output Gaussian distribution
        log_sig_sq_y - log variance of output Gaussian distribution
    '''
    # Channel for latent variable alone
    hidden_pre_z = tfm.add(tfl.matmul(z, self.weights['W_z_to_h1z']),
                           self.weights['b_z_to_h1z'])
    hidden_post_z = self.nonlinearity(hidden_pre_z)
    num_layers_middle_z = np.shape(self.N_hz)[0] - 1
    for i in range(num_layers_middle_z):
        ni = i + 2
        hidden_pre_z = tfm.add(
            tfl.matmul(hidden_post_z,
                       self.weights['W_h{}z_to_h{}z'.format(ni - 1, ni)]),
            self.weights['b_h{}z_to_h{}z'.format(ni - 1, ni)])
        hidden_post_z = self.nonlinearity(hidden_pre_z)

    # Channel for first conditional input alone
    hidden_pre_x = tfm.add(tfl.matmul(x, self.weights['W_x_to_h1x']),
                           self.weights['b_x_to_h1x'])
    hidden_post_x = self.nonlinearity(hidden_pre_x)
    num_layers_middle_x = np.shape(self.N_hx)[0] - 1
    for i in range(num_layers_middle_x):
        ni = i + 2
        hidden_pre_x = tfm.add(
            tfl.matmul(hidden_post_x,
                       self.weights['W_h{}x_to_h{}x'.format(ni - 1, ni)]),
            self.weights['b_h{}x_to_h{}x'.format(ni - 1, ni)])
        hidden_post_x = self.nonlinearity(hidden_pre_x)

    # Channel for second conditional input alone
    hidden_pre_x2 = tfm.add(tfl.matmul(x2, self.weights['W_x2_to_h1x2']),
                            self.weights['b_x2_to_h1x2'])
    hidden_post_x2 = self.nonlinearity(hidden_pre_x2)
    num_layers_middle_x2 = np.shape(self.N_hx2)[0] - 1
    for i in range(num_layers_middle_x2):
        ni = i + 2
        hidden_pre_x2 = tfm.add(
            tfl.matmul(hidden_post_x2,
                       self.weights['W_h{}x2_to_h{}x2'.format(ni - 1, ni)]),
            self.weights['b_h{}x2_to_h{}x2'.format(ni - 1, ni)])
        hidden_post_x2 = self.nonlinearity(hidden_pre_x2)

    hidden_post = tf.concat([hidden_post_z, hidden_post_x, hidden_post_x2], 1)

    # Channel after combining the inputs
    hidden_pre = tfm.add(tfl.matmul(hidden_post, self.weights['W_h0_to_h1']),
                         self.weights['b_h0_to_h1'])
    hidden_post = self.nonlinearity(hidden_pre)
    num_layers_middle = np.shape(self.N_h)[0] - 1
    ni = 1  # keeps the head weight keys valid when there are no middle layers
    for i in range(num_layers_middle):
        ni = i + 2
        hidden_pre = tfm.add(
            tfl.matmul(hidden_post,
                       self.weights['W_h{}_to_h{}'.format(ni - 1, ni)]),
            self.weights['b_h{}_to_h{}'.format(ni - 1, ni)])
        hidden_post = self.nonlinearity(hidden_pre)

    mu_y = tfm.add(
        tfl.matmul(hidden_post, self.weights['W_h{}_to_muy'.format(ni)]),
        self.weights['b_h{}_to_muy'.format(ni)])
    if constrain:
        mu_y = tf.nn.sigmoid(mu_y)
    log_sig_sq_y = tfm.add(
        tfl.matmul(hidden_post, self.weights['W_h{}_to_sy'.format(ni)]),
        self.weights['b_h{}_to_sy'.format(ni)])
    # smoothly bound the log variance to (-sig_lim/2, sig_lim/2)
    log_sig_sq_y = self.sig_lim * (
        tf.nn.sigmoid(log_sig_sq_y / self.sig_lim) - 0.5)
    return mu_y, log_sig_sq_y
def compute_moments(self, y, x, x2):
    '''
    compute moments of latent Gaussian distribution
    INPUTS:
        y - output to encode
        x - conditional input
        x2 - second conditional input
    OUTPUTS:
        mu_z - mean of latent Gaussian distribution
        log_sig_sq_z - log variance of latent Gaussian distribution
    '''
    # Channel for input/output alone
    hidden_pre_y = tfm.add(tfl.matmul(y, self.weights['W_y_to_h1y']),
                           self.weights['b_y_to_h1y'])
    hidden_post_y = self.nonlinearity(hidden_pre_y)
    num_layers_middle_y = np.shape(self.N_hy)[0] - 1
    for i in range(num_layers_middle_y):
        ni = i + 2
        hidden_pre_y = tfm.add(
            tfl.matmul(hidden_post_y,
                       self.weights['W_h{}y_to_h{}y'.format(ni - 1, ni)]),
            self.weights['b_h{}y_to_h{}y'.format(ni - 1, ni)])
        hidden_post_y = self.nonlinearity(hidden_pre_y)

    # Channel for conditional input alone
    hidden_pre_x = tfm.add(tfl.matmul(x, self.weights['W_x_to_h1x']),
                           self.weights['b_x_to_h1x'])
    hidden_post_x = self.nonlinearity(hidden_pre_x)
    num_layers_middle_x = np.shape(self.N_hx)[0] - 1
    for i in range(num_layers_middle_x):
        ni = i + 2
        hidden_pre_x = tfm.add(
            tfl.matmul(hidden_post_x,
                       self.weights['W_h{}x_to_h{}x'.format(ni - 1, ni)]),
            self.weights['b_h{}x_to_h{}x'.format(ni - 1, ni)])
        hidden_post_x = self.nonlinearity(hidden_pre_x)

    # Channel for second conditional input alone
    hidden_pre_x2 = tfm.add(tfl.matmul(x2, self.weights['W_x2_to_h1x2']),
                            self.weights['b_x2_to_h1x2'])
    hidden_post_x2 = self.nonlinearity(hidden_pre_x2)
    num_layers_middle_x2 = np.shape(self.N_hx2)[0] - 1
    for i in range(num_layers_middle_x2):
        ni = i + 2
        hidden_pre_x2 = tfm.add(
            tfl.matmul(hidden_post_x2,
                       self.weights['W_h{}x2_to_h{}x2'.format(ni - 1, ni)]),
            self.weights['b_h{}x2_to_h{}x2'.format(ni - 1, ni)])
        hidden_post_x2 = self.nonlinearity(hidden_pre_x2)

    hidden_post = tf.concat([hidden_post_y, hidden_post_x, hidden_post_x2], 1)

    # Channel after combining the inputs
    hidden_pre = tfm.add(tfl.matmul(hidden_post, self.weights['W_h0_to_h1']),
                         self.weights['b_h0_to_h1'])
    hidden_post = self.nonlinearity(hidden_pre)
    num_layers_middle = np.shape(self.N_h)[0] - 1
    ni = 1  # keeps the head weight keys valid when there are no middle layers
    for i in range(num_layers_middle):
        ni = i + 2
        hidden_pre = tfm.add(
            tfl.matmul(hidden_post,
                       self.weights['W_h{}_to_h{}'.format(ni - 1, ni)]),
            self.weights['b_h{}_to_h{}'.format(ni - 1, ni)])
        hidden_post = self.nonlinearity(hidden_pre)

    mu_z = tfm.add(
        tfl.matmul(hidden_post, self.weights['W_h{}_to_muz'.format(ni)]),
        self.weights['b_h{}_to_muz'.format(ni)])
    log_sig_sq_z = tfm.add(
        tfl.matmul(hidden_post, self.weights['W_h{}_to_sz'.format(ni)]),
        self.weights['b_h{}_to_sz'.format(ni)])
    # smoothly bound the log variance to (-sig_lim/2, sig_lim/2)
    log_sig_sq_z = self.sig_lim * (
        tf.nn.sigmoid(log_sig_sq_z / self.sig_lim) - 0.5)
    return mu_z, log_sig_sq_z
def compute_moments(self, xl):
    '''
    compute moments of output Gaussian distribution
    INPUTS:
        xl - input
    OUTPUTS:
        mu_y - mean of output Gaussian distribution
        log_sig_sq_y - log variance of output Gaussian distribution
    '''
    x, l = NN_utils.reshape_and_extract(xl, self.sz_im)
    hidden_post = layers.tf_conv_layer(x, self.weights['W_x_to_h1'],
                                       self.weights['b_x_to_h1'], self.St[0],
                                       self.nonlinearity)
    num_layers_1 = np.shape(self.N_h1)[0] - 1
    ni = 1  # keeps the weight keys valid when a block has no extra layers
    for i in range(num_layers_1):
        ni = i + 2
        hidden_post = layers.tf_conv_layer(
            hidden_post, self.weights['W_h{}_to_h{}'.format(ni - 1, ni)],
            self.weights['b_h{}_to_h{}'.format(ni - 1, ni)], self.St[ni - 1],
            self.nonlinearity)
    hidden_post = NN_utils.flatten(hidden_post)
    hidden_post = tf.concat([hidden_post, l], axis=1)
    num_layers_F = np.shape(self.NF_h)[0]
    for i in range(num_layers_F):
        ni = ni + 1
        hidden_pre = tfm.add(
            tfl.matmul(hidden_post,
                       self.weights['W_h{}_to_h{}'.format(ni - 1, ni)]),
            self.weights['b_h{}_to_h{}'.format(ni - 1, ni)])
        hidden_post = self.nonlinearity(hidden_pre)
    hidden_post = NN_utils.reshape_to_images(hidden_post, self.Sz2[0, :])
    num_layers_2 = np.shape(self.N_h2)[0]
    for i in range(num_layers_2):
        ni = ni + 1
        hidden_post = layers.tf_conv_layer(
            hidden_post, self.weights['W_h{}_to_h{}'.format(ni - 1, ni)],
            self.weights['b_h{}_to_h{}'.format(ni - 1, ni)], self.St[ni - 1],
            self.nonlinearity)
    mu_y = layers.tf_conv_layer(hidden_post,
                                self.weights['W_h{}_to_muy'.format(ni)],
                                self.weights['b_h{}_to_muy'.format(ni)], 1,
                                self.nonlinearity)
    mu_y = tf.nn.sigmoid(mu_y)
    log_sig_sq_y = layers.tf_conv_layer(
        hidden_post, self.weights['W_h{}_to_sy'.format(ni)],
        self.weights['b_h{}_to_sy'.format(ni)], 1, self.nonlinearity)
    # smoothly bound the log variance to (-50, 50)
    log_sig_sq_y = 100 * (tf.nn.sigmoid(log_sig_sq_y / 100) - 0.5)
    mu_y = NN_utils.flatten(mu_y)
    mu_y = tf.concat([mu_y, tf.zeros([tf.shape(mu_y)[0], 1])], axis=1)
    log_sig_sq_y = NN_utils.flatten(log_sig_sq_y)
    log_sig_sq_y = tf.concat(
        [log_sig_sq_y, tf.zeros([tf.shape(log_sig_sq_y)[0], 1])], axis=1)
    return mu_y, log_sig_sq_y
def call(self, inputs):
    return matmul(inputs, self.kernel)
def call(self, _input):
    alpha = tf.nn.softmax(batch_product(_input, self.bin_context_vector),
                          axis=1)
    batch_size, source_length, _ = _input.shape
    alpha = tf.reshape(tf.expand_dims(alpha, 2),
                       [batch_size, -1, source_length])
    return matmul(alpha, _input), alpha
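# A small end-to-end check of the attention above, assuming
# bin_context_vector has shape (feature_dim, 1) so batch_product yields one
# score per source position; shapes here are toy assumptions, not the
# layer's real configuration:
import tensorflow as tf

matmul = tf.linalg.matmul  # assumed module-level alias

B, N, D = 3, 5, 8
_input = tf.random.normal([B, N, D])
context = tf.random.normal([D, 1])
scores = batch_product(_input, context)                  # (B, N)
alpha = tf.nn.softmax(scores, axis=1)                    # attention weights
weighted = matmul(tf.reshape(alpha, [B, 1, N]), _input)  # (B, 1, D)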
def optimize_t(o, yt, xPrev, omega, thetaPrev, E, S, Q, **kwargs):
    # jointly optimize x[t], theta[t,k]
    m = getattr(lie, o.lie)
    t = len(yt)
    K = E.shape[0]

    omega_ = [np2tf(omega[k]) for k in range(K)]
    omega_inv = [np2tf(m.inv(omega[k])) for k in range(K)]
    xPrev_ = np2tf(xPrev)
    thetaPrev_ = np2tf(thetaPrev)
    R_thetaPrev, d_thetaPrev = zip(
        *[SED_tf.Rt(np2tf(thetaPrev[k])) for k in range(K)])
    Ei = [np2tf(np.linalg.inv(E[k])) for k in range(K)]
    Si = [np2tf(np.linalg.inv(S[k])) for k in range(K)]
    Qi = np2tf(np.linalg.inv(Q))
    yt_ = np2tf(yt)
    Bi = np2tf(o.Bi)
    A = np2tf(o.A)
    G = generators_tf(o)
    GRot = G[o.dy:, :-1, :-1]
    qs_t = tf.Variable(np2tf(kwargs.get('qs_t', np.zeros((K + 1) * o.dxA))))

    def objective(qs_t):
        q_t, s_t = qs_t[:o.dxA], qs_t[o.dxA:]

        # make x_t, theta_tk for each k
        x_t = matmul(xPrev_, ex(alg(G, q_t)))
        theta_t = [[] for k in range(K)]
        for k in range(K):
            s_tk = s_t[k * o.dxA:(k + 1) * o.dxA]
            s_tkRot = s_tk[o.dy:]
            s_tkTrans = s_tk[:o.dy]
            R_theta_t = matmul(R_thetaPrev[k], ex(alg(GRot, s_tkRot)))
            theta_t[k] = SED_tf.MakeRd(o, R_theta_t, s_tkTrans)

        lhs = [
            SED_tf.inv(o, matmul(matmul(x_t, omega_[k]), theta_t[k]))
            for k in range(K)
        ]
        yPart = [
            SED_tf.TransformPointsNonHomog(lhs[k], yt_) for k in range(K)
        ]
        negDists = tf.stack(
            [-mahalanobis2_tf(yPart[k], Ei[k]) for k in range(K)])
        smooth_mins = -tf.math.reduce_logsumexp(negDists, axis=0)
        cost = tf.reduce_sum(smooth_mins)

        # x dynamics
        cost_xDyn = tf.reduce_sum(mahalanobis2_tf(tf.expand_dims(q_t, 0), Qi))

        # theta dynamics
        cost_thetaDyn = [[] for k in range(K)]
        for k in range(K):
            s_tk = s_t[k * o.dxA:(k + 1) * o.dxA]
            s_tkRot = s_tk[o.dy:]
            s_tkTrans = s_tk[:o.dy]
            m_tk = matvec(Bi, s_tkTrans - matvec(A, d_thetaPrev[k]))
            val = tf.concat([m_tk, s_tkRot], axis=0)
            cost_thetaDyn[k] = tf.reduce_sum(
                mahalanobis2_tf(tf.expand_dims(val, 0), Si[k]))

        return cost + cost_xDyn + tf.reduce_sum(cost_thetaDyn)

    def grad(qs_t):
        with tf.GradientTape() as tape:
            cost = objective(qs_t)
        return cost, tape.gradient(cost, qs_t)

    steps = kwargs.get('opt_steps', 10000)
    learning_rate = kwargs.get('learning_rate', 0.1)
    opt = tf.compat.v1.train.AdamOptimizer(learning_rate=learning_rate)
    prevCost = 1e6
    for s in range(steps):
        cost, grads = grad(qs_t)
        opt.apply_gradients([(grads, qs_t)])
        # print(f'{s:05}, cost: {cost.numpy():.2f}, qs_t: {qs_t.numpy()}')
        if np.abs(cost.numpy() - prevCost) < 1e-6:
            break
        prevCost = cost.numpy()

    # rebuild the optimized x_t and theta_t from the final qs_t
    q_t, s_t = qs_t[:o.dxA], qs_t[o.dxA:]
    x_t = matmul(xPrev_, ex(alg(G, q_t))).numpy()
    theta_t = [[] for k in range(K)]
    for k in range(K):
        s_tk = s_t[k * o.dxA:(k + 1) * o.dxA]
        s_tkRot = s_tk[o.dy:]
        s_tkTrans = s_tk[:o.dy]
        R_theta_t = matmul(R_thetaPrev[k], ex(alg(GRot, s_tkRot)))
        theta_t[k] = SED_tf.MakeRd(o, R_theta_t, s_tkTrans).numpy()
    return x_t, np.stack(theta_t), cost.numpy()