def test_kron_sequence_pairwise_quadratic_form(self):
    """Check `_kron_sequence_pairwise_quadratic_form` against NumPy.

    A batch of 5 TT-matrices and a batch of 9 TT-vectors (split into
    sequences of lengths 2, 4 and 3) are passed to the function under test.
    For every sequence the result is compared with the dense pairwise
    quadratic forms v^T M u computed by `np.einsum`.
    """
    # Batch of TT-matrices.
    mat_init = t3f.initializers.random_matrix_batch(
        ((6, 7), (6, 7)), tt_rank=1, batch_size=5)
    kron_mat = t3f.get_variable('kron_mat', initializer=mat_init)

    # Batch of TT-vectors; sum_len is the total number of vectors over all
    # sequences (2 + 4 + 3).
    seq_lens = tf.constant([2, 4, 3])
    sum_len = 9
    max_len = tf.constant(6)
    vec_init = t3f.initializers.random_matrix_batch(
        ((6, 7), (1, 1)), tt_rank=1, batch_size=sum_len)
    kron_vec = t3f.get_variable('kron_vec', initializer=vec_init)

    result_op = misc._kron_sequence_pairwise_quadratic_form(
        kron_mat, kron_vec, seq_lens, max_len)
    init_op = tf.global_variables_initializer()

    with self.test_session() as sess:
        sess.run(init_op)
        actual = sess.run(result_op)

        # Dense NumPy copies of both operands for the reference computation.
        dense_mats = sess.run(t3f.ops.full(kron_mat))
        dense_vecs = sess.run(t3f.ops.full(kron_vec))
        flat_vecs = dense_vecs.reshape([dense_vecs.shape[0], -1])

        # Walk over the concatenated vectors one sequence at a time.
        begin = 0
        for seq_idx, length in enumerate(sess.run(seq_lens)):
            end = begin + length
            segment = flat_vecs[begin:end, :]
            expected = np.einsum(
                'vi,mij,uj->mvu', segment, dense_mats, segment)
            self.assertAllClose(
                expected, actual[:, seq_idx, :length, :length], atol=1e-3)
            begin = end
def build(self, input_shape):
    """Create the TT-matrix weight and the optional bias of the layer.

    The initializer is selected by `self.init` ('glorot', 'he' or 'lecun').
    Variable names are made unique with the class-level `TTDense.counter`,
    which is incremented once per call.

    Args:
        input_shape: unused by this method.

    Raises:
        ValueError: if `self.init` is not one of the supported names.
    """
    factories = (
        ('glorot', t3f.glorot_initializer),
        ('he', t3f.he_initializer),
        ('lecun', t3f.lecun_initializer),
    )
    for init_name, make_initializer in factories:
        if self.init == init_name:
            initializer = make_initializer(self.tt_shape,
                                           tt_rank=self.tt_rank)
            break
    else:
        raise ValueError('Unknown init "%s", only %s are supported'
                         % (self.init, inits))

    matrix_name = 'tt_dense_matrix_{}'.format(TTDense.counter)
    self.W = t3f.get_variable(matrix_name, initializer=initializer)

    self.b = None
    if self.bias:
        bias_name = 'tt_dense_b_{}'.format(TTDense.counter)
        bias_init = tf.constant_initializer(self.bias_init)
        self.b = tf.get_variable(bias_name, shape=self.output_dim,
                                 initializer=bias_init)
    TTDense.counter += 1

    # The trainable weights are the TT-cores plus the bias, if any.
    self.trainable_weights = list(self.W.tt_cores)
    if self.b is not None:
        self.trainable_weights.append(self.b)
def build(self, input_shape):
    """Create the layer variables inside a per-layer variable scope.

    The TT-matrix is initialized according to `self.kernel_initializer`
    ('glorot', 'he' or 'lecun'). When `self.use_bias` is set, a bias
    variable filled with the constant `self.bias_initializer` is created in
    the same scope.

    Args:
        input_shape: unused by this method.

    Raises:
        ValueError: if `self.kernel_initializer` is not a supported name.
    """
    factories = (
        ('glorot', t3f.glorot_initializer),
        ('he', t3f.he_initializer),
        ('lecun', t3f.lecun_initializer),
    )
    for init_name, make_initializer in factories:
        if self.kernel_initializer == init_name:
            initializer = make_initializer(self.tt_shape,
                                           tt_rank=self.tt_rank)
            break
    else:
        raise ValueError('Unknown kernel_initializer "%s", only "glorot",'
                         '"he", and "lecun" are supported'
                         % self.kernel_initializer)

    scope_name = 'tt_dense_{}'.format(self.counter)
    with tf.variable_scope(scope_name):
        self.matrix = t3f.get_variable('matrix', initializer=initializer)
        self.b = None
        if self.use_bias:
            bias_init = tf.constant_initializer(self.bias_initializer)
            self.b = tf.get_variable('bias', shape=self.output_dim,
                                     initializer=bias_init)

    # The trainable weights are the TT-cores plus the bias, if any.
    self._trainable_weights = list(self.matrix.tt_cores)
    if self.b is not None:
        self._trainable_weights.append(self.b)
def __init__(self, rank, tt_shape, shape_out, bias_initializer=None,
             regularizer=None, use_bias=True):
    """Create a TT-matrix weight and an optional bias vector.

    Args:
        rank: TT-rank passed to the weight initializer.
        tt_shape: TT-matrix shape passed to the weight initializer.
        shape_out: length of the bias vector.
        bias_initializer: initializer for the bias; when None a constant
            0.01 initializer is used.
        regularizer: regularizer applied to both the TT-matrix and the bias.
        use_bias: whether to create the bias variable.

    Attributes set:
        weights: the TT-matrix variable named "tensor_train".
        bias: the bias variable, or None when `use_bias` is False, so the
            attribute can always be read.
    """
    import t3f

    self.use_bias = use_bias
    self.tt_shape = tt_shape

    # LeCun initialization of the TT-cores.
    initializer = t3f.lecun_initializer(self.tt_shape, tt_rank=rank)
    self.weights = t3f.get_variable("tensor_train",
                                    initializer=initializer,
                                    regularizer=regularizer,
                                    trainable=True)

    # Always define the attribute; it stays None for bias-free layers.
    self.bias = None
    if use_bias:
        if bias_initializer is None:
            bias_initializer = tf.constant_initializer(0.01)
        self.bias = tf.get_variable(name="bias",
                                    shape=[shape_out],
                                    initializer=bias_initializer,
                                    regularizer=regularizer)
def __init__(self, input_dims, output_dims, tt_rank=2, activation=None,
             use_bias=True, kernel_initializer='glorot', bias_initializer=0.1,
             **kwargs):
    """Creates a TT-Matrix based Dense Keras layer.

    Args:
        input_dims: an array, tensor shape of the matrix row index
        output_dims: an array, tensor shape of the matrix column index
        tt_rank: a number or an array, desired tt-rank of the TT-Matrix
        activation: [None] string or None, specifies the activation
            function.
        use_bias: bool, whether to use bias
        kernel_initializer: string specifying initializer for the
            TT-Matrix. Possible values are 'glorot', 'he', and 'lecun'.
        bias_initializer: a number, initialization value of the bias
        **kwargs: forwarded to the parent class constructor.

    Returns:
        Layer object corresponding to multiplication by a TT-Matrix
        followed by addition of a bias and applying an elementwise
        activation

    Raises:
        ValueError if the provided kernel_initializer is unknown.
    """
    # Per-instance index used to build a unique layer name.
    self.counter = next(self._counter)
    self.tt_shape = [input_dims, output_dims]
    self.output_dim = np.prod(output_dims)
    self.tt_rank = tt_rank
    self.activation = activation
    self.use_bias = use_bias
    self.kernel_initializer = kernel_initializer
    self.bias_initializer = bias_initializer
    name = 'tt_dense_{}'.format(self.counter)

    factories = (
        ('glorot', t3f.glorot_initializer),
        ('he', t3f.he_initializer),
        ('lecun', t3f.lecun_initializer),
    )
    for init_name, make_initializer in factories:
        if self.kernel_initializer == init_name:
            initializer = make_initializer(self.tt_shape,
                                           tt_rank=self.tt_rank)
            break
    else:
        raise ValueError('Unknown kernel_initializer "%s", only "glorot",'
                         '"he", and "lecun" are supported'
                         % self.kernel_initializer)

    self.matrix = t3f.get_variable('matrix', initializer=initializer)
    self._tt_cores = self.matrix.tt_cores

    self.b = None
    if self.use_bias:
        # Bias vector filled with the constant `bias_initializer`.
        bias_value = self.bias_initializer * tf.ones((self.output_dim, ))
        self.b = tf.Variable(bias_value)

    super(KerasDense, self).__init__(name=name, **kwargs)
def _get_mu(self, ranks, x, y):
    """Initializes latent inputs expectations mu from the given data.

    Computes the batch `Sigma * (w * y)`, where `Sigma` is
    `sigma_l * sigma_l^T` and `w` is the interpolation of the projected
    features, sums the batch elements into a single TT-object and wraps the
    result in a variable named 'tt_mu'.

    Args:
        ranks: tt-ranks of mu
        x: features of a batch of objects
        y: targets of a batch of objects

    Returns:
        The t3f variable 'tt_mu'.
    """
    # TODO: test if this is needed.
    w = self.inputs.interpolate_on_batch(self.cov.project(x))
    Sigma = ops.tt_tt_matmul(self.sigma_l, ops.transpose(self.sigma_l))
    anc = ops.tt_tt_matmul(Sigma, ops.tt_tt_matmul(w, y))

    def batch_element(idx):
        # TT-object built from the idx-th slice of every core of the batch.
        cores = [core[idx, :, :, :, :] for core in anc.tt_cores]
        return TensorTrain(cores, tt_ranks=[1] * (anc.ndims() + 1))

    # Sum all elements of the batch.
    total = batch_element(0)
    for idx in range(1, anc.get_shape()[0]):
        total = ops.add(total, batch_element(idx))

    # NOTE(review): the declared ranks presumably match the cores only when
    # the batch size equals `ranks` -- confirm against the caller.
    mu_ranks = [1] + [ranks] * (total.ndims() - 1) + [1]
    mu_init = TensorTrain(total.tt_cores, total.get_raw_shape(), mu_ranks)
    return t3f.get_variable('tt_mu', initializer=mu_init)
def _get_sigma_ls(self):
    """Initialize covariance matrix of var distribution over unary potentials.

    Returns:
        The t3f variable 'sigma_ls', initialized with `kron.cholesky` of
        the covariance `self.cov.kron_cov(self.inputs_dists)`.
    """
    kernel_mm = self.cov.kron_cov(self.inputs_dists)
    cholesky_factor = kron.cholesky(kernel_mm)
    return t3f.get_variable('sigma_ls', initializer=cholesky_factor)
def _get_mus(self, mu_ranks):
    """Initialize expectations of var distribution over unary potentials.

    Random features and targets are drawn, pushed through
    `Sigma * (w * y)` with `Sigma = sigma_ls[0] * sigma_ls[0]^T`, and the
    resulting batch is summed into one TT-object. Its cores are then tiled
    `self.n_labels` times along a new leading axis.

    Args:
        mu_ranks: TT-ranks of mus.

    Returns:
        The t3f variable 'tt_mus'.
    """
    # TODO: is this a good initialization?
    x_init = tf.random_normal([mu_ranks, self.d], dtype=tf.float64)
    y_init = tf.random_normal([mu_ranks], dtype=tf.float64)
    w = self.inputs.interpolate_on_batch(x_init)

    # The first core carries the random targets, the others are all ones.
    y_cores = [tf.reshape(y_init, (-1, 1, 1, 1, 1))]
    y_cores += [tf.ones((mu_ranks, 1, 1, 1, 1), dtype=tf.float64)
                for _ in range(1, w.ndims())]
    y_batch = t3f.TensorTrainBatch(y_cores)

    Sigma = ops.tt_tt_matmul(self.sigma_ls[0],
                             ops.transpose(self.sigma_ls[0]))
    res_batch = t3f.tt_tt_matmul(Sigma, t3f.tt_tt_matmul(w, y_batch))

    # Sum the first `mu_ranks` elements of the batch.
    total = res_batch[0]
    for idx in range(1, mu_ranks):
        total = total + res_batch[idx]

    declared_ranks = [1] + [mu_ranks] * (total.ndims() - 1) + [1]
    tiled_cores = [
        tf.tile(core[None, ...], [self.n_labels, 1, 1, 1, 1])
        for core in total.tt_cores
    ]
    mus_init = TensorTrainBatch(tiled_cores, total.get_raw_shape(),
                                declared_ranks)
    return t3f.get_variable('tt_mus', initializer=mus_init)
def build(self, input_shape):
    """Create the TT-tensor variable `Q` and register its cores as weights.

    Args:
        input_shape: unused by this method.
    """
    # Small random TT-tensor, cast to float32.
    random_q = t3f.random_tensor(shape=self.tt_shape,
                                 tt_rank=self.tt_rank,
                                 stddev=1e-3)
    self.Q = t3f.get_variable(
        'Q', initializer=t3f.cast(random_q, dtype=tf.float32))
    self._trainable_weights = list(self.Q.tt_cores)
    self.built = True
def _get_sigma_l(self):
    """Initializes latent inputs covariance Sigma_l.

    Returns:
        The t3f variable 'sigma_l', initialized with `kron.cholesky` of
        the covariance `self.cov.kron_cov(self.inputs_dists)`.
    """
    K_mm = self.cov.kron_cov(self.inputs_dists)
    return t3f.get_variable('sigma_l', initializer=kron.cholesky(K_mm))
def __init__(self,
             num_actions,
             num_colors=2,
             state_shape=(8, 8, 3),
             tt_rank=24,
             optimizer=None,
             dtype=tf.float32,
             scope="qqtt_network",
             reuse=False):
    """Build a Q-function stored as a TT-tensor indexed by (state, action).

    The Q-tensor has one mode of size `num_colors` per state cell and a
    final mode of size `num_actions`.

    Args:
        num_actions: number of actions (size of the last tensor mode).
        num_colors: size of each state mode.
        state_shape: sequence with the shape of a single state.
        tt_rank: TT-rank of the Q-tensor.
        optimizer: TensorFlow optimizer; when None a fresh
            `tf.train.AdamOptimizer(2.5e-4)` is created for this instance,
            rather than one shared instance built at definition time.
        dtype: dtype of the Q-tensor and of the targets.
        scope: variable scope name.
        reuse: passed to `tf.variable_scope`.

    Attributes set:
        input_states, input_actions, input_targets: placeholders.
        q_selected: Q-values of the fed (state, action) pairs.
        q_values: Q-values of all actions for every fed state.
        q_argmax, q_max: arg-max / max of `q_values` over actions.
        loss: Huber loss between `q_selected` and `input_targets`.
        update_model: optimizer step minimizing `loss`.
    """
    # Copy into a list so tuples are accepted and the default is immutable.
    state_shape = list(state_shape)
    if optimizer is None:
        optimizer = tf.train.AdamOptimizer(2.5e-4)

    n_cells = int(np.prod(state_shape))
    input_shape = n_cells * [num_colors] + [num_actions]

    with tf.variable_scope(scope, reuse=reuse):
        # Random initialization of the Q-tensor.
        q0init = t3f.random_tensor(shape=input_shape,
                                   tt_rank=tt_rank,
                                   stddev=1e-3)
        q0init = t3f.cast(q0init, dtype=dtype)
        q0 = t3f.get_variable('Q', initializer=q0init)

        self.input_states = tf.placeholder(dtype=tf.int32,
                                           shape=[None] + state_shape)
        self.input_actions = tf.placeholder(dtype=tf.int32, shape=[None])
        self.input_targets = tf.placeholder(dtype=dtype, shape=[None])

        # Q-value of the action actually taken in each state.
        reshaped_s = tf.reshape(self.input_states, (-1, n_cells))
        reshaped_a = tf.reshape(self.input_actions, (-1, 1))
        input_s_and_a = tf.concat([reshaped_s, reshaped_a], axis=1)
        self.q_selected = t3f.gather_nd(q0, input_s_and_a, dtype=dtype)

        # Q-values of every action for every state: the flattened states
        # are stacked num_actions times along the batch axis and paired
        # with action indices.
        s_a_idx = tf.concat(num_actions * [reshaped_s], axis=0)
        actions_range = tf.range(start=0, limit=num_actions)
        # NOTE(review): presumably `tf_repeat` repeats each action index
        # once per batch element and returns a column -- confirm.
        a_idx = self.tf_repeat(actions_range,
                               tf.shape(self.input_states)[0:1])
        s_a_idx = tf.concat([s_a_idx, a_idx], axis=1)
        vals = t3f.gather_nd(q0, s_a_idx, dtype=dtype)
        # Gathered values are laid out action-major; transpose so that the
        # first axis indexes states.
        self.q_values = tf.transpose(
            tf.reshape(vals, shape=(num_actions, -1)))

        self.q_argmax = tf.argmax(self.q_values, axis=1)
        self.q_max = tf.reduce_max(self.q_values, axis=1)

        self.loss = tf.losses.huber_loss(self.q_selected,
                                         self.input_targets)
        self.update_model = optimizer.minimize(self.loss)
def _get_mus(self, ranks, x_init, y_init):
    """Initialize the 'tt_mus' variable from initial features and targets.

    Computes the batch `Sigma * (w * y_init)`, where `Sigma` is
    `sigma_ls[0] * sigma_ls[0]^T` and `w` is the interpolation of the
    projected `x_init`. The batch is summed into a single TT-object, and
    every core is tiled `self.n_class` times along a new leading axis.

    Args:
        ranks: TT-rank declared for the inner ranks of the result.
        x_init: features passed to `self.cov.project`.
        y_init: targets multiplied by the interpolation weights.

    Returns:
        The t3f variable 'tt_mus'.
    """
    w = self.inputs.interpolate_on_batch(self.cov.project(x_init))
    Sigma = ops.tt_tt_matmul(self.sigma_ls[0],
                             ops.transpose(self.sigma_ls[0]))
    anc = ops.tt_tt_matmul(Sigma, ops.tt_tt_matmul(w, y_init))

    def batch_element(idx):
        # TT-object built from the idx-th slice of every core of the batch.
        cores = [core[idx, :, :, :, :] for core in anc.tt_cores]
        return TensorTrain(cores, tt_ranks=[1] * (anc.ndims() + 1))

    # Sum all elements of the batch.
    total = batch_element(0)
    for idx in range(1, anc.get_shape()[0]):
        total = ops.add(total, batch_element(idx))

    mu_ranks = [1] + [ranks] * (total.ndims() - 1) + [1]
    tiled_cores = [
        tf.tile(core[None, ...], [self.n_class, 1, 1, 1, 1])
        for core in total.tt_cores
    ]
    mus_init = TensorTrainBatch(tiled_cores, total.get_raw_shape(),
                                mu_ranks)
    return t3f.get_variable('tt_mus', initializer=mus_init)
def build(self, input_shape):
    """Create the TT-cores and the optional bias through `add_weight`.

    A t3f variable is created first to obtain initial TT-cores. Each core
    is then re-created with `self.add_weight`, initialized from the
    corresponding t3f core, and `self.matrix` is rebuilt from those weights.

    Args:
        input_shape: unused by this method.

    Raises:
        ValueError: if `self.kernel_initializer` is not 'glorot', 'he' or
            'lecun'.
    """
    if self.kernel_initializer == 'glorot':
        initializer = t3f.glorot_initializer(self.tt_shape,
                                             tt_rank=self.tt_rank)
    elif self.kernel_initializer == 'he':
        initializer = t3f.he_initializer(self.tt_shape,
                                         tt_rank=self.tt_rank)
    elif self.kernel_initializer == 'lecun':
        initializer = t3f.lecun_initializer(self.tt_shape,
                                            tt_rank=self.tt_rank)
    else:
        raise ValueError('Unknown kernel_initializer "%s", only "glorot",'
                         '"he", and "lecun" are supported'
                         % self.kernel_initializer)

    def _initializer_from(core):
        # Bind `core` per call: a closure defined directly in the loop
        # would look the loop variable up late and could see the last core
        # if the initializer were invoked after the loop advanced.
        def _initializer(*args, **kwargs):
            return core.initialized_value()
        return _initializer

    name = 'tt_dense_{}'.format(self.counter)
    with tf.variable_scope(name):
        self.matrix = t3f.get_variable('matrix', initializer=initializer)

        cores_ = []
        for i, v in enumerate(self.matrix.tt_cores):
            cores_.append(
                self.add_weight('%d' % i,
                                shape=v.shape,
                                trainable=True,
                                dtype=tf.float32,
                                initializer=_initializer_from(v)))
        self.matrix = t3f.TensorTrain(cores_)

        self.b = None
        if self.use_bias:
            b_init = tf.constant_initializer(self.bias_initializer)
            self.b = self.add_weight('bias',
                                     shape=(self.output_dim, ),
                                     initializer=b_init)
def _lq_count_trainable_params():
    """Return the total number of scalars in all trainable variables."""
    return np.sum([
        np.prod(v.get_shape().as_list()) for v in tf.trainable_variables()
    ])


def _lq_run_steps(sess, step_fetch, cost, X, Y, X_data, Y_data, batch_size,
                  niters, losses):
    """Run minibatch updates, appending the cost of every step to `losses`.

    Stops early when the cost drops below the module-level `mincost` or
    becomes NaN.
    """
    # NOTE(review): the condition `i <= niters` allows up to niters + 1
    # steps; kept as in the original -- confirm whether this is intended.
    i = 0
    while i <= niters:
        i += 1
        x_mb, y_mb = next_batch(X_data, Y_data, batch_size)
        _, tmp = sess.run([step_fetch, cost], feed_dict={X: x_mb, Y: y_mb})
        losses.append(tmp)
        print(i, tmp)
        if tmp < mincost or np.isnan(tmp):
            break


def LQ(niters, batch_size, lr, rank, X_data, Y_data, model):
    """Fit a least-squares model with a TT-structured weight and time it.

    Args:
        niters: iteration bound of the training loop.
        batch_size: minibatch size passed to `next_batch`.
        lr: learning rate.
        rank: TT-rank of the weight.
        X_data: input data passed to `next_batch`.
        Y_data: target data passed to `next_batch`.
        model: 'riemt3f' (t3f Riemannian projection followed by rounding)
            or 'ott' (manifold update through `gradStep`).

    Returns:
        A tuple `(seconds, losses)` with the elapsed time and the list of
        per-step costs, or None when `model` is not recognized.
    """
    tf.reset_default_graph()
    if model not in ('riemt3f', 'ott'):
        print('what model is that? unknown')
        return

    losses = []
    sess = tf.Session()
    # Close the session on every exit path so repeated calls do not
    # accumulate open sessions.
    try:
        X = tf.placeholder(tf.float32, [None, dx])
        Y = tf.placeholder(tf.float32, [None, dy])

        if model == 'riemt3f':
            print('Starting T3F RGD...')
            # Riemannian projection implemented by t3f; every update needs
            # a projection and a rounding step.
            initializer = t3f.glorot_initializer([nx, ny], tt_rank=rank)
            W_t3f_rgd = t3f.get_variable('W_t3f_rgd',
                                         initializer=initializer)
            cost_t3f_rgd = tf.reduce_mean(
                0.5 * tf.square(Y - t3f.matmul(X, W_t3f_rgd)))

            # Least squares derivative.
            grad = t3f.to_tt_matrix(
                tf.matmul(tf.transpose(Y - t3f.matmul(X, W_t3f_rgd)),
                          -1 * X),
                shape=[nx, ny],
                max_tt_rank=rank)
            riemannian_grad = t3f.riemannian.project(grad, W_t3f_rgd)

            # Hard-coded slower rate (0.1 * lr) because of divergence.
            train_step = t3f.assign(
                W_t3f_rgd,
                t3f.round(W_t3f_rgd - 0.1 * lr * riemannian_grad,
                          max_tt_rank=rank))

            sess.run(tf.global_variables_initializer())
            print('Total number of parameters: ',
                  _lq_count_trainable_params())

            t0 = time.time()
            _lq_run_steps(sess, train_step.op, cost_t3f_rgd, X, Y, X_data,
                          Y_data, batch_size, niters, losses)
            myT = time.time() - t0
        else:
            print('Starting OTT...')
            W_EOTT_gd = aOTTtfVariable(shape=[ny, nx], r=rank)
            cost_eott_gd = tf.reduce_mean(
                0.5 * tf.square(
                    Y - tf.transpose(W_EOTT_gd.mult(tf.transpose(X)))))

            # The optimizer is used only to compute gradients; the update
            # itself is the manifold step `gradStep`.
            opt = tf.train.GradientDescentOptimizer(learning_rate=1.0)
            gW1 = opt.compute_gradients(cost_eott_gd, W_EOTT_gd.getQ())
            man_update = [
                v.assign(gradStep(X=v, G=g, lr=lr)) for g, v in gW1
            ]

            # In this branch the timer also covers initialization.
            t0 = time.time()
            sess.run(tf.global_variables_initializer())
            print('Total number of parameters: ',
                  _lq_count_trainable_params())
            _lq_run_steps(sess, man_update, cost_eott_gd, X, Y, X_data,
                          Y_data, batch_size, niters, losses)
            myT = time.time() - t0
    finally:
        sess.close()

    print('Took seconds:', myT)
    return myT, losses
import pickle
import argparse
import tensorflow as tf
from tensorflow.python.client import device_lib
import t3f

# Command-line interface: the only option is the path of the log file.
parser = argparse.ArgumentParser(
    description='Measure execution time of various t3f operations.')
parser.add_argument('--file_path', help='Path to the file to save logs.')
args = parser.parse_args()

# Matvec.
# NOTE(review): `np` is not imported in the visible part of this script;
# presumably `import numpy as np` lives elsewhere in the file -- confirm.
shape = 10 * np.ones(10, dtype=int)
# Batch of 100 random TT-matrices cast to float64; both the whole batch and
# its first element are stored as variables.
matrices = t3f.random_matrix_batch((shape, shape), 10, batch_size=100)
matrices = t3f.cast(matrices, tf.float64)
one_matrix = t3f.get_variable('one_matrix', initializer=matrices[0])
matrices = t3f.get_variable('matrices', initializer=matrices)
# Same construction with `None` as the second shape component
# (presumably TT-vectors -- confirm against the t3f documentation).
vecs = t3f.random_matrix_batch((shape, None), 10, batch_size=100)
vecs = t3f.cast(vecs, tf.float64)
one_vec = t3f.get_variable('one_vec', initializer=vecs[0])
vecs = t3f.get_variable('vecs', initializer=vecs)
# Same as `vecs`, but with 100 instead of 10 as the second argument
# (presumably the TT-rank -- confirm).
vecs100 = t3f.random_matrix_batch((shape, None), 100, batch_size=100)
vecs100 = t3f.cast(vecs100, tf.float64)
one_vec100 = t3f.get_variable('one_vec100', initializer=vecs100[0])
vecs100 = t3f.get_variable('vecs100', initializer=vecs100)
# Initialize all variables created above in a fresh session.
sess = tf.Session()
sess.run(tf.global_variables_initializer())
# Print the devices visible to TensorFlow.
print(device_lib.list_local_devices())
# Container for results; it is not filled in the visible part of the
# script (presumably timings are stored here later -- confirm).
logs = {}
# Graph operation of a single TT-matrix by TT-vector product.
matvec_op = t3f.matmul(one_matrix, one_vec).op
def _get_sigma_ls(self):
    """Create the 'sigma_ls' variable from the covariance Cholesky factor.

    Returns:
        The t3f variable 'sigma_ls', initialized with `kron.cholesky` of
        `self.cov.kron_cov(self.inputs_dists)`.
    """
    kernel_mm = self.cov.kron_cov(self.inputs_dists)
    return t3f.get_variable('sigma_ls',
                            initializer=kron.cholesky(kernel_mm))