def fit(self, data_train, *args):
    if self.preprocessing:
        data_train = self.preprocessing(data_train, *args)
    with tf.Graph().as_default() as _:
        # create sampler and variables
        variables, sampler = machine_reading_sampler(data_train, batch_size=None)
        # main graph
        objective, _, _ = embedding_updater_model(variables, rank=self.rank, n_ents=self.n_ents,
                                                  n_slots=self.n_slots, reg=self.reg)
        # tf_debug_gradient(emb0, objective, verbose=False)  # This creates new variables...
        # train the model
        optimizer = tf.train.AdamOptimizer(learning_rate=0.1)
        hooks = []
        if self.verbose:
            hooks += [lambda it, e, xy, f: it and ((it % 100) == 0 or it == 1)
                      and print("%d) loss=%f" % (it, f[0]))]
        self.params = learn(objective, sampler, optimizer=optimizer, hooks=hooks,
                            max_epochs=self.max_epochs)
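
# A minimal sketch (an assumption, not part of the original code) of the estimator-like
# object that the fit() routine above expects. It only spells out the attributes fit()
# reads (preprocessing, rank, n_ents, n_slots, reg, verbose, max_epochs) and writes
# (params); the class name and constructor signature are hypothetical.
class EmbeddingUpdaterEstimatorSketch(object):
    def __init__(self, rank, n_ents, n_slots, reg=0.0, max_epochs=100,
                 preprocessing=None, verbose=False):
        self.rank = rank
        self.n_ents = n_ents
        self.n_slots = n_slots
        self.reg = reg
        self.max_epochs = max_epochs
        self.preprocessing = preprocessing
        self.verbose = verbose
        self.params = None  # filled in by fit()

    fit = fit  # bind the training routine defined above as a method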

def test_matrix_factorization(verbose=False):
    np.random.seed(1)
    n, m, rank = 7, 6, 3
    mat = np.random.randn(n, rank).dot(np.random.randn(rank, m))
    tuples = [([i, n + j], mat[i, j]) for i in range(n) for j in range(m)]
    tuple_iterable = data_to_batches(tuples, minibatch_size=n * m)
    sampler, (x, y) = feed_dict_sampler(tuple_iterable, types=[np.int64, np.float32])
    emb_var = tf.Variable(tf.cast(np.random.randn(n + m, rank), 'float32'))
    offset = tf.Variable(tf.cast(1.0, 'float32'))
    loss_op = tf.reduce_mean(tf.square(
        tf.reduce_sum(tf.reduce_prod(tf.gather(emb_var, x), 1), 1) + offset - y))
    emb, offset_val = learn(loss_op, sampler, max_epochs=200, variables=[emb_var, offset])
    mat_est = emb[:n, :].dot(emb[n:, :].T)
    if verbose:
        print(np.linalg.norm(mat_est - mat) ** 2)  # we should have recovered the low-rank matrix
    else:
        assert np.linalg.norm(mat_est - mat) < 1e-3
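
# A minimal numpy sketch (an assumption, added for illustration) of the score that the
# loss in test_matrix_factorization fits for each tuple ([i, n + j], mat[i, j]): the dot
# product of the row embedding and the column embedding plus a shared offset, i.e. a
# rank-constrained factorization of `mat` with a global bias.
def _matrix_factorization_score_sketch(emb, offset, i, j, n):
    # emb stacks the n row embeddings followed by the m column embeddings
    return float(np.dot(emb[i, :], emb[n + j, :]) + offset)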

def test_multitask_learning(verbose=False):
    # factorization of the parameters with multiple linear regressions
    np.random.seed(1)
    n_data, n_features, n_labels, rank = 6, 5, 4, 3
    batch_size = n_data
    n_emb = 1 + n_features + n_labels
    x_mat = np.random.randn(n_data, n_features)
    w_mat = np.random.randn(n_features, rank).dot(np.random.randn(rank, n_labels))
    y_mat = x_mat.dot(w_mat)
    if False:  # direct low-rank regression baseline, kept for reference
        x = tf.placeholder(np.float32, [batch_size, n_features], name='answer')
        y = tf.placeholder(np.float32, [batch_size, n_labels], name='answer')
        sampler = placeholder_feeder((x, y), [(x_mat, y_mat)])  # only one sample
        emb = tf.Variable(tf.cast(np.random.randn(n_features + n_labels, rank), 'float32'))
        w = tf.matmul(emb[:n_features, :], emb[n_features:, :], transpose_b=True)
        preds = tf.matmul(x, w)
        objective = tf.reduce_sum(tf.square(preds - y))
        hooks = [lambda it, e, xy, f: it and ((it % 10) == 0) and print("%d) loss=%f" % (it, f))]
        emb_val, = learn(objective, sampler, hooks=hooks, max_epochs=2000, variables=[emb])
        mat_est = x_mat.dot(emb_val[:n_features, :]).dot(emb_val[n_features:, :].T)
    else:  # the same regression expressed as a reading/answering model
        data = []
        for i in range(n_data):
            data.append(([((NEW_ID, j + 1), x_mat[i, j]) for j in range(n_features)],
                         [((NEW_ID, k + 1 + n_features), y_mat[i, k]) for k in range(n_labels)]))
        data_arr = vectorize_samples(data)
        batches = data_to_batches(data_arr, minibatch_size=batch_size)
        qc = tf.placeholder(np.int64, [batch_size, n_features, 2], name='question_in_context')
        yc = tf.placeholder(np.float32, [batch_size, n_features], name='answer_in_context')
        q = tf.placeholder(np.int64, [batch_size, n_labels, 2], name='question')
        y = tf.placeholder(np.float32, [batch_size, n_labels], name='answer')
        sampler = placeholder_feeder((qc, yc, q, y), batches)
        [print(s) for s in sampler]  # debug: show the generated batches
        emb0 = tf.Variable(tf.cast(np.random.randn(n_emb, rank), 'float32'))

        def reader(context_inputs, context_outputs):
            context_embs = tf.gather(emb0, context_inputs[:, :, 1])
            # preds = tf.reshape(tf.matmul(
            #     tf.reshape(context_embs, (n_data * n_features, rank)),
            #     tf.reshape(emb0[0, :], [rank, 1])),
            #     (n_data, n_features))
            # residues = tf.tile(tf.reshape(preds - yc, (n_data, n_features, 1)), [1, 1, rank])
            # embs_after_reading = tf.tile(tf.reshape(emb0[0, :], (1, rank)), (n_data, 1)) \
            #     + tf.reduce_mean(context_embs * residues, 1) * step_size
            # step_size = tf.Variable(tf.cast(1.0, 'float32'), trainable=True)
            yc_rep = tf.tile(tf.reshape(context_outputs, (n_data, n_features, 1)), (1, 1, rank))
            embs_after_reading = 0 * tf.tile(tf.reshape(emb0[0, :], (1, rank)), (n_data, 1)) \
                + tf.reduce_sum(context_embs * yc_rep, 1)  # * step_size
            return embs_after_reading

        def answerer(embeddings, question):
            embs_after_reading_mat = tf.tile(tf.reshape(embeddings, [n_data, 1, 1, rank]),
                                             [1, n_labels, 1, 1])
            fixed_embs = tf.reshape(tf.gather(emb0, question[:, :, 1]), [n_data, n_labels, 1, rank])
            emb1_question = tf.concat(2, [fixed_embs, embs_after_reading_mat])
            return tf.reduce_sum(tf.reduce_prod(emb1_question, 2), 2)

        def loss(pred, gold):
            return tf.nn.l2_loss(pred - gold)

        objective = loss(answerer(reader(qc, yc), q), y)
        hooks = [lambda it, e, xy, f: it and ((it % 10) == 0) and print("%d) loss=%f" % (it, f))]
        emb_val, = learn(objective, sampler, hooks=hooks, max_epochs=2000, variables=[emb0])
        mat_est = x_mat.dot(emb_val[1:n_features + 1, :]).dot(emb_val[n_features + 1:, :].T)
    if verbose:
        print(0.5 * np.linalg.norm(mat_est - y_mat) ** 2)  # we should have recovered the low-rank matrix
    else:
        assert np.linalg.norm(mat_est - y_mat) < 1e-3
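
# A minimal numpy sketch (an assumption, added for illustration) of what the reader/answerer
# pair in test_multitask_learning computes: the reading step sums the feature embeddings
# weighted by the observed feature values, and the answering step takes dot products with
# the label embeddings. This amounts to the low-rank multitask regression
# x_mat @ emb0[1:n_features + 1] @ emb0[n_features + 1:].T
# (index 0 of emb0 is effectively unused, since it is multiplied by zero in the reading step).
def _multitask_prediction_sketch(emb0_val, x_mat, n_features):
    read = x_mat.dot(emb0_val[1:n_features + 1, :])   # reading step: one embedding per example
    return read.dot(emb0_val[n_features + 1:, :].T)   # answering step: score every label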

def test_larcqy(verbose=False):
    # factorization of the parameters with multiple linear regressions
    # input_types = {'index', 'features'}
    # input_types = {'index'}
    input_types = {'features'}
    n1, n2, d1, d2, rank_gold = 7, 6, 5, 4, 3

    # random data generation
    np.random.seed(1)
    emb_noise, noise = 1, 0
    batch_size = n1 * n2
    t = lambda x: np.round(x, 1)
    data_emb1 = t(np.random.randn(n1, rank_gold) * emb_noise)
    data_emb2 = t(np.random.randn(n2, rank_gold) * emb_noise)
    feat_emb1 = t(np.random.randn(d1, rank_gold) * emb_noise)
    feat_emb2 = t(np.random.randn(d2, rank_gold) * emb_noise)
    x1_mat = data_emb1.dot(feat_emb1.T)
    x2_mat = data_emb2.dot(feat_emb2.T)
    a1 = x1_mat.dot(feat_emb1)
    a2 = x2_mat.dot(feat_emb2)
    y_mat = a1.dot(a2.T) + np.random.randn(n1, n2) * noise

    # data: build (inputs, outputs) samples of ((slot, entity index), value) pairs
    data = []
    n_ents = 2
    for i in range(n1):
        for j in range(n2):
            inputs = []
            if 'features' in input_types:
                inputs += [((0, k + 2), x1_mat[i, k]) for k in range(d1)] \
                    + [((1, k + 2 + d1), x2_mat[j, k]) for k in range(d2)]
                n_ents = d1 + d2 + 2
            if 'index' in input_types:
                inputs += [((0, d1 + d2 + 2 + i), 1.0), ((1, d1 + d2 + 2 + n1 + j), 1.0)]
                n_ents = d1 + d2 + n1 + n2 + 2
            outputs = [((0, 1), y_mat[i, j])]
            data.append((inputs, outputs))
    data_arr = vectorize_samples(data)
    batches = data_to_batches(data_arr, batch_size, dtypes=[np.int64, np.float32, np.int64, np.float32])
    if False:  # feed the batches through placeholders
        qc = tf.placeholder(np.int64, (batch_size, n_ents, 2), name='question_in_context')
        yc = tf.placeholder(np.float32, (batch_size, n_ents), name='answer_in_context')
        q = tf.placeholder(np.int64, (batch_size, 1, 2), name='question')
        y = tf.placeholder(np.float32, (batch_size, 1), name='answer')
        sampler = placeholder_feeder((qc, yc, q, y), batches)
        sampler = [x for x in sampler]
    else:  # a single batch, stored directly in graph variables
        qc0, yc0, q0, y0 = [x for x in batches][0]
        # print(qc0, yc0, q0, y0)
        qc = tf.Variable(qc0)
        yc = tf.Variable(yc0)
        q = tf.Variable(q0)
        y = tf.Variable(y0)

    # model definition
    rank = min(rank_gold, max(min(n1, n2), min(d1, d2)))
    print(rank)
    if False:  # initialization close to the gold feature embeddings (kept for debugging)
        emb0_val = np.concatenate((np.zeros((2, rank)), feat_emb1, feat_emb2))
        emb0_val += np.random.randn(n_ents, rank) * 0.1
        # emb0_val = np.round(emb0_val, 1)
    else:
        emb0_val = np.random.randn(n_ents, rank)
    emb0 = tf.constant(emb0_val.tolist(), dtype=np.float32, shape=(n_ents, rank))
    loss = total_loss_quadratic
    # reading step
    emb1 = reader(emb0=emb0, context=(qc, yc), n_slots=2)
    # answering step
    objective = loss(answerer(emb1, q), y)  # + 1e-6 * tf.nn.l2_loss(emb0)
    # tf_debug_gradient(emb0, objective, verbose=False)  # This creates new variables...

    # train the model
    optimizer = tf.train.AdamOptimizer(learning_rate=0.1)
    hooks = [lambda it, e, xy, f: it and ((it % 100) == 0) and print("%d) loss=%f" % (it, f[0]))]
    # hooks += [OperatorHook(tf_show(grads[0]), sampler[0][1])]
    params = learn(objective, optimizer=optimizer, hooks=hooks, max_epochs=300, variables=[emb0])
    # mat_est = model.numeric_eval(params, x_mat)
    # if verbose:
    #     print(0.5 * np.linalg.norm(mat_est - y_mat) ** 2)  # we should have recovered the low-rank matrix
    print(params[-1])
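
# Possible script entry point (an assumption, not part of the original module): run the
# three tests above with verbose output so the losses and recovery errors are printed.
if __name__ == '__main__':
    test_matrix_factorization(verbose=True)
    test_multitask_learning(verbose=True)
    test_larcqy(verbose=True)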