def _eval_test_set(sess, model, test_buckets):
    """Run one forward-only batch per bucket of the test set and report losses."""
    for bucket_id in range(len(settings.BUCKETS)):
        bucket = test_buckets[bucket_id]
        if not bucket:
            print("\nEmpty test bucket {}".format(settings.BUCKETS[bucket_id]))
            continue
        # Draw a single evaluation batch and run it forward-only.
        encoder_inputs, decoder_inputs, decoder_masks = util.get_batch(
            bucket, bucket_id, batch_size=settings.BATCH_SIZE)
        _, step_loss, logits = run_step(sess, model, encoder_inputs,
                                        decoder_inputs, decoder_masks,
                                        bucket_id, True)
        print("\nEVALUATING ON TEST SET")
        print("\n{} | test bucket {}; test loss {}\n".format(
            time.strftime("%c"), settings.BUCKETS[bucket_id], step_loss))
        # Show one randomly chosen question/answer pair from this batch.
        example = random.choice(range(settings.BATCH_SIZE))
        question = [step[example] for step in encoder_inputs]
        print_encoder(question, model.encoder_to_words)
        print_decoder(logits, model.decoder_to_words, example)
def fetch_batch_and_train(sents, docs, tags, model, seq_len, i, p1, p2):
    """Fetch batch *i* and run one optimisation step, accumulating loss totals.

    :param p1: running totals ``(tm_costs, tm_words, lm_costs, lm_words)``
    :param p2: fetch targets ``(m_tm_cost, m_tm_train, m_lm_cost, m_lm_train)``
    :return: updated ``(tm_costs, tm_words, lm_costs, lm_words)``

    Relies on module-level ``sess``, ``cf``, ``get_batch`` and ``LM``.
    """
    tm_costs, tm_words, lm_costs, lm_words = p1
    m_tm_cost, m_tm_train, m_lm_cost, m_lm_train = p2
    is_lm = isinstance(model, LM)
    x, y, m, d, t = get_batch(sents, docs, tags, i, cf.doc_len, seq_len,
                              cf.tag_len, cf.batch_size, 0, is_lm)
    # Build the feed dict once; the fetch list is identical in all cases.
    if is_lm:
        if cf.topic_number > 0:
            feed = {model.x: x, model.y: y, model.lm_mask: m,
                    model.doc: d, model.tag: t}
        else:  # pure lstm: no topic-model inputs
            feed = {model.x: x, model.y: y, model.lm_mask: m}
    else:
        feed = {model.y: y, model.tm_mask: m, model.doc: d, model.tag: t}
    tm_cost, _, lm_cost, _ = sess.run(
        [m_tm_cost, m_tm_train, m_lm_cost, m_lm_train], feed)
    if tm_cost is not None:
        # keep track of full batch loss (not per example batch loss)
        tm_costs += tm_cost * cf.batch_size
        tm_words += np.sum(m)
    if lm_cost is not None:
        lm_costs += lm_cost * cf.batch_size
        lm_words += np.sum(m)
    return tm_costs, tm_words, lm_costs, lm_words
def fit(self, x_train, num_epochs=1, print_every=0):
    """
    Method to train GAN.

    Parameters
    ----------
    x_train : array
        Training samples; axis 0 is the sample axis.
    num_epochs : int
        Number of passes over the training data.
    print_every : int
        Print loss information every |print_every| number of batches.
        If 0 prints nothing.
    """
    # Use floor division: `/` yields a float in Python 3 and the
    # `range(num_batches)` below would raise TypeError.
    num_batches = x_train.shape[0] // self.model.batch_size
    print("num batches {}".format(num_batches))
    for epoch in range(num_epochs):
        print("\nEpoch {}".format(epoch + 1))
        for batch in range(num_batches):
            x_batch = get_batch(x_train, self.model.batch_size)
            # NOTE(review): the argument-less call below runs the
            # discriminator step without a real batch; confirm it is
            # intentional and not an accidental duplicate of the
            # call with x_batch on the next line.
            self.train_discriminator()
            self.train_discriminator(x_batch)
            self.train_gan()
            if print_every and batch % print_every == 0:
                print("GAN loss {} \t D loss {} \t Entropy {}".format(
                    self.g_loss_history[-1], self.d_loss_history[-1],
                    self.ent_loss_history[-1]))
def log(config, data, patterns, word2idx_dict, model, sess, label="train",
        entropy=None):
    """Evaluate the model on *data* and return ((acc, recall, f1), best_entro).

    When *entropy* is None, sweeps thresholds 0.01..1.99 and keeps the one
    with the best F1; otherwise applies the given entropy threshold directly.
    """
    golds, preds, vals, sim_preds, sim_vals = [], [], [], [], []
    fetches = [model.gold, model.pred, model.max_val,
               model.sim_pred, model.sim_max_val]
    for batch in get_batch(config, data, word2idx_dict):
        feed = get_feeddict(model, batch, patterns, is_train=False)
        gold, pred, val, sim_pred, sim_val = sess.run(fetches, feed_dict=feed)
        golds.extend(gold.tolist())
        preds.extend(pred.tolist())
        vals.extend(val.tolist())
        sim_preds.extend(sim_pred.tolist())
        sim_vals.extend(sim_val.tolist())
    acc = recall = f1 = 0.
    best_entro = 0.
    if entropy is None:
        # Predictions whose max-entropy value exceeds the threshold are
        # zeroed out (treated as "no relation").
        for t in (0.01 * i for i in range(1, 200)):
            masked = ((np.asarray(vals, dtype=np.float32) <= t).astype(np.int32)
                      * np.asarray(preds, dtype=np.int32))
            t_acc, t_recall, t_f1 = evaluate(golds, masked.tolist())
            if t_f1 > f1:
                acc, recall, f1 = t_acc, t_recall, t_f1
                best_entro = t
    else:
        masked = ((np.asarray(vals, dtype=np.float32) <= entropy).astype(np.int32)
                  * np.asarray(preds, dtype=np.int32))
        acc, recall, f1 = evaluate(golds, masked.tolist())
    return (acc, recall, f1), best_entro
def train(config, data):
    # Weak-supervision training loop: label the training set with regex
    # pattern matches, then train the Soft_Match model on labeled +
    # pseudo-labeled batches, tracking dev/test metrics per epoch.
    word2idx_dict, word_emb, train_data, dev_data, test_data = data
    patterns = get_patterns(config, word2idx_dict)
    with tf.variable_scope("models"):
        # Dataset-specific label vocabulary.
        if config.dataset == "tacred":
            import tacred_constant as constant
        else:
            import semeval_constant as constant
        regex = Pat_Match(config, constant.LABEL_TO_ID)
        match = Soft_Match(config, word_mat=word_emb,
                           word2idx_dict=word2idx_dict)
    sess_config = tf.ConfigProto(allow_soft_placement=True)
    sess_config.gpu_options.allow_growth = True
    # Split training data: examples hit by at least one regex pattern get a
    # (pseudo-)relation label; the rest are treated as unlabeled.
    labeled_data = []
    unlabeled_data = []
    for x in train_data:
        batch = [x["tokens"]]
        res, pred = regex.match(batch)
        patterns["weights"] += res[0]
        if np.amax(res) > 0:
            x["rel"] = pred.tolist()[0]
            x["pat"] = np.argmax(res, axis=1).tolist()[0]
            labeled_data.append(x)
        else:
            x["rel"] = 0
            unlabeled_data.append(x)
    # Normalize pattern hit counts into a weight distribution.
    patterns["weights"] = patterns["weights"] / np.sum(patterns["weights"])
    random.shuffle(unlabeled_data)
    print("{} labeled data".format(len(labeled_data)))
    dev_history, test_history = [], []
    with tf.Session(config=sess_config) as sess:
        lr = float(config.init_lr)
        sess.run(tf.global_variables_initializer())
        for epoch in tqdm(range(1, config.num_epoch + 1), desc="Epoch"):
            # Each step merges one labeled batch with one pseudo-labeled batch.
            for batch1, batch2 in zip(
                    get_batch(config, labeled_data, word2idx_dict),
                    get_batch(config, unlabeled_data, word2idx_dict,
                              pseudo=True)):
                batch = merge_batch(batch1, batch2)
                loss, _ = sess.run(
                    [match.loss, match.train_op],
                    feed_dict=get_feeddict(match, batch, patterns))
            # Tune the entropy threshold on dev, then reuse it for test.
            (dev_acc, dev_rec, dev_f1), best_entro = log(
                config, dev_data, patterns, word2idx_dict, match, sess, "dev")
            (test_acc, test_rec, test_f1), _ = log(
                config, test_data, patterns, word2idx_dict, match, sess,
                "test", entropy=best_entro)
            dev_history.append((dev_acc, dev_rec, dev_f1))
            test_history.append((test_acc, test_rec, test_f1))
            # NOTE(review): dev_history[-1] is the tuple appended just above,
            # so `dev_f1 <= dev_history[-1][2]` compares dev_f1 with itself
            # and is always true — the learning rate decays every epoch.
            # This probably intended dev_history[-2] (previous epoch); confirm.
            if len(dev_history) >= 1 and dev_f1 <= dev_history[-1][2]:
                lr *= config.lr_decay
                sess.run(tf.assign(match.lr, lr))
    # Report test metrics at the epoch with the best dev F1.
    max_idx = dev_history.index(max(dev_history, key=lambda x: x[2]))
    max_acc, max_rec, max_f1 = test_history[max_idx]
    print("acc: {}, rec: {}, f1: {}".format(max_acc, max_rec, max_f1))
    sys.stdout.flush()
    return max_acc, max_rec, max_f1
def train_model(
    model,
    dataloader,
    place_cells,
    hd_cells,
    num_epochs=10,
    lr=1e-5,
    momentum=0.9,
    weight_decay=1e-5,
    clip=1e-5,
):
    """Train model using CrossEntropy and RMSProp as in paper"""
    # Separate named-tensor losses for head-direction and place-cell outputs.
    hdloss = CrossEntropyLoss().spec("hdcell")
    placeloss = CrossEntropyLoss().spec("placecell")
    # decay_params applies weight decay only to the listed parameter groups.
    params = decay_params(model, ["head", "place", "g"], weight_decay)
    optimizer = torch.optim.RMSprop(params, lr=lr, momentum=momentum)
    losses = []
    # Use the notebook-friendly progress bar when running inside IPython.
    tq = tqdm_notebook if in_ipynb() else tqdm
    for k in range(num_epochs):
        model.train()
        epoch_losses = []
        for i, traj in enumerate(tq(dataloader)):
            cs, hs, ego_vel, c0, h0 = get_batch(traj, place_cells, hd_cells)
            optimizer.zero_grad()
            zs, ys, _ = model(ego_vel, c0, h0)
            # Joint loss over both prediction heads.
            loss = hdloss(zs, hs) + placeloss(ys, cs)
            epoch_losses.append(loss.item())
            loss.backward()
            # torch.nn.utils.clip_grad_value_(model.head.parameters(), clip)
            # torch.nn.utils.clip_grad_value_(model.place.parameters(), clip)
            torch.nn.utils.clip_grad_value_(model.parameters(), clip)
            optimizer.step()
            if (i + 1) % 1000 == 0 or i + 1 == len(dataloader):
                # Output and visualize progress each epoch.
                print(
                    f"epoch {k}, mean loss {np.mean(epoch_losses)}, std loss {np.std(epoch_losses)}"
                )
                visualize_g(model, dataloader, place_cells, hd_cells)
                model.train()
                # NOTE: this break ends the epoch at the first report point
                # (after at most 1000 batches), not at the end of the loader.
                break
        # Early exit once enough total batches have been consumed.
        if i > 1000 * num_epochs:
            return epoch_losses
        losses += epoch_losses
    return losses
def run_epoch(sents, docs, tags, p1, pad_id, cf, idxvocab):
    """Compute test perplexity for the topic model and/or language model.

    :param sents: pair ``(tm_sents, lm_sents)`` of sentence streams
    :param p1: pair ``(tm, lm)`` of models; either may be None to skip it
    :param pad_id: pad-token id forwarded to ``get_batch``

    Relies on module-level ``sess`` and ``get_batch``; prints perplexities.
    """
    tm, lm = p1
    # generate the batches
    tm_num_batches = int(math.ceil(float(len(sents[0])) / cf.batch_size))
    lm_num_batches = int(math.ceil(float(len(sents[1])) / cf.batch_size))
    # run an epoch to compute tm and lm perplexities
    if tm is not None:
        tm_costs, tm_words = 0.0, 0.0
        for bi in range(tm_num_batches):
            _, y, m, d, t = get_batch(sents[0], docs[0], tags, bi, cf.doc_len,
                                      cf.tm_sent_len, cf.tag_len,
                                      cf.batch_size, pad_id, False)
            tm_cost = sess.run(tm.tm_cost, {
                tm.y: y,
                tm.tm_mask: m,
                tm.doc: d,
                tm.tag: t
            })
            # Accumulate full-batch loss and the token count from the mask.
            tm_costs += tm_cost * cf.batch_size
            tm_words += np.sum(m)
        print("\ntest topic model perplexity = %.3f" %
              (np.exp(tm_costs / tm_words)))
    if lm is not None:
        lm_costs, lm_words = 0.0, 0.0
        for bi in range(lm_num_batches):
            x, y, m, d, t = get_batch(sents[1], docs[1], tags, bi, cf.doc_len,
                                      cf.lm_sent_len, cf.tag_len,
                                      cf.batch_size, pad_id, True)
            # tm_weights is fetched alongside the cost but its value is unused.
            lm_cost, _ = sess.run([lm.lm_cost, lm.tm_weights], {
                lm.x: x,
                lm.y: y,
                lm.lm_mask: m,
                lm.doc: d,
                lm.tag: t
            })
            lm_costs += lm_cost * cf.batch_size
            lm_words += np.sum(m)
        print("test language model perplexity = %.3f" %
              (np.exp(lm_costs / lm_words)))
def _train_epoch(args, epoch, model, train_data, corpus, device, lr, criterion):
    # One training epoch over `train_data` using truncated BPTT with window
    # `args.bptt`; performs manual SGD updates and logs periodically.
    total_loss = 0.
    start_time = time.time()
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(args.batch_size)
    model.train()
    for batch, i in enumerate(range(0, train_data.size(0) - 1, args.bptt)):
        data, targets = get_batch(args.bptt, train_data, i)
        # Starting each batch, we detach the hidden state from how it was previously produced.
        # If we didn't, the model would try backpropagating all the way to start of the dataset.
        hidden = repackage_hidden(hidden)
        model.zero_grad()
        output, hidden = model(data, hidden)
        loss = criterion(output.view(-1, ntokens), targets)
        loss.backward()
        # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
        torch.nn.utils.clip_grad_norm_(model.parameters(), args.clip)
        # Manual SGD step.
        # NOTE(review): `Tensor.add_(-lr, grad)` is the deprecated
        # two-argument form; current PyTorch spells this
        # `p.data.add_(p.grad.data, alpha=-lr)`.
        for p in model.parameters():
            p.data.add_(-lr, p.grad.data)
        total_loss += loss.item()
        if batch % args.log_interval == 0 and batch > 0:
            # Average loss since the previous report.
            cur_loss = total_loss / args.log_interval
            elapsed = time.time() - start_time
            print(
                '| epoch {:3d} | {:5d}/{:5d} batches | lr {:02.2f} | ms/batch {:5.2f} | '
                'loss {:5.2f} | ppl {:8.2f}'.format(
                    epoch, batch,
                    len(train_data) // args.bptt, lr,
                    elapsed * 1000 / args.log_interval, cur_loss,
                    math.exp(cur_loss)))
            total_loss = 0
            start_time = time.time()
            # Log to tensorboard (uses module-level `summary_writer` and
            # `inject_summary`).
            info = {
                f'training/{epoch}loss/loss': cur_loss,
                f'training/{epoch}/loss_exp': math.exp(cur_loss),
                'training/lr': lr,
            }
            for tag, value in info.items():
                inject_summary(summary_writer, tag, value, i)
            summary_writer.flush()
def evaluate(args, data_source, model, corpus, criterion):
    """Return average per-token loss of *model* over *data_source*.

    Uses the module-level ``eval_batch_size``; dropout is disabled via
    ``model.eval()`` and no gradients are tracked.
    """
    model.eval()
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(eval_batch_size)
    total_loss = 0.
    with torch.no_grad():
        for start in range(0, data_source.size(0) - 1, args.bptt):
            data, targets = get_batch(args.bptt, data_source, start)
            output, hidden = model(data, hidden)
            flat_output = output.view(-1, ntokens)
            # Weight each window's loss by its length so the final average
            # is per token across the whole corpus.
            total_loss += criterion(flat_output, targets).item() * len(data)
            hidden = repackage_hidden(hidden)
    return total_loss / (len(data_source) - 1)
def visualize_g(model, test_iter, place_cells, hd_cells, offset=0, limit=50):
    """Visualize 25 cells in G layer of model (applied to output of LSTM)"""
    model.eval()
    # G accumulates grid-layer activations, P the corresponding positions.
    G, P = None, None
    c = 0
    # Get batches up to limit as samples
    for traj in test_iter:
        cs, hs, ego_vel, c0, h0, xs = get_batch(traj, place_cells, hd_cells,
                                                pos=True)
        # NOTE: the limit check happens after fetching the batch, so one
        # extra batch is drawn (and discarded) past the limit.
        if c > limit:
            break
        zs, gs, ys = model(ego_vel, c0, h0)
        if G is None:
            G = gs.cpu()
            P = xs.cpu()
        else:
            # ntorch named-tensor concatenation along the batch dimension.
            G = ntorch.cat((G, gs.cpu()), "batch")
            P = ntorch.cat((P, xs.cpu()), "batch")
        # Free GPU memory between batches.
        del ego_vel, cs, xs, hs, zs, ys, gs, h0, c0
        torch.cuda.empty_cache()
        c += 1
    # Flatten time and batch dims into a single "pts" dimension.
    pts = P.stack(("t", "batch"), "pts")
    G = G.stack(("t", "batch"), "pts")
    # x/y coordinates of every sample point.
    xs, ys = [pts.get("ax", i).values.detach().numpy() for i in [0, 1]]
    # Plot 5x5 grid of cell activations, starting at offset
    axs = plt.subplots(5, 5, figsize=(50, 50))[1]
    axs = axs.flatten()
    for i, ax in enumerate(axs):
        acts = G.get("placecell", offset + i).values.detach().numpy()
        # Mean activation per spatial bin (20x20 occupancy map).
        res = stats.binned_statistic_2d(xs, ys, acts, bins=20,
                                        statistic="mean")[0]
        ax.imshow(res, cmap="jet")
        ax.axis("off")
    plt.show()
def simulator(args, emb_array, labels, max_compare_num, filepath, threshold):
    """Replay embeddings through an incrementally built identification database.

    Counts false accepts, false rejects and wrong answers, then writes the
    summary via ``util.show_and_save_v3`` and returns the resulting file.
    """
    false_accepts = 0   # accepted but identity not in database
    wrong_answers = 0   # accepted, in database, but matched wrong identity
    false_rejects = 0   # rejected although identity is in database
    num_accepted = 0
    num_rejected = 0
    database = Database(emb_array.shape[0], max_compare_num)
    for indx, emb in enumerate(emb_array):
        test_array, test_label = util.get_batch(emb_array, labels, indx)
        if len(database) != 0:
            max_id, max_similarity = database.get_most_similar(test_array)
            if threshold < max_similarity:
                # Similarity above threshold: treated as a known identity.
                num_accepted += 1
                if not database.contains(test_label):
                    false_accepts += 1
                elif test_label != database.get_label_by_id(max_id):
                    wrong_answers += 1
            else:
                # Below threshold: treated as an intruder.
                num_rejected += 1
                if database.contains(test_label):
                    false_rejects += 1
        # Every sample is inserted regardless of the decision above.
        database.insert(test_label, test_array)
        # database.print_database()  # debug helper
    result_file = util.show_and_save_v3(false_accepts, false_rejects,
                                        wrong_answers, num_accepted,
                                        num_rejected, max_compare_num,
                                        filepath)
    return result_file
idxvocab = [] #constants pad_symbol = "<pad>" start_symbol = "<go>" end_symbol = "<eos>" unk_symbol = "<unk>" dummy_symbols = [pad_symbol, start_symbol, end_symbol, unk_symbol] ########### #functions# ########### def fetch_batch_and_train(sents, docs, tags, model, seq_len, i, (tm_costs, tm_words, lm_costs, lm_words), \ (m_tm_cost, m_tm_train, m_lm_cost, m_lm_train)): x, y, m, d, t = get_batch(sents, docs, tags, i, cf.doc_len, seq_len, cf.tag_len, cf.batch_size, 0, \ (True if isinstance(model, LM) else False)) if isinstance(model, LM): if cf.topic_number > 0: tm_cost, _, lm_cost, _ = sess.run([m_tm_cost, m_tm_train, m_lm_cost, m_lm_train], \ {model.x: x, model.y: y, model.lm_mask: m, model.doc: d, model.tag: t}) else: #pure lstm tm_cost, _, lm_cost, _ = sess.run([m_tm_cost, m_tm_train, m_lm_cost, m_lm_train], \ {model.x: x, model.y: y, model.lm_mask: m}) else: tm_cost, _, lm_cost, _ = sess.run([m_tm_cost, m_tm_train, m_lm_cost, m_lm_train], \ {model.y: y, model.tm_mask: m, model.doc: d, model.tag: t}) if tm_cost != None: tm_costs += tm_cost * cf.batch_size #keep track of full batch loss (not per example batch loss)
def main(_):
    # End-to-end training of a sparse multinomial/Poisson regression on
    # microbiome count data: filter + align the biom tables and metadata,
    # build the TF graph with negative sampling, train, and cross-validate.
    opts = Options(save_path=FLAGS.save_path,
                   train_biom=FLAGS.train_biom,
                   test_biom=FLAGS.test_biom,
                   train_metadata=FLAGS.train_metadata,
                   test_metadata=FLAGS.test_metadata,
                   formula=FLAGS.formula,
                   tree=FLAGS.tree,
                   learning_rate=FLAGS.learning_rate,
                   clipping_size=FLAGS.clipping_size,
                   beta_mean=FLAGS.beta_mean,
                   beta_scale=FLAGS.beta_scale,
                   gamma_mean=FLAGS.gamma_mean,
                   gamma_scale=FLAGS.gamma_scale,
                   epochs_to_train=FLAGS.epochs_to_train,
                   num_neg_samples=FLAGS.num_neg_samples,
                   batch_size=FLAGS.batch_size,
                   min_sample_count=FLAGS.min_sample_count,
                   min_feature_count=FLAGS.min_feature_count,
                   statistics_interval=FLAGS.statistics_interval,
                   summary_interval=FLAGS.summary_interval,
                   checkpoint_interval=FLAGS.checkpoint_interval)
    # preprocessing: keep samples present in the metadata with enough reads,
    # and features with enough total counts.
    train_table, train_metadata = opts.train_table, opts.train_metadata
    train_metadata = train_metadata.loc[train_table.ids(axis='sample')]
    sample_filter = lambda val, id_, md: (
        (id_ in train_metadata.index) and np.sum(val) > opts.min_sample_count)
    read_filter = lambda val, id_, md: np.sum(val) > opts.min_feature_count
    metadata_filter = lambda val, id_, md: id_ in train_metadata.index
    train_table = train_table.filter(metadata_filter, axis='sample')
    train_table = train_table.filter(sample_filter, axis='sample')
    train_table = train_table.filter(read_filter, axis='observation')
    train_metadata = train_metadata.loc[train_table.ids(axis='sample')]
    # Sort samples so the table rows line up with the metadata rows.
    sort_f = lambda xs: [xs[train_metadata.index.get_loc(x)] for x in xs]
    train_table = train_table.sort(sort_f=sort_f, axis='sample')
    # Expand the patsy formula into a design matrix.
    train_metadata = dmatrix(opts.formula, train_metadata,
                             return_type='dataframe')
    tree = opts.tree
    train_table, tree = match_tips(train_table, tree)
    basis, _ = sparse_balance_basis(tree)
    basis = basis.T
    # hold out data preprocessing: restrict to training features and align.
    test_table, test_metadata = opts.test_table, opts.test_metadata
    metadata_filter = lambda val, id_, md: id_ in test_metadata.index
    obs_lookup = set(train_table.ids(axis='observation'))
    feat_filter = lambda val, id_, md: id_ in obs_lookup
    test_table = test_table.filter(metadata_filter, axis='sample')
    test_table = test_table.filter(feat_filter, axis='observation')
    test_metadata = test_metadata.loc[test_table.ids(axis='sample')]
    sort_f = lambda xs: [xs[test_metadata.index.get_loc(x)] for x in xs]
    test_table = test_table.sort(sort_f=sort_f, axis='sample')
    test_metadata = dmatrix(opts.formula, test_metadata,
                            return_type='dataframe')
    test_table, tree = match_tips(test_table, tree)
    p = train_metadata.shape[1]  # number of covariates
    G_data = train_metadata.values
    # Sparse count matrix (samples x features) and dense holdout counts.
    y_data = train_table.matrix_data.tocoo().T
    y_test = np.array(test_table.matrix_data.todense()).T
    N, D = y_data.shape
    save_path = opts.save_path
    learning_rate = opts.learning_rate
    batch_size = opts.batch_size
    gamma_mean, gamma_scale = opts.gamma_mean, opts.gamma_scale
    beta_mean, beta_scale = opts.beta_mean, opts.beta_scale
    num_neg = opts.num_neg_samples
    clipping_size = opts.clipping_size
    # One "epoch" = one pass over the nonzero entries.
    epoch = y_data.nnz // batch_size
    num_iter = int(opts.epochs_to_train * epoch)
    holdout_size = test_metadata.shape[0]
    checkpoint_interval = opts.checkpoint_interval
    # Model code
    with tf.Graph().as_default(), tf.Session() as session:
        with tf.device("/cpu:0"):
            # Place holder variables to accept input data
            Gpos_ph = tf.placeholder(tf.float32, [batch_size, p],
                                     name='G_pos')
            Gneg_ph = tf.placeholder(tf.float32, [num_neg, p], name='G_neg')
            G_holdout = tf.placeholder(tf.float32, [holdout_size, p],
                                       name='G_holdout')
            Y_holdout = tf.placeholder(tf.float32, [holdout_size, D],
                                       name='Y_holdout')
            Y_ph = tf.placeholder(tf.float32, [batch_size], name='Y_ph')
            pos_row = tf.placeholder(tf.int32, shape=[batch_size],
                                     name='pos_row')
            pos_col = tf.placeholder(tf.int32, shape=[batch_size],
                                     name='pos_col')
            neg_row = tf.placeholder(tf.int32, shape=[num_neg],
                                     name='neg_row')
            neg_col = tf.placeholder(tf.int32, shape=[num_neg],
                                     name='neg_col')
            # Negative samples are zero counts by construction.
            neg_data = tf.zeros(shape=[num_neg], name='neg_data',
                                dtype=tf.float32)
            total_zero = tf.constant(
                y_data.shape[0] * y_data.shape[1] - y_data.nnz,
                dtype=tf.float32)
            total_nonzero = tf.constant(y_data.nnz, dtype=tf.float32)
            # Define PointMass Variables first
            qgamma = tf.Variable(tf.random_normal([1, D - 1]), name='qgamma')
            qbeta = tf.Variable(tf.random_normal([p, D - 1]), name='qB')
            theta = tf.Variable(tf.random_normal([N, 1]), name='theta')
            # Distributions species bias
            gamma = Normal(loc=tf.zeros([1, D - 1]) + gamma_mean,
                           scale=tf.ones([1, D - 1]) * gamma_scale,
                           name='gamma')
            # regression coefficents distribution
            beta = Normal(loc=tf.zeros([p, D - 1]) + beta_mean,
                          scale=tf.ones([p, D - 1]) * beta_scale,
                          name='B')
            Bprime = tf.concat([qgamma, qbeta], axis=0)
            # Add bias terms for samples
            Gpos = tf.concat([tf.ones([batch_size, 1]), Gpos_ph], axis=1)
            Gneg = tf.concat([tf.ones([num_neg, 1]), Gneg_ph], axis=1)
            # Convert basis to SparseTensor
            psi = tf.SparseTensor(
                indices=np.mat([basis.row, basis.col]).transpose(),
                values=basis.data,
                dense_shape=basis.shape)
            V = tf.transpose(
                tf.sparse_tensor_dense_matmul(psi, tf.transpose(Bprime)))
            # sparse matrix multiplication for positive samples
            pos_prime = tf.reduce_sum(tf.multiply(
                Gpos, tf.transpose(tf.gather(V, pos_col, axis=1))),
                axis=1)
            pos_phi = tf.reshape(tf.gather(theta, pos_row),
                                 shape=[batch_size]) + pos_prime
            Y = Poisson(log_rate=pos_phi, name='Y')
            # sparse matrix multiplication for negative samples
            neg_prime = tf.reduce_sum(tf.multiply(
                Gneg, tf.transpose(tf.gather(V, neg_col, axis=1))),
                axis=1)
            neg_phi = tf.reshape(tf.gather(theta, neg_row),
                                 shape=[num_neg]) + neg_prime
            neg_poisson = Poisson(log_rate=neg_phi, name='neg_counts')
            # Negative log joint: priors + rescaled positive/negative
            # log-likelihoods (rescaling corrects for subsampling).
            loss = -(
                tf.reduce_sum(gamma.log_prob(qgamma)) + \
                tf.reduce_sum(beta.log_prob(qbeta)) + \
                tf.reduce_sum(Y.log_prob(Y_ph)) * (total_nonzero / batch_size) + \
                tf.reduce_sum(neg_poisson.log_prob(neg_data)) * (total_zero / num_neg)
            )
            optimizer = tf.train.AdamOptimizer(learning_rate, beta1=0.9,
                                               beta2=0.9)
            gradients, variables = zip(*optimizer.compute_gradients(loss))
            gradients, _ = tf.clip_by_global_norm(gradients, clipping_size)
            train = optimizer.apply_gradients(zip(gradients, variables))
            with tf.name_scope('accuracy'):
                # Holdout prediction: multinomial proportions scaled by the
                # observed per-sample totals.
                holdout_count = tf.reduce_sum(Y_holdout, axis=1)
                spred = tf.nn.softmax(
                    tf.transpose(
                        tf.sparse_tensor_dense_matmul(
                            psi,
                            tf.transpose(
                                (tf.matmul(G_holdout, qbeta) + qgamma)))))
                pred = tf.reshape(holdout_count, [-1, 1]) * spred
                # NOTE(review): despite the name, this is a mean absolute
                # error, as the summary name below says.
                mse = tf.reduce_mean(tf.squeeze(tf.abs(pred - Y_holdout)))
                tf.summary.scalar('mean_absolute_error', mse)
            tf.summary.scalar('loss', loss)
            tf.summary.histogram('qbeta', qbeta)
            tf.summary.histogram('qgamma', qgamma)
            tf.summary.histogram('theta', theta)
            merged = tf.summary.merge_all()
            tf.global_variables_initializer().run()
            writer = tf.summary.FileWriter(save_path, session.graph)
            losses = np.array([0.] * num_iter)
            idx = np.arange(train_metadata.shape[0])
            # NOTE(review): log_handle is opened but never written or closed.
            log_handle = open(os.path.join(save_path, 'run.log'), 'w')
            gen = get_batch(batch_size, N, D,
                            y_data.data, y_data.row, y_data.col,
                            num_neg=num_neg)
            start_time = time.time()
            last_checkpoint_time = 0
            # NOTE(review): start_time is assigned twice in a row.
            start_time = time.time()
            saver = tf.train.Saver()
            for i in range(num_iter):
                # NOTE(review): batch_idx is computed but never used; the
                # batch actually comes from the generator below.
                batch_idx = np.random.choice(idx, size=batch_size)
                batch = next(gen)
                (positive_row, positive_col, positive_data, negative_row,
                 negative_col, negative_data) = batch
                feed_dict = {
                    Y_ph: positive_data,
                    Y_holdout: y_test.astype(np.float32),
                    G_holdout: test_metadata.values.astype(np.float32),
                    Gpos_ph: G_data[positive_row, :],
                    Gneg_ph: G_data[negative_row, :],
                    pos_row: positive_row,
                    pos_col: positive_col,
                    neg_row: negative_row,
                    neg_col: negative_col
                }
                if i % 1000 == 0:
                    # Full trace every 1000 steps for profiling.
                    run_options = tf.RunOptions(
                        trace_level=tf.RunOptions.FULL_TRACE)
                    run_metadata = tf.RunMetadata()
                    _, summary, train_loss, grads = session.run(
                        [train, merged, loss, gradients],
                        feed_dict=feed_dict,
                        options=run_options,
                        run_metadata=run_metadata)
                    writer.add_run_metadata(run_metadata, 'step%d' % i)
                    writer.add_summary(summary, i)
                elif i % 5000 == 0:
                    # NOTE(review): unreachable — any multiple of 5000 is
                    # also a multiple of 1000, so the branch above always
                    # wins. The mse evaluation here never runs; the branch
                    # order (or moduli) probably needs swapping. `err` is
                    # also unused.
                    _, summary, err, train_loss, grads = session.run(
                        [train, mse, merged, loss, gradients],
                        feed_dict=feed_dict)
                    writer.add_summary(summary, i)
                else:
                    _, summary, train_loss, grads = session.run(
                        [train, merged, loss, gradients],
                        feed_dict=feed_dict)
                    writer.add_summary(summary, i)
                now = time.time()
                if now - last_checkpoint_time > checkpoint_interval:
                    saver.save(session,
                               os.path.join(opts.save_path, "model.ckpt"),
                               global_step=i)
                    last_checkpoint_time = now
                losses[i] = train_loss
            elapsed_time = time.time() - start_time
            print('Elapsed Time: %f seconds' % elapsed_time)
            # Cross validation
            pred_beta = qbeta.eval()
            pred_gamma = qgamma.eval()
            mse, mrc = cross_validation(test_metadata.values,
                                        pred_beta @ basis.T,
                                        pred_gamma @ basis.T, y_test)
            print("MSE: %f, MRC: %f" % (mse, mrc))
def chat(to_file=False):
    # Takes user input and responds with the trained model.
    # If to_file is True, the conversation is also appended to a timestamped
    # file under settings.GENERATED_PATH.
    # Init model (batch size 1, decode-only).
    model = models.ChatbotModel(forward_only=True, batch_size=1)
    model.build_graph()
    # Init checkpoint saver
    saver = tf.train.Saver()
    sess = tf.InteractiveSession()  # More flexible with ipynb format
    print("Running session")
    sess.run(tf.global_variables_initializer())
    _check_restore_parameters(sess, saver)
    if to_file:
        output_file = open(
            os.path.join(
                settings.GENERATED_PATH.format(str(round(time.time())))),
            'a+')
        output_file.write("=" * 120)
        output_file.write("{}".format(time.strftime("%c")))
    # Longest supported input length = encoder size of the largest bucket.
    max_length = settings.BUCKETS[-1][0]
    print("=" * 120)
    # NOTE(review): the line breaks inside this ASCII-art banner were lost
    # when the file was mangled onto one line; the layout below is a
    # reconstruction — confirm against the original source.
    print("""
  _____                                          _   _
 / __ \\                                        | | (_)
| /  \\/ ___  _ ____   _____ _ __ ___  __ _| |_ _  ___  _ __
| |  / _ \\| '_ \\ \\ / / _ \\ '__/ __|/ _` | __| |/ _ \\| '_ \\
| \\__/\\ (_) | | | \\ V /  __/ |  \\__ \\ (_| | |_| | (_) | | | |
 \\____/\\___/|_| |_|\\_/ \\___|_|  |___/\\__,_|\\__|_|\\___/|_| |_|
""")
    print("=" * 120)
    print('Welcome to Conversation.')
    print("Type up to {} chars to start, ENTER to exit.".format(max_length))
    while True:
        line = _get_user_input()
        # Empty input terminates the conversation.
        if len(line) <= 0 or line == "":
            break
        # Tokens for input sentence
        tokens = util.embed(model.words_to_encoder, line)
        if (len(tokens) > max_length):
            print(
                "System message: Maximum input length for this model is {}, please try again."
                .format(max_length))
            # NOTE(review): this reassignment is discarded — the loop
            # restarts and calls _get_user_input() again at the top.
            line = _get_user_input()
            continue
        bucket_id = _find_right_bucket_length(
            len(tokens))  # Which bucket for this input length?
        # Form the input sentence into a one element batch to feed the model
        encoder_inputs, decoder_inputs, decoder_masks = util.get_batch(
            [(tokens, [])], bucket_id, batch_size=1)
        # Get outputs of model
        _, _, logits = run_step(sess, model, encoder_inputs, decoder_inputs,
                                decoder_masks, bucket_id, True)
        print_decoder(logits, model.decoder_to_words)
        if to_file:
            output_file.write("Q | " + line)
            # NOTE(review): `response` is never assigned anywhere in this
            # function, so this line raises NameError when to_file=True.
            # print_decoder() likely needs to return the decoded text so it
            # can be captured here — confirm and fix.
            output_file.write("A >> " + response)
    if to_file:
        output_file.write("=" * 120)
        output_file.close()
def train(messages_only=False):
    """Train the chatbot model on the preprocessed, bucketed data.

    Runs up to ``settings.MAX_ITER`` optimisation steps, periodically
    printing the averaged loss, checkpointing the model, and evaluating on
    the held-out test buckets.

    :param messages_only: forwarded to ``_get_buckets`` to restrict the data.
    """
    # NOTE(review): line breaks inside this ASCII-art banner were lost when
    # the file was mangled; the layout below is a reconstruction.
    print("""
 _____          _       _
|_   _|        (_)     (_)
  | |_ __ __ _ _ _ __  _ _ __   __ _
  | | '__/ _` | | '_ \\| | '_ \\ / _` |
  | | | | (_| | | | | | | | | | (_| |
  \\_/_|  \\__,_|_|_| |_|_|_| |_|\\__, |
                                __/ |
                               |___/
""")
    # Load data
    test_buckets, train_buckets, train_buckets_scale = _get_buckets(
        messages_only)
    # Init model
    model = models.ChatbotModel(forward_only=False,
                                batch_size=settings.BATCH_SIZE)
    model.build_graph()
    # Init checkpoint saver
    saver = tf.train.Saver(max_to_keep=100)
    sess = tf.InteractiveSession()  # More flexible with ipynb format
    print("\nRunning session")
    sess.run(tf.global_variables_initializer())
    _check_restore_parameters(sess, saver)
    iteration = model.global_step.eval()
    total_loss = 0
    print("\nStarting training at {}\n".format(time.strftime('%c')))
    for _ in range(settings.MAX_ITER):
        bucket_id = _get_random_bucket(train_buckets_scale)
        encoder_inputs, decoder_inputs, decoder_masks = util.get_batch(
            train_buckets[bucket_id],
            bucket_id,
            batch_size=settings.BATCH_SIZE)
        # TODO: run_step occasionally raises InvalidArgumentError
        # ("indices[i] = X is not in [0, vocab)") — an out-of-range
        # embedding lookup, possibly introduced by the evaluation print-out
        # changes. Until the root cause is fixed we skip the failing step
        # instead of crashing. Narrowed from a bare `except:` so that
        # KeyboardInterrupt / SystemExit still propagate.
        try:
            _, step_loss, _ = run_step(sess, model, encoder_inputs,
                                       decoder_inputs, decoder_masks,
                                       bucket_id, False)
            total_loss += step_loss
            iteration += 1
        except Exception:
            print(
                "Error in training step [run_step()], continuing from next step"
            )
        if iteration % settings.PRINT_EVERY == 0:
            # Print over period of iterations to reduce noise by averaging
            print("{} | Iteration {}; Loss {};".format(
                time.strftime('%c'), iteration,
                float(total_loss) / settings.PRINT_EVERY))
            total_loss = 0
        if iteration % settings.SAVE_EVERY == 0:
            saved_path = saver.save(sess,
                                    os.path.join(
                                        settings.CKPT_PATH,
                                        'chatbot-ckpt-{}'.format(
                                            str(round(time.time())))),
                                    global_step=model.global_step)
            print("\nModel saved to {}".format(saved_path))
        if iteration % settings.EVAL_EVERY == 0:
            # run evaluation on development set and print their loss
            _eval_test_set(sess, model, test_buckets)
        sys.stdout.flush()
print "Topic", ti, "=", attention[si][ti] docid += 1 np.save(open(output_file, "w"), dt_dist) def run_epoch(sents, docs, tags, (tm, lm), pad_id, cf, idxvocab): #generate the batches tm_num_batches, lm_num_batches = int(math.ceil(float(len(sents[0]))/cf.batch_size)), \ int(math.ceil(float(len(sents[1]))/cf.batch_size)) #run an epoch to compute tm and lm perplexities if tm != None: tm_costs, tm_words = 0.0, 0.0 for bi in xrange(tm_num_batches): _, y, m, d, t = get_batch(sents[0], docs[0], tags, bi, cf.doc_len, cf.tm_sent_len, cf.tag_len, cf.batch_size, \ pad_id, False) tm_cost = sess.run(tm.tm_cost, { tm.y: y, tm.tm_mask: m, tm.doc: d, tm.tag: t }) tm_costs += tm_cost * cf.batch_size tm_words += np.sum(m) print "\ntest topic model perplexity = %.3f" % (np.exp( tm_costs / tm_words)) if lm != None: lm_costs, lm_words = 0.0, 0.0 for bi in xrange(lm_num_batches): x, y, m, d, t = get_batch(sents[1], docs[1], tags, bi, cf.doc_len, cf.lm_sent_len, cf.tag_len, cf.batch_size, \
def train(model,
          dataset,
          optimizer,
          dest_dir,
          batch_size=128,
          max_epoch=None,
          gpu=None,
          save_every=5,
          test_every=5,
          alpha_init=1.,
          alpha_delta=0.,
          l2_weight_gen=0.,
          l2_weight_con=0.):
    """Common training procedure.

    :param model: model to train
    :param dataset: training & validation data
    :param optimizer: chainer optimizer
    :param dest_dir: destination directory
    :param batch_size: number of sample in minibatch
    :param max_epoch: maximum number of epochs to train (None to train indefinitely)
    :param gpu: ID of GPU (None to use CPU)
    :param save_every: save every this number of epochs (first epoch and last epoch are always saved)
    :param alpha_init: initial value of alpha
    :param alpha_delta: change of alpha at every batch
    """
    if gpu is not None:
        # set up GPU
        cuda.get_device(gpu).use()
        model.to_gpu(gpu)
    logger = logging.getLogger()
    # set up optimizer: one optimizer instance per sub-network.
    opt_enc = util.list2optimizer(optimizer)
    opt_enc.setup(model.encoder)
    if hasattr(model, 'controller'):
        opt_con = util.list2optimizer(optimizer)
        opt_con.setup(model.controller)
    opt_gen = util.list2optimizer(optimizer)
    opt_gen.setup(model.generator)
    # training loop — alternates between 'train' and 'test' splits via the
    # `split` flag at the bottom of the loop.
    epoch = 0
    alpha = alpha_init
    test_losses = []
    train_losses = []
    train_data = dataset["train_data"]
    test_data = dataset["valid_data"]
    split = 'test'
    while True:
        if max_epoch is not None and epoch >= max_epoch:
            # terminate training
            break
        # Every ten epochs, try validation set
        if split == 'train':
            x_data, _ = util.get_batch(train_data, batch_size=batch_size)
        else:
            x_data, _ = util.get_batch(test_data, batch_size=batch_size)
        # create batches
        x_data = x_data.astype(np.float32)
        # copy data to GPU
        if gpu is not None:
            x_data = cuda.to_gpu(x_data)
        # create variable
        xs = []
        [xs.append(Variable(x.astype(np.float32))) for x in x_data]
        # set new alpha (KL annealing weight, clamped to [0, 1])
        alpha += alpha_delta
        alpha = min(alpha, 1.)
        alpha = max(alpha, 0.)
        time_start = time.time()
        # encoder: stack per-sample hidden states into (batch, time, dims).
        _, h_bxtxd = model.encoder(xs)
        h_bxtxd = F.stack(h_bxtxd, 0)
        d_dims = h_bxtxd.data.shape[2]
        # generator: initial state g0 sampled from the bidirectional
        # encoder's end states.
        # NOTE(review): `d_dims / 2` in the slice indices below is float
        # division under Python 3 and would raise TypeError — this code
        # appears to assume Python 2 (`//` would be needed for Py3).
        g0_bxd, kl_g0 = model.generator.sample_g0(
            F.concat(
                [h_bxtxd[:, 0, -d_dims / 2:], h_bxtxd[:, -1, :d_dims / 2]],
                axis=1))
        f0_bxd = model.generator.l_f(g0_bxd)
        # main: roll the generator (and optional controller) over time.
        x_hat = []
        rec_loss_total = 0
        if hasattr(model, 'controller'):
            kl_u_total = 0
        for i in range(0, h_bxtxd[0].data.shape[0]):
            if i == 0:
                if hasattr(model, 'controller'):
                    con_i = model.controller(
                        F.concat((f0_bxd, h_bxtxd[:, i, :d_dims / 2],
                                  h_bxtxd[:, i, d_dims / 2:]),
                                 axis=1))
                    u_i_bxd, kl_u = model.generator.sample_u_1(con_i)
                    kl_u_total += kl_u
                    g_i_bxd = model.generator(u_i_bxd, hx=g0_bxd)
                else:
                    g_i_bxd = model.generator(F.concat(
                        (h_bxtxd[:, i, :d_dims / 2],
                         h_bxtxd[:, i, d_dims / 2:]),
                        axis=1),
                                              hx=g0_bxd)
            else:
                if hasattr(model, 'controller'):
                    con_i = model.controller(F.concat([
                        f_i, h_bxtxd[:, i, :d_dims / 2],
                        h_bxtxd[:, i, d_dims / 2:]
                    ],
                                                      axis=1),
                                             hx=con_i)
                    u_i_bxd, kl_u = model.generator.sample_u_i(con_i, u_i_bxd)
                    kl_u_total += kl_u
                    g_i_bxd = model.generator(u_i_bxd, hx=g_i_bxd)
                else:
                    g_i_bxd = model.generator(F.concat([
                        h_bxtxd[:, i, :d_dims / 2], h_bxtxd[:, i,
                                                            d_dims / 2:]
                    ],
                                                       axis=1),
                                              hx=g_i_bxd)
            # Factor output and reconstruction of the current time step.
            f_i = model.generator.l_f(g_i_bxd)
            x_hat_i, rec_loss_i = model.generator.sample_x_hat(
                f_i, xs=Variable(x_data[:, i, :]), nrep=1)
            x_hat.append(x_hat_i)
            rec_loss_total += rec_loss_i
        # calculate loss: reconstruction + alpha-weighted KL terms.
        if hasattr(model, 'controller'):
            loss = rec_loss_total + alpha * (kl_g0 + kl_u_total)
        else:
            loss = rec_loss_total + alpha * kl_g0
        # Optional L2 penalties on the GRU weight matrices.
        l2_loss = 0
        if l2_weight_gen > 0:
            l2_W_gen = F.sum(F.square(model.generator.gru.W.W))
            l2_W_r_gen = F.sum(F.square(model.generator.gru.W_r.W))
            l2_W_z_gen = F.sum(F.square(model.generator.gru.W_z.W))
            l2_gen = l2_weight_gen * (l2_W_gen + l2_W_r_gen + l2_W_z_gen)
            l2_loss += l2_gen
        if hasattr(model, 'controller') and l2_weight_con > 0:
            l2_W_con = F.sum(F.square(model.controller.gru.W.W))
            l2_W_r_con = F.sum(F.square(model.controller.gru.W_r.W))
            l2_W_z_con = F.sum(F.square(model.controller.gru.W_z.W))
            l2_con = l2_weight_con * (l2_W_con + l2_W_r_con + l2_W_z_con)
            l2_loss += l2_con
        loss += l2_loss
        # update — only on training epochs; test epochs just report losses.
        if split == 'train':
            model.cleargrads()
            model.encoder.cleargrads()
            if hasattr(model, 'controller'):
                model.controller.cleargrads()
            model.generator.cleargrads()
            loss.backward()
            opt_enc.update()
            if hasattr(model, 'controller'):
                opt_con.update()
            opt_gen.update()
        # report training status
        time_end = time.time()
        time_delta = time_end - time_start
        # report training status
        status = OrderedDict()
        status['epoch'] = epoch
        status['time'] = int(time_delta * 1000)  # time in msec
        status['alpha'] = alpha
        status[split + '_loss'] = '{:.4}'.format(float(
            loss.data))  # total training loss
        status[split + '_rec_loss'] = '{:.4}'.format(float(
            rec_loss_total.data))  # reconstruction loss
        status[split + '_kl_g0'] = '{:.4}'.format(float(
            kl_g0.data))  # KL-divergence loss for g0
        if hasattr(model, 'controller'):
            status[split + '_kl_u_total'] = '{:.4}'.format(
                float(kl_u_total.data))  # KL-divergence loss for us
        if l2_weight_con > 0:
            status[split + '_l2_loss_con'] = '{:.4}'.format(
                float(l2_con.data))  # L2 loss for controller
        if l2_weight_gen > 0:
            status[split + '_l2_loss_gen'] = '{:.4}'.format(
                float(l2_gen.data))  # L2 loss for generator
        logger.info(_status_str(status))
        #
        # save model
        if ((epoch % save_every) == 0 or
            (max_epoch is not None and
             epoch == max_epoch - 1)) and split == 'train':
            model.save(dest_dir, epoch)
        # Alternate to a test epoch every `test_every` training epochs.
        if split == 'train' and epoch % test_every == 0:
            split = 'test'
        else:
            split = 'train'
        epoch += 1
#Y_test = Y_test[:1000] batch_size = 16 D_out = 10 model = nn.LeNet5() losses = [] optim = optimizer.SGD(model.get_params(), lr=0.00003) #optim = optimizer.SGDMomentum(model.get_params(), lr=0.00003, momentum=0.80, reg=0.0003) criterion = loss.SoftmaxLoss() # Train ITER = 30000 for i in range(ITER): # get batch, make onehot X_batch, Y_batch = util.get_batch(X_train, Y_train, batch_size) Y_batch = util.MakeOneHot(Y_batch, D_out) # forward, loss, backward, step Y_pred = model.forward(X_batch) loss, dout = criterion.get(Y_pred, Y_batch) model.backward(dout) optim.step() print("%s%% iter: %s, loss: %s" % (100 * i / ITER, i, loss)) losses.append(loss) """ if i % 100 == 0: print("%s%% iter: %s, loss: %s" % (100*i/ITER,i, loss)) losses.append(loss) """
# Merge all summaries into a single operator merged_summary_op = tf.summary.merge_all() sess = tf.Session() sess.run(init) # Set the logs writer to the folder /tmp/tensorflow_logs summary_writer = tf.summary.FileWriter(BASE_DIR + '/logs', graph_def=sess.graph_def) batch_size = 100 number_of_batches = number_of_samples / batch_size for epoch in range(20): for i in range(number_of_batches): batch_x, batch_y = util.get_batch(train_set, batch_size, i) batch_x_blury, _ = util.get_batch(blurry_set, batch_size, i) batch_x_cropped, _ = util.get_batch(cropped_set, batch_size, i) train_data = { x: batch_x, label: batch_y, x_blury: batch_x_blury, x_cropped: batch_x_cropped } sess.run(train_step, feed_dict={ x: batch_x, label: batch_y, x_blury: batch_x_blury, x_cropped: batch_x_cropped, pkeep: 0.75
def train(): batch_size = 10 print "Starting ABC-CNN training" vqa = dl.load_questions_answers('data') # Create subset of data for over-fitting sub_vqa = {} sub_vqa['training'] = vqa['training'][:10] sub_vqa['validation'] = vqa['validation'][:10] sub_vqa['answer_vocab'] = vqa['answer_vocab'] sub_vqa['question_vocab'] = vqa['question_vocab'] sub_vqa['max_question_length'] = vqa['max_question_length'] train_size = len(vqa['training']) max_itr = (train_size // batch_size) * 10 with tf.Session() as sess: image, ques, ans, optimizer, loss, accuracy = abc.model( sess, batch_size) print "Defined ABC model" train_loader = util.get_batch(sess, vqa, batch_size, 'training') print "Created train dataset generator" valid_loader = util.get_batch(sess, vqa, batch_size, 'validation') print "Created validation dataset generator" writer = abc.write_tensorboard(sess) init = tf.global_variables_initializer() merged = tf.summary.merge_all() sess.run(init) print "Initialized Tensor variables" itr = 1 while itr < max_itr: run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE) run_metadata = tf.RunMetadata() _, vgg_batch, ques_batch, answer_batch = train_loader.next() _, valid_vgg_batch, valid_ques_batch, valid_answer_batch = valid_loader.next( ) sess.run(optimizer, feed_dict={ image: vgg_batch, ques: ques_batch, ans: answer_batch }) [train_summary, train_loss, train_accuracy] = sess.run([merged, loss, accuracy], feed_dict={ image: vgg_batch, ques: ques_batch, ans: answer_batch }, options=run_options, run_metadata=run_metadata) [valid_loss, valid_accuracy] = sess.run( [loss, accuracy], feed_dict={ image: valid_vgg_batch, ques: valid_ques_batch, ans: valid_answer_batch }) writer.add_run_metadata(run_metadata, 'step%03d' % itr) writer.add_summary(train_summary, itr) writer.flush() print "Iteration:%d\tTraining Loss:%f\tTraining Accuracy:%f\tValidation Loss:%f\tValidation Accuracy:%f" % ( itr, train_loss, 100. * train_accuracy, valid_loss, 100. * valid_accuracy) itr += 1
def log(config,
        data,
        pretrain_data,
        word2idx_dict,
        model,
        sess,
        writer=None,
        label="train",
        entropy=None,
        bound=None):
    """Evaluate `model` on `data`, tune/apply decision thresholds, and
    emit TensorBoard summaries.

    Predictions are masked two ways: by prediction entropy
    (`vals <= threshold`) and by similarity score (`sim_vals >= bound`).
    When `entropy`/`bound` are None the best threshold is chosen by F1 on
    this data (dev-style); otherwise the supplied fixed thresholds are
    applied (test-style, using thresholds tuned on dev).

    Returns:
        ((acc, recall, f1), (acc2, recall2, f12),
         (best_entro, best_bound), [golds, preds])

    FIX: removed unused locals `simss` and `batch_sents`, and a second
    per-batch `sess.run(model.sim, ...)` whose result was never used —
    a pure read-only fetch, so dropping it halves session calls with no
    behavioural change.
    """
    global_step = sess.run(model.global_step) + 1
    golds, preds, vals, sim_preds, sim_vals = [], [], [], [], []
    # Forward pass over the whole dataset (shuffle=False keeps order
    # stable across the zipped generators).
    for batch, _ in zip(
            get_batch(config, data, word2idx_dict, shuffle=False),
            get_pretrain_batch(config,
                               pretrain_data,
                               word2idx_dict,
                               pretrain=False)):
        gold, pred, val, sim_pred, sim_val = sess.run(
            [
                model.gold, model.pred, model.max_val, model.sim_pred,
                model.sim_max_val
            ],
            feed_dict=get_feeddict(model, batch, _, is_train=False))
        golds += gold.tolist()
        preds += pred.tolist()
        vals += val.tolist()
        sim_preds += sim_pred.tolist()
        sim_vals += sim_val.tolist()
    threshold = [0.01 * i for i in range(1, 200)]
    threshold2 = [0.05 * i for i in range(1, 20)]
    acc, recall, f1, jac = 0., 0., 0., 0.
    acc2, recall2, f12, jac2 = 0., 0., 0., 0.
    best_entro = 0.
    best_bound = 0.
    if entropy is None:
        # Sweep the entropy threshold; keep the one with the best F1.
        for t in threshold:
            _preds = (np.asarray(vals, dtype=np.float32) <= t).astype(
                np.int32) * np.asarray(preds, dtype=np.int32)
            _preds = _preds.tolist()
            _acc, _recall, _f1, _jac = evaluate(golds, _preds)
            if _f1 > f1:
                acc, recall, f1, jac = _acc, _recall, _f1, _jac
                best_entro = t
    else:
        # Apply the fixed entropy threshold.
        preds = (np.asarray(vals, dtype=np.float32) <= entropy).astype(
            np.int32) * np.asarray(preds, dtype=np.int32)
        preds = preds.tolist()
        acc, recall, f1, jac = evaluate(golds, preds)
    if bound is None:
        # Sweep the similarity bound; keep the one with the best F1.
        for t in threshold2:
            _sim_preds = (np.asarray(sim_vals, dtype=np.float32) >= t).astype(
                np.int32) * np.asarray(sim_preds, dtype=np.int32)
            _sim_preds = _sim_preds.tolist()
            _acc2, _recall2, _f12, _jac2 = evaluate(golds, _sim_preds)
            if _f12 > f12:
                acc2, recall2, f12, jac2 = _acc2, _recall2, _f12, _jac2
                best_bound = t
    else:
        # Apply the fixed similarity bound.
        sim_preds = (np.asarray(sim_vals, dtype=np.float32) >= bound).astype(
            np.int32) * np.asarray(sim_preds, dtype=np.int32)
        sim_preds = sim_preds.tolist()
        acc2, recall2, f12, jac2 = evaluate(golds, sim_preds)
    # One scalar summary per metric, tagged by split label.
    acc_sum = tf.Summary(value=[
        tf.Summary.Value(tag="{}/acc".format(label), simple_value=acc),
    ])
    rec_sum = tf.Summary(value=[
        tf.Summary.Value(tag="{}/rec".format(label), simple_value=recall),
    ])
    f1_sum = tf.Summary(value=[
        tf.Summary.Value(tag="{}/f1".format(label), simple_value=f1),
    ])
    jac_sum = tf.Summary(value=[
        tf.Summary.Value(tag="{}/jac".format(label), simple_value=jac),
    ])
    acc_sum2 = tf.Summary(value=[
        tf.Summary.Value(tag="{}/sim_acc".format(label), simple_value=acc2),
    ])
    rec_sum2 = tf.Summary(value=[
        tf.Summary.Value(tag="{}/sim_rec".format(label), simple_value=recall2),
    ])
    f1_sum2 = tf.Summary(value=[
        tf.Summary.Value(tag="{}/sim_f1".format(label), simple_value=f12),
    ])
    jac_sum2 = tf.Summary(value=[
        tf.Summary.Value(tag="{}/sim_jac".format(label), simple_value=jac2),
    ])
    entropy_sum = tf.Summary(value=[
        tf.Summary.Value(tag="{}/entro".format(label),
                         simple_value=sum(vals) / len(vals)),
    ])
    if writer is not None:
        writer.add_summary(acc_sum, global_step)
        writer.add_summary(rec_sum, global_step)
        writer.add_summary(f1_sum, global_step)
        writer.add_summary(jac_sum, global_step)
        writer.add_summary(acc_sum2, global_step)
        writer.add_summary(rec_sum2, global_step)
        writer.add_summary(f1_sum2, global_step)
        writer.add_summary(jac_sum2, global_step)
        writer.add_summary(entropy_sum, global_step)
    res = [golds, preds]
    return (acc, recall, f1), (acc2, recall2,
                               f12), (best_entro, best_bound), res
start = global_step.eval() step = 0 r = ReadData.Actionreader() ckpt_dir = "/home/cxr/BvhLstm1-2" filename = "/home/cxr/7-2" if Need_to_restore: if restore(ckpt_dir+"/"): print "restore_seccessfully" if not Use_to_train: r.reset() v,timelist=utl.readData(filename) length = len(v) i=0 step = 0 batch_xs, batch_ys = utl.get_batch(v, i, length, classnumber=classnum, batchsize=batch_size, n_sequence=n_sequence) print len(batch_xs) while batch_xs and step<=2000: pre = sess.run([predic], feed_dict={ x: batch_xs, }) r.out_data(pre[0],ckpt_dir) #batch_xs = batch_xs[0] #batch_xs = batch_xs[1:] #batch_xs.append(utl.transform(pre,classnum)) #batch_xs = [batch_xs] batch_xs, batch_ys = utl.get_batch(v, i, length, classnumber=classnum, batchsize=batch_size, n_sequence=n_sequence) #print batch_xs
def train():
    # Train the CIFAR ResNet (TF 1.x): builds the graph with decayed-LR
    # Adam, runs EPOCH passes of mini-batch steps with per-step test
    # sampling, checkpoints each epoch, and lets a WeightAdjust policy
    # tweak the layer weights (w0..w4) or stop training early.
    t1 = time.time()
    tf.reset_default_graph()
    with tf.variable_scope(name_or_scope='train', reuse=tf.AUTO_REUSE):
        # Model outputs, placeholders, and the adjustable layer weights.
        cls, (x, y), (w0, w1, w2, w3, w4) = gm.result()
        loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y,
                                                              logits=cls,
                                                              name='loss')
        loss_mean = tf.reduce_mean(loss, name='loss_mean')
        global_step = tf.Variable(0, name='global_step')
        # LR decays by 4% every 1000 steps (staircase).
        learning_rate = tf.train.exponential_decay(constant.LEARNING_RATE,
                                                   global_step,
                                                   1000,
                                                   0.96,
                                                   staircase=True,
                                                   name='learning_rate')
        optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate,
                                           name='optimizer')
        train_op = optimizer.minimize(loss_mean,
                                      global_step=global_step,
                                      name='train_op')
        data_train = util.load_data(constant.DATA_TRAIN)
        data_test = util.load_data(constant.DATA_TEST)
        graph = tf.get_default_graph()
        # var_list = [i for i in tf.global_variables() if i.name.split('/')[1] == 'result']
        # saver = tf.train.Saver(var_list=var_list, max_to_keep=5)
        # [print(i) for i in tf.global_variables()]
        # [print(i.name) for i in graph.get_operations()]
        saver = tf.train.Saver(max_to_keep=5)
        with tf.Session(graph=graph) as sess:
            sess.run(tf.global_variables_initializer())
            # idx_train = [0, 1, ..., TRAIN_TOTAL_SIZE-1]; reshuffled
            # each epoch for random batch order.
            idx_train = np.linspace(0,
                                    constant.TRAIN_TOTAL_SIZE - 1,
                                    constant.TRAIN_TOTAL_SIZE,
                                    dtype=np.int32)
            step = 0
            accuracies_train = []
            accuracies_test = []
            losses = []
            ws = (w0, w1, w2, w3, w4)
            wa = WeightAdjust()
            wa.init(len(ws))
            for i in range(constant.EPOCH):
                np.random.shuffle(idx_train)
                for j in range(constant.TRAIN_TIMES_FOR_EPOCH):
                    # Contiguous index window j*B .. (j+1)*B-1 into the
                    # shuffled order.
                    idx_j = np.linspace(j * constant.BATCH_SIZE,
                                        (j + 1) * constant.BATCH_SIZE - 1,
                                        constant.BATCH_SIZE,
                                        dtype=np.int32)
                    idx_train_batch = idx_train[idx_j]
                    _, labels_train, _, images_train = util.get_batch(
                        data_train, idx_train_batch)
                    feed_dict_train = {x: images_train, y: labels_train}
                    cls_train, _loss, _ = sess.run([cls, loss_mean, train_op],
                                                   feed_dict=feed_dict_train)
                    arg_idx_train = np.argmax(cls_train, axis=1)
                    accuracy_train = sum(
                        labels_train == arg_idx_train) / constant.BATCH_SIZE
                    # test: accuracy on a random test batch every step.
                    idx_test_batch = np.random.randint(
                        0, constant.TEST_TOTAL_SIZE, [constant.BATCH_SIZE])
                    _, labels_test, _, images_test = util.get_batch(
                        data_test, idx_test_batch)
                    feed_dict_test = {x: images_test, y: labels_test}
                    cls_test = sess.run(cls, feed_dict=feed_dict_test)
                    arg_idx_test = np.argmax(cls_test, axis=1)
                    accuracy_test = sum(
                        labels_test == arg_idx_test) / constant.BATCH_SIZE
                    step += 1
                    # Metrics are only recorded every PRINT_EVERY_TIMES
                    # steps (sampled, not averaged).
                    if step % constant.PRINT_EVERY_TIMES == 0:
                        print(
                            'time:{},epoch:{},loss:{},accuracy_train:{:.2%},accuracy_test:{:.2%}'
                            .format(util.cur_time(), step, _loss,
                                    accuracy_train, accuracy_test))
                        accuracies_train.append(accuracy_train)
                        accuracies_test.append(accuracy_test)
                        losses.append(_loss)
                # End of epoch: mean of this epoch's recorded samples.
                times = int(constant.TRAIN_TIMES_FOR_EPOCH /
                            constant.PRINT_EVERY_TIMES)
                train_mean = util.mean(accuracies_train[-times:])
                test_mean = util.mean(accuracies_test[-times:])
                print('save model,step: {},train_mean:{},test_mean:{}'.format(
                    step, train_mean, test_mean))
                saver.save(sess,
                           save_path='./model/resnet/cifar-resnet.ckpt',
                           global_step=step)
                # Let the adjustment policy decide: tweak layer weights,
                # stop training, or continue unchanged.
                wa.adjust(train_mean, test_mean, step)
                print(wa.action)
                if wa.action == 'adjust':
                    print('本次迭代权重经过调整:{}'.format(wa.weights))
                    assigns = gm.assign_weight(wa, ws)
                    sess.run(assigns)
                elif wa.action == 'stop':
                    break
                else:
                    pass
            # Persist the metric history alongside the final weights.
            accuracy_map = {
                'accuracies_train': accuracies_train,
                'accuracies_test': accuracies_test,
                'losses': losses,
                'weights': wa
            }
            util.dump_data(accuracy_map, './accuracy_map.pkl')
    t2 = time.time()
    print('耗时:{}'.format(util.str_time(t2 - t1)))
def pseudo_labeling(config, data):
    # Weak-supervision relation-extraction pipeline:
    #  1. Split the pretrain data, build labeling functions (LFs).
    #  2. Label the training set with regex LFs (cached via pickle).
    #  3. Pretrain the Soft_Match model on (sentence, pattern) pairs.
    #  4. Jointly train on labeled + pseudo-labeled data, evaluating on
    #     dev/test each epoch via log(); returns the test metrics at the
    #     best-dev epoch.
    word2idx_dict, fixed_emb, traiable_emb, train_data, dev_data, test_data, pretrain_data, pretrain_data2 = data
    # NOTE(review): pretrain_test_data is computed but never used below.
    pretrain_test_data = (pretrain_data[0][:config.pretrain_test_size],
                          pretrain_data[1][:config.pretrain_test_size],
                          pretrain_data[2][:config.pretrain_test_size, :])
    # Keep the next pretrain_train_size examples for pretraining.
    pretrain_data = (
        pretrain_data[0][config.pretrain_test_size:config.pretrain_test_size +
                         config.pretrain_train_size],
        pretrain_data[1][config.pretrain_test_size:config.pretrain_test_size +
                         config.pretrain_train_size],
        pretrain_data[2][config.pretrain_test_size:config.pretrain_test_size +
                         config.pretrain_train_size, :])
    lfs = get_lfs(config, word2idx_dict)
    identifier = "_{}".format(config.tag)
    with tf.variable_scope("models", reuse=tf.AUTO_REUSE):
        # Hard regex matcher and the trainable soft matcher.
        regex = Pat_Match(config)
        match = Soft_Match(config,
                           lfs['lfs'],
                           np.array(lfs['rels'], np.float32),
                           lfs['keywords'],
                           lfs['keywords_rels'],
                           lfs['raw_keywords'],
                           mat=((
                               fixed_emb,
                               traiable_emb,
                           )),
                           word2idx_dict=word2idx_dict,
                           pseudo=True)
    sess_config = tf.ConfigProto(allow_soft_placement=True)
    sess_config.gpu_options.allow_growth = True
    if os.path.exists('labeled_data.pkl'):
        # Fast path: reuse cached regex-labeling results.
        with open('labeled_data.pkl', 'rb') as f:
            labeled_data = pickle.load(f)
        with open('unlabeled_data.pkl', 'rb') as f:
            unlabeled_data = pickle.load(f)
        with open('weights.pkl', 'rb') as f:
            lfs["weights"] = pickle.load(f)
    else:
        # Slow path: run every regex LF over the training set, split it
        # into labeled / unlabeled, and cache everything to disk.
        with open('exp2pat.json', 'r') as f:
            exp2pat = json.load(f)
        exp2pat = {int(key): val for key, val in exp2pat.items()}
        lab_d = []
        unlab_d = []
        tacred_labeled = []
        tacred_unlabeled = []
        labeled_data = []
        unlabeled_data = []
        idxx = -1
        idx2rel = {val: key for key, val in constant.LABEL_TO_ID.items()}
        for x in tqdm(train_data):
            idxx += 1
            batch = [x["phrase"]]
            res, pred = regex.match(batch)
            lfs["weights"] += res[0]  # per-LF firing counts
            new_dict = {}
            if np.amax(res) > 0:
                # At least one LF fired: take its relation and LF index.
                x["rel"] = pred.tolist()[0]
                x["logic_form"] = np.argmax(res, axis=1).tolist()[0]
                new_dict['tokens'] = x['phrase'].token
                new_dict['start'] = min(x['phrase'].subj_posi,
                                        x['phrase'].obj_posi) + 1
                new_dict['end'] = max(x['phrase'].subj_posi,
                                      x['phrase'].obj_posi) - 1
                new_dict['rel'] = pred.tolist()[0]
                # NOTE(review): bare except — it presumably only guards
                # the KeyError for LFs missing from exp2pat; confirm.
                try:
                    new_dict['pat'] = exp2pat[np.argmax(res,
                                                        axis=1).tolist()[0]]
                    lab_d.append(new_dict)
                except:
                    new_dict['pat'] = -1
                    unlab_d.append(new_dict)
                tacred_labeled.append((idxx, idx2rel[x['rel']]))
                labeled_data.append(x)
            else:
                # No LF fired: relation defaults to 0 (no_relation).
                tacred_unlabeled.append(idxx)
                new_dict['tokens'] = x['phrase'].token
                new_dict['start'] = min(x['phrase'].subj_posi,
                                        x['phrase'].obj_posi) + 1
                new_dict['end'] = max(x['phrase'].subj_posi,
                                      x['phrase'].obj_posi) - 1
                new_dict['rel'] = pred.tolist()[0]
                new_dict['pat'] = -1
                x["rel"] = 0
                unlab_d.append(new_dict)
                unlabeled_data.append(x)
        # Normalised LF weights (restricted to LFs present in exp2pat,
        # and over all LFs).
        new_weight = np.array([
            elem for i, elem in enumerate(list(lfs['weights']))
            if i in exp2pat
        ], np.float32)
        new_weight = new_weight / np.sum(new_weight)
        lfs["weights"] = lfs["weights"] / np.sum(lfs["weights"])
        with open('tacred_labeled.json', 'w') as f:
            json.dump(tacred_labeled, f)
        with open('tacred_unlabeled.json', 'w') as f:
            json.dump(tacred_unlabeled, f)
        with open('labeled_data.pkl', 'wb') as f:
            pickle.dump(labeled_data, f)
        with open('unlabeled_data.pkl', 'wb') as f:
            pickle.dump(unlabeled_data, f)
        with open('weights.pkl', 'wb') as f:
            pickle.dump(lfs["weights"], f)
        with open('lab_d.pkl', 'wb') as f:
            pickle.dump(lab_d, f)
        with open('unlab_d.pkl', 'wb') as f:
            pickle.dump(unlab_d, f)
        with open('weights_d.pkl', 'wb') as f:
            pickle.dump(new_weight, f)
    random.shuffle(unlabeled_data)
    print('unlabdel data:', str(len(unlabeled_data)), 'labeled data:',
          str(len(labeled_data)))
    dev_history, test_history = [], []
    dev_history2, test_history2 = [], []
    with tf.Session(config=sess_config) as sess:
        lr = float(config.init_lr)
        writer = tf.summary.FileWriter(config.log_dir + identifier)
        sess.run(tf.global_variables_initializer())
        print('---Pretrain-----')
        # Phase 1: pretrain on (sentence, pattern, label) triples until
        # precision and recall both exceed 0.9 (or epochs run out).
        for epoch in range(config.pretrain_epoch):
            loss_list, pretrain_loss_lis, sim_loss_lis = [], [], []
            for batch in get_pretrain_batch(config, pretrain_data,
                                            word2idx_dict):
                pretrain_loss_prt, sim_loss_prt, loss, _ = sess.run(
                    [
                        match.pretrain_loss, match.sim_loss,
                        match.pretrain_loss_v2, match.pre_train_op
                    ],
                    feed_dict={
                        match.pretrain_sents: batch['sents'],
                        match.pretrain_pats: batch['pats'],
                        match.pretrain_labels: batch['labels'],
                        match.is_train: True
                    })
                loss_list.append(loss)
                pretrain_loss_lis.append(pretrain_loss_prt)
                sim_loss_lis.append(sim_loss_prt)
            print("{} epoch:".format(str(epoch)))
            print("loss:{} pretrain_loss:{} sim_loss:{}".format(
                str(np.mean(loss_list)), str(np.mean(pretrain_loss_lis)),
                str(np.mean(sim_loss_lis))))
            # Evaluate the pretrained matcher on pretrain_data2.
            pred_labels = []
            goldens = []
            prt_id = 0
            for batch in get_pretrain_batch(config,
                                            pretrain_data2,
                                            word2idx_dict,
                                            shuffle=False):
                prt_id += 1
                pp, ppp, pred_label = sess.run(
                    [
                        match.prt_loss, match.prt_pred,
                        match.pretrain_pred_labels
                    ],
                    feed_dict={
                        match.pretrain_sents: batch['sents'],
                        match.pretrain_pats: batch['pats'],
                        match.is_train: False,
                        match.pretrain_labels: batch['labels']
                    })
                pred_label = list(pred_label)
                golden = list(np.reshape(batch['labels'], [-1]))
                assert len(golden) == len(pred_label)
                pred_labels.extend(pred_label)
                goldens.extend(golden)
            p, r, f = f_score(pred_labels, goldens)
            print('PRF:', (p, r, f))
            if p > 0.9 and r > 0.9:
                break
        print('\n')
        print('----Training----')
        # Phase 2: joint training on labeled + pseudo-labeled batches,
        # with dev threshold tuning applied to test via log().
        for epoch in range(1, config.num_epoch + 1):
            pretrain_loss_lis, sim_loss_lis, labeled_loss_lis, unlabeled_loss_lis, hard_train_loss_lis, loss_lis = [], [], [], [], [], []
            for batch1, batch2, batch3 in zip(
                    get_batch(config, labeled_data, word2idx_dict),
                    get_batch(config,
                              unlabeled_data,
                              word2idx_dict,
                              pseudo=True),
                    get_pretrain_batch(config,
                                       pretrain_data,
                                       word2idx_dict,
                                       pretrain=False)):
                batch = merge_batch(batch1, batch2)
                global_step = sess.run(match.global_step) + 1
                pretrain_loss, sim_loss, labeled_loss, unlabeled_loss, hard_train_loss, loss, _ = sess.run(
                    [
                        match.pretrain_loss, match.sim_loss,
                        match.labeled_loss, match.unlabeled_loss,
                        match.hard_train_loss, match.loss, match.train_op
                    ],
                    feed_dict=get_feeddict(match, batch, batch3))
                pretrain_loss_lis.append(pretrain_loss)
                sim_loss_lis.append(sim_loss)
                labeled_loss_lis.append(labeled_loss)
                unlabeled_loss_lis.append(unlabeled_loss)
                hard_train_loss_lis.append(hard_train_loss)
                loss_lis.append(loss)
                if global_step % config.period == 0:
                    loss_sum = tf.Summary(value=[
                        tf.Summary.Value(tag="model/loss",
                                         simple_value=loss),
                    ])
                    writer.add_summary(loss_sum, global_step)
            writer.flush()
            # Tune thresholds on dev, then apply them to test.
            (dev_acc, dev_rec,
             dev_f1), (dev_acc2, dev_rec2,
                       dev_f12), (best_entro, best_bound), _ = log(
                           config, dev_data, pretrain_data, word2idx_dict,
                           match, sess, writer, "dev")
            (test_acc, test_rec,
             test_f1), (test_acc2, test_rec2,
                        test_f12), _, _ = log(config,
                                              test_data,
                                              pretrain_data,
                                              word2idx_dict,
                                              match,
                                              sess,
                                              writer,
                                              "test",
                                              entropy=best_entro,
                                              bound=best_bound)
            writer.flush()
            print('\n')
            print("{} epoch:".format(str(epoch)))
            print(
                "pretrain_loss:{} sim_loss:{} labeled_loss:{} unlabeled_loss:{} hard_train_loss:{} loss:{} best_bound:{}:"
                .format(str(np.mean(pretrain_loss_lis)),
                        str(np.mean(sim_loss_lis)),
                        str(np.mean(labeled_loss_lis)),
                        str(np.mean(unlabeled_loss_lis)),
                        str(np.mean(hard_train_loss_lis)),
                        str(np.mean(loss_lis)), str(best_bound)))
            print(
                "dev_acc:{} dev_rec:{} dev_f1:{} dev_acc_2:{} dev_rec_2:{} dev_f1_2:{}\ntest_acc:{} test_rec:{} test_f1:{} test_acc_2:{} test_rec_2:{} test_f1_2:{}"
                .format(str(dev_acc), str(dev_rec), str(dev_f1),
                        str(dev_acc2), str(dev_rec2), str(dev_f12),
                        str(test_acc), str(test_rec), str(test_f1),
                        str(test_acc2), str(test_rec2), str(test_f12)))
            dev_history.append((dev_acc, dev_rec, dev_f1))
            test_history.append((test_acc, test_rec, test_f1))
            dev_history2.append((dev_acc2, dev_rec2, dev_f12))
            test_history2.append((test_acc2, test_rec2, test_f12))
            # NOTE(review): the current epoch's result was just appended,
            # so dev_history[-1][2] == dev_f1 and this condition is
            # always true — LR decays every epoch.  Probably intended to
            # compare against the *previous* epoch; confirm.
            if len(dev_history) >= 1 and dev_f1 <= dev_history[-1][2]:
                lr *= config.lr_decay
                sess.run(tf.assign(match.lr, lr))
        # Report test metrics at the best-dev-F1 epoch (for both the
        # entropy-based and similarity-based selection variants).
        max_idx = dev_history.index(max(dev_history, key=lambda x: x[2]))
        max_idx2 = dev_history2.index(max(dev_history2, key=lambda x: x[2]))
        max_acc, max_rec, max_f1 = test_history[max_idx]
        max_acc2, max_rec2, max_f12 = test_history2[max_idx2]
        print("acc: {}, rec: {}, f1: {}, acc2 {}, rec2 {}, f12 {}".format(
            max_acc, max_rec, max_f1, max_acc2, max_rec2, max_f12))
        sys.stdout.flush()
        return max_acc, max_rec, max_f1, max_acc2, max_rec2, max_f12
def run(self, data_x, data_x_, hidden_dim, activation, loss, lr, print_step, epoch, batch_size=100): tf.reset_default_graph() input_dim = len(data_x[0]) with tf.Session() as sess: x = tf.placeholder(dtype=tf.float32, shape=[None, input_dim], name='x') x_ = tf.placeholder(dtype=tf.float32, shape=[None, input_dim], name='x_') encode = { 'weights': tf.Variable(tf.truncated_normal([input_dim, hidden_dim], dtype=tf.float32), name='weight'), 'biases': tf.Variable(tf.truncated_normal([hidden_dim], dtype=tf.float32), name='bias') } decode = { 'biases': tf.Variable(tf.truncated_normal([input_dim], dtype=tf.float32)), 'weights': tf.transpose(encode['weights']) } encoded = self.activate( tf.matmul(x, encode['weights']) + encode['biases'], activation) decoded = tf.matmul(encoded, decode['weights']) + decode['biases'] # reconstruction loss if loss == 'rmse': loss = tf.sqrt( tf.reduce_mean(tf.square(tf.subtract(x_, decoded)))) elif loss == 'cross-entropy': eps = 1e-10 # loss = -tf.reduce_mean(x_ * tf.log(decoded + eps)) loss = tf.reduce_mean(-1 * x_ * tf.log(decoded + eps) - 1 * (1 - x_) * tf.log(1 - decoded + eps)) elif loss == 'l1': loss = tf.reduce_mean(tf.abs(tf.subtract(x_, decoded))) elif loss == 'l2': loss = tf.sqrt( tf.reduce_mean(tf.square(tf.subtract(x_, decoded)))) train_op = tf.train.AdamOptimizer(lr).minimize(loss) sess.run(tf.global_variables_initializer()) for i in range(epoch): b_x, b_x_ = util.get_batch(data_x, data_x_, batch_size) sess.run(train_op, feed_dict={x: b_x, x_: b_x_}) if (i + 1) % print_step == 0: l = sess.run(loss, feed_dict={x: data_x, x_: data_x_}) print('epoch {0}: SAE pretraining loss = {1}'.format(i, l)) self.weights.append(sess.run(encode['weights'])) self.biases.append(sess.run(encode['biases'])) return sess.run(encoded, feed_dict={x: data_x_})
epochs = 20000 with tf.Session(graph=train_graph) as sess: sess.run(tf.global_variables_initializer()) start = global_step.eval() ckpt_dir = "/home/cxr/BvhLstm1-2" filename = "/home/cxr/7-2" for epoch in range(epochs): print "tarining Epochs = ", epoch r = ReadData.Actionreader() v, _ = utl.readData(filename) length = len(v) i = 0 step = 0 batch_xs, batch_ys = utl.get_batch(i, v, sequenceLength, batch_size) while batch_xs != None and batch_ys != None: _, losss = sess.run( [train_op, loss], feed_dict={ encoder_inputs_raw: batch_xs, decoder_targets_raw: batch_ys, decoder_inputs_raw: batch_ys, }) if step % 20 == 0: print losss if step % 200 == 0: if not os.path.exists(ckpt_dir): os.makedirs(ckpt_dir) global_step.assign(step).eval()