def main(config):
    model_dir = os.path.join(config.model_dir, config.run_name)
    # On a fresh run, optionally initialize the base transformer variables
    # from a pretrained checkpoint.
    if config.pretrained and not tf.train.checkpoint_exists(model_dir):
        print('Loading pretrained base transformer...')
        tf.train.init_from_checkpoint(
            config.pretrain_data_dir,
            {'base_transformer/': 'base_transformer/'})
    run_config = tf.estimator.RunConfig(model_dir=model_dir,
                                        save_checkpoints_steps=100,
                                        save_summary_steps=10,
                                        log_step_count_steps=10)
    train_spec = tf.estimator.TrainSpec(
        input_fn=lambda: ds.load(
            config,
            os.path.join(config.record_dir, config.train_filename)).repeat(),
        max_steps=config.steps_per_epoch * config.num_epochs)
    print("Training on %d minibatches"
          % (config.steps_per_epoch * config.num_epochs))
    eval_spec = tf.estimator.EvalSpec(
        input_fn=lambda: ds.load(
            config, os.path.join(config.record_dir, config.val_filename)),
        steps=None,
        name='validation',
        start_delay_secs=config.eval_delay,
        throttle_secs=config.eval_throttle)
    estimator = tf.estimator.Estimator(model_fn=Model.model_fn,
                                       config=run_config,
                                       params=config,
                                       warm_start_from=config.warm_start_model)
    tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)
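# Usage sketch (hypothetical values): `main` expects a config object exposing
# the attributes read above. A SimpleNamespace stand-in for the project's real
# flag/config parsing might look like this:
#
#   from types import SimpleNamespace
#   config = SimpleNamespace(
#       model_dir='models', run_name='run1', pretrained=False,
#       pretrain_data_dir='pretrain_ckpt', record_dir='records',
#       train_filename='train.tfrecord', val_filename='val.tfrecord',
#       steps_per_epoch=1000, num_epochs=10, eval_delay=120,
#       eval_throttle=600, warm_start_model=None)
#   main(config)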
def save_act_data(config):
    config.batch_size = 1
    estimator = tf.estimator.Estimator(
        model_fn=model_fn,
        model_dir=config.model_dir,
        params=config
    )
    predictions = estimator.predict(
        input_fn=lambda: load(
            config,
            os.path.join(config.record_dir, config.val_filename)
        ).make_one_shot_iterator().get_next())
    id2word = {v: k for k, v in config.word2id.items()}
    id2boundary = {v: k for k, v in config.boundary2id.items()}
    data = []
    ids = []
    maxdoclen = -1
    for i, prediction in enumerate(predictions):
        print('Example %d: %s' % (i, prediction['record_id']))
        ids.append(prediction['record_id'])
        doclen = prediction['doclen'][0]
        maxdoclen = max(maxdoclen, doclen)
        # Escape literal commas so the CSV written below stays parseable.
        words = [id2word[w] if id2word[w] != ',' else '<COMMA>'
                 for w in prediction['words'][:doclen]]
        labels = [id2boundary[w]
                  for w in prediction['boundary_labels'][:doclen]]
        ponder_times = prediction['ponder_times'][:doclen]
        print('(Label, Word, PonderTime): %s'
              % '\n'.join([str(t) for t in zip(labels, words, ponder_times)]))
        data.append((labels, words, ponder_times))
    # One column group per document; rows run from position 0 to maxdoclen.
    with io.open(os.path.join(config.model_dir, config.run_name,
                              'visualization2.csv'),
                 'w+', encoding='utf-8') as f:
        f.write(u','.join(ids))
        f.write(u'\n')
        for i in range(maxdoclen):
            for (labels, words, ponder_times) in data:
                if len(labels) > i and len(words) > i and len(ponder_times) > i:
                    f.write(u'%s,%s,%d,,,'
                            % (labels[i], words[i], ponder_times[i]))
                else:
                    # Keep columns aligned for documents shorter than maxdoclen.
                    f.write(u',,,,,')
            f.write(u'\n')
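# Sketch: reading `visualization2.csv` back for analysis. Each row holds one
# (label, word, ponder_time, '', '', '') group per document, so the stdlib csv
# reader is a simple fit; the function name and path handling are ours.
import csv
import io


def load_act_csv(path):
    with io.open(path, encoding='utf-8') as f:
        rows = list(csv.reader(f))
    # First row holds the record ids; the rest hold the per-position groups,
    # in the same document order as the ids.
    return rows[0], rows[1:]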
def run_model(dataset_name, emb_dim, voc_size, sen_len, hid_dim, batch_size,
              epochs):
    """Run training loop and an evaluation at the end.

    Args:
      dataset_name: Dataset name to be trained and evaluated.
      emb_dim: The dimension of the Embedding layer.
      voc_size: The number of the most frequent tokens to be used from the
        corpus.
      sen_len: The number of words in each sentence. Longer sentences get
        cut, shorter ones padded.
      hid_dim: The dimension of the hidden layer.
      batch_size: The size of each batch during training.
      epochs: The number of iterations over the training set during training.
    """
    model = sentiment_model.CNN(emb_dim, voc_size, sen_len, hid_dim,
                                dataset.get_num_class(dataset_name),
                                _DROPOUT_RATE)
    model.summary()
    model.compile(loss="categorical_crossentropy",
                  optimizer="rmsprop",
                  metrics=["accuracy"])
    tf.logging.info("Loading the data")
    x_train, y_train, x_test, y_test = dataset.load(
        dataset_name, voc_size, sen_len)
    model.fit(x_train, y_train, batch_size=batch_size, validation_split=0.4,
              epochs=epochs)
    score = model.evaluate(x_test, y_test, batch_size=batch_size)
    tf.logging.info("Score: {}".format(score))
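# Example invocation (hypothetical hyperparameters; the dataset name must be
# one the `dataset` module recognizes):
#
#   run_model('imdb', emb_dim=128, voc_size=20000, sen_len=400,
#             hid_dim=64, batch_size=32, epochs=5)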
def run_model(dataset_name, emb_dim, voc_size, sen_len, hid_dim, batch_size,
              epochs, model_save_dir):
    """Run training loop and an evaluation at the end, saving checkpoints.

    Args:
      dataset_name: Dataset name to be trained and evaluated.
      emb_dim: The dimension of the Embedding layer.
      voc_size: The number of the most frequent tokens to be used from the
        corpus.
      sen_len: The number of words in each sentence. Longer sentences get
        cut, shorter ones padded.
      hid_dim: The dimension of the hidden layer.
      batch_size: The size of each batch during training.
      epochs: The number of iterations over the training set during training.
      model_save_dir: The directory in which checkpoints and the final model
        are saved.
    """
    model = sentiment_model.CNN(emb_dim, voc_size, sen_len, hid_dim,
                                dataset.get_num_class(dataset_name),
                                _DROPOUT_RATE)
    model.summary()
    model.compile(loss="categorical_crossentropy",
                  optimizer="rmsprop",
                  metrics=["accuracy"])
    tf.logging.info("Loading the data")
    x_train, y_train, x_test, y_test = dataset.load(dataset_name, voc_size,
                                                    sen_len)
    if not os.path.exists(model_save_dir):
        os.makedirs(model_save_dir)
    # Save the best weights (by validation accuracy) after each epoch.
    filepath = os.path.join(model_save_dir, "model-{epoch:02d}.hdf5")
    checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
        filepath,
        monitor='val_accuracy',
        verbose=1,
        save_best_only=True,
        save_weights_only=True,
        mode='auto')
    model.fit(x_train, y_train, batch_size=batch_size, validation_split=0.4,
              epochs=epochs, callbacks=[checkpoint_callback])
    score = model.evaluate(x_test, y_test, batch_size=batch_size)
    model.save(os.path.join(model_save_dir, "full-model.h5"))
    tf.logging.info("Score: {}".format(score))
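# Sketch: restoring weights written by the checkpoint callback above. Because
# save_weights_only=True, the model must be rebuilt with the same
# hyperparameters before calling load_weights; the function name is ours.
def load_checkpointed_model(dataset_name, emb_dim, voc_size, sen_len, hid_dim,
                            checkpoint_path):
    model = sentiment_model.CNN(emb_dim, voc_size, sen_len, hid_dim,
                                dataset.get_num_class(dataset_name),
                                _DROPOUT_RATE)
    model.load_weights(checkpoint_path)
    return model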
def load_and_prepare_dataset():
    training_images, training_labels, test_images, test_labels = dataset.load(
        "data/mnist.pkl")
    training_images = training_images.astype(np.float64)
    test_images = test_images.astype(np.float64)
    nn_training_labels = np.ndarray((len(training_labels), 10), dtype=float)
    nn_test_labels = np.ndarray((len(test_labels), 10), dtype=float)
    # Scale pixel intensities from [0, 255] to [0, 1].
    for i, v in enumerate(training_images):
        training_images[i] = np.interp(v, (0, 255), (0, 1))
    for i, v in enumerate(test_images):
        test_images[i] = np.interp(v, (0, 255), (0, 1))
    # One-hot encode the integer class labels for the network's output layer.
    for i, v in enumerate(training_labels):
        nn_training_labels[i] = integer_to_positional_vector(v, 10)
    for i, v in enumerate(test_labels):
        nn_test_labels[i] = integer_to_positional_vector(v, 10)
    return training_images, nn_training_labels, test_images, nn_test_labels
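# `integer_to_positional_vector` is called above but not defined in this
# snippet. A one-hot encoding consistent with that usage would be (sketch):
import numpy as np


def integer_to_positional_vector(value, length):
    # Vector of zeros with a 1.0 at the index given by the class label.
    vec = np.zeros(length, dtype=float)
    vec[value] = 1.0
    return vec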
def save_attn_data(config):
    import pandas as pd
    import seaborn as sns
    import matplotlib.pyplot as plt
    import matplotlib as mpl
    # Auto-layout keeps long tick labels from being clipped.
    mpl.rcParams.update({'figure.autolayout': True})
    config.batch_size = 1
    estimator = tf.estimator.Estimator(
        model_fn=model_fn,
        model_dir=config.model_dir,
        params=config
    )
    predictions = estimator.predict(
        input_fn=lambda: load(
            config,
            os.path.join(config.record_dir, config.val_filename)
        ).make_one_shot_iterator().get_next())
    id2word = {v: k for k, v in config.word2id.items()}
    outdir = os.path.join(config.model_dir, config.run_name, 'attention_plots')
    if not os.path.exists(outdir):
        os.mkdir(outdir)
    for i, prediction in enumerate(predictions):
        print('Example %d: %s' % (i, prediction['record_id']))
        record_dir = os.path.join(outdir, prediction['record_id'])
        if not os.path.exists(record_dir):
            os.mkdir(record_dir)
        lens = prediction['sentence_lens']
        sentences = prediction['sentences']
        attn_weights = prediction['attn_weights']
        for s, (slen, sentence, weights) in enumerate(
                zip(lens, sentences, attn_weights)):
            # weights has shape [4, slen, slen]: one attention matrix per head.
            if slen > 0:
                words = [id2word[w] for w in sentence[:slen]]
                # Collect each query word's attention distribution, per head.
                head_columns = [{} for _ in range(4)]
                for w, word in enumerate(words):
                    weight_vector = weights[:, w, :slen]  # [4, slen]
                    for head in range(4):
                        head_columns[head][word] = weight_vector[head]
                # Render one heatmap per attention head.
                for head, columns in enumerate(head_columns):
                    table = pd.DataFrame(columns, index=words, columns=words)
                    plotfile = os.path.join(record_dir,
                                            'sent%d-%d.png' % (s, head))
                    plot = sns.heatmap(table, vmin=0.,
                                       xticklabels=True, yticklabels=True)
                    fig = plot.get_figure()
                    fig.savefig(plotfile)
                    plt.clf()
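# Alternative layout (sketch): render the four heads of one sentence side by
# side in a single figure instead of four separate files. `tables` stands in
# for the four per-head DataFrames built in save_attn_data; the function name
# is ours.
def plot_heads(tables, plotfile):
    import matplotlib.pyplot as plt
    import seaborn as sns
    fig, axes = plt.subplots(1, len(tables), figsize=(6 * len(tables), 6))
    for head, (ax, table) in enumerate(zip(axes, tables)):
        sns.heatmap(table, vmin=0., ax=ax, xticklabels=True, yticklabels=True)
        ax.set_title('head %d' % head)
    fig.savefig(plotfile)
    plt.close(fig)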