def _load_glove_embeddings(glove_path):
    """Parse a GloVe text file into a {word: float32 ndarray} dict.

    Each line is '<word> <v1> <v2> ...'.  Extracted from train() to remove
    the loader that was copy-pasted into both classification branches; the
    'with' block also fixes the original leak-prone open()/close() pair.
    """
    embeddings_index = {}
    with open(glove_path) as f:
        for line in f:
            values = line.split()
            word = values[0]
            coefs = np.asarray(values[1:], dtype='float32')
            embeddings_index[word] = coefs
    print('Total %s word vectors.' % len(embeddings_index))
    return embeddings_index


def _init_embedding_matrix(word_id_pairs, embeddings_index, num_rows, dim):
    """Build a (num_rows, dim) embedding matrix seeded from GloVe.

    Rows for words present in embeddings_index get their GloVe vector;
    all other rows keep a uniform-random init (NOT all-zeros, despite what
    the original inline comment claimed).
    """
    matrix = np.random.random((num_rows, dim))
    for word, idx in word_id_pairs:
        vector = embeddings_index.get(word)
        if vector is not None:
            matrix[idx] = vector
    return matrix


def train():
    """Train a StructuredSelfAttention text classifier.

    Depending on params_set["classification_type"]:
      * "multiclass" - AGNews CSVs; vocabulary built with a TF
        VocabularyProcessor, 10% of train split off as dev.
      * "binary"     - Keras IMDB data (already integer-encoded); the last
        1000 examples are held out as dev.
    Both branches seed the embedding table from a GloVe file, then run the
    shared TF training loop with periodic dev evaluation and checkpoints.
    """
    classification_type = params_set["classification_type"]
    init_embedding = []
    # TODO: GloVe location is hard-coded to one developer's machine; make it
    # configurable (it was duplicated in both branches before).
    glove_dir = "/home/raj/Desktop/Aruna/glove.6B"
    glove_path = os.path.join(glove_dir, 'glove.6B.300d.txt')

    if classification_type == "multiclass":
        print("Performing multiclass classification on AGNews Dataset")
        x_text, y = load_data("data/ag_news_csv/train.csv")
        x_test1, y_test = load_data("data/ag_news_csv/test.csv")
        x_train1, x_dev1, y_train, y_dev = train_test_split(x_text, y, test_size=0.1)
        vocab_processor = tf.contrib.learn.preprocessing.VocabularyProcessor(
            model_params['max_sentence_length'])
        x_train = np.array(list(vocab_processor.fit_transform(x_train1)))
        x_dev = np.array(list(vocab_processor.transform(x_dev1)))
        x_test = np.array(list(vocab_processor.transform(x_test1)))
        print("Text Vocabulary Size: {:d}".format(len(vocab_processor.vocabulary_)))
        print("Train Data - X: " + str(x_train.shape) + " Labels: " + str(y_train.shape))
        print("Dev Data - X: " + str(x_dev.shape) + " Labels: " + str(y_dev.shape))
        vocab_dictionary = vocab_processor.vocabulary_._mapping
        # (word, id) pairs ordered by id.
        word_to_id = sorted(vocab_dictionary.items(), key=lambda x: x[1])
        del (x_train1, x_dev1, x_test1)
        embeddings_index = _load_glove_embeddings(glove_path)
        vocab_size = len(vocab_processor.vocabulary_) + 1
        init_embedding = _init_embedding_matrix(
            word_to_id, embeddings_index, vocab_size, params_set["embedding_dim"])
    elif classification_type == "binary":
        print("Performing binary classification on IMDB Dataset")
        train_set, dev_set = imdb.load_data(num_words=model_params["vocab_size"],
                                            index_from=INDEX_FROM)
        x_tr, y_tr = train_set[0], train_set[1]
        x_d, y_d = dev_set[0], dev_set[1]
        # Shift the Keras word index by INDEX_FROM so the reserved ids below
        # stay free.
        word_to_id = imdb.get_word_index()
        word_to_id = {k: (v + INDEX_FROM) for k, v in word_to_id.items()}
        word_to_id["<PAD>"] = 0
        word_to_id["<START>"] = 1
        word_to_id["<UNK>"] = 2
        x_text = np.concatenate([x_tr, x_d])
        y = np.concatenate([y_tr, y_d])
        # One-hot label vectors.
        n_values = np.max(y) + 1
        y = np.array(np.eye(n_values)[y], int)
        n_train = x_text.shape[0] - 1000
        n_valid = 1000
        # Never fitted on the IMDB text; exists only so the shared code path
        # below can save *a* vocabulary file.
        vocab_processor = tf.contrib.learn.preprocessing.VocabularyProcessor(
            model_params['max_sentence_length'])
        x_tr = x_text[:n_train]
        x_d = x_text[n_train:n_train + n_valid]
        y_train = y[:n_train]
        y_dev = y[n_train:n_train + n_valid]
        x_train = pad_sequences(x_tr, maxlen=model_params['max_sentence_length'])
        x_dev = pad_sequences(x_d, maxlen=model_params['max_sentence_length'])
        del (x_tr, y_tr, x_d, y_d, y, train_set, dev_set, x_text)
        embeddings_index = _load_glove_embeddings(glove_path)
        # NOTE(review): shifted ids can reach len(word_to_id) + INDEX_FROM - 1,
        # which may exceed this matrix's len(word_to_id) + 1 rows - confirm.
        vocab_size = len(word_to_id) + 1
        init_embedding = _init_embedding_matrix(
            word_to_id.items(), embeddings_index, vocab_size,
            params_set["embedding_dim"])

    graph = tf.Graph()
    with graph.as_default():
        session_conf = tf.ConfigProto()
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            model = StructuredSelfAttention(
                sequence_length=x_train.shape[1],
                num_classes=y_train.shape[1],
                vocab_size=vocab_size,
                embedding_size=params_set["embedding_dim"],
                hidden_size=model_params['lstm_hidden_dimension'],
                d_a_size=model_params["d_a"],
                r_size=params_set["attention_hops"],
                fc_size=model_params["fc"],
                p_coef=params_set["C"])

            # Define Training procedure
            global_step = tf.Variable(0, name="global_step", trainable=False)
            train_op = tf.train.AdamOptimizer(params_set["learning_rate"]).minimize(
                model.loss, global_step=global_step)

            # Output directory for models and summaries
            timestamp = str(int(time.time()))
            out_dir = os.path.abspath(os.path.join(os.path.curdir, "outputs", timestamp))
            print("Writing to {}\n".format(out_dir))

            # Summaries for loss and accuracy
            loss_summary = tf.summary.scalar("loss", model.loss)
            acc_summary = tf.summary.scalar("accuracy", model.accuracy)
            # Train summaries
            train_summary_op = tf.summary.merge([loss_summary, acc_summary])
            train_summary_dir = os.path.join(out_dir, "summaries", "train")
            train_summary_writer = tf.summary.FileWriter(train_summary_dir, sess.graph)
            # Dev summaries
            dev_summary_op = tf.summary.merge([loss_summary, acc_summary])
            dev_summary_dir = os.path.join(out_dir, "summaries", "dev")
            dev_summary_writer = tf.summary.FileWriter(dev_summary_dir, sess.graph)

            # Checkpoint directory: TF assumes it exists, so create it first.
            checkpoint_dir = os.path.abspath(os.path.join(out_dir, "checkpoints"))
            checkpoint_prefix = os.path.join(checkpoint_dir, "model")
            if not os.path.exists(checkpoint_dir):
                os.makedirs(checkpoint_dir)
            saver = tf.train.Saver(tf.global_variables(),
                                   max_to_keep=FLAGS.num_checkpoints)

            # Write vocabulary
            vocab_processor.save(os.path.join(out_dir, "vocabulary"))

            # Initialize all variables, then overwrite the embedding table
            # with the GloVe-seeded matrix built above.
            sess.run(tf.global_variables_initializer())
            sess.run(model.w_embedding.assign(init_embedding))

            # Generate batches & start training loop for each batch
            batches = batch_iterator(list(zip(x_train, y_train)),
                                     model_params["batch_size"],
                                     params_set["num_epochs"])
            for batch in batches:
                x_batch, y_batch = zip(*batch)
                feed_dict = {model.input_text: x_batch, model.input_y: y_batch}
                _, step, summaries, loss, accuracy = sess.run(
                    [train_op, global_step, train_summary_op, model.loss,
                     model.accuracy], feed_dict)
                train_summary_writer.add_summary(summaries, step)

                # Training progress display
                if step % FLAGS.display_every == 0:
                    time_str = datetime.datetime.now()
                    print("{}: Step {}, Loss {:g}, Acc {:g}".format(
                        time_str, step, loss, accuracy))

                # Evaluation on validation set every 1000 steps
                if step % FLAGS.evaluate_every == 0:
                    print("\nEvaluation on Dev set every 1000 steps:")
                    batches_dev = batch_iterator(list(zip(x_dev, y_dev)),
                                                 model_params["batch_size"], 1)
                    loss_dev = 0
                    accuracy_dev = 0
                    cnt = 0
                    for batch_dev in batches_dev:
                        x_batch_dev, y_batch_dev = zip(*batch_dev)
                        feed_dict_dev = {model.input_text: x_batch_dev,
                                         model.input_y: y_batch_dev}
                        summaries_dev, loss, accuracy = sess.run(
                            [dev_summary_op, model.loss, model.accuracy],
                            feed_dict_dev)
                        dev_summary_writer.add_summary(summaries_dev, step)
                        loss_dev += loss
                        accuracy_dev += accuracy
                        cnt += 1
                    time_str = datetime.datetime.now()
                    print("{}: Step {}, Loss {:g}, Acc {:g}".format(
                        time_str, step, loss_dev / cnt, accuracy_dev / cnt))

                # Model checkpoint
                if step % FLAGS.checkpoint_every == 0:
                    path = saver.save(sess, checkpoint_prefix, global_step=step)
                    print("Saved model checkpoint to {}\n".format(path))

            # Evaluate trained model on test data.
            # NOTE(review): x_test / y_test exist only on the multiclass path
            # and test_idx is not defined anywhere in this file - the binary
            # path will raise NameError here. Confirm intent.
            attn_wts, acc, loss = sess.run(
                [model.A, model.accuracy, model.loss],
                feed_dict={model.input_text: x_test[:test_idx],
                           model.input_y: y_test[:test_idx]})
            visualize_attention(sess, attn_wts, x_test[:test_idx],
                                dict(word_to_id), filename='attention.html')
            print("Test Accuracy: {:g}".format(acc))
            print("Test Loss: {:g}\n".format(loss))
def main():
    """Adversarially train an MNIST classifier (Lasagne/Theano).

    Minimizes cross-entropy plus FLAGS.lmbd-weighted adversarial-training
    loss, validates every epoch, decays the learning rate after
    FLAGS.start_learning_rate_decay epochs, periodically logs DeepFool
    robustness statistics with sample images, and finally evaluates the
    saved network on the test set.
    """
    setup_train_experiment(logger, FLAGS, "%(model)s_at")

    logger.info("Loading data...")
    data = mnist_load(FLAGS.train_size, FLAGS.seed)
    X_train, y_train = data.X_train, data.y_train
    X_val, y_val = data.X_val, data.y_val
    X_test, y_test = data.X_test, data.y_test

    img_shape = [None, 1, 28, 28]
    train_images = T.tensor4('train_images')
    train_labels = T.lvector('train_labels')
    # NOTE(review): the Theano name says 'valid_labels' but the variable
    # holds images - harmless (names are cosmetic) but confusing.
    val_images = T.tensor4('valid_labels')
    val_labels = T.lvector('valid_labels')

    layer_dims = [int(dim) for dim in FLAGS.layer_dims.split("-")]
    num_classes = layer_dims[-1]
    net = create_network(FLAGS.model, img_shape, layer_dims=layer_dims)
    model = with_end_points(net)
    train_outputs = model(train_images)
    val_outputs = model(val_images, deterministic=True)

    # losses
    train_ce = categorical_crossentropy(train_outputs['prob'],
                                        train_labels).mean()
    train_at = adversarial_training(lambda x: model(x)['prob'], train_images,
                                    train_labels,
                                    epsilon=FLAGS.epsilon).mean()
    train_loss = train_ce + FLAGS.lmbd * train_at
    val_ce = categorical_crossentropy(val_outputs['prob'], val_labels).mean()
    val_deepfool_images = deepfool(
        lambda x: model(x, deterministic=True)['logits'], val_images,
        val_labels, num_classes, max_iter=FLAGS.deepfool_iter,
        clip_dist=FLAGS.deepfool_clip, over_shoot=FLAGS.deepfool_overshoot)

    # metrics
    train_acc = categorical_accuracy(train_outputs['logits'],
                                     train_labels).mean()
    train_err = 1.0 - train_acc
    val_acc = categorical_accuracy(val_outputs['logits'], val_labels).mean()
    val_err = 1.0 - val_acc

    # deepfool robustness: tuple(...) because a Python 3 range object is not
    # a valid tensor `axis` argument (this was Py2-era code).
    reduc_ind = tuple(range(1, train_images.ndim))
    l2_deepfool = (val_deepfool_images - val_images).norm(2, axis=reduc_ind)
    l2_deepfool_norm = l2_deepfool / val_images.norm(2, axis=reduc_ind)

    train_metrics = OrderedDict([('loss', train_loss), ('nll', train_ce),
                                 ('at', train_at), ('err', train_err)])
    val_metrics = OrderedDict([('nll', val_ce), ('err', val_err)])
    summary_metrics = OrderedDict([('l2', l2_deepfool.mean()),
                                   ('l2_norm', l2_deepfool_norm.mean())])

    lr = theano.shared(floatX(FLAGS.initial_learning_rate), 'learning_rate')
    train_params = get_all_params(net, trainable=True)
    train_updates = adam(train_loss, train_params, lr)

    logger.info("Compiling theano functions...")
    # list(...) around the dict views: theano.function needs a list, and
    # under Python 3 `dict_values + list` below would raise TypeError.
    train_fn = theano.function([train_images, train_labels],
                               outputs=list(train_metrics.values()),
                               updates=train_updates)
    val_fn = theano.function([val_images, val_labels],
                             outputs=list(val_metrics.values()))
    summary_fn = theano.function(
        [val_images, val_labels],
        outputs=list(summary_metrics.values()) + [val_deepfool_images])

    logger.info("Starting training...")
    try:
        # Fixed subset of validation images used for periodic adversarial
        # summaries, saved once up front for visual comparison.
        samples_per_class = FLAGS.summary_samples_per_class
        summary_images, summary_labels = select_balanced_subset(
            X_val, y_val, num_classes, samples_per_class)
        save_path = os.path.join(FLAGS.samples_dir, 'orig.png')
        save_images(summary_images, save_path)

        epoch = 0
        batch_index = 0
        while epoch < FLAGS.num_epochs:
            epoch += 1
            start_time = time.time()

            train_iterator = batch_iterator(X_train, y_train,
                                            FLAGS.batch_size, shuffle=True)
            epoch_outputs = np.zeros(len(train_fn.outputs))
            # batch_index keeps counting across epochs (starts from the
            # previous epoch's value + 1).
            for batch_index, (images, labels) in enumerate(
                    train_iterator, batch_index + 1):
                batch_outputs = train_fn(images, labels)
                epoch_outputs += batch_outputs
            epoch_outputs /= X_train.shape[0] // FLAGS.batch_size
            logger.info(
                build_result_str(
                    "Train epoch [{}, {:.2f}s]:".format(
                        epoch, time.time() - start_time),
                    train_metrics.keys(), epoch_outputs))

            # update learning rate
            if epoch > FLAGS.start_learning_rate_decay:
                new_lr_value = lr.get_value(
                ) * FLAGS.learning_rate_decay_factor
                lr.set_value(floatX(new_lr_value))
                logger.debug("learning rate was changed to {:.10f}".format(
                    new_lr_value))

            # validation
            start_time = time.time()
            val_iterator = batch_iterator(X_val, y_val, FLAGS.test_batch_size,
                                          shuffle=False)
            val_epoch_outputs = np.zeros(len(val_fn.outputs))
            for images, labels in val_iterator:
                val_epoch_outputs += val_fn(images, labels)
            val_epoch_outputs /= X_val.shape[0] // FLAGS.test_batch_size
            logger.info(
                build_result_str(
                    "Test epoch [{}, {:.2f}s]:".format(
                        epoch, time.time() - start_time),
                    val_metrics.keys(), val_epoch_outputs))

            if epoch % FLAGS.summary_frequency == 0:
                summary = summary_fn(summary_images, summary_labels)
                logger.info(
                    build_result_str(
                        "Epoch [{}] adversarial statistics:".format(epoch),
                        summary_metrics.keys(), summary[:-1]))
                save_path = os.path.join(FLAGS.samples_dir,
                                         'epoch-%d.png' % epoch)
                df_images = summary[-1]
                save_images(df_images, save_path)

            if epoch % FLAGS.checkpoint_frequency == 0:
                save_network(net, epoch=epoch)
    except KeyboardInterrupt:
        logger.debug("Keyboard interrupt. Stopping training...")
    finally:
        # Always persist the final weights, even on interrupt.
        save_network(net)

    # evaluate final model on test set
    test_iterator = batch_iterator(X_test, y_test, FLAGS.test_batch_size,
                                   shuffle=False)
    test_results = np.zeros(len(val_fn.outputs))
    for images, labels in test_iterator:
        test_results += val_fn(images, labels)
    test_results /= X_test.shape[0] // FLAGS.test_batch_size
    logger.info(
        build_result_str("Final test results:", val_metrics.keys(),
                         test_results))
def main():
    """Evaluate a saved MNIST classifier against adversarial attacks.

    Loads the network checkpointed at FLAGS.load_epoch, measures clean,
    FGSM, and DeepFool error rates plus DeepFool perturbation-norm
    statistics over the test set, and periodically saves adversarial
    sample images.
    """
    setup_experiment()

    data = mnist_load()
    X_test = data.X_test
    y_test = data.y_test
    if FLAGS.sort_labels:
        # Group examples by class so saved sample grids are ordered.
        ys_indices = np.argsort(y_test)
        X_test = X_test[ys_indices]
        y_test = y_test[ys_indices]

    img_shape = [None, 1, 28, 28]
    test_images = T.tensor4('test_images')
    test_labels = T.lvector('test_labels')

    # loaded discriminator number of classes and dims
    layer_dims = [int(dim) for dim in FLAGS.layer_dims.split("-")]
    num_classes = layer_dims[-1]

    # create and load discriminator
    net = create_network(FLAGS.model, img_shape, layer_dims=layer_dims)
    load_network(net, epoch=FLAGS.load_epoch)
    model = with_end_points(net)

    test_outputs = model(test_images, deterministic=True)

    # deepfool images: targeted (true labels) and label-free variants
    test_df_images = deepfool(
        lambda x: model(x, deterministic=True)['logits'], test_images,
        test_labels, num_classes, max_iter=FLAGS.deepfool_iter,
        clip_dist=FLAGS.deepfool_clip, over_shoot=FLAGS.deepfool_overshoot)
    test_df_images_all = deepfool(
        lambda x: model(x, deterministic=True)['logits'], test_images,
        num_classes=num_classes, max_iter=FLAGS.deepfool_iter,
        clip_dist=FLAGS.deepfool_clip, over_shoot=FLAGS.deepfool_overshoot)
    test_df_outputs = model(test_df_images, deterministic=True)

    # fast gradient sign images
    test_fgsm_images = test_images + fast_gradient_perturbation(
        test_images, test_outputs['logits'], test_labels, FLAGS.fgsm_epsilon)
    test_at_outputs = model(test_fgsm_images, deterministic=True)

    # test metrics
    test_acc = categorical_accuracy(test_outputs['logits'],
                                    test_labels).mean()
    test_err = 1 - test_acc
    test_fgsm_acc = categorical_accuracy(test_at_outputs['logits'],
                                         test_labels).mean()
    test_fgsm_err = 1 - test_fgsm_acc
    test_df_acc = categorical_accuracy(test_df_outputs['logits'],
                                       test_labels).mean()
    test_df_err = 1 - test_df_acc

    # adversarial noise statistics: tuple(...) because a Python 3 range
    # object is not a valid tensor `axis` argument (this was Py2-era code).
    reduc_ind = tuple(range(1, test_images.ndim))
    test_l2_df = T.sqrt(
        T.sum((test_df_images - test_images)**2, axis=reduc_ind))
    test_l2_df_norm = test_l2_df / T.sqrt(
        T.sum(test_images**2, axis=reduc_ind))
    # "_skip" variants average only over examples DeepFool actually
    # perturbed (norm > 0).
    test_l2_df_skip = test_l2_df.sum() / T.sum(test_l2_df > 0)
    test_l2_df_skip_norm = test_l2_df_norm.sum() / T.sum(test_l2_df_norm > 0)
    test_l2_df_all = T.sqrt(
        T.sum((test_df_images_all - test_images)**2, axis=reduc_ind))
    test_l2_df_all_norm = test_l2_df_all / T.sqrt(
        T.sum(test_images**2, axis=reduc_ind))

    test_metrics = OrderedDict([('err', test_err),
                                ('err_fgsm', test_fgsm_err),
                                ('err_df', test_df_err),
                                ('l2_df', test_l2_df.mean()),
                                ('l2_df_norm', test_l2_df_norm.mean()),
                                ('l2_df_skip', test_l2_df_skip),
                                ('l2_df_skip_norm', test_l2_df_skip_norm),
                                ('l2_df_all', test_l2_df_all.mean()),
                                ('l2_df_all_norm', test_l2_df_all_norm.mean())
                                ])

    logger.info("Compiling theano functions...")
    # list(...) around the dict view: theano.function needs a list under
    # Python 3, where dict.values() returns a view.
    test_fn = theano.function([test_images, test_labels],
                              outputs=list(test_metrics.values()))
    generate_fn = theano.function([test_images, test_labels],
                                  [test_df_images, test_df_images_all],
                                  on_unused_input='ignore')

    logger.info("Generate samples...")
    samples_per_class = 10
    summary_images, summary_labels = select_balanced_subset(
        X_test, y_test, num_classes, samples_per_class)
    save_path = os.path.join(FLAGS.samples_dir, 'orig.png')
    save_images(summary_images, save_path)
    df_images, df_images_all = generate_fn(summary_images, summary_labels)
    save_path = os.path.join(FLAGS.samples_dir, 'deepfool.png')
    save_images(df_images, save_path)
    save_path = os.path.join(FLAGS.samples_dir, 'deepfool_all.png')
    save_images(df_images_all, save_path)

    logger.info("Starting...")
    test_iterator = batch_iterator(X_test, y_test, FLAGS.batch_size,
                                   shuffle=False)
    test_results = np.zeros(len(test_fn.outputs))
    start_time = time.time()
    for batch_index, (images, labels) in enumerate(test_iterator, 1):
        batch_results = test_fn(images, labels)
        test_results += batch_results
        if batch_index % FLAGS.summary_frequency == 0:
            df_images, df_images_all = generate_fn(images, labels)
            save_path = os.path.join(FLAGS.samples_dir,
                                     'b%d-df.png' % batch_index)
            save_images(df_images, save_path)
            save_path = os.path.join(FLAGS.samples_dir,
                                     'b%d-df_all.png' % batch_index)
            save_images(df_images_all, save_path)
            logger.info(
                build_result_str(
                    "Batch [{}] adversarial statistics:".format(batch_index),
                    test_metrics.keys(), batch_results))
    test_results /= batch_index
    logger.info(
        build_result_str(
            "Test results [{:.2f}s]:".format(time.time() - start_time),
            test_metrics.keys(), test_results))