def adverse_generate2(gen_model, ad_model, cmodel, train, word_index, glove, threshold = 0.95, batch_size = 64, ci = False):
    """Generate adversarial hypotheses and keep the ones that fool both scorers.

    Runs the generator over shuffled fixed-size minibatches of `train`, scores
    each generated hypothesis with the adversarial discriminator (`ad_model`)
    and the entailment classifier (`cmodel`), and collects the examples whose
    combined score `cpred * ad_pred` exceeds `threshold`.  Stops early once
    more than 200 examples have been collected.

    Returns a list of
    (classifier prob, adversarial prob, premise string, generated hypothesis,
     class label) tuples.
    """
    mb = load_data.get_minibatches_idx(len(train), batch_size, shuffle=True)
    p = Progbar(len(train))
    results = []
    for batch_no, train_index in mb:
        if len(train_index) != batch_size:
            continue  # generator requires full, fixed-size batches
        orig_batch = [train[k] for k in train_index]
        class_indices = [load_data.LABEL_LIST.index(train[k][2]) for k in train_index]
        # np.random.randint has an *exclusive* upper bound, so the noise
        # indices stay in [0, len(train)); the previous
        # random_integers(0, len(train), ...) was inclusive and could yield
        # len(train), one past the last valid index (it is also deprecated).
        noise_indices = np.random.randint(0, len(train), len(orig_batch))
        probs = generation.generation_predict_embed(gen_model, word_index.index, orig_batch,
                                                    noise_indices, class_indices = class_indices)
        gen_batch = generation.get_classes(probs)
        ad_preds = ad_model.predict_on_batch(gen_batch)[0].flatten()
        # Build classifier input: premise ++ "--" ++ generated hypothesis.
        X = []
        for j in range(len(orig_batch)):
            concat = orig_batch[j][0] + ["--"] + word_index.get_seq(gen_batch[j])
            X.append(load_data.load_word_vecs(concat, glove))
        X = np.array(X)
        X_padded = load_data.pad_sequences(X, dim = len(X[0][0]))
        # Probability the classifier assigns to each example's true class.
        cpreds = cmodel.predict_on_batch(X_padded)[0][np.arange(len(X_padded)), class_indices]
        pred_seq = [word_index.print_seq(gen) for gen in gen_batch]
        premises = [" ".join(ex[0]) for ex in orig_batch]
        classes = np.array(load_data.LABEL_LIST)[class_indices]
        zipped = zip(cpreds, ad_preds, premises, pred_seq, classes)
        # Filter once; the original recomputed the same filter a second time
        # just for the progress-bar count.
        accepted = [el for el in zipped if el[0] * el[1] > threshold]
        results += accepted
        p.add(len(train_index), [('added', float(len(accepted)))])
        if len(results) > 200:
            # Number of training examples consumed so far.  The original
            # shadowed the minibatch counter with the inner example index, so
            # this count was wrong.
            print (batch_no + 1) * batch_size
            return results
    return results
def predict_example(premise, hypothesis, model, glove): concat = premise.split() + ["--"] + hypothesis.split() for word in concat: if word not in glove: print word, 'not in glove' return vec = load_data.load_word_vecs(concat, glove) return model.predict_on_batch(np.expand_dims(vec, axis=0))
import paraphrase import numpy as np import itertools import os if __name__ == "__main__": train, dev, test = load_data.load_all_snli_datasets('data/snli_1.0/') glove = load_data.import_glove('data/snli_vectors.txt') for ex in train+dev: load_data.load_word_vecs(ex[0] + ex[1], glove) load_data.load_word_vec('EOS', glove) wi = load_data.WordIndex(glove) def grid_experiments(train, dev, glove, embed_size = 300, hidden_size = 100): lr_vec = [0.001, 0.0003, 0.0001] dropout_vec = [0.0, 0.1, 0.2] reg_vec = [0.0, 0.001, 0.0003, 0.0001] for params in itertools.product(lr_vec, dropout_vec, reg_vec): filename = 'lr' + str(params[0]).replace('.','') + '_drop' + str(params[1]).replace('.','') + '_reg' + str(params[2]).replace('.','') print 'Model', filename model = models.init_model(embed_size, hidden_size, params[0], params[1], params[2]) models.train_model(train, dev, glove, model, 'models/' + filename)