import os
import sys

import numpy as np

import load_data
import generation
from keras.utils.generic_utils import Progbar  # Keras 0.x/1.x progress bar
from keras.objectives import binary_crossentropy

# Repo-local helpers (generative_predict_beam, make_gen_batch, make_train_batch,
# adverse_batch) and the PREM_LEN / HYPO_LEN constants are assumed to be defined
# or imported elsewhere in this module.


def train_model_embed(train, dev, glove, model, model_dir='models/curr_model',
                      nb_epochs=20, batch_size=64, hs=True, ci=True):
    X_dev_p, X_dev_h, y_dev = load_data.prepare_split_vec_dataset(dev, glove=glove)
    word_index = load_data.WordIndex(glove)
    if not os.path.exists(model_dir):
        os.makedirs(model_dir)
    for e in range(nb_epochs):
        print "Epoch ", e
        mb = load_data.get_minibatches_idx(len(train), batch_size, shuffle=True)
        p = Progbar(len(train))
        for i, train_index in mb:
            # Skip the ragged final minibatch so every batch has a fixed size.
            if len(train_index) != batch_size:
                continue
            X_train_p, X_train_h, y_train = load_data.prepare_split_vec_dataset(
                [train[k] for k in train_index], word_index.index)
            padded_p = load_data.pad_sequences(X_train_p, maxlen=PREM_LEN, dim=-1, padding='pre')
            padded_h = load_data.pad_sequences(X_train_h, maxlen=HYPO_LEN, dim=-1, padding='post')
            data = {'premise_input': padded_p,
                    'embed_input': np.expand_dims(np.array(train_index), axis=1),
                    'output': padded_h}
            if ci:
                data['class_input'] = y_train
            if hs:
                # With hypothesis supervision, the hypothesis is fed as input
                # and the target becomes a dummy all-ones tensor.
                data['train_input'] = padded_h
                data['output'] = np.ones((batch_size, HYPO_LEN, 1))
            #sw = (padded_h != 0).astype(float)
            #train_loss = float(model.train_on_batch(data, sample_weight={'output': sw})[0])
            train_loss = float(model.train_on_batch(data)[0])
            p.add(len(train_index), [('train_loss', train_loss)])
        sys.stdout.write('\n')
        model.save_weights(model_dir + '/model~' + str(e))

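# Hypothetical usage sketch (not part of the original module): `train`, `dev`,
# `glove`, and `gen_model` are assumed to be produced by load_data and the
# model-building code elsewhere in the repo.
#
#   train_model_embed(train, dev, glove, gen_model,
#                     model_dir='models/embed_gen', nb_epochs=20, batch_size=64)
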
def adversarial_generator(train, gen_model, discriminator, word_index, beam_size):
    # Shapes are read off the compiled models: the generator runs beam search,
    # so each of the `examples` premises occupies `beam_size` rows of a batch.
    batch_size, prem_len, _ = gen_model[0].inputs['premise'].input_shape
    examples = batch_size / beam_size
    hidden_size = gen_model[0].nodes['hypo_merge'].output_shape[2]
    hypo_len = discriminator.input_shape[1]
    while True:
        mb = load_data.get_minibatches_idx(len(train), examples, shuffle=True)
        for i, train_index in mb:
            if len(train_index) != examples:
                continue
            orig_batch = [train[k] for k in train_index]
            noise_input = np.random.normal(scale=0.11, size=(examples, 1, hidden_size))
            class_indices = np.random.random_integers(0, 2, examples)
            hypo_batch, probs = generative_predict_beam(gen_model, word_index, orig_batch,
                                                        noise_input, class_indices, True, hypo_len)
            ad_preds = discriminator.predict_on_batch(hypo_batch)[0].flatten()
            X_prem, _, _ = load_data.prepare_split_vec_dataset(orig_batch, word_index.index)
            premise_batch = load_data.pad_sequences(X_prem, maxlen=prem_len, dim=-1, padding='pre')
            yield {'premise': premise_batch, 'hypo': hypo_batch, 'label': class_indices,
                   'sanity': ad_preds, 'gen_probs': probs}

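# Hypothetical usage sketch: drawing one adversarial batch by hand. The model
# objects are assumed to be built elsewhere; beam_size=4 is illustrative.
#
#   ad_gen = adversarial_generator(train, gen_model, discriminator, word_index, beam_size=4)
#   batch = next(ad_gen)
#   print batch['sanity']  # discriminator scores for the generated hypotheses
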
def adverse_generate2(gen_model, ad_model, cmodel, train, word_index, glove,
                      threshold=0.95, batch_size=64, ci=False):
    mb = load_data.get_minibatches_idx(len(train), batch_size, shuffle=True)
    p = Progbar(len(train))
    results = []
    for i, train_index in mb:
        if len(train_index) != batch_size:
            continue
        orig_batch = [train[k] for k in train_index]
        class_indices = [load_data.LABEL_LIST.index(train[k][2]) for k in train_index]
        probs = generation.generation_predict_embed(gen_model, word_index.index, orig_batch,
                                                    np.random.random_integers(0, len(train), len(orig_batch)),
                                                    class_indices=class_indices)
        gen_batch = generation.get_classes(probs)
        ad_preds = ad_model.predict_on_batch(gen_batch)[0].flatten()
        # Build classifier inputs by concatenating each premise with its
        # generated hypothesis.
        X = []
        for j in range(len(orig_batch)):
            concat = orig_batch[j][0] + ["--"] + word_index.get_seq(gen_batch[j])
            X.append(load_data.load_word_vecs(concat, glove))
        X = np.array(X)
        X_padded = load_data.pad_sequences(X, dim=len(X[0][0]))
        cpreds = cmodel.predict_on_batch(X_padded)[0][np.arange(len(X_padded)), class_indices]
        pred_seq = [word_index.print_seq(gen) for gen in gen_batch]
        premises = [" ".join(ex[0]) for ex in orig_batch]
        classes = np.array(load_data.LABEL_LIST)[class_indices]
        zipped = zip(cpreds, ad_preds, premises, pred_seq, classes)
        results += [el for el in zipped if el[0] * el[1] > threshold]
        p.add(len(train_index), [('added', float(len([el for el in zipped if el[0] * el[1] > threshold])))])
        if len(results) > 200:
            print (i + 1) * batch_size
            return results
    return results

def generation_test(train, glove, model, batch_size=64, prem_len=22, hypo_len=12):
    mb = load_data.get_minibatches_idx(len(train), batch_size, shuffle=True)
    p = Progbar(len(train))
    for i, train_index in mb:
        X_prem, X_hypo, _ = load_data.prepare_split_vec_dataset(
            [train[k] for k in train_index], glove)
        X_p = load_data.pad_sequences(X_prem, maxlen=prem_len, dim=50)
        X_h = load_data.pad_sequences(X_hypo, maxlen=hypo_len, dim=50)
        train_loss = model.train_on_batch(X_p, X_h)[0]
        p.add(len(X_p), [('train_loss', train_loss)])

def generator(train, batch_size, split, trainable):
    # Serve the first `split` examples when training and the remaining
    # examples otherwise.
    size = split if trainable else len(train[0]) - split
    while True:
        mb = load_data.get_minibatches_idx(size, batch_size, shuffle=trainable)
        for _, train_index in mb:
            if not trainable:
                # Shift indices past the training split into the held-out tail.
                train_index += split
            yield [train_index], train[2][train_index]

def test_adverse(dev, ad_model, gen_model, word_index, glove, train_len,
                 batch_size=64, ci=False):
    mb = load_data.get_minibatches_idx(len(dev), batch_size, shuffle=False)
    p = Progbar(len(dev) * 2)
    for i, train_index in mb:
        if len(train_index) != batch_size:
            continue
        class_indices = [i % 3] * batch_size if ci else None
        X, y = adverse_batch([dev[k] for k in train_index], word_index, gen_model,
                             train_len, class_indices=class_indices)
        pred = ad_model.predict_on_batch(X)[0].flatten()
        loss = binary_crossentropy(y.flatten(), pred).eval()
        acc = sum(np.abs(y - pred) < 0.5) / float(len(y))
        p.add(len(X), [('test_loss', loss), ('test_acc', acc)])

def adverse_generator(train, gen_model, noise_embed_len, word_index, batch_size=64,
                      prem_len=22, hypo_len=12):
    while True:
        mb = load_data.get_minibatches_idx(len(train), batch_size, shuffle=True)
        for i, train_index in mb:
            if len(train_index) != batch_size:
                continue
            orig_batch = [train[k] for k in train_index]
            gen_batch = make_gen_batch(orig_batch, gen_model, noise_embed_len,
                                       word_index, batch_size, prem_len, hypo_len)
            train_batch = make_train_batch(orig_batch, word_index, hypo_len)
            yield {'train_hypo': train_batch, 'gen_hypo': gen_batch,
                   'output2': np.zeros(batch_size)}

def adverse_model_train(train, ad_model, gen_model, word_index, glove,
                        nb_epochs=20, batch_size=64, ci=False):
    for e in range(nb_epochs):
        print "Epoch ", e
        mb = load_data.get_minibatches_idx(len(train), batch_size, shuffle=True)
        p = Progbar(2 * len(train))
        for i, train_index in mb:
            if len(train_index) != batch_size:
                continue
            class_indices = [i % 3] * batch_size if ci else None
            X, y = adverse_batch([train[k] for k in train_index], word_index,
                                 gen_model, len(train), class_indices=class_indices)
            loss = ad_model.train_on_batch(X, y)[0]
            p.add(len(X), [('train_loss', loss)])

def test_model(model, dev, glove, batch_size=100, return_probs=False):
    X_dev, y_dev = load_data.prepare_vec_dataset(dev, glove)
    dmb = load_data.get_minibatches_idx(len(X_dev), batch_size, shuffle=False)
    #dmb = load_data.get_minibatches_idx_bucketing([len(ex[0]) + len(ex[1]) for ex in dev],
    #                                              batch_size, shuffle=True)
    y_pred = np.zeros((len(y_dev), 3))
    for i, dev_index in dmb:
        X_padded = load_data.pad_sequences(X_dev[dev_index], dim=len(X_dev[0][0]))
        y_pred[dev_index] = model.predict_on_batch(X_padded)
    acc = np.sum(np.argmax(y_pred, axis=1) == np.argmax(y_dev, axis=1)) / float(len(y_pred))
    if return_probs:
        return acc, y_pred
    else:
        return acc

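# Hypothetical usage sketch; `model` is assumed to be a compiled 3-way NLI
# classifier and `dev` a list of (premise, hypothesis, label) examples:
#
#   acc = test_model(model, dev, glove)
#   acc, probs = test_model(model, dev, glove, return_probs=True)
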
def val_generator(dev, gen_test, beam_size, hypo_len, noise_size):
    batch_size = gen_test[0].input_layers[0].input_shape[0]
    per_batch = batch_size / beam_size
    while True:
        mb = load_data.get_minibatches_idx(len(dev[0]), per_batch, shuffle=False)
        for i, train_index in mb:
            if len(train_index) != per_batch:
                continue
            premises = dev[0][train_index]
            noise_input = np.random.normal(scale=0.11, size=(per_batch, 1, noise_size))
            class_indices = dev[2][train_index]
            words, loss = generative_predict_beam(gen_test, premises, noise_input,
                                                  class_indices, True, hypo_len)
            yield premises, words, loss, noise_input, class_indices

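# Hypothetical usage sketch: inspecting one beam-searched validation batch
# (the beam_size / hypo_len / noise_size values are illustrative):
#
#   vgen = val_generator(dev, gen_test, beam_size=4, hypo_len=12, noise_size=10)
#   premises, words, loss, noise, labels = next(vgen)
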
def adverse_model2_train(train, ad_model, gen_model, word_index, glove,
                         nb_epochs=20, batch_size=64, ci=False):
    for e in range(nb_epochs):
        print "Epoch ", e
        mb = load_data.get_minibatches_idx(len(train), batch_size, shuffle=True)
        p = Progbar(len(train))
        for i, train_index in mb:
            if len(train_index) != batch_size:
                continue
            class_indices = [i % 3] * batch_size if ci else None
            train_b, gen_b, y = adverse_batch([train[k] for k in train_index], word_index,
                                              gen_model, len(train),
                                              class_indices=class_indices, separate=False)
            data = {'train_hypo': train_b, 'gen_hypo': gen_b, 'output2': y}
            loss = ad_model.train_on_batch(data)[0]
            p.add(len(train_b), [('train_loss', loss)])

def adverse_generate(gen_model, ad_model, train, word_index, threshold=0.95,
                     batch_size=64, ci=False):
    mb = load_data.get_minibatches_idx(len(train), batch_size, shuffle=True)
    results = []
    for i, train_index in mb:
        if len(train_index) != batch_size:
            continue
        orig_batch = [train[k] for k in train_index]
        class_indices = [i % 3] * batch_size if ci else None
        probs = generation.generation_predict_embed(gen_model, word_index.index, orig_batch,
                                                    np.random.random_integers(0, len(train), len(orig_batch)),
                                                    class_indices=class_indices)
        gen_batch = generation.get_classes(probs)
        preds = ad_model.predict_on_batch(gen_batch)[0].flatten()
        # Keep only generated hypotheses the adversarial model rates above threshold.
        zipped = zip(preds, [word_index.print_seq(gen) for gen in gen_batch])
        results += [el for el in zipped if el[0] > threshold]
        if len(results) > 64:
            print (i + 1) * batch_size
            return results
    return results

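# Hypothetical usage sketch: mining high-confidence adversarial hypotheses
# (the model objects are assumed to be trained elsewhere in the repo):
#
#   fooling = adverse_generate(gen_model, ad_model, train, word_index, threshold=0.95)
#   for score, hypo in fooling[:5]:
#       print score, hypo
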
def adverse_generator(train, gen_model, word_index, cache_prob, batch_size, hypo_len):
    cache = []
    while True:
        mb = load_data.get_minibatches_idx(len(train), batch_size, shuffle=True)
        for i, train_index in mb:
            if len(train_index) != batch_size:
                continue
            orig_batch = [train[k] for k in train_index]
            # With probability (1 - cache_prob), or while the cache is cold,
            # generate a fresh batch and cache it; otherwise replay a random
            # cached batch.
            if np.random.random() > cache_prob or len(cache) < 100:
                gen_batch, _ = make_gen_batch(orig_batch, gen_model, word_index, hypo_len)
                cache.append(gen_batch)
            else:
                gen_batch = cache[np.random.random_integers(0, len(cache) - 1)]
            train_batch = make_train_batch(orig_batch, word_index, hypo_len)
            yield [train_batch, gen_batch], np.zeros(batch_size)

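# Hypothetical usage sketch: feeding the cached generator to a Keras 1.x-style
# fit_generator call (`ad_model` and the epoch sizing are assumptions):
#
#   ad_model.fit_generator(adverse_generator(train, gen_model, word_index,
#                                            cache_prob=0.5, batch_size=64, hypo_len=12),
#                          samples_per_epoch=len(train), nb_epoch=20)
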
def train_generator(train, batch_size, hypo_len, cinput, ninput, vae):
    while True:
        mb = load_data.get_minibatches_idx(len(train[0]), batch_size, shuffle=True)
        for i, train_index in mb:
            if len(train_index) != batch_size:
                continue
            padded_p = train[0][train_index]
            padded_h = train[1][train_index]
            label = train[2][train_index]
            # Shift the hypothesis right for the decoder input and left for the target.
            hypo_input = np.concatenate([np.zeros((batch_size, 1)), padded_h], axis=1)
            train_input = np.concatenate([padded_h, np.zeros((batch_size, 1))], axis=1)
            inputs = ([padded_p, hypo_input]
                      + ([train_index[:, None]] if ninput else [])
                      + [train_input])
            if cinput:
                inputs.append(label)
            outputs = [np.ones((batch_size, hypo_len + 1, 1))]
            if vae:
                outputs += [np.zeros(batch_size)]
            yield (inputs, outputs)

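# Hypothetical usage sketch; `gtrain` is assumed to be a compiled functional
# model whose input order matches the `inputs` list built above:
#
#   gtrain.fit_generator(train_generator(train, 64, 12, cinput=True, ninput=True, vae=False),
#                        samples_per_epoch=len(train[0]), nb_epoch=20)
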
def generative_train_generator(train, word_index, batch_size=64, prem_len=22, hypo_len=12):
    while True:
        mb = load_data.get_minibatches_idx(len(train), batch_size, shuffle=True)
        for i, train_index in mb:
            if len(train_index) != batch_size:
                continue
            X_train_p, X_train_h, y_train = load_data.prepare_split_vec_dataset(
                [train[k] for k in train_index], word_index.index)
            padded_p = load_data.pad_sequences(X_train_p, maxlen=prem_len, dim=-1, padding='pre')
            padded_h = load_data.pad_sequences(X_train_h, maxlen=hypo_len, dim=-1, padding='post')
            hypo_input = np.concatenate([np.zeros((batch_size, 1)), padded_h], axis=1)
            train_input = np.concatenate([padded_h, np.zeros((batch_size, 1))], axis=1)
            yield {'premise_input': padded_p, 'hypo_input': hypo_input,
                   'train_input': train_input,
                   'noise_input': np.expand_dims(train_index, axis=1),
                   'class_input': y_train,
                   'output': np.ones((batch_size, hypo_len + 1, 1))}

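# Hypothetical usage sketch for a Keras Graph-style model that consumes the
# dict batches yielded above (the model name and epoch counts are assumptions):
#
#   graph_model.fit_generator(generative_train_generator(train, word_index),
#                             samples_per_epoch=len(train), nb_epoch=20)
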