Example #1
def train(train_loc, dev_loc, shape, settings):
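    # Read SNLI premise/hypothesis pairs, encode them with the spaCy vectors
    # model, fit the Keras network, and save it next to the spaCy model data.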
    train_texts1, train_texts2, train_labels = read_snli(train_loc)
    dev_texts1, dev_texts2, dev_labels = read_snli(dev_loc)

    print("Loading spaCy")
    nlp = spacy.load("en_vectors_web_lg")
    assert nlp.path is not None
    print("Processing texts...")
    train_X = create_dataset(nlp, train_texts1, train_texts2, 100, shape[0])
    dev_X = create_dataset(nlp, dev_texts1, dev_texts2, 100, shape[0])

    print("Compiling network")
    model = build_model(get_embeddings(nlp.vocab), shape, settings)

    print(settings)
    model.fit(
        train_X,
        train_labels,
        validation_data=(dev_X, dev_labels),
        epochs=settings["nr_epoch"],
        batch_size=settings["batch_size"],
    )
    if not (nlp.path / "similarity").exists():
        (nlp.path / "similarity").mkdir()
    print("Saving to", nlp.path / "similarity")
    weights = model.get_weights()
    # Remove the embedding matrix; we can reconstruct it from the vocab at load time.
    del weights[1]
    with (nlp.path / "similarity" / "model").open("wb") as file_:
        pickle.dump(weights, file_)
    with (nlp.path / "similarity" / "config.json").open("w") as file_:
        file_.write(model.to_json())
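
Note: train() above pickles the weights with the embedding matrix (weights[1]) deleted, so any loader has to splice it back in before calling set_weights. A minimal sketch of that reconstruction, assuming the same get_embeddings helper used above and Keras's model_from_json; the name load_similarity_model is hypothetical:

import pickle

from keras.models import model_from_json

def load_similarity_model(nlp):
    # Rebuild the architecture from config.json, then re-insert the
    # embedding matrix that train() removed before pickling.
    with (nlp.path / "similarity" / "config.json").open() as file_:
        model = model_from_json(file_.read())
    with (nlp.path / "similarity" / "model").open("rb") as file_:
        weights = pickle.load(file_)
    # Slot 1 held the embeddings; reconstruct them from the vocab.
    weights.insert(1, get_embeddings(nlp.vocab))
    model.set_weights(weights)
    return model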
Example #2
def train(train_loc, dev_loc, shape, settings):
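    # Same SNLI training flow, but the texts are encoded with get_word_ids
    # over nlp.pipe, and the first weight array (presumably the embedding
    # matrix) is dropped before pickling.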
    train_texts1, train_texts2, train_labels = read_snli(train_loc)
    dev_texts1, dev_texts2, dev_labels = read_snli(dev_loc)

    print("Loading spaCy")
    nlp = spacy.load('en')
    assert nlp.path is not None
    print("Compiling network")
    model = build_model(get_embeddings(nlp.vocab), shape, settings)
    print("Processing texts...")
    Xs = []
    for texts in (train_texts1, train_texts2, dev_texts1, dev_texts2):
        Xs.append(get_word_ids(
            list(nlp.pipe(texts, n_threads=20, batch_size=20000)),
            max_length=shape[0],
            rnn_encode=settings['gru_encode'],
            tree_truncate=settings['tree_truncate']))
    train_X1, train_X2, dev_X1, dev_X2 = Xs
    print(settings)
    model.fit(
        [train_X1, train_X2],
        train_labels,
        validation_data=([dev_X1, dev_X2], dev_labels),
        nb_epoch=settings['nr_epoch'],
        batch_size=settings['batch_size'])
    if not (nlp.path / 'similarity').exists():
        (nlp.path / 'similarity').mkdir()
    print("Saving to", nlp.path / 'similarity')
    weights = model.get_weights()
    with (nlp.path / 'similarity' / 'model').open('wb') as file_:
        pickle.dump(weights[1:], file_)
    with (nlp.path / 'similarity' / 'config.json').open('w') as file_:
        file_.write(model.to_json())
Example #4
def train(model_dir, train_loc, dev_loc, shape, settings):
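    # Training-only variant: the model is fit but never saved, and the
    # model_dir argument is accepted but unused here.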
    train_texts1, train_texts2, train_labels = read_snli(train_loc)
    dev_texts1, dev_texts2, dev_labels = read_snli(dev_loc)

    print("Loading spaCy")
    nlp = spacy.load('en')
    print("Compiling network")
    model = build_model(get_embeddings(nlp.vocab), shape, settings)
    print("Processing texts...")
    Xs = []
    for texts in (train_texts1, train_texts2, dev_texts1, dev_texts2):
        Xs.append(
            get_word_ids(list(nlp.pipe(texts, n_threads=20, batch_size=20000)),
                         max_length=shape[0],
                         rnn_encode=settings['gru_encode'],
                         tree_truncate=settings['tree_truncate']))
    train_X1, train_X2, dev_X1, dev_X2 = Xs
    print(settings)
    model.fit([train_X1, train_X2],
              train_labels,
              validation_data=([dev_X1, dev_X2], dev_labels),
              nb_epoch=settings['nr_epoch'],
              batch_size=settings['batch_size'])
Example #5
def train(model_dir, train_loc, dev_loc, shape, settings):
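    # Same training-only flow as Example #4; again nothing is saved.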
    train_texts1, train_texts2, train_labels = read_snli(train_loc)
    dev_texts1, dev_texts2, dev_labels = read_snli(dev_loc)
    
    print("Loading spaCy")
    nlp = spacy.load('en')
    print("Compiling network")
    model = build_model(get_embeddings(nlp.vocab), shape, settings)
    print("Processing texts...")
    Xs = []
    for texts in (train_texts1, train_texts2, dev_texts1, dev_texts2):
        Xs.append(get_word_ids(
            list(nlp.pipe(texts, n_threads=20, batch_size=20000)),
            max_length=shape[0],
            rnn_encode=settings['gru_encode'],
            tree_truncate=settings['tree_truncate']))
    train_X1, train_X2, dev_X1, dev_X2 = Xs
    print(settings)
    model.fit(
        [train_X1, train_X2],
        train_labels,
        validation_data=([dev_X1, dev_X2], dev_labels),
        nb_epoch=settings['nr_epoch'],
        batch_size=settings['batch_size'])
Example #6
def attention_foldrun(X, X2, y, name, Xte=None, Xte2=None, start_fold=0):
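    # Run the decomposable-attention model on two consecutive StratifiedKFold
    # folds (start_fold and start_fold + 1), saving out-of-fold and test-set
    # predictions to pickles for later use as stacking features.
    # Relies on module-level settings, shape and nlp from the snippet above.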
    
    skf = StratifiedKFold(n_splits=10, random_state=111, shuffle=True)
    if isinstance(X, pd.DataFrame):
        X = X.values
    if isinstance(y, pd.DataFrame):
        y = y.is_duplicate.values
    if isinstance(y, pd.Series):
        y = y.values
    print('Running Decomposable Attention model with parameters:', settings)
    
    losses = []
    train_splits = []
    val_splits = []
    for tr_index, val_index in skf.split(X, y):
        train_splits.append(tr_index)
        val_splits.append(val_index)
        
    for i in range(start_fold, start_fold + 2):
        X_trq1, X_valq1 = X[train_splits[i]], X[val_splits[i]]
        X_trq2, X_valq2 = X2[train_splits[i]], X2[val_splits[i]]
        y_tr, y_val = y[train_splits[i]], y[val_splits[i]]
        y_tr = to_categorical(y_tr)
        y_val = to_categorical(y_val)
        t = time.time()
        
        print('Start training on fold: {}'.format(i))
        callbacks = [ModelCheckpoint('checks/decomposable_{}_10SKF_fold{}.h5'.format(name, i),
                                     monitor='val_loss',
                                     verbose=0, save_best_only=True),
                     EarlyStopping(monitor='val_loss', patience=4, verbose=1)]
        
        model = build_model(get_embeddings(nlp.vocab), shape, settings)
        model.fit([X_trq1, X_trq2], y_tr,
                  validation_data=([X_valq1, X_valq2], y_val),
                  nb_epoch=settings['nr_epoch'],
                  batch_size=settings['batch_size'],
                  callbacks=callbacks)
        val_pred = model.predict([X_valq1, X_valq2], batch_size=64)
        score = log_loss(y_val, val_pred)
        losses.append(score)

        print('Predicting training set.')
        val_pred = pd.DataFrame(val_pred, index=val_splits[i])
        val_pred.columns = ['attention_feat1', 'attention_feat2']
        val_pred.to_pickle('OOF_preds/train_attentionpreds_fold{}.pkl'.format(i))
        print(val_pred.head())
        if Xte is not None:
            print('Predicting test set.')
            test_preds = model.predict([Xte, Xte2], batch_size=64)
            test_preds = pd.DataFrame(test_preds)
            test_preds.columns = ['attention_feat1', 'attention_feat2']
            test_preds.to_pickle('OOF_preds/test_attentionpreds_fold{}.pkl'.format(i))
            del test_preds
            gc.collect()
            
        print('Final score for fold {} :'.format(i), score, '\n',
              'Time it took to train and predict on fold:', time.time() - t, '\n')
        del X_trq1, X_valq1, X_trq2, X_valq2, y_tr, y_val, val_pred
        gc.collect()
    print('Mean log loss over the folds run:', np.array(losses).mean(axis=0))
    return
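
A hypothetical invocation of the fold runner, assuming padded word-id matrices q1_ids/q2_ids for the two question columns, labels y, and precomputed test matrices (all of these names are placeholders):

attention_foldrun(q1_ids, q2_ids, y, 'quora', Xte=test_q1_ids, Xte2=test_q2_ids, start_fold=0)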
Example #7
def train(train_loc, dev_loc, shape, settings):
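    # Extended variant: read_snli also returns styling arrays and TWPs, which
    # are passed through get_word_ids; weights[1:] are saved to a hard-coded
    # path instead of nlp.path.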
    train_texts1, train_texts2, train_labels, train_styling_arrays_1, train_styling_arrays_2, train_TWPs_1, train_TWPs_2 = read_snli(
        train_loc)
    dev_texts1, dev_texts2, dev_labels, dev_styling_arrays_1, dev_styling_arrays_2, dev_TWPs_1, dev_TWPs_2 = read_snli(
        dev_loc)

    print("Loading spaCy")
    nlp = en_core_web_sm.load()
    # en_vectors_web_lg.load(vocab=nlp.vocab)
    path = '/home/ankesh/div_merging_models/alpha1/'
    print("Compiling network")
    # sense = sense2vec.load()
    model = build_model(get_embeddings(nlp.vocab), shape, settings)
    print("Processing texts...")
    Xs = []
    # train_texts1 = train_texts1[:1000]
    # train_styling_arrays_1 = train_styling_arrays_1[:10]
    # train_TWPs_1 = train_TWPs_1[:10]
    # train_texts2 = train_texts2[:10]
    # train_styling_arrays_2 = train_styling_arrays_2[:10]
    # train_TWPs_2 = train_TWPs_2[:10]
    # dev_texts1 = dev_texts1[:1]
    # dev_styling_arrays_1 = dev_styling_arrays_1[:1]
    # dev_TWPs_1 = dev_TWPs_1[:1]
    # dev_texts2 = dev_texts2[:1]
    # dev_styling_arrays_2 = dev_styling_arrays_2[:1]
    # dev_TWPs_2 = dev_TWPs_2[:1]
    # print (train_texts1[0])
    # print (train_texts2[0])
    for texts, styling_array, TWP in (
            (train_texts1, train_styling_arrays_1, train_TWPs_1),
            (train_texts2, train_styling_arrays_2, train_TWPs_2),
            (dev_texts1, dev_styling_arrays_1, dev_TWPs_1),
            (dev_texts2, dev_styling_arrays_2, dev_TWPs_2)):
        Xs.append(
            get_word_ids(list(nlp.pipe(texts, n_threads=20, batch_size=20000)),
                         styling_array,
                         TWP,
                         max_length=shape[0],
                         rnn_encode=settings['gru_encode'],
                         tree_truncate=settings['tree_truncate']))
    train_X1, train_X2, dev_X1, dev_X2 = Xs
    # print (train_X1[0])
    # print ('-'*10)
    # print (train_X2[0])
    # print ("shape of train X1", train_X1.shape)
    # print("+"*40)
    print(settings)
    model.fit([train_X1, train_X2],
              train_labels,
              validation_data=([dev_X1, dev_X2], dev_labels),
              nb_epoch=settings['nr_epoch'],
              batch_size=settings['batch_size'])
    # nlp.path is not used for this model; make sure the output directory exists.
    if not os.path.exists(path + 'similarity'):
        os.makedirs(path + 'similarity')
    print("Saving to", path + 'similarity')
    weights = model.get_weights()
    with open(path + 'similarity/' + 'model', 'wb') as file_:
        pickle.dump(weights[1:], file_)
    with open(path + 'similarity/' + 'config.json', 'w') as file_:
        file_.write(model.to_json())