Example #1
def Train(model, batch_size, n_epochs, train_c_new, train_r_new, train_l_new,
          dev_c_new, dev_r_new, dev_l_new, test_c_new, test_r_new):
    print("Now training the model...")
    # histories = Histories()
    histories = my_callbacks.Histories()
    start_time = time.time()

    # start_time = time.time()
    # compute_recall_ks(y_pred[:,0])
    # print("---model evaluation time takes %s seconds ---" % (time.time() - start_time))
    bestAcc = 0.0
    patience = 0

    print("\tbatch_size={}, nb_epoch={}".format(batch_size, n_epochs))

    # for ep in range(1, args.n_epochs):
    for ep in range(1, n_epochs):

        # run a single epoch per outer-loop iteration so the early-stopping
        # logic below can check validation accuracy after every epoch
        model.fit([train_c_new, train_r_new],
                  train_l_new,
                  batch_size=batch_size,
                  epochs=1,
                  callbacks=[histories],
                  validation_data=([dev_c_new, dev_r_new], dev_l_new),
                  verbose=1)

        curAcc = histories.accs[0]
        if curAcc >= bestAcc:
            bestAcc = curAcc
            patience = 0
        else:
            patience = patience + 1

        # classify the test set
        y_pred = model.predict([test_c_new, test_r_new])

        print("Perform on test set after Epoch: " + str(ep) + "...!")
        recall_k = compute_recall_ks(y_pred[:, 0])

        # stop training once patience exceeds 10 epochs without improvement
        if patience > 10:
            print("Early stopping at epoch: " + str(ep))
            break
    print("---Training finished, model training time takes %s seconds ---" %
          (time.time() - start_time))
    return model
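
The loop above reads histories.accs[0] right after each single-epoch fit() call. my_callbacks.Histories itself is not shown in these examples; below is a minimal sketch of what it is assumed to do (record one validation accuracy per fit() call). The project's actual callback may track more metrics, and self.validation_data is only populated by older standalone-Keras versions.

import numpy as np
from keras.callbacks import Callback


class Histories(Callback):
    def on_train_begin(self, logs=None):
        # reset per fit() call; each call above runs exactly one epoch
        self.accs = []

    def on_epoch_end(self, epoch, logs=None):
        # self.validation_data is [input_1, ..., input_n, labels, sample_weights]
        *inputs, labels, _ = self.validation_data
        probs = self.model.predict(inputs)[:, 0]
        self.accs.append(np.mean((probs > 0.5) == np.asarray(labels)))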
Example #2
    neg_branch = shared_cnn(neg_input)

    #concatenated = merge([pos_branch, neg_branch], mode='concat',name="coherence_out")
    concatenated = concatenate([pos_branch, neg_branch],
                               axis=-1,
                               name="coherence_out")
    # the output is two latent coherence scores

    final_model = Model([pos_input, neg_input], concatenated)

    #final_model.compile(loss='ranking_loss', optimizer='adam')
    final_model.compile(loss={'coherence_out': ranking_loss},
                        optimizer=opts.learn_alg)

    # setting callback
    histories = my_callbacks.Histories()

    print(shared_cnn.summary())
    #print(final_model.summary())

    print("------------------------------------------------")

    # build the model file name
    if opts.f_list != "":
        ff = opts.f_list
        m_type = "Ext.CNN."
    else:
        ff = "None"
        m_type = "CNN."

    model_name = opts.model_dir + m_type + str(opts.p_num) + "_" + str(opts.dropout_ratio) + "_"+ str(opts.emb_size) + "_"+ str(opts.maxlen) + "_" \
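
Example #2 compiles with loss={'coherence_out': ranking_loss}, where coherence_out concatenates the positive and negative coherence scores produced by shared_cnn. The ranking_loss function is not shown in the snippet; a minimal sketch, assuming a pairwise hinge loss with a margin of 1.0 (both the margin value and the score ordering, positive first, are assumptions), would be:

from keras import backend as K


def ranking_loss(y_true, y_pred):
    # y_pred[:, 0] is assumed to be the positive-document score and
    # y_pred[:, 1] the negative one; y_true is unused because the ordering
    # of the concatenated outputs already encodes the label
    pos = y_pred[:, 0]
    neg = y_pred[:, 1]
    return K.mean(K.maximum(0.0, 1.0 + neg - pos))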
Example #3
def main():

    parser = argparse.ArgumentParser()
    parser.register('type', 'bool', str2bool)
    parser.add_argument('--emb_dim',
                        type=int,
                        default=300,
                        help='Embeddings dimension')
    parser.add_argument('--hidden_size',
                        type=int,
                        default=300,
                        help='Hidden size')
    parser.add_argument('--batch_size',
                        type=int,
                        default=256,
                        help='Batch size')
    parser.add_argument('--n_epochs', type=int, default=50, help='Num epochs')
    parser.add_argument('--lr',
                        type=float,
                        default=0.001,
                        help='Learning rate')
    parser.add_argument('--optimizer',
                        type=str,
                        default='adam',
                        help='Optimizer')
    parser.add_argument('--n_recurrent_layers',
                        type=int,
                        default=1,
                        help='Num recurrent layers')
    parser.add_argument('--input_dir',
                        type=str,
                        default='./dataset/',
                        help='Input dir')
    parser.add_argument('--save_model',
                        type='bool',
                        default=True,
                        help='Whether to save the model')
    parser.add_argument('--model_fname',
                        type=str,
                        default='model/dual_encoder_lstm_classifier.h5',
                        help='Model filename')
    parser.add_argument('--embedding_file',
                        type=str,
                        default='embeddings/glove.840B.300d.txt',
                        help='Embedding filename')
    parser.add_argument('--seed', type=int, default=1337, help='Random seed')
    args = parser.parse_args()
    print('Model args: ', args)
    np.random.seed(args.seed)

    print("Starting...")

    # first, build index mapping words in the embeddings set
    # to their embedding vector

    print('Now indexing word vectors...')

    embeddings_index = {}
    f = open(args.embedding_file, 'r')
    for line in f:
        values = line.split()
        word = values[0]
        try:
            coefs = np.asarray(values[1:], dtype='float32')
        except ValueError:
            continue
        embeddings_index[word] = coefs
    f.close()

    MAX_SEQUENCE_LENGTH, MAX_NB_WORDS, word_index = pickle.load(
        open(args.input_dir + 'params.pkl', 'rb'))

    print("MAX_SEQUENCE_LENGTH: {}".format(MAX_SEQUENCE_LENGTH))
    print("MAX_NB_WORDS: {}".format(MAX_NB_WORDS))

    print("Now loading embedding matrix...")
    num_words = min(MAX_NB_WORDS, len(word_index)) + 1
    embedding_matrix = np.zeros((num_words, args.emb_dim))
    for word, i in word_index.items():
        if i >= MAX_NB_WORDS:
            continue
        embedding_vector = embeddings_index.get(word)
        if embedding_vector is not None:
            # words not found in embedding index will be all-zeros.
            embedding_matrix[i] = embedding_vector

    print("Now building dual encoder lstm model...")
    # define lstm encoder
    encoder = Sequential()
    encoder.add(
        Embedding(output_dim=args.emb_dim,
                  input_dim=num_words,  # must match embedding_matrix.shape[0]
                  input_length=MAX_SEQUENCE_LENGTH,
                  weights=[embedding_matrix],
                  mask_zero=True,
                  trainable=True))

    encoder.add(LSTM(units=args.hidden_size))

    context_input = Input(shape=(MAX_SEQUENCE_LENGTH, ), dtype='int32')
    response_input = Input(shape=(MAX_SEQUENCE_LENGTH, ), dtype='int32')

    # encode the context and the response
    context_branch = encoder(context_input)
    response_branch = encoder(response_input)

    # element-wise product of the two encodings (the Keras 1 merge(..., mode='mul')
    # API was removed in Keras 2; multiply lives in keras.layers)
    concatenated = keras.layers.multiply([context_branch, response_branch])
    out = Dense(1, activation="sigmoid")(concatenated)

    dual_encoder = Model([context_input, response_input], out)
    dual_encoder.compile(loss='binary_crossentropy', optimizer=args.optimizer)

    print(encoder.summary())
    print(dual_encoder.summary())

    print("Now loading UDC data...")

    train_c, train_r, train_l = pickle.load(
        open(args.input_dir + 'train.pkl', 'rb'))
    test_c, test_r, test_l = pickle.load(
        open(args.input_dir + 'test.pkl', 'rb'))
    dev_c, dev_r, dev_l = pickle.load(open(args.input_dir + 'dev.pkl', 'rb'))

    print('Found %s training samples.' % len(train_c))
    print('Found %s dev samples.' % len(dev_c))
    print('Found %s test samples.' % len(test_c))

    print("Now training the model...")

    histories = my_callbacks.Histories()

    bestAcc = 0.0
    patience = 0

    print("\tbatch_size={}, nb_epoch={}".format(args.batch_size,
                                                args.n_epochs))

    for ep in range(1, args.n_epochs):

        dual_encoder.fit([train_c, train_r],
                         train_l,
                         batch_size=args.batch_size,
                         epochs=1,
                         callbacks=[histories],
                         validation_data=([dev_c, dev_r], dev_l),
                         verbose=1)

        curAcc = histories.accs[0]
        if curAcc >= bestAcc:
            bestAcc = curAcc
            patience = 0
        else:
            patience = patience + 1

        # classify the test set
        y_pred = dual_encoder.predict([test_c, test_r])

        print("Perform on test set after Epoch: " + str(ep) + "...!")
        recall_k = compute_recall_ks(y_pred[:, 0])

        # stop training once patience exceeds 10 epochs without improvement
        if patience > 10:
            print("Early stopping at epoch: " + str(ep))
            break

    if args.save_model:
        print("Now saving the model... at {}".format(args.model_fname))
        dual_encoder.save(args.model_fname)
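
Examples #1 and #3 score the test set with compute_recall_ks(y_pred[:, 0]) after every epoch. The function is not included in the snippets; a minimal sketch, assuming the usual Ubuntu Dialogue Corpus test layout (candidates grouped in blocks of 10 per context, with the ground-truth response first in each block, reported as recall@k in 1-of-2 and 1-of-10 settings), is:

import numpy as np


def recall_at_k(probs, k, group_size, test_size=10):
    # the test set is laid out in blocks of test_size candidates per context,
    # with the ground-truth response at index 0 of every block
    n_groups = len(probs) // test_size
    n_correct = 0
    for i in range(n_groups):
        block = np.asarray(probs[i * test_size:(i + 1) * test_size])[:group_size]
        if 0 in np.argsort(block)[::-1][:k]:
            n_correct += 1
    return n_correct / float(n_groups)


def compute_recall_ks(probs):
    recall_k = {}
    for group_size in [2, 10]:
        recall_k[group_size] = {}
        for k in [1, 2, 5]:
            if k < group_size:
                r = recall_at_k(probs, k, group_size)
                recall_k[group_size][k] = r
                print("recall@%d in 1-of-%d candidates: %.3f" % (k, group_size, r))
    return recall_k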
Example #4
def main():

    parser = argparse.ArgumentParser()
    parser.register('type', 'bool', str2bool)
    parser.add_argument('--emb_dim',
                        type=int,
                        default=300,
                        help='Embeddings dimension')
    parser.add_argument('--hidden_size',
                        type=int,
                        default=300,
                        help='Hidden size')
    parser.add_argument('--batch_size',
                        type=int,
                        default=256,
                        help='Batch size')
    parser.add_argument('--n_epochs', type=int, default=50, help='Num epochs')
    parser.add_argument('--lr',
                        type=float,
                        default=0.001,
                        help='Learning rate')
    parser.add_argument('--optimizer',
                        type=str,
                        default='adam',
                        help='Optimizer')
    parser.add_argument("--dropout_ratio",
                        type=float,
                        default=0.5,
                        help="ratio of cells to drop out")
    parser.add_argument('--n_recurrent_layers',
                        type=int,
                        default=1,
                        help='Num recurrent layers')
    parser.add_argument("--w_size",
                        type=int,
                        default=5,
                        help="window size length of neighborhood in words")
    parser.add_argument("--pool_length",
                        type=int,
                        default=6,
                        help="length for max pooling")
    parser.add_argument(
        "--nb_filter",
        type=int,
        default=150,
        help="nb of filter to be applied in convolution over words")
    parser.add_argument('--input_dir',
                        type=str,
                        default='./dataset/',
                        help='Input dir')
    parser.add_argument('--save_model',
                        type='bool',
                        default=True,
                        help='Whether to save the model')
    parser.add_argument('--model_fname',
                        type=str,
                        default='model/dual_encoder_lstm_classifier.h5',
                        help='Model filename')
    parser.add_argument('--embedding_file',
                        type=str,
                        default='embeddings/glove.840B.300d.txt',
                        help='Embedding filename')
    parser.add_argument('--seed', type=int, default=1337, help='Random seed')
    args = parser.parse_args()
    print('Model args: ', args)
    np.random.seed(args.seed)

    if not os.path.exists(args.model_fname):
        print("No pre-trained model...")
        print("Start building model...")

        # first, build index mapping words in the embeddings set
        # to their embedding vector

        print('Indexing word vectors.')

        embeddings_index = {}
        f = open(args.embedding_file, 'r')
        for line in f:
            values = line.split()
            word = values[0]
            #coefs = np.asarray(values[1:], dtype='float32')

            try:
                coefs = np.asarray(values[1:], dtype='float32')
            except ValueError:
                continue
            embeddings_index[word] = coefs
        f.close()

        print("Now loading UDC data...")

        train_c, train_r, train_l = pickle.load(
            open(args.input_dir + 'train.pkl', 'rb'))
        test_c, test_r, test_l = pickle.load(
            open(args.input_dir + 'test.pkl', 'rb'))
        dev_c, dev_r, dev_l = pickle.load(
            open(args.input_dir + 'dev.pkl', 'rb'))

        print('Found %s training samples.' % len(train_c))
        print('Found %s dev samples.' % len(dev_c))
        print('Found %s test samples.' % len(test_c))

        MAX_SEQUENCE_LENGTH, MAX_NB_WORDS, word_index = pickle.load(
            open(args.input_dir + 'params.pkl', 'rb'))

        print("MAX_SEQUENCE_LENGTH: {}".format(MAX_SEQUENCE_LENGTH))
        print("MAX_NB_WORDS: {}".format(MAX_NB_WORDS))

        vocabs, E = init_vocab(args.emb_dim)

        print("Now loading entity-grid data...")

        train_egrid, train_label = load_and_numberize_egrids_with_labels(
            filelist="./dataset/list.train",
            maxlen=MAX_SEQUENCE_LENGTH,
            w_size=args.w_size,
            vocabs=vocabs)

        dev_egrid, dev_label = load_and_numberize_egrids_with_labels(
            filelist="./dataset/list.dev",
            maxlen=MAX_SEQUENCE_LENGTH,
            w_size=args.w_size,
            vocabs=vocabs)

        test_egrid, test_label = load_and_numberize_egrids_with_labels(
            filelist="./dataset/list.test",
            maxlen=MAX_SEQUENCE_LENGTH,
            w_size=args.w_size,
            vocabs=vocabs)

        #print (train_label[:10])
        #print (list(train_l[:10]))

        #assert train_label == list(train_l)
        #assert dev_label == list(dev_l)
        #assert test_label == list(test_l)

        #randomly shuffle the training data
        #np.random.shuffle(train_egrid)

        print("Now loading embedding matrix...")
        num_words = min(MAX_NB_WORDS, len(word_index)) + 1
        embedding_matrix = np.zeros((num_words, args.emb_dim))
        for word, i in word_index.items():
            if i >= MAX_NB_WORDS:
                continue
            embedding_vector = embeddings_index.get(word)
            if embedding_vector is not None:
                # words not found in embedding index will be all-zeros.
                embedding_matrix[i] = embedding_vector

        print("Now building the dual encoder lstm model...")

        encoder = Sequential()
        encoder.add(
            Embedding(output_dim=args.emb_dim,
                      input_dim=num_words,  # must match embedding_matrix.shape[0]
                      input_length=MAX_SEQUENCE_LENGTH,
                      weights=[embedding_matrix],
                      mask_zero=True,
                      trainable=True))

        encoder.add(LSTM(units=args.hidden_size))

        print("Now building the CNN egrid model...")

        sent_input = Input(shape=(MAX_SEQUENCE_LENGTH, ), dtype='int32')

        x = Embedding(output_dim=args.emb_dim,
                      weights=[E],
                      input_dim=len(vocabs),
                      input_length=MAX_SEQUENCE_LENGTH,
                      trainable=True)(sent_input)

        # Keras 2 argument names: filters, kernel_size, padding, strides
        x = Convolution1D(filters=args.nb_filter,
                          kernel_size=args.w_size,
                          padding='valid',
                          activation='relu',
                          strides=1)(x)

        x = MaxPooling1D(pool_size=args.pool_length)(x)
        x = Dropout(args.dropout_ratio)(x)
        x = Flatten()(x)
        x = Dropout(args.dropout_ratio)(x)
        x = Dense(300)(x)

        cnn = Model(sent_input, x)

        context_input = Input(shape=(MAX_SEQUENCE_LENGTH, ), dtype='int32')
        response_input = Input(shape=(MAX_SEQUENCE_LENGTH, ), dtype='int32')
        egrid_input = Input(shape=(MAX_SEQUENCE_LENGTH, ), dtype='int32')

        # the context and response branches share the same LSTM encoder weights
        context_branch = encoder(context_input)
        response_branch = encoder(response_input)

        context_branch_cnn = cnn(egrid_input)

        # element-wise product of the two LSTM encodings, then concatenation with
        # the CNN branch (Keras 2 replacements for the removed merge(...) API)
        concatenated = keras.layers.multiply([context_branch, response_branch])
        concatenated = keras.layers.concatenate([concatenated, context_branch_cnn])
        out = Dense(1, activation="sigmoid")(concatenated)

        model = Model([context_input, response_input, egrid_input], out)

        model.compile(loss='binary_crossentropy', optimizer=args.optimizer)

        print(model.summary())

        print("Now training the model...")

        histories = my_callbacks.Histories()

        bestAcc = 0.0
        patience = 0

        print("\tbatch_size={}, nb_epoch={}".format(args.batch_size,
                                                    args.n_epochs))

        for ep in range(1, args.n_epochs):

            #model.fit([train_c, train_r], train_l,
            #batch_size=args.batch_size, nb_epoch=1, callbacks=[histories],
            #validation_data=([dev_c, dev_r], dev_l), verbose=1)

            model.fit([train_c, train_r, train_egrid],
                      train_l,
                      batch_size=args.batch_size,
                      epochs=1,
                      callbacks=[histories],
                      validation_data=([dev_c, dev_r, dev_egrid], dev_l),
                      verbose=1)

            #model.save(model_name + "_ep." + str(ep) + ".h5")

            curAcc = histories.accs[0]
            if curAcc >= bestAcc:
                bestAcc = curAcc
                patience = 0
            else:
                patience = patience + 1

            # classify the test set
            y_pred = model.predict([test_c, test_r, test_egrid])

            print("Perform on test set after Epoch: " + str(ep) + "...!")
            recall_k = compute_recall_ks(y_pred[:, 0])

            # stop training once patience exceeds 10 epochs without improvement
            if patience > 10:
                print("Early stopping at epoch: " + str(ep))
                break

        if args.save_model:
            print("Now saving the model... at {}".format(args.model_fname))
            model.save(args.model_fname)

    else:
        print("Found pre-trained model...")
        model = K_load_model(args.model_fname)

    return model
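
Examples #3, #4 and #5 all call parser.register('type', 'bool', str2bool) so that flags such as --save_model accept textual booleans. The helper itself is not shown; a one-line sketch (the accepted spellings are an assumption) is:

def str2bool(v):
    # treat common truthy spellings as True, everything else as False
    return str(v).lower() in ('yes', 'true', 't', '1')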
Example #5
def main():

    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    os.environ["CUDA_VISIBLE_DEVICES"] = "0"

    parser = argparse.ArgumentParser()
    parser.register('type', 'bool', str2bool)
    parser.add_argument('--emb_dim',
                        type=int,
                        default=300,
                        help='Embeddings dimension')
    parser.add_argument('--emb_trainable',
                        type='bool',
                        default=True,
                        help='Whether fine tune embeddings')
    parser.add_argument('--hidden_size',
                        type=int,
                        default=300,
                        help='Hidden size')
    parser.add_argument('--hidden_size_lstm',
                        type=int,
                        default=200,
                        help='Hidden size')
    parser.add_argument('--batch_size',
                        type=int,
                        default=256,
                        help='Batch size')
    parser.add_argument('--n_epochs', type=int, default=50, help='Num epochs')
    parser.add_argument('--lr',
                        type=float,
                        default=0.001,
                        help='Learning rate')
    parser.add_argument('--optimizer',
                        type=str,
                        default='adam',
                        help='Optimizer')
    parser.add_argument('--n_recurrent_layers',
                        type=int,
                        default=1,
                        help='Num recurrent layers')
    parser.add_argument('--input_dir',
                        type=str,
                        default='./dataset/',
                        help='Input dir')
    parser.add_argument('--save_model',
                        type='bool',
                        default=True,
                        help='Whether to save the model')
    parser.add_argument('--model_fname',
                        type=str,
                        default='model/model.h5',
                        help='Model filename')
    parser.add_argument('--embedding_file',
                        type=str,
                        default='embeddings/embeddings.vec',
                        help='Embedding filename')
    parser.add_argument('--seed', type=int, default=1337, help='Random seed')
    args = parser.parse_args()
    print('Model args: ', args)
    np.random.seed(args.seed)

    print("Starting...")

    # first, build index mapping words in the embeddings set
    # to their embedding vector

    print('Now indexing word vectors...')

    embeddings_index = {}
    f = open(args.embedding_file, 'r')
    for line in f:
        values = line.split()
        word = values[0]
        try:
            coefs = np.asarray(values[1:], dtype='float32')
        except ValueError:
            continue
        embeddings_index[word] = coefs
    f.close()

    MAX_SEQUENCE_LENGTH, MAX_NB_WORDS, word_index = pickle.load(
        open(args.input_dir + 'params.pkl', 'rb'))

    print("MAX_SEQUENCE_LENGTH: {}".format(MAX_SEQUENCE_LENGTH))
    print("MAX_NB_WORDS: {}".format(MAX_NB_WORDS))

    print("Now loading embedding matrix...")
    num_words = min(MAX_NB_WORDS, len(word_index)) + 1  # +1 for the padding index 0
    embedding_matrix = np.zeros((num_words, args.emb_dim))
    for word, i in word_index.items():
        if i >= MAX_NB_WORDS:
            continue
        embedding_vector = embeddings_index.get(word)
        if embedding_vector is not None:
            embedding_matrix[i] = embedding_vector

    print("Now building dual encoder lstm model...")

    # define lstm encoder

    encoder_input = Input(shape=(MAX_SEQUENCE_LENGTH, ), dtype='int32')
    embedding = Embedding(output_dim=args.emb_dim,
                          input_dim=num_words,  # must match embedding_matrix.shape[0]
                          input_length=MAX_SEQUENCE_LENGTH,
                          weights=[embedding_matrix],
                          mask_zero=True,
                          trainable=args.emb_trainable)
    embedded_input = embedding(encoder_input)
    output = LSTM(units=args.hidden_size)(embedded_input)
    encoder = Model(encoder_input, [output, embedded_input])
    print(encoder.summary())

    context_input = Input(shape=(MAX_SEQUENCE_LENGTH, ), dtype='int32')
    response_input = Input(shape=(MAX_SEQUENCE_LENGTH, ), dtype='int32')

    # encode the context and the response
    context_branch, context_embed = encoder(context_input)
    response_branch, response_embed = encoder(response_input)

    # compute the sequence level similarity vector
    S = keras.layers.multiply([context_branch, response_branch])

    # compute the word level similarity matrix
    embed_mul = keras.layers.dot([context_embed, response_embed], axes=2)
    # transform the word level similarity matrix into a vector
    # use the dedicated word-level LSTM size defined above (default 200)
    W = LSTM(units=args.hidden_size_lstm)(embed_mul)

    # concatenate the word and sequence level similarity vectors
    concatenated = keras.layers.concatenate([S, W])

    out = Dense(1, activation="sigmoid")(concatenated)

    model = Model([context_input, response_input], out)
    model.compile(loss='binary_crossentropy', optimizer=args.optimizer)

    print(model.summary())

    print("Now loading data...")

    train_c, train_r, train_l = pickle.load(
        open(args.input_dir + 'train.pkl', 'rb'))
    test_c, test_r, test_l = pickle.load(
        open(args.input_dir + 'test.pkl', 'rb'))
    dev_c, dev_r, dev_l = pickle.load(open(args.input_dir + 'dev.pkl', 'rb'))

    print('Found %s training samples.' % len(train_c))
    print('Found %s dev samples.' % len(dev_c))
    print('Found %s test samples.' % len(test_c))

    print("Now training the model...")

    histories = my_callbacks.Histories()

    bestAcc = 0.0
    patience = 0

    print("\tbatch_size={}, nb_epoch={}".format(args.batch_size,
                                                args.n_epochs))

    for ep in range(1, args.n_epochs):

        model.fit([train_c, train_r],
                  train_l,
                  batch_size=args.batch_size,
                  epochs=1,
                  callbacks=[histories],
                  validation_data=([dev_c, dev_r], dev_l),
                  verbose=1)

        curAcc = histories.accs[0]
        if curAcc >= bestAcc:
            bestAcc = curAcc
            patience = 0

            if args.save_model:
                print("Now saving the model... at {}".format(args.model_fname))
                model.save(args.model_fname)

        else:
            patience = patience + 1

        # stop training once patience exceeds 5 epochs without improvement
        if patience > 5:
            print("Early stopping at epoch: " + str(ep))
            break
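
For completeness, a usage sketch for the checkpoint that Example #5 saves on its best epoch. It assumes K_load_model is keras.models.load_model, as aliased in Example #4, and it would sit at the end of main() where args, test_c and test_r are still in scope:

    from keras.models import load_model as K_load_model

    # reload the best checkpoint and re-score the held-out test set
    best_model = K_load_model(args.model_fname)
    y_pred = best_model.predict([test_c, test_r])
    recall_k = compute_recall_ks(y_pred[:, 0])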