Пример #1
0
def _truncate_to_column(texts, max_len):
    """Clip every text to its first ``max_len`` whitespace-separated tokens.

    Args:
        texts: Iterable of strings.
        max_len: Maximum number of tokens kept per text.

    Returns:
        An object-dtype NumPy array with one row per text and a single
        column; each cell holds the kept tokens re-joined with one space.
    """
    clipped = [' '.join(text.split()[:max_len]) for text in texts]
    # dtype=object keeps the Python strings as-is; the new axis turns the
    # 1-D array into a column (n, 1).
    return np.array(clipped, dtype=object)[:, np.newaxis]


def main():
    """Train and evaluate the ELMo-based multi-class text classifier.

    Steps, in order: environment setup, loading the train/test/validation
    splits, truncating every text to ``max_len`` tokens, building and
    compiling the model, fitting it, and printing the evaluation results
    (classification report, confusion matrix, accuracy) for the test split.
    """
    # Directory Setting
    train_dir = "./data/multi_train.csv"
    test_dir = "./data/multi_test.csv"
    model_dir = "./model_save"

    # HyperParameter
    epoch = 1
    batch = 128
    max_len = 50
    hidden_units = 64
    target_names = ['0', '1', '2', '3']
    category_size = len(target_names)

    # Flow
    print("0. Setting Environment")
    set_env()

    print("1. load data")
    train_x, train_y, test_x, test_y, val_x, val_y = load_data(
        train_dir, test_dir, category_size)

    print("2. pre processing")
    train_x, val_x, test_x = train_x.tolist(), val_x.tolist(), test_x.tolist()

    # The same clipping/reshaping is applied to all three splits.
    # NOTE(review): presumably the model consumes one raw string per row --
    # confirm against the ELMo implementation.
    train_x = _truncate_to_column(train_x, max_len)
    val_x = _truncate_to_column(val_x, max_len)
    test_x = _truncate_to_column(test_x, max_len)

    print("3. build model")
    model = ELMo(hidden_units=hidden_units,
                 data_type="multi",
                 category_size=category_size)
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])

    callbacks = create_callbacks(model_dir)
    model.fit(x=train_x,
              y=train_y,
              epochs=epoch,
              batch_size=batch,
              validation_data=(val_x, val_y),
              callbacks=callbacks)

    print("4. evaluation")
    evaluation = Evaluation(model, test_x, test_y)
    accuracy, cf_matrix, report = evaluation.eval_classification(
        data_type="multi")
    print("## Target Names : ", target_names)
    print("## Classification Report \n", report)
    print("## Confusion Matrix \n", cf_matrix)
    print("## Accuracy \n", accuracy)
Пример #2
0
def main():
    """Run the multi-class TextCNN pipeline with a pre-built embedding matrix.

    Steps, in order: environment setup, data loading, pre-processing,
    building the embedding matrix from the tokenizer vocabulary and the
    embedding file, model build/compile/fit, and evaluation on the test
    split. Progress markers and results are printed to stdout.
    """
    # Input / output locations.
    train_csv = "./data/multi_train.csv"
    test_csv = "./data/multi_test.csv"
    checkpoint_dir = "./model_save"
    vectors_path = "./glove.6B.50d.txt"

    # Training configuration.
    n_epochs = 1
    batch_size = 256
    vector_size = 50
    labels = ['0', '1', '2', '3']
    n_classes = len(labels)

    print("0. Setting Environment")
    set_env()

    print("1. load data")
    splits = load_data(train_csv, test_csv, n_classes)
    x_train, y_train, x_test, y_test, x_val, y_val = splits

    print("2. pre processing")
    x_train, x_test, x_val, tokenizer = pre_processing(x_train, x_test, x_val)

    print("3. text to vector")
    weights = text_to_vector(
        tokenizer.word_index, vectors_path, word_dimension=vector_size)

    print("4. build model")
    cnn_options = {
        "sequence_len": x_train.shape[1],
        "embedding_matrix": weights,
        "embedding_dim": vector_size,
        "filter_sizes": [3, 4, 5],
        "flag": "pre_training",
        "data_type": "multi",
        "category_num": n_classes,
    }
    classifier = TextCNN(**cnn_options)
    classifier.compile(
        optimizer="adam",
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    checkpoints = create_callbacks(checkpoint_dir)

    fit_options = {
        "x": x_train,
        "y": y_train,
        "epochs": n_epochs,
        "batch_size": batch_size,
        "validation_data": (x_val, y_val),
        "callbacks": checkpoints,
    }
    classifier.fit(**fit_options)

    print("5. evaluation")
    evaluator = Evaluation(classifier, x_test, y_test)
    results = evaluator.eval_classification(data_type="multi")
    acc, confusion, summary = results

    # Same headings and order as before, emitted from one table.
    outputs = (
        ("## Target Names : ", labels),
        ("## Classification Report \n", summary),
        ("## Confusion Matrix \n", confusion),
        ("## Accuracy \n", acc),
    )
    for heading, value in outputs:
        print(heading, value)
def main():
    """Run the binary TextCNN pipeline with the "self_training" flag.

    Steps, in order: environment setup, data loading, pre-processing,
    model build/compile/fit, and evaluation on the test split. The model
    receives the vocabulary size (tokenizer word count plus one) in place
    of an embedding matrix. Progress markers and results are printed to
    stdout.
    """
    # Input / output locations.
    train_csv = "../data/binary_train.csv"
    test_csv = "../data/binary_test.csv"
    checkpoint_dir = "./model_save"

    # Training configuration.
    n_epochs = 2
    batch_size = 256

    print("0. Setting Environment")
    set_env()

    print("1. load data")
    splits = load_data(train_csv, test_csv)
    x_train, y_train, x_test, y_test, x_val, y_val = splits

    print("2. pre processing")
    x_train, x_test, x_val, tokenizer = pre_processing(x_train, x_test, x_val)

    print("3. build model")
    # Evaluated in the same order as the original keyword arguments.
    seq_len = x_train.shape[1]
    vocab_size = len(tokenizer.word_index) + 1
    classifier = TextCNN(
        sequence_len=seq_len,
        embedding_matrix=vocab_size,
        embedding_dim=300,
        filter_sizes=[3, 4, 5],
        flag="self_training",
        data_type="binary",
    )
    classifier.compile(
        loss='binary_crossentropy',
        optimizer='adam',
        metrics=['accuracy'],
    )

    checkpoints = create_callbacks(checkpoint_dir)
    validation_pair = (x_val, y_val)
    classifier.fit(
        x=x_train,
        y=y_train,
        epochs=n_epochs,
        batch_size=batch_size,
        validation_data=validation_pair,
        callbacks=checkpoints,
    )

    print("4. evaluation")
    evaluator = Evaluation(classifier, x_test, y_test)
    acc, confusion, summary = evaluator.eval_classification(
        data_type="binary")

    for heading, value in (
        ("## Classification Report \n", summary),
        ("## Confusion Matrix \n", confusion),
        ("## Accuracy \n", acc),
    ):
        print(heading, value)