def main(): # Directory Setting train_dir = "./data/multi_train.csv" test_dir = "./data/multi_test.csv" model_dir = "./model_save" # HyperParameter epoch = 1 batch = 128 max_len = 50 hidden_units = 64 target_names = ['0', '1', '2', '3'] # Flow print("0. Setting Environment") set_env() print("1. load data") train_x, train_y, test_x, test_y, val_x, val_y = load_data( train_dir, test_dir, len(target_names)) print("2. pre processing") train_x, val_x, test_x = train_x.tolist(), val_x.tolist(), test_x.tolist() train_x = [' '.join(t.split()[0:max_len]) for t in train_x] train_x = np.array(train_x, dtype=object)[:, np.newaxis] val_x = [' '.join(t.split()[0:max_len]) for t in val_x] val_x = np.array(val_x, dtype=object)[:, np.newaxis] test_x = [' '.join(t.split()[0:max_len]) for t in test_x] test_x = np.array(test_x, dtype=object)[:, np.newaxis] print("3. build model") model = ELMo(hidden_units=hidden_units, data_type="multi", category_size=len(target_names)) model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy']) callbacks = create_callbacks(model_dir) model.fit(x=train_x, y=train_y, epochs=epoch, batch_size=batch, validation_data=(val_x, val_y), callbacks=callbacks) print("4. evaluation") evaluation = Evaluation(model, test_x, test_y) accuracy, cf_matrix, report = evaluation.eval_classification( data_type="multi") print("## Target Names : ", target_names) print("## Classification Report \n", report) print("## Confusion Matrix \n", cf_matrix) print("## Accuracy \n", accuracy)
def main(): # Directory Setting train_dir = "./data/multi_train.csv" test_dir = "./data/multi_test.csv" model_dir = "./model_save" embedding_dir = "./glove.6B.50d.txt" # HyperParameter epoch = 1 batch = 256 embedding_dim = 50 target_names = ['0', '1', '2', '3'] # Flow print("0. Setting Environment") set_env() print("1. load data") train_x, train_y, test_x, test_y, val_x, val_y = load_data( train_dir, test_dir, len(target_names)) print("2. pre processing") train_x, test_x, val_x, tokenizer = pre_processing(train_x, test_x, val_x) print("3. text to vector") embedding_matrix = text_to_vector(tokenizer.word_index, embedding_dir, word_dimension=embedding_dim) print("4. build model") model = TextCNN(sequence_len=train_x.shape[1], embedding_matrix=embedding_matrix, embedding_dim=embedding_dim, filter_sizes=[3, 4, 5], flag="pre_training", data_type="multi", category_num=len(target_names)) model.compile(optimizer="adam", loss='categorical_crossentropy', metrics=['accuracy']) callbacks = create_callbacks(model_dir) model.fit(x=train_x, y=train_y, epochs=epoch, batch_size=batch, validation_data=(val_x, val_y), callbacks=callbacks) print("5. evaluation") evaluation = Evaluation(model, test_x, test_y) accuracy, cf_matrix, report = evaluation.eval_classification( data_type="multi") print("## Target Names : ", target_names) print("## Classification Report \n", report) print("## Confusion Matrix \n", cf_matrix) print("## Accuracy \n", accuracy)
def main(): # Directory Setting train_dir = "../data/binary_train.csv" test_dir = "../data/binary_test.csv" model_dir = "./model_save" # HyperParameter epoch = 2 batch = 256 # Flow print("0. Setting Environment") set_env() print("1. load data") train_x, train_y, test_x, test_y, val_x, val_y = load_data( train_dir, test_dir) print("2. pre processing") train_x, test_x, val_x, tokenizer = pre_processing(train_x, test_x, val_x) print("3. build model") model = TextCNN(sequence_len=train_x.shape[1], embedding_matrix=len(tokenizer.word_index) + 1, embedding_dim=300, filter_sizes=[3, 4, 5], flag="self_training", data_type="binary") model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy']) callbacks = create_callbacks(model_dir) model.fit(x=train_x, y=train_y, epochs=epoch, batch_size=batch, validation_data=(val_x, val_y), callbacks=callbacks) print("4. evaluation") evaluation = Evaluation(model, test_x, test_y) accuracy, cf_matrix, report = evaluation.eval_classification( data_type="binary") print("## Classification Report \n", report) print("## Confusion Matrix \n", cf_matrix) print("## Accuracy \n", accuracy)