"""RMDL example: text classification from local CSV feature/label files."""
import numpy as np
import pandas as pd

from RMDL import text_feature_extraction as txt
from RMDL import RMDL_Text as RMDL
# sklearn.cross_validation was removed in scikit-learn 0.20;
# train_test_split lives in sklearn.model_selection.
from sklearn.model_selection import train_test_split

if __name__ == "__main__":
    # Load the text corpus and the label matrix from CSV.
    file_x = "X.csv"
    file_y = "Y_.csv"
    content = pd.read_csv(file_x, encoding="utf-8")
    Label = pd.read_csv(file_y, encoding="utf-8")

    # Take the second column as the raw documents.
    # DataFrame.ix was removed from pandas; .iloc is the positional equivalent.
    content = content.iloc[:, 1]
    content = np.array(content).ravel()
    print(np.array(content).transpose().shape)

    # DataFrame.as_matrix() was removed from pandas; use .to_numpy() instead.
    Label = np.matrix(Label.to_numpy())

    np.random.seed(7)

    # Deep-clean each document (lowercasing, punctuation stripping, etc.).
    content = [txt.text_cleaner(x, deep_clean=True) for x in content]

    # 90/10 train/test split with a fixed seed for reproducibility.
    X_train, X_test, y_train, y_test = train_test_split(
        content, Label, test_size=0.1, random_state=42)

    batch_size = 256
    sparse_categorical = 0
    n_epochs = [100, 100, 100]      # DNN -- RNN -- CNN
    Random_Deep = [2, 2, 2]         # DNN -- RNN -- CNN

    # NOTE(review): the original source was truncated mid-call after
    # `sparse_categorical=True,`; the trailing keyword arguments below are
    # reconstructed from the standard RMDL examples — confirm against RMDL docs.
    RMDL.Text_Classification(X_train, y_train, X_test, y_test,
                             batch_size=batch_size,
                             sparse_categorical=True,
                             random_deep=Random_Deep,
                             epochs=n_epochs)
"""RMDL example: Web of Science (WOS5736) text-classification setup."""
import os

import numpy as np

from RMDL import text_feature_extraction as txt
from RMDL.Download import Download_WOS as WOS
from RMDL import RMDL_Text as RMDL
# sklearn.cross_validation was removed in scikit-learn 0.20;
# train_test_split now lives in sklearn.model_selection.
from sklearn.model_selection import train_test_split

if __name__ == "__main__":
    # Download (if needed) and locate the WOS5736 text and label files.
    path_WOS = WOS.download_and_extract()
    fname = os.path.join(path_WOS, "WebOfScience/WOS5736/X.txt")
    fnamek = os.path.join(path_WOS, "WebOfScience/WOS5736/Y.txt")

    # One document per line; clean each document.
    with open(fname, encoding="utf-8") as f:
        content = f.readlines()
        content = [txt.text_cleaner(x) for x in content]
    # One integer label per line.
    with open(fnamek) as fk:
        contentk = fk.readlines()
    contentk = [x.strip() for x in contentk]

    # Column vector of integer class labels.
    Label = np.matrix(contentk, dtype=int)
    Label = np.transpose(Label)

    np.random.seed(7)
    print(Label.shape)

    # 80/20 train/test split with a fixed seed for reproducibility.
    X_train, X_test, y_train, y_test = train_test_split(
        content, Label, test_size=0.2, random_state=42)

    batch_size = 100
    sparse_categorical = 0
    n_epochs = [100, 100, 100]      # DNN -- RNN -- CNN
    # NOTE(review): the source was truncated here; the RMDL.Text_Classification
    # call that normally concludes this example is missing from view.
"""RMDL example: IMDB sentiment dataset preprocessing setup."""
import numpy as np
from keras.datasets import imdb

from RMDL import text_feature_extraction as txt
from RMDL import RMDL_Text as RMDL

if __name__ == "__main__":
    print("Load IMDB dataset....")
    MAX_NB_WORDS = 75000
    # Reviews arrive as lists of word indices, capped at the top 75k words.
    (X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=MAX_NB_WORDS)
    print(len(X_train))
    print(y_test)

    # Invert the word->index map so indices can be decoded back to words.
    word_index = imdb.get_word_index()
    index_word = {v: k for k, v in word_index.items()}

    # Bug fix: dict.get(w) returns None for ids with no dictionary entry
    # (e.g. Keras' reserved padding/start/OOV indices), which makes
    # ' '.join(...) raise TypeError. Default to '' so decoding never crashes.
    X_train = [txt.text_cleaner(' '.join(index_word.get(w, '') for w in x))
               for x in X_train]
    X_test = [txt.text_cleaner(' '.join(index_word.get(w, '') for w in x))
              for x in X_test]

    # Flatten to 1-D object arrays of cleaned review strings.
    X_train = np.array(X_train).ravel()
    print(X_train.shape)
    X_test = np.array(X_test).ravel()

    batch_size = 100
    sparse_categorical = 0
    n_epochs = [500, 500, 500]      # DNN -- RNN -- CNN
    # NOTE(review): the source was truncated here; the RMDL.Text_Classification
    # call that normally concludes this example is missing from view.