io.write_file_text('\n'.join(list_reviews_fail), 'data/list_reviews_fail')


def process_test_data_for_fail_reviews():
    """Dump the cleaned test reviews and collect the wrongly predicted ones.

    Steps, in order:

    1. Load the raw test records from ``data/data_raw/test_raw/test.json``.
    2. Clean every ``review_body`` with ``clean.clean_review``, then
       ``clean.negation_process(..., 'negation')``, then
       ``clean.number_process``.
    3. Write one ``<rating> -- <cleaned review>`` line per record to
       ``data/test_fail_review``.
    4. Read the expected labels (``data/datatestsvm_label1``) and the
       predicted labels (``data/predict_label``), pass both to
       ``get_index_fail``, and hand its result together with the raw and
       cleaned test corpora to ``get_reviews_fail``.

    Takes no arguments and returns nothing; all results go to disk.
    """
    raw_records = io.read_file_json("data/data_raw/test_raw/test.json")

    cleaned_texts = [
        clean.number_process(
            clean.negation_process(
                clean.clean_review(record['review_body']), 'negation'))
        for record in raw_records
    ]
    ratings = [record['rating'] for record in raw_records]

    # Write the rating next to its cleaned text so the reviews that were
    # predicted wrongly can be looked up by hand afterwards.
    rated_texts = list(zip(ratings, cleaned_texts))
    with open('data/test_fail_review', 'w') as out_file:
        out_file.write('\n'.join('%s -- %s' % pair for pair in rated_texts))

    expected_labels = io.read_file_text("data/datatestsvm_label1").split("\n")
    # NOTE(review): disabled call kept for reference; presumably it generates
    # the test_raw / test_clean files read below -- confirm before removing.
    # filterTestDataByDict("data/data_raw/test_raw/test.json", "data/result_test/test_raw", "data/result_test/test_clean",'dictionary')
    predicted_labels = io.read_file_text('data/predict_label').split("\n")
    failed_indices = get_index_fail(predicted_labels, expected_labels)

    raw_reviews = io.read_file_text("data/result_test/test_raw").split("\n")
    clean_reviews = io.read_file_text("data/result_test/test_clean").split("\n")
    get_reviews_fail(failed_indices, raw_reviews, clean_reviews)
from joblib import load
from scipy import sparse
from fileio import FileIO
from sklearn.metrics import precision_score, recall_score, f1_score

# Shared FileIO helper used for the label-file reads below.
io = FileIO()

# Class labels.
classes = [0, 1, 2]

# Data paths (all under the libsvm-3.22 directory).
DATATEST = "data/libsvm-3.22/datatestsvm"
DATATRAIN = "data/libsvm-3.22/datatrainsvm"
LABEL_PREDICT_PATH = 'data/libsvm-3.22/test/result'

# The data sets are loaded at import time, so importing this module reads
# all four files from disk.
# X_train stays a scipy sparse matrix; X_test is converted to a dense array.
# The label files are split on "\n", so every label is a string and a
# trailing newline in the file yields a trailing empty-string entry.
X_train = sparse.load_npz("data/datatrainsvm1.npz")
Y_train = io.read_file_text("data/datatrainsvm_label1").split("\n")
X_test = sparse.load_npz("data/datatestsvm1.npz").toarray()
Y_test = io.read_file_text("data/datatestsvm_label1").split("\n")


def read_file(file_path):
    """
    Read file from disk
    file_path
    """
    # Opened in text mode without an explicit encoding, so the platform
    # default encoding is used for decoding.
    file = open(file_path, 'r')
    try:
        text = file.read()
    except UnicodeDecodeError:
        # Undecodable content: report the path and fall back to empty text.
        print("fail open file: " + file_path)
        text = ''