io.write_file_text('\n'.join(list_reviews_fail), 'data/list_reviews_fail')


def process_test_data_for_fail_reviews():
    """Dump every test review, cleaned, next to its rating.

    Reads the raw test set from ``data/data_raw/test_raw/test.json``, passes
    each record's ``review_body`` through ``clean.clean_review``,
    ``clean.negation_process`` (called with ``'negation'``) and
    ``clean.number_process``, and writes one ``<rating> -- <cleaned review>``
    line per record to ``data/test_fail_review``.
    """
    records = io.read_file_json("data/data_raw/test_raw/test.json")

    # Cleaning pipeline, applied step by step to each review body.
    cleaned_reviews = []
    for record in records:
        text = clean.clean_review(record['review_body'])
        text = clean.negation_process(text, 'negation')
        cleaned_reviews.append(clean.number_process(text))

    ratings = [record['rating'] for record in records]

    # Write the pairs to a text file so wrongly predicted reviews can be
    # looked up afterwards.
    pairs = list(zip(ratings, cleaned_reviews))
    with open('data/test_fail_review', 'w') as out:
        out.write('\n'.join(
            '%s -- %s' % (rating, review) for rating, review in pairs))


# Driver script: locate the mispredicted test reviews and hand them to
# get_reviews_fail together with their raw and cleaned text.

# Expected labels of the test set, one entry per line of the label file.
Y_test = io.read_file_text("data/datatestsvm_label1").split("\n")

# Disabled call, kept for reference; by its arguments it would produce the
# test_raw / test_clean files read below — TODO confirm before re-enabling.
#filterTestDataByDict("data/data_raw/test_raw/test.json", "data/result_test/test_raw", "data/result_test/test_clean",'dictionary')
# Predicted labels, one entry per line of the prediction file.
predict = io.read_file_text('data/predict_label').split("\n")
# get_index_fail is defined outside this section; presumably it returns the
# positions where the prediction differs from the label — verify.
index_fail = get_index_fail(predict, Y_test)

# Raw and cleaned review texts, split into one entry per line.
# NOTE(review): a trailing newline in any of these files yields an extra
# empty last entry from split("\n") — confirm the files are written without one.
corpus_raw = io.read_file_text("data/result_test/test_raw").split("\n")
corpus_clean = io.read_file_text("data/result_test/test_clean").split("\n")

get_reviews_fail(index_fail, corpus_raw, corpus_clean)
# Example #2
# 0
from joblib import load
from scipy import sparse
from fileio import FileIO
from sklearn.metrics import precision_score, recall_score, f1_score

# Shared file helper from the project's fileio module.
# NOTE(review): this name shadows the standard-library `io` module.
io = FileIO()
# class labels
classes = [0, 1, 2]

# data paths
DATATEST = "data/libsvm-3.22/datatestsvm"
DATATRAIN = "data/libsvm-3.22/datatrainsvm"
LABEL_PREDICT_PATH = 'data/libsvm-3.22/test/result'

# Training features stay in the sparse format loaded from the .npz file;
# test features are converted to a dense array with .toarray().
# Labels are read as text and split into one entry per line.
X_train = sparse.load_npz("data/datatrainsvm1.npz")
Y_train = io.read_file_text("data/datatrainsvm_label1").split("\n")
X_test = sparse.load_npz("data/datatestsvm1.npz").toarray()
Y_test = io.read_file_text("data/datatestsvm_label1").split("\n")


def read_file(file_path):
    """
    Read file from disk
    file_path
    """
    file = open(file_path, 'r')
    try:
        text = file.read()
    except UnicodeDecodeError:
        print("fail open file: " + file_path)
        text = ''