sys.path.insert(0, '..')
from utils.TextPreprocess import review_to_words, tag_reviews
'''
Training Data
'''
# Load the labelled training reviews from the tab-separated file.
# quoting=3 is the value of csv.QUOTE_NONE, i.e. quote characters get no
# special treatment while parsing.
# `error_bad_lines` was deprecated in pandas 1.3 and removed in pandas 2.0;
# on_bad_lines="warn" keeps the former behaviour (the malformed row is
# skipped and a warning is emitted) and requires pandas >= 1.3.
train = pd.read_csv(
    "../../data/labeledTrainData.tsv",
    header=0,
    delimiter="\t",
    quoting=3,
    on_bad_lines="warn",
)
num_reviews = train["review"].size

print("Cleaning and parsing the training set movie reviews...")
# Walk the column itself instead of looking rows up by integer label; the
# result has one cleaned entry per review, in file order.
clean_train_reviews = [review_to_words(review) for review in train["review"]]
'''
Test Data
'''
# Load the test reviews from the tab-separated file (first row is the
# header; quoting=3 is the value of csv.QUOTE_NONE).
test = pd.read_csv(
    "../../data/testData.tsv", header=0, delimiter="\t", quoting=3
)
num_reviews = len(test["review"])

print("Cleaning and parsing the test set movie reviews...")
# Build the cleaned list in one pass: one review_to_words() call per row,
# looked up by row label 0 .. num_reviews - 1.
clean_test_reviews = [
    review_to_words(test["review"][row]) for row in range(num_reviews)
]
# Example #2
# 0
'''
@author: GongYu
'''
import pandas as pd
from utils.TextPreprocess import review_to_words

'''
Training Data
'''
# Load the labelled training reviews from the tab-separated file (first row
# is the header; quoting=3 is the value of csv.QUOTE_NONE).
# NOTE(review): `error_bad_lines` was removed in pandas 2.0 -- confirm the
# pandas version in use before porting this script to Python 3.
train = pd.read_csv(
    "data\\labeledTrainData.tsv",
    header=0,
    delimiter="\t",
    quoting=3,
    error_bad_lines=False,
)
num_reviews = train["review"].size

# A single parenthesised string prints the same text under the Python 2
# print statement.
print("Cleaning and parsing the training set movie reviews...")
# One review_to_words() call per row, looked up by row label.
clean_train_reviews = [
    review_to_words(train["review"][row]) for row in xrange(0, num_reviews)
]

'''
Test Data
'''
# Load the test reviews from the tab-separated file (first row is the
# header; quoting=3 is the value of csv.QUOTE_NONE).
test = pd.read_csv(
    "data\\testData.tsv",
    header=0,
    delimiter="\t",
    quoting=3,
)
num_reviews = len(test["review"])

# A single parenthesised string prints the same text under the Python 2
# print statement.
print("Cleaning and parsing the test set movie reviews...")
# One review_to_words() call per row, looked up by row label.
clean_test_reviews = [
    review_to_words(test["review"][row]) for row in xrange(0, num_reviews)
]

'''
Train and Test