# Exemplo n.º 1
# (score: 0)
"""Train a Doc2Vec model on the 100k review corpus and save both the
trained model and its word vectors (word2vec text format)."""
from gensim.models.doc2vec import Doc2Vec, TaggedDocument
from utils import load_reviews
import time

start = time.time()

filePath = '../../corpus/100k/allTrimed.csv'
_, reviews = load_reviews(filePath)
# Doc2Vec expects each document as a list of tokens, not a raw string.
reviews = [review.split() for review in reviews]

cost = time.time() - start
print(f'Loading reviews cost: {cost:.4f} Sec')
start = time.time()

# Tag every review with its index so its document vector can be looked up later.
documents = [TaggedDocument(doc, [i]) for i, doc in enumerate(reviews)]
model = Doc2Vec(documents, vector_size=20, window=2, min_count=5, epochs=10)
# Bug fix: in gensim 4.x the Doc2Vec model itself has no
# `save_word2vec_format`; the word vectors live on `model.wv`
# (document vectors would be `model.dv`).
model.wv.save_word2vec_format('./data/d2v.txt')
model.save('./data/d2v.model')

cost = time.time() - start
print(f'Training model cost: {cost:.4f} Sec')
# Exemplo n.º 2
# (score: 0)
"""Prepare the review corpus and vectorize the training split for a
naive-Bayes sentiment classifier (fragment — training continues below)."""
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import Pipeline
from utils import load_reviews, data_suffle

# stopwordPath = './data/stopword.txt'
# userDictPath = './data/user_dict.txt'
csvFilePath = '../../corpus/100k/allTrimed.csv'
modelPath = './data/bayes.model'

# Load the user-defined segmentation dictionary (currently disabled).
# jieba.load_userdict(userDictPath)

time_start = time.time()

labels, reviews = load_reviews(csvFilePath)
labels, reviews = data_suffle(labels, reviews)

# Hold out the first fifth of the shuffled data as the test set (train:test = 4:1).
n = len(labels) // 5
labels_train, reviews_train = labels[n:], reviews[n:]
labels_test, reviews_test = labels[:n], reviews[:n]

print(f'Load Corpus Cost {time.time() - time_start:.4f} Sec')
print(reviews[:5], type(reviews), type(reviews[0]))

time_start = time.time()

vectorizer = CountVectorizer()
# Bug fix: the original wrapped each review in `np.str_`, but numpy is never
# imported here (NameError), and `np.str_` was removed in NumPy 2.0.
# `np.str_` is a subclass of `str`, so plain str() is behaviorally equivalent.
vec_train = vectorizer.fit_transform(
    [str(review) for review in reviews_train])
# Exemplo n.º 3
# (score: 0)
    # NOTE(review): fragment of a larger function — the enclosing `def` is not
    # visible here; POS, SVM, feature_data and the imports are defined elsewhere.
    # Initialize the MeCab morphological-analysis tagger.
    tagger = Mecab()

    # Compile the part-of-speech filter pattern (POS is an external constant).
    exp = re.compile(POS, re.IGNORECASE)

    # Load the sentiment word dictionary used for feature extraction.
    bag = utils.load_dictionary()

    # Reuse a previously trained model if one was pickled to disk;
    # a missing file raises IOError and triggers fresh training below.
    try:
        with open("../Resources/models/model", "rb") as model_file:
            model = pickle.load(model_file)
    except IOError as err:
        # No saved model: load the raw training reviews.
        train_review = utils.load_reviews("../Resources/samples/train_data")
        # Turn raw reviews into feature vectors and labels.
        train_data, train_label = feature_data(tagger, exp, bag, train_review)
        # Initialize the SVM classifier.
        model = SVM()
        # Train the model on the extracted features.
        model.train(train_data, train_label)
        # Persist the trained model so future runs skip training.
        with open("../Resources/models/model", "wb") as model_file:
            pickle.dump(model, model_file)
    else:
        print("use saved model..")

    # Load the test reviews; feature extraction continues past this view.
    test_review = utils.load_reviews("../Resources/samples/test_data")
    # get feature from test data
# Exemplo n.º 4
# (score: 0)
	# NOTE(review): fragment of a larger function (tab-indented variant of the
	# snippet above) — POS, SVM, feature_data and imports are defined elsewhere.
	# Initialize the MeCab morphological-analysis tagger.
	tagger = Mecab()

	# Compile the part-of-speech filter pattern (POS is an external constant).
	exp = re.compile(POS, re.IGNORECASE)

	# Load the sentiment word dictionary used for feature extraction.
	bag = utils.load_dictionary()

	# Reuse a previously trained model if one was pickled to disk;
	# a missing file raises IOError and triggers fresh training below.
	try:
		with open("../Resources/models/model", "rb") as model_file:
			model = pickle.load(model_file)
	except IOError as err:
		# No saved model: load the raw training reviews.
		train_review = utils.load_reviews("../Resources/samples/train_data")
		# Turn raw reviews into feature vectors and labels.
		train_data, train_label = feature_data(tagger, exp, bag, train_review)
		# Initialize the SVM classifier.
		model = SVM()
		# Train the model on the extracted features.
		model.train(train_data, train_label)
		# Persist the trained model so future runs skip training.
		with open("../Resources/models/model", "wb") as model_file:
			pickle.dump(model, model_file)
	else:
		print("use saved model..")

	# Load the test reviews; feature extraction continues past this view.
	test_review  = utils.load_reviews("../Resources/samples/test_data")
	# get feature from test data
# Exemplo n.º 5
# (score: 0)
	# NOTE(review): fragment of a larger function — same pipeline as the
	# snippets above but with local ./models and ./samples paths, and the
	# test-review loading commented out.
	# Initialize the MeCab morphological-analysis tagger.
	tagger = Mecab()

	# Compile the part-of-speech filter pattern (POS is an external constant).
	exp = re.compile(POS, re.IGNORECASE)

	# Load the sentiment word dictionary used for feature extraction.
	bag = utils.load_dictionary()

	# Reuse a previously trained model if one was pickled to disk;
	# a missing file raises IOError and triggers fresh training below.
	try:
		with open("./models/model", "rb") as model_file:
			model = pickle.load(model_file)
	except IOError as err:
		# No saved model: load the raw training reviews.
		train_review = utils.load_reviews("./samples/train_data")
		# Turn raw reviews into feature vectors and labels.
		train_data, train_label = feature_data(tagger, exp, bag, train_review)
		# Initialize the SVM classifier.
		model = SVM()
		# Train the model on the extracted features.
		model.train(train_data, train_label)
		# Persist the trained model so future runs skip training.
		with open("./models/model", "wb") as model_file:
			pickle.dump(model, model_file)
	else:
		print("use saved model..")

	# Test-review loading is disabled in this variant.
	#test_review  = utils.load_reviews("./samples/test_data")
	# get feature from test data