示例#1
0
def store_name_brand_attribute_features(filepath, sheetnum, colnum, data, storepath):
    """Write the records produced by ``name_brand_attribute`` to ``storepath``.

    The input is loaded with ``tp.seg_fil_excel(filepath, sheetnum, colnum)``
    and handed to ``name_brand_attribute``. For every record returned, the
    first three items are written on one line, separated by single spaces.

    Note: the ``data`` argument is ignored -- it is overwritten immediately by
    the freshly loaded input. It stays in the signature so that existing call
    sites keep working.
    """
    data = tp.seg_fil_excel(filepath, sheetnum, colnum)
    n_b_a = name_brand_attribute(data)

    # ``with`` closes the file even when a write raises part-way through
    # (e.g. a record with fewer than three items).
    with open(storepath, 'w') as f:
        for i in n_b_a:
            f.write(str(i[0]) + ' ' + str(i[1]) + ' ' + str(i[2]) + '\n')
def store_name_brand_attribute_features(filepath, sheetnum, colnum, data, storepath):
    """Store the output of ``name_brand_attribute`` as a space-separated text file.

    Loads the input via ``tp.seg_fil_excel(filepath, sheetnum, colnum)``, runs
    ``name_brand_attribute`` on it, and writes items 0, 1 and 2 of each
    resulting record to ``storepath``, one record per line.

    Note: ``data`` is accepted for backward compatibility only; its value is
    discarded and replaced by the loaded input.
    """
    data = tp.seg_fil_excel(filepath, sheetnum, colnum)
    n_b_a = name_brand_attribute(data)

    # The context manager releases the file handle on every exit path,
    # including an exception raised while formatting or writing a record.
    with open(storepath, 'w') as f:
        for i in n_b_a:
            line = str(i[0]) + ' ' + str(i[1]) + ' ' + str(i[2]) + '\n'
            f.write(line)
示例#3
0
def store_name_brand_attribute_features(review_data, filepath, sheetnum,
                                        colnum, storepath):
    """Write the pairs returned by ``entropy_perplexity`` to ``storepath``.

    Despite its name, this function does not call ``name_brand_attribute``:
    it builds ``NgramModel(1, review_data, estimator=None)``, loads the input
    with ``tp.seg_fil_excel(filepath, sheetnum, colnum)``, and writes items 0
    and 1 of each ``entropy_perplexity`` result, tab-separated, one per line.
    """
    lm = NgramModel(1, review_data, estimator=None)
    data = tp.seg_fil_excel(filepath, sheetnum, colnum)
    ep = entropy_perplexity(lm, data)

    # ``with`` guarantees the file is closed even if a write raises.
    with open(storepath, 'w') as p:
        for j in ep:
            p.write(str(j[0]) + '\t' + str(j[1]) + '\n')
def store_adj_adv_v_num_feature(filepath, sheetnum, colnum, data, storepath):
    """Write the records produced by ``count_adj_adv`` to ``storepath``.

    The input is loaded with ``tp.seg_fil_excel(filepath, sheetnum, colnum)``
    and passed to ``count_adj_adv``. Items 0, 1 and 2 of every returned record
    are written space-separated, one record per line.

    Note: the ``data`` argument is ignored -- it is overwritten by the loaded
    input and is kept only for signature compatibility.
    """
    data = tp.seg_fil_excel(filepath, sheetnum, colnum)

    adj_adv_num = count_adj_adv(data)

    # ``with`` closes the file on every exit path, including exceptions.
    with open(storepath, "w") as f:
        for i in adj_adv_num:
            f.write(str(i[0]) + " " + str(i[1]) + " " + str(i[2]) + "\n")
def store_word_sent_num_features(filepath, sheetnum, colnum, data, storepath):
    """Write the records produced by ``word_sent_count`` to ``storepath``.

    The input is loaded with ``tp.seg_fil_excel(filepath, sheetnum, colnum)``
    and passed to ``word_sent_count``. Items 0, 1 and 2 of every returned
    record are written space-separated, one record per line.

    Note: the ``data`` argument is ignored -- it is overwritten by the loaded
    input and is kept only for signature compatibility.
    """
    data = tp.seg_fil_excel(filepath, sheetnum, colnum)

    word_sent_num = word_sent_count(data)

    # ``with`` closes the file even if a record is malformed and the write
    # raises before the loop finishes.
    with open(storepath, 'w') as f:
        for i in word_sent_num:
            f.write(str(i[0]) + ' ' + str(i[1]) + ' ' + str(i[2]) + '\n')
def store_word_sent_num_features(filepath, sheetnum, colnum, data, storepath):
    """Store the output of ``word_sent_count`` as a space-separated text file.

    Loads the input via ``tp.seg_fil_excel(filepath, sheetnum, colnum)``, runs
    ``word_sent_count`` on it, and writes items 0, 1 and 2 of each resulting
    record to ``storepath``, one record per line.

    Note: ``data`` is accepted for backward compatibility only; its value is
    discarded and replaced by the loaded input.
    """
    data = tp.seg_fil_excel(filepath, sheetnum, colnum)

    word_sent_num = word_sent_count(data)

    # The context manager releases the file handle on every exit path.
    with open(storepath, "w") as f:
        for i in word_sent_num:
            line = str(i[0]) + " " + str(i[1]) + " " + str(i[2]) + "\n"
            f.write(line)
def store_adj_adv_v_num_feature(filepath, sheetnum, colnum, data, storepath):
    """Store the output of ``count_adj_adv`` as a space-separated text file.

    Loads the input via ``tp.seg_fil_excel(filepath, sheetnum, colnum)``, runs
    ``count_adj_adv`` on it, and writes items 0, 1 and 2 of each resulting
    record to ``storepath``, one record per line.

    Note: ``data`` is accepted for backward compatibility only; its value is
    discarded and replaced by the loaded input.
    """
    data = tp.seg_fil_excel(filepath, sheetnum, colnum)

    adj_adv_num = count_adj_adv(data)

    # The context manager releases the file handle even when a write raises.
    with open(storepath, 'w') as f:
        for i in adj_adv_num:
            line = str(i[0]) + ' ' + str(i[1]) + ' ' + str(i[2]) + '\n'
            f.write(line)
def store_seg_fil_result(filepath, sheetnum, colnum, storepath):
    """Write the result of ``tp.seg_fil_excel`` to ``storepath`` as text.

    Each element of the result is iterated as a sequence of words and written
    on its own line; every word is UTF-8 encoded and followed by a single
    space (so each line ends with a trailing space before the newline).
    """
    # Read the Excel review file; per the original author's note this step
    # performs segmentation and stop-word filtering.
    seg_fil_result = tp.seg_fil_excel(filepath, sheetnum, colnum)

    # Store the filtered reviews. ``with`` closes the file even if encoding
    # or writing a word raises.
    # NOTE(review): ``word.encode("utf8") + " "`` relies on Python 2 string
    # semantics; confirm before running under Python 3.
    with open(storepath, "w") as fil_file:
        for sent in seg_fil_result:
            for word in sent:
                fil_file.write(word.encode("utf8") + " ")
            fil_file.write("\n")
def store_seg_fil_result(filepath, sheetnum, colnum, storepath):
    """Dump the output of ``tp.seg_fil_excel`` to a text file at ``storepath``.

    One output line is produced per element of the result; within a line each
    word is UTF-8 encoded and followed by one space, so lines carry a trailing
    space before the newline.
    """
    # Read the Excel review file; the original author describes this call as
    # doing segmentation and stop-word filtering.
    seg_fil_result = tp.seg_fil_excel(filepath, sheetnum, colnum)

    # Store the filtered reviews. The context manager releases the file
    # handle on every exit path, including a failed encode or write.
    # NOTE(review): concatenating ``word.encode('utf8')`` with ``' '`` assumes
    # Python 2 string semantics; confirm before porting to Python 3.
    with open(storepath, 'w') as fil_file:
        for sent in seg_fil_result:
            for word in sent:
                fil_file.write(word.encode('utf8') + ' ')
            fil_file.write('\n')
def store_name_brand_attribute_features(review_data, filepath, sheetnum, colnum, storepath):
    """Write the pairs returned by ``entropy_perplexity`` to ``storepath``.

    Despite its name, this function does not call ``name_brand_attribute``.
    Items 0 and 1 of each ``entropy_perplexity`` result are written
    tab-separated, one result per line.
    """
    # Build an n-gram language model (n=1) from the reviews of a certain
    # product category.
    lm = NgramModel(1, review_data, estimator=None)

    # Read the full review dataset.
    data = tp.seg_fil_excel(filepath, sheetnum, colnum)

    ep = entropy_perplexity(lm, data)

    # ``with`` guarantees the file is closed even if a write raises.
    with open(storepath, 'w') as p:
        for j in ep:
            p.write(str(j[0]) + '\t' + str(j[1]) + '\n')
def store_name_brand_attribute_features(review_data, filepath, sheetnum,
                                        colnum, storepath):
    """Store the output of ``entropy_perplexity`` as a tab-separated text file.

    Despite its name, this function does not call ``name_brand_attribute``:
    it scores the loaded dataset with a language model built from
    ``review_data`` and writes items 0 and 1 of each result to ``storepath``,
    one result per line.
    """
    # Build an n-gram language model (n=1) from the reviews of a certain
    # product category.
    lm = NgramModel(1, review_data, estimator=None)

    # Read the full review dataset.
    data = tp.seg_fil_excel(filepath, sheetnum, colnum)

    ep = entropy_perplexity(lm, data)

    # The context manager releases the file handle on every exit path.
    with open(storepath, 'w') as p:
        for j in ep:
            line = str(j[0]) + '\t' + str(j[1]) + '\n'
            p.write(line)
示例#12
0
    ent_per = []
    for r in dataset:
        ent = model.entropy(r)
        per = model.perplexity(r)
        ent_per.append((ent, per))
    return ent_per


"""
testing
"""
# Ad-hoc test run: these statements execute at import time and use
# hard-coded, machine-specific absolute paths.
filepath = 'E:/GraduationProject/pythoncode/project/Prediction/main/ReviewSet/Motorala.xlsx'
storepath = 'E:/GraduationProject/pythoncode/project/Prediction/main/result/DifferenceFeature.txt'
# NOTE(review): the arguments 1 and 11 presumably mean the same as the
# sheetnum/colnum passed to tp.seg_fil_excel below -- confirm against
# word_by_word_review's definition.
review = word_by_word_review(filepath, 1, 11)

# Elsewhere in this file tp.seg_fil_excel is called as
# (filepath, sheetnum, colnum), so this reads sheet 1, column 11.
data = tp.seg_fil_excel(filepath, 1, 11)

# print len_data
# lm = NgramModel(1, review, estimator=None)
# ep = entropy_perplexity(lm,data[1:990])
# print ep
# Store features
# def store_name_brand_attribute_features(review_data, filepath, sheetnum, colnum, storepath):
# 	# Building an ngram language model of a certain product category review
# 	lm = NgramModel(1, review_data, estimator=None) # Need initiallized
#
# 	# Read full review dataset
# 	data =  tp.seg_fil_excel(filepath, sheetnum, colnum)
#
# 	ep = entropy_perplexity(lm, data)
#