def store_name_brand_attribute_features(filepath, sheetnum, colnum, data, storepath):
    """Compute name/brand/attribute features and write them to a text file.

    The data is loaded with ``tp.seg_fil_excel(filepath, sheetnum, colnum)``,
    passed to ``name_brand_attribute`` and, for every item of the result,
    its first three values are written as one space-separated line.

    Args:
        filepath: Forwarded to ``tp.seg_fil_excel`` (presumably the path of
            the Excel review file -- confirm against ``tp``).
        sheetnum: Forwarded to ``tp.seg_fil_excel``.
        colnum: Forwarded to ``tp.seg_fil_excel``.
        data: Ignored. The value passed in is replaced by the freshly loaded
            data; the parameter is kept only so existing callers keep working.
        storepath: Path of the output file. It is opened in ``'w'`` mode, so
            any existing content is overwritten.
    """
    reviews = tp.seg_fil_excel(filepath, sheetnum, colnum)

    # NOTE(review): the original author flagged this call with
    # "Need initialized" -- presumably ``name_brand_attribute`` depends on
    # some setup done elsewhere; confirm before reuse.
    features = name_brand_attribute(reviews)

    # ``with`` guarantees the file is closed even if a write (or a str()
    # conversion) raises part-way through the loop.
    with open(storepath, 'w') as out:
        for item in features:
            out.write(' '.join((str(item[0]), str(item[1]), str(item[2]))) + '\n')
def store_name_brand_attribute_features(review_data, filepath, sheetnum, colnum, storepath):
    """Write per-item (entropy, perplexity) pairs to a tab-separated file.

    NOTE(review): despite its name, this function does not compute
    name/brand/attribute features. It builds an ``NgramModel`` from
    ``review_data`` and stores what ``entropy_perplexity`` returns for the
    loaded data. The name is kept so existing callers keep working.

    Args:
        review_data: Training data handed to ``NgramModel(1, ...)``
            (n = 1, i.e. presumably a unigram model).
        filepath: Forwarded to ``tp.seg_fil_excel``.
        sheetnum: Forwarded to ``tp.seg_fil_excel``.
        colnum: Forwarded to ``tp.seg_fil_excel``.
        storepath: Path of the output file. It is opened in ``'w'`` mode, so
            any existing content is overwritten.
    """
    model = NgramModel(1, review_data, estimator=None)
    dataset = tp.seg_fil_excel(filepath, sheetnum, colnum)
    scores = entropy_perplexity(model, dataset)

    # ``with`` guarantees the file is closed even if a write raises.
    with open(storepath, 'w') as out:
        for pair in scores:
            # One line per item: "<first value>\t<second value>".
            out.write(str(pair[0]) + '\t' + str(pair[1]) + '\n')
def store_adj_adv_v_num_feature(filepath, sheetnum, colnum, data, storepath):
    """Run ``count_adj_adv`` on the loaded data and store the result as text.

    The data is loaded with ``tp.seg_fil_excel(filepath, sheetnum, colnum)``
    and passed to ``count_adj_adv``. For every item of the result the first
    three values are written as one space-separated line.

    Args:
        filepath: Forwarded to ``tp.seg_fil_excel``.
        sheetnum: Forwarded to ``tp.seg_fil_excel``.
        colnum: Forwarded to ``tp.seg_fil_excel``.
        data: Ignored. It is replaced by the freshly loaded data; the
            parameter is kept only for backward compatibility.
        storepath: Path of the output file. It is opened in ``"w"`` mode, so
            any existing content is overwritten.
    """
    reviews = tp.seg_fil_excel(filepath, sheetnum, colnum)
    counts = count_adj_adv(reviews)

    # The context manager closes the file even when a write fails, which the
    # manual open()/close() pair did not.
    with open(storepath, "w") as out:
        for item in counts:
            out.write(" ".join((str(item[0]), str(item[1]), str(item[2]))) + "\n")
def store_word_sent_num_features(filepath, sheetnum, colnum, data, storepath):
    """Run ``word_sent_count`` on the loaded data and store the result as text.

    The data is loaded with ``tp.seg_fil_excel(filepath, sheetnum, colnum)``
    and passed to ``word_sent_count``. For every item of the result the first
    three values are written as one space-separated line.

    Args:
        filepath: Forwarded to ``tp.seg_fil_excel``.
        sheetnum: Forwarded to ``tp.seg_fil_excel``.
        colnum: Forwarded to ``tp.seg_fil_excel``.
        data: Ignored. It is replaced by the freshly loaded data; the
            parameter is kept only for backward compatibility.
        storepath: Path of the output file. It is opened in ``'w'`` mode, so
            any existing content is overwritten.
    """
    reviews = tp.seg_fil_excel(filepath, sheetnum, colnum)

    # NOTE(review): the original author flagged this call with
    # "Need initialized" -- presumably ``word_sent_count`` relies on setup
    # done elsewhere; confirm before reuse.
    counts = word_sent_count(reviews)

    # ``with`` closes the file even if a write raises part-way through.
    with open(storepath, 'w') as out:
        for item in counts:
            out.write(' '.join((str(item[0]), str(item[1]), str(item[2]))) + '\n')
def store_word_sent_num_features(filepath, sheetnum, colnum, data, storepath):
    """Store the output of ``word_sent_count`` as space-separated text lines.

    Steps: load the data via ``tp.seg_fil_excel(filepath, sheetnum, colnum)``,
    feed it to ``word_sent_count``, then write the first three values of each
    resulting item on a line of their own.

    Args:
        filepath: Forwarded to ``tp.seg_fil_excel``.
        sheetnum: Forwarded to ``tp.seg_fil_excel``.
        colnum: Forwarded to ``tp.seg_fil_excel``.
        data: Ignored. The argument is overwritten by the freshly loaded
            data and exists only so current callers do not break.
        storepath: Output file path; opened in ``"w"`` mode (truncates).
    """
    loaded = tp.seg_fil_excel(filepath, sheetnum, colnum)

    # NOTE(review): marked "Need initialized" by the original author --
    # presumably ``word_sent_count`` has a prerequisite; verify.
    word_sent_num = word_sent_count(loaded)

    # Context manager instead of open()/close(): the handle is released even
    # when an exception interrupts the loop.
    with open(storepath, "w") as handle:
        for entry in word_sent_num:
            fields = (str(entry[0]), str(entry[1]), str(entry[2]))
            handle.write(" ".join(fields) + "\n")
def store_adj_adv_v_num_feature(filepath, sheetnum, colnum, data, storepath):
    """Store the output of ``count_adj_adv`` as space-separated text lines.

    Steps: load the data via ``tp.seg_fil_excel(filepath, sheetnum, colnum)``,
    feed it to ``count_adj_adv``, then write the first three values of each
    resulting item on a line of their own.

    Args:
        filepath: Forwarded to ``tp.seg_fil_excel``.
        sheetnum: Forwarded to ``tp.seg_fil_excel``.
        colnum: Forwarded to ``tp.seg_fil_excel``.
        data: Ignored. The argument is overwritten by the freshly loaded
            data and exists only so current callers do not break.
        storepath: Output file path; opened in ``'w'`` mode (truncates).
    """
    loaded = tp.seg_fil_excel(filepath, sheetnum, colnum)
    adj_adv_num = count_adj_adv(loaded)

    # Context manager instead of open()/close(): the handle is released even
    # when an exception interrupts the loop.
    with open(storepath, 'w') as handle:
        for entry in adj_adv_num:
            fields = (str(entry[0]), str(entry[1]), str(entry[2]))
            handle.write(' '.join(fields) + '\n')
def store_seg_fil_result(filepath, sheetnum, colnum, storepath):
    """Segment and stopword-filter the reviews, then store them as text.

    Each review returned by ``tp.seg_fil_excel`` is written on its own line;
    every word is UTF-8 encoded and followed by a single space (so each line
    ends with a trailing space before the newline).

    Args:
        filepath: Excel review file, forwarded to ``tp.seg_fil_excel``.
        sheetnum: Forwarded to ``tp.seg_fil_excel``.
        colnum: Forwarded to ``tp.seg_fil_excel``.
        storepath: Output file path; opened in ``"w"`` mode (truncates).
    """
    # Read the Excel review file, segment it and filter stopwords.
    seg_fil_result = tp.seg_fil_excel(filepath, sheetnum, colnum)

    # Store the filtered reviews. ``with`` closes the file even if encoding
    # or writing a word raises, which the manual close() did not.
    with open(storepath, "w") as fil_file:
        for sent in seg_fil_result:
            for word in sent:
                # NOTE(review): concatenating the encoded word with a str
                # relies on Python 2 string semantics -- confirm the
                # interpreter version before porting.
                fil_file.write(word.encode("utf8") + " ")
            fil_file.write("\n")
def store_seg_fil_result(filepath, sheetnum, colnum, storepath):
    """Write the segmented, stopword-filtered reviews to a text file.

    One output line per review: every word is UTF-8 encoded and followed by
    one space, then the line is terminated with a newline.

    Args:
        filepath: Excel review file, forwarded to ``tp.seg_fil_excel``.
        sheetnum: Forwarded to ``tp.seg_fil_excel``.
        colnum: Forwarded to ``tp.seg_fil_excel``.
        storepath: Output file path; opened in ``'w'`` mode (truncates).
    """
    # Read the Excel review file, segment it and filter stopwords.
    filtered_reviews = tp.seg_fil_excel(filepath, sheetnum, colnum)

    # Store the filtered reviews; the context manager releases the handle
    # even when an exception interrupts the loop.
    with open(storepath, 'w') as out:
        for review in filtered_reviews:
            for token in review:
                # NOTE(review): encoded word + str literal relies on
                # Python 2 string semantics -- confirm before porting.
                out.write(token.encode('utf8') + ' ')
            out.write('\n')
def store_name_brand_attribute_features(review_data, filepath, sheetnum, colnum, storepath):
    """Store per-review (entropy, perplexity) pairs as tab-separated lines.

    NOTE(review): the name is misleading -- the body computes what
    ``entropy_perplexity`` returns for an n-gram language model, not
    name/brand/attribute features. The name is left unchanged so existing
    callers keep working.

    Args:
        review_data: Reviews of a certain product category, used to build
            the language model.
        filepath: Forwarded to ``tp.seg_fil_excel``.
        sheetnum: Forwarded to ``tp.seg_fil_excel``.
        colnum: Forwarded to ``tp.seg_fil_excel``.
        storepath: Output file path; opened in ``'w'`` mode (truncates).
    """
    # Build an n-gram (n = 1) language model of a certain product category.
    # NOTE(review): flagged "Need initialized" by the original author --
    # presumably ``review_data`` must be prepared beforehand; confirm.
    lm = NgramModel(1, review_data, estimator=None)

    # Read the full review dataset.
    data = tp.seg_fil_excel(filepath, sheetnum, colnum)

    ep = entropy_perplexity(lm, data)

    # ``with`` closes the file even if a write raises part-way through.
    with open(storepath, 'w') as p:
        for j in ep:
            p.write(str(j[0]) + '\t' + str(j[1]) + '\n')
def store_name_brand_attribute_features(review_data, filepath, sheetnum, colnum, storepath):
    """Write the result of ``entropy_perplexity`` to a tab-separated file.

    NOTE(review): in spite of the function name, nothing related to
    name/brand/attribute features is computed here; the body scores the
    loaded reviews against an n-gram language model. The name is retained
    for backward compatibility.

    Args:
        review_data: Reviews of a certain product category, used to build
            the language model.
        filepath: Forwarded to ``tp.seg_fil_excel``.
        sheetnum: Forwarded to ``tp.seg_fil_excel``.
        colnum: Forwarded to ``tp.seg_fil_excel``.
        storepath: Output file path; opened in ``'w'`` mode (truncates).
    """
    # Language model (n = 1) of a certain product category's reviews.
    # NOTE(review): the original author marked this "Need initialized" --
    # presumably the training data needs preparation; verify.
    language_model = NgramModel(1, review_data, estimator=None)

    # Full review dataset to be scored.
    full_dataset = tp.seg_fil_excel(filepath, sheetnum, colnum)

    scored = entropy_perplexity(language_model, full_dataset)

    # Context manager instead of open()/close(): the handle is released even
    # when an exception interrupts the loop.
    with open(storepath, 'w') as out:
        for pair in scored:
            out.write(str(pair[0]) + '\t' + str(pair[1]) + '\n')
ent_per = [] for r in dataset: ent = model.entropy(r) per = model.perplexity(r) ent_per.append((ent, per)) return ent_per """ testing """ filepath = 'E:/GraduationProject/pythoncode/project/Prediction/main/ReviewSet/Motorala.xlsx' storepath = 'E:/GraduationProject/pythoncode/project/Prediction/main/result/DifferenceFeature.txt' review = word_by_word_review(filepath, 1, 11) data = tp.seg_fil_excel(filepath, 1, 11) # print len_data # lm = NgramModel(1, review, estimator=None) # ep = entropy_perplexity(lm,data[1:990]) # print ep # Store features # def store_name_brand_attribute_features(review_data, filepath, sheetnum, colnum, storepath): # # Building an ngram language model of a certain product category review # lm = NgramModel(1, review_data, estimator=None) # Need initiallized # # # Read full review dataset # data = tp.seg_fil_excel(filepath, sheetnum, colnum) # # ep = entropy_perplexity(lm, data) #