def word_by_word_review(filepath, sheetnum, colnum):
    """Read product reviews from an excel sheet, segment each review,
    strip stopwords, and return every remaining word as one flat list.

    filepath -- path of the excel workbook
    sheetnum -- sheet index forwarded to tp.get_excel_data
    colnum   -- column index holding the review text

    Returns a one-dimensional list of words from all reviews.
    """
    # Read product review data from excel file and segment every review.
    # NOTE(review): tp.get_excel_data is a project helper; assumed that
    # 'rownum' returns an int row count -- confirm in textprocessing.
    review_data = []
    for cell in tp.get_excel_data(filepath, sheetnum, colnum, 'data')[
            0:tp.get_excel_data(filepath, sheetnum, colnum, 'rownum')]:
        review_data.append(tp.segmentation(cell, 'list'))  # segment every review

    # Read the txt file containing stopwords (one per line).
    stopwords = tp.get_txt_data(
        'E:/GraduationProject/pythoncode/project/Prediction/main/PreprocessingModule/stopword.txt',
        'lines')

    # Filter stopwords (and bare spaces left by the segmenter) from every
    # review.  The original's dead `fil = []` reset after append and the
    # dead commented-out stopword path were removed.
    seg_fil_result = [
        [word for word in review if word not in stopwords and word != ' ']
        for review in review_data
    ]

    # Return the review set as a one-dimensional list.
    return list(itertools.chain(*seg_fil_result))
def word_by_word_review(filepath, sheetnum, colnum):
    """Read product reviews from an excel sheet, segment each review,
    strip stopwords, and return every remaining word as one flat list.

    filepath -- path of the excel workbook
    sheetnum -- sheet index forwarded to tp.get_excel_data
    colnum   -- column index holding the review text

    Returns a one-dimensional list of words from all reviews.
    """
    # Read product review data from excel file and segment every review.
    # BUG FIX: the row count was fetched via a bare `get_excel_data(...)`,
    # which raises NameError -- only the module alias `tp` is in scope.
    review_data = []
    for cell in tp.get_excel_data(filepath, sheetnum, colnum, 'data')[
            0:tp.get_excel_data(filepath, sheetnum, colnum, 'rownum')]:
        review_data.append(tp.segmentation(cell, 'list'))  # segment every review

    # Read the txt file containing stopwords (one per line).
    stopwords = tp.get_txt_data(
        '/home/sooda/nlp/Review-Helpfulness-Prediction/data/stopword.txt',
        'lines')

    # Filter stopwords (and bare spaces left by the segmenter) from every
    # review; the dead `fil = []` reset of the original was dropped.
    seg_fil_result = [
        [word for word in review if word not in stopwords and word != ' ']
        for review in review_data
    ]

    # Return the review set as a one-dimensional list.
    return list(itertools.chain(*seg_fil_result))
def store_word_sent_num_features(filepath, sheetnum, colnum, data, storepath): data = tp.get_excel_data(filepath, sheetnum, colnum, 'data') word_sent_num = word_sent_count(data) #需要初始化 print word_sent_num f = open(storepath, 'w') for i in word_sent_num: f.write(str(i[0]) + ' ' + str(i[1]) + ' ' + str(i[2]) + '\n') f.close()
def store_adj_adv_v_num_feature(filepath, sheetnum, colnum, data, storepath):
    """Compute adjective/adverb(/verb) count features for every review in an
    excel column and write one space-separated triple per review to *storepath*.

    NOTE(review): the incoming `data` argument is immediately shadowed by a
    fresh read of the excel sheet; it is kept only so the signature stays
    compatible with existing callers.
    """
    data = tp.get_excel_data(filepath, sheetnum, colnum, 'data')
    adj_adv_num = count_adj_adv(data)
    # `with` guarantees the output file is closed even if a write raises
    # (the original open()/close() pair leaked the handle on error).
    with open(storepath, 'w') as f:
        for i in adj_adv_num:
            f.write(str(i[0]) + ' ' + str(i[1]) + ' ' + str(i[2]) + '\n')
def word_by_word_review(filepath, sheetnum, colnum):
    """Read product reviews from an excel sheet, segment each review,
    strip stopwords, and return every remaining word as one flat list.

    filepath -- path of the excel workbook
    sheetnum -- sheet index forwarded to tp.get_excel_data
    colnum   -- column index holding the review text

    Returns a one-dimensional list of words from all reviews.
    """
    # BUG FIX: the row count was fetched via a bare `get_excel_data(...)`,
    # which raises NameError -- only the module alias `tp` is in scope.
    review_data = []
    for cell in tp.get_excel_data(filepath, sheetnum, colnum, 'data')[
            0:tp.get_excel_data(filepath, sheetnum, colnum, 'rownum')]:
        review_data.append(tp.segmentation(cell, 'list'))  # segment every review

    # Read the txt file containing stopwords (one per line).
    stopwords = tp.get_txt_data(
        '/home/sooda/nlp/Review-Helpfulness-Prediction/data/stopword.txt',
        'lines')

    # Filter stopwords (and bare spaces left by the segmenter) from every
    # review; the dead `fil = []` reset of the original was dropped.
    seg_fil_result = [
        [word for word in review if word not in stopwords and word != ' ']
        for review in review_data
    ]

    # Return the review set as a one-dimensional list.
    return list(itertools.chain(*seg_fil_result))
import sklearn
from nltk.classify.scikitlearn import SklearnClassifier

## my classifier path
# Output locations: extracted word/n-gram features and the pickled
# sentiment classifier.
filefeature = 'E:/GraduationProject/pythoncode/project/Prediction/main/result/feature_word_ngram.txt'
filename = 'E:/GraduationProject/pythoncode/project/Prediction/main/FeatureExtractionModule/SentimentFeatures/MachineLearningFeatures/senti_class_word_ngram.pkl'

# 1. Load data
"""
review = tp.get_excel_data("D:/code/sentiment_test/review_set.xlsx", "1", "1", "data")
sentiment_review = tp.seg_fil_senti_excel("D:/code/sentiment_test/review_set.xlsx", "1", "1")
"""
# Raw review texts plus their segmented / stopword-filtered counterparts,
# both read from sheet 1, column 12 of the Samsung workbook.
review = tp.get_excel_data(
    "E:/GraduationProject/pythoncode/project/Prediction/main/ReviewSet/Samsung.xlsx",
    1, 12, "data")
sentiment_review = tp.seg_fil_senti_excel(
    "E:/GraduationProject/pythoncode/project/Prediction/main/ReviewSet/Samsung.xlsx",
    1, 12)


# 2. Feature extraction method
# Used for transform review to features, so it can calculate sentiment
# probability by classifier
def create_words_bigrams_scores():
    # Segmented, stopword-filtered positive / negative training reviews.
    posdata = tp.seg_fil_senti_excel(
        "E:/GraduationProject/pythoncode/project/Prediction/main/FeatureExtractionModule/SentimentFeatures/MachineLearningFeatures/SenimentReviewSet/pos_review.xlsx",
        1, 1)
    # NOTE(review): this fragment is truncated here in the source view; the
    # call below is reproduced exactly as far as it is visible.
    negdata = tp.seg_fil_senti_excel(
        "E:/GraduationProject/pythoncode/project/Prediction/main/FeatureExtractionModule/SentimentFeatures/MachineLearningFeatures/SenimentReviewSet/neg_review.xlsx",
import textprocessing as tp
import pickle
import itertools
from random import shuffle

import nltk
from nltk.collocations import BigramCollocationFinder
from nltk.metrics import BigramAssocMeasures
from nltk.probability import FreqDist, ConditionalFreqDist

import sklearn

# 1. Load data
# Raw review texts plus their segmented / stopword-filtered counterparts.
review = tp.get_excel_data("D:/code/sentiment_test/review_set.xlsx", "1", "1")
sentiment_review = tp.seg_fil_senti_excel("D:/code/sentiment_test/review_set.xlsx", "1", "1")


# 2. Feature extraction method
# Used for transform review to features, so it can calculate sentiment
# probability by classifier
def create_words_bigrams_scores():
    # Segmented, stopword-filtered positive / negative training reviews.
    posdata = tp.seg_fil_senti_excel("D:/code/sentiment_test/pos_review.xlsx", "1", "1")
    negdata = tp.seg_fil_senti_excel("D:/code/sentiment_test/neg_review.xlsx", "1", "1")

    posWords = list(itertools.chain(*posdata))
    negWords = list(itertools.chain(*negdata))

    # NOTE(review): the second from_words() call immediately overwrites the
    # first, so posBigrams below is computed from negWords only -- looks
    # like a bug; confirm against the intended algorithm.
    bigram_finder = BigramCollocationFinder.from_words(posWords)
    bigram_finder = BigramCollocationFinder.from_words(negWords)
    posBigrams = bigram_finder.nbest(BigramAssocMeasures.chi_sq, 5000)
# NOTE(review): `sys` is used here but no `import sys` is visible in this
# fragment -- presumably imported above the visible region; confirm.
sys.path.append("./Preprocessing module/")
import textprocessing as tp
import pickle
import itertools
from random import shuffle

import nltk
from nltk.collocations import BigramCollocationFinder
from nltk.metrics import BigramAssocMeasures
from nltk.probability import FreqDist, ConditionalFreqDist

import sklearn

# 1. Load data
# Raw review texts plus their segmented / stopword-filtered counterparts,
# both read from sheet 1, column 7 of the shared review set.
review = tp.get_excel_data("../../../Review set/review_set.xlsx", 1, 7, "data")
sentiment_review = tp.seg_fil_senti_excel("../../../Review set/review_set.xlsx", 1, 7)


# 2. Feature extraction method
# Used for transform review to features, so it can calculate sentiment
# probability by classifier
def create_words_bigrams_scores():
    # Segmented, stopword-filtered positive / negative training reviews
    # (paths relative to the current working directory).
    posdata = tp.seg_fil_senti_excel("pos_review.xlsx", 1, 1)
    negdata = tp.seg_fil_senti_excel("neg_review.xlsx", 1, 1)

    posWords = list(itertools.chain(*posdata))
    negWords = list(itertools.chain(*negdata))

    # NOTE(review): the second from_words() call immediately overwrites the
    # first, so posBigrams below is computed from negWords only -- looks
    # like a bug; confirm against the intended algorithm.
    bigram_finder = BigramCollocationFinder.from_words(posWords)
    bigram_finder = BigramCollocationFinder.from_words(negWords)
    posBigrams = bigram_finder.nbest(BigramAssocMeasures.chi_sq, 5000)
__author__ = 'anchengwu' #coding=utf-8 import sys sys.path.append("../../../Preprocessing module") import pos_neg_senti_dict_feature as pn import textprocessing as tp # Load dataset review = tp.get_excel_data( "../Machine learning features/seniment review set/pos_review.xlsx", 1, 1, "data") #test single dataset print pn.single_review_sentiment_score( '买过散装的粽子才来买礼盒的,礼盒很大气,比超市买的100多的还要好,配置也不错,肉的素的都有,刚煮了个蛋黄粽子很不错,米好蛋黄也黄很香,老板态度很好,还想买一份~' .decode('utf8')) #test all dataset for i in pn.all_review_sentiment_score(pn.sentence_sentiment_score(review)): print i
#! /usr/bin/env python2.7 #coding=utf-8 import textprocessing as tp review = tp.get_excel_data("../data/review_set.xlsx", 1, 1, "data") review_txt = open('reivew.txt', 'wb+') for r in review: print r review_txt.write(r) review_txt.write('\n') review_txt.close()
# NOTE(review): `sys` is used here but no `import sys` is visible in this
# fragment -- presumably imported above the visible region; confirm.
sys.path.append("./Preprocessing module/")
import textprocessing as tp
import cPickle as pickle
import itertools
from random import shuffle

import nltk
from nltk.collocations import BigramCollocationFinder
from nltk.metrics import BigramAssocMeasures
from nltk.probability import FreqDist, ConditionalFreqDist

import sklearn

# 1. Load data
# Raw review texts plus their segmented / stopword-filtered counterparts,
# both read from sheet 1, column 7 (paths relative to the working dir).
review = tp.get_excel_data("review_set.xlsx", 1, 7, "data")
sentiment_review = tp.seg_fil_senti_excel("review_set.xlsx", 1, 7)


# 2. Feature extraction method
# Used for transform review to features, so it can calculate sentiment
# probability by classifier
def create_words_bigrams_scores():
    # Segmented, stopword-filtered positive / negative training reviews.
    posdata = tp.seg_fil_senti_excel("pos_review.xlsx", 1, 1)
    negdata = tp.seg_fil_senti_excel("neg_review.xlsx", 1, 1)

    posWords = list(itertools.chain(*posdata))
    negWords = list(itertools.chain(*negdata))

    # NOTE(review): the second from_words() call immediately overwrites the
    # first, so posBigrams below is computed from negWords only -- looks
    # like a bug; confirm against the intended algorithm.
    bigram_finder = BigramCollocationFinder.from_words(posWords)
    bigram_finder = BigramCollocationFinder.from_words(negWords)
    posBigrams = bigram_finder.nbest(BigramAssocMeasures.chi_sq, 5000)
import textprocessing as tp
import pickle
import itertools
from random import shuffle

import nltk
from nltk.collocations import BigramCollocationFinder
from nltk.metrics import BigramAssocMeasures
from nltk.probability import FreqDist, ConditionalFreqDist

import sklearn

# 1. Load data
# NOTE(review): the raw reviews come from the HTC workbook while the
# segmented/filtered reviews come from the Meizu workbook -- the two
# datasets do not match; confirm this is intentional.
review = tp.get_excel_data("/home/hadoop/coding/Review set/HTC Z710t_review_2013.6.5.xlsx", 1, 12, "data")
sentiment_review = tp.seg_fil_senti_excel("/home/hadoop/coding/Review set/Meizu MX_review_2013.6.7.xlsx", 1, 12)


# 2. Feature extraction method
# Used for transform review to features, so it can calculate sentiment
# probability by classifier
def create_words_bigrams_scores():
    # Segmented, stopword-filtered positive / negative training reviews.
    posdata = tp.seg_fil_senti_excel("/home/hadoop/coding/Sentiment features/Machine learning features/seniment review set/pos_review.xlsx", 1, 1)
    negdata = tp.seg_fil_senti_excel("/home/hadoop/coding/Sentiment features/Machine learning features/seniment review set/neg_review.xlsx", 1, 1)

    posWords = list(itertools.chain(*posdata))
    negWords = list(itertools.chain(*negdata))

    # Turn the text into bigram collocations.
    # NOTE(review): the second from_words() call immediately overwrites the
    # first, so later statistics use negWords only -- looks like a bug;
    # confirm against the intended algorithm.  (Fragment truncated here.)
    bigram_finder = BigramCollocationFinder.from_words(posWords)
    bigram_finder = BigramCollocationFinder.from_words(negWords)
"""
# NOTE(review): this fragment begins with a bare triple-quote; it is unclear
# whether it opens or closes a block-comment string, and no matching
# delimiter is visible in this view -- confirm against the full file.
import textprocessing as tp
import pickle
import itertools
from random import shuffle

import nltk
from nltk.collocations import BigramCollocationFinder
from nltk.metrics import BigramAssocMeasures
from nltk.probability import FreqDist, ConditionalFreqDist

import sklearn

# 1. Load data
review = tp.get_excel_data("D:/code/sentiment_test/review_set.xlsx", "1", "1", "data")
sentiment_review = tp.seg_fil_senti_excel(
    "D:/code/sentiment_test/review_set.xlsx", "1", "1")


# 2. Feature extraction method
# Used for transform review to features, so it can calculate sentiment
# probability by classifier
def create_words_bigrams_scores():
    posdata = tp.seg_fil_senti_excel("D:/code/sentiment_test/pos_review.xlsx", "1", "1")
    negdata = tp.seg_fil_senti_excel("D:/code/sentiment_test/neg_review.xlsx", "1", "1")

    posWords = list(itertools.chain(*posdata))
    negWords = list(itertools.chain(*negdata))
__author__ = 'anchengwu' #coding=utf-8 import sys sys.path.append("../../../Preprocessing module") import pos_neg_senti_dict_feature as pn import textprocessing as tp # Load dataset review = tp.get_excel_data("../Machine learning features/seniment review set/pos_review.xlsx", 1, 1, "data") #test single dataset print pn.single_review_sentiment_score('买过散装的粽子才来买礼盒的,礼盒很大气,比超市买的100多的还要好,配置也不错,肉的素的都有,刚煮了个蛋黄粽子很不错,米好蛋黄也黄很香,老板态度很好,还想买一份~'.decode('utf8')) #test all dataset for i in pn.all_review_sentiment_score(pn.sentence_sentiment_score(review)): print i
# Load sentiment dictionary posdict = tp.get_txt_data("D:/code/sentiment_dictionary/posdict.txt", "lines") negdict = tp.get_txt_data("D:/code/sentiment_dictionary/negdict.txt", "lines") # Load adverbs of degree dictionary mostdict = tp.get_txt_data('D:/code/sentiment_dictionary/most.txt', 'lines') verydict = tp.get_txt_data('D:/code/sentiment_dictionary/very.txt', 'lines') moredict = tp.get_txt_data('D:/code/sentiment_dictionary/more.txt', 'lines') ishdict = tp.get_txt_data('D:/code/sentiment_dictionary/ish.txt', 'lines') insufficientdict = tp.get_txt_data( 'D:/code/sentiment_dictionary/insufficiently.txt', 'lines') inversedict = tp.get_txt_data('D:/code/sentiment_dictionary/inverse.txt', 'lines') # Load dataset review = tp.get_excel_data("D:/code/review_set.xlxs", "1", "1", "data") # 2. Sentiment dictionary analysis basic function # Function of matching adverbs of degree and set weights def match(word, sentiment_value): if word in mostdict: sentiment_value *= 2.0 elif word in verydict: sentiment_value *= 1.5 elif word in moredict: sentiment_value *= 1.25 elif word in ishdict: sentiment_value *= 0.5 elif word in insufficientdict: sentiment_value *= 0.25
'E:/GraduationProject/pythoncode/project/Prediction/main/FeatureExtractionModule/SentimentFeatures/SentimentDictionaryFeatures/SentimentDictionary/AdverbsOfDegreeDictionary/ish.txt',
    'lines')
# NOTE(review): this fragment starts mid-expression (the opening
# tp.get_txt_data( call is above the visible region).
insufficientdict = tp.get_txt_data(
    'E:/GraduationProject/pythoncode/project/Prediction/main/FeatureExtractionModule/SentimentFeatures/SentimentDictionaryFeatures/SentimentDictionary/AdverbsOfDegreeDictionary/insufficiently.txt',
    'lines')
inversedict = tp.get_txt_data(
    'E:/GraduationProject/pythoncode/project/Prediction/main/FeatureExtractionModule/SentimentFeatures/SentimentDictionaryFeatures/SentimentDictionary/AdverbsOfDegreeDictionary/inverse.txt',
    'lines')

# Load dataset
#review = tp.get_excel_data("E:/GraduationProject/pythoncode/project/Prediction/main/ReviewSet/HTC.xlsx", 1, 12, "data")
#review = tp.get_excel_data("E:/GraduationProject/pythoncode/project/Prediction/main/ReviewSet/OPPO.xlsx", 1, 12, "data")
#review = tp.get_excel_data("E:/GraduationProject/pythoncode/project/Prediction/main/ReviewSet/MeiZuMX.xlsx", 1, 12, "data")
#review = tp.get_excel_data("E:/GraduationProject/pythoncode/project/Prediction/main/ReviewSet/Samsung.xlsx", 1, 12, "data")
review = tp.get_excel_data(
    "E:/GraduationProject/pythoncode/project/Prediction/main/ReviewSet/Motorala.xlsx",
    1, 11, "data")  # get the values of the first sheet's column from the excel file
#print review[1]


# 2. Sentiment dictionary analysis basic function
# Function of matching adverbs of degree and set weights
def match(word, sentiment_value):
    """Scale sentiment_value by the weight class of the degree adverb `word`.

    NOTE(review): the function is truncated in this view -- the remaining
    branches and the return are not visible.
    """
    if word in mostdict:
        sentiment_value *= 2.0
    elif word in verydict:
        sentiment_value *= 1.5
    elif word in moredict:
        sentiment_value *= 1.25
# 1. Load dictionary and dataset # Load sentiment dictionary posdict = tp.get_txt_data("D:/code/sentiment_dictionary/posdict.txt","lines") negdict = tp.get_txt_data("D:/code/sentiment_dictionary/negdict.txt","lines") # Load adverbs of degree dictionary mostdict = tp.get_txt_data('D:/code/sentiment_dictionary/most.txt', 'lines') verydict = tp.get_txt_data('D:/code/sentiment_dictionary/very.txt', 'lines') moredict = tp.get_txt_data('D:/code/sentiment_dictionary/more.txt', 'lines') ishdict = tp.get_txt_data('D:/code/sentiment_dictionary/ish.txt', 'lines') insufficientdict = tp.get_txt_data('D:/code/sentiment_dictionary/insufficiently.txt', 'lines') inversedict = tp.get_txt_data('D:/code/sentiment_dictionary/inverse.txt', 'lines') # Load dataset review = tp.get_excel_data("D:/code/review_set.xlxs", "1", "1", "data") # 2. Sentiment dictionary analysis basic function # Function of matching adverbs of degree and set weights def match(word, sentiment_value): if word in mostdict: sentiment_value *= 2.0 elif word in verydict: sentiment_value *= 1.5 elif word in moredict: sentiment_value *= 1.25 elif word in ishdict: sentiment_value *= 0.5 elif word in insufficientdict: sentiment_value *= 0.25
#! /usr/bin/env python2.7 # coding=utf-8 import textprocessing as tp review = tp.get_excel_data("../data/review_set.xlsx", 1, 1, "data") review_txt = open("reivew.txt", "wb+") for r in review: print r review_txt.write(r) review_txt.write("\n") review_txt.close()
'/home/sooda/nlp/Review-Helpfulness-Prediction/data/sentiment_dictionary/more.txt',
    'lines')
# NOTE(review): this fragment starts mid-expression (the opening
# tp.get_txt_data( call is above the visible region).
ishdict = tp.get_txt_data(
    '/home/sooda/nlp/Review-Helpfulness-Prediction/data/sentiment_dictionary/ish.txt',
    'lines')
insufficientdict = tp.get_txt_data(
    '/home/sooda/nlp/Review-Helpfulness-Prediction/data/sentiment_dictionary/insufficiently.txt',
    'lines')
inversedict = tp.get_txt_data(
    '/home/sooda/nlp/Review-Helpfulness-Prediction/data/sentiment_dictionary/inverse.txt',
    'lines')

# Load dataset
#review = tp.get_excel_data("/home/sooda/nlp/Review-Helpfulness-Prediction/data/review_set.xlxs", "1", "1", "data")
review = tp.get_excel_data(
    "/home/sooda/nlp/Review-Helpfulness-Prediction/data/sentiment_test/review_set.xlsx",
    1, 1, "data")


# 2. Sentiment dictionary analysis basic function
# Function of matching adverbs of degree and set weights
def match(word, sentiment_value):
    """Scale sentiment_value by the weight class of the degree adverb `word`.

    NOTE(review): the function is truncated in this view, mid-branch -- the
    remaining branches and the return are not visible.
    """
    if word in mostdict:
        sentiment_value *= 2.0
    elif word in verydict:
        sentiment_value *= 1.5
    elif word in moredict:
        sentiment_value *= 1.25
    elif word in ishdict:
        sentiment_value *= 0.5
    elif word in insufficientdict:
import textprocessing as tp
import pickle
import itertools
from random import shuffle

import nltk
from nltk.collocations import BigramCollocationFinder
from nltk.metrics import BigramAssocMeasures
from nltk.probability import FreqDist, ConditionalFreqDist

import sklearn

# 1. Load data
# Raw review texts plus their segmented / stopword-filtered counterparts.
review = tp.get_excel_data("/home/sooda/nlp/Review-Helpfulness-Prediction/data/sentiment_test/review_set.xlsx", 1, 1, "data")
sentiment_review = tp.seg_fil_senti_excel("/home/sooda/nlp/Review-Helpfulness-Prediction/data/sentiment_test/review_set.xlsx", 1, 1)


# 2. Feature extraction method
# Used for transform review to features, so it can calculate sentiment
# probability by classifier
def create_words_bigrams_scores():
    # Segmented, stopword-filtered positive / negative training reviews.
    posdata = tp.seg_fil_senti_excel("/home/sooda/nlp/Review-Helpfulness-Prediction/data/sentiment_test/pos_review.xlsx", 1, 1)
    negdata = tp.seg_fil_senti_excel("/home/sooda/nlp/Review-Helpfulness-Prediction/data/sentiment_test/neg_review.xlsx", 1, 1)

    posWords = list(itertools.chain(*posdata))
    negWords = list(itertools.chain(*negdata))

    # NOTE(review): the second from_words() call immediately overwrites the
    # first, so posBigrams below is computed from negWords only -- looks
    # like a bug; confirm against the intended algorithm.
    bigram_finder = BigramCollocationFinder.from_words(posWords)
    bigram_finder = BigramCollocationFinder.from_words(negWords)
    posBigrams = bigram_finder.nbest(BigramAssocMeasures.chi_sq, 5000)