def perform_vader_classification(review_id, review):
    """
    Score one review sentence by sentence and attach a review-level score.

    The review is split into sentences with the English NNSplit model. Each
    sentence is rebuilt as the text of its tokens, every token followed by a
    single space, and lower-cased. Every sentence is scored with
    ``get_polarity`` and labelled with ``det_sentiment``; afterwards only
    sentences for which ``number_words`` reports at least 8 words are kept.
    The review-level polarity is the mean polarity of the kept sentences.

    Parameters
    ----------
    review_id : object
        Identifier written unchanged into the ``review_id`` column.
    review : string
        Raw text of the review.

    Returns
    -------
    frame : pandas.DataFrame
        One row per kept sentence with the columns ``review_id``,
        ``sentence``, ``sen_lvl_polarity``, ``sen_lvl_sentiment``,
        ``review_lvl_polarity`` and ``review_lvl_sentiment``.
    """
    splitter = NNSplit("en")
    # Only one text is passed in, so its sentences are the first result.
    sentences = splitter.split([review])[0]
    lowered = [
        "".join(token.text + " " for token in sentence).lower()
        for sentence in sentences
    ]

    frame = pd.DataFrame(lowered, columns=["sentence"])
    frame['review_id'] = review_id
    frame['sen_lvl_polarity'] = frame['sentence'].apply(get_polarity)
    frame['sen_lvl_sentiment'] = frame['sen_lvl_polarity'].apply(det_sentiment)

    # Drop short sentences only after scoring, then fix the column order.
    long_enough = frame['sentence'].apply(number_words) >= 8
    frame = frame.loc[long_enough].reindex(
        columns=['review_id', 'sentence', 'sen_lvl_polarity', 'sen_lvl_sentiment']
    )

    # Review-level values are identical on every remaining row.
    frame['review_lvl_polarity'] = frame['sen_lvl_polarity'].mean()
    frame['review_lvl_sentiment'] = frame['review_lvl_polarity'].apply(det_sentiment)
    return frame
def SplitingText(text):
    """
    Split text into sentences.

    Before splitting, every standalone lowercase word "but" is replaced by a
    single space. The text is then split with the English NNSplit model and
    the result is passed through ``normalize``.

    Parameters
    ----------
    text : string
        take text as input.

    Returns
    -------
    normalized : list
        Result of ``normalize`` applied to the NNSplit output; described by
        the original author as a list of sentences.
    """
    # Local import keeps this function self-contained.
    import re

    splitter = NNSplit("en")
    # Match "but" only as a whole word: a plain substring replace would also
    # damage words that merely contain it (e.g. "button" -> " ton").
    text = re.sub(r"\bbut\b", " ", text)
    split_text = splitter.split([text])
    return normalize(split_text)
# Moses: Used in cc_net https://github.com/luismsgomes/mosestokenizer # ============================================================================= if False: from mosestokenizer import * splitsents = MosesSentenceSplitter('de') splitsents([data]) # ============================================================================= # https://github.com/bminixhofer/nnsplit # ============================================================================= if False: from nnsplit import NNSplit splitter = NNSplit("de") res = splitter.split([data]) # ============================================================================= # More advanced: Deepsegment: Does not support German # ============================================================================= if False: from deepsegment import DeepSegment # The default language is 'en' segmenter = DeepSegment('de') with open('data/start.txt', 'r') as myfile: data = myfile.read() segmenter.segment('I am Batman i live in gotham') # =============================================================================
# NOTE(review): the next three statements are the tail of a function whose
# `def` line lies before this chunk. They read the 'compound' entry of
# `analyser.polarity_scores(sentence)` and return it.
    score = analyser.polarity_scores(sentence)
    compound = score['compound']
    return compound


def number_words(sentence):
    """Return how many runs of word characters ``str(sentence)`` contains."""
    return len(re.findall(r'\w+', str(sentence)))


# Replace with new input
new_review = "You When I booked with your company on line you showed me pictures of a room I thought I was getting and paying for and then when we arrived that s room was booked and the staff told me we could only book the villa suite theough them directly Which was completely false advertising After being there we realised that you have grouped lots of rooms on the photos together leaving me the consumer confused and extreamly disgruntled especially as its my my wife s 40th birthday present Please make your website more clear through pricing and photos as again I didn t really know what I was paying for and how much it had wnded up being Your photos told me I was getting something I wasn t Not happy and won t be using you again "

# Split the review with the English NNSplit model and rebuild every sentence
# as a plain string: the text of each element followed by a single space.
sent_list = []
splitter = NNSplit("en")
sent = splitter.split([new_review])
# Only one text was passed in, so its sentences are at index 0.
for i in sent[0]:
    new_string = ''
    # presumably `j` is one token of sentence `i` -- confirm against NNSplit
    for j in i:
        new_string += j.text + " "
    sent_list.append(new_string)

# Lower-case every rebuilt sentence.
sent_list_lower = [sent.lower() for sent in sent_list]

# Disabled stop-word removal, kept for reference:
# stop_list = stopwords.words('english')
# sent_list_lower_no_stopword_list = [[word for word in sent.split() if not word in stop_list] for sent in sent_list_lower]
# sent_list_lower_no_stopword = []
# for sent in sent_list_lower_no_stopword_list:
#     new_sent = ' '.join(sent)
#     sent_list_lower_no_stopword.append(new_sent)