import re
import pprint

import nltk
import pandas as pd
from nltk.tokenize import word_tokenize

# STOPWORDS, dp (data preprocessing), tokenize, pos_tag,
# aspects_from_tagged_sents and aspect_opinions are expected to be
# provided by the project's other modules.


def get_aspects(Amazon, Flipkart, input_text):
    # treat the product name itself (and its individual words) as stopwords
    if input_text not in STOPWORDS:
        STOPWORDS.add(input_text)
    skip_words = input_text.split(' ')
    for word in skip_words:
        if word not in STOPWORDS:
            STOPWORDS.add(word)

    # using preprocessed text for categorization
    reviews = dp.dataframecomplete(Amazon, Flipkart)

    # initializing the aspects dictionary
    aspects_dict = {}

    # loop over the preprocessed reviews
    for review in reviews['review']:
        # convert the review to lowercase and pad punctuation with spaces
        review = review.lower()
        res = ' '
        for ch in review:
            if not re.match('[a-zA-Z0-9_\' ]', ch):
                res = res + ' ' + ch + ' '
            else:
                res = res + ch
        review = res[1:]
        print "\n\n\nreview: " + review

        word_tokens = word_tokenize(review)
        # pos tagging
        pos_tuples = nltk.pos_tag(word_tokens)

        # split the review into clauses on punctuation and coordinating conjunctions
        split_sets = []
        split_list = ' '
        final_sets = []
        split_words = [',', '.', ';', '!', '?', '+']
        for word, pos in pos_tuples:
            if pos == 'CC':
                split_words.append(word)
        #print "word tokens:"
        #print word_tokens
        #print "pos tuples:"
        #print pos_tuples
        for word in word_tokens:
            if word in split_words:
                split_sets.append(split_list[2:])
                split_list = ' '
            if word not in split_words:
                split_list += ' ' + word
        split_sets.append(split_list[2:])
        for i in range(0, len(split_sets)):
            if len(split_sets[i]) != 0:
                final_sets.append(split_sets[i])
        split_words = [',', '.', ';']
        print "sets:"
        print final_sets

        # extract aspects and their describing entities from each clause
        for sentence in final_sets:
            tokenized_data = tokenize(sentence.encode('utf-8', 'ignore'))
            #print "tokenized_data:"
            #print tokenized_data
            pos_tagged_data = pos_tag(tokenized_data)
            #print "pos_tagged_data:"
            #print pos_tagged_data
            final_aspects = {}
            aspects_data = aspects_from_tagged_sents(pos_tagged_data)
            print "aspects and entities :"
            print aspects_data
            # merge this clause's aspects into the running dictionary
            for asp in aspects_data:
                if asp in aspects_dict:
                    for ent in aspects_data[asp]:
                        if ent in aspects_dict[asp]:
                            aspects_dict[asp][ent] += 1
                        else:
                            aspects_dict[asp][ent] = 1
                else:
                    aspects_dict[asp] = aspects_data[asp]
        i += 1

    pprint.pprint(aspects_dict)

    # print the top 15 aspects
    i = 0
    print '\n\n\n\n\n\n\n\n\n\n'
    for key, value in sorted(aspects_dict.iteritems(),
                             key=lambda (k, v): (v, k), reverse=True):
        if i < 15:
            pprint.pprint(key)
            #for ke in aspects_dict[key].keys():
            #    wrd = key + ' ' + ke
            #    txt = TextBlob(wrd)
            #    print "text :" + wrd + "\t\t\tsentiment: " + str(txt.sentiment.polarity)
            i = i + 1
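# get_aspects() relies on tokenize(), pos_tag() and aspects_from_tagged_sents()
# supplied by the project's other modules. The sketches below only illustrate
# the expected interfaces (noun aspects mapped to {describing word: count});
# they carry a _sketch suffix because they are assumptions, not the project's
# actual implementation.
def tokenize_sketch(sentence):
    """Hypothetical helper: split one sentence into word tokens."""
    return nltk.word_tokenize(sentence)


def pos_tag_sketch(tokens):
    """Hypothetical helper: POS-tag a list of word tokens."""
    return nltk.pos_tag(tokens)


def aspects_from_tagged_sents_sketch(pos_tagged):
    """Hypothetical helper: map noun aspects to {adjective: count},
    e.g. [('great', 'JJ'), ('battery', 'NN')] -> {'battery': {'great': 1}}.
    """
    aspects = {}
    for i, (word, pos) in enumerate(pos_tagged):
        # a noun preceded by an adjective is treated as an aspect/entity pair
        if pos.startswith('NN') and i > 0 and pos_tagged[i - 1][1].startswith('JJ'):
            adjective = pos_tagged[i - 1][0]
            entities = aspects.setdefault(word, {})
            entities[adjective] = entities.get(adjective, 0) + 1
    return aspects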
def read_data():
    """
    INPUT: None
    OUTPUT: pandas data frame from file
    """
    return pd.read_csv('high_review_restaurants.csv')


def main():
    """
    The true main.
    """
    reviews = dp.dataframecomplete('Apple-iPhone-Space-Grey-32GB.json',
                                   'apple-iphone-6-space-grey-32-gb.json')
    business = 'apple-iphone-6-space-grey-32-gb'  # product identifier used as the score key
    score = {}
    score[business] = aspect_opinions(reviews)
    print score


if __name__ == "__main__":
    main()
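
# aspect_opinions() is imported from another module of the project and is
# expected to turn the combined reviews frame into a mapping of aspects to
# opinion scores. The sketch below is only an assumption of that interface,
# scoring each noun by the average TextBlob polarity of the sentences it
# occurs in; it is not the project's actual implementation.
def aspect_opinions_sketch(reviews):
    """Hypothetical stand-in: map noun aspects to an average sentence polarity."""
    from textblob import TextBlob
    totals = {}
    counts = {}
    for review in reviews['review']:
        for sentence in nltk.sent_tokenize(review.lower()):
            polarity = TextBlob(sentence).sentiment.polarity
            for word, pos in nltk.pos_tag(nltk.word_tokenize(sentence)):
                if pos.startswith('NN'):
                    totals[word] = totals.get(word, 0.0) + polarity
                    counts[word] = counts.get(word, 0) + 1
    return dict((word, totals[word] / counts[word]) for word in totals)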