# Get all test adjectives. The result is unfiltered: around 1 lakh (100,000)
# entries, and it contains a lot of non-adjectives.
__author__ = 'daksh'
import time
from dishingOut.Database.database import MongoOperator as mongo
from dishingOut.NPChunking.NPChunker import NPChunker
import csv
import json

# Build and train the NPChunker up front.
chunker = NPChunker()
chunker.train()

# Disabled alternative data source, kept for reference: read the restaurants
# from the 'reviews' collection of the 'DishingOut' database instead of the
# JSON file used below.
# database = mongo('DishingOut')
# database.setUpConnection()
# database.setUpCollection('reviews')
# restaurants = database.getAll()

# Load the restaurant list and keep only the entries from index 2000 onwards.
with open('../data/restaurants.json') as data_file:
    restaurants = json.load(data_file)[2000:]

# Timestamp taken right after loading.
start = time.time()

# Disabled review walk, kept for reference:
# for restaurant in restaurants:
#     for review in restaurant['userReviews']:
#         text = review['reviewText']
#         sentences = chunker.split(text)
#         for sent in sentences:
#             tree,terms = chunker.extractChunk(sent)
#             print(tree)

vocabulary = []
import nltk
import csv
import json
from dishingOut.NPChunking.NPChunker import NPChunker
# Extend NLTK's data search path; kept ahead of the corpus import, as before.
nltk.data.path.append("/home/daksh/Documents/Softwares/nltk_data")
from nltk.corpus import sentiwordnet as wn
from textblob import Word


def _read_first_column(path):
    """Return the first cell of every non-empty row in the CSV file at path.

    csv.reader yields an empty list for a blank line; those rows are skipped
    so that a stray blank line (for example at the end of the file) does not
    raise IndexError.
    """
    with open(path, 'r') as csvfile:
        return [row[0] for row in csv.reader(csvfile) if row]


# Build and train the NPChunker before the word lists are read.
chunker = NPChunker()
chunker.train()

# Seed adjectives: one word per row, taken from the first column.
pos_words = _read_first_column('../data/posAdj.csv')
neg_words = _read_first_column('../data/negAdj.csv')

# Every positive seed word maps to True.
posSeed_list = {key: True for key in pos_words}
# Manual check of NPChunker.extractChunk on a single sentence: the returned
# tree is drawn and the returned terms are printed.
__author__ = 'daksh'
from dishingOut.NPChunking.NPChunker import NPChunker

chunker = NPChunker()
chunker.train()

# Sentences tried earlier. To re-run one, uncomment it and comment out the
# active `text` assignment further down.
# tree,terms = chunker.extractChunk("Dosa was good")
# text = 'An institution in south indian offerings with Benne Masala Dosa at its best along with rava upma,kesari bath and mangalore bhajji with same level of excellence.Masala Dosa- You cant eat just one,will crave for more for sure.It tastes heavenly with the chutney CTR offers,its own kind.'
# text = 'Biryani was aromatic and superb'
# text = "Brilliant Idli Vada but Kara Bhath is not good!"
# text = "Amritsari fish were my favorites of the day"
# text = "Rasgulla and jamun were good"
# text = "Idli Vada and Kara Bhath are good!"
# text= "We had a crispy dosa and the coffee was not hot but the paneer was pathetic"
# text = "crispy dosa and coffee not hot"
# text = "Double masala chicken biryani served here is the best biryani I have tried till date"
# text = "Other starters that I have ordered like lemon chicken chicken kebab paneer Manchurian are okayish but the taste of biryani covers it all for me"
# text = "paneer was tasty"
# text = "I did not expect palak paneer to be so good but to my wonder it was excellent."

# The sentence currently under test.
text = "Paneer tikka - Soft and tasty."

(tree, terms) = chunker.extractChunk(text)

# Draw the tree first, then print the terms (same order as before).
tree.draw()
# tree.draw()
print(terms)

# Disabled sentence-level experiment, kept for reference:
# sentences = chunker.getSentences(text)
# print(type(sentences[0]))
# for sent in sentences:
#     print(chunker.getAdjetives(sent))
class AntonymReplacer(object):
    """Replace "not <word>" pairs with an antonym and report leftover negation.

    The class uses two names that must exist at module scope: ``NPChunker``
    (used to split a sentence into words) and ``wn`` (used for the antonym
    lookup through ``wn.senti_synsets``).
    """

    def __init__(self):
        """Create and train the NPChunker used by NegationCheck."""
        self.chunker = NPChunker()
        self.chunker.train()

    def replace(self, adj, pos=None):
        """Return the first antonym found for ``adj``, or None.

        Synsets are visited in the order ``wn.senti_synsets(adj)`` yields
        them and lemmas in the order each synset lists them. The first lemma
        that has any antonym decides the result, so the search stops there.

        Args:
            adj: the word to look up.
            pos: not used; kept so existing callers keep working.

        Returns:
            The name of the first antonym, or None when no lemma has one.
        """
        for senti_synset in wn.senti_synsets(adj):
            for lemma in senti_synset.synset.lemmas():
                opposites = lemma.antonyms()
                if opposites:
                    return opposites[0].name()
        return None

    def replace_negations(self, sent):
        """Collapse each "not X" pair into the antonym of X where one exists.

        Args:
            sent: sequence of word tokens.

        Returns:
            A ``(flag, words)`` tuple. ``flag`` is True when at least one pair
            was replaced; ``words`` is the resulting token list.
        """
        words = []
        flag = False
        i, total = 0, len(sent)
        while i < total:
            word = sent[i]
            # A trailing 'not' has nothing to negate and is copied as is.
            if word == 'not' and i + 1 < total:
                ant = self.replace(sent[i + 1])
                if ant:
                    # The antonym stands in for both 'not' and the next word.
                    words.append(ant)
                    i += 2
                    flag = True
                    continue
            words.append(word)
            i += 1
        return flag, words

    def checkNegationWords(self, words):
        """Return True when ``words`` contains 'not' or 'nothing'."""
        return 'not' in words or 'nothing' in words

    def NegationCheck(self, sentence):
        """Split ``sentence`` into words and resolve or flag its negation.

        Returns:
            ``(False, new_words)`` when at least one "not X" pair was replaced
            by an antonym; otherwise ``(negated, words)``, where ``negated``
            says whether 'not' or 'nothing' occurs in the original words.
        """
        words = self.chunker.getWords(sentence)
        flag, new_sent = self.replace_negations(words)
        if flag:
            return False, new_sent
        return self.checkNegationWords(words), words

# a = AntonymReplacer()
# print(a.NegationCheck('The dosa was good'))
def __init__(self):
    """Create this object's NPChunker, store it as ``self.chunker``, and train it."""
    self.chunker = chunker = NPChunker()
    chunker.train()
class AntonymReplacer(object):
    """Replace "not <word>" pairs with an antonym and report leftover negation.

    The class uses two names that must exist at module scope: ``NPChunker``
    (used to split a sentence into words) and ``wn`` (used for the antonym
    lookup through ``wn.senti_synsets``).
    """

    def __init__(self):
        """Create and train the NPChunker used by NegationCheck."""
        self.chunker = NPChunker()
        self.chunker.train()

    def replace(self, adj, pos=None):
        """Return the first antonym found for ``adj``, or None.

        Synsets are visited in the order ``wn.senti_synsets(adj)`` yields
        them and lemmas in the order each synset lists them. The first lemma
        that has any antonym decides the result, so the search stops there.

        Args:
            adj: the word to look up.
            pos: not used; kept so existing callers keep working.

        Returns:
            The name of the first antonym, or None when no lemma has one.
        """
        for senti_synset in wn.senti_synsets(adj):
            for lemma in senti_synset.synset.lemmas():
                opposites = lemma.antonyms()
                if opposites:
                    return opposites[0].name()
        return None

    def replace_negations(self, sent):
        """Collapse each "not X" pair into the antonym of X where one exists.

        Args:
            sent: sequence of word tokens.

        Returns:
            A ``(flag, words)`` tuple. ``flag`` is True when at least one pair
            was replaced; ``words`` is the resulting token list.
        """
        words = []
        flag = False
        i, total = 0, len(sent)
        while i < total:
            word = sent[i]
            # A trailing 'not' has nothing to negate and is copied as is.
            if word == 'not' and i + 1 < total:
                ant = self.replace(sent[i + 1])
                if ant:
                    # The antonym stands in for both 'not' and the next word.
                    words.append(ant)
                    i += 2
                    flag = True
                    continue
            words.append(word)
            i += 1
        return flag, words

    def checkNegationWords(self, words):
        """Return True when ``words`` contains 'not' or 'nothing'."""
        return 'not' in words or 'nothing' in words

    def NegationCheck(self, sentence):
        """Split ``sentence`` into words and resolve or flag its negation.

        Returns:
            ``(False, new_words)`` when at least one "not X" pair was replaced
            by an antonym; otherwise ``(negated, words)``, where ``negated``
            says whether 'not' or 'nothing' occurs in the original words.
        """
        words = self.chunker.getWords(sentence)
        flag, new_sent = self.replace_negations(words)
        if flag:
            return False, new_sent
        return self.checkNegationWords(words), words

# a = AntonymReplacer()
# print(a.NegationCheck('The dosa was good'))
__author__ = "Daksh"
import nltk
import csv
import json
from dishingOut.NPChunking.NPChunker import NPChunker
# Extend NLTK's data search path; kept ahead of the corpus import, as before.
nltk.data.path.append("/home/daksh/Documents/Softwares/nltk_data")
from nltk.corpus import sentiwordnet as wn
from textblob import Word


def _read_first_column(path):
    """Return the first cell of every non-empty row in the CSV file at path.

    csv.reader yields an empty list for a blank line; those rows are skipped
    so that a stray blank line (for example at the end of the file) does not
    raise IndexError.
    """
    with open(path, 'r') as csvfile:
        return [row[0] for row in csv.reader(csvfile) if row]


# Build and train the NPChunker before the word lists are read.
chunker = NPChunker()
chunker.train()

# Seed adjectives: one word per row, taken from the first column.
pos_words = _read_first_column('../data/posAdj.csv')
neg_words = _read_first_column('../data/negAdj.csv')

# Seed lookups: positive words map to True, negative words to False.
posSeed_list = {key: True for key in pos_words}
negSeed_list = {key: False for key in neg_words}
import json from dishingOut.NPChunking.NPChunker import NPChunker from dishingOut.SentimentAnalysis.antonymReplacer import AntonymReplacer import pickle import time with open('../data/final_data.json','r') as f: data = json.load(f) chunker = NPChunker() chunker.train() replacer = AntonymReplacer() f = open('../SentimentAnalysis/nb_classifier.pickle', 'rb') classifier = pickle.load(f) f.close() print("Done pre-processing") def word_feats(words): return dict([word,True] for word in words) start = time.time() restaurants = data for restaurant in restaurants: dishes = restaurant['dishes'] restaurant['dishRatings'] = dict() print(dishes.keys()) allChunks = dict() for dish in dishes.keys(): # print(dish)
# Manual check of NPChunker.extractChunk on a single sentence: the returned
# tree is drawn and the returned terms are printed.
__author__ = 'daksh'
from dishingOut.NPChunking.NPChunker import NPChunker

chunker = NPChunker()
chunker.train()

# Sentences tried earlier. To re-run one, uncomment it and comment out the
# active `text` assignment further down.
# tree,terms = chunker.extractChunk("Dosa was good")
# text = 'An institution in south indian offerings with Benne Masala Dosa at its best along with rava upma,kesari bath and mangalore bhajji with same level of excellence.Masala Dosa- You cant eat just one,will crave for more for sure.It tastes heavenly with the chutney CTR offers,its own kind.'
# text = 'Biryani was aromatic and superb'
# text = "Brilliant Idli Vada but Kara Bhath is not good!"
# text = "Amritsari fish were my favorites of the day"
# text = "Rasgulla and jamun were good"
# text = "Idli Vada and Kara Bhath are good!"
# text= "We had a crispy dosa and the coffee was not hot but the paneer was pathetic"
# text = "crispy dosa and coffee not hot"
# text = "Double masala chicken biryani served here is the best biryani I have tried till date"
# text = "Other starters that I have ordered like lemon chicken chicken kebab paneer Manchurian are okayish but the taste of biryani covers it all for me"
# text = "paneer was tasty"
# text = "I did not expect palak paneer to be so good but to my wonder it was excellent."

# The sentence currently under test.
text = "Paneer tikka - Soft and tasty."

(tree, terms) = chunker.extractChunk(text)

# Draw the tree first, then print the terms (same order as before).
tree.draw()
# tree.draw()
print(terms)

# Disabled sentence-level experiment, kept for reference:
# sentences = chunker.getSentences(text)
# print(type(sentences[0]))
# for sent in sentences:
#     print(chunker.getAdjetives(sent))