def negations(text):
    """Return *text* with negation phrases replaced by antonyms.

    Splits on whitespace, lets AntonymReplacer collapse "not <word>"
    pairs into antonyms, and re-joins the tokens with single spaces.
    """
    tokens = text.split()
    replaced = AntonymReplacer().replace_negations(tokens)
    return ' '.join(replaced)
def evaluate_without_negations(feature_select):
    """Train and evaluate a Naive Bayes sentiment classifier on the
    pos/neg sentence corpora, after antonym-replacing negations.

    Parameters
    ----------
    feature_select : callable
        Maps a list of word tokens to a feature dict for the classifier.

    Returns
    -------
    float
        Classification accuracy on the held-out 10% test split.

    Side effects: prints accuracy, per-class precision/recall, and the
    ten most informative features.
    """
    replacer = AntonymReplacer()

    # Strip all punctuation in one pass. str.maketrans/translate is the
    # Python 3 form (the original used the Python-2-only
    # translate(None, string.punctuation)); 'with' closes the file
    # handles the original leaked.
    punct_table = str.maketrans('', '', string.punctuation)
    with open('\\resources\\original files\\pos.txt', 'r') as pos_file:
        posSentences = re.split(r'\n', pos_file.read().translate(punct_table))
    with open('\\resources\\original files\\neg.txt', 'r') as neg_file:
        negSentences = re.split(r'\n', neg_file.read().translate(punct_table))

    # Build [feature_dict, label] pairs per sentence, replacing
    # negations with antonyms first.
    posFeatures = []
    negFeatures = []
    for i in posSentences:
        posWords = re.findall(r"[\w']+|[.,!?;]", i)
        posWords = replacer.replace_negations(posWords)
        posFeatures.append([feature_select(posWords), 'pos'])
    for i in negSentences:
        negWords = re.findall(r"[\w']+|[.,!?;]", i)
        negWords = replacer.replace_negations(negWords)
        negFeatures.append([feature_select(negWords), 'neg'])

    # 90/10 train/test split, per class.
    posCutoff = int(math.floor(len(posFeatures) * 9 / 10))
    negCutoff = int(math.floor(len(negFeatures) * 9 / 10))
    trainFeatures = posFeatures[:posCutoff] + negFeatures[:negCutoff]
    testFeatures = posFeatures[posCutoff:] + negFeatures[negCutoff:]

    classifier = NaiveBayesClassifier.train(trainFeatures)

    # Index sets of reference vs. predicted labels, for precision/recall.
    referenceSets = defaultdict(set)
    testSets = defaultdict(set)
    for i, (features, label) in enumerate(testFeatures):
        referenceSets[label].add(i)
        predicted = classifier.classify(features)
        testSets[predicted].add(i)

    # Compute accuracy once (the original recomputed it for the printout).
    accuracy = nltk.classify.util.accuracy(classifier, testFeatures)
    print('train on %d instances, test on %d instances'
          % (len(trainFeatures), len(testFeatures)))
    print('accuracy:', accuracy)
    print('pos precision:', nltk.metrics.precision(referenceSets['pos'], testSets['pos']))
    print('pos recall:', nltk.metrics.recall(referenceSets['pos'], testSets['pos']))
    print('neg precision:', nltk.metrics.precision(referenceSets['neg'], testSets['neg']))
    print('neg recall:', nltk.metrics.recall(referenceSets['neg'], testSets['neg']))
    classifier.show_most_informative_features(10)
    return accuracy
def antonym_dealer(document):
    """Collapse negation phrases in *document* (a token list) into antonyms."""
    from replacers import AntonymReplacer
    return AntonymReplacer().replace_negations(document)
# Replacer utilities used by the pipeline below.
from replacers import AntonymReplacer
from replacers import SpellingReplacer

# Kept from an earlier experiment: dumping a trained object to disk.
# from pickle import dump
#
# output = open('t2.pkl', 'wb')
# dump(t2, output, -1)
# output.close()

# Sample review (verbatim, typos included) used to exercise the
# text-normalisation pipeline that follows.
test = "DO NOT GO THERE !!!\n\n1. I knew it was questionbale when i brought in oil i purchased for them to change out. He said they don't do this, because they like to purchase it. In other words, he needed to mark up the price for the same oil.\n\n2. He told me that our Shocks were blown out and said that we can't drive too far. Normally, when your shocks are blown out, your ride will be like a bouncing ball. I closely monitored my drive and i did not have a bumpy ride that indicated blown out shocks. I took it to two separate mechanics and they tested the car and said if the shocks were bad, the car would bounce up and down. \n\nBasically, the owner lied about the shocks to get me to pay to fix them. \n\n3. One of my light bulbs is going out. I looked up the model # to replace them and i went to autozone to purchase the ones for my car. The owner said that these are the wrong headlights and I needed a more expensive set. Now, mind you- the model's I had were based on Lexus' recommendation. \n\nHe then said that it would cost over $300 dollars to change out the bulbs. The bulbs he recommend was about $80 bucks, which means over 200 of labor. \n\nHe will over exaggerate everything to get you to pay more. \n\n\nBtw, I sent my wife in to see if he would try to run up maintenance. \n\nI would not recommend this place at all. He is not goood."
# Normalisation pipeline for the sample review.
# NOTE(review): RegexpReplacer, RepeatReplacer, nltk and string are used
# here but imported elsewhere in the file — confirm they are in scope.
test = test.lower()
regex_replacer = RegexpReplacer()
repeat_replacer = RepeatReplacer()
spell_replacer = SpellingReplacer()
antonym_replacer = AntonymReplacer()
# Expand contractions etc. (e.g. "can't" -> "can not").
test = regex_replacer.replace(test)
# test = repeat_replacer.replace(test)
# tokens = antonym_replacer.replace_negations(sentence)
# tokens = repeat_replacer.replace(word)
# print(test)
sentences = nltk.sent_tokenize(test)
# # print(sentences)
stopwords = nltk.corpus.stopwords.words('english')
puncs = set(string.punctuation)
# NOTE(review): the verbose-regex tokenizer pattern below is truncated in
# this view — the triple-quoted literal continues past this chunk.
pattern = r'''(?x) # set flag to allow verbose regexps (?:[A-Z]\.)+ # abbreviations, e.g. U.S.A.
from replacers import RepeatReplacer
from replacers import AntonymReplacer
from replacers import RegexpReplacer
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize

# NOTE(review): sys, re and pd (pandas) are used below but not imported
# in this snippet — presumably imported elsewhere; verify.

#reading the csv file and extracting the column of tweets into a list
csv_file=sys.argv[1]
df=pd.read_csv(csv_file)
saved_column=df['text']
list1=list(saved_column)
#print (list1)
replacer=AntonymReplacer()
rep1=RepeatReplacer()
rep2=RegexpReplacer()
# Clean each tweet in place: ASCII-only, contractions expanded,
# tokenised, negations antonym-replaced.
for i in range(0,len(list1)):
    list1[i]=re.sub(r'[^\x00-\x7F]',r' ',list1[i])  #Replacing non-ascii characters with a space
    list1[i]=rep2.replace(list1[i])  #texts like can't are converted into can not
    list1[i]=list1[i].split()  #Splitting each sentence into words
    #list1[i]=[w for w in list1[i] if (len(w)>2)]  #String length of a word is more than 2
    list1[i]=replacer.replace_negations(list1[i])  #Replaces the negative words with antonyms

# Load the word -> emotion lookup table from a comma-separated file.
emo={}
f=open('emotions.txt','r')
for line in f:
    line=line.split(',')
    emo[line[0]]=line[1].rstrip()
from nltk.tokenize import word_tokenize
from replacers import AntonymReplacer

# Demo: show how "not <word>" sequences collapse into single antonyms.
replacer = AntonymReplacer()
sent = "let's not uglify our code"
print(replacer.replace_negations(word_tokenize(sent)))
for example in ("it is not small", "it is not high", "it is not fine"):
    print(replacer.replace_negations(word_tokenize(example)))
# Load spaCy's small English model (nlp is not used in this snippet —
# presumably used further on; verify).
nlp = spacy.load('en_core_web_sm')
from replacers import AntonymReplacer


def negations(text):
    """Return *text* with negation phrases replaced by antonyms.

    Relies on the module-level `replacer` defined below; it exists by
    the time this function is invoked through DataFrame.apply.
    """
    sent = text.split()
    noneg = replacer.replace_negations(sent)
    separator = ' '
    out = separator.join(noneg)
    return out


replacer = AntonymReplacer()
#replacer.replace('good')
#replacer.replace('uglify')
# Smoke test of the replacer on a tiny token list.
sent = ['good', 'do', 'not', 'go']
aaa = replacer.replace_negations(sent)
L = pd.read_csv("hip_hop_nocontracted_v4_lowercase.csv", index_col=0)
separator = ' '
bbb = separator.join(aaa)
# NOTE(review): L3 = L is an alias, not a copy — the .apply below
# overwrites the 'Lyrics' column of L as well. Confirm this is intended.
L3 = L
L3['Lyrics'] = L3['Lyrics'].apply(negations)
import nltk

#Importing lemmatizer
from nltk.stem import WordNetLemmatizer
lemmatizer = WordNetLemmatizer()

#Importing replacers
from replacers import AntonymReplacer
replacer = AntonymReplacer()

from nltk.tokenize import RegexpTokenizer
tokenizer = RegexpTokenizer("[\w']+")

#Importing Chunkers
# NP grammar for nltk.RegexpParser (only used by the commented-out
# chunker below; layout of this literal reconstructed from a collapsed
# source line).
patterns = """
NP: {<DT|PP\$>?<JJ>*<NN>}
{<NNP>+}
{<NN>+}
"""
#chunker=nltk.RegexpParser(patterns)
import chunkers
import pickle
#from nltk.corpus import treebank_chunk
#chunker=chunkers.TagChunker(treebank_chunk.chunked_sents())

# Pickle streams are binary: open in 'rb' (text mode breaks pickle.load
# on Python 3), and use 'with' so the handle the original leaked is
# closed. SECURITY: pickle.load executes arbitrary code — only load
# chunker.dump from a trusted source.
with open("chunker.dump", 'rb') as f:
    chunker = pickle.load(f)

# training the chunker, ChunkParser is a class defined in the next slide
#NPChunker = ChunkParser(train_sents)
TxT = "This method doesn't work well, because xxx."
from replacers import RegexReplacer
neg_replacer = RegexReplacer()