Example #1
import re
import string
import math
from collections import defaultdict

import nltk
import nltk.classify.util
import nltk.metrics
from nltk.classify import NaiveBayesClassifier
from replacers import AntonymReplacer


def negations(text):
    """Replace 'not <word>' pairs with antonyms and rejoin into a string."""
    replacer = AntonymReplacer()
    sent = text.split()
    noneg = replacer.replace_negations(sent)
    return ' '.join(noneg)


def evaluate_without_negations(feature_select):
    replacer = AntonymReplacer()

    with open('\\resources\\original files\\pos.txt', 'r') as posFile:
        posText = posFile.read()
    with open('\\resources\\original files\\neg.txt', 'r') as negFile:
        negText = negFile.read()

    # str.translate(None, ...) is Python 2 only; build a deletion table instead.
    punct_table = str.maketrans('', '', string.punctuation)
    posSentences = posText.translate(punct_table).split('\n')
    negSentences = negText.translate(punct_table).split('\n')

    posFeatures = []
    negFeatures = []

    for i in posSentences:
        posWords = re.findall(r"[\w']+|[.,!?;]", i)
        posWords = replacer.replace_negations(posWords)
        posWords = [feature_select(posWords), 'pos']
        posFeatures.append(posWords)
    for i in negSentences:
        negWords = re.findall(r"[\w']+|[.,!?;]", i)
        negWords = replacer.replace_negations(negWords)
        negWords = [feature_select(negWords), 'neg']
        negFeatures.append(negWords)

    posCutoff = int(math.floor(len(posFeatures) * 9 / 10))
    negCutoff = int(math.floor(len(negFeatures) * 9 / 10))
    trainFeatures = posFeatures[:posCutoff] + negFeatures[:negCutoff]
    testFeatures = posFeatures[posCutoff:] + negFeatures[negCutoff:]

    # print(trainFeatures)

    classifier = NaiveBayesClassifier.train(trainFeatures)

    referenceSets = defaultdict(set)
    testSets = defaultdict(set)

    for i, (features, label) in enumerate(testFeatures):
        referenceSets[label].add(i)
        predicted = classifier.classify(features)
        testSets[predicted].add(i)

    # print(testFeatures)

    accuracy = nltk.classify.util.accuracy(classifier, testFeatures)
    print('train on %d instances, test on %d instances' % (len(trainFeatures), len(testFeatures)))
    print('accuracy:', accuracy)
    print('pos precision:', nltk.metrics.precision(referenceSets['pos'], testSets['pos']))
    print('pos recall:', nltk.metrics.recall(referenceSets['pos'], testSets['pos']))
    print('neg precision:', nltk.metrics.precision(referenceSets['neg'], testSets['neg']))
    print('neg recall:', nltk.metrics.recall(referenceSets['neg'], testSets['neg']))
    classifier.show_most_informative_features(10)

    return accuracy
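
All of these snippets depend on the replacers module from the Python 3 Text Processing with NLTK Cookbook, which the page never shows. For reference, here is a minimal sketch of an AntonymReplacer consistent with that book (it assumes the WordNet corpus is downloaded; treat it as illustrative rather than the exact published source):

from nltk.corpus import wordnet

class AntonymReplacer(object):
    def replace(self, word, pos=None):
        """Return the single WordNet antonym of word, or None if none/ambiguous."""
        antonyms = set()
        for syn in wordnet.synsets(word, pos=pos):
            for lemma in syn.lemmas():
                for antonym in lemma.antonyms():
                    antonyms.add(antonym.name())
        if len(antonyms) == 1:
            return antonyms.pop()
        return None

    def replace_negations(self, sent):
        """Scan a token list, replacing 'not X' with X's antonym when unambiguous."""
        i, length = 0, len(sent)
        words = []
        while i < length:
            word = sent[i]
            if word == 'not' and i + 1 < length:
                antonym = self.replace(sent[i + 1])
                if antonym:
                    words.append(antonym)
                    i += 2
                    continue
            words.append(word)
            i += 1
        return words

Because replacement only fires when WordNet lists exactly one antonym, the method degrades gracefully: unmatched 'not X' pairs pass through untouched.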
Example #3
def antonym_dealer(document):
    from replacers import AntonymReplacer
    replacer = AntonymReplacer()
    return replacer.replace_negations(document)
Example #4
import string

import nltk
from replacers import AntonymReplacer
from replacers import SpellingReplacer
from replacers import RegexpReplacer
from replacers import RepeatReplacer

# from pickle import dump
#
# output = open('t2.pkl', 'wb')
# dump(t2, output, -1)
# output.close()

test = "DO NOT GO THERE !!!\n\n1. I knew it was questionbale when i brought in oil i purchased for them to change out. He said they don't do this, because they like to purchase it. In other words, he needed to mark up the price for the same oil.\n\n2. He told me that our Shocks were blown out and said that we can't drive too far. Normally, when your shocks are blown out, your ride will be like a bouncing ball. I closely monitored my drive and i did not have a bumpy ride that indicated blown out shocks. I took it to two separate mechanics and they tested the car and said if the shocks were bad, the car would bounce up and down. \n\nBasically, the owner lied about the shocks to get me to pay to fix them. \n\n3. One of my light bulbs is going out. I looked up the model # to replace them and i went to autozone to purchase the ones for my car. The owner said that these are the wrong headlights and I needed a more expensive set. Now, mind you- the model's I had were based on Lexus' recommendation. \n\nHe then said that it would cost over $300 dollars to change out the bulbs. The bulbs he recommend was about $80 bucks, which means over 200 of labor. \n\nHe will over exaggerate everything to get you to pay more. \n\n\nBtw, I sent my wife in to see if he would try to run up maintenance. \n\nI would not recommend this place at all. He is not goood."
test = test.lower()

regex_replacer = RegexpReplacer()
repeat_replacer = RepeatReplacer()
spell_replacer = SpellingReplacer()
antonym_replacer = AntonymReplacer()

test = regex_replacer.replace(test)

# test = repeat_replacer.replace(test)
# tokens = antonym_replacer.replace_negations(sentence)
# tokens = repeat_replacer.replace(word)

# print(test)

sentences = nltk.sent_tokenize(test)
# # print(sentences)
stopwords = nltk.corpus.stopwords.words('english')
puncs = set(string.punctuation)
pattern = r'''(?x)              # set flag to allow verbose regexps
            (?:[A-Z]\.)+        # abbreviations, e.g. U.S.A.
          | \w+(?:-\w+)*        # words with optional internal hyphens
          | \$?\d+(?:\.\d+)?%?  # currency and percentages, e.g. $12.40, 82%
          | \.\.\.              # ellipsis
          | [][.,;"'?():-_`]    # these are separate tokens
          '''
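
The example stops mid-setup. A minimal hedged continuation follows, feeding the pattern above to nltk.regexp_tokenize and dropping stopwords and punctuation; the loop and variable names are assumptions, not part of the original snippet:

# Hypothetical continuation: tokenize each sentence with the verbose
# pattern, then filter out stopwords and pure punctuation tokens.
for sentence in sentences:
    tokens = nltk.regexp_tokenize(sentence, pattern)
    filtered = [t for t in tokens if t not in stopwords and t not in puncs]
    print(filtered)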
Example #5
import sys
import re

import pandas as pd
from replacers import RepeatReplacer
from replacers import AntonymReplacer
from replacers import RegexpReplacer
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize

# Read the CSV file and extract the column of tweets into a list
csv_file=sys.argv[1]

df=pd.read_csv(csv_file)
saved_column=df['text']
list1=list(saved_column)
#print (list1)

replacer=AntonymReplacer()
rep1=RepeatReplacer()
rep2=RegexpReplacer()

for i in range(0,len(list1)):
    list1[i]=re.sub(r'[^\x00-\x7F]',r' ',list1[i]) #Replacing non-ascii characters with a space
    list1[i]=rep2.replace(list1[i])                 #texts like can't are converted into can not
    list1[i]=list1[i].split()                       #Splitting each sentence into words
    #list1[i]=[w for w in list1[i] if (len(w)>2)]    #String length of a word is more than 2
    list1[i]=replacer.replace_negations(list1[i])   #Replaces the negative words with antonyms

# Build a word -> emotion lookup from comma-separated pairs in emotions.txt.
emo = {}
with open('emotions.txt', 'r') as f:
    for line in f:
        parts = line.split(',')
        emo[parts[0]] = parts[1].rstrip()
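
The snippet ends right after loading the lexicon. Here is a hedged sketch of one way the emo mapping could be applied to the cleaned tweets; everything below is an assumption, including the emotions.txt format of word,emotion lines:

# Hypothetical continuation: tally emotion-lexicon hits per tweet.
for words in list1:
    counts = {}
    for w in words:
        if w in emo:
            counts[emo[w]] = counts.get(emo[w], 0) + 1
    print(counts)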
Example #6
from nltk.tokenize import word_tokenize
from replacers import AntonymReplacer

replacer = AntonymReplacer()

sent = "let's not uglify our code"
print(replacer.replace_negations(word_tokenize(sent)))

print(replacer.replace_negations(word_tokenize("it is not small")))
print(replacer.replace_negations(word_tokenize("it is not high")))
print(replacer.replace_negations(word_tokenize("it is not fine")))
Example #7
import spacy
import pandas as pd

from replacers import AntonymReplacer

nlp = spacy.load('en_core_web_sm')


def negations(text):
    """Replace 'not <word>' pairs with antonyms; uses the module-level replacer."""
    sent = text.split()
    noneg = replacer.replace_negations(sent)
    return ' '.join(noneg)


replacer = AntonymReplacer()
#replacer.replace('good')
#replacer.replace('uglify')

sent = ['good', 'do', 'not', 'go']
aaa = replacer.replace_negations(sent)

L = pd.read_csv("hip_hop_nocontracted_v4_lowercase.csv", index_col=0)

bbb = ' '.join(aaa)

L3 = L.copy()  # copy() so the original DataFrame is not modified through the alias

L3['Lyrics'] = L3['Lyrics'].apply(negations)
Example #8
import nltk

#Importing lemmatizer 
from nltk.stem import WordNetLemmatizer
lemmatizer = WordNetLemmatizer()

#Importing replacers
from replacers import AntonymReplacer
replacer=AntonymReplacer()
from nltk.tokenize import RegexpTokenizer
tokenizer = RegexpTokenizer(r"[\w']+")  # raw string avoids invalid-escape warnings

#Importing Chunkers
patterns = """ 
 NP: {<DT|PP\$>?<JJ>*<NN>} 
 {<NNP>+} 
 {<NN>+} 
""" 
#chunker=nltk.RegexpParser(patterns)
import chunkers
import pickle
#from nltk.corpus import treebank_chunk
#chunker=chunkers.TagChunker(treebank_chunk.chunked_sents())
f=open("chunker.dump",'r')
chunker=pickle.load(f) 
 
# training the chunker, ChunkParser is a class defined in the next slide 
#NPChunker = ChunkParser(train_sents) 
TxT="This method doesn't work well, because xxx."
from replacers import RegexReplacer
neg_replacer=RegexReplacer();
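
The listing stops after constructing neg_replacer. A hedged continuation showing how the pieces above could be wired together; this is a sketch under the assumption that the unpickled chunker exposes the standard parse interface, not code from the original:

# Hypothetical pipeline: expand contractions, tokenize, swap negations
# for antonyms where WordNet has a unique one, POS-tag, then chunk.
expanded = neg_replacer.replace(TxT)         # "doesn't" -> "does not"
tokens = tokenizer.tokenize(expanded)
tokens = replacer.replace_negations(tokens)  # "not work" -> antonym, if one exists
tagged = nltk.pos_tag(tokens)
print(chunker.parse(tagged))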