def _read_sentences(path):
    """Return the punctuation-stripped lines of the text file at *path*.

    The file is read in one go inside a ``with`` block so the handle is
    closed before any further processing happens.
    """
    with open(path, 'r') as corpus:
        text = corpus.read()
    # Two-argument ``translate`` is the Python 2 ``str`` form: no translation
    # table, delete every character listed in ``string.punctuation``.
    # NOTE(review): a file ending in a newline yields a final empty sentence,
    # which is still featurized by the caller, exactly as before.
    return re.split(r'\n', text.translate(None, string.punctuation))


def _label_sentences(sentences, label, feature_select, replacer):
    """Turn each sentence into a ``[feature_select(words), label]`` pair."""
    labeled = []
    for sentence in sentences:
        words = re.findall(r"[\w']+|[.,!?;]", sentence)
        words = replacer.replace_negations(words)
        labeled.append([feature_select(words), label])
    return labeled


def evaluate_without_negations(feature_select):
    """Train and evaluate a Naive Bayes classifier on negation-replaced text.

    Positive and negative sentences are read from the two corpus files,
    tokenized, passed through ``AntonymReplacer.replace_negations`` and then
    through *feature_select*.  The first 9/10 of each class is used for
    training and the remainder for testing.  Accuracy, per-class precision
    and recall, and the ten most informative features are printed.

    Args:
        feature_select: callable that receives the list of words of one
            sentence and returns the feature set handed to the classifier.

    Returns:
        The accuracy reported by ``nltk.classify.util.accuracy`` on the
        held-out test features.
    """
    replacer = AntonymReplacer()

    posSentences = _read_sentences('\\resources\\original files\\pos.txt')
    negSentences = _read_sentences('\\resources\\original files\\neg.txt')

    posFeatures = _label_sentences(posSentences, 'pos', feature_select, replacer)
    negFeatures = _label_sentences(negSentences, 'neg', feature_select, replacer)

    # 90/10 train/test split per class; floor division gives the same
    # cutoff as the previous int(math.floor(n * 9 / 10)).
    posCutoff = len(posFeatures) * 9 // 10
    negCutoff = len(negFeatures) * 9 // 10
    trainFeatures = posFeatures[:posCutoff] + negFeatures[:negCutoff]
    testFeatures = posFeatures[posCutoff:] + negFeatures[negCutoff:]

    classifier = NaiveBayesClassifier.train(trainFeatures)

    # Index sets per label: gold labels vs. predicted labels, as expected by
    # nltk.metrics.precision / recall.
    referenceSets = defaultdict(set)
    testSets = defaultdict(set)
    for i, (features, label) in enumerate(testFeatures):
        referenceSets[label].add(i)
        testSets[classifier.classify(features)].add(i)

    # Computed once and reused for both the report and the return value.
    accuracy = nltk.classify.util.accuracy(classifier, testFeatures)

    # Single-argument print(...) emits the same text under Python 2 and 3.
    print('train on %d instances, test on %d instances' % (len(trainFeatures), len(testFeatures)))
    print('accuracy: %s' % (accuracy,))
    print('pos precision: %s' % (nltk.metrics.precision(referenceSets['pos'], testSets['pos']),))
    print('pos recall: %s' % (nltk.metrics.recall(referenceSets['pos'], testSets['pos']),))
    print('neg precision: %s' % (nltk.metrics.precision(referenceSets['neg'], testSets['neg']),))
    print('neg recall: %s' % (nltk.metrics.recall(referenceSets['neg'], testSets['neg']),))
    classifier.show_most_informative_features(10)

    return accuracy
Exemplo n.º 2
0
def negations(text):
    """Return *text* with its negations rewritten by ``AntonymReplacer``.

    The text is split on whitespace, the token list is passed to
    ``replace_negations`` of a freshly constructed ``AntonymReplacer``, and
    the returned tokens are joined back together with single spaces.
    """
    tokens = AntonymReplacer().replace_negations(text.split())
    return ' '.join(tokens)
Exemplo n.º 3
0
def antonym_dealer(document):
    """Pass *document* through ``AntonymReplacer.replace_negations``.

    ``AntonymReplacer`` is imported at call time and a new instance is
    created on every call; whatever ``replace_negations`` returns is handed
    back unchanged.
    """
    from replacers import AntonymReplacer

    return AntonymReplacer().replace_negations(document)
Exemplo n.º 4
0
# Load the 'text' column of the CSV into a plain Python list.
df = pd.read_csv(csv_file)
saved_column = df['text']
list1 = list(saved_column)

replacer = AntonymReplacer()
rep1 = RepeatReplacer()
rep2 = RegexpReplacer()

# Normalize every text in place; each entry of list1 ends up as a list of
# words.
for i, text in enumerate(list1):
    # Replace every non-ASCII character with a space.
    text = re.sub(r'[^\x00-\x7F]', r' ', text)
    # Per the original author's note: expands contractions such as
    # "can't" into "can not".
    text = rep2.replace(text)
    # A filter keeping only words longer than 2 characters used to be
    # applied after the split; it is intentionally not applied here.
    # Per the original author's note: negated words are replaced with
    # antonyms.
    list1[i] = replacer.replace_negations(text.split())

# emotions.txt: one "key,value" pair per line.
# NOTE(review): a line without a comma raises IndexError - confirm the file
# never contains blank lines.
emo = {}
with open('emotions.txt', 'r') as f:
    for line in f:
        line = line.split(',')
        emo[line[0]] = line[1].rstrip()

# abb.txt: one "key,value" pair per line; both sides are lower-cased.
abb = {}
with open('abb.txt', 'r') as f:
    for line in f:
        line = line.split(',')
        abb[line[0].lower()] = line[1].rstrip().lower()
#for i in ran
Exemplo n.º 5
0
from nltk.tokenize import word_tokenize
from replacers import AntonymReplacer

# One replacer instance is shared by all of the demo calls below.
replacer = AntonymReplacer()

# Tokenize the sentence, rewrite its negations and show the resulting tokens.
sent = "let's not uglify our code"
print(replacer.replace_negations(word_tokenize(sent)))

# Same pipeline on three short "it is not <adjective>" phrases.
for phrase in ("it is not small", "it is not high", "it is not fine"):
    print(replacer.replace_negations(word_tokenize(phrase)))
Exemplo n.º 6
0
def negations(text):
    """Rewrite the negations in *text* using the module-level ``replacer``.

    The text is split on whitespace, the words are passed to
    ``replacer.replace_negations``, and the returned words are joined with
    single spaces into the result string.
    """
    words = text.split()
    return ' '.join(replacer.replace_negations(words))


# Module-level replacer; ``negations`` looks this name up when it is called.
replacer = AntonymReplacer()

# Quick check on a hand-written token list, joined back into one string.
sent = ['good', 'do', 'not', 'go']
aaa = replacer.replace_negations(sent)
separator = ' '
bbb = separator.join(aaa)

# Lyrics table; the first CSV column becomes the index.
L = pd.read_csv("hip_hop_nocontracted_v4_lowercase.csv", index_col=0)

# L3 is a second name for the same DataFrame object (not a copy), so the
# rewritten 'Lyrics' column is visible through L as well.
L3 = L
L3['Lyrics'] = L3['Lyrics'].apply(negations)


def negations(text):

    sent = text.split()
    noneg = replacer.replace_negations(sent)