Пример #1
0
def preprocess(utterance):
    """Normalize *utterance* for retrieval.

    Strips named entities, non-quantifier numbers and configured stop
    words, then applies configured prefix removals and substring
    replacements. Returns the cleaned, lower-cased utterance string.
    """
    stop_words, missing, quantifier, replacements, start, wrongNE = loadConfig(
        'Retrieve')
    sent = choppunc(utterance)
    tokens = sent.split()
    # capitalize() already lower-cases the rest of the word, so the
    # original's extra .lower() was redundant.
    tokens[0] = tokens[0].capitalize()
    tagged = nltk.pos_tag(tokens)
    chunked = nltk.ne_chunk(tagged, binary=True)

    # Map each token to its NE label ('NE' under binary chunking) or
    # 'NNE' for non-entity tokens.
    ne_label = {}
    for node in chunked:
        if isinstance(node, nltk.tree.Tree):
            for leaf in node:
                ne_label[leaf[0]] = node.label()
        else:
            ne_label[node[0]] = "NNE"

    # Extend the stop list with proper nouns that are genuine named
    # entities, numbers that are not whitelisted quantifiers, and a
    # couple of literal tokens.
    for word, tag in tagged:
        if (tag == 'NNP' and ne_label[word] == 'NE' and word not in wrongNE) or (
                tag == 'CD' and word not in quantifier) or word == ',' or word == 'U.S':
            stop_words.append(word.lower())

    kept = [w for w in sent.lower().split() if w not in stop_words]

    # Re-join, gluing a trailing period back onto the last word; the
    # join was duplicated in both branches of the original.
    elem = ' '.join(kept).replace(' .', '.')
    if len(kept) >= 5 and missing[0] in kept:
        elem = elem.replace(' ' + missing[0], '')

    # Drop any configured leading phrase.
    for prefix in start:
        if elem.startswith(prefix):
            elem = elem.replace(prefix + ' ', '')

    # Apply configured "old,new" substring replacements.
    for pair in replacements:
        parts = pair.split(',')
        if parts[0] in elem:
            elem = elem.replace(parts[0], parts[1])

    return elem
Пример #2
0
def ifTwoNegation(utterance):
    """Detect whether *utterance* contains exactly two negation words.

    Returns ``(words, flag)``: ``flag`` is True when exactly two
    negations are found. ``words`` is both of them when they sit exactly
    two token positions apart, otherwise just the second one. The
    literal token 'no' is always skipped.
    """
    exception_vadarneg_words, missing_vadarneg_words = loadConfig('ROV')
    utterance = utterance.replace(',', '')
    sid = SentimentIntensityAnalyzer()
    negations = []
    # enumerate() instead of the original range(len(...)) index loop.
    for idx, token in enumerate(word_tokenize(utterance)):
        if token == 'no':
            continue
        scores = sid.polarity_scores(token)
        # A token is a negation when VADER scores it fully negative or
        # it is on the configured "missing" list, unless it is excepted.
        if (scores['neg'] == 1.0 or token in missing_vadarneg_words) \
                and token not in exception_vadarneg_words:
            negations.append((token, idx, abs(scores['compound'])))
    if len(negations) == 2:
        if abs(negations[0][1] - negations[1][1]) == 2:
            return [negations[0][0], negations[1][0]], True
        return [negations[1][0]], True
    return [], False
Пример #3
0
def isThereOnlyOneNegation(utterance):
    """Return ``(word, True)`` when *utterance* holds exactly one
    negation word, otherwise ``('cant_change', False)``.

    The literal token 'no' is ignored. A word from the configured
    "missing" list only counts as a negation when no VADER-negative word
    has been seen before it.
    """
    exception_vadarneg_words, missing_vadarneg_words = loadConfig('ROV')
    sid = SentimentIntensityAnalyzer()
    count = 0
    word = ''
    # The original also built an `arr` list that was never read — removed.
    for token in word_tokenize(utterance):
        if token == 'no':
            continue
        scores = sid.polarity_scores(token)
        if scores['neg'] == 1.0 and token not in exception_vadarneg_words:
            count += 1
            if count == 1:
                word = token
        elif token in missing_vadarneg_words and count == 0:
            count = 1
            word = token
    if count == 1:
        return word, True
    return 'cant_change', False
Пример #4
0
import torch
import numpy
from sentence_retriever import getSentences
from grammar import correct_grammar
from loadconfig import loadConfig
import random
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "0"  # pin all CUDA work to GPU 0

# Loaded once at import time; presumably phrases used when ranking
# candidate sentences — not referenced in this chunk, confirm at callers.
correct_phrase = loadConfig('Rank')


def getRoberta():
    """Fetch the RoBERTa-large MNLI model from the fairseq torch hub,
    move it to the GPU and switch it to evaluation mode."""
    model = torch.hub.load('pytorch/fairseq', 'roberta.large.mnli')
    model.cuda()
    model.eval()
    return model


def getContradictionScores(roberta, sentences, rov):
    """Score each candidate sentence against *rov* with the MNLI head.

    The grammar corrector threads a gender guess through successive
    sentences, so the loop is deliberately stateful. Returns a list of
    ``(score, capitalized_sentence)`` pairs, one per input sentence.
    """
    results = []
    gender = ''
    for candidate in sentences:
        candidate, gender = correct_grammar(rov, candidate, gender)
        pair_tokens = roberta.encode(rov, candidate)
        logits = roberta.predict('mnli', pair_tokens).cpu().detach().numpy()
        # First logit of the first row, rounded to 3 decimal places.
        score = round(logits[0].tolist()[0], 3)
        results.append((score, candidate.capitalize()))

    return results
Пример #5
0
import os
from urllib.parse import quote
import nltk
from loadconfig import loadConfig

# Accumulator filled by getAllConcepts(); persists across calls.
words = []
# Words counted across the ROV / retrieved pair in islenPermissible().
nonoverlap_words = loadConfig('Sentences')


def islenPermissible(rov, retrieved):
    """Decide whether *retrieved* is an acceptable match for *rov*.

    Rejects the pair when the retrieved sentence is more than twice the
    ROV's token length, has three tokens or fewer, or when configured
    non-overlap words occur two or more times across the two strings.
    """
    # Each substring hit counts once per string (booleans sum as 0/1).
    overlap_hits = sum(
        (word in rov) + (word in retrieved) for word in nonoverlap_words
    )
    rov_tokens = nltk.word_tokenize(rov)
    retrieved_tokens = nltk.word_tokenize(retrieved)
    too_long = len(retrieved_tokens) > 2 * len(rov_tokens)
    too_short = len(retrieved_tokens) <= 3
    return not (too_long or too_short or overlap_hits >= 2)


def getAllConcepts():
    """Load concept terms from ./data/concept.txt into the module-level
    ``words`` list and return it.

    NOTE(review): every call appends again to the shared ``words`` list,
    so repeated calls duplicate entries — preserved because other code
    may rely on the module-level accumulator; confirm and fix at callers.
    """
    # Context manager closes the handle; the original leaked the open().
    with open('./data/concept.txt') as concept_file:
        for line in concept_file:
            words.append(line.strip())
    return words


def updateConcept(concept):
Пример #6
0
from loadconfig import loadConfig
import os
import re
import random

# Module metadata.
__software__ = "FormatText"
__author__ = "MENG Yidong"
__version__ = "2.2"


# Configuration loaded once at import time:
#   __params__       - general settings (holds the "illegal characters" list)
#   __replacements__ - mapping of substrings to their replacement text
#   __bullshits__    - not used in this chunk; purpose TODO confirm
__params__, __replacements__, __bullshits__ = loadConfig()

def control_illegal_characters(stringToControl):
    """Replace each configured illegal character found in
    *stringToControl* with a space, logging every substitution."""
    # "illegal characters" (非法字符 in the original comment) come from config.
    forbidden = __params__["illegal characters"]

    for ch in forbidden:
        if ch in stringToControl:
            print("find '" + ch + "'")
            stringToControl = stringToControl.replace(ch, " ")
            print("replaced by space")
            print("*"*10)
    return stringToControl

def control_illegal_combinations(stringToControl):
    """Apply every configured substring replacement to *stringToControl*
    and return the result."""
    # str.replace is a no-op when the key is absent, so the original's
    # membership check was redundant; iterate items() to get key and
    # value together instead of keys() plus a lookup.
    for key, value in __replacements__.items():
        stringToControl = stringToControl.replace(key, value)
    return stringToControl