def compare(self, first_statement, second_statement):
    """
    Return the case-insensitive similarity ratio of two strings.

    Both arguments are lowercased and passed to ``SequenceMatcher``; the
    ratio it reports is rounded to 10 decimal places.

    :param first_statement: first string to compare.
    :param second_statement: second string to compare.
    :return: the rounded similarity ratio.
    :rtype: float
    """
    lowered_pair = (first_statement.lower(), second_statement.lower())
    matcher = SequenceMatcher(None, *lowered_pair)
    return round(matcher.ratio(), 10)
def levenshtein_distance(statement, other_statement):
    """
    Score the textual similarity of two statements.

    The ``text`` attribute of each statement is lowercased and the two
    strings are compared with a sequence matcher (python-Levenshtein's
    ``StringMatcher`` when it can be imported, ``difflib`` otherwise).
    The ratio is expressed in whole hundredths.

    Example (figure carried over from the earlier docstring, not
    re-verified): "where is the post office?" and "looking for the post
    office" are described as about 65% similar.

    :param statement: object exposing a ``text`` attribute.
    :param other_statement: object exposing a ``text`` attribute.
    :return: The percent of similarity between the text of the statements,
        or ``0`` when either text is falsy.
    :rtype: float
    """
    import sys

    # Prefer the python-Levenshtein matcher; fall back to the stdlib one.
    try:
        from Levenshtein.StringMatcher import StringMatcher as Matcher
    except ImportError:
        from difflib import SequenceMatcher as Matcher

    # Nothing to compare when either side has no text.
    if not (statement.text and other_statement.text):
        return 0

    # Python 2 coerces through ``unicode``; Python 3 through ``str``.
    coerce = unicode if sys.version_info[0] < 3 else str  # NOQA

    left = coerce(statement.text.lower())
    right = coerce(other_statement.text.lower())

    # Round to whole percentage points first, then scale back to a fraction.
    hundredths = int(round(100 * Matcher(None, left, right).ratio()))
    return hundredths / 100.0
def compare(self, statement, other_statement):
    """
    Compare the text of two statements, ignoring case.

    :param statement: object exposing a ``text`` attribute.
    :param other_statement: object exposing a ``text`` attribute.
    :return: The similarity ratio of the two texts rounded to two decimal
        places, or ``0`` when either text is falsy.
    :rtype: float
    """
    import sys

    # python-Levenshtein is used when importable, difflib otherwise.
    try:
        from Levenshtein.StringMatcher import StringMatcher as matcher_cls
    except ImportError:
        from difflib import SequenceMatcher as matcher_cls

    major_version = sys.version_info[0]

    # Bail out early if either statement carries no text.
    if not (statement.text and other_statement.text):
        return 0

    # Pick the text type that matches the running interpreter.
    if major_version >= 3:
        normalise = str
    else:
        normalise = unicode  # NOQA

    lowered = [
        normalise(item.text.lower())
        for item in (statement, other_statement)
    ]

    return round(matcher_cls(None, lowered[0], lowered[1]).ratio(), 2)
def compare(self, statement, other_statement):
    """
    Compare the two inputs.

    :param statement: object exposing a ``text`` attribute.
    :param other_statement: object exposing a ``text`` attribute.
    :return: The similarity between the two sentences, rounded to two
        decimal places, or ``0`` when either text is falsy.
    :rtype: float
    """
    # No meaningful comparison is possible without text on both sides.
    if not (statement.text and other_statement.text):
        return 0

    # Lowercase both texts so the comparison ignores case.
    first, second = (
        str(item.text.lower()) for item in (statement, other_statement)
    )

    matcher = SequenceMatcher(None, first, second)
    return round(matcher.ratio(), 2)
def compare(self, statement, other_statement):
    """
    Compare the two input statements.

    A character-level similarity (``percent``) is computed with a sequence
    matcher and then adjusted using ``model.wmdistance`` over the
    stop-word-filtered tokens of both texts (presumably a gensim
    word-vector model's Word Mover's Distance -- confirm against the
    module that defines ``model``).

    Relies on names defined outside this function: ``logging``,
    ``counter`` (incremented once per comparison), ``clean_sent``,
    ``model`` and ``infinity``.

    :param statement: first statement to compare.
    :param other_statement: second statement to compare.
    :return: The adjusted similarity score. ``0`` when either argument is
        falsy; the unadjusted ``percent`` when the distance equals
        ``infinity`` or when no adjustment rule applies. Note that the
        adjusted score is not clamped and may exceed 1.0.
    :rtype: float
    """
    import sys

    from chatterbot import utils

    logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s')

    global counter

    # Use python-Levenshtein if available
    try:
        from Levenshtein.StringMatcher import StringMatcher as SequenceMatcher
    except ImportError:
        from difflib import SequenceMatcher

    PYTHON = sys.version_info[0]

    # Return 0 if either statement is falsy
    if not statement or not other_statement:
        return 0

    # Get the lowercase version of both strings.
    # NOTE(review): the Python 2 branch treats the arguments as plain
    # strings while the Python 3 branch reads their ``text`` attribute;
    # left as-is because the intended argument type is not visible here.
    if PYTHON < 3:
        statement_text = unicode(statement.lower())  # NOQA
        other_statement_text = unicode(other_statement.lower())  # NOQA
    else:
        statement_text = str(statement.text.lower())
        other_statement_text = str(other_statement.text.lower())

    similarity = SequenceMatcher(
        None,
        statement_text,
        other_statement_text
    )

    counter += 1

    # Calculate a decimal percent of the similarity
    percent = int(round(100 * similarity.ratio())) / 100.0

    tokens1 = clean_sent(statement_text).lower().split()
    tokens2 = clean_sent(other_statement_text).lower().split()

    # Remove all stop words from the list of word tokens
    s1 = utils.remove_stopwords(tokens1, language='english')
    s2 = utils.remove_stopwords(tokens2, language='english')

    # Computed once; the previous second call produced an unused duplicate.
    distance = model.wmdistance(s1, s2)

    # No usable distance: fall back to the character-level score.
    if distance == infinity:
        return percent

    # Shared adjustment term used by every rule below.
    adjustment = 0.15 * abs(1 - distance)

    if percent > distance:
        if percent - distance < 0.25:
            # Decent match
            return percent + 0.08 + adjustment
        # Close match.
        # NOTE(review): the 1.0 bonus pushes the score above 1.0 --
        # confirm this is intended rather than a typo for a smaller boost.
        return percent + 1.0 + adjustment

    if percent > 0.4:
        if distance - percent < 0.15:
            return percent + 0.06 + adjustment
        return (percent - 0.04) - adjustment

    # Previously this path fell off the end and returned None, which
    # breaks numeric comparison of scores; return the unadjusted score,
    # mirroring the ``distance == infinity`` fallback above.
    return percent
def compare(self, statement, other_statement):
    """
    Compare the text of two statements, ignoring case.

    :param statement: object exposing a ``text`` attribute.
    :param other_statement: object exposing a ``text`` attribute.
    :return: The similarity ratio of the two texts rounded to two decimal
        places, or ``0`` when either text is falsy.
    :rtype: float
    """
    running_python_2 = sys.version_info[0] < 3

    # A statement without text cannot be similar to anything.
    if not statement.text:
        return 0
    if not other_statement.text:
        return 0

    # Coerce through the text type of the running interpreter.
    as_text = unicode if running_python_2 else str  # NOQA

    lowered_first = as_text(statement.text.lower())
    lowered_second = as_text(other_statement.text.lower())

    ratio = SequenceMatcher(None, lowered_first, lowered_second).ratio()
    return round(ratio, 2)
def compare(self, statement, other_statement):
    """
    Compare two plain strings, ignoring case.

    Unlike variants that read a ``text`` attribute, this version calls
    ``lower()`` directly on its arguments.

    :param statement: first string to compare.
    :param other_statement: second string to compare.
    :return: The similarity ratio rounded to four decimal places, or
        ``0`` when either argument is falsy.
    :rtype: float
    """
    if statement and other_statement:
        lowered_first = str(statement.lower())
        lowered_second = str(other_statement.lower())

        matcher = SequenceMatcher(None, lowered_first, lowered_second)
        return round(matcher.ratio(), 4)

    # At least one side is empty/None: report no similarity.
    return 0