def accepts_sentence(self, words_general):
    """Report whether ``words_general`` passes any of the similarity checks.

    Checks performed:
      * the n-gram perplexity from ``self.ngp.calc_perplexity`` must be
        ``<= self.threshold_perplexity_ngram``;
      * the tf-idf cosine similarity against an entry of ``self.sentences``
        must be ``>= self.threshold_tfidf``;
      * ``Levenshtein.normalized_distance`` against an entry of
        ``self.sentences`` must be ``<= self.threshold_edit_distance``.

    When ``RUN_CONFIGURATION.mode`` equals ``MODE.TURBO`` the method returns
    True at the first passing check and leaves the ``accepted_by_*``
    counters untouched. In any other mode it keeps evaluating so that each
    ``accepted_by_*`` counter records whether its own metric accepted the
    input.

    Side effects: increments ``sentences_asked`` and ``queries_asked`` and
    adds every computed score to ``sum_ngp`` / ``sum_tfidf`` / ``sum_edit``.

    NOTE(review): an earlier comment described the argument as a string,
    while its name suggests a word sequence -- confirm with the callers.
    """
    self.sentences_asked += 1
    ngp_ok = False
    tfidf_ok = False
    edit_ok = False

    # The perplexity check needs only the input, so it runs once up front.
    ppl = self.ngp.calc_perplexity(words_general)
    self.sum_ngp += ppl
    if ppl <= self.threshold_perplexity_ngram:
        if RUN_CONFIGURATION.mode == MODE.TURBO:
            return True
        self.accepted_by_ngp += 1
        ngp_ok = True

    for stored_words in self.sentences:
        # The counter is bumped before the early-exit test, so the iteration
        # that triggers the exit is counted as well.
        self.queries_asked += 1
        if tfidf_ok and edit_ok:
            # Both per-sentence metrics already accepted; nothing left to do.
            return True

        if not tfidf_ok:
            similarity = self.tfidf.calc_cosine_similarity(
                words_general, stored_words)
            self.sum_tfidf += similarity
            if similarity >= self.threshold_tfidf:
                if RUN_CONFIGURATION.mode == MODE.TURBO:
                    return True
                self.accepted_by_tfidf += 1
                tfidf_ok = True

        if not edit_ok:
            distance = Levenshtein.normalized_distance(
                words_general, stored_words)
            self.sum_edit += distance
            if distance <= self.threshold_edit_distance:
                if RUN_CONFIGURATION.mode == MODE.TURBO:
                    return True
                self.accepted_by_edit_distance += 1
                edit_ok = True

    return tfidf_ok or ngp_ok or edit_ok
def accepts_sentence(self, words_general):
    # Returns True when words_general is accepted by at least one metric:
    #   - n-gram perplexity      <= self.threshold_perplexity_ngram
    #   - tf-idf cosine similarity >= self.threshold_tfidf
    #     (against one of self.sentences)
    #   - normalized Levenshtein distance <= self.threshold_edit_distance
    #     (against one of self.sentences)
    # Returns False otherwise.
    #
    # In MODE.TURBO the first passing metric returns immediately and the
    # accepted_by_* counters are not updated; in other modes evaluation
    # continues so every accepted_by_* counter can be updated.
    #
    # Updates the running statistics sentences_asked, queries_asked,
    # sum_ngp, sum_tfidf and sum_edit along the way.

    def turbo_enabled():
        # Looked up lazily: only consulted after some threshold was met.
        return RUN_CONFIGURATION.mode == MODE.TURBO

    self.sentences_asked += 1
    ngp_hit = tfidf_hit = edit_hit = False

    # Metric 1: perplexity of the input alone.
    score = self.ngp.calc_perplexity(words_general)
    self.sum_ngp += score
    if score <= self.threshold_perplexity_ngram:
        if turbo_enabled():
            return True
        self.accepted_by_ngp += 1
        ngp_hit = True

    # Metrics 2 and 3: compare against each stored sentence until both hit.
    for candidate in self.sentences:
        self.queries_asked += 1  # counted even when the next line exits
        if tfidf_hit and edit_hit:
            return True

        if not tfidf_hit:
            cosine = self.tfidf.calc_cosine_similarity(words_general, candidate)
            self.sum_tfidf += cosine
            if cosine >= self.threshold_tfidf:
                if turbo_enabled():
                    return True
                self.accepted_by_tfidf += 1
                tfidf_hit = True

        if not edit_hit:
            normalized = Levenshtein.normalized_distance(words_general, candidate)
            self.sum_edit += normalized
            if normalized <= self.threshold_edit_distance:
                if turbo_enabled():
                    return True
                self.accepted_by_edit_distance += 1
                edit_hit = True

    return any((tfidf_hit, ngp_hit, edit_hit))
# Smoke test for the project-local Levenshtein helper: every expectation
# below must hold, otherwise the script stops with an AssertionError.
from levenshtein import Levenshtein

# (first string, second string, expected Levenshtein.distance result)
distance_cases = [
    ("", "abc", 3),
    ("abc", "", 3),
    ("", "", 0),
    ("abc", "abc", 0),
    ("abcdef", "xxxxxx", 6),
    ("xxxxxx", "abcdef", 6),
    ("abcdef", "abefcd", 4),
    ("abefcd", "abcdef", 4),
    ("acdefg", "abcdef", 2),
    ("abcdef", "acdefg", 2),
]
for left, right, expected in distance_cases:
    assert Levenshtein.distance(left, right) == expected

# (first string, second string, expected Levenshtein.normalized_distance result)
normalized_cases = [
    ("abcdef", "abc", 0.5),
    ("abcdef", "", 1),
    ("a", "b", 1),
    ("a", "", 1),
    ("a", "a", 0),
    ("abcd", "c", 0.75),
    ("abcd", "bd", 0.5),
    ("abcd", "db", 0.75),
]
for left, right, expected in normalized_cases:
    assert Levenshtein.normalized_distance(left, right) == expected

# Reached only when no assertion above failed.
print("Success")
# Assertion script for the project-local Levenshtein helper.
# Prints "Success" only if every check passes.
from levenshtein import Levenshtein

edit_distance = Levenshtein.distance
normalized_distance = Levenshtein.normalized_distance

# --- raw distance: one or both operands empty ---
assert edit_distance("", "abc") == 3
assert edit_distance("abc", "") == 3
assert edit_distance("", "") == 0

# --- raw distance: identical strings ---
assert edit_distance("abc", "abc") == 0

# --- raw distance: each pair is checked in both argument orders ---
assert edit_distance("abcdef", "xxxxxx") == 6
assert edit_distance("xxxxxx", "abcdef") == 6
assert edit_distance("abcdef", "abefcd") == 4
assert edit_distance("abefcd", "abcdef") == 4
assert edit_distance("acdefg", "abcdef") == 2
assert edit_distance("abcdef", "acdefg") == 2

# --- normalized distance ---
assert normalized_distance("abcdef", "abc") == 0.5
assert normalized_distance("abcdef", "") == 1
assert normalized_distance("a", "b") == 1
assert normalized_distance("a", "") == 1
assert normalized_distance("a", "a") == 0
assert normalized_distance("abcd", "c") == 0.75
assert normalized_distance("abcd", "bd") == 0.5
assert normalized_distance("abcd", "db") == 0.75

print("Success")