def testIssueSeven(self):
    """Check that partial_ratio scores "HSINCHUANG" above 75 against each variant spelling."""
    query = "HSINCHUANG"
    variants = (
        "SINJHUAN",
        "LSINJHUANG DISTRIC",
        "SINJHUANG DISTRICT",
    )
    # Variants are checked in order; the first one scoring 75 or less fails the test.
    for variant in variants:
        self.assertTrue(fuzz.partial_ratio(query, variant) > 75)
def detect_old_header_style(file_name: str, comments: list, args: argparse.Namespace) -> Tuple[int, int]:
    """
    Look for the old (Apache-2.0) header style among the leading comments.

    The stripped text of every comment up to the first one located past line
    ``args.max_lines`` is collected (each piece preceded by a newline) and
    fuzzy-matched against ``OLD_APACHE_HEADER``. Only when the score is greater
    than ``args.fuzzy_ratio`` are those same comments searched for a year with
    ``extract_year_from_espressif_notice``.

    returns: Tuple[year, comment line number]
    raises: NotFound if the score is not above the threshold, or if no scanned
            comment yields a year
    """
    pieces = []
    for comment in comments:
        if comment.line_number() > args.max_lines:
            break
        pieces.append(f'\n{comment.text().strip()}')
    header_text = ''.join(pieces)

    ratio = fuzz.partial_ratio(header_text, OLD_APACHE_HEADER)
    if args.debug:
        print(f'{TERMINAL_GRAY}ratio for {file_name}: {ratio}{TERMINAL_RESET}')

    # Not similar enough to the old header: nothing to extract.
    if ratio <= args.fuzzy_ratio:
        raise NotFound('Old Espressif header')

    for comment in comments:
        # only check up to line number MAX_LINES
        if comment.line_number() > args.max_lines:
            break
        try:
            year = extract_year_from_espressif_notice(comment.text())
        except NotFound:
            # This comment carries no year; try the next one.
            continue
        return (year, comment.line_number())

    raise NotFound('Old Espressif header')
def testPartialRatioUnicodeString(self):
    """Expect a partial_ratio of 0 between the U+00C1 literal and "ABCD"."""
    accented = "\u00C1"
    plain = "ABCD"
    self.assertEqual(0, fuzz.partial_ratio(accented, plain))
def testEmptyStringsScore100(self):
    """Both ratio and partial_ratio are expected to return 100 for two empty strings."""
    empty = ""
    for scorer in (fuzz.ratio, fuzz.partial_ratio):
        self.assertEqual(scorer(empty, empty), 100)
def testPartialRatio(self):
    """Expect partial_ratio of self.s1 and self.s3 to be exactly 100."""
    score = fuzz.partial_ratio(self.s1, self.s3)
    self.assertEqual(score, 100)
# -*- coding:utf-8 -*-
"""
Reference: https://github.com/seatgeek/thefuzz

Demo script: pretty-prints the results of several ``thefuzz.fuzz`` scorers and
``thefuzz.process`` lookups on sample strings.
"""
__author__ = "aaron.qiu"

from pprint import pprint

from thefuzz import fuzz
from thefuzz import process


def main():
    """Run every sample comparison in turn and pretty-print each result."""
    # Plain and partial ratio on two strings that differ by a trailing "!".
    pprint(fuzz.ratio("this is a test", "this is a test!"))
    pprint(fuzz.partial_ratio("this is a test", "this is a test!"))

    # Same words in a different order: plain ratio vs. token_sort_ratio.
    pprint(fuzz.ratio("fuzzy wuzzy was a bear", "wuzzy fuzzy was a bear"))
    pprint(
        fuzz.token_sort_ratio("fuzzy wuzzy was a bear",
                              "wuzzy fuzzy was a bear"))

    # One string repeats a word: token_sort_ratio vs. token_set_ratio.
    pprint(fuzz.token_sort_ratio("fuzzy was a bear", "fuzzy fuzzy was a bear"))
    pprint(fuzz.token_set_ratio("fuzzy was a bear", "fuzzy fuzzy was a bear"))

    choices = [
        "Atlanta Falcons", "New York Jets", "New York Giants", "Dallas Cowboys"
    ]
    pprint(process.extract("new york jets", choices, limit=2))
    pprint(process.extractOne("cowboys", choices))

    # NOTE(review): `songs` is a single path string, not a collection of
    # candidate strings -- presumably a list of file names under that
    # directory was intended; confirm before relying on the two results below.
    songs = "/data/soft"
    pprint(process.extractOne("System of a down - Hypnotize - apache", songs))
    # Printed like every other call above so the custom-scorer result is visible.
    pprint(
        process.extractOne("System of a down - Hypnotize - Heroin",
                           songs,
                           scorer=fuzz.token_sort_ratio))


if __name__ == '__main__':
    main()