Exemplo n.º 1
0
    def testIssueSeven(self):
        """Check that partial_ratio of "HSINCHUANG" against each variant exceeds 75."""
        s1 = "HSINCHUANG"
        s2 = "SINJHUAN"
        s3 = "LSINJHUANG DISTRIC"
        s4 = "SINJHUANG DISTRICT"

        # assertGreater reports both compared values when it fails, whereas
        # assertTrue(score > 75) would only say "False is not true".
        self.assertGreater(fuzz.partial_ratio(s1, s2), 75)
        self.assertGreater(fuzz.partial_ratio(s1, s3), 75)
        self.assertGreater(fuzz.partial_ratio(s1, s4), 75)
Exemplo n.º 2
0
def detect_old_header_style(file_name: str, comments: list, args: argparse.Namespace) -> Tuple[int, int]:
    """
    Detects old header style (Apache-2.0) and extracts the year and line number.

    Only comments located before the first comment whose line number exceeds
    ``args.max_lines`` are inspected. Their stripped texts are fuzzy-matched
    against OLD_APACHE_HEADER; when the ratio is above ``args.fuzzy_ratio``,
    the first inspected comment from which a year can be extracted wins.

    returns: Tuple[year, comment line number]
    raises: NotFound when the ratio is too low or no inspected comment yields a year
    """
    # Collect the inspected prefix once; scanning stops at the first comment
    # past args.max_lines (later comments are never looked at).
    header_comments = []
    for comment in comments:
        if comment.line_number() > args.max_lines:
            break
        header_comments.append(comment)

    # Every stripped comment is preceded by a newline; join avoids rebuilding
    # the whole string on each iteration.
    comments_text = ''.join(f'\n{comment.text().strip()}' for comment in header_comments)
    ratio = fuzz.partial_ratio(comments_text, OLD_APACHE_HEADER)
    if args.debug:
        print(f'{TERMINAL_GRAY}ratio for {file_name}: {ratio}{TERMINAL_RESET}')
    if ratio > args.fuzzy_ratio:
        for comment in header_comments:
            try:
                year = extract_year_from_espressif_notice(comment.text())
            except NotFound:
                # This comment carries no notice year; try the next one.
                continue
            return (year, comment.line_number())
    raise NotFound('Old Espressif header')
Exemplo n.º 3
0
 def testPartialRatioUnicodeString(self):
     """partial_ratio of a lone U+00C1 character against "ABCD" is expected to be 0."""
     accented = "\u00C1"
     plain = "ABCD"
     self.assertEqual(0, fuzz.partial_ratio(accented, plain))
Exemplo n.º 4
0
 def testEmptyStringsScore100(self):
     """Two empty strings are expected to score 100 under ratio and partial_ratio."""
     for scorer in (fuzz.ratio, fuzz.partial_ratio):
         self.assertEqual(scorer("", ""), 100)
Exemplo n.º 5
0
 def testPartialRatio(self):
     """partial_ratio of the fixture strings s1 and s3 is expected to be 100."""
     score = fuzz.partial_ratio(self.s1, self.s3)
     self.assertEqual(score, 100)
Exemplo n.º 6
0
# -*- coding:utf-8 -*-
"""
Reference: https://github.com/seatgeek/thefuzz
"""
__author__ = "aaron.qiu"

from pprint import pprint
from thefuzz import fuzz
from thefuzz import process

if __name__ == '__main__':
    # Demo script: print the result of several thefuzz scorers and of the
    # process.extract / process.extractOne helpers.

    # Same pair of strings scored with ratio and with partial_ratio.
    pprint(fuzz.ratio("this is a test", "this is a test!"))
    pprint(fuzz.partial_ratio("this is a test", "this is a test!"))

    # Same words in a different order: ratio versus token_sort_ratio.
    pprint(fuzz.ratio("fuzzy wuzzy was a bear", "wuzzy fuzzy was a bear"))
    pprint(
        fuzz.token_sort_ratio("fuzzy wuzzy was a bear",
                              "wuzzy fuzzy was a bear"))

    # One string repeats a word: token_sort_ratio versus token_set_ratio.
    pprint(fuzz.token_sort_ratio("fuzzy was a bear", "fuzzy fuzzy was a bear"))
    pprint(fuzz.token_set_ratio("fuzzy was a bear", "fuzzy fuzzy was a bear"))

    # Pick matches for a query out of a list of candidate strings.
    choices = [
        "Atlanta Falcons", "New York Jets", "New York Giants", "Dallas Cowboys"
    ]
    pprint(process.extract("new york jets", choices, limit=2))
    pprint(process.extractOne("cowboys", choices))

    # choices must be a collection of strings: a bare str would be iterated
    # character by character, so the path is wrapped in a list.
    # NOTE(review): presumably this was meant to hold several song paths;
    # confirm the intended candidates.
    songs = ["/data/soft"]
    pprint(process.extractOne("System of a down - Hypnotize - apache", songs))
    # Print this result as well; it was previously computed and discarded.
    pprint(
        process.extractOne("System of a down - Hypnotize - Heroin",
                           songs,
                           scorer=fuzz.token_sort_ratio))