Example #1
def get_similarities(Features, url_input):
    """
    similarity metrics include: Levenshtein, jaro, damerau levenshtein, normalized_damerau_levenshtein,
    and hamming distance
    :param Features: input dictionary to add things to
    :param url_input
    :return: Features: after adding all similarity metrics
    """
    for n in itertools.chain(product_domain_names, brand_names):
        Features['url_levenshtein_distance_' + n] = Levenshtein.distance(
            url_input, n)
        Features['fqdn_levenshtein_distance_' + n] = Levenshtein.distance(
            Features['fqdn'], n)
        Features['url_jaro_winkler_distance_' + n] = jw.get_jaro_distance(
            url_input, n)
        Features['fqdn_jaro_winkler_distance_' + n] = jw.get_jaro_distance(
            Features['fqdn'], n)
        Features['url_damerau_levenshtein_distance_' +
                 n] = dl.damerau_levenshtein_distance(url_input, n)
        Features['fqdn_damerau_levenshtein_distance_' +
                 n] = dl.damerau_levenshtein_distance(Features['fqdn'], n)
        Features['url_damerau_levenshtein_normalized_distance_' +
                 n] = dl.normalized_damerau_levenshtein_distance(url_input, n)
        Features['fqdn_damerau_levenshtein_normalized_distance_' +
                 n] = dl.normalized_damerau_levenshtein_distance(
                     Features['fqdn'], n)
        if len(n) == len(url_input):
            Features['url_length_equals_' + n] = 1
            Features['url_hamming_distance_' + n] = hamming(url_input, n)
            Features['fqdn_hamming_distance_' + n] = hamming(
                Features['fqdn'], n)
        else:
            Features['url_length_equals_' + n] = 0
    return Features
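
A minimal sketch of the individual metrics the docstring above lists, on a single pair of strings. It assumes the python-Levenshtein and pyjarowinkler packages; the Hamming count is written inline because the hamming helper used above is not shown.

import Levenshtein
from pyjarowinkler import distance as jw

a, b = "paypal.com", "paypa1.com"
print(Levenshtein.distance(a, b))                             # plain edit distance
print(jw.get_jaro_distance(a, b, winkler=True, scaling=0.1))  # Jaro-Winkler similarity
if len(a) == len(b):
    # Hamming distance only makes sense for equal-length strings
    print(sum(x != y for x, y in zip(a, b)))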
Example #2
def jwsim(word, otherword):
    # get_jaro_distance is named "distance" but returns a similarity score
    sim = distance.get_jaro_distance(word, otherword)
    uword = unidecode.unidecode(word)
    uotherword = unidecode.unidecode(otherword)
    usim = distance.get_jaro_distance(uword, uotherword)
    return (sim + usim) / 2
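
A hypothetical call to jwsim, assuming the from pyjarowinkler import distance and import unidecode imports the function relies on; the accented pairs are only illustrative.

print(jwsim("café", "cafe"))       # averages the raw and accent-stripped Jaro-Winkler scores
print(jwsim("München", "Munchen"))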
Example #3
def check(artist1, song1, artist2, song2):
    artistScore = JW.get_jaro_distance(artist1, artist2, winkler=True, winkler_ajustment=True, scaling=0.1) #calculate jaro winkler distance of artist name
    songScore = JW.get_jaro_distance(song1, song2, winkler=True, winkler_ajustment=True, scaling=0.1) #calculate the jaro winkler distance of song name
    if artistScore > 0.75 and songScore > 0.75: #if both the artist and song name have over 75% matching
        return True #return that it was a match
    else: #if not enough matching
        return False #return that it was not a match
Example #4
def sim_join(input_array, k, group_size=1):
    make_hash(input_array)
    data, centers = get_centers(input_array)
    results = []
    print("making %d *sqrt(n)-sized groups..." % group_size)
    groups = make_groups(data, centers, k, group_size * len(centers), results)
    print("nested loop %d groups" % len(groups))
    for i, group in enumerate(groups):
        if len(group) <= 1: continue
        for current, elem_i in enumerate(group.elems):
            j = 1
            dist_to_groups = [
                distance.get_jaro_distance(g.center, elem_i) - g.r
                for g in groups
            ]
            closest_group = dist_to_groups.index(min(dist_to_groups))
            if groups[closest_group].id == group.id:
                closest_group = np.argpartition(np.array(dist_to_groups), j)[j]
                j += 1
            target = group.all_but(current) + groups[closest_group].all()
            while len(target) <= k:
                closest_group = np.argpartition(np.array(dist_to_groups), j)[j]
                j += 1
                if groups[closest_group].id == group.id: continue
                target = target + groups[closest_group].all()
            distances = np.array(
                [distance.get_jaro_distance(x, elem_i) for x in target])
            knn = np.argpartition(distances, k)[:k].tolist()
            results.append((hash[elem_i], [hash[target[x]] for x in knn]))
    return results
Example #5
def dictionaryMatches(word):
    j = 2
    prefixDistCheck = False
    suffixDistCheck = False
    while (j <= len(word) - 2):
        prefix = word[0:j]
        suffix = word[j:]
        dictprefixList = []
        dictprefixList = [i for i in dictList if i.startswith(prefix)]
        dictsuffixList = [i for i in dictList if i.endswith(suffix)]
        if (not prefixDistCheck):
            for dict in dictprefixList:
                if (distance.get_jaro_distance(
                        word, dict, winkler=True, scaling=0.1) > avgPreDist):
                    prefixDistCheck = True
                    break
        if (not suffixDistCheck):
            for dict in dictsuffixList:
                if (distance.get_jaro_distance(
                        word, dict, winkler=True, scaling=0.1) > avgSufDist):
                    suffixDistCheck = True
                    break
        if (prefixDistCheck and suffixDistCheck):
            break
        j = j + 1
    if (prefixDistCheck and suffixDistCheck):
        return "True"
    else:
        return "False"
Example #6
def statJW():
    d = []

    with open('data/blends.txt') as f:
        for line in f:

            t, tt, ttt = line.split()

            jw1 = distance.get_jaro_distance(t, tt, False)
            jw2 = distance.get_jaro_distance(t, ttt, False)

            d.extend([jw1, jw2])
    stat('JW', d, 'similarity value', 'frequency')
Example #7
def run_program(result):
    program_path = ""
    found = False
    response = {
        "tts": "",
        "file": "",
        "save": False,
    }
    if result['entities']:
        for entity in result['entities']:
            if entity["entity"] == "program":
                for subdir, dirs, files in os.walk(settings.PROGRAMS_DIR1):
                    for file in files:
                        if (entity["value"] in file
                                and jarowinkler.get_jaro_distance(
                                    entity["value"], file, winkler=True) >
                                0.8):
                            program_path = subdir + "/" + file
                            found = True
                            break
                    if (found):
                        break

                if not program_path:
                    for subdir, dirs, files in os.walk(settings.PROGRAMS_DIR2):
                        for file in files:
                            if (entity["value"] in file
                                    and jarowinkler.get_jaro_distance(
                                        entity["value"], file, winkler=True) >
                                    0.8):
                                program_path = subdir + "/" + file
                                found = True
                                break
                        if (found):
                            break
    if not program_path:
        response[
            "tts"] = "I was unable to find the program you wanted. It may not be in the start programs directory."
        response["file"] = "program_not_found.mp3"
        response["save"] = True
        return response

    response["tts"] = "Ok"
    response["file"] = "ok.mp3"
    response["save"] = True
    print(f"Opening path {program_path}")
    os.startfile(program_path)

    return response
Example #8
def normalize_legal_entity_type(txt) -> (str, str, float):
  knowns = find_known_legal_entity_type(txt.strip())
  if len(knowns) > 0:
    if len(knowns) == 1:
      k = knowns[0]
      return k[0], k[1], distance.get_jaro_distance(k[0], txt, winkler=True, scaling=0.1)
    else:
      finding = '', '', 0
      for k in knowns:
        d = distance.get_jaro_distance(k[0], txt, winkler=True, scaling=0.1)
        if d > finding[2]:
          finding = k[0], k[1], d
      return finding
  else:
    return txt, '', 0.5
Example #9
def get_synmat4title(title, maxlen):

    splitTitle = preprocess_line_syn(title, exclude)
    List1 = splitTitle
    List2 = splitTitle
    Matrix = np.zeros((maxlen, maxlen), dtype=float)  # np.float was removed from NumPy
    limit = min(len(List1), maxlen)
    for i in range(limit):
        for j in range(limit):
            Matrix[i, j] = distance.get_jaro_distance(List1[i], List2[j])
    return Matrix
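
Because preprocess_line_syn and exclude are not shown above, here is a self-contained sketch of the same idea with plain str.split() standing in for the tokenizer.

import numpy as np
from pyjarowinkler import distance

def jaro_self_similarity_matrix(title, maxlen):
    # whitespace tokenization stands in for preprocess_line_syn
    tokens = title.lower().split()
    matrix = np.zeros((maxlen, maxlen))
    limit = min(len(tokens), maxlen)
    for i in range(limit):
        for j in range(limit):
            matrix[i, j] = distance.get_jaro_distance(tokens[i], tokens[j])
    return matrix

print(jaro_self_similarity_matrix("deep learning for deep parsing", 5))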
Example #10
def pareamentoself(dataframebase,
                   colunas,
                   highest_only=False,
                   valor_match=0.89):

    dataframebase['KEY'] = reduce(
        lambda a, b: a + b, [dataframebase[coluna] for coluna in colunas])
    size = len(dataframebase)
    perc = 0
    matches = {}
    id_key = {}
    for i, line in dataframebase.iterrows():
        id_key[line['KEY']] = line['ID']
        highest_match = 0
        highest_match_name = None
        for key in matches.keys():
            jaro_value = distance.get_jaro_distance(line['KEY'], key)
            if jaro_value > valor_match:  # it's a match
                if not highest_only:
                    matches[key].append(line['KEY'])
                elif jaro_value > highest_match:
                    highest_match = jaro_value
                    highest_match_name = key
        if highest_match_name is not None:
            matches[highest_match_name].append(line['KEY'])
        else:
            matches[line['KEY']] = []

        if i / size * 100 > perc:
            print(perc, '%')
            perc += 1

    return matches, id_key
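
A hypothetical call to pareamentoself on a tiny frame; the ID and nome columns and the near-duplicate company names are invented, and the default 0.89 threshold is kept.

import pandas as pd

df = pd.DataFrame({
    "ID": [1, 2, 3],
    "nome": ["ACME COMERCIO LTDA", "ACME COMERCIO LTD", "BETA SERVICOS SA"],
})
matches, id_key = pareamentoself(df, ["nome"])
print(matches)  # each key maps to the list of keys that matched it above the threshold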
Example #11
def get_JW_matrix(NAMELIST):
    """
    input:
        NAMELIST: unique name list (of length n)
    output:
        upper triangular n x n numpy matrix of Jaro-Winkler scores
        (only entries with i < j are filled; the diagonal is left at zero).
    """
    print("creating JW.csv")
    namelist = list(NAMELIST)
    n = len(namelist)
    for i in range(n):
        for sep in ["`", "'", ".", "-", ',']:
            namelist[i] = namelist[i].replace(sep, " ")
    matrix = np.zeros([n, n])
    max_iter = int(0.5 * (n - 1) * n)
    iter = 0
    for i in range(n):
        for j in range(i + 1, n):  # skip the diagonal (a name compared with itself scores 1)
            iter += 1
            progress = iter / max_iter * 100
            sys.stdout.write("\riter {0}({1},{2}) out of {3}({4}%)   ".format(
                int(iter), int(i), int(j), int(max_iter), int(progress)))
            sys.stdout.flush()
            matrix[i, j] = distance.get_jaro_distance(namelist[i],
                                                      namelist[j],
                                                      winkler=True,
                                                      scaling=0.1)
    print("\ncreated JW.csv")
    return (matrix)
Example #12
 def fuzzy_match_facet(text, facet):
     score = distance.get_jaro_distance(text.lower(),
                                        facet.lower(),
                                        winkler=True,
                                        scaling=0.1)
     print(facet, score)
     return score
Example #13
def filter_entry(source, raw_table):

    # count is the primary criterion
    best_count = 0
    bests = []
    for target in raw_table[source]:
        count = raw_table[source][target]
        if count > best_count:
            best_count = count
            bests = [target]
        elif count == best_count:
            bests.append(target)

    # jaro winkler is the secondary criterion
    if len(bests) > 1:
        # alphabetic is the third criterion
        # (this is not meaningful, this is just to be deterministic)
        bests.sort()
        best_jw = -1
        best = None
        source_word = source
        for target in bests:
            jw = distance.get_jaro_distance(source_word, target)
            if jw > best_jw:
                best_jw = jw
                best = target
            # not accounting for ties -- we just take the first one as best
        return best
    else:
        return bests[0]
Example #14
def jwOnSortedFunction(s1,
                       s2,
                       collator=icu.Collator.createInstance(
                           icu.Locale('de_DE.UTF-8'))):
    s1_s = ''.join(sorted(list(s1), key=collator.getSortKey))
    s2_s = ''.join(sorted(list(s2), key=collator.getSortKey))
    return jw_distance.get_jaro_distance(s1_s, s2_s, winkler=True)
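
A hypothetical call (assumes PyICU is installed along with the icu and jw_distance imports used above); because both strings are sorted before comparison, anagrams score 1.0.

print(jwOnSortedFunction("stressed", "desserts"))  # both sort to "deerssst" -> 1.0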
Example #15
 def phase_3_validation(self, interpretation, min_confidence):
     result = Result()
     last_underscore_index = self.profile.name.rfind(
         STRUCTURED_KEY_SEPARATOR)
     if last_underscore_index >= 0:
         normalized_name = self.profile.name[last_underscore_index:]
     else:
         normalized_name = self.profile.name
     max_confidence = 0.0
     for matching_name in interpretation['iMatchingNames']:
         if len(matching_name) > 0:
             jaroDistance = get_jaro_distance(normalized_name.lower(),
                                              matching_name.lower())
             if jaroDistance > max_confidence:
                 max_confidence = jaroDistance
     interpretation['iConfidence'] = max_confidence
     if max_confidence <= min_confidence:
         message = "Matching confidence not high enough for field " + normalized_name + " with interpretation " + interpretation[
             'iName'] + "."
         logging.debug(message)
         result.message = message
         return result
     logging.debug("Highest matching confidence for " + normalized_name +
                   " was " + str(max_confidence) + ".")
     result.result = True
     return result
Example #16
def knn(inputs, dataset, labels, k):
    '''Main function for doing kNN'''

    numsamples = len(dataset)
    Distance = []  # a list of distances
    Weight = []
    for i in range(numsamples):
        dist = distance.get_jaro_distance(inputs,
                                          dataset[i],
                                          winkler=True,
                                          scaling=0.1)
        Distance.append(dist)
        if Metric_mode == 'wt':
            wt = Gaussian(dist, h)
            Weight.append(wt)

    if Metric_mode == 'dist':
        sorted_ = -1 * np.sort(-1 * np.array(Distance))
        sorted_idx = np.argsort(np.array(Distance))
        sorted_idx = sorted_idx[::-1]
    else:
        sorted_ = -1 * np.sort(-1 * np.array(Weight))
        sorted_idx = np.argsort(np.array(Weight))
        sorted_idx = sorted_idx[::-1]

    if CV_mode == 0:
        result = voting(sorted_, sorted_idx, labels, k)
    else:
        result = []
        for i in range(len(k)):
            maxindex = voting(sorted_, sorted_idx, labels, k[i])
            result.append(maxindex)
        result = np.array(result).reshape(1, -1)

    return result
Example #17
def get_min_hun_distance(
        words1: List[str],
        words2: List[str]) -> Tuple[float, List[Tuple[int, int, float]]]:
    """Calculate a similarity score between all pairs of words."""
    values = []
    hits = []
    min_dist = 0
    for i in range(len(words1)):
        w1 = words1[i]
        row = []
        for j in range(len(words2)):
            w2 = words2[j]
            # Jaro-Winkler distance (not similarity score)
            row.append(
                1 -
                distance.get_jaro_distance(w1, w2, winkler=True, scaling=0.1))
        values.append(row)
    # Calculate the best pairing based on the similarity score.
    row_ids, col_ids = linear_sum_assignment(values)
    row_ids = list(row_ids)
    col_ids = list(col_ids)
    # The best alignment
    hits = []
    valsum = 0
    for i in range(len(row_ids)):
        row_id = row_ids[i]
        col_id = col_ids[i]
        hits.append((row_id, col_id, values[row_id][col_id]))
        valsum += values[row_id][col_id]

    min_dist = valsum / (len(words1) + len(words2))

    return min_dist, hits
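
A hypothetical call with invented word lists; it relies on the typing, scipy.optimize.linear_sum_assignment and pyjarowinkler imports that the function above assumes.

words_a = ["acme", "corporation"]
words_b = ["acme", "corp"]
score, pairs = get_min_hun_distance(words_a, words_b)
print(score)  # total assignment cost normalized by the combined word count
print(pairs)  # (index in words_a, index in words_b, 1 - Jaro-Winkler) for each matched pair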
Example #18
def match_org_by_score(author_name, author_list):
    score_list = []
    name = clean_name(author_name)
    # author_list_lower = []
    # for author in author_list:
    #     author_list_lower.append(author.lower())

    # author_list_clean = list(map(clean_name, author_list))
    # print("author_list_clen:",author_list_clean)
    name_split = name.split()
    for o in author_list:
        if "name" in o and o["name"] != "":

            author = clean_name(o["name"])

            # lower_name = author.lower()
            score = distance.get_jaro_distance(name,
                                               author,
                                               winkler=True,
                                               scaling=0.1)
            author_split = author.split()
            inter = set(name_split) & set(author_split)
            alls = set(name_split) | set(author_split)
            score += round(len(inter) / len(alls), 6)
            score_list.append(score)

    rank = np.argsort(-np.array(score_list))
    return_list = [author_list[i] for i in rank]

    return return_list[0]
Example #19
def similarity(p):
    x, y = p

    if x < y:
        return 0

    return ds.get_jaro_distance(text[x], text[y], winkler=True, scaling=0.1)
Example #20
def jaroWinklerDistance(text_a, text_b):
    """
    Calculate Jaro Winkler Distance
    :param text_a: Text a
    :param text_b: Text b
    :return: Jaro Winkler distance value
    """
    return distance.get_jaro_distance(text_a, text_b, winkler=True, scaling=0.1)
Example #21
def score_matches(series_1, series_2):
    """
    Inputs:
    series_1, series_2 : pd.Series
        Series that each contain a single record of census data.
        Labels are the columns in the read_data function above
        Data from series_1 must be 10 years earlier than data from series_2
    Outputs:
    score : float
        Score rating the match between the two inputs. Higher is closer.
    """
    if not pd.isnull(series_1.NAMELAST) and not pd.isnull(series_2.NAMELAST):
        dist_NAMELAST = distance.get_jaro_distance(series_1.NAMELAST, series_2.NAMELAST, 
                                                   winkler=True, scaling=0.1)
    else:
        dist_NAMELAST = 0
    if not pd.isnull(series_1.NAMEFRST) and not pd.isnull(series_2.NAMEFRST):
        dist_NAMEFRST = distance.get_jaro_distance(series_1.NAMEFRST, series_2.NAMEFRST, 
                                                   winkler=True, scaling=0.1)
    else:
        dist_NAMEFRST = 0
    dist_BPL = int(series_1.BPL == series_2.BPL)
    dist_SEX = int(series_1.SEX == series_2.SEX)
    dist_AGE = int(series_1.AGE == series_2.AGE - 10 or
                   series_1.AGE == series_2.AGE - 11 or
                   series_1.AGE == series_2.AGE - 9 )
    dist_SERIAL = int(series_1.SERIAL == series_2.SERIAL)
    
    # Column weights: more important columns get higher weights
    weight_NAMELAST = 16
    weight_NAMEFRST = 15
    weight_BPL = 4
    weight_SEX = 3
    weight_AGE = 2
    weight_SERIAL = 1
    
    # add scores weighted by importance
    score = weight_NAMELAST * dist_NAMELAST + \
            weight_NAMEFRST * dist_NAMEFRST + \
            weight_BPL * dist_BPL + \
            weight_SEX * dist_SEX + \
            weight_AGE * dist_AGE + \
            weight_SERIAL * dist_SERIAL
    
    return score
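
A sketch of calling score_matches on two hand-made records; the column values are invented, and pandas plus the pyjarowinkler import used above are assumed.

import pandas as pd

rec_1900 = pd.Series({"NAMELAST": "Smith", "NAMEFRST": "John", "BPL": 36,
                      "SEX": 1, "AGE": 30, "SERIAL": 101})
rec_1910 = pd.Series({"NAMELAST": "Smyth", "NAMEFRST": "John", "BPL": 36,
                      "SEX": 1, "AGE": 40, "SERIAL": 101})
print(score_matches(rec_1900, rec_1910))  # close to the maximum of 41 for a likely match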
Example #22
def most_similar_word(sentence: str, word: str) -> str:
    msw = ''
    min_dist = 10
    for token in word_tokenize(sentence):
        word_dist = 1 - distance.get_jaro_distance(word, token, winkler=True)
        if word_dist < min_dist:
            min_dist = word_dist
            msw = token
    return msw
Example #23
def usernameSimilarityScore(uname1, uname2):
    """Compare usernames using Jaro distance.
       
    Returns a score between 0 and 1, where 1 means exact match.
    """
    if uname1 == uname2:
        return 1  # matched exactly
    else:
        return distance.get_jaro_distance(uname1,uname2,winkler=False)
Example #24
def jaro_winkler_duplicate_processing(string1, string2):
    similarity = distance.get_jaro_distance(string1,
                                            string2,
                                            winkler=True,
                                            scaling=0.1)
    if (similarity >= 0.9):
        return True
    else:
        return False
Example #25
def locationSimilarityScore(loc1, loc2):
    """Compare location texts using Jaro distance.
       
    Returns a score between 0 and 1, where 1 means exact match.
    """
    if loc1 == loc2:
        return 1  # matched exactly
    else:
        return distance.get_jaro_distance(loc1,loc2,winkler=False)
Example #26
 def calculate(self, dataX, dataY):
     """ calculates the jaro distance
     Args:
         dataX: 1st string
         dataY: 2nd string
     Returns:
         float of the jaro distance
     """
     return distance.get_jaro_distance(dataX, dataY)
Example #27
def compare_masked_strings(a, b, masked_substrings):
    a1 = a
    b1 = b
    for masked in masked_substrings:
        if a1.find(masked) >= 0 and b1.find(masked) >= 0:
            a1 = a1.replace(masked, '')
            b1 = b1.replace(masked, '')

    return jaro.get_jaro_distance(a1, b1, winkler=False, scaling=0.1)
Example #28
def findequivalent(names_list, valor):
    higherbairro = 0
    nome_bairro = ''
    for bairro in names_list:
        x = distance.get_jaro_distance(valor, bairro)
        if x > higherbairro:
            higherbairro = x
            nome_bairro = bairro
    return nome_bairro
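
A hypothetical call with invented neighbourhood names; it assumes the same from pyjarowinkler import distance import as the surrounding examples.

bairros = ["Copacabana", "Ipanema", "Botafogo"]
print(findequivalent(bairros, "Copacabanna"))  # -> "Copacabana", the closest name by Jaro score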
Example #29
def pyjarowinkler_jaro_winkler_distance(candidates, inp, min_score, winkler):
    res = []
    for candidate in candidates:
        score = pyjarowinkler_distance.get_jaro_distance(candidate,
                                                         inp,
                                                         winkler=winkler)
        if score >= min_score:
            res.append((candidate, score))
    return res
Example #30
 def matched_tokens(self, words):
     tokens = []
     for word in words:
         for token in self.tokens:
             distance = jarowinkler_distance.get_jaro_distance(
                 token['word'], word, winkler=True, scaling=0.1)
             if distance > 0.90:
                 tokens.append(token)
     return tokens
Example #31
 def test_get_jaro_distance(self):
     self.assertEquals(0.0, distance.get_jaro_distance("fly", "ant"))
     self.assertEquals(0.44, distance.get_jaro_distance("elephant", "hippo"))
     self.assertEquals(0.91, distance.get_jaro_distance("ABC Corporation", "ABC Corp"))
     self.assertEquals(0.9, distance.get_jaro_distance("PENNSYLVANIA", "PENNCISYLVNIA"))
     self.assertEquals(0.93, distance.get_jaro_distance("D N H Enterprises Inc", "D & H Enterprises, Inc."))
     self.assertEquals(0.94, distance.get_jaro_distance("My Gym Children's Fitness Center",
                                                        "My Gym. Childrens Fitness"))
Example #32
 def phase_3_validation(self, interpretation, min_confidence):    
     result = Result() 
     last_underscore_index = self.profile.name.rfind(STRUCTURED_KEY_SEPARATOR)  
     if last_underscore_index >= 0:
         normalized_name = self.profile.name[last_underscore_index:]
     else:
         normalized_name = self.profile.name
     max_confidence = 0.0
     for matching_name in interpretation['iMatchingNames']:
         if len(matching_name) > 0:
             jaroDistance =  get_jaro_distance(normalized_name.lower(), matching_name.lower())
             if jaroDistance > max_confidence:
                 max_confidence = jaroDistance
     interpretation['iConfidence'] = max_confidence
     if max_confidence <= min_confidence:
         message = "Matching confidence not high enough for field " + normalized_name + " with interpretation "+interpretation['iName']+"."
         logging.debug(message)
         result.message = message
         return result
     logging.debug("Highest matching confidence for " + normalized_name + " was " + str(max_confidence) + ".")
     result.result = True
     return result
Example #33
    def __init__(self, mid, title, original_file, year, size, fps, guid, count, jacket, library_path):
        self.mid = mid
        self.original_file = original_file
        self.filepath = os.path.dirname(original_file)
        self.basename = os.path.basename(original_file)
        self.filename, self.file_ext = os.path.splitext(self.basename)

        self.title = title
        self.correct_title = self._clean_filename()
        self.title_distance = distance.get_jaro_distance(self.title, self.correct_title)

        self.year = year
        self.size = size
        self.fps = fps
        self.exist = os.path.exists(original_file)
        self.matched = not guid.startswith('local://')
        self.count = count

        self.library_path = library_path

        if self.matched:
            h = hashlib.sha1(guid).hexdigest()
            self.relative_jacket_path = os.path.join(self._jacket_path.format(h[0], h[1:], jacket[11:]))
Example #34
__author__ = 'Jean-Bernard Ratte - [email protected]'

from pyjarowinkler import distance

if __name__ == "__main__":
    first = "hello"
    second = "haloa"
    print("The words '{0}' and '{1}' matches at {2}%".format(first, second, distance.get_jaro_distance(first, second)))
Example #35
 def test_get_jaro_without_winkler(self):
     self.assertEquals(distance.get_jaro_distance("ZDVSXA", "ZWEIUHFSAD",
                                                  winkler_ajustment=False), 0.5111111111111111)
     self.assertEquals(distance.get_jaro_distance("frog", "fog",
                                                  winkler_ajustment=False), 0.9166666666666666)
     self.assertEquals(distance.get_jaro_distance("fly", "ant",
                                                  winkler_ajustment=False), 0.0)
     self.assertEquals(distance.get_jaro_distance("elephant", "hippo",
                                                  winkler_ajustment=False), 0.44166666666666665)
     self.assertEquals(distance.get_jaro_distance("hippo", "elephant",
                                                  winkler_ajustment=False), 0.44166666666666665)
     self.assertEquals(distance.get_jaro_distance("hippo", "zzzzzzzz",
                                                  winkler_ajustment=False), 0.0)
     self.assertEquals(distance.get_jaro_distance("hello", "hallo",
                                                  winkler_ajustment=False), 0.8666666666666667)
     self.assertEquals(distance.get_jaro_distance("ABC Corporation", "ABC Corp",
                                                  winkler_ajustment=False), 0.8444444444444444)
     self.assertEquals(distance.get_jaro_distance("PENNSYLVANIA", "PENNCISYLVNIA",
                                                  winkler_ajustment=False), 0.8300310800310801)
     self.assertEquals(distance.get_jaro_distance("My Gym Children's Fitness Center",
                                                  "My Gym. Childrens Fitness",
                                                  winkler_ajustment=False), 0.9033333333333333)
     self.assertEquals(distance.get_jaro_distance("D N H Enterprises Inc",
                                                  "D & H Enterprises, Inc.",
                                                  winkler_ajustment=False), 0.9073153899240856)