Python ratio示例，rapidfuzz.fuzz.ratio Python示例

示例#1

0

显示文件

文件： liechtenstein.py 项目： dshix/python-salesforce

def get_best_matches(text,
                     to_compare,
                     top_n=5,
                     case_sensitive=True,
                     include_percentage=False):
    """Return the ``top_n`` entries of ``to_compare`` most similar to ``text``.

    Similarity is ``fuzz.ratio``; when ``case_sensitive`` is false both
    strings are lower-cased before scoring.  Results are ordered from the
    highest to the lowest score, ties keeping their input order.  Each
    result is the candidate itself, or a ``(score, candidate)`` tuple when
    ``include_percentage`` is true.
    """
    def score(candidate):
        if case_sensitive:
            return fuzz.ratio(text, candidate)
        return fuzz.ratio(text.lower(), candidate.lower())

    ranked = sorted(((score(candidate), candidate) for candidate in to_compare),
                    key=lambda pair: pair[0],
                    reverse=True)
    best = ranked[:top_n]
    if include_percentage:
        return best
    return [candidate for _, candidate in best]

示例#2

0

显示文件

文件： multilang_similarity.py 项目： P-Programist/Upwork

def get_candidate_lines(keylines, lines, meta):
    """
    Pair every considered reference ("key") line with its most similar
    document line.

    Returns one dict per key line holding the key line itself ('keyline'),
    the matched text ('line'), its fuzz.ratio score ('similarity') and its
    index within the searched lines ('position').
    """
    # Heuristic 1: narrow the search scope using the best candidate for the
    # LAST reference line. Tuples compare element-wise, so equal scores
    # resolve to the later position.
    anchor_score, anchor_pos, anchor_line = max(
        (fuzz.ratio(keylines[-1]['line'], text), index, text)
        for index, text in enumerate(lines))

    # Trust the anchor only when it is not a random match (score above 55)
    # and it contains the digit "3" (in rare cases sections 2 and 3 are
    # swapped). The anchor line itself stays in scope because it is very
    # valuable for matching.
    if anchor_score > 55 and '3' in anchor_line:
        search_scope = lines[:anchor_pos + 1]
    else:
        search_scope = lines

    # Heuristic 2: very short documents (more than 1 and fewer than 5 pages)
    # often contain only brief sections without addresses or detailed
    # descriptions, so helper key lines are left out for them.
    if 1 < meta['page_count'] < 5:
        keyline_scope = [kl for kl in keylines if not kl['helper_line']]
    else:
        keyline_scope = keylines

    # Pick the best candidate for each remaining reference line; on equal
    # scores the earliest line wins.
    results = []
    for keyline in keyline_scope:
        scored = [
            {
                'line': text,
                'similarity': fuzz.ratio(keyline['line'], text),
                'position': index,
            }
            for index, text in enumerate(search_scope)
        ]
        winner = max(scored, key=lambda entry: entry['similarity'])
        results.append({'keyline': keyline, **winner})

    return results

示例#3

0

显示文件

文件： pride_leader.py 项目： AwesomeGitHubRepos/sir-lancebot

    def invalid_embed_generate(self, pride_leader: str) -> discord.Embed:
        """
        Build the embed shown when an unknown pride leader name was given.

        The given name is title-cased and compared with every name in
        PRIDE_RESOURCE; names scoring at least MINIMUM_FUZZ_RATIO are listed
        as "Did you mean?" suggestions. When nothing is close enough, all
        available names are listed instead.
        """
        embed = discord.Embed(color=constants.Colours.soft_red)
        wanted = pride_leader.title()
        close_names = [
            name for name in PRIDE_RESOURCE
            if fuzz.ratio(wanted, name) >= MINIMUM_FUZZ_RATIO
        ]

        if close_names:
            error_msg = "Did you mean?"
            listing = "\n".join(close_names)
        else:
            error_msg = "Sorry your input didn't match any stored names, here is a list of available names:"
            listing = ", ".join(PRIDE_RESOURCE)

        embed.description = f"{error_msg}\n```\n{listing}\n```"
        embed.set_footer(
            text="To add more pride leaders, feel free to open a pull request!"
        )
        return embed

示例#4

0

显示文件

文件： base.py 项目： tmetzl/nbgrader

    def init_notebooks(self) -> None:
        """Collect the notebooks of every assignment directory that matches.

        Fills ``self.assignments`` with ``{assignment_dir: [notebook paths]}``
        and resets ``self.notebooks``. Directories without a matching
        notebook are logged and skipped. When nothing matches at all, the
        closest existing directory (by fuzz.ratio against the glob) is
        suggested in the log and NbGraderException is raised.
        """
        self.assignments = {}
        self.notebooks = []
        assignment_glob = self._format_source(self.coursedir.assignment_id,
                                              self.coursedir.student_id)
        for assignment in glob.glob(assignment_glob):
            notebook_glob = os.path.join(assignment,
                                         self.coursedir.notebook_id + ".ipynb")
            notebooks = glob.glob(notebook_glob)
            if notebooks:
                self.assignments[assignment] = notebooks
            else:
                self.log.warning("No notebooks were matched by '%s'",
                                 notebook_glob)

        if self.assignments:
            return

        msg = "No notebooks were matched by '%s'" % assignment_glob
        self.log.error(msg)

        # Look at every assignment of the student to offer a suggestion.
        nearby = glob.glob(self._format_source("*", self.coursedir.student_id))
        if nearby:
            # Highest (score, path) tuple: best score, ties by path.
            _, suggestion = max(
                (fuzz.ratio(assignment_glob, path), path) for path in nearby)
            self.log.error("Did you mean: %s", suggestion)

        raise NbGraderException(msg)

示例#5

0

显示文件

    def has_wake_word(self, phrase):
        """Tell whether ``phrase`` starts with the configured wake word.

        A single-word phrase is compared directly. For longer phrases the
        first word is compared, unless it scores above 80 against one of the
        prefixes "ok"/"hey", in which case the second word is compared
        instead. A fuzz.ratio of at least 80 against ``self.wake_word``
        (case-insensitive) counts as a hit; on a hit ``self.heard`` is set to
        the words from the recorded start index onwards.
        """
        words = phrase.split()
        candidate = None
        rest_from = 0

        if len(words) == 1:
            candidate = words[0]
            self.heard = ""
        elif len(words) > 1:
            lead, follower = words[0:2]
            prefix_hits = process.extract(lead, ["ok", "hey"])
            if any(hit[1] > 80 for hit in prefix_hits):
                # The first word is a prefix, so the wake word should follow.
                candidate, rest_from = follower, 2
            else:
                candidate, rest_from = lead, 1

        detected = False
        if candidate and isinstance(candidate, str):
            detected = fuzz.ratio(candidate.lower(),
                                  self.wake_word.lower()) >= 80

        if detected:
            self.heard = " ".join(words[rest_from:])

        return detected

示例#6

0

显示文件

def get_matched_entries(s, field_values, m_theta=0.85, s_theta=0.85):
    """Find field values that fuzzily match a span of the source ``s``.

    Returns ``None`` when ``field_values`` is empty or nothing qualifies.
    Otherwise returns a list of ``(match_str, (field_value,
    source_match_str, match_score, s_match_score, match_size))`` items,
    best first.

    ``m_theta`` and ``s_theta`` are the minimum scores required; in this
    implementation both are checked against the same score value.
    """
    if not field_values:
        return None

    # A string source is tokenised with split(); anything else is used as
    # the token sequence directly.
    if isinstance(s, str):
        n_grams = split(s)
    else:
        n_grams = s

    # Keyed by the matched text, so a later field value producing the same
    # match_str replaces an earlier one.
    matched = dict()
    for field_value in field_values:
        # Only string field values are considered.
        if not isinstance(field_value, string_types):
            continue
        fv_tokens = split(field_value)
        # Longest common run between the source tokens and the field tokens.
        sm = difflib.SequenceMatcher(None, n_grams, fv_tokens)
        match = sm.find_longest_match(0, len(n_grams), 0, len(fv_tokens))
        if match.size > 0:
            source_match = get_effecitve_match_source(n_grams, match.a,
                                                      match.a + match.size)
            # The helper's result must be truthy and larger than one unit.
            if source_match and source_match.size > 1:
                # NOTE(review): match.b/match.size are indices into
                # fv_tokens but are used to slice the field_value string;
                # this presumably relies on split() producing one token per
                # character - confirm.
                match_str = field_value[match.b:match.b + match.size]
                source_match_str = s[source_match.start:source_match.start +
                                     source_match.size]
                # Normalised copies used for all comparisons below.
                c_match_str = match_str.lower().strip()
                c_source_match_str = source_match_str.lower().strip()
                c_field_value = field_value.lower().strip()
                # Drop empty matches, numbers and common database terms.
                if c_match_str and not utils.is_number(
                        c_match_str) and not utils.is_common_db_term(
                            c_match_str):
                    # Drop the candidate if any of the three texts is a stopword.
                    if utils.is_stopword(c_match_str) or utils.is_stopword(c_source_match_str) or \
                            utils.is_stopword(c_field_value):
                        continue
                    # A source span ending in "<match>'s" counts as a
                    # perfect match.
                    if c_source_match_str.endswith(c_match_str + '\'s'):
                        match_score = 1.0
                    else:
                        # Otherwise score with fuzz.ratio scaled to 0..1,
                        # but only when prefix_match() accepts the pair.
                        if prefix_match(c_field_value, c_source_match_str):
                            match_score = fuzz.ratio(c_field_value,
                                                     c_source_match_str) / 100
                        else:
                            match_score = 0
                    # Common words are only accepted with a score of at least 1.
                    if (utils.is_commonword(c_match_str)
                            or utils.is_commonword(c_source_match_str)
                            or utils.is_commonword(c_field_value)
                        ) and match_score < 1:
                        continue
                    # The source-side score is simply the match score.
                    s_match_score = match_score
                    if match_score >= m_theta and s_match_score >= s_theta:
                        # All-uppercase field values are skipped when the
                        # product of the two scores is below 1.
                        if field_value.isupper(
                        ) and match_score * s_match_score < 1:
                            continue
                        matched[match_str] = (field_value, source_match_str,
                                              match_score, s_match_score,
                                              match.size)

    if not matched:
        return None
    else:
        # Descending by a weighted sum in which match_score dominates, then
        # s_match_score, then the match size.
        return sorted(matched.items(),
                      key=lambda x: (1e16 * x[1][2] + 1e8 * x[1][3] + x[1][4]),
                      reverse=True)

示例#7

0

显示文件

文件： renamenotebooks.py 项目： DigiKlausur/ilias2nbgrader

 def get_matches(self, file, files):
     """Return the notebook entries of ``files`` ranked by similarity.

     Only entries whose last element is ``'ipynb'`` are considered. Each is
     scored with ``fuzz.ratio(file[0], entry[1])``.

     Returns a ``(matches, sims)`` pair of parallel lists ordered from the
     most to the least similar entry; equal scores keep their input order.
     """
     matches = [f for f in files if f[-1] == 'ipynb']
     sims = [fuzz.ratio(file[0], m[1]) for m in matches]
     # Rank every candidate by its OWN score, best first. The previous key,
     # ``sims[::-1].__getitem__``, ranked index i by the score of index
     # n-1-i, which only produced the right order by accident for n <= 2 or
     # for input that was already sorted.
     order = sorted(range(len(sims)), key=sims.__getitem__, reverse=True)
     return [matches[i] for i in order], [sims[i] for i in order]

示例#8

0

显示文件

文件： search.py 项目： irbraun/oats

def binary_fuzzy_match(pat, txt, threshold, local=1):
    """
    Report whether a pattern fuzzily occurs in a longer text.

    The match does not have to be character-perfect; how many mismatches are
    tolerated is controlled by ``threshold``.

    Args:
        pat (str): The shorter text to search for.

        txt (str): The larger text to search within.

        threshold (float): Value between 0 and 1; it is scaled by 100 and
            compared against the similarity score.

        local (int, optional): Alignment method, 1 scores with
            ``fuzz.partial_ratio``, any other value with ``fuzz.ratio``.

    Returns:
        bool: True if the pattern was found, False if it was not. A pattern
        longer than the text is never considered found.
    """
    if len(pat) > len(txt):
        return False
    scorer = fuzz.partial_ratio if local == 1 else fuzz.ratio
    return bool(scorer(pat, txt) >= threshold * 100)

示例#9

0

显示文件

    def get_combined_fuzz_score(self, a, b, mode='geom_mean'):
        """Combine the weighted full and partial fuzz scores of two names.

        Both names are passed through ``clean_name`` first. The plain
        ``fuzz.ratio`` and the ``fuzz.partial_ratio`` are multiplied by
        ``self.weight['simple']`` and ``self.weight['partial']`` and handed
        to ``self.combine_scores`` together with ``mode``.
        """
        left = clean_name(a)
        right = clean_name(b)

        weighted_simple = float(fuzz.ratio(left, right) * self.weight['simple'])
        weighted_partial = float(
            fuzz.partial_ratio(left, right) * self.weight['partial'])

        return self.combine_scores(weighted_simple, weighted_partial, mode=mode)

示例#10

0

显示文件

def find_similar(search_for, dataset):
    """Yield the entry of ``dataset`` closest to ``search_for``, then its score.

    This is a generator producing exactly two values: the best matching
    entry (the first one on ties) and its ``fuzz.ratio``. Iterating it with
    an empty ``dataset`` raises ``ValueError``.
    """
    scores = [fuzz.ratio(search_for, candidate) for candidate in dataset]
    best_index = max(range(len(scores)), key=scores.__getitem__)
    yield dataset[best_index]
    yield scores[best_index]

示例#11

0

显示文件

文件： difflib.py 项目： JKamlah/tesseractXplore

def subseq_matcher(seq1, seq2):
    """Greedily pair up similar entries of ``seq1`` and ``seq2``.

    Every pair is scored with ``ratio``; scores of 30 or less are treated as
    "no match". The best remaining pair is fixed repeatedly (ties go to the
    pair closest to the diagonal) until no positive score is left.

    Returns a list of ``[seq1_entry, seq2_entry]`` pairs in which an
    unmatched entry is paired with ``""``. Empty inputs are supported: every
    entry of the other sequence is then reported as unmatched.
    """
    ls_grid = np.zeros((len(seq1), len(seq2)))
    for subseq1_index, subseq1 in enumerate(seq1):
        for subseq2_index, subseq2 in enumerate(seq2):
            ra = ratio(subseq1, subseq2)
            ls_grid[subseq1_index][subseq2_index] = ra if ra > 30 else 0
    # np.amax raises on a zero-size array, so there is nothing to assign
    # when either sequence is empty.
    if ls_grid.size:
        max_val = np.argwhere(ls_grid == np.amax(ls_grid))
        # Stop as soon as no positive score remains. Testing ``> 0`` rather
        # than ``!= 0`` matters for a fully matched 1x1 grid: its only cell
        # is the -1 marker, so the maximum is -1 and ``!= 0`` never ended.
        while ls_grid[max_val[0][0]][max_val[0][1]] > 0:
            if len(max_val) != 1:
                # Several equally good pairs: prefer the one nearest the
                # diagonal, i.e. with the most similar positions.
                max_val = [max_val[np.argmin([np.abs(x - y) for x, y in max_val])]]
            # Clear the chosen row and column, then mark the pair with -1.
            ls_grid[:, max_val[0][1]], ls_grid[max_val[0][0], :] = 0, 0
            ls_grid[max_val[0][0]][max_val[0][1]] = -1
            max_val = np.argwhere(ls_grid == np.amax(ls_grid))
    matched_seq = []
    if len(seq1) <= len(seq2):
        for col_id, col in enumerate(ls_grid.T):
            match = np.argwhere(col == -1)
            if len(match) == 0:
                matched_seq.append(["", seq2[col_id]])
            else:
                matched_seq.append([seq1[match[0][0]], seq2[col_id]])
            # A row sums to -1 exactly when it holds a match marker, so any
            # other sum means this seq1 entry stayed unmatched.
            if col_id < len(seq1) and np.sum(ls_grid[col_id][:]) != -1:
                matched_seq.append([seq1[col_id], ""])
    else:
        for row_id, col in enumerate(ls_grid):
            match = np.argwhere(col == -1)
            if len(match) == 0:
                matched_seq.append([seq1[row_id], ""])
            else:
                matched_seq.append([seq1[row_id], seq2[match[0][0]]])
            # Same check for the columns: report unmatched seq2 entries.
            if row_id < len(seq2) and np.sum(ls_grid.T[row_id, :]) != -1:
                matched_seq.append(["", seq2[row_id]])
    return matched_seq

示例#12

0

显示文件

文件： local_fuzzing_find_similar_tags.py 项目： alecbw/Find-Similar-Tags

def find_similar_pairs(tags, *, required_similarity=80):
    """
    Yield every pair of similar-looking tags from the collection ``tags``.

    The tags are sorted first and each unordered pair is checked once; a
    pair is yielded when its ``fuzz.ratio`` is strictly greater than
    ``required_similarity``. Raise that value for stricter matching.
    """
    ordered = sorted(tags)
    for index, first in enumerate(ordered):
        for second in ordered[index + 1:]:
            if fuzz.ratio(first, second) > required_similarity:
                yield (first, second)

示例#13

0

显示文件

文件： manufacturer_index.py 项目： ice-bridge/AutoEq

 def search(self, name, threshold=80):
     """Find manufacturers with a name variant similar to ``name``.

     Every variant of every entry in ``self.manufacturers`` is compared
     case-insensitively with ``name``. Each variant scoring strictly above
     ``threshold`` contributes one ``(manufacturer[0], score)`` tuple, so a
     manufacturer can appear more than once. The result is ordered from the
     highest to the lowest score.
     """
     hits = []
     for manufacturer in self.manufacturers:
         variant_scores = (fuzz.ratio(variant.lower(), name.lower())
                           for variant in manufacturer)
         hits.extend((manufacturer[0], score)
                     for score in variant_scores
                     if score > threshold)
     hits.sort(key=lambda hit: hit[1], reverse=True)
     return hits

示例#14

0

显示文件

    def _assignment_not_found(self, src_path, other_path):
        """Log a missing assignment, suggest a close match and raise.

        ``src_path`` is the path that was looked for; ``other_path`` is a
        glob pattern for alternatives. When the pattern matches anything,
        the path most similar to ``src_path`` is logged as a suggestion.

        Raises:
            ExchangeError: always, carrying the "not found" message.
        """
        msg = "Assignment not found at: {}".format(src_path)
        self.log.fatal(msg)
        found = glob.glob(other_path)
        if found:
            # Score against the path that was reported missing. The
            # ``src_path`` argument used to be ignored here in favour of
            # ``self.src_path``, which could disagree with the message.
            _, closest = max((fuzz.ratio(src_path, x), x) for x in found)
            self.log.error("Did you mean: %s", closest)

        raise ExchangeError(msg)

示例#15

0

显示文件

 def find_similarity(col1, col2):
     """Score two values with the back-end selected by the enclosing ``algo``.

     ``"rapidfuzz"`` uses ``fuzz.ratio``, ``"editdistance"`` uses
     ``editdistance.eval`` and anything else falls back to
     ``SequenceMatcher(...).ratio()``.

     NOTE(review): the three back-ends do not appear to share a scale (a
     similarity, an edit distance and difflib's ratio); callers presumably
     account for that - confirm.
     """
     if algo == "rapidfuzz":
         return fuzz.ratio(col1, col2)
     if algo == "editdistance":
         return editdistance.eval(col1, col2)
     # First argument None: no "junk" filter function is supplied.
     return SequenceMatcher(None, col1, col2).ratio()

示例#16

0

显示文件

文件： multilang_similarity.py 项目： P-Programist/Upwork

def calc_order_score(document_keylines, ordered):
    '''
    Compare the order of two lists of key lines.

    Every line of ``document_keylines`` is encoded as a single character
    (code point 100 + its index), both lists are turned into strings with
    that encoding and the strings are compared with fuzz.ratio.
    A line of ``ordered`` missing from ``document_keylines`` raises KeyError.
    '''
    codes = {}
    for index, line in enumerate(document_keylines):
        codes[line] = chr(100 + index)

    encoded_document = ''.join(map(codes.__getitem__, document_keylines))
    encoded_ordered = ''.join(map(codes.__getitem__, ordered))
    return fuzz.ratio(encoded_document, encoded_ordered)

示例#17

0

显示文件

def get_fuzzy_list(utterances: list, intent_ids: list):
    """List, for every utterance, the other utterances that nearly duplicate it.

    Each unordered pair is compared once with ``fuzz.ratio`` and a
    ``score_cutoff`` of 90; a truthy score records the pair on both sides as
    ``"Row <n>: [<intent id>] <utterance>"``, where the row number is the
    list index plus 2.

    Returns one list of such descriptions per utterance, in input order.
    """
    matches_per_utterance = [[] for _ in utterances]
    total = len(utterances)
    for i, (utterance, intent_id) in enumerate(zip(utterances, intent_ids)):
        logger.info(f"Processing utterance {i + 1} of {len(utterances)}.")
        for k in range(i + 1, total):
            choice = utterances[k]
            if not fuzz.ratio(utterance, choice, score_cutoff=90):
                continue
            matches_per_utterance[i].append(
                f"Row {k + 2}: [{intent_ids[k]}] {choice}")
            matches_per_utterance[k].append(
                f"Row {i + 2}: [{intent_ids[i]}] {utterance}")
    return matches_per_utterance

示例#18

0

显示文件

文件： bedrooms.py 项目： ku222/archi-gan

 def _get_caption(self, imgname: str) -> List[str]:
     """Return the caption stored for the path that best matches ``imgname``.

     Only keys of ``self.img2caption`` that contain ``imgname`` are
     considered; among them the one with the highest ``ratio`` (strictly
     above 0, first one on ties) is used. When no key qualifies the lookup
     is done with ``None``.
     """
     best_path = None
     best_score = 0
     containing = (path for path in self.img2caption.keys()
                   if imgname in path)
     for path in containing:
         score = ratio(imgname, path)
         if score > best_score:
             best_path, best_score = path, score
     return self.img2caption[best_path]

示例#19

0

显示文件

文件： test_polyfuzz.py 项目： tap222/PolyFuzz

    def match(self, from_list, to_list):
        """Map every string of ``from_list`` to its best match in ``to_list``.

        Scores are ``fuzz.ratio`` divided by 100. Returns a DataFrame with
        the columns 'From', 'To' (the best-scoring entry of ``to_list``) and
        'Similarity' (that score).
        """
        # One row of scores per source string, one column per target string.
        score_rows = []
        for from_string in from_list:
            score_rows.append([fuzz.ratio(from_string, to_string) / 100
                               for to_string in to_list])

        best_columns = np.argmax(score_rows, axis=1)
        best_scores = np.max(score_rows, axis=1)

        return pd.DataFrame({
            'From': from_list,
            'To': [to_list[column] for column in best_columns],
            'Similarity': best_scores,
        })

示例#20

0

显示文件

文件： string_processor.py 项目： AnyKeyShik/Bismarck

def is_words_similar(string, model):
    """
    Tell whether two strings are similar enough according to fuzz.ratio

    :param string: user input
    :param model: model string
    :return: True when fuzz.ratio, called with score_cutoff=75, gives a
        truthy score
    :rtype: bool
    """
    return bool(fuzz.ratio(string, model, score_cutoff=75))

示例#21

0

显示文件

def search_by_url(data: dict, url: str, topn: int = 5) -> List[tuple]:
    """Rank the items of ``data["list"]`` by URL similarity to ``url``.

    Items without a "resolved_url" are ignored; the comparison is
    case-insensitive. Returns up to ``topn`` ``(score, item_id, vals)``
    tuples, best first, or only the best one when its score is above 95.
    """
    assert url
    logger.info(f"Searching for url={url}")
    needle = url.lower()
    scored = []
    for item_id, vals in data["list"].items():
        candidate = vals.get("resolved_url", "").lower()
        if not candidate:
            continue
        scored.append((fuzz.ratio(needle, candidate), item_id, vals))
    scored.sort(key=lambda hit: hit[0], reverse=True)
    top = scored[:topn]
    # A near-exact hit makes the remaining candidates irrelevant.
    if top and top[0][0] > 95:
        return top[:1]
    return top

示例#22

0

显示文件

def get_best_result(title, configfile, dbfile):
    """Return the payload of the search result that best matches ``title``.

    The results of ``get(..., sj_only=True)`` are looked up under the keys
    "result1000", "result1001", ... and scored with fuzz.ratio after
    removing a trailing " (...)" part. The best result only counts as a hit
    when its title starts with ``title`` (case-insensitive, every space
    matching any single character).

    Returns the hit's payload, or ``False`` when the search fails or no
    result qualifies; in the latter case ``title`` is stored in a search
    list.
    """
    try:
        sj_results = get(title, configfile, dbfile, sj_only=True)[1]
    except Exception:
        return False

    # Keep each title together with the key it came from. The previous
    # index arithmetic (``i += 1 + 1000``) pointed every result after the
    # first at a wrong key and broke whenever an entry had been skipped.
    candidates = []
    for offset in range(len(sj_results)):
        key = "result" + str(offset + 1000)
        try:
            candidates.append((key, sj_results.get(key).get('title')))
        except AttributeError:
            # Nothing stored under this key.
            continue

    best_score = 0
    best_key = None
    for key, candidate_title in candidates:
        cleaned = re.sub(r"\s\(.*\)", "", candidate_title)
        score = fuzz.ratio(title, cleaned)
        if score > best_score:
            best_score = score
            best_key = key

    best_title = False
    best_payload = None
    if best_key is not None:
        best = sj_results.get(best_key)
        best_title = best.get('title')
        best_payload = best.get('payload')
        try:
            if not re.match(r"^" + title.replace(" ", ".") + r".*$", best_title,
                            re.IGNORECASE):
                best_title = False
        except re.error:
            # The title is used as a pattern unescaped; one that is not a
            # valid regular expression is treated as "no hit".
            best_title = False

    if not best_title:
        logger.debug('Kein Treffer fuer die Suche nach ' + title +
                     '! Suchliste ergänzt.')
        listen = ["List_ContentShows_Shows", "List_ContentAll_Seasons"]
        for liste in listen:
            cont = ListDb(dbfile, liste).retrieve()
            if not cont:
                cont = ""
            if title not in cont:
                ListDb(dbfile, liste).store(title)
            # NOTE(review): returning inside the loop means only the first
            # list is ever updated - confirm whether that is intended.
            return False
    logger.debug('Bester Treffer fuer die Suche nach ' + title + ' ist ' +
                 best_title)
    return best_payload

示例#23

0

显示文件

	def findItemName(self, itemDictionary, messageItem):
		"""Return the item name whose label is most similar to ``messageItem``.

		Labels are scored with fuzz.ratio, using the best score so far as
		``score_cutoff``; a label only replaces the current best when it
		scores strictly higher. Returns ``None`` when no label scores above 0
		or when a KeyError interrupts the search before any hit.
		"""
		topScore = 0
		winner = None

		try:
			labelledItems = list(itemDictionary.items())
			for candidateName, candidateLabel in labelledItems:
				candidateScore = fuzz.ratio(messageItem, candidateLabel, score_cutoff=topScore)
				if candidateScore > topScore:
					topScore, winner = candidateScore, candidateName
		except KeyError:
			pass

		return winner

示例#24

0

显示文件

def match(
    left: pd.Series,
    right: pd.Series,
    preprocess: bool = False,
    fuzzy: bool = False,
    threshold: float = 0.8,
) -> pd.Series:
    """
    Compares values between two different Series to check if they match.

    Parameters
    ----------
    left : Series
        Left Series.
    right : Series
        Right Series.
    preprocess : bool
        Whether to clean and standardize values before comparing them.
    fuzzy : bool
        Whether to compare values using fuzzy logic.
    threshold : float
        Minimum fuzzy score, on a 0-1 scale, for two values to count as
        equal. Only used when ``fuzzy`` is true.

    Returns
    -------
    Series
        Nullable-boolean Series indicating whether the values match; it is
        ``<NA>`` wherever ``right`` is missing.

    """
    if preprocess:
        left = standardize_text(left)
        right = standardize_text(right)

    if fuzzy:
        values = pd.DataFrame({"left": left, "right": right})
        # Missing values are scored as empty strings.
        values = values.fillna("")
        score = values.apply(lambda row: fuzz.ratio(row["left"], row["right"]),
                             axis=1)
        result = (score / 100) >= threshold
    else:
        result = left == right

    # Convert to the nullable dtype first and blank out afterwards: writing
    # NaN into a plain bool Series forces an upcast that pandas deprecates
    # and newer versions refuse.
    result = result.astype("boolean")
    result.loc[right.isna()] = pd.NA

    return result

示例#25

0

显示文件

        def get_potential(iterable: Iterable,
                          *,
                          threshold: int = 80) -> list[str]:
            nonlocal name
            potential = []

            for item in iterable:
                original, item = item, item.lower()

                if name == item:
                    return [original]

                a, b = fuzz.ratio(name, item), fuzz.partial_ratio(name, item)
                if a >= threshold or b >= threshold:
                    potential.append(original)

            return potential

示例#26

0

显示文件

文件： utils.py 项目： dgks0n/PhuzzyMatcher

def fuzzy_matcher(features, document, match=None):
    """Find phrases of ``document`` that fuzzily match one of ``features``.

    The document is tokenised with ``nltk.word_tokenize``. For every feature
    a window as wide as the feature's word count slides over the tokens; the
    lower-cased window, cut short at the first token containing one of
    ``, ! ? { } [ ]``, is compared with the lower-cased feature.

    ``match`` is the score a phrase must exceed; ``None`` (the default)
    disables the threshold so every non-empty phrase is reported.

    Returns a list of ``[matched_phrase, feature, start_index, last_index]``
    entries, where ``last_index`` is the last token index the window
    examined.
    """
    matches = []
    tokens = nltk.word_tokenize(document)
    for feature in features:
        feature_lower = feature.lower()
        feature_length = len(feature.split(" "))
        for i in range(len(tokens) - feature_length + 1):
            words = []
            j = i
            for j in range(i, i + feature_length):
                if re.search(r'[,!?{}\[\]]', tokens[j]):
                    break
                words.append(tokens[j].lower())
            # Joining the words avoids the stray leading space the old
            # string concatenation left in place (its ``strip()`` result was
            # discarded), which skewed every score.
            matched_phrase = " ".join(words)
            if not matched_phrase:
                continue
            # Comparing a score with None raises TypeError on Python 3, so
            # the "no threshold" default is handled explicitly.
            if match is None or fuzz.ratio(matched_phrase, feature_lower) > match:
                matches.append([matched_phrase, feature, i, j])
    return matches

示例#27

0

显示文件

文件： multilang_similarity.py 项目： P-Programist/Upwork

def get_possible_sds_count(final_score, meta, last_section_candidates, section3_anchor):
    '''
    Main decision function.

    Decides between NON SDS (0), a single SDS (1) and, for documents with
    at least MULTI_SDS_MIN_PAGE_COUNT pages, an estimate of how many SDSs
    were concatenated into the file.

    The estimate counts the lines that contain ``section3_anchor`` and are
    more similar to one of the ``last_section_candidates`` (a mapping of
    candidate line -> minimum similarity) than that candidate requires.
    Every line is counted at most once.
    '''
    if final_score < 45:
        # Basic NON SDS case
        sds_count = 0
    elif final_score >= 45 and meta['page_count'] < MULTI_SDS_MIN_PAGE_COUNT:
        sds_count = 1
    elif final_score >= 45 and meta['page_count'] >= MULTI_SDS_MIN_PAGE_COUNT:
        sds_count = 0  # Because all SDSs are counted here
        for line in meta['all_lines']:
            if section3_anchor not in line:
                continue

            # Special cases for bad (but very similar) lines.
            # Lines containing a double quote:
            # INVALID LINE EXAMPLE: 5.1.3 sds section 3 "composition/information on ingredients"
            # Lines starting with "(" or "1" (because "11" can be a bad OCR
            # of a double quote):
            # INVALID LINE EXAMPLE: (composition/information on ingredients) .
            # startswith() also copes with an empty line, where line[0]
            # would raise IndexError.
            bad_line = line.startswith(('(', '1')) or ('"' in line)
            if bad_line:
                continue

            for candidate_line, min_similarity in last_section_candidates.items():
                if fuzz.ratio(candidate_line, line) > min_similarity:
                    sds_count += 1
                    # Stop testing this line once it matched. The previous
                    # ``continue`` was a no-op, so a line matching several
                    # candidates was counted several times.
                    break

        # Edge cases: the ratio between page count and SDS count cannot be
        # very low. 1 or 2 pages per SDS points to a layout problem, e.g. a
        # document repeating all sections as an agenda on each page.
        if sds_count:
            if meta['page_count'] / sds_count <= 2:
                sds_count = 1

    return sds_count

示例#28

0

显示文件

    def sort_found_entities(
        self,
        candidate_entities: List[Tuple[int, str, int]],
        candidate_names: List[List[str]],
        entity: str,
        context: str = None
    ) -> Tuple[List[str], List[float], List[Tuple[str, str, int, int]]]:
        """Rank candidate entities for the mention ``entity``.

        Each candidate is unpacked as ``(entity_num, entity_id, num_rels,
        tokens_matched)`` and gets the best fuzz.ratio of its lower-cased
        names against ``entity``. Candidates are ordered, best first, by
        tokens matched, fuzz ratio and number of relations.

        With ``self.use_descriptions`` the top 30 are re-scored by
        ``self.entity_ranker.rank_rels`` using ``context``; only entities
        whose rank score exceeds ``self.descr_rank_score_thres`` or whose
        fuzz ratio is 100 are kept.

        Returns ``(entity_ids, confidences, srtd_with_ratios)`` where the
        last item holds ``(entity_num, entity_id, tokens_matched,
        fuzz_ratio, num_rels)`` tuples.
        """
        srtd_with_ratios = []
        for (entity_num, entity_id, num_rels, tokens_matched), names in zip(
                candidate_entities, candidate_names):
            best_ratio = max(fuzz.ratio(name.lower(), entity) for name in names)
            srtd_with_ratios.append(
                (entity_num, entity_id, tokens_matched, best_ratio, num_rels))
        srtd_with_ratios.sort(key=lambda row: row[2:5], reverse=True)

        if not self.use_descriptions:
            entity_ids = [row[1] for row in srtd_with_ratios]
            # NOTE(review): index 2 is tokens_matched, not the fuzz ratio
            # (index 3); the 0.01 factor suggests a percentage was meant -
            # confirm which value should feed the confidence.
            confidences = [float(row[2]) * 0.01 for row in srtd_with_ratios]
            return entity_ids, confidences, srtd_with_ratios

        log.debug(f"context {context}")
        shortlist = srtd_with_ratios[:30]
        id_to_score = {row[1]: (row[2], row[3]) for row in shortlist}
        shortlisted_ids = [row[1] for row in shortlist]
        ranked = self.entity_ranker.rank_rels(context, shortlisted_ids)

        # (entity_id, tokens_matched, fuzz_ratio, rank_score), best first.
        scored = sorted(
            ((ranked_id, *id_to_score[ranked_id], rank_score)
             for ranked_id, rank_score in ranked),
            key=lambda item: item[1:4],
            reverse=True)
        kept = [
            item for item in scored
            if item[3] > self.descr_rank_score_thres or item[2] == 100.0
        ]
        log.debug(f"entities_with_scores {kept[:10]}")

        entity_ids = [item[0] for item in kept]
        confidences = [item[3] for item in kept]
        return entity_ids, confidences, srtd_with_ratios

示例#29

0

显示文件

def search_by_title(data: dict, title: str, topn: int = 5) -> List[tuple]:
    """Rank the items of ``data["list"]`` by title similarity to ``title``.

    Items without a "resolved_title" are ignored; the comparison is
    case-insensitive. Returns up to ``topn`` ``(score, item_id, vals)``
    tuples, best first, or only the best one when its score is above 95.
    """
    assert title
    logger.info(f"Searching for title={title}")
    needle = title.lower()
    scored = []
    for item_id, vals in data["list"].items():
        candidate = vals.get("resolved_title", "").lower()
        if not candidate:
            continue
        scored.append((fuzz.ratio(needle, candidate), item_id, vals))
    scored.sort(key=lambda hit: hit[0], reverse=True)
    top = scored[:topn]
    # A near-exact hit makes the remaining candidates irrelevant.
    if top and top[0][0] > 95:
        return top[:1]
    return top

示例#30

0

显示文件

文件： __init__.py 项目： domcross/fhem-skill

    def handle_presence_intent(self, message):
        """Answer where the person named in ``message.data["entity"]`` is.

        The ROOMMATE devices of the allowed room are fetched and the real
        name of each is compared case-insensitively with the wanted name.
        The best match scoring above 66 decides the answer: its presence
        reading is translated and spoken. Setup problems, an unreachable
        server, no roommates and no sufficiently similar name each produce
        their own error dialog.
        """
        self._setup()
        if self.fhem is None:
            self.speak_dialog('fhem.error.setup')
            return

        wanted = message.data["entity"]
        LOG.debug("wanted: %s" % wanted)

        try:
            roommates = self.fhem.get(room=self.allowed_devices_room,
                                      device_type='ROOMMATE')
        except ConnectionError:
            self.speak_dialog('fhem.error.offline')
            return

        if len(roommates) < 1:
            self.speak_dialog('fhem.presence.error')
            return

        found_presence = None
        top_ratio = 66  # a name has to score above this to be accepted

        for roommate in roommates:
            attributes = roommate['Attributes']
            if 'rr_realname' not in attributes:
                continue
            # 'rr_realname' stores the key of the attribute holding the name.
            real_name = attributes[attributes['rr_realname']]
            LOG.debug("realname: %s" % real_name)
            score = fuzz.ratio(wanted.lower(),
                               real_name.lower(),
                               score_cutoff=top_ratio)
            LOG.debug("ratio: %s" % score)
            if score > top_ratio:
                found_presence = roommate['Readings']['presence']['Value']
                top_name = real_name
                top_ratio = score

        presence_values = self.translate_namedvalues('presence.value')
        if not found_presence:
            self.speak_dialog('fhem.presence.error')
            return

        self.speak_dialog('fhem.presence.found',
                          data={
                              'wanted': top_name,
                              'location': presence_values[found_presence]
                          })