Example #1
def get_best_matches(text,
                     to_compare,
                     top_n=5,
                     case_sensitive=True,
                     include_percentage=False):
    if case_sensitive:
        compare = fuzz.ratio
    else:
        compare = lambda a, b: fuzz.ratio(a.lower(), b.lower())
    result = [(compare(text, s), s) for s in to_compare]
    result.sort(key=lambda x: x[0], reverse=True)
    return [tup if include_percentage else tup[1] for tup in result[:top_n]]
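A minimal usage sketch, assuming the function above is in scope and that `fuzz` comes from `rapidfuzz` (or `fuzzywuzzy`); the candidate strings are made up for illustration:

from rapidfuzz import fuzz  # assumption: source of fuzz

names = ["Alice Smith", "alice smythe", "Bob Jones"]
print(get_best_matches("Alice Smith", names, top_n=2))
# e.g. ['Alice Smith', 'alice smythe']
print(get_best_matches("ALICE SMITH", names, top_n=1,
                       case_sensitive=False, include_percentage=True))
# e.g. [(100.0, 'Alice Smith')]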
Example #2
def get_candidate_lines(keylines, lines, meta):
    """
    For each key line get the candidate
    line from file lines with similarity metrics
    and line position
    """
    results = []

    # Heuristic 1: limit the search scope with the best candidate
    # for the LAST reference line
    last_line_candidates = []

    # Find the best candidate
    for position, line in enumerate(lines):
        similarity = fuzz.ratio(keylines[-1]['line'], line)
        last_line_candidates.append((similarity, position, line))

    cand_score, cand_pos, cand_line = max(last_line_candidates)

    # if it is not a random match (at least 55%)
    # and it contains the number "3", limit the search scope
    # (because in rare cases sections 2 and 3 are swapped)
    if (cand_score > 55 and '3' in cand_line):
        search_scope = lines[:cand_pos + 1]  # +1 because the line itself is very valuable for matching
    else:
        search_scope = lines

    # Heuristic 2: very short documents (2-4 pages) often contain
    # only sections with brief info, with no addresses or detailed
    # descriptions, so we leave the helper lines out of the search
    if 1 < meta['page_count'] < 5:
        keyline_scope = [kl for kl in keylines if not kl['helper_line']]
    else:
        keyline_scope = keylines

    # Now pick the best candidate for each reference line
    for keyline in keyline_scope:
        candidates = []

        for position, line in enumerate(search_scope):
            similarity = fuzz.ratio(keyline['line'], line)
            candidates.append({
                'line': line,
                'similarity': similarity,
                'position': position
            })

        best_match = max(candidates, key=lambda c: c['similarity'])
        results.append({'keyline': keyline, **best_match})

    return results
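A sketch of the expected input shapes, with made-up keylines and metadata (the real structures come from the surrounding project; the `fuzz` import is an assumption):

from rapidfuzz import fuzz  # assumption

keylines = [
    {'line': 'section 1: identification', 'helper_line': False},
    {'line': 'section 3: composition/information on ingredients',
     'helper_line': False},
]
lines = ['section 1: identification',
         'some body text',
         'section 3: composition/information on ingredients']
meta = {'page_count': 3}

for hit in get_candidate_lines(keylines, lines, meta):
    print(hit['keyline']['line'], '->', hit['line'], hit['similarity'])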
Example #3
    def invalid_embed_generate(self, pride_leader: str) -> discord.Embed:
        """
        Generates Invalid Embed.

        The invalid embed contains a list of names that closely match the
        invalid pride leader name the user gave. If no close matches are
        found, it lists all the available pride leader names.

        Wikipedia is a useful place to learn about pride leaders, and we
        don't have all of them, so the bot would add a field containing the
        wikipedia command to execute.
        """
        embed = discord.Embed(color=constants.Colours.soft_red)
        valid_names = []
        pride_leader = pride_leader.title()
        for name in PRIDE_RESOURCE:
            if fuzz.ratio(pride_leader, name) >= MINIMUM_FUZZ_RATIO:
                valid_names.append(name)

        if not valid_names:
            valid_names = ", ".join(PRIDE_RESOURCE)
            error_msg = "Sorry your input didn't match any stored names, here is a list of available names:"
        else:
            valid_names = "\n".join(valid_names)
            error_msg = "Did you mean?"

        embed.description = f"{error_msg}\n```\n{valid_names}\n```"
        embed.set_footer(
            text="To add more pride leaders, feel free to open a pull request!"
        )

        return embed
Example #4
    def init_notebooks(self) -> None:
        self.assignments = {}
        self.notebooks = []
        assignment_glob = self._format_source(self.coursedir.assignment_id,
                                              self.coursedir.student_id)
        for assignment in glob.glob(assignment_glob):
            notebook_glob = os.path.join(assignment,
                                         self.coursedir.notebook_id + ".ipynb")
            found = glob.glob(notebook_glob)
            if len(found) == 0:
                self.log.warning("No notebooks were matched by '%s'",
                                 notebook_glob)
                continue
            self.assignments[assignment] = found

        if len(self.assignments) == 0:
            msg = "No notebooks were matched by '%s'" % assignment_glob
            self.log.error(msg)

            assignment_glob2 = self._format_source("*",
                                                   self.coursedir.student_id)
            found = glob.glob(assignment_glob2)
            if found:
                scores = sorted([(fuzz.ratio(assignment_glob, x), x)
                                 for x in found])
                self.log.error("Did you mean: %s", scores[-1][1])

            raise NbGraderException(msg)
Example #5
    def has_wake_word(self, phrase):
        phrase_parts = phrase.split()

        test_word = False
        start_index = 0
        retn = False

        if len(phrase_parts) == 1:
            test_word = phrase_parts[0]
            self.heard = ""

        elif len(phrase_parts) > 1:
            prefixes = ["ok", "hey"]

            test_word = False

            first_word, second_word = phrase_parts[0:2]
            extracted_processes = process.extract(first_word, prefixes)
            for extracted_process in extracted_processes:
                if extracted_process[1] > 80:
                    test_word = second_word
                    start_index = 2

            if not test_word:
                test_word = first_word
                start_index = 1

        if test_word and isinstance(test_word, str):
            fuzzed = fuzz.ratio(test_word.lower(), self.wake_word.lower())
            retn = fuzzed >= 80

        if retn:
            self.heard = " ".join(phrase_parts[start_index::])

        return retn
Example #6
def get_matched_entries(s, field_values, m_theta=0.85, s_theta=0.85):
    if not field_values:
        return None

    if isinstance(s, str):
        n_grams = split(s)
    else:
        n_grams = s

    matched = dict()
    for field_value in field_values:
        if not isinstance(field_value, string_types):
            continue
        fv_tokens = split(field_value)
        sm = difflib.SequenceMatcher(None, n_grams, fv_tokens)
        match = sm.find_longest_match(0, len(n_grams), 0, len(fv_tokens))
        if match.size > 0:
            source_match = get_effecitve_match_source(n_grams, match.a,
                                                      match.a + match.size)
            if source_match and source_match.size > 1:
                match_str = field_value[match.b:match.b + match.size]
                source_match_str = s[source_match.start:source_match.start +
                                     source_match.size]
                c_match_str = match_str.lower().strip()
                c_source_match_str = source_match_str.lower().strip()
                c_field_value = field_value.lower().strip()
                if (c_match_str and not utils.is_number(c_match_str)
                        and not utils.is_common_db_term(c_match_str)):
                    if utils.is_stopword(c_match_str) or utils.is_stopword(c_source_match_str) or \
                            utils.is_stopword(c_field_value):
                        continue
                    if c_source_match_str.endswith(c_match_str + '\'s'):
                        match_score = 1.0
                    else:
                        if prefix_match(c_field_value, c_source_match_str):
                            match_score = fuzz.ratio(c_field_value,
                                                     c_source_match_str) / 100
                        else:
                            match_score = 0
                    if (utils.is_commonword(c_match_str)
                            or utils.is_commonword(c_source_match_str)
                            or utils.is_commonword(c_field_value)
                        ) and match_score < 1:
                        continue
                    s_match_score = match_score
                    if match_score >= m_theta and s_match_score >= s_theta:
                        if (field_value.isupper()
                                and match_score * s_match_score < 1):
                            continue
                        matched[match_str] = (field_value, source_match_str,
                                              match_score, s_match_score,
                                              match.size)

    if not matched:
        return None
    else:
        return sorted(matched.items(),
                      key=lambda x: (1e16 * x[1][2] + 1e8 * x[1][3] + x[1][4]),
                      reverse=True)
Example #7
    def get_matches(self, file, files):
        matches = [f for f in files if f[-1] == 'ipynb']
        sims = [fuzz.ratio(file[0], m[1]) for m in matches]
        # Sort indices by similarity, best match first
        best = sorted(range(len(sims)), key=sims.__getitem__, reverse=True)
        matches = [matches[i] for i in best]
        sims = [sims[i] for i in best]
        return matches, sims
Example #8
def binary_fuzzy_match(pat, txt, threshold, local=1):
    """
    Searches for fuzzy matches to a pattern in a longer string. A fuzzy match
    does not necessarily need to be a perfect character-for-character match
    between a pattern and the larger text string; the tolerance for
    mismatches is controlled by the threshold parameter. The underlying
    metric is Levenshtein distance.

    Args:
        pat (str): The shorter text to search for.

        txt (str): The larger text to search within.

        threshold (float): Value between 0 and 1 at which matches are
            considered real.

        local (int, optional): Alignment method, 0 for global, 1 for local.

    Returns:
        boolean: True if the pattern was found, False if it was not.
    """
    # Make sure the pattern is smaller than the text.
    if len(pat) > len(txt):
        return False
    if local == 1:
        similarity_score = fuzz.partial_ratio(pat, txt)
    else:
        similarity_score = fuzz.ratio(pat, txt)
    return similarity_score >= threshold * 100
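A quick sketch of local vs. global matching, assuming `fuzz` comes from rapidfuzz (exact scores depend on the implementation):

from rapidfuzz import fuzz  # assumption

print(binary_fuzzy_match("color", "colour of the sky", 0.8))           # likely True (local)
print(binary_fuzzy_match("color", "colour of the sky", 0.8, local=0))  # False (global)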
Example #9
    def get_combined_fuzz_score(self, a, b, mode='geom_mean'):
        a, b = clean_name(a), clean_name(b)

        simple = float(fuzz.ratio(a, b) * self.weight['simple'])
        partial = float(fuzz.partial_ratio(a, b) * self.weight['partial'])

        return self.combine_scores(simple, partial, mode=mode)
Example #10
def find_similar(search_for, dataset):
    res = []
    for data in dataset:
        res.append(fuzz.ratio(search_for, data))
    i, v = max(enumerate(res), key=operator.itemgetter(1))
    yield dataset[i]
    yield v
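Because the function yields exactly twice, the generator unpacks like a (match, score) pair; a minimal sketch with made-up data, assuming the function above is in scope:

import operator  # used inside find_similar
from rapidfuzz import fuzz  # assumption: source of fuzz

best, score = find_similar("apple", ["apples", "banana", "grape"])
print(best, score)  # e.g. 'apples' and a score around 91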
Example #11
def subseq_matcher(seq1, seq2):
    """ Match similar lines """
    ls_grid = np.zeros((len(seq1), len(seq2)))
    for subseq1_index, subseq1 in enumerate(seq1):
        for subseq2_index, subseq2 in enumerate(seq2):
            ra = ratio(subseq1, subseq2)
            ls_grid[subseq1_index][subseq2_index] = ra if ra > 30 else 0
    max_val = np.argwhere(ls_grid == np.amax(ls_grid))
    while ls_grid[max_val[0][0]][max_val[0][1]] != 0.0:
        if len(max_val) != 1:
            max_val = [max_val[np.argmin([np.abs(x - y) for x, y in max_val])]]
        ls_grid[:, max_val[0][1]], ls_grid[max_val[0][0], :] = 0, 0
        ls_grid[max_val[0][0]][max_val[0][1]] = -1
        max_val = np.argwhere(ls_grid == np.amax(ls_grid))
    matched_seq = []
    if len(seq1) <= len(seq2):
        for col_id, col in enumerate(ls_grid.T):
            match = np.argwhere(col == -1)
            if len(match) == 0:
                matched_seq.append(["", seq2[col_id]])
            else:
                matched_seq.append([seq1[match[0][0]], seq2[col_id]])
            if col_id < len(seq1) and np.sum(ls_grid[col_id][:]) != -1:
                matched_seq.append([seq1[col_id], ""])
    else:
        for row_id, col in enumerate(ls_grid):
            match = np.argwhere(col == -1)
            if len(match) == 0:
                matched_seq.append([seq1[row_id], ""])
            else:
                matched_seq.append([seq1[row_id], seq2[match[0][0]]])
            if row_id < len(seq2) and np.sum(ls_grid.T[row_id, :]) != -1:
                matched_seq.append(["", seq2[row_id]])
    return matched_seq
Example #12
def find_similar_pairs(tags, *, required_similarity=80):
    """
    Find pairs of similar-looking tags in the collection ``tags``.

    Increase ``required_similarity`` for stricter matching (=> less results).
    """
    for t1, t2 in itertools.combinations(sorted(tags), 2):
        if fuzz.ratio(t1, t2) > required_similarity:
            yield (t1, t2)
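For instance, with made-up tags (the imports are assumptions about the surrounding module):

import itertools
from rapidfuzz import fuzz  # assumption

tags = ["python", "pyton", "java"]
print(list(find_similar_pairs(tags)))
# e.g. [('pyton', 'python')]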
Example #13
    def search(self, name, threshold=80):
        matches = []
        for manufacturer in self.manufacturers:
            for variant in manufacturer:
                # Fuzzy-match every stored name variant against the query
                ratio = fuzz.ratio(variant.lower(), name.lower())
                if ratio > threshold:
                    matches.append((manufacturer[0], ratio))
        return sorted(matches, key=lambda x: x[1], reverse=True)
Example #14
    def _assignment_not_found(self, src_path, other_path):
        msg = "Assignment not found at: {}".format(src_path)
        self.log.fatal(msg)
        found = glob.glob(other_path)
        if found:
            scores = sorted([(fuzz.ratio(self.src_path, x), x) for x in found])
            self.log.error("Did you mean: %s", scores[-1][1])

        raise ExchangeError(msg)
Example #15
    def find_similarity(col1, col2):
        if algo == "rapidfuzz":
            similarity_score = fuzz.ratio(col1, col2)
        elif algo == "editdistance":
            similarity_score = editdistance.eval(col1, col2)
        else:
            is_junk = None
            similarity_score = SequenceMatcher(is_junk, col1, col2).ratio()
        return similarity_score
Example #16
def calc_order_score(document_keylines, ordered):
    '''
    Levenshtein distance approach on lists:
    encode each element as a single character
    and calculate the distance between the resulting strings
    '''
    mapping = {line: chr(i + 100) for i, line in enumerate(document_keylines)}

    document_keylines_decoded = ''.join([mapping[line] for line in document_keylines])
    ordered_decoded = ''.join([mapping[line] for line in ordered])
    return fuzz.ratio(document_keylines_decoded, ordered_decoded)
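A small worked example: swapping two lines lowers the score below 100 (assuming `fuzz` comes from rapidfuzz):

from rapidfuzz import fuzz  # assumption

doc = ['title', 'section 1', 'section 2', 'section 3']
seen = ['title', 'section 2', 'section 1', 'section 3']
print(calc_order_score(doc, seen))  # e.g. 75.0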
Example #17
def get_fuzzy_list(utterances: list, intent_ids: list):
    results = [[utterance, [], 0] for utterance in utterances]
    for i, (utterance, intent_id) in enumerate(zip(utterances, intent_ids)):
        logger.info(f"Processing utterance {i + 1} of {len(utterances)}.")
        for j, choice in enumerate(utterances[i + 1:]):
            if fuzz.ratio(utterance, choice, score_cutoff=90):
                results[i][2] += 1
                results[j + i + 1][2] += 1
                results[i][1].append(f"Row {j + i + 3}: [{intent_ids[j + i + 1]}] {choice}")
                results[j + i + 1][1].append(f"Row {i + 2}: [{intent_ids[i]}] {utterance}")
    return [fuzzy_matches for _, fuzzy_matches, _ in results]
Example #18
    def _get_caption(self, imgname: str) -> List[str]:
        max_similarity = 0
        match = None
        for imgpath in self.img2caption.keys():
            if imgname in imgpath:
                similarity = ratio(imgname, imgpath)
                if similarity > max_similarity:
                    match = imgpath
                    max_similarity = similarity
        # Look up the caption for the best match; fail with a clear
        # message if nothing matched at all
        if match is None:
            raise KeyError(f"no caption found for image {imgname!r}")
        return self.img2caption[match]
Example #19
    def match(self, from_list, to_list):
        # Calculate distances
        matches = [[fuzz.ratio(from_string, to_string) / 100 for to_string in to_list] for from_string in from_list]

        # Get best matches
        mappings = [to_list[index] for index in np.argmax(matches, axis=1)]
        scores = np.max(matches, axis=1)

        # Prepare dataframe
        matches = pd.DataFrame({'From': from_list, 'To': mappings, 'Similarity': scores})
        return matches
Example #20
def is_words_similar(string, model):
    """
    Calculates the Levenshtein distance between two strings

    :param string: user input
    :param model: model string
    :return: Is words are similar
    :rtype: bool
    """

    if fuzz.ratio(string, model, score_cutoff=75):
        return True

    return False
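Note that the `score_cutoff` keyword is rapidfuzz-specific. For example:

from rapidfuzz import fuzz  # assumption

print(is_words_similar("recieve", "receive"))  # True (score ~86)
print(is_words_similar("cat", "receive"))      # False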
Example #21
def search_by_url(data: dict, url: str, topn: int = 5) -> List[tuple]:
    assert url
    logger.info(f"Searching for url={url}")
    res = []
    url = url.lower()
    for item_id, vals in data["list"].items():
        dest_url = vals.get("resolved_url", "").lower()
        if dest_url:
            score = fuzz.ratio(url, dest_url)
            res.append((score, item_id, vals))
    res = sorted(res, key=lambda x: x[0], reverse=True)[:topn]
    if res and res[0][0] > 95:
        return res[:1]
    return res
Example #22
def get_best_result(title, configfile, dbfile):
    try:
        sj_results = get(title, configfile, dbfile, sj_only=True)[1]
    except:
        return False
    results = []
    i = len(sj_results)

    j = 0
    while i > 0:
        try:
            q = "result" + str(j + 1000)
            results.append(sj_results.get(q).get('title'))
        except:
            pass
        i -= 1
        j += 1
    best_score = 0
    best_match = 0
    for r in results:
        r = re.sub(r"\s\(.*\)", "", r)
        score = fuzz.ratio(title, r)
        if score > best_score:
            best_score = score
            best_match = i + 1000
        i += 1
    best_match = 'result' + str(best_match)
    try:
        best_title = sj_results.get(best_match).get('title')
        if not re.match(r"^" + title.replace(" ", ".") + r".*$", best_title,
                        re.IGNORECASE):
            best_title = False
        best_payload = sj_results.get(best_match).get('payload')
    except:
        best_title = False
    if not best_title:
        logger.debug('No match found for the search for ' + title +
                     '! Search list extended.')
        listen = ["List_ContentShows_Shows", "List_ContentAll_Seasons"]
        for liste in listen:
            cont = ListDb(dbfile, liste).retrieve()
            if not cont:
                cont = ""
            if title not in cont:
                ListDb(dbfile, liste).store(title)
        return False
    logger.debug('Best match for the search for ' + title + ' is ' +
                 best_title)
    return best_payload
Example #23
	def findItemName(self, itemDictionary, messageItem):

		bestScore = 0
		score = 0
		bestItem = None

		try:
			for itemName, itemLabel in list(itemDictionary.items()):
				score = fuzz.ratio(messageItem, itemLabel, score_cutoff=bestScore)
				if score > bestScore:
					bestScore = score
					bestItem = itemName
		except KeyError:
			pass

		return bestItem
Example #24
def match(
    left: pd.Series,
    right: pd.Series,
    preprocess: bool = False,
    fuzzy: bool = False,
    threshold: float = 0.8,
) -> pd.Series:
    """
    Compares values between two different Series to check if they match.

    Parameters
    ----------
    left : Series
        Left Series.
    right : Series
        Right Series.
    preprocess : bool
        Whether to clean and standardize values before comparing them.
    fuzzy : bool
        Whether to compare values using fuzzy logic.
    threshold : float
        Threshold to define equal values using fuzzy logic.

    Returns
    -------
    Series
        Series with booleans indicating whether the values match.

    """
    if preprocess:
        left = standardize_text(left)
        right = standardize_text(right)

    if fuzzy:
        values = pd.DataFrame({"left": left, "right": right})
        values = values.fillna("")
        score = values.apply(lambda row: fuzz.ratio(row["left"], row["right"]),
                             axis=1)
        result = (score / 100) >= threshold
    else:
        result = left == right

    nanmask = right.isna()
    result.loc[nanmask] = np.nan
    result = result.astype("boolean")

    return result
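A minimal sketch with made-up Series; the imports are assumptions about the surrounding module, and `preprocess=False` avoids the `standardize_text` helper:

import numpy as np
import pandas as pd
from rapidfuzz import fuzz  # assumption: source of fuzz

left = pd.Series(["Main St", "Elm Street", None])
right = pd.Series(["Main Street", "Elm St", None])
print(match(left, right, fuzzy=True, threshold=0.7))
# rows 0 and 1 compare as True; the row with missing data becomes <NA>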
Example #25
        def get_potential(iterable: Iterable,
                          *,
                          threshold: int = 80) -> list[str]:
            nonlocal name
            potential = []

            for item in iterable:
                original, item = item, item.lower()

                if name == item:
                    return [original]

                a, b = fuzz.ratio(name, item), fuzz.partial_ratio(name, item)
                if a >= threshold or b >= threshold:
                    potential.append(original)

            return potential
Example #26
def fuzzy_matcher(features, document, match=None):
    matches = []
    tokens = nltk.word_tokenize(document)
    for feature in features:
        feature_length = len(feature.split(" "))
        for i in range(len(tokens) - feature_length + 1):
            matched_phrase = ""
            j = 0
            for j in range(i, i + feature_length):
                if re.search(r'[,!?{}\[\]]', tokens[j]):
                    break
                matched_phrase = matched_phrase + " " + tokens[j].lower()
            matched_phrase = matched_phrase.strip()
            if matched_phrase != "":
                if fuzz.ratio(matched_phrase, feature.lower()) > match:
                    matches.append([matched_phrase, feature, i, j])
    return matches
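A sketch of how it might be called, assuming NLTK's punkt tokenizer data is installed; note that a numeric `match` threshold must be passed, since the default of None would fail the `>` comparison:

import re
import nltk
from rapidfuzz import fuzz  # assumption

doc = "The quick brown fox jumps over the lazy dog"
print(fuzzy_matcher(["quick brown"], doc, match=80))
# e.g. [['quick brown', 'quick brown', 1, 2]]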
Example #27
def get_possible_sds_count(final_score, meta, last_section_candidates, section3_anchor):
    '''
    Main decision function.

    Possible SDS/NON-SDS calculation and an attempt to count concatenated
    SDSs inside big files.

    For relatively large docs with a proper final score, let's count the
    probable SDS count. We assume these docs are concatenated multi-SDS
    files. The approach is simple: count top candidates for the last (most
    representative) keyline with really high similarity. As this line can
    vary, we match against several candidates.
    '''
    if final_score < 45:
        # Basic NON SDS Case
        sds_count = 0
    elif final_score >= 45 and meta['page_count'] < MULTI_SDS_MIN_PAGE_COUNT:
        sds_count = 1
    elif final_score >= 45 and meta['page_count'] >= MULTI_SDS_MIN_PAGE_COUNT:
        sds_count = 0  # Because we count all SDSs here
        for position, line in enumerate(meta['all_lines']):
            if section3_anchor in line:
                for candidate_line, min_similarity in last_section_candidates.items():
                    similarity = fuzz.ratio(candidate_line, line)
                    if similarity > min_similarity:
                        # Special cases for bad (but very similar) lines.
                        # A valid line doesn't have quotes in it:
                        # INVALID LINE EXAMPLE: 5.1.3 sds section 3 "composition/information on ingredients"
                        # and doesn't start with specific symbols, like "(" or
                        # "1" (because "11" can be a bad OCR of a double quote):
                        # INVALID LINE EXAMPLE: (composition/information on ingredients) .
                        bad_line = (line[0] in ['(', '1']) or ('"' in line)

                        if not bad_line:
                            sds_count += 1
                            break  # Don't test this line anymore once matched

        # Edge cases: the ratio between page count and SDS count can't be
        # very low. If we observe 1 or 2 pages per SDS, it's definitely a
        # layout problem. Good example: B74F61F216D24EB5ABBABA08101EABF6.ashx.pdf,
        # which has all sections repeated as an agenda on each page.
        if sds_count:
            if meta['page_count'] / sds_count <= 2:
                sds_count = 1

    return sds_count
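A sketch with made-up inputs; `MULTI_SDS_MIN_PAGE_COUNT`, the input shapes, and the `fuzz` import are all assumptions for illustration:

from rapidfuzz import fuzz  # assumption

MULTI_SDS_MIN_PAGE_COUNT = 20  # assumed module constant
meta = {'page_count': 60,
        'all_lines': ['section 3 composition/information on ingredients'] * 3}
last_section_candidates = {'section 3 composition/information on ingredients': 90}
print(get_possible_sds_count(80, meta, last_section_candidates, 'section 3'))
# e.g. 3 (three concatenated SDSs detected)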
Example #28
    def sort_found_entities(
        self,
        candidate_entities: List[Tuple[int, str, int]],
        candidate_names: List[List[str]],
        entity: str,
        context: str = None
    ) -> Tuple[List[str], List[float], List[Tuple[str, str, int, int]]]:
        entities_ratios = []
        for candidate, entity_names in zip(candidate_entities,
                                           candidate_names):
            entity_num, entity_id, num_rels, tokens_matched = candidate
            fuzz_ratio = max(
                [fuzz.ratio(name.lower(), entity) for name in entity_names])
            entities_ratios.append(
                (entity_num, entity_id, tokens_matched, fuzz_ratio, num_rels))

        srtd_with_ratios = sorted(entities_ratios,
                                  key=lambda x: (x[2], x[3], x[4]),
                                  reverse=True)
        if self.use_descriptions:
            log.debug(f"context {context}")
            id_to_score = {
                entity_id: (tokens_matched, score)
                for _, entity_id, tokens_matched, score, _ in
                srtd_with_ratios[:30]
            }
            entity_ids = [
                entity_id for _, entity_id, _, _, _ in srtd_with_ratios[:30]
            ]
            scores = self.entity_ranker.rank_rels(context, entity_ids)
            entities_with_scores = [(entity_id, id_to_score[entity_id][0],
                                     id_to_score[entity_id][1], score)
                                    for entity_id, score in scores]
            entities_with_scores = sorted(entities_with_scores,
                                          key=lambda x: (x[1], x[2], x[3]),
                                          reverse=True)
            entities_with_scores = [entity for entity in entities_with_scores if \
                                   (entity[3] > self.descr_rank_score_thres or entity[2] == 100.0)]
            log.debug(f"entities_with_scores {entities_with_scores[:10]}")
            entity_ids = [entity for entity, _, _, _ in entities_with_scores]
            confidences = [score for _, _, _, score in entities_with_scores]
        else:
            entity_ids = [ent[1] for ent in srtd_with_ratios]
            confidences = [float(ent[2]) * 0.01 for ent in srtd_with_ratios]

        return entity_ids, confidences, srtd_with_ratios
Example #29
def search_by_title(data: dict, title: str, topn: int = 5) -> List[tuple]:
    assert title
    logger.info(f"Searching for title={title}")
    res = []
    title = title.lower()
    for item_id, vals in data["list"].items():
        dest_title = vals.get("resolved_title", "").lower()
        if dest_title:
            score = fuzz.ratio(title, dest_title)
            res.append((score, item_id, vals))
        # if dest_title == title:
        #     score = 100
        #     res.append((score, item_id, vals))
    res = sorted(res, key=lambda x: x[0], reverse=True)[:topn]
    if res and res[0][0] > 95:
        return res[:1]
    return res
Example #30
    def handle_presence_intent(self, message):
        self._setup()
        if self.fhem is None:
            self.speak_dialog('fhem.error.setup')
            return
        wanted = message.data["entity"]
        LOG.debug("wanted: %s" % wanted)

        try:
            roommates = self.fhem.get(room=self.allowed_devices_room,
                                      device_type='ROOMMATE')
        except ConnectionError:
            self.speak_dialog('fhem.error.offline')
            return

        if len(roommates) < 1:
            self.speak_dialog('fhem.presence.error')
            return

        presence = None
        bestRatio = 66

        for rm in roommates:
            if 'rr_realname' in rm['Attributes'].keys():
                realname = rm['Attributes'][rm['Attributes']['rr_realname']]
                LOG.debug("realname: %s" % realname)
                ratio = fuzz.ratio(wanted.lower(),
                                   realname.lower(),
                                   score_cutoff=bestRatio)
                LOG.debug("ratio: %s" % ratio)
                if ratio > bestRatio:
                    presence = rm['Readings']['presence']['Value']
                    bestName = realname
                    bestRatio = ratio

        presence_values = self.translate_namedvalues('presence.value')
        if presence:
            location = presence_values[presence]
            self.speak_dialog('fhem.presence.found',
                              data={
                                  'wanted': bestName,
                                  'location': location
                              })
        else:
            self.speak_dialog('fhem.presence.error')