Пример #1
0
def test_make_fuzzy_should_remove_letter_if_world_is_long():
    assert 'mt' not in make_fuzzy('mot')
    assert 'rain' in make_fuzzy('train')
    assert 'tain' in make_fuzzy('train')
    assert 'trin' in make_fuzzy('train')
    assert 'tran' in make_fuzzy('train')
    assert 'trai' in make_fuzzy('train')
Пример #2
0
def test_make_fuzzy_should_remove_letter_if_world_is_long():
    assert 'mt' not in make_fuzzy('mot')
    assert 'rain' in make_fuzzy('train')
    assert 'tain' in make_fuzzy('train')
    assert 'trin' in make_fuzzy('train')
    assert 'tran' in make_fuzzy('train')
    assert 'trai' in make_fuzzy('train')
Пример #3
0
def try_fuzzy(helper, tokens, include_common=True):
    if not helper.bucket_dry or not tokens:
        return
    helper.debug('Fuzzy on. Trying with %s.', tokens)
    tokens.sort(key=lambda t: len(t), reverse=True)
    allkeys = helper.keys[:]
    if include_common:
        # As we are in fuzzy, try to narrow as much as possible by adding
        # unused commons tokens.
        common = [t for t in helper.common if t.db_key not in helper.keys]
        allkeys.extend([t.db_key for t in common])
    for try_one in tokens:
        if helper.bucket_full:
            break
        keys = allkeys[:]
        if try_one.db_key in keys:
            keys.remove(try_one.db_key)
        if try_one.isdigit():
            continue
        helper.debug('Going fuzzy with %s', try_one)
        neighbors = make_fuzzy(try_one, max=helper.fuzzy)
        if len(keys):
            # Only retains tokens that have been seen in the index at least
            # once with the other tokens.
            DB.sadd(helper.query, *neighbors)
            interkeys = [pair_key(k[2:]) for k in keys]
            interkeys.append(helper.query)
            fuzzy_words = DB.sinter(interkeys)
            DB.delete(helper.query)
            # Keep the priority we gave in building fuzzy terms (inversion
            # first, then substitution, etc.).
            fuzzy_words = [w.decode() for w in fuzzy_words]
            fuzzy_words.sort(key=lambda x: neighbors.index(x))
        else:
            # The token we are considering is alone.
            fuzzy_words = []
            for neighbor in neighbors:
                key = dbkeys.token_key(neighbor)
                count = DB.zcard(key)
                if count:
                    fuzzy_words.append(neighbor)
        helper.debug('Found fuzzy candidates %s', fuzzy_words)
        fuzzy_keys = [dbkeys.token_key(w) for w in fuzzy_words]
        for key in fuzzy_keys:
            if helper.bucket_dry:
                helper.add_to_bucket(keys + [key])
Пример #4
0
def test_make_fuzzy_should_extend_term():
    expected = set([
        'omt', 'mto', 'amot', 'maot', 'moat', 'mota', 'bmot', 'mbot', 'mobt',
        'motb', 'cmot', 'mcot', 'moct', 'motc', 'dmot', 'mdot', 'modt', 'motd',
        'emot', 'meot', 'moet', 'mote', 'fmot', 'mfot', 'moft', 'motf', 'gmot',
        'mgot', 'mogt', 'motg', 'hmot', 'mhot', 'moht', 'moth', 'imot', 'miot',
        'moit', 'moti', 'jmot', 'mjot', 'mojt', 'motj', 'kmot', 'mkot', 'mokt',
        'motk', 'lmot', 'mlot', 'molt', 'motl', 'mmot', 'mmot', 'momt', 'motm',
        'nmot', 'mnot', 'mont', 'motn', 'omot', 'moot', 'moot', 'moto', 'pmot',
        'mpot', 'mopt', 'motp', 'qmot', 'mqot', 'moqt', 'motq', 'rmot', 'mrot',
        'mort', 'motr', 'smot', 'msot', 'most', 'mots', 'tmot', 'mtot', 'mott',
        'mott', 'umot', 'muot', 'mout', 'motu', 'vmot', 'mvot', 'movt', 'motv',
        'wmot', 'mwot', 'mowt', 'motw', 'xmot', 'mxot', 'moxt', 'motx', 'ymot',
        'myot', 'moyt', 'moty', 'zmot', 'mzot', 'mozt', 'motz', 'aot', 'mat',
        'moa', 'bot', 'mbt', 'mob', 'cot', 'mct', 'moc', 'dot', 'mdt', 'mod',
        'eot', 'met', 'moe', 'fot', 'mft', 'mof', 'got', 'mgt', 'mog', 'hot',
        'mht', 'moh', 'iot', 'mit', 'moi', 'jot', 'mjt', 'moj', 'kot', 'mkt',
        'mok', 'lot', 'mlt', 'mol', 'mmt', 'mom', 'not', 'mnt', 'mon',
        'oot', 'moo', 'pot', 'mpt', 'mop', 'qot', 'mqt', 'moq', 'rot',
        'mrt', 'mor', 'sot', 'mst', 'mos', 'tot', 'mtt', 'uot', 'mut',
        'mou', 'vot', 'mvt', 'mov', 'wot', 'mwt', 'mow', 'xot', 'mxt', 'mox',
        'yot', 'myt', 'moy', 'zot', 'mzt', 'moz',
    ])
    assert set(make_fuzzy('mot')) == expected
Пример #5
0
def test_make_fuzzy_should_extend_term():
    expected = set([
        'omt', 'mto', 'amot', 'maot', 'moat', 'mota', 'bmot', 'mbot', 'mobt',
        'motb', 'cmot', 'mcot', 'moct', 'motc', 'dmot', 'mdot', 'modt', 'motd',
        'emot', 'meot', 'moet', 'mote', 'fmot', 'mfot', 'moft', 'motf', 'gmot',
        'mgot', 'mogt', 'motg', 'hmot', 'mhot', 'moht', 'moth', 'imot', 'miot',
        'moit', 'moti', 'jmot', 'mjot', 'mojt', 'motj', 'kmot', 'mkot', 'mokt',
        'motk', 'lmot', 'mlot', 'molt', 'motl', 'mmot', 'mmot', 'momt', 'motm',
        'nmot', 'mnot', 'mont', 'motn', 'omot', 'moot', 'moot', 'moto', 'pmot',
        'mpot', 'mopt', 'motp', 'qmot', 'mqot', 'moqt', 'motq', 'rmot', 'mrot',
        'mort', 'motr', 'smot', 'msot', 'most', 'mots', 'tmot', 'mtot', 'mott',
        'mott', 'umot', 'muot', 'mout', 'motu', 'vmot', 'mvot', 'movt', 'motv',
        'wmot', 'mwot', 'mowt', 'motw', 'xmot', 'mxot', 'moxt', 'motx', 'ymot',
        'myot', 'moyt', 'moty', 'zmot', 'mzot', 'mozt', 'motz', 'aot', 'mat',
        'moa', 'bot', 'mbt', 'mob', 'cot', 'mct', 'moc', 'dot', 'mdt', 'mod',
        'eot', 'met', 'moe', 'fot', 'mft', 'mof', 'got', 'mgt', 'mog', 'hot',
        'mht', 'moh', 'iot', 'mit', 'moi', 'jot', 'mjt', 'moj', 'kot', 'mkt',
        'mok', 'lot', 'mlt', 'mol', 'mmt', 'mom', 'not', 'mnt', 'mon',
        'oot', 'moo', 'pot', 'mpt', 'mop', 'qot', 'mqt', 'moq', 'rot',
        'mrt', 'mor', 'sot', 'mst', 'mos', 'tot', 'mtt', 'uot', 'mut',
        'mou', 'vot', 'mvt', 'mov', 'wot', 'mwt', 'mow', 'xot', 'mxt', 'mox',
        'yot', 'myt', 'moy', 'zot', 'mzt', 'moz',
    ])
    assert set(make_fuzzy('mot')) == expected