Пример #1
0
def only_commons(helper):
    """Fill the bucket for a query made only of common tokens.

    Does nothing unless ``helper.tokens`` and ``helper.common`` have the
    same length. Otherwise the following is attempted, in order:

    1. ``helper.add_to_bucket`` on the token keys (plus the geohash key
       when one is set), provided a geohash key is set or there is a
       single key.
    2. When the bucket is still dry and there are several keys, the
       tokens are sorted in place by ascending frequency and
       ``helper.add_to_bucket`` is called again if the least frequent
       token is under ``config.INTERSECT_LIMIT``, or if any filter key
       has a ``DB.scard`` under that limit.
    3. Otherwise ``scripts.manual_scan`` is run and the ids it returns
       are merged into ``helper.bucket``.

    Always returns ``None``.
    """
    if len(helper.tokens) != len(helper.common):
        # Not every token is a common one: this shortcut does not apply.
        return

    keys = [token.db_key for token in helper.tokens]
    if helper.geohash_key:
        keys.append(helper.geohash_key)
        helper.debug('Adding geohash %s', helper.geohash_key)
        helper.add_to_bucket(keys)
    elif len(keys) == 1:
        helper.add_to_bucket(keys)

    if not helper.bucket_dry or len(keys) <= 1:
        return

    # Put the least frequent token first. The key list is rebuilt from the
    # tokens alone, so a geohash key appended above is no longer part of it.
    helper.tokens.sort(key=lambda token: token.frequency)
    keys = [token.db_key for token in helper.tokens]
    rarest = helper.tokens[0]

    if rarest.frequency < config.INTERSECT_LIMIT:
        helper.debug('Under INTERSECT_LIMIT, force intersect.')
        helper.add_to_bucket(keys)
        return

    helper.debug('INTERSECT_LIMIT hit, manual scan')
    if helper.filters:
        # Filters always take part in the manual intersect.
        keys = keys + helper.filters
        # A single small enough filter makes the plain intersect viable
        # again, so stop at the first one found.
        for filter_key in helper.filters:
            if DB.scard(filter_key) < config.INTERSECT_LIMIT:
                helper.debug('Filters under INTERSECT_LIMIT, force')
                helper.add_to_bucket(keys)
                return

    helper.debug('manual scan on "%s"', rarest)
    scanned_ids = scripts.manual_scan(keys=keys, args=[helper.min])
    helper.bucket.update(scanned_ids)
    helper.debug('%s results after scan', len(helper.bucket))
Пример #2
0
def test_manual_scan_with_filter(factory):
    """Exercise ``scripts.manual_scan`` with a filter key among its keys.

    Four documents are created through ``factory``; the scan is then run
    with a filter expected to match, a filter expected to match nothing,
    and a city filter combined with a single token key.
    """
    city = factory(name="Vitry", type="city")
    factory(name="La monnaye", city="Saint-Loup-Cammas")
    paris_street = factory(name="rue de la monnaie", city="Paris",
                           importance=1)
    condom_street = factory(name="rue de la monnaie", city="Condom",
                            importance=0.9)

    # Street filter: both "rue de la monnaie" documents, Paris first.
    found = scripts.manual_scan(
        keys=['w|rue', 'w|de', 'f|type|street'],
        args=[2],
    )
    expected = [
        'd|{}'.format(doc['_id']).encode()
        for doc in (paris_street, condom_street)
    ]
    assert found == expected

    # A filter key nothing was indexed under yields no result.
    found = scripts.manual_scan(
        keys=['w|rue', 'w|de', 'f|type|whatever'],
        args=[2],
    )
    assert found == []

    # City filter with a single token key: only the city document.
    found = scripts.manual_scan(keys=['w|vitry', 'f|type|city'], args=[2])
    assert found == ['d|{}'.format(city['_id']).encode()]
Пример #3
0
def test_manual_scan(factory):
    """Exercise ``scripts.manual_scan`` with three token keys and no filter.

    Four documents are created through ``factory``; the scan is expected to
    return the Paris street followed by the Condom street.
    """
    factory(name="rue de la monnaie", city="Vitry")
    factory(name="La monnaye", city="Saint-Loup-Cammas")
    paris_street = factory(name="rue de la monnaie", city="Paris",
                           importance=1)
    condom_street = factory(name="rue de la monnaie", city="Condom",
                            importance=0.9)

    found = scripts.manual_scan(
        keys=['w|monnaie', 'w|rue', 'w|de'],
        args=[2],
    )

    # Only two results come back although three documents share the name.
    expected = [
        'd|{}'.format(doc['id']).encode()
        for doc in (paris_street, condom_street)
    ]
    assert found == expected