Exemplo n.º 1
0
from bt_candidates.client import Client
from bt_candidates.wiring import default_filter_factory as ff
from bt_candidates.filters import MatchType
from bt_candidates.sorting import SortStrategy

# Connect to the candidates service and fetch the schema for the site.
site_name = 'atlanta-black-star'
client = Client('candidates.magic.boomtrain.com')
schema = client.get_schema(site_name)

# Groups of title terms. Each group becomes one EXACT overlap filter whose
# min and max both equal the number of terms in the group.
title_term_groups = [
    ['Yohannes',  'IV'],
    ['Search', 'chicago'],
    ['african', 'history', 'month'],
]

# OR the per-group filters together.
filt = ff.or_filter(*[
    ff.overlap_filter(
        'title',
        terms,
        match_type=MatchType.EXACT,
        min=len(terms),
        max=len(terms),
    )
    for terms in title_term_groups
])

# Fetch up to 25 candidates, then print the count followed by each candidate.
candidates = client.get_candidates(site_name, filt, limit=25)
print(len(candidates))
for c in candidates:
    print(c)

print('======================================================')

import itertools as it

def split_candidates(candidates, needed=10):
    to_score = []
    for _, grp in it.groupby(candidates, lambda c: c.sort_weight):
        grp = list(grp)
        if len(grp) > needed:
            to_score.extend(grp)
            return to_score
        else:
Exemplo n.º 2
0
# Compare how many candidates a 2-day recency window yields against a
# fallback filter that ORs the 2-day window with a 5-day window.
client = Client(host='candidates.aws.boomtrain.com', port=7070)
vogue = "9b69d8fc8b441b43d493d713e5703ada"

# Both windows filter on 'pubDate' and share the same max offset (+1 day);
# only the min offset differs (-2 days vs. -5 days).
filter_two_days = f.recency_filter(
    field='pubDate', min=timedelta(days=-2), max=timedelta(days=1))
filter_five_days = f.recency_filter(
    field='pubDate', min=timedelta(days=-5), max=timedelta(days=1))
recency_fallback_filter = f.or_filter(filter_two_days, filter_five_days)

# First query: 2-day window only.
candidates_two_days = client.get_candidates(
    site_id=vogue, filter=filter_two_days, limit=100)
count1 = len(candidates_two_days)
print("Candidates applying two days filter : {}".format(count1))

# Second query: 2-day OR 5-day window.
fallback_candidates = client.get_candidates(
    site_id=vogue, filter=recency_fallback_filter, limit=100)
count2 = len(fallback_candidates)
print("Candidates applying the recency fallback to 5 days : {}".format(count2))

# "Error" is reported only when the first query returned fewer than 100
# candidates and still returned more than the fallback query did.
if count1 >= 100 or count1 <= count2:
    added_by_fallback = count2 - count1
    print("number of candidates added by fallback is {}".format(
        added_by_fallback))
else:
    print("Error")
Exemplo n.º 3
0
# Check that at least 100 candidates match the itemType/cityRegion filters,
# both on their own and combined with the schema's 'GLOBAL' named filter.
schema = client.get_schema(site_id)

filter_item_type = ff.overlap_filter(field='itemType',
                                     values={'lists_en'},
                                     min=1,
                                     match_type=MatchType.CONTAINS)
filter_city_region = ff.overlap_filter(field='cityRegion',
                                       values={'toronto'},
                                       min=1,
                                       match_type=MatchType.CONTAINS)
filter_meta = ff.and_filter(filter_item_type, filter_city_region)
filter_meta_global = ff.and_filter(filter_item_type, filter_city_region,
                                   schema.named_filters['GLOBAL'])

candidates_meta = client.get_candidates(site_id=site_id,
                                        filter=filter_meta,
                                        limit=100)
candidates_meta_global = client.get_candidates(site_id=site_id,
                                               filter=filter_meta_global,
                                               limit=100)

# The original wrote `assert ('message')`, which asserts a non-empty string
# and therefore can never fail. Assert the count itself and use the string as
# the assertion message so a shortfall is actually reported.
assert len(candidates_meta) >= 100, (
    'Less than 100 candidates found for itemtype and city_region filter')

assert len(candidates_meta_global) >= 100, (
    'Less than 100 candidates found for item_type and city_region filter which also pass the GLOBAL filter'
)