Пример #1
0
    def test01_most_similar(self):
        """Check that batched and index-backed most_similar beat the naive loop.

        Builds an HNSW index over the rows of ``model.L0``, then times three
        ways of querying the top-10 neighbours of up to 10,000 item ids:

        1. one ``model.most_similar`` call per key,
        2. a single batched ``ParW2V.most_similar`` call with four workers,
        3. the same batched call after attaching the saved index (the time
           spent in ``set_hnsw_index`` is included in this measurement).

        The test asserts
        ``naive_elapsed > par_elapsed * 1.5 > ann_elapsed * 5.0``.

        The index file ``n2.bin`` is written to the current working directory.
        It is removed, and the index unloaded, even when a query or the timing
        assertion fails.
        """
        set_log_level(1)
        model = self.load_text8_model()
        index = HnswIndex(model.L0.shape[1])
        model.normalize('item')
        for f in model.L0:
            index.add_data(f)
        index.build(n_threads=4)

        index_path = 'n2.bin'
        try:
            index.save(index_path)

            par = ParW2V(model)

            model.opt.num_workers = 1
            all_keys = model._idmanager.itemids[::][:10000]
            start_t = time.time()
            # Plain loop: the results are discarded, only the elapsed time matters.
            for k in all_keys:
                model.most_similar(k, topk=10)
            naive_elapsed = time.time() - start_t

            par.num_workers = 4
            start_t = time.time()
            par.most_similar(all_keys, topk=10, repr=True)
            par_elapsed = time.time() - start_t

            start_t = time.time()
            par.set_hnsw_index(index_path, 'item')
            par.most_similar(all_keys, topk=10, repr=True)
            ann_elapsed = time.time() - start_t
            self.assertTrue(naive_elapsed > par_elapsed * 1.5 > ann_elapsed * 5.0,
                            msg=f'{naive_elapsed} > {par_elapsed} > {ann_elapsed}')
        finally:
            # Always release the index and delete the temporary file so a
            # failed run does not leave 'n2.bin' behind for the next one.
            index.unload()
            # Guarded so that a failure inside save() is not masked by a
            # FileNotFoundError raised from the cleanup itself.
            if os.path.exists(index_path):
                os.remove(index_path)
Пример #2
0
def _write_most_similar(par, items, path, batch_size=128):
    """Write ``par.most_similar`` results for every entry of *items* to *path*.

    *items* is queried in consecutive slices of *batch_size* entries. Each
    output line holds the query item followed by its returned neighbours,
    all separated by tabs.

    Args:
        par: Object exposing ``most_similar(batch, repr=True)`` that returns
            a pair whose first element yields one sequence of strings per
            query item.
        items: Sliceable sequence of query items.
        path: Destination file; opened in text write mode (truncated).
        batch_size: Number of items sent to ``most_similar`` per call.
    """
    with open(path, 'w') as fout:
        for idx in range(0, len(items), batch_size):
            batch = items[idx:idx + batch_size]
            topks, _ = par.most_similar(batch, repr=True)
            for q, p in zip(batch, topks):
                fout.write('%s\t%s\n' % (q, '\t'.join(p)))


def example2():
    """Train ALS on the ml-20m data and dump item neighbours two ways.

    Steps:

    1. Train an ``ALS`` model from the MatrixMarket files under
       ``../tests/ext/ml-20m`` and normalize its item factors.
    2. Write the most-similar items of every item id to
       ``als.ml20m.par.top10.tsv`` using ``ParALS`` with four workers.
    3. Build an n2 ``HnswIndex`` over ``als.Q``, save it as
       ``ml20m.n2.index``, attach it to the same ``ParALS`` object and write
       the results to ``als.ml20m.ann.top10.tsv``.

    The elapsed wall-clock time of each dump is printed. All output files
    are created in the current working directory and are left in place.
    """
    log.set_log_level(log.INFO)
    als_option = ALSOption().get_default_option()
    data_option = MatrixMarketOptions().get_default_option()
    data_option.input.main = '../tests/ext/ml-20m/main'
    data_option.input.iid = '../tests/ext/ml-20m/iid'
    data_option.data.path = './ml20m.h5py'
    data_option.data.use_cache = True

    als = ALS(als_option, data_opt=data_option)
    als.initialize()
    als.train()
    als.normalize('item')
    als.build_itemid_map()

    print(
        'Make item recommendation on als.ml20m.par.top10.tsv with Paralell(Thread=4)'
    )
    par = ParALS(als)
    par.num_workers = 4
    all_items = als._idmanager.itemids
    start_t = time.time()
    _write_most_similar(par, all_items, 'als.ml20m.par.top10.tsv')
    print('took: %.3f secs' % (time.time() - start_t))

    # Imported here rather than at module level, as in the original example.
    from n2 import HnswIndex
    index = HnswIndex(als.Q.shape[1])
    for f in als.Q:
        index.add_data(f)
    index.build(n_threads=4)
    index.save('ml20m.n2.index')
    index.unload()
    # The message now names the file actually written below and the worker
    # count actually configured (it previously said par.top10.tsv / Thread=1).
    print(
        'Make item recommendation on als.ml20m.ann.top10.tsv with Ann(Thread=4)'
    )
    par.set_hnsw_index('ml20m.n2.index', 'item')
    par.num_workers = 4
    start_t = time.time()
    _write_most_similar(par, all_items, 'als.ml20m.ann.top10.tsv')
    print('took: %.3f secs' % (time.time() - start_t))