# Example no. 1
from openrec import ImplicitModelTrainer
from openrec.utils import ImplicitDataset
from openrec.recommenders import VisualCML
from openrec.utils.evaluators import AUC, Recall
from openrec.utils.samplers import PairwiseSampler
from config import sess_config
import dataloader

# Data source and training hyper-parameters for the Amazon-Book VisualCML run.
raw_data = dataloader.load_amazon_book()
batch_size = 1000
test_batch_size = 100
item_serving_size = 1000
display_itr = 10000

# Wrap each split in an ImplicitDataset; all three share the same
# user/item id space reported by the loader.
train_dataset, val_dataset, test_dataset = (
    ImplicitDataset(raw_data[split_key],
                    raw_data['max_user'],
                    raw_data['max_item'],
                    name=split_name)
    for split_key, split_name in (('train_data', 'Train'),
                                  ('val_data', 'Val'),
                                  ('test_data', 'Test')))

model = VisualCML(batch_size=batch_size,
                  max_user=raw_data['max_user'],
                  max_item=raw_data['max_item'],
                  l2_reg=0.001,
                  l2_reg_mlp=0.001,
# Example no. 2
import pandas as pd

if __name__ == "__main__":
    # We'll need to do a lot more wrangling/cleaning up - but this seems to
    # AT LEAST WORK WITH DIFF DATA.
    #
    # Fix: `np` is used below but numpy was never imported (only pandas was,
    # and pandas is unused here); `BPR` is likewise not imported by any
    # visible fragment of this file.
    import numpy as np
    from openrec.recommenders import BPR

    # Hard caps on the id space; assumes the CSV's user/item ids stay below
    # these bounds -- TODO confirm against the actual data.
    max_users = 10000
    max_items = 200000
    # Structured array: one typed field per CSV column, field names taken
    # from the header row (names=True).
    csv = np.genfromtxt('Movies_ratings_small_merged_larger2.csv',
                        delimiter=",",
                        dtype=(int, int, float, int, int, float, int, int,
                               float),
                        names=True,
                        encoding=None)
    print(csv)

    # NOTE(review): all three splits wrap the SAME array, so train/val/test
    # are not disjoint -- presumably a placeholder until real splitting is
    # implemented; confirm before using evaluation numbers.
    train_dataset = ImplicitDataset(raw_data=csv,
                                    max_user=max_users,
                                    max_item=max_items,
                                    name='Train')
    val_dataset = ImplicitDataset(raw_data=csv,
                                  max_user=max_users,
                                  max_item=max_items,
                                  name='Val')
    test_dataset = ImplicitDataset(raw_data=csv,
                                   max_user=max_users,
                                   max_item=max_items,
                                   name='Test')

    # BPR recommender sized from the training dataset's id space.
    bpr_model = BPR(batch_size=1000,
                    max_user=train_dataset.max_user(),
                    max_item=train_dataset.max_item(),
                    dim_embed=20,
                    opt='Adam')
# Example no. 3
import os
import sys
sys.path.append(os.getcwd())

from openrec import ImplicitModelTrainer
from openrec.utils import ImplicitDataset
from openrec.recommenders import WCML
from openrec.utils.evaluators import AUC
from openrec.utils.samplers import NPairwiseSampler
import dataloader

# Data source and hyper-parameters for the CiteULike WCML run.
raw_data = dataloader.load_citeulike()
batch_size = 2000
test_batch_size = 100
display_itr = 500

# Build the three evaluation splits over the shared user/item id space.
train_dataset, val_dataset, test_dataset = (
    ImplicitDataset(raw_data[key], raw_data['max_user'],
                    raw_data['max_item'], name=tag)
    for key, tag in (('train_data', 'Train'),
                     ('val_data', 'Val'),
                     ('test_data', 'Test')))

# WCML model with 5 negatives per positive, no L2 regularisation.
model = WCML(batch_size=batch_size,
             max_user=train_dataset.max_user(),
             max_item=train_dataset.max_item(),
             dim_embed=20,
             neg_num=5,
             l2_reg=None,
             opt='Adam',
             sess_config=None)
sampler = NPairwiseSampler(batch_size=batch_size,
                           dataset=train_dataset,
                           negativenum=5,
                           num_process=5)
model_trainer = ImplicitModelTrainer(batch_size=batch_size,
                                     test_batch_size=test_batch_size,
                                     train_dataset=train_dataset,
                                     model=model,
                                     sampler=sampler)
auc_evaluator = AUC()

# 100k iterations, reporting AUC on the validation split every display_itr.
model_trainer.train(num_itr=int(1e5),
                    display_itr=display_itr,
                    eval_datasets=[val_dataset],
                    evaluators=[auc_evaluator],
                    num_negatives=200)
# Example no. 4
"""


def get_focused(dataset, focus_bound, interactions_per_item):
    """Select the interactions whose item popularity lies in the focus band.

    An interaction (user_id, item_id) is kept when
    focus_bound[0] <= interactions_per_item[item_id] < focus_bound[1].

    NOTE(review): np.take is called without an axis, so a 2-D `dataset`
    would be indexed on its flattened form -- presumably `dataset` is a
    1-D structured array of interaction records; verify against callers.
    """
    lower, upper = focus_bound
    kept = [index
            for index, (_user_id, item_id) in enumerate(dataset)
            if lower <= interactions_per_item[item_id] < upper]
    return np.take(dataset, kept)


# Load raw split data from the path given on the command line;
# `get_raw_data` is defined elsewhere in the project (not in this fragment).
raw_data = get_raw_data(sys.argv[1])
val_dataset = ImplicitDataset(raw_data['val_data'],
                              raw_data['max_user'],
                              raw_data['max_item'],
                              name='Val')
test_dataset = ImplicitDataset(raw_data['test_data'],
                               raw_data['max_user'],
                               raw_data['max_item'],
                               name='Test')

#Getting Focused sets
# Stack all splits row-wise so item popularity is counted over the full data.
full_dataset = np.concatenate(
    (raw_data['train_data'], raw_data['val_data'], raw_data['test_data']),
    axis=0)
interactions_per_item = get_item_interaction_dict(full_dataset)
#focus_bound, percent_focused = get_focus_bound(full_dataset, interactions_per_item)
# NOTE(review): `focus_bound` is used below but the line computing it above
# is commented out, so this raises NameError unless focus_bound is defined
# elsewhere -- confirm, and either restore the get_focus_bound call or
# supply focus_bound explicitly.
focused_val_set = get_focused(raw_data['val_data'], focus_bound,
                              interactions_per_item)
focused_test_set = get_focused(raw_data['test_data'], focus_bound,