def __init__(self, config):
    self.config = config
    self.meta_files = dict()
    self.all_asin_to_titles = dict()

    # For each metadata source, record the paths of its pickled mapping
    # files and load its item-id lookup tables.
    for meta in self.config['metas']:
        data = self.config[meta]
        pp = data['pp']  # preprocessed-data directory for this meta source
        self.meta_files[meta] = {
            'bought': pp + '/' + meta + '_meta_id_bought_mapping.pt',
            'id': pp + '/' + meta + '_meta_id_item_mapping.pt',
            'cat': pp + '/' + meta + '_meta_id_cat_map.pt',
            'idx': load_dict_output(pp, 'item_id_map.json'),
            'id_item': load_dict_output(pp, 'id_item_map.json'),
        }
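# Hedged sketch of the resulting structure for a single meta source
# (names and paths below are illustrative only):
#   config = {'metas': ['home_kitchen'],
#             'home_kitchen': {'pp': '/data/pp_home_kitchen'}}
#   self.meta_files['home_kitchen']['bought']
#       == '/data/pp_home_kitchen/home_kitchen_meta_id_bought_mapping.pt'
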
def create_title_id_meta_mapping(name, pp_data_dir, meta_df):
    # 'records' (not 'record') is the pandas orient that yields one dict per row
    records = meta_df.to_dict('records')
    item_mapping = load_dict_output(pp_data_dir, 'id_item_map.json')

    # Map each product's asin to its title and fold it into the id -> item map
    asin_mapping = {record['asin']: record['title'] for record in records}
    item_mapping.update(asin_mapping)

    # Requires `import pickle` at module level
    with open(pp_data_dir + '/' + name + '_meta_id_item_mapping.pt', 'wb') as f:
        pickle.dump(item_mapping, f)
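# Standalone check (toy values) of the pandas orientation used above: with
# orient='records', each DataFrame row becomes one dict keyed by column name.
import pandas as pd

_df = pd.DataFrame({'asin': ['B0001'], 'title': ['Chef Knife']})
assert _df.to_dict('records') == [{'asin': 'B0001', 'title': 'Chef Knife'}]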

print("Reading dataset")

if args.dataset == "movielens":
    data_dir = cfg.vals['movielens_dir'] + "/preprocessed_choice/"
elif args.dataset == "amazon":
    data_dir = cfg.vals['amazon_dir'] + "/preprocessed_choice/"
else:
    raise ValueError("--dataset must be 'amazon' or 'movielens'")

X_train, X_test, y_train, y_test = read_train_test_dir(data_dir)

print("Dataset read complete...")

user_item_rating_map = load_dict_output(data_dir, "user_item_rating.json",
                                        True)
item_rating_map = load_dict_output(data_dir, "item_rating.json", True)
stats = load_dict_output(data_dir, "stats.json")

print("n users: {}".format(stats['n_users']))
print("n items: {}".format(stats['n_items']))

n_test = get_test_sample_size(X_test.shape[0], k=TEST_BATCH_SIZE)
X_test = X_test[:n_test, :]
y_test = y_test[:n_test, :]
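# Hedged sketch: get_test_sample_size presumably rounds the test-set size
# down to the largest multiple of TEST_BATCH_SIZE so every evaluation batch
# is full. Under that assumption it behaves like this standalone helper:
def _test_sample_size_sketch(n_rows, k):
    return (n_rows // k) * k

assert _test_sample_size_sketch(1050, 100) == 1000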

mlp = MLP({
    'num_users': stats['n_users'],
    'num_items': stats['n_items'],
    'latent_dim': params["h_dim_size"],
    'use_cuda': args.cuda,
})
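# Hedged note: these config keys follow the common NCF-style MLP pattern
# (user/item embedding tables of width latent_dim feeding a feed-forward
# tower); the MLP class itself is defined elsewhere in this project.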
import torch
import pandas as pd
import numpy as np
from model.trainer import NeuralUtilityTrainer
from model._loss import loss_mse
from sklearn.preprocessing import OneHotEncoder
from experiments.utils import get_mrs_arr, get_supp_k, get_comp_k
from preprocessing.utils import load_dict_output
# Note: this snippet also uses `cfg` (the project config) and
# `read_train_test_dir`; their imports live elsewhere in the project and
# are not shown in this excerpt.

# Set model and data paths manually
# TODO: Ahmad to modify for his application
MODEL_PATH = cfg.vals["model_dir"] + "/home_kitchen_ahmad/item_encoder_amazon_utility_done.pt"
data_dir = cfg.vals['amazon_dir'] + "/preprocessed_home_kitchen/"
item_map = load_dict_output(data_dir, "id_item_map.json", True)
item_to_idx = load_dict_output(data_dir, "item_id_map.json", True)
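# Assumed from the file names: item_id_map.json maps item asin -> integer id
# and id_item_map.json is its inverse.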

# Process data
X_train, X_test, y_train, y_test = read_train_test_dir(data_dir)
stats = load_dict_output(data_dir, "stats.json")

one_hot = OneHotEncoder(categories=[range(stats["n_items"])])
items_for_grad = one_hot.fit_transform(
    np.arange(stats["n_items"]).reshape(-1, 1)).todense().astype(np.float32)
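# Sanity check (standalone): one-hot encoding the full id range 0..n-1 in
# order is just the n x n identity matrix, so np.eye(n, dtype=np.float32)
# would be an equivalent construction.
_check = OneHotEncoder(categories=[range(3)]).fit_transform(
    np.arange(3).reshape(-1, 1)).todense().astype(np.float32)
assert np.array_equal(_check, np.eye(3, dtype=np.float32))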

train = np.concatenate([X_train, y_train], axis=1)
test = np.concatenate([X_test, y_test], axis=1)
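# Hedged note: X_* holds (user_id, item_id) pairs column-wise and y_* a
# single rating column, so each concatenated row is presumably
# [user, item, rating], the layout the trainer appears to expect.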
# Example 5


print("Reading dataset")

if args.dataset == "movielens":
    data_dir = cfg.vals['movielens_dir'] + "/preprocessed/"
elif args.dataset == "amazon":
    data_dir = cfg.vals['amazon_dir'] + "/preprocessed/"
else:
    raise ValueError("--dataset must be 'amazon' or 'movielens'")

X_train, X_test, y_train, y_test = read_train_test_dir(data_dir)
stats = load_dict_output(data_dir, "stats.json")
print("Dataset read complete...")



n_test = get_test_sample_size(X_test.shape[0], k=TEST_BATCH_SIZE)
X_test = X_test[:n_test, :]
y_test = y_test[:n_test, :]

users_test = X_test[:, 0].reshape(-1,1)
items_test = X_test[:, 1].reshape(-1,1)
y_test = y_test.reshape(-1,1)


predictor = Predictor(model=model, batch_size=TEST_BATCH_SIZE, users=users_test, items=items_test, y=y_test,
                      use_cuda=args.cuda, n_items=stats["n_items"])
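# Hedged note: Predictor presumably feeds the held-out (user, item) pairs
# through the trained model in chunks of TEST_BATCH_SIZE and compares the
# predictions with y_test; the exact interface lives in the model code.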