# Tail of the --params parsing loop: each "key=value" pair is evaluated when
# possible (numbers, booleans, ...), otherwise kept as a raw string.
key, value = p_str.split('=')
try:
    # NOTE(review): eval on CLI input is unsafe for untrusted callers;
    # ast.literal_eval would be the safer choice if only literals are expected.
    init_args[key] = eval(value)
except Exception:  # narrowed from a bare except: don't swallow SystemExit/KeyboardInterrupt
    init_args[key] = value

# convert the column argument to list
if args.columns is not None:
    args.columns = args.columns.split(',')

# read the dataset; the train read builds the item/user index maps, which are
# then reused for the test read so both frames share one index space
logger.info('Reading {}'.format(args.train))
train_df, item_to_idx, user_to_idx = read_dataset(
    args.train,
    sep=',',
    header=0,
    columns=args.columns,
    user_key=args.user_key,
    item_key=args.item_key,
    rating_key=args.rating_key)
logger.info('Reading {}'.format(args.test))
test_df, _, _ = read_dataset(
    args.test,
    sep=',',
    header=0,
    columns=args.columns,
    user_key=args.user_key,
    item_key=args.item_key,
    rating_key=args.rating_key,
    item_to_idx=item_to_idx,
    user_to_idx=user_to_idx)
# build reverse maps
# Tail of the --params parsing loop (the for/split lines precede this chunk):
# evaluate the value when possible, otherwise keep the raw string.
try:
    # NOTE(review): eval on CLI input is unsafe for untrusted callers;
    # prefer ast.literal_eval if only Python literals are expected.
    init_args[key] = eval(value)
except Exception:  # narrowed from a bare except: don't swallow SystemExit/KeyboardInterrupt
    init_args[key] = value

# convert the column argument to list
if args.columns is not None:
    args.columns = args.columns.split(',')

# read the dataset
logger.info('Reading {}'.format(args.dataset))
dataset, idx_to_user, idx_to_item = read_dataset(
    args.dataset,
    header=args.header,
    sep=args.sep,
    columns=args.columns,
    make_implicit=args.make_implicit,
    implicit_th=args.implicit_th,
    item_key=args.item_key,
    user_key=args.user_key,
    rating_key=args.rating_key)
nusers, nitems = len(idx_to_user), len(idx_to_item)
logger.info('The dataset has {} users and {} items'.format(nusers, nitems))

# evaluate the recommendation quality with k-fold cross-validation
logger.info('Running {}-fold Cross Validation'.format(args.cv_folds))
# one per-fold accumulator per metric, each of shape (cv_folds,); a single
# zeros((6, k)) unpack replaces six separate np.zeros(k) calls
roc_auc_, precision_, recall_, map_, mrr_, ndcg_ = np.zeros((6, args.cv_folds))
at = args.rec_length
nfold = 0
# Build the recommender's keyword arguments from the --params CLI string
# ("k1=v1,k2=v2"): each value is evaluated when possible, otherwise kept raw.
init_args = OrderedDict()
if args.params:
    for p_str in args.params.split(','):
        key, value = p_str.split('=')
        try:
            # NOTE(review): eval on CLI input is unsafe for untrusted callers;
            # prefer ast.literal_eval if only Python literals are expected.
            init_args[key] = eval(value)
        except Exception:  # narrowed from a bare except
            init_args[key] = value

# convert the column argument to list
if args.columns is not None:
    args.columns = args.columns.split(',')

# read the dataset
logger.info('Reading {}'.format(args.train))
train_df = read_dataset(args.train, sep=',', header=0)
logger.info('Reading {}'.format(args.test))
test_df = read_dataset(args.test, sep=',', header=0)
# index columns are 0-based, so max()+1 gives the matrix dimensions
nusers, nitems = train_df.user_idx.max() + 1, train_df.item_idx.max() + 1
train = df_to_csr(train_df, nrows=nusers, ncols=nitems)
test = df_to_csr(test_df, nrows=nusers, ncols=nitems)

# train the recommender
recommender = RecommenderClass(**init_args)
logger.info('Recommender: {}'.format(recommender))
tic = dt.now()
logger.info('Training started')
recommender.fit(train)
logger.info('Training completed in {}'.format(dt.now() - tic))
parser.add_argument('--item_key', type=str, default='item_id') parser.add_argument('--rating_key', type=str, default='rating') parser.add_argument('--rnd_seed', type=int, default=1234) args = parser.parse_args() # convert the column argument to list if args.columns is not None: args.columns = args.columns.split(',') # read the dataset logger.info('Reading {}'.format(args.dataset)) dataset, idx_to_user, idx_to_item = read_dataset(args.dataset, header=args.header, sep=args.sep, columns=args.columns, make_binary=args.make_binary, binary_th=args.binary_th, item_key=args.item_key, user_key=args.user_key, rating_key=args.rating_key) nusers, nitems = len(idx_to_user), len(idx_to_item) logger.info('The dataset has {} users and {} items'.format(nusers, nitems)) # compute the holdout split logger.info('Computing the {:.0f}% holdout split'.format(args.holdout_perc * 100)) train_df, test_df = holdout(dataset, user_key=args.user_key, item_key=args.item_key, perc=args.holdout_perc,
import logging
import argparse

import numpy as np
import pandas as pd

from recpy.recommenders.item_knn import ItemKNNRecommender
from recpy.recommenders.non_personalized import TopPop, GlobalEffects
from recpy.utils.data_utils import read_dataset, df_to_csr
from recpy.metrics import roc_auc, precision, recall, map, ndcg, rr

logger = logging.getLogger(__name__)
# BUG FIX: the format string used '%(l-evelname)s', which is not a LogRecord
# attribute and raises ValueError on the first log call; the correct
# attribute name is 'levelname'.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s: %(name)s: %(levelname)s: %(message)s")

# tuning split: train on train_tuning, evaluate on the validation set
train_df = read_dataset('data/ml100k/train_tuning.csv', sep=',', header=0)
test_df = read_dataset('data/ml100k/valid.csv', sep=',', header=0)
# index columns are 0-based, so max()+1 gives the matrix dimensions
nusers, nitems = train_df.user_idx.max() + 1, train_df.item_idx.max() + 1
train = df_to_csr(train_df, nrows=nusers, ncols=nitems)
test = df_to_csr(test_df, nrows=nusers, ncols=nitems)

# grid-search the shrinkage parameter of the cosine item-kNN recommender
shrinkage = np.arange(0, 200, 25)
result = np.zeros_like(shrinkage, dtype=np.float32)
metric = recall
at = 10
for idx, sh in enumerate(shrinkage):
    logger.info("Iter {}/{}".format(idx + 1, len(shrinkage)))
    recommender = ItemKNNRecommender(shrinkage=sh, similarity='cosine')
    recommender.fit(train)
roc_auc_ /= n_eval precision_ /= n_eval recall_ /= n_eval map_ /= n_eval mrr_ /= n_eval ndcg_ /= n_eval return roc_auc_, precision_, recall_, map_, mrr_, ndcg_ metric = roc_auc cv_folds = 5 at = 10 is_binary = True train_df = read_dataset('../../data/ml100k/binary_holdout/train.csv', sep=',', header=0) test_df = read_dataset('../../data/ml100k/binary_holdout/test.csv', sep=',', header=0) nusers, nitems = train_df.user_idx.max() + 1, train_df.item_idx.max() + 1 train = df_to_csr(train_df, is_binary=is_binary, nrows=nusers, ncols=nitems) test = df_to_csr(test_df, is_binary=is_binary, nrows=nusers, ncols=nitems) # # TopPop # # RecommenderClass = TopPop # param_space = {} # # Evaluate all the metrics over the hold out split # recommender = RecommenderClass()