def test_user_based_field():
    """Check that the ``user_based`` similarity option changes the results.

    Every KNN algorithm class listed below is evaluated twice on ``data``:
    first with ``user_based`` set to True, then with it set to False. The
    two RMSE results must not be equal.
    """
    for knn_class in (KNNBasic, KNNWithMeans, KNNBaseline):
        # Evaluated in order: user-based first, item-based second.
        user_rmse, item_rmse = [
            evaluate(knn_class(sim_options={'user_based': flag}),
                     data, measures=['rmse'])['rmse']
            for flag in (True, False)
        ]
        assert user_rmse != item_rmse
def test_SVDpp_parameters():
    """Ensure that the ``n_factors`` parameter of SVDpp is taken into account.

    Only ``n_factors`` is checked here: a model with ``n_factors=1`` is
    compared against one with ``n_factors=2`` (both with a single epoch),
    and their RMSE results must differ.
    """
    # The baseline against which to compare.
    algo = SVDpp(n_factors=1, n_epochs=1)
    rmse_default = evaluate(algo, data, measures=['rmse'])['rmse']

    # n_factors
    algo = SVDpp(n_factors=2, n_epochs=1)
    rmse_factors = evaluate(algo, data, measures=['rmse'])['rmse']
    assert rmse_default != rmse_factors

    # The rest is OK but just takes too long for now...
    # (No other parameter is exercised by this test. The original ended with
    # an unterminated triple-quote here, which would have turned the code
    # that follows into a string literal.)
def test_shrinkage_field():
    """Ensure the shrinkage field is taken into account.

    KNNBasic is evaluated twice with the ``pearson_baseline`` similarity,
    once with ``shrinkage=0`` and once with ``shrinkage=100``. Both runs
    receive the same baseline options, so shrinkage is the only setting
    that differs between them. The two RMSE results must differ.
    """
    rmse_by_shrinkage = {}
    for shrinkage in (0, 100):
        sim_options = {'name': 'pearson_baseline', 'shrinkage': shrinkage}
        # A fresh dict per run: identical baseline settings for both runs
        # without sharing one mutable object between the two algorithms.
        bsl_options = {'n_epochs': 1}
        algo = KNNBasic(sim_options=sim_options, bsl_options=bsl_options)
        rmse_by_shrinkage[shrinkage] = evaluate(
            algo, data, measures=['rmse'])['rmse']

    assert rmse_by_shrinkage[0] != rmse_by_shrinkage[100]
def test_performances():
    """Test the returned dict. Also do dumping.

    Loads the ``custom_train`` / ``custom_test`` fold files located next to
    this test file, evaluates a NormalPredictor on them with dumping
    enabled into a temporary directory, and checks that the returned
    performances can be looked up with differently-cased measure names
    ('RMSE' / 'rmse', 'MaE' / 'mae') and yield the very same object.
    """
    current_dir = os.path.dirname(os.path.realpath(__file__))
    folds_files = [(os.path.join(current_dir, 'custom_train'),
                    os.path.join(current_dir, 'custom_test'))]

    reader = Reader(line_format='user item rating', sep=' ', skip_lines=3,
                    rating_scale=(1, 5))
    data = Dataset.load_from_folds(folds_files=folds_files, reader=reader)

    algo = NormalPredictor()
    tmp_dir = tempfile.mkdtemp()  # create tmp dir
    try:
        performances = evaluate(algo, data, measures=['RmSe', 'Mae'],
                                with_dump=True, dump_dir=tmp_dir, verbose=2)
    finally:
        # Remove the tmp dir even when evaluate() raises, so that a failing
        # run does not leave dump files behind.
        shutil.rmtree(tmp_dir)

    print(performances)
    assert performances['RMSE'] is performances['rmse']
    assert performances['MaE'] is performances['mae']
def test_method_field():
    """Check that the baseline ``method`` option changes the results.

    BaselineOnly is evaluated with the 'als' method and then with the 'sgd'
    method; the two RMSE results must differ. A ValueError is expected when
    building and evaluating the algorithm with an unknown method name.
    """
    als_algo = BaselineOnly(bsl_options={'method': 'als'})
    rmse_als = evaluate(als_algo, data, measures=['rmse'])['rmse']

    sgd_algo = BaselineOnly(bsl_options={'method': 'sgd'})
    rmse_sgd = evaluate(sgd_algo, data, measures=['rmse'])['rmse']

    assert rmse_als != rmse_sgd

    # Construction and evaluation both stay inside the context manager, as
    # the error may be raised by either of them.
    with pytest.raises(ValueError):
        bad_algo = BaselineOnly(bsl_options={'method': 'wrong_name'})
        evaluate(bad_algo, data)
def test_sgd_n_epoch_field():
    """Check that ``n_epochs`` changes the results of the SGD baseline.

    BaselineOnly with the 'sgd' method is evaluated with 1 epoch and then
    with 20 epochs; the two RMSE results must differ.
    """
    rmse_by_n_epochs = {}
    for n_epochs in (1, 20):
        algo = BaselineOnly(
            bsl_options={'method': 'sgd', 'n_epochs': n_epochs})
        rmse_by_n_epochs[n_epochs] = evaluate(
            algo, data, measures=['rmse'])['rmse']

    assert rmse_by_n_epochs[1] != rmse_by_n_epochs[20]
def test_als_reg_i_field():
    """Check that ``reg_i`` changes the results of the ALS baseline.

    BaselineOnly with the 'als' method is evaluated with ``reg_i=0`` and
    then with ``reg_i=10``; the two RMSE results must differ.
    """
    no_reg = BaselineOnly(bsl_options=dict(method='als', reg_i=0))
    rmse_no_reg = evaluate(no_reg, data, measures=['rmse'])['rmse']

    strong_reg = BaselineOnly(bsl_options=dict(method='als', reg_i=10))
    rmse_strong_reg = evaluate(strong_reg, data, measures=['rmse'])['rmse']

    assert rmse_no_reg != rmse_strong_reg
def test_sgd_reg_field():
    """Check that ``reg`` changes the results of the SGD baseline.

    BaselineOnly with the 'sgd' method and a single epoch is evaluated with
    ``reg=0.02`` and then with ``reg=1``; the two RMSE results must differ.
    """
    rmses = []
    for reg in (0.02, 1):
        options = {'method': 'sgd', 'n_epochs': 1, 'reg': reg}
        algo = BaselineOnly(bsl_options=options)
        rmses.append(evaluate(algo, data, measures=['rmse'])['rmse'])

    rmse_small_reg, rmse_large_reg = rmses
    assert rmse_small_reg != rmse_large_reg
def test_sgd_learning_rate_field():
    """Check that ``learning_rate`` changes the results of the SGD baseline.

    BaselineOnly with the 'sgd' method and a single epoch is evaluated with
    a learning rate of .005 and then of .00005; the two RMSE results must
    differ.
    """
    def rmse_for(learning_rate):
        # Build and evaluate one SGD baseline with the given learning rate.
        algo = BaselineOnly(bsl_options={
            'method': 'sgd',
            'n_epochs': 1,
            'learning_rate': learning_rate,
        })
        return evaluate(algo, data, measures=['rmse'])['rmse']

    rmse_fast = rmse_for(.005)
    rmse_slow = rmse_for(.00005)
    assert rmse_fast != rmse_slow
def test_name_field():
    """Check that the similarity ``name`` option changes the results.

    KNNBasic is evaluated with the 'cosine', 'msd', 'pearson' and
    'pearson_baseline' similarities (the last one with single-epoch baseline
    options); every pair of RMSE results must differ. A NameError is
    expected when building and evaluating with an unknown similarity name.
    """
    rmses = []

    # Similarities that are configured through sim_options only.
    for sim_name in ('cosine', 'msd', 'pearson'):
        knn = KNNBasic(sim_options={'name': sim_name})
        rmses.append(evaluate(knn, data, measures=['rmse'])['rmse'])

    # pearson_baseline is additionally given baseline options.
    knn = KNNBasic(sim_options={'name': 'pearson_baseline'},
                   bsl_options={'n_epochs': 1})
    rmses.append(evaluate(knn, data, measures=['rmse'])['rmse'])

    for first, second in combinations(tuple(rmses), 2):
        assert (first != second)

    with pytest.raises(NameError):
        knn = KNNBasic(sim_options={'name': 'wrong_name'})
        evaluate(knn, data)
def main():
    """Command-line entry point: evaluate a prediction algorithm.

    Parses the command-line arguments, seeds the ``rd`` and ``np.random``
    generators with ``-seed``, instantiates the algorithm selected by
    ``-algo`` with the keyword arguments given in ``-params``, loads the
    dataset and calls ``evaluate`` on it.

    Dataset selection follows this precedence: ``-load-custom`` if given,
    else ``-folds-files`` if given, else the built-in dataset named by
    ``-load-builtin``.

    With ``--clean``, ``dataset.DATASETS_DIR`` is removed and the program
    exits without evaluating anything. Argument errors are reported through
    the parser, which prints the help message and exits with status 2.
    """

    class MyParser(argparse.ArgumentParser):
        '''A parser which prints the help message when an error occurs. Taken
        from
        http://stackoverflow.com/questions/4042452/display-help-message-with-python-argparse-when-script-is-called-without-any-argu.'''  # noqa

        def error(self, message):
            sys.stderr.write('error: %s\n' % message)
            self.print_help()
            sys.exit(2)

    parser = MyParser(
        description='Evaluate the performance of a rating prediction ' +
        'algorithm ' +
        'on a given dataset using cross validation. You can use a built-in ' +
        'or a custom dataset, and you can choose to automatically split the ' +
        'dataset into folds, or manually specify train and test files. ' +
        'Please refer to the documentation page ' +
        '(http://surprise.readthedocs.io/) for more details.',
        epilog=(
            'Example:\n'
            ' surprise -algo SVD -params '
            '"{\'n_epochs\': 5, \'verbose\': True}"'
            ' -load-builtin ml-100k -n-folds 3'
        ))

    # Maps the names accepted by -algo to the algorithm classes.
    algo_choices = {
        'NormalPredictor': NormalPredictor,
        'BaselineOnly': BaselineOnly,
        'KNNBasic': KNNBasic,
        'KNNBaseline': KNNBaseline,
        'KNNWithMeans': KNNWithMeans,
        'SVD': SVD,
        'SVDpp': SVDpp,
        'QSVD': QSVD,
        'QSVDp': QSVDp,
        'QSVDpp': QSVDpp,
        'NMF': NMF,
        'SlopeOne': SlopeOne,
        'WeightedSlopeOne': WeightedSlopeOne,
        'BiPolarSlopeOne': BiPolarSlopeOne,
        'CoClustering': CoClustering,
        'CoClusteringRegression': CoClusteringRegression,
        'SoftBoundBiPolarSlopeOne': SoftBoundBiPolarSlopeOne
    }

    parser.add_argument('-algo', type=str,
                        choices=algo_choices,
                        help='The prediction algorithm to use. ' +
                        'Allowed values are ' +
                        ', '.join(algo_choices) + '.',
                        metavar='<prediction algorithm>')

    parser.add_argument('-params', type=str,
                        metavar='<algorithm parameters>',
                        default='{}',
                        help='A kwargs dictionary that contains all the ' +
                        'algorithm parameters. ' +
                        'Example: "{\'n_epochs\': 10}".'
                        )

    parser.add_argument('-load-builtin', type=str, dest='load_builtin',
                        metavar='<dataset name>',
                        default='ml-100k',
                        help='The name of the built-in dataset to use. ' +
                        'Allowed values are ' +
                        ', '.join(dataset.BUILTIN_DATASETS.keys()) +
                        '. Default is ml-100k.'
                        )

    # NOTE(review): the two help texts below say the option is ignored when
    # -load-builtin is set, but -load-builtin always has a default value and
    # the dataset selection further down gives -load-custom, then
    # -folds-files, precedence over it. Confirm the intended wording.
    parser.add_argument('-load-custom', type=str, dest='load_custom',
                        metavar='<file path>',
                        default=None,
                        help='A file path to custom dataset to use. ' +
                        'Ignored if ' +
                        '-load-builtin is set. The -reader parameter needs ' +
                        'to be set.'
                        )

    parser.add_argument('-folds-files', type=str, dest='folds_files',
                        metavar='<train1 test1 train2 test2... >',
                        default=None,
                        help='A list of custom train and test files. ' +
                        'Ignored if -load-builtin or -load-custom is set. '
                        'The -reader parameter needs to be set.'
                        )

    parser.add_argument('-reader', type=str,
                        metavar='<reader>',
                        default=None,
                        help='A Reader to read the custom dataset. Example: ' +
                        '"Reader(line_format=\'user item rating timestamp\',' +
                        ' sep=\'\\t\')"'
                        )

    parser.add_argument('-n-folds', type=int, dest='n_folds',
                        metavar="<number of folds>",
                        default=5,
                        help='The number of folds for cross-validation. ' +
                        'Default is 5.'
                        )

    parser.add_argument('-seed', type=int,
                        metavar='<random seed>',
                        default=None,
                        help='The seed to use for RNG. ' +
                        'Default is the current system time.'
                        )

    parser.add_argument('--with-dump', dest='with_dump', action='store_true',
                        help='Dump the algorithm ' +
                        'results in a file (one file per fold). ' +
                        'Default is False.'
                        )

    parser.add_argument('-dump-dir', dest='dump_dir', type=str,
                        metavar='<dir>',
                        default=None,
                        help='Where to dump the files. Ignored if ' +
                        'with-dump is not set. Default is ' +
                        '~/.surprise_data/dumps.'
                        )

    parser.add_argument('--clean', dest='clean', action='store_true',
                        help='Remove the ' + dataset.DATASETS_DIR +
                        ' directory and exit.'
                        )

    parser.add_argument('-v', '--version', action='version',
                        version=__version__)

    args = parser.parse_args()

    if args.clean:
        shutil.rmtree(dataset.DATASETS_DIR)
        print('Removed', dataset.DATASETS_DIR)
        # sys.exit() rather than the exit() helper, which is only injected
        # by the site module and is not guaranteed to exist.
        sys.exit()

    # setup RNG
    rd.seed(args.seed)
    np.random.seed(args.seed)

    # setup algorithm
    if args.algo is None:
        parser.error('No algorithm was specified.')
    # NOTE(security): eval() executes arbitrary Python code taken from the
    # command line. Only pass trusted input to -params and -reader.
    params = eval(args.params)
    algo = algo_choices[args.algo](**params)

    # setup dataset
    if args.load_custom is not None or args.folds_files is not None:
        # Both custom sources need a Reader to parse the files.
        if args.reader is None:
            parser.error('-reader parameter is needed.')
        reader = eval(args.reader)  # see the security note above

    if args.load_custom is not None:  # load custom and split
        data = Dataset.load_from_file(args.load_custom, reader=reader)
        data.split(n_folds=args.n_folds)

    elif args.folds_files is not None:  # load from files
        paths = args.folds_files.split()
        if len(paths) % 2:
            # An unpaired trailing file used to be dropped silently.
            parser.error('-folds-files needs an even number of files '
                         '(train/test pairs).')
        # Group consecutive entries into (train, test) tuples.
        folds_files = list(zip(paths[::2], paths[1::2]))
        data = Dataset.load_from_folds(folds_files=folds_files, reader=reader)

    else:  # load builtin dataset and split
        data = Dataset.load_builtin(args.load_builtin)
        data.split(n_folds=args.n_folds)

    evaluate(algo, data, with_dump=args.with_dump, dump_dir=args.dump_dir)
def test_SVD_parameters():
    """Check that each listed SVD parameter changes the results.

    A reference model (``n_factors=1, n_epochs=1``) is evaluated first. Then
    one parameter at a time is overridden, the resulting model is evaluated
    on ``data``, and its RMSE result must differ from the reference one.
    """
    reference_kwargs = dict(n_factors=1, n_epochs=1)

    # The baseline against which to compare.
    rmse_default = evaluate(SVD(**reference_kwargs), data,
                            measures=['rmse'])['rmse']

    # One (parameter, value) override per check, in evaluation order.
    overrides = (
        ('n_factors', 2),
        ('n_epochs', 2),
        ('lr_all', 5),
        ('reg_all', 5),
        ('lr_bu', 5),
        ('lr_bi', 5),
        ('lr_pu', 5),
        ('lr_qi', 5),
        ('reg_bu', 5),
        ('reg_bi', 5),
        ('reg_pu', 5),
        ('reg_qi', 5),
    )

    for param_name, param_value in overrides:
        kwargs = dict(reference_kwargs)
        kwargs[param_name] = param_value
        rmse_changed = evaluate(SVD(**kwargs), data,
                                measures=['rmse'])['rmse']
        assert rmse_default != rmse_changed