def test_compute_rating_predictions(rating_true):
    """Verify compute_rating_predictions against direct SVD.predict calls.

    Covers the default column names and custom usercol/itemcol/predcol
    names: output schema, preserved user/item dtypes, and agreement of a
    single (user, item) prediction with the underlying model.
    """

    def lookup(frame, ucol, icol, pcol, u, i):
        # Prediction values for one (user, item) pair.
        return frame[(frame[ucol] == u) & (frame[icol] == i)][pcol].values

    model = surprise.SVD()
    trainset = surprise.Dataset.load_from_df(
        rating_true, reader=surprise.Reader()
    ).build_full_trainset()
    model.fit(trainset)

    # Default column names.
    preds = compute_rating_predictions(model, rating_true)
    assert set(preds.columns) == {"userID", "itemID", "prediction"}
    assert preds["userID"].dtypes == rating_true["userID"].dtypes
    assert preds["itemID"].dtypes == rating_true["itemID"].dtypes
    user = rating_true.iloc[0]["userID"]
    item = rating_true.iloc[0]["itemID"]
    assert lookup(preds, "userID", "itemID", "prediction", user, item) == pytest.approx(
        model.predict(user, item).est, rel=TOL
    )

    # Custom column names.
    renamed = rating_true.rename(columns={"userID": "uid", "itemID": "iid"})
    preds = compute_rating_predictions(
        model, renamed, usercol="uid", itemcol="iid", predcol="pred"
    )
    assert set(preds.columns) == {"uid", "iid", "pred"}
    assert preds["uid"].dtypes == rating_true["userID"].dtypes
    assert preds["iid"].dtypes == rating_true["itemID"].dtypes
    user = rating_true.iloc[1]["userID"]
    item = rating_true.iloc[1]["itemID"]
    assert lookup(preds, "uid", "iid", "pred", user, item) == pytest.approx(
        model.predict(user, item).est, rel=TOL
    )
def test_compute_rating_predictions(python_data):
    """Verify compute_rating_predictions output schema, dtypes, and values.

    Runs the same checks twice: once with the default column names and once
    with renamed usercol/itemcol/predcol, asserting each time that a single
    prediction agrees with ``SVD.predict``.
    """
    rating_true, _, _ = python_data(binary_rating=False)
    algo = surprise.SVD()
    algo.fit(
        surprise.Dataset.load_from_df(
            rating_true, reader=surprise.Reader()
        ).build_full_trainset()
    )

    # (input frame, user col, item col, prediction col, extra kwargs, probe row)
    cases = [
        (rating_true, 'userID', 'itemID', 'prediction', {}, 0),
        (
            rating_true.rename(columns={'userID': 'uid', 'itemID': 'iid'}),
            'uid',
            'iid',
            'pred',
            {'usercol': 'uid', 'itemcol': 'iid', 'predcol': 'pred'},
            1,
        ),
    ]
    for data, ucol, icol, pcol, kwargs, row in cases:
        preds = compute_rating_predictions(algo, data, **kwargs)
        assert set(preds.columns) == {ucol, icol, pcol}
        assert preds[ucol].dtypes == rating_true['userID'].dtypes
        assert preds[icol].dtypes == rating_true['itemID'].dtypes
        user = rating_true.iloc[row]['userID']
        item = rating_true.iloc[row]['itemID']
        hit = preds[(preds[ucol] == user) & (preds[icol] == item)]
        assert hit[pcol].values == pytest.approx(
            algo.predict(user, item).est, rel=TOL
        )
def test_compute_rating_predictions(rating_true):
    """Check compute_rating_predictions against direct SVD.predict calls.

    Verifies the output schema, that user/item dtypes are preserved, and
    that an individual prediction matches the model — both for the default
    column names and for user-supplied usercol/itemcol/predcol names.
    """
    model = surprise.SVD()
    full_trainset = surprise.Dataset.load_from_df(
        rating_true, reader=surprise.Reader()
    ).build_full_trainset()
    model.fit(full_trainset)

    # Default column names.
    preds = compute_rating_predictions(model, rating_true)
    assert set(preds.columns) == {"userID", "itemID", "prediction"}
    assert preds["userID"].dtypes == rating_true["userID"].dtypes
    assert preds["itemID"].dtypes == rating_true["itemID"].dtypes
    user = rating_true.iloc[0]["userID"]
    item = rating_true.iloc[0]["itemID"]
    mask = (preds["userID"] == user) & (preds["itemID"] == item)
    assert preds.loc[mask, "prediction"].values == pytest.approx(
        model.predict(user, item).est, rel=TOL
    )

    # Custom column names.
    renamed = rating_true.rename(columns={"userID": "uid", "itemID": "iid"})
    preds = compute_rating_predictions(
        model, renamed, usercol="uid", itemcol="iid", predcol="pred"
    )
    assert set(preds.columns) == {"uid", "iid", "pred"}
    assert preds["uid"].dtypes == rating_true["userID"].dtypes
    assert preds["iid"].dtypes == rating_true["itemID"].dtypes
    user = rating_true.iloc[1]["userID"]
    item = rating_true.iloc[1]["itemID"]
    mask = (preds["uid"] == user) & (preds["iid"] == item)
    assert preds.loc[mask, "pred"].values == pytest.approx(
        model.predict(user, item).est, rel=TOL
    )
def svd_training(params):
    """Train a Surprise SVD model and report evaluation metrics to NNI.

    Loads pickled train/validation DataFrames, fits SVD with the
    hyper-parameters found in ``params``, computes the requested rating
    and/or ranking metrics, reports them via ``nni.report_final_result``,
    and also dumps them to ``metrics.json`` in the NNI output directory.

    Args:
        params (dict): run configuration. Read keys include 'datastore',
            'train_datapath', 'validation_datapath', 'surprise_reader',
            'usercol', 'itemcol', 'rating_metrics', 'ranking_metrics',
            'primary_metric', 'k', 'recommend_seen', plus the SVD
            hyper-parameters collected into ``svd_params`` below.

    Returns:
        surprise.SVD: the fitted model.

    Raises:
        ValueError: if neither rating nor ranking metrics are specified.
    """
    logger.debug("Start training...")
    train_data = pd.read_pickle(path=os.path.join(params['datastore'], params['train_datapath']))
    validation_data = pd.read_pickle(path=os.path.join(params['datastore'], params['validation_datapath']))
    # Forward only the SVD hyper-parameters to the model constructor.
    svd_params = {p: params[p] for p in ['random_state', 'n_epochs', 'verbose', 'biased', 'n_factors', 'init_mean', 'init_std_dev', 'lr_all', 'reg_all', 'lr_bu', 'lr_bi', 'lr_pu', 'lr_qi', 'reg_bu', 'reg_bi', 'reg_pu', 'reg_qi']}
    svd = surprise.SVD(**svd_params)
    train_set = surprise.Dataset.load_from_df(train_data, reader=surprise.Reader(params['surprise_reader'])) \
        .build_full_trainset()
    svd.fit(train_set)
    logger.debug("Evaluating...")
    metrics_dict = {}
    rating_metrics = params['rating_metrics']
    if len(rating_metrics) > 0:
        predictions = compute_rating_predictions(svd, validation_data, usercol=params['usercol'], itemcol=params['itemcol'])
        for metric in rating_metrics:
            # Metric names are resolved as functions on the `evaluation` module.
            result = getattr(evaluation, metric)(validation_data, predictions)
            logger.debug("%s = %g", metric, result)
            # NNI treats the 'default' key as the optimization target.
            if metric == params['primary_metric']:
                metrics_dict['default'] = result
            else:
                metrics_dict[metric] = result
    ranking_metrics = params['ranking_metrics']
    if len(ranking_metrics) > 0:
        # NOTE(review): a sibling variant of this function passes
        # `remove_seen=` here — confirm which keyword the current
        # compute_ranking_predictions signature expects.
        all_predictions = compute_ranking_predictions(svd, train_data, usercol=params['usercol'], itemcol=params['itemcol'], recommend_seen=params['recommend_seen'])
        k = params['k']
        for metric in ranking_metrics:
            result = getattr(evaluation, metric)(validation_data, all_predictions, col_prediction='prediction', k=k)
            logger.debug("%s@%d = %g", metric, k, result)
            if metric == params['primary_metric']:
                metrics_dict['default'] = result
            else:
                metrics_dict[metric] = result
    if len(ranking_metrics) == 0 and len(rating_metrics) == 0:
        raise ValueError("No metrics were specified.")
    # Report the metrics
    nni.report_final_result(metrics_dict)
    # Save the metrics in a JSON file
    # NOTE(review): .get() may return None if NNI_OUTPUT_DIR is unset,
    # which makes os.path.join raise a cryptic TypeError — verify the env
    # var is guaranteed by the NNI runtime.
    output_dir = os.environ.get('NNI_OUTPUT_DIR')
    with open(os.path.join(output_dir, 'metrics.json'), 'w') as fp:
        temp_dict = metrics_dict.copy()
        # Restore the primary metric under its real name for the JSON file.
        # NOTE(review): pop('default') raises KeyError if primary_metric was
        # not among the computed metrics — confirm callers guarantee this.
        temp_dict[params['primary_metric']] = temp_dict.pop('default')
        json.dump(temp_dict, fp)
    return svd
def predict_svd(model, test):
    """Score ``test`` with a trained Surprise model, timing the call.

    Returns:
        tuple: (predictions DataFrame, Timer with the elapsed time).
    """
    column_kwargs = {
        "usercol": DEFAULT_USER_COL,
        "itemcol": DEFAULT_ITEM_COL,
        "predcol": DEFAULT_PREDICTION_COL,
    }
    with Timer() as elapsed:
        predictions = compute_rating_predictions(model, test, **column_kwargs)
    return predictions, elapsed
def predict_svd(model, test):
    """Generate rating predictions for ``test`` and measure elapsed time.

    Returns:
        tuple: (predictions DataFrame, Timer capturing the prediction time).
    """
    with Timer() as timer:
        scored = compute_rating_predictions(
            model,
            test,
            usercol=DEFAULT_USER_COL,
            itemcol=DEFAULT_ITEM_COL,
            predcol=DEFAULT_PREDICTION_COL,
        )
    return scored, timer
def svd_training(args):
    """Train a Surprise SVD model and log evaluation metrics (AML-aware).

    Loads pickled train/validation DataFrames from ``args.datastore``,
    fits SVD with the given hyper-parameters, evaluates the requested
    rating and/or ranking metrics, and logs each result to the Azure ML
    run when ``HAS_AML`` is true.

    Args:
        args: parsed command-line namespace with data paths, SVD
            hyper-parameters, metric lists, and column names.

    Returns:
        surprise.SVD: the fitted model.

    Raises:
        ValueError: if neither rating nor ranking metrics are specified.
    """
    print("Start training...")
    train_data = pd.read_pickle(path=os.path.join(args.datastore, args.train_datapath))
    validation_data = pd.read_pickle(path=os.path.join(args.datastore, args.validation_datapath))
    svd = surprise.SVD(random_state=args.random_state, n_epochs=args.epochs, verbose=args.verbose, biased=args.biased, n_factors=args.n_factors,
                       init_mean=args.init_mean, init_std_dev=args.init_std_dev, lr_all=args.lr_all, reg_all=args.reg_all,
                       lr_bu=args.lr_bu, lr_bi=args.lr_bi, lr_pu=args.lr_pu, lr_qi=args.lr_qi,
                       reg_bu=args.reg_bu, reg_bi=args.reg_bi, reg_pu=args.reg_pu, reg_qi=args.reg_qi)
    train_set = surprise.Dataset.load_from_df(train_data, reader=surprise.Reader(args.surprise_reader)) \
        .build_full_trainset()
    svd.fit(train_set)
    print("Evaluating...")
    rating_metrics = args.rating_metrics
    if len(rating_metrics) > 0:
        predictions = compute_rating_predictions(svd, validation_data, usercol=args.usercol, itemcol=args.itemcol)
        for metric in rating_metrics:
            # NOTE(review): eval() on a CLI-supplied metric name executes
            # arbitrary code and assumes the metric function is importable at
            # module level — prefer getattr() on an explicit metrics module.
            result = eval(metric)(validation_data, predictions)
            print(metric, result)
            if HAS_AML:
                run.log(metric, result)
    ranking_metrics = args.ranking_metrics
    if len(ranking_metrics) > 0:
        all_predictions = compute_ranking_predictions(svd, train_data, usercol=args.usercol, itemcol=args.itemcol, remove_seen=args.remove_seen)
        k = args.k
        for metric in ranking_metrics:
            # NOTE(review): same eval() concern as above.
            result = eval(metric)(validation_data, all_predictions, col_prediction='prediction', k=k)
            print("{}@{}".format(metric, k), result)
            if HAS_AML:
                run.log(metric, result)
    if len(ranking_metrics) == 0 and len(rating_metrics) == 0:
        raise ValueError("No metrics were specified.")
    return svd
def svd_training(args):
    """Train a Surprise SVD model and log evaluation metrics (AML-aware).

    Same flow as the sibling variant: load pickled train/validation data,
    fit SVD, compute requested rating/ranking metrics, and log to the
    Azure ML run when ``HAS_AML`` is true.

    Args:
        args: parsed command-line namespace with data paths, SVD
            hyper-parameters, metric lists, and column names.

    Returns:
        surprise.SVD: the fitted model.

    Raises:
        ValueError: if neither rating nor ranking metrics are specified.
    """
    print("Start training...")
    train_data = pd.read_pickle(path=os.path.join(args.datastore, args.train_datapath))
    validation_data = pd.read_pickle(path=os.path.join(args.datastore, args.validation_datapath))
    svd = surprise.SVD(random_state=args.random_state, n_epochs=args.epochs, verbose=args.verbose, biased=args.biased, n_factors=args.n_factors,
                       init_mean=args.init_mean, init_std_dev=args.init_std_dev, lr_all=args.lr_all, reg_all=args.reg_all,
                       lr_bu=args.lr_bu, lr_bi=args.lr_bi, lr_pu=args.lr_pu, lr_qi=args.lr_qi,
                       reg_bu=args.reg_bu, reg_bi=args.reg_bi, reg_pu=args.reg_pu, reg_qi=args.reg_qi)
    train_set = surprise.Dataset.load_from_df(train_data, reader=surprise.Reader(args.surprise_reader)) \
        .build_full_trainset()
    svd.fit(train_set)
    print("Evaluating...")
    rating_metrics = args.rating_metrics
    if len(rating_metrics) > 0:
        predictions = compute_rating_predictions(svd, validation_data, usercol=args.usercol, itemcol=args.itemcol)
        for metric in rating_metrics:
            # NOTE(review): eval() on a CLI-supplied metric name executes
            # arbitrary code and assumes the metric function is importable at
            # module level — prefer getattr() on an explicit metrics module.
            result = eval(metric)(validation_data, predictions)
            print(metric, result)
            if HAS_AML:
                run.log(metric, result)
    ranking_metrics = args.ranking_metrics
    if len(ranking_metrics) > 0:
        # NOTE(review): a sibling variant passes `remove_seen=` here —
        # confirm which keyword the current compute_ranking_predictions
        # signature expects.
        all_predictions = compute_ranking_predictions(svd, train_data, usercol=args.usercol, itemcol=args.itemcol, recommend_seen=args.recommend_seen)
        k = args.k
        for metric in ranking_metrics:
            # NOTE(review): same eval() concern as above.
            result = eval(metric)(validation_data, all_predictions, col_prediction='prediction', k=k)
            print("{}@{}".format(metric, k), result)
            if HAS_AML:
                run.log(metric, result)
    if len(ranking_metrics) == 0 and len(rating_metrics) == 0:
        raise ValueError("No metrics were specified.")
    return svd
def svd_training(params):
    """Train a Surprise SVD model and report evaluation metrics to NNI.

    Loads pickled train/validation DataFrames, fits SVD with the
    hyper-parameters in ``params``, computes the requested rating and/or
    ranking metrics, reports them via ``nni.report_final_result``, and
    writes them to ``metrics.json`` in the NNI output directory.

    Args:
        params (dict): run configuration. Read keys include 'datastore',
            'train_datapath', 'validation_datapath', 'surprise_reader',
            'usercol', 'itemcol', 'rating_metrics', 'ranking_metrics',
            'primary_metric', 'k', 'recommend_seen', plus the SVD
            hyper-parameters collected into ``svd_params`` below.

    Returns:
        surprise.SVD: the fitted model.

    Raises:
        ValueError: if neither rating nor ranking metrics are specified.
    """
    logger.debug("Start training...")
    train_data = pd.read_pickle(
        path=os.path.join(params['datastore'], params['train_datapath']))
    validation_data = pd.read_pickle(
        path=os.path.join(params['datastore'], params['validation_datapath']))
    # Forward only the SVD hyper-parameters to the model constructor.
    svd_params = {
        p: params[p]
        for p in [
            'random_state', 'n_epochs', 'verbose', 'biased', 'n_factors',
            'init_mean', 'init_std_dev', 'lr_all', 'reg_all', 'lr_bu', 'lr_bi',
            'lr_pu', 'lr_qi', 'reg_bu', 'reg_bi', 'reg_pu', 'reg_qi'
        ]
    }
    svd = surprise.SVD(**svd_params)
    train_set = surprise.Dataset.load_from_df(train_data, reader=surprise.Reader(params['surprise_reader'])) \
        .build_full_trainset()
    svd.fit(train_set)
    logger.debug("Evaluating...")
    metrics_dict = {}
    rating_metrics = params['rating_metrics']
    if len(rating_metrics) > 0:
        predictions = compute_rating_predictions(svd, validation_data, usercol=params['usercol'], itemcol=params['itemcol'])
        for metric in rating_metrics:
            # Metric names are resolved as functions on the `evaluation` module.
            result = getattr(evaluation, metric)(validation_data, predictions)
            logger.debug("%s = %g", metric, result)
            # NNI treats the 'default' key as the optimization target.
            if metric == params['primary_metric']:
                metrics_dict['default'] = result
            else:
                metrics_dict[metric] = result
    ranking_metrics = params['ranking_metrics']
    if len(ranking_metrics) > 0:
        # NOTE(review): a sibling variant of this function passes
        # `remove_seen=` here — confirm which keyword the current
        # compute_ranking_predictions signature expects.
        all_predictions = compute_ranking_predictions(
            svd,
            train_data,
            usercol=params['usercol'],
            itemcol=params['itemcol'],
            recommend_seen=params['recommend_seen'])
        k = params['k']
        for metric in ranking_metrics:
            result = getattr(evaluation, metric)(validation_data, all_predictions, col_prediction='prediction', k=k)
            logger.debug("%s@%d = %g", metric, k, result)
            if metric == params['primary_metric']:
                metrics_dict['default'] = result
            else:
                metrics_dict[metric] = result
    if len(ranking_metrics) == 0 and len(rating_metrics) == 0:
        raise ValueError("No metrics were specified.")
    # Report the metrics
    nni.report_final_result(metrics_dict)
    # Save the metrics in a JSON file
    # NOTE(review): .get() may return None if NNI_OUTPUT_DIR is unset, which
    # makes os.path.join raise a cryptic TypeError — verify the env var is
    # guaranteed by the NNI runtime.
    output_dir = os.environ.get('NNI_OUTPUT_DIR')
    with open(os.path.join(output_dir, 'metrics.json'), 'w') as fp:
        temp_dict = metrics_dict.copy()
        # Restore the primary metric under its real name for the JSON file.
        # NOTE(review): pop('default') raises KeyError if primary_metric was
        # not among the computed metrics — confirm callers guarantee this.
        temp_dict[params['primary_metric']] = temp_dict.pop('default')
        json.dump(temp_dict, fp)
    return svd
def svd_training(params):
    """Train a Surprise SVD model and report evaluation metrics to NNI.

    Loads pickled train/validation DataFrames, fits SVD with the
    hyper-parameters in ``params``, computes the requested rating and/or
    ranking metrics, reports them via ``nni.report_final_result``, and
    writes them to ``metrics.json`` in the NNI output directory.

    Args:
        params (dict): run configuration. Read keys include "datastore",
            "train_datapath", "validation_datapath", "surprise_reader",
            "usercol", "itemcol", "rating_metrics", "ranking_metrics",
            "primary_metric", "k", "remove_seen", plus the SVD
            hyper-parameters collected into ``svd_params`` below.

    Returns:
        surprise.SVD: the fitted model.

    Raises:
        ValueError: if neither rating nor ranking metrics are specified,
            or if the NNI_OUTPUT_DIR environment variable is not set.
    """
    logger.debug("Start training...")
    train_data = pd.read_pickle(
        path=os.path.join(params["datastore"], params["train_datapath"]))
    validation_data = pd.read_pickle(
        path=os.path.join(params["datastore"], params["validation_datapath"]))
    # Forward only the SVD hyper-parameters to the model constructor.
    svd_params = {
        p: params[p]
        for p in [
            "random_state",
            "n_epochs",
            "verbose",
            "biased",
            "n_factors",
            "init_mean",
            "init_std_dev",
            "lr_all",
            "reg_all",
            "lr_bu",
            "lr_bi",
            "lr_pu",
            "lr_qi",
            "reg_bu",
            "reg_bi",
            "reg_pu",
            "reg_qi",
        ]
    }
    svd = surprise.SVD(**svd_params)
    train_set = surprise.Dataset.load_from_df(
        train_data, reader=surprise.Reader(
            params["surprise_reader"])).build_full_trainset()
    svd.fit(train_set)
    logger.debug("Evaluating...")
    metrics_dict = {}
    rating_metrics = params["rating_metrics"]
    if len(rating_metrics) > 0:
        predictions = compute_rating_predictions(svd, validation_data,
                                                 usercol=params["usercol"],
                                                 itemcol=params["itemcol"])
        for metric in rating_metrics:
            # Metric names are resolved as functions on the `evaluation` module.
            result = getattr(evaluation, metric)(validation_data, predictions)
            logger.debug("%s = %g", metric, result)
            # NNI treats the 'default' key as the optimization target.
            if metric == params["primary_metric"]:
                metrics_dict["default"] = result
            else:
                metrics_dict[metric] = result
    ranking_metrics = params["ranking_metrics"]
    if len(ranking_metrics) > 0:
        all_predictions = compute_ranking_predictions(
            svd,
            train_data,
            usercol=params["usercol"],
            itemcol=params["itemcol"],
            remove_seen=params["remove_seen"],
        )
        k = params["k"]
        for metric in ranking_metrics:
            result = getattr(evaluation, metric)(validation_data,
                                                 all_predictions,
                                                 col_prediction="prediction",
                                                 k=k)
            logger.debug("%s@%d = %g", metric, k, result)
            if metric == params["primary_metric"]:
                metrics_dict["default"] = result
            else:
                metrics_dict[metric] = result
    if len(ranking_metrics) == 0 and len(rating_metrics) == 0:
        raise ValueError("No metrics were specified.")
    # Report the metrics
    nni.report_final_result(metrics_dict)
    # Save the metrics in a JSON file
    output_dir = os.environ.get("NNI_OUTPUT_DIR")
    # Fail fast with a clear message instead of letting os.path.join raise
    # a cryptic TypeError when the NNI runtime did not set the variable.
    if output_dir is None:
        raise ValueError("Environment variable NNI_OUTPUT_DIR is not set.")
    with open(os.path.join(output_dir, "metrics.json"), "w") as fp:
        temp_dict = metrics_dict.copy()
        # Restore the primary metric under its real name for the JSON file.
        # NOTE(review): pop("default") raises KeyError if primary_metric was
        # not among the computed metrics — confirm callers guarantee this.
        temp_dict[params["primary_metric"]] = temp_dict.pop("default")
        json.dump(temp_dict, fp)
    return svd