Exemplo n.º 1
0
def main():
    args = get_arguments()

    # Data loading
    data_reader = RecSys2019Reader(args.reader_path)
    data_reader = New_DataSplitter_leave_k_out(data_reader, k_out_value=3, use_validation_set=False,
                                               force_new_split=True, seed=args.seed)
    data_reader.load_data()
    URM_train, URM_test = data_reader.get_holdout_split()

    if args.recommender_name == "sslim_bpr":
        ICM_all = get_ICM_train(data_reader)
        URM_train = sps.vstack([URM_train, ICM_all.T], format="csr")
    if args.recommender_name == "rp3beta_side":
        ICM_all = get_ICM_train(data_reader)
        URM_train = sps.vstack([URM_train, ICM_all.T], format="csr")
        URM_train = TF_IDF(URM_train).tocsr()
    if args.recommender_name == "pure_svd":
        URM_train = TF_IDF(URM_train).tocsr()
    if args.recommender_name == "pure_svd_side":
        ICM_all = get_ICM_train(data_reader)
        URM_train = sps.vstack([URM_train, ICM_all.T], format="csr")

    # Setting evaluator
    exclude_cold_users = args.exclude_users
    h = int(args.focus_on_high)
    fol = int(args.focus_on_low)
    if h != 0:
        print("Excluding users with less than {} interactions".format(h))
        ignore_users_mask = np.ediff1d(URM_train.tocsr().indptr) < h
        ignore_users = np.arange(URM_train.shape[0])[ignore_users_mask]
    elif fol != 0:
        print("Excluding users with more than {} interactions".format(fol))
        warm_users_mask = np.ediff1d(URM_train.tocsr().indptr) > fol
        ignore_users = np.arange(URM_train.shape[0])[warm_users_mask]
        if exclude_cold_users:
            cold_user_mask = np.ediff1d(URM_train.tocsr().indptr) == 0
            cold_users = np.arange(URM_train.shape[0])[cold_user_mask]
            ignore_users = np.unique(np.concatenate((cold_users, ignore_users)))
    elif exclude_cold_users:
        print("Excluding cold users...")
        cold_user_mask = np.ediff1d(URM_train.tocsr().indptr) == 0
        ignore_users = np.arange(URM_train.shape[0])[cold_user_mask]
    else:
        ignore_users = None

    cutoff_list = [10]
    evaluator = EvaluatorHoldout(URM_test, cutoff_list=cutoff_list, ignore_users=ignore_users)

    # HP tuning
    print("Start tuning...")
    version_path = "../../report/hp_tuning/{}/".format(args.recommender_name)
    now = datetime.now().strftime('%b%d_%H-%M-%S')
    now = now + "_k_out_value_3/"
    version_path = version_path + "/" + now

    runParameterSearch_Collaborative(URM_train=URM_train,
                                     recommender_class=RECOMMENDER_CLASS_DICT[args.recommender_name],
                                     evaluator_validation=evaluator,
                                     metric_to_optimize="MAP",
                                     output_folder_path=version_path,
                                     n_cases=int(args.n_cases),
                                     n_random_starts=int(args.n_random_starts))
    print("...tuning ended")
Exemplo n.º 2
0

if __name__ == '__main__':
    # Data loading
    root_data_path = "../../data/"
    data_reader = RecSys2019Reader(root_data_path)
    data_reader = New_DataSplitter_leave_k_out(data_reader,
                                               k_out_value=K_OUT,
                                               use_validation_set=False,
                                               force_new_split=True,
                                               seed=get_split_seed())
    data_reader.load_data()
    URM_train, URM_test = data_reader.get_holdout_split()

    # Build ICMs
    ICM_all = get_ICM_train(data_reader)

    # Build UCMs
    UCM_all = get_UCM_train(data_reader)

    model = HybridWeightedAverageRecommender(URM_train, normalize=NORMALIZE)

    all_models = _get_all_models(URM_train=URM_train,
                                 UCM_all=UCM_all,
                                 ICM_all=ICM_all)
    for model_name, model_object in all_models.items():
        model.add_fitted_model(model_name, model_object)
    print("The models added in the hybrid are: {}".format(
        list(all_models.keys())))

    # Setting evaluator