Exemplo n.º 1
0
def main():
    args = get_arguments()

    # Data loading
    root_data_path = args.reader_path
    data_reader = RecSys2019Reader(root_data_path)
    data_reader = New_DataSplitter_leave_k_out(data_reader, k_out_value=K_OUT, allow_cold_users=ALLOW_COLD_USERS,
                                               use_validation_set=False, force_new_split=True, seed=args.seed)
    data_reader.load_data()
    URM_train, URM_test = data_reader.get_holdout_split()

    # Remove interactions to users that has len == 1 to URM_train
    len_1_users_mask = np.ediff1d(URM_train.tocsr().indptr) == 1
    len_1_users = np.arange(URM_train.shape[0])[len_1_users_mask]

    URM_train = URM_train.tolil()
    URM_train[len_1_users, :] = 0
    URM_train = URM_train.tocsr()

    # Remove interactions to users that has len == 1 to URM_test
    len_1_users_mask = np.ediff1d(URM_test.tocsr().indptr) == 1
    len_1_users = np.arange(URM_test.shape[0])[len_1_users_mask]

    URM_test = URM_test.tolil()
    URM_test[len_1_users, :] = 0
    URM_test = URM_test.tocsr()

    UCM_all = get_UCM_train_cold(data_reader)

    ignore_users = get_ignore_users(URM_train, data_reader.get_original_user_id_to_index_mapper(),
                                    lower_threshold=args.lower_threshold, upper_threshold=args.upper_threshold,
                                    ignore_non_target_users=args.exclude_non_target)
    ignore_users = np.concatenate([ignore_users, len_1_users])

    # Setting evaluator
    cutoff_list = [10]
    evaluator = EvaluatorHoldout(URM_test, cutoff_list=cutoff_list, ignore_users=ignore_users)

    # HP tuning
    print("Start tuning...")
    version_path = "../../report/hp_tuning/{}/".format(args.recommender_name)
    now = datetime.now().strftime('%b%d_%H-%M-%S')
    now = now + "_k_out_value_{}/".format(K_OUT)
    version_path = version_path + "/" + now

    run_parameter_search_user_demographic(URM_train=URM_train, UCM_object=UCM_all, UCM_name="UCM_all",
                                          recommender_class=RECOMMENDER_CLASS_DICT[args.recommender_name],
                                          evaluator_validation=evaluator,
                                          metric_to_optimize="MAP",
                                          output_folder_path=version_path,
                                          parallelizeKNN=True,
                                          n_cases=int(args.n_cases),
                                          n_random_starts=int(args.n_random_starts))

    print("...tuning ended")
Exemplo n.º 2
0
def main():
    args = get_arguments()

    # Data loading
    data_reader = RecSys2019Reader(args.reader_path)
    data_reader = New_DataSplitter_leave_k_out(data_reader, k_out_value=3, use_validation_set=False,
                                               force_new_split=True, seed=args.seed)
    data_reader.load_data()
    URM_train, URM_test = data_reader.get_holdout_split()

    if args.recommender_name == "sslim_bpr":
        ICM_all = get_ICM_train(data_reader)
        URM_train = sps.vstack([URM_train, ICM_all.T], format="csr")
    if args.recommender_name == "rp3beta_side":
        ICM_all = get_ICM_train(data_reader)
        URM_train = sps.vstack([URM_train, ICM_all.T], format="csr")
        URM_train = TF_IDF(URM_train).tocsr()
    if args.recommender_name == "pure_svd":
        URM_train = TF_IDF(URM_train).tocsr()
    if args.recommender_name == "pure_svd_side":
        ICM_all = get_ICM_train(data_reader)
        URM_train = sps.vstack([URM_train, ICM_all.T], format="csr")

    # Setting evaluator
    exclude_cold_users = args.exclude_users
    h = int(args.focus_on_high)
    fol = int(args.focus_on_low)
    if h != 0:
        print("Excluding users with less than {} interactions".format(h))
        ignore_users_mask = np.ediff1d(URM_train.tocsr().indptr) < h
        ignore_users = np.arange(URM_train.shape[0])[ignore_users_mask]
    elif fol != 0:
        print("Excluding users with more than {} interactions".format(fol))
        warm_users_mask = np.ediff1d(URM_train.tocsr().indptr) > fol
        ignore_users = np.arange(URM_train.shape[0])[warm_users_mask]
        if exclude_cold_users:
            cold_user_mask = np.ediff1d(URM_train.tocsr().indptr) == 0
            cold_users = np.arange(URM_train.shape[0])[cold_user_mask]
            ignore_users = np.unique(np.concatenate((cold_users, ignore_users)))
    elif exclude_cold_users:
        print("Excluding cold users...")
        cold_user_mask = np.ediff1d(URM_train.tocsr().indptr) == 0
        ignore_users = np.arange(URM_train.shape[0])[cold_user_mask]
    else:
        ignore_users = None

    cutoff_list = [10]
    evaluator = EvaluatorHoldout(URM_test, cutoff_list=cutoff_list, ignore_users=ignore_users)

    # HP tuning
    print("Start tuning...")
    version_path = "../../report/hp_tuning/{}/".format(args.recommender_name)
    now = datetime.now().strftime('%b%d_%H-%M-%S')
    now = now + "_k_out_value_3/"
    version_path = version_path + "/" + now

    runParameterSearch_Collaborative(URM_train=URM_train,
                                     recommender_class=RECOMMENDER_CLASS_DICT[args.recommender_name],
                                     evaluator_validation=evaluator,
                                     metric_to_optimize="MAP",
                                     output_folder_path=version_path,
                                     n_cases=int(args.n_cases),
                                     n_random_starts=int(args.n_random_starts))
    print("...tuning ended")
from src.data_management.New_DataSplitter_leave_k_out import *
from src.data_management.RecSys2019Reader import RecSys2019Reader
from src.data_management.RecSys2019Reader_utils import merge_UCM, get_ICM_numerical
from src.data_management.data_getter import get_warmer_UCM
from src.model import best_models
from src.utils.general_utility_functions import get_split_seed

if __name__ == '__main__':
    os.environ["MKL_NUM_THREADS"] = "1"
    os.environ["OPENBLAS_NUM_THREADS"] = "1"

    # Data loading
    data_reader = RecSys2019Reader("../data/")
    data_reader = New_DataSplitter_leave_k_out(data_reader,
                                               k_out_value=3,
                                               use_validation_set=False,
                                               force_new_split=True,
                                               seed=get_split_seed())
    data_reader.load_data()
    URM_train, URM_test = data_reader.get_holdout_split()

    # Build ICMs
    ICM_numerical, _ = get_ICM_numerical(data_reader.dataReader_object)
    ICM = data_reader.get_ICM_from_name("ICM_sub_class")
    ICM_all, _ = merge_ICM(ICM, URM_train.transpose(), {}, {})

    # Build UCMs
    URM_all = data_reader.dataReader_object.get_URM_all()
    UCM_age = data_reader.dataReader_object.get_UCM_from_name("UCM_age")
    UCM_region = data_reader.dataReader_object.get_UCM_from_name("UCM_region")
    UCM_age_region, _ = merge_UCM(UCM_age, UCM_region, {}, {})
Exemplo n.º 4
0
from src.model import new_best_models
from src.model.FactorizationMachine.FieldAwareFMRecommender import FieldAwareFMRecommender
from src.utils.general_utility_functions import get_split_seed, get_project_root_path

if __name__ == '__main__':
    set_env_variables()

    # Data loading
    root_data_path = "../data/"
    data_reader = RecSys2019Reader(root_data_path)
    data_reader = DataPreprocessingRemoveColdUsersItems(data_reader,
                                                        threshold_users=25,
                                                        threshold_items=20)
    data_reader = New_DataSplitter_leave_k_out(data_reader,
                                               k_out_value=1,
                                               use_validation_set=False,
                                               force_new_split=True,
                                               seed=get_split_seed())
    data_reader.load_data()
    URM_train, URM_test = data_reader.get_holdout_split()

    # Build ICMs
    ICM_all, item_feature_fields = get_ICM_with_fields(data_reader)

    # Build UCMs: do not change the order of ICMs and UCMs
    UCM_all, user_feature_fields = get_UCM_with_fields(data_reader)

    cold_users_mask = np.ediff1d(URM_train.tocsr().indptr) == 0
    cold_users = np.arange(URM_train.shape[0])[cold_users_mask]

    cutoff_list = [10]
Exemplo n.º 5
0
    all_models[
        'ItemCBF_CF'] = best_models_lower_threshold_23.ItemCBF_CF.get_model(
            URM_train=URM_train, ICM_train=ICM_all)
    all_models['ItemCBF_FW'] = new_best_models.ItemCBF_all_FW.get_model(
        URM_train=URM_train, ICM_train=ICM_all)

    return all_models


if __name__ == '__main__':
    # Data loading
    root_data_path = "../../data/"
    data_reader = RecSys2019Reader(root_data_path)
    data_reader = New_DataSplitter_leave_k_out(data_reader,
                                               k_out_value=K_OUT,
                                               use_validation_set=False,
                                               force_new_split=True,
                                               seed=get_split_seed())
    data_reader.load_data()
    URM_train, URM_test = data_reader.get_holdout_split()

    # Build ICMs
    ICM_all = get_ICM_train(data_reader)

    # Build UCMs
    UCM_all = get_UCM_train(data_reader)

    model = HybridWeightedAverageRecommender(URM_train, normalize=NORMALIZE)

    all_models = _get_all_models(URM_train=URM_train,
                                 UCM_all=UCM_all,
from src.data_management.New_DataSplitter_leave_k_out import *
from src.data_management.RecSys2019Reader import RecSys2019Reader
from src.data_management.data_reader import get_UCM_train, get_ICM_train_new
from src.data_management.dataframe_preprocessing import get_preprocessed_dataframe
from src.feature.demographics_content import get_user_demographic
from src.model import new_best_models, k_1_out_best_models
from src.model.KNN.ItemKNNCBFCFRecommender import ItemKNNCBFCFRecommender
from src.plots.recommender_plots import *
from src.utils.general_utility_functions import get_split_seed

if __name__ == '__main__':
    root_data_path = "../../data"
    k_out = 1
    data_reader = RecSys2019Reader(root_data_path)
    data_reader = New_DataSplitter_leave_k_out(data_reader, k_out_value=k_out, use_validation_set=False,
                                               force_new_split=True, seed=get_split_seed())
    data_reader.load_data()
    URM_train, URM_test = data_reader.get_holdout_split()

    ICM_all, _ = get_ICM_train_new(data_reader)
    UCM_all = get_UCM_train(data_reader)

    item_cbf_cf_parm = new_best_models.ItemCBF_CF.get_best_parameters()
    item_cbf_cf_parm['topK'] = 5
    model = ItemKNNCBFCFRecommender(URM_train=URM_train, ICM_train=ICM_all)
    model.fit(**item_cbf_cf_parm)
    model.RECOMMENDER_NAME = "ItemCBFCF"

    sub4 = k_1_out_best_models.HybridNormWeightedAvgAll.get_model(URM_train, ICM_all, UCM_all)
    recommender_list = [model, sub4]