Example #1
    def load_model(self, folder_path, file_name=None):

        if file_name is None:
            file_name = self.RECOMMENDER_NAME

        self._print("Loading model from file '{}'".format(folder_path + file_name))

        dataIO = DataIO(folder_path=folder_path)
        data_dict = dataIO.load_data(file_name=file_name)

        self.args = ArgsInterface()


        for attrib_name in data_dict.keys():

            # Attributes saved with the "_args_" prefix belong to the args object,
            # all other attributes are set directly on the recommender instance
            if attrib_name.startswith("_args_"):
                data_dict_key = attrib_name
                attrib_name = attrib_name[len("_args_"):]
                setattr(self.args, attrib_name, data_dict[data_dict_key])

            else:
                setattr(self, attrib_name, data_dict[attrib_name])


        self.dataset = DatasetInterface(URM_train=self.URM_train)

        ConvNCF.tf.reset_default_graph()

        self.sess = ConvNCF.tf.Session()

        ConvNCF.TRAIN_KEEP_PROB = self.args.keep
        self.model = ConvNCF.ConvNCF(self.dataset.num_users, self.dataset.num_items, self.args)
        self.model.build_graph()
        ConvNCF.initialize(self.model, self.dataset, self.args)
        ConvNCF._model = self.model


        # Restore the TensorFlow session saved alongside the model; the session
        # created above is reused here (the original code opened a second,
        # redundant session, leaking the first)
        saver = ConvNCF.tf.train.Saver()
        saver.restore(self.sess, folder_path + file_name + "_session")

        ConvNCF._sess = self.sess

        self._print("Loading complete")
Example #2
    def _run_epoch(self, currentEpoch):

        batch_time, train_time = ConvNCF.run_epoch(model=self.model,
                                                   epoch_count=currentEpoch,
                                                   args=self.args,
                                                   dataset=self.dataset,
                                                   verbose=False,
                                                   original_evaluation=False)

        print("{}: Epoch: {} batch cost: {} train cost: {}".format(self.RECOMMENDER_NAME,currentEpoch, batch_time, train_time))
Example #3
def read_data_split_and_search(dataset_name,
                               flag_baselines_tune=False,
                               flag_DL_article_default=False,
                               flag_DL_tune=False,
                               flag_print_results=False):

    result_folder_path = "result_experiments/{}/{}_{}/".format(
        CONFERENCE_NAME, ALGORITHM_NAME, dataset_name)

    if dataset_name == "gowalla":
        dataset = GowallaReader(result_folder_path)

    elif dataset_name == "yelp":
        dataset = YelpReader(result_folder_path)

    else:
        print("Dataset name not supported, current is {}".format(dataset_name))
        return

    print('Current dataset is: {}'.format(dataset_name))

    URM_train = dataset.URM_DICT["URM_train"].copy()
    URM_validation = dataset.URM_DICT["URM_validation"].copy()
    URM_test = dataset.URM_DICT["URM_test"].copy()
    URM_test_negative = dataset.URM_DICT["URM_test_negative"].copy()

    print_negative_items_stats(URM_train, URM_validation, URM_test,
                               URM_test_negative)

    # Ensure IMPLICIT data
    from Utils.assertions_on_data_for_experiments import assert_implicit_data, assert_disjoint_matrices

    assert_implicit_data(
        [URM_train, URM_validation, URM_test, URM_test_negative])

    # URM_test_negative contains items that also appear in train and test,
    # so it is excluded from the disjointness check
    assert_disjoint_matrices([URM_train, URM_validation, URM_test])

    # If the directory does not exist, create it
    if not os.path.exists(result_folder_path):
        os.makedirs(result_folder_path)

    collaborative_algorithm_list = [
        Random,
        TopPop,
        UserKNNCFRecommender,
        ItemKNNCFRecommender,
        P3alphaRecommender,
        RP3betaRecommender,
        PureSVDRecommender,
        NMFRecommender,
        IALSRecommender,
        MatrixFactorization_BPR_Cython,
        MatrixFactorization_FunkSVD_Cython,
        EASE_R_Recommender,
        SLIM_BPR_Cython,
        SLIMElasticNetRecommender,
    ]

    metric_to_optimize = "NDCG"
    n_cases = 50
    n_random_starts = 15

    from Base.Evaluation.Evaluator import EvaluatorNegativeItemSample

    cutoff_list_validation = [10]
    cutoff_list_test = [5, 10, 20]

    evaluator_validation = EvaluatorNegativeItemSample(
        URM_validation, URM_test_negative, cutoff_list=cutoff_list_validation)
    evaluator_test = EvaluatorNegativeItemSample(URM_test,
                                                 URM_test_negative,
                                                 cutoff_list=cutoff_list_test)

    runParameterSearch_Collaborative_partial = partial(
        runParameterSearch_Collaborative,
        URM_train=URM_train,
        URM_train_last_test=URM_train + URM_validation,
        metric_to_optimize=metric_to_optimize,
        evaluator_validation_earlystopping=evaluator_validation,
        evaluator_validation=evaluator_validation,
        evaluator_test=evaluator_test,
        output_folder_path=result_folder_path,
        parallelizeKNN=False,
        allow_weighting=True,
        resume_from_saved=True,
        n_cases=n_cases,
        n_random_starts=n_random_starts)

    if flag_baselines_tune:

        for recommender_class in collaborative_algorithm_list:
            try:
                runParameterSearch_Collaborative_partial(recommender_class)
            except Exception as e:
                print("On recommender {} Exception {}".format(
                    recommender_class, str(e)))
                traceback.print_exc()

    ################################################################################################
    ######
    ######      DL ALGORITHM
    ######

    if flag_DL_article_default:

        # Hyperparameters reported in the original article
        article_hyperparameters = {
            "batch_size": 512,
            "epochs": 1500,
            "epochs_MFBPR": 500,
            "embedding_size": 64,
            "hidden_size": 128,
            "negative_sample_per_positive": 1,
            "negative_instances_per_positive": 4,
            "regularization_users_items": 0.01,
            "regularization_weights": 10,
            "regularization_filter_weights": 1,
            "learning_rate_embeddings": 0.05,
            "learning_rate_CNN": 0.05,
            "channel_size": [32, 32, 32, 32, 32, 32],
            "dropout": 0.0,
            "epoch_verbose": 1,
        }

        earlystopping_hyperparameters = {
            "validation_every_n": 5,
            "stop_on_validation": True,
            "lower_validations_allowed": 5,
            "evaluator_object": evaluator_validation,
            "validation_metric": metric_to_optimize,
            "epochs_min": 150
        }

        parameterSearch = SearchSingleCase(
            ConvNCF_RecommenderWrapper,
            evaluator_validation=evaluator_validation,
            evaluator_test=evaluator_test)

        recommender_input_args = SearchInputRecommenderArgs(
            CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
            FIT_KEYWORD_ARGS=earlystopping_hyperparameters)

        recommender_input_args_last_test = recommender_input_args.copy()
        recommender_input_args_last_test.CONSTRUCTOR_POSITIONAL_ARGS[
            0] = URM_train + URM_validation

        parameterSearch.search(
            recommender_input_args,
            recommender_input_args_last_test=recommender_input_args_last_test,
            fit_hyperparameters_values=article_hyperparameters,
            output_folder_path=result_folder_path,
            resume_from_saved=True,
            output_file_name_root=ConvNCF_RecommenderWrapper.RECOMMENDER_NAME)

        # Remember to close the global session, since ConvNCF uses global variables
        ConvNCF.close_session(verbose=True)

    ################################################################################################
    ######
    ######      PRINT RESULTS
    ######

    if flag_print_results:

        n_test_users = np.sum(np.ediff1d(URM_test.indptr) >= 1)
        file_name = "{}..//{}_{}_".format(result_folder_path, ALGORITHM_NAME,
                                          dataset_name)

        result_loader = ResultFolderLoader(
            result_folder_path,
            base_algorithm_list=None,
            other_algorithm_list=[ConvNCF_RecommenderWrapper],
            KNN_similarity_list=KNN_similarity_to_report_list,
            ICM_names_list=None,
            UCM_names_list=None)

        result_loader.generate_latex_results(
            file_name + "{}_latex_results.txt".format("article_metrics"),
            metrics_list=["HIT_RATE", "NDCG"],
            cutoffs_list=cutoff_list_test,
            table_title=None,
            highlight_best=True)

        result_loader.generate_latex_results(
            file_name + "{}_latex_results.txt".format("all_metrics"),
            metrics_list=[
                "PRECISION", "RECALL", "MAP_MIN_DEN", "MRR", "NDCG", "F1",
                "HIT_RATE", "ARHR_ALL_HITS", "NOVELTY",
                "DIVERSITY_MEAN_INTER_LIST", "DIVERSITY_HERFINDAHL",
                "COVERAGE_ITEM", "DIVERSITY_GINI", "SHANNON_ENTROPY"
            ],
            cutoffs_list=cutoff_list_validation,
            table_title=None,
            highlight_best=True)

        result_loader.generate_latex_time_statistics(
            file_name + "{}_latex_results.txt".format("time"),
            n_evaluation_users=n_test_users,
            table_title=None)
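
A sketch of how this function is typically invoked from a run script; the loop over datasets and the flag values are assumptions:

# Hypothetical driver for read_data_split_and_search; the dataset names
# match the two readers supported above, the flag values are illustrative.
if __name__ == '__main__':
    for dataset_name in ["gowalla", "yelp"]:
        read_data_split_and_search(dataset_name,
                                   flag_baselines_tune=True,
                                   flag_DL_article_default=True,
                                   flag_print_results=True)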
Example #4
    def fit(self,
            batch_size=512,
            epochs=1500,
            epochs_MFBPR=500,
            load_pretrained_MFBPR_if_available=False,
            embedding_size=64,
            hidden_size=128,
            negative_sample_per_positive=1,
            negative_instances_per_positive=4,
            regularization_users_items=0.01,
            regularization_weights=10,
            regularization_filter_weights=1,
            learning_rate_embeddings=0.05,
            learning_rate_CNN=0.05,
            channel_size=[32, 32, 32, 32, 32, 32],
            dropout=0.0,
            epoch_verbose=25,
            temp_file_folder=None,
            **earlystopping_kwargs):

        if load_pretrained_MFBPR_if_available:
            self.temp_file_folder = temp_file_folder
            self._use_default_temp_folder = False
        else:
            self.temp_file_folder = self._get_unique_temp_folder(
                input_temp_file_folder=temp_file_folder)

        # initialize models
        print("{}: Init model...".format(self.RECOMMENDER_NAME))

        self.dataset = DatasetInterface(URM_train=self.URM_train)
        self.epochs_best_MFBPR = None

        self.args = ArgsInterface()
        self.args.dataset = 'no_dataset_name'
        self.args.model = self.RECOMMENDER_NAME
        self.args.verbose = epoch_verbose
        self.args.batch_size = batch_size
        self.args.embed_size = embedding_size
        self.args.hidden_size = hidden_size
        self.args.dns = negative_sample_per_positive
        self.args.regs = [
            regularization_users_items, regularization_weights,
            regularization_filter_weights
        ]
        self.args.task = 'no_task_name'
        self.args.num_neg = negative_instances_per_positive
        self.args.lr_embed = learning_rate_embeddings
        self.args.lr_net = learning_rate_CNN
        self.args.net_channel = channel_size
        self.args.pretrain = 1
        self.args.ckpt = 0
        self.args.train_auc = 0
        self.args.keep = 1 - dropout
        self.args.path_partial_results = self.temp_file_folder

        # Pre train the weights for ConvNCF net
        if load_pretrained_MFBPR_if_available and os.path.isfile(
                self.args.path_partial_results +
                "best_model_latent_factors.npy"):
            print("{}: MF_BPR_model found in '{}', skipping training!".format(
                self.RECOMMENDER_NAME, self.args.path_partial_results))

        else:
            print("{}: MF_BPR_model not found in '{}', training!".format(
                self.RECOMMENDER_NAME, self.args.path_partial_results))

            MF_BPR_model = MFBPR_Wrapper(self.URM_train)
            MF_BPR_model.fit(
                batch_size=512,
                epochs=epochs_MFBPR,
                embed_size=64,
                negative_sample_per_positive=1,
                regularization_users=0.01,
                regularization_items=0.0,
                learning_rate=0.05,
                epoch_evaluation=25,
                train_auc_verbose=0,
                path_partial_results=self.args.path_partial_results,
                **earlystopping_kwargs,
            )

            self.epochs_best_MFBPR = MF_BPR_model.epochs_best_MFBPR

            MF_BPR_model._dealloc_global_variables()

        ConvNCF.init_logging(self.args)
        ConvNCF.TRAIN_KEEP_PROB = self.args.keep
        ConvNCF.tf.reset_default_graph()

        self.model = ConvNCF.ConvNCF(self.dataset.num_users,
                                     self.dataset.num_items, self.args)
        self.model.build_graph()
        ConvNCF.initialize(self.model, self.dataset, self.args)
        self.sess = ConvNCF.get_session()

        print("{}: Init model... done!".format(self.RECOMMENDER_NAME))

        self._update_best_model()

        self._train_with_early_stopping(epochs_max=epochs,
                                        algorithm_name=self.RECOMMENDER_NAME,
                                        **earlystopping_kwargs)

        # Close the global TensorFlow session; load_model below opens a fresh
        # one itself (the original code created an extra, unused session here)
        ConvNCF.close_session()

        self.load_model(self.temp_file_folder, file_name="_best_model")

        self._print("Training complete")

        self._clean_temp_folder(temp_file_folder=self.temp_file_folder)
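
A usage sketch for fit, passing the early-stopping keyword arguments defined as earlystopping_hyperparameters in Example #3; the save_model call is an assumed counterpart of load_model from Example #1:

# Hypothetical usage: train with the article's defaults plus early
# stopping, then persist the best model. save_model is assumed to be
# the counterpart of load_model shown in Example #1.
recommender = ConvNCF_RecommenderWrapper(URM_train)
recommender.fit(epochs=1500,
                validation_every_n=5,
                stop_on_validation=True,
                lower_validations_allowed=5,
                evaluator_object=evaluator_validation,
                validation_metric="NDCG",
                epochs_min=150)
recommender.save_model("result_experiments/ConvNCF/", file_name="_best_model")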