def load_model(self, folder_path, file_name=None):

    if file_name is None:
        file_name = self.RECOMMENDER_NAME

    self._print("Loading model from file '{}'".format(folder_path + file_name))

    dataIO = DataIO(folder_path=folder_path)
    data_dict = dataIO.load_data(file_name=file_name)

    self.args = ArgsInterface()

    # Attributes saved with the "_args_" prefix belong to the args namespace,
    # all others are restored directly on the recommender instance
    for attrib_name in data_dict.keys():
        if attrib_name.startswith("_args_"):
            data_dict_key = attrib_name
            attrib_name = attrib_name[len("_args_"):]
            setattr(self.args, attrib_name, data_dict[data_dict_key])
        else:
            setattr(self, attrib_name, data_dict[attrib_name])

    self.dataset = DatasetInterface(URM_train=self.URM_train)

    # Rebuild the TF graph, then restore the trained session from disk
    ConvNCF.tf.reset_default_graph()

    ConvNCF.TRAIN_KEEP_PROB = self.args.keep

    self.model = ConvNCF.ConvNCF(self.dataset.num_users, self.dataset.num_items, self.args)
    self.model.build_graph()

    ConvNCF.initialize(self.model, self.dataset, self.args)
    ConvNCF._model = self.model

    saver = ConvNCF.tf.train.Saver()

    # A single session is sufficient here: the original code created a second,
    # leaked tf.Session() before this point, which has been removed
    self.sess = ConvNCF.tf.Session()
    saver.restore(self.sess, folder_path + file_name + "_session")
    ConvNCF._sess = self.sess

    self._print("Loading complete")
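# A minimal restore sketch (illustrative: "result_experiments/ConvNCF/" is a
# hypothetical folder assumed to contain a model previously saved by this
# wrapper; the "_best_model" suffix matches the one used at the end of fit):
#
#     recommender = ConvNCF_RecommenderWrapper(URM_train)
#     recommender.load_model("result_experiments/ConvNCF/", file_name="_best_model")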
def _run_epoch(self, currentEpoch):

    # Run a single training epoch through the ConvNCF module and log its timing
    batch_time, train_time = ConvNCF.run_epoch(model=self.model,
                                               epoch_count=currentEpoch,
                                               args=self.args,
                                               dataset=self.dataset,
                                               verbose=False,
                                               original_evaluation=False)

    print("{}: Epoch: {} batch time: {} train time: {}".format(
        self.RECOMMENDER_NAME, currentEpoch, batch_time, train_time))
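# _run_epoch is the per-epoch hook invoked by the framework's early-stopping
# routine. A sketch of the expected calling pattern (hypothetical loop shown
# only for clarity; the real logic lives in _train_with_early_stopping):
#
#     for currentEpoch in range(epochs_max):
#         self._run_epoch(currentEpoch)
#         # every validation_every_n epochs: evaluate and keep the best model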
def read_data_split_and_search(dataset_name,
                               flag_baselines_tune=False,
                               flag_DL_article_default=False,
                               flag_DL_tune=False,
                               flag_print_results=False):

    result_folder_path = "result_experiments/{}/{}_{}/".format(CONFERENCE_NAME, ALGORITHM_NAME, dataset_name)

    if dataset_name == "gowalla":
        dataset = GowallaReader(result_folder_path)
    elif dataset_name == "yelp":
        dataset = YelpReader(result_folder_path)
    else:
        print("Dataset name not supported, current is {}".format(dataset_name))
        return

    print('Current dataset is: {}'.format(dataset_name))

    URM_train = dataset.URM_DICT["URM_train"].copy()
    URM_validation = dataset.URM_DICT["URM_validation"].copy()
    URM_test = dataset.URM_DICT["URM_test"].copy()
    URM_test_negative = dataset.URM_DICT["URM_test_negative"].copy()

    print_negative_items_stats(URM_train, URM_validation, URM_test, URM_test_negative)

    # Ensure IMPLICIT data
    from Utils.assertions_on_data_for_experiments import assert_implicit_data, assert_disjoint_matrices

    assert_implicit_data([URM_train, URM_validation, URM_test, URM_test_negative])

    # URM_test_negative contains interactions that also appear in train and test,
    # so it is excluded from the disjointness check
    assert_disjoint_matrices([URM_train, URM_validation, URM_test])

    # If directory does not exist, create it
    if not os.path.exists(result_folder_path):
        os.makedirs(result_folder_path)

    collaborative_algorithm_list = [
        Random,
        TopPop,
        UserKNNCFRecommender,
        ItemKNNCFRecommender,
        P3alphaRecommender,
        RP3betaRecommender,
        PureSVDRecommender,
        NMFRecommender,
        IALSRecommender,
        MatrixFactorization_BPR_Cython,
        MatrixFactorization_FunkSVD_Cython,
        EASE_R_Recommender,
        SLIM_BPR_Cython,
        SLIMElasticNetRecommender,
    ]

    metric_to_optimize = "NDCG"
    n_cases = 50
    n_random_starts = 15

    from Base.Evaluation.Evaluator import EvaluatorNegativeItemSample

    cutoff_list_validation = [10]
    cutoff_list_test = [5, 10, 20]

    evaluator_validation = EvaluatorNegativeItemSample(URM_validation, URM_test_negative,
                                                       cutoff_list=cutoff_list_validation)
    evaluator_test = EvaluatorNegativeItemSample(URM_test, URM_test_negative,
                                                 cutoff_list=cutoff_list_test)

    runParameterSearch_Collaborative_partial = partial(runParameterSearch_Collaborative,
                                                       URM_train=URM_train,
                                                       URM_train_last_test=URM_train + URM_validation,
                                                       metric_to_optimize=metric_to_optimize,
                                                       evaluator_validation_earlystopping=evaluator_validation,
                                                       evaluator_validation=evaluator_validation,
                                                       evaluator_test=evaluator_test,
                                                       output_folder_path=result_folder_path,
                                                       parallelizeKNN=False,
                                                       allow_weighting=True,
                                                       resume_from_saved=True,
                                                       n_cases=n_cases,
                                                       n_random_starts=n_random_starts)

    if flag_baselines_tune:
        for recommender_class in collaborative_algorithm_list:
            try:
                runParameterSearch_Collaborative_partial(recommender_class)
            except Exception as e:
                print("On recommender {} Exception {}".format(recommender_class, str(e)))
                traceback.print_exc()

    ################################################################################################
    ######
    ######      DL ALGORITHM
    ######

    if flag_DL_article_default:

        article_hyperparameters = {
            "batch_size": 512,
            "epochs": 1500,
            "epochs_MFBPR": 500,
            "embedding_size": 64,
            "hidden_size": 128,
            "negative_sample_per_positive": 1,
            "negative_instances_per_positive": 4,
            "regularization_users_items": 0.01,
            "regularization_weights": 10,
            "regularization_filter_weights": 1,
            "learning_rate_embeddings": 0.05,
            "learning_rate_CNN": 0.05,
            "channel_size": [32, 32, 32, 32, 32, 32],
            "dropout": 0.0,
            "epoch_verbose": 1,
        }

        earlystopping_hyperparameters = {
            "validation_every_n": 5,
            "stop_on_validation": True,
            "lower_validations_allowed": 5,
            "evaluator_object": evaluator_validation,
            "validation_metric": metric_to_optimize,
            "epochs_min": 150,
        }

        parameterSearch = SearchSingleCase(ConvNCF_RecommenderWrapper,
                                           evaluator_validation=evaluator_validation,
                                           evaluator_test=evaluator_test)

        recommender_input_args = SearchInputRecommenderArgs(
            CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
            FIT_KEYWORD_ARGS=earlystopping_hyperparameters)

        # For the final evaluation the model is retrained on train + validation data
        recommender_input_args_last_test = recommender_input_args.copy()
        recommender_input_args_last_test.CONSTRUCTOR_POSITIONAL_ARGS[0] = URM_train + URM_validation

        parameterSearch.search(recommender_input_args,
                               recommender_input_args_last_test=recommender_input_args_last_test,
                               fit_hyperparameters_values=article_hyperparameters,
                               output_folder_path=result_folder_path,
                               resume_from_saved=True,
                               output_file_name_root=ConvNCF_RecommenderWrapper.RECOMMENDER_NAME)

        # Remember to close the global session, since ConvNCF uses global variables
        ConvNCF.close_session(verbose=True)

    ################################################################################################
    ######
    ######      PRINT RESULTS
    ######

    if flag_print_results:

        # Count users with at least one test interaction
        n_test_users = np.sum(np.ediff1d(URM_test.indptr) >= 1)

        # Result tables are written one folder above the result folder
        file_name = "{}..//{}_{}_".format(result_folder_path, ALGORITHM_NAME, dataset_name)

        result_loader = ResultFolderLoader(result_folder_path,
                                           base_algorithm_list=None,
                                           other_algorithm_list=[ConvNCF_RecommenderWrapper],
                                           KNN_similarity_list=KNN_similarity_to_report_list,
                                           ICM_names_list=None,
                                           UCM_names_list=None)

        result_loader.generate_latex_results(file_name + "{}_latex_results.txt".format("article_metrics"),
                                             metrics_list=["HIT_RATE", "NDCG"],
                                             cutoffs_list=cutoff_list_test,
                                             table_title=None,
                                             highlight_best=True)

        result_loader.generate_latex_results(file_name + "{}_latex_results.txt".format("all_metrics"),
                                             metrics_list=["PRECISION", "RECALL", "MAP_MIN_DEN", "MRR", "NDCG", "F1",
                                                           "HIT_RATE", "ARHR_ALL_HITS", "NOVELTY",
                                                           "DIVERSITY_MEAN_INTER_LIST", "DIVERSITY_HERFINDAHL",
                                                           "COVERAGE_ITEM", "DIVERSITY_GINI", "SHANNON_ENTROPY"],
                                             cutoffs_list=cutoff_list_validation,
                                             table_title=None,
                                             highlight_best=True)

        result_loader.generate_latex_time_statistics(file_name + "{}_latex_results.txt".format("time"),
                                                     n_evaluation_users=n_test_users,
                                                     table_title=None)
def fit(self,
        batch_size=512,
        epochs=1500,
        epochs_MFBPR=500,
        load_pretrained_MFBPR_if_available=False,
        embedding_size=64,
        hidden_size=128,
        negative_sample_per_positive=1,
        negative_instances_per_positive=4,
        regularization_users_items=0.01,
        regularization_weights=10,
        regularization_filter_weights=1,
        learning_rate_embeddings=0.05,
        learning_rate_CNN=0.05,
        channel_size=[32, 32, 32, 32, 32, 32],
        dropout=0.0,
        epoch_verbose=25,
        temp_file_folder=None,
        **earlystopping_kwargs):

    if load_pretrained_MFBPR_if_available:
        self.temp_file_folder = temp_file_folder
        self._use_default_temp_folder = False
    else:
        self.temp_file_folder = self._get_unique_temp_folder(input_temp_file_folder=temp_file_folder)

    # Initialize models
    print("{}: Init model...".format(self.RECOMMENDER_NAME))

    self.dataset = DatasetInterface(URM_train=self.URM_train)
    self.epochs_best_MFBPR = None

    # Map the wrapper hyperparameters onto the args namespace expected by ConvNCF
    self.args = ArgsInterface()
    self.args.dataset = 'no_dataset_name'
    self.args.model = self.RECOMMENDER_NAME
    self.args.verbose = epoch_verbose
    self.args.batch_size = batch_size
    self.args.embed_size = embedding_size
    self.args.hidden_size = hidden_size
    self.args.dns = negative_sample_per_positive
    self.args.regs = [regularization_users_items, regularization_weights, regularization_filter_weights]
    self.args.task = 'no_task_name'
    self.args.num_neg = negative_instances_per_positive
    self.args.lr_embed = learning_rate_embeddings
    self.args.lr_net = learning_rate_CNN
    self.args.net_channel = channel_size
    self.args.pretrain = 1
    self.args.ckpt = 0
    self.args.train_auc = 0
    self.args.keep = 1 - dropout
    self.args.path_partial_results = self.temp_file_folder

    # Pretrain the embeddings for the ConvNCF net with MF-BPR
    if load_pretrained_MFBPR_if_available and os.path.isfile(self.args.path_partial_results + "best_model_latent_factors.npy"):
        print("{}: MF_BPR_model found in '{}', skipping training!".format(self.RECOMMENDER_NAME, self.args.path_partial_results))
    else:
        print("{}: MF_BPR_model not found in '{}', training!".format(self.RECOMMENDER_NAME, self.args.path_partial_results))

        MF_BPR_model = MFBPR_Wrapper(self.URM_train)
        MF_BPR_model.fit(batch_size=512,
                         epochs=epochs_MFBPR,
                         embed_size=64,
                         negative_sample_per_positive=1,
                         regularization_users=0.01,
                         regularization_items=0.0,
                         learning_rate=0.05,
                         epoch_evaluation=25,
                         train_auc_verbose=0,
                         path_partial_results=self.args.path_partial_results,
                         **earlystopping_kwargs)

        self.epochs_best_MFBPR = MF_BPR_model.epochs_best_MFBPR
        MF_BPR_model._dealloc_global_variables()

    ConvNCF.init_logging(self.args)
    ConvNCF.TRAIN_KEEP_PROB = self.args.keep

    ConvNCF.tf.reset_default_graph()

    self.model = ConvNCF.ConvNCF(self.dataset.num_users, self.dataset.num_items, self.args)
    self.model.build_graph()

    ConvNCF.initialize(self.model, self.dataset, self.args)
    self.sess = ConvNCF.get_session()

    print("{}: Init model... done!".format(self.RECOMMENDER_NAME))

    self._update_best_model()

    self._train_with_early_stopping(epochs_max=epochs,
                                    algorithm_name=self.RECOMMENDER_NAME,
                                    **earlystopping_kwargs)

    # Close the TensorFlow session used for training; load_model rebuilds the
    # graph and creates its own session, so the dead tf.Session() that the
    # original code opened here has been removed
    ConvNCF.close_session()

    self.load_model(self.temp_file_folder, file_name="_best_model")

    self._print("Training complete")

    self._clean_temp_folder(temp_file_folder=self.temp_file_folder)
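# A minimal fit-and-restore sketch (assumes URM_train and evaluator_validation
# built as in read_data_split_and_search; the keyword values mirror the
# article_hyperparameters and earlystopping_hyperparameters dictionaries above):
#
#     recommender = ConvNCF_RecommenderWrapper(URM_train)
#     recommender.fit(epochs=1500,
#                     epochs_MFBPR=500,
#                     validation_every_n=5,
#                     stop_on_validation=True,
#                     lower_validations_allowed=5,
#                     evaluator_object=evaluator_validation,
#                     validation_metric="NDCG",
#                     epochs_min=150)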