def sample_triple_time(self, dump=True, load_save=False):
    """Sample time-aware triples, or load previously dumped samples.

    Only applicable to basket-based recommenders.

    The sample file is placed in ``<root_dir>/<process_dir>/`` and is named
    ``triple_<dataset>[_<percent * 100>]_<time_step or 10>_<n_sample>.csv``.

    Args:
        dump: Forwarded unchanged to ``Sampler`` as its ``dump`` argument.
        load_save: Forwarded unchanged to ``Sampler`` as its ``load_save``
            argument.

    Returns:
        The result of ``Sampler.sample_by_time`` for the configured
        ``time_step``.

    Raises:
        KeyError: If ``dataset``, ``n_sample``, ``root_dir``, ``process_dir``
            or ``time_step`` is missing from ``self.config``. The file name
            falls back to ``10`` when ``time_step`` is absent, but the
            sampling call itself still reads the key.
    """
    config = self.config

    percent_part = (
        "_" + str(config["percent"] * 100) if "percent" in config else ""
    )
    time_step_part = (
        "_" + str(config["time_step"]) if "time_step" in config else "_10"
    )
    # Each component is appended unconditionally. The previous expression
    # ended in `... if "percent" in self.config else "" + ".csv"`, and since a
    # conditional expression binds more loosely than `+`, the name either
    # lacked the ".csv" suffix (percent configured) or collapsed to just
    # ".csv" (percent not configured).
    sample_file_name = (
        "triple_"
        + config["dataset"]
        + percent_part
        + time_step_part
        + "_"
        + str(config["n_sample"])
        + ".csv"
    )

    self.process_path = os.path.join(config["root_dir"], config["process_dir"])
    ensureDir(self.process_path)
    sample_file = os.path.join(self.process_path, sample_file_name)

    my_sampler = Sampler(
        self.train,
        sample_file,
        config["n_sample"],
        dump=dump,
        load_save=load_save,
    )
    # NOTE(review): the file name assumes a default time step of 10, but no
    # default is applied here -- confirm whether sampling should fall back too.
    return my_sampler.sample_by_time(config["time_step"])
def get_adj_mat(self):
    """Return the adjacency matrices for the NGCF model, using a disk cache.

    The matrices are looked up as ``.npz`` files inside
    ``<root_dir>/<process_dir>/<dataset>/ngcf_<dataset>_<data_split>[_<percent * 100>]``.
    When loading them fails for any reason, they are rebuilt through
    ``self.create_adj_mat()`` and saved to that same location.

    Return:
        Tuple ``(adj_mat, norm_adj_mat, mean_adj_mat)``.
    """
    self.init_train_items()

    cfg = self.config
    cache_name = "ngcf_" + cfg["dataset"] + "_" + cfg["data_split"]
    if "percent" in cfg:
        cache_name += "_" + str(cfg["percent"] * 100)

    self.process_path = os.path.join(
        cfg["root_dir"], cfg["process_dir"], cfg["dataset"] + "/"
    )
    cache_dir = os.path.join(self.process_path, cache_name)
    ensureDir(cache_dir)
    print(cache_dir)

    # One file per matrix, all inside the cache directory.
    adj_path = os.path.join(cache_dir, "s_adj_mat.npz")
    norm_path = os.path.join(cache_dir, "s_norm_adj_mat.npz")
    mean_path = os.path.join(cache_dir, "s_mean_adj_mat.npz")

    try:
        started = time()
        adj_mat = sp.load_npz(adj_path)
        norm_adj_mat = sp.load_npz(norm_path)
        mean_adj_mat = sp.load_npz(mean_path)
        print("already load adj matrix", adj_mat.shape, time() - started)
    except Exception:
        # Any failure while loading is treated as a cache miss: rebuild all
        # three matrices and write them out for the next call.
        adj_mat, norm_adj_mat, mean_adj_mat = self.create_adj_mat()
        to_save = (
            (adj_path, adj_mat),
            (norm_path, norm_adj_mat),
            (mean_path, mean_adj_mat),
        )
        for path, matrix in to_save:
            sp.save_npz(path, matrix)

    return adj_mat, norm_adj_mat, mean_adj_mat
def prepare_env(config):
    """Prepare the running environment and return the merged configuration.

    Steps, in order:

    - Default ``root_dir`` to the parent of this file's directory if absent.
    - Load the JSON file named by ``config["config_file"]`` and overlay
      ``config`` on top of it, so the passed-in values take precedence.
    - Build a unique ``model_run_id`` from the model name, the config id, a
      timestamp and six random lowercase letters.
    - Set the random seed (2020 when ``seed`` is not configured) and
      initialize the system folders.
    - Initialize logging to ``<root_dir>/<log_dir>/<model_run_id>``.
    - Resolve ``run_dir``, ``model_save_dir`` and ``result_file`` under
      ``root_dir``.
    - Print the resulting configuration as a table.

    Args:
        config (dict): Global configs. Must contain ``config_file``. A missing
            ``root_dir`` is added to this dict in place.

    Returns:
        dict: The configuration loaded from the JSON file, updated with
        ``config`` and the derived paths.
    """
    # Obtain the absolute path of the project. The original author notes that
    # root_dir needs to be specified explicitly when running in a container.
    if "root_dir" not in config:
        here = os.path.dirname(os.path.abspath(__file__))
        config["root_dir"] = os.path.abspath(os.path.join(here, ".."))

    # Load the base parameters from the JSON config file.
    with open(config["config_file"]) as config_params:
        print("loading config file", config["config_file"])
        merged = json.load(config_params)
    print(f"Current pid: {os.getpid()}")

    # Parameters received from the caller override those from the file.
    merged.update(config)
    config = merged

    # Unique run id: model name, config id, timestamp and a random suffix.
    # The suffix is drawn before set_seed() is called below.
    timestamp_str = datetime.now().strftime("%Y%m%d_%H%M%S")
    random_str = "".join(
        random.choice(string.ascii_lowercase) for _ in range(6)
    )
    run_id = "_".join(
        (config["model"], config["config_id"], timestamp_str, random_str)
    )
    config["model_run_id"] = run_id

    set_seed(config.get("seed", 2020))
    root_dir = config["root_dir"]
    initialize_folders(root_dir)

    # Log file.
    config["log_file"] = os.path.join(root_dir, config["log_dir"], run_id)
    logger.init_std_logger(config["log_file"])
    print("python version:", sys.version)
    print("pytorch version:", torch.__version__)

    # Folder for intermediate run statuses.
    config["run_dir"] = os.path.join(root_dir, config["run_dir"], run_id)
    print(
        "The intermediate running statuses will be reported in folder:",
        config["run_dir"],
    )

    # Folder for model checkpoints.
    config["model_save_dir"] = os.path.join(
        root_dir, config["checkpoint_dir"], run_id
    )
    ensureDir(config["model_save_dir"])
    print("Model checkpoint will save in file:", config["model_save_dir"])

    # Performance result file.
    config["result_file"] = os.path.join(
        root_dir, config["result_dir"], config["result_file"]
    )
    print("Performance result will save in file:", config["result_file"])

    print_dict_as_table(config, "Model configs")
    return config