Пример #1
0
    def sample_triple_time(self, dump=True, load_save=False):
        """Sample triples by time step, or load previously saved samples.

        Only applicable for basket based Recommender.

        The samples file is placed in ``<root_dir>/<process_dir>`` and named
        ``triple_<dataset>[_<percent * 100>]_<time_step>_<n_sample>.csv``,
        where the percent part is present only when ``percent`` is configured.
        When ``time_step`` is not configured, 10 is used both in the file
        name and as the sampling time step.

        Args:
            dump: Forwarded to ``Sampler`` as its ``dump`` argument.
            load_save: Forwarded to ``Sampler`` as its ``load_save`` argument.

        Returns:
            The result of ``Sampler.sample_by_time`` for the time step.

        """
        # Use a single time step for both the file name and the sampling call,
        # so a missing "time_step" no longer raises KeyError after the file
        # name has already been built with the "_10" fallback.
        time_step = (self.config["time_step"]
                     if "time_step" in self.config else 10)
        percent_part = (("_" + str(self.config["percent"] * 100))
                        if "percent" in self.config else "")
        # Build the name from explicit parts. The previous single expression
        # ended with `... if "percent" in self.config else "" + ".csv"`, whose
        # conditional applied to the whole concatenation: the name lost its
        # ".csv" extension when "percent" was set and was only ".csv" otherwise.
        sample_file_name = ("triple_" + self.config["dataset"] +
                            percent_part + "_" + str(time_step) + "_" +
                            str(self.config["n_sample"]) + ".csv")
        self.process_path = os.path.join(self.config["root_dir"],
                                         self.config["process_dir"])
        ensureDir(self.process_path)
        sample_file = os.path.join(self.process_path, sample_file_name)
        my_sampler = Sampler(
            self.train,
            sample_file,
            self.config["n_sample"],
            dump=dump,
            load_save=load_save,
        )
        return my_sampler.sample_by_time(time_step)
Пример #2
0
    def get_adj_mat(self):
        """Return the adjacency matrices, loading them from disk when possible.

        This method is for NGCF model. The three matrices are looked up as
        ``.npz`` files under
        ``<root_dir>/<process_dir>/<dataset>/ngcf_<dataset>_<data_split>[_<percent * 100>]``.
        If loading raises any exception, the matrices are built with
        ``self.create_adj_mat()`` and saved to those same files.

        Return:
            Tuple ``(adj_mat, norm_adj_mat, mean_adj_mat)``.
        """
        self.init_train_items()

        cfg = self.config
        cache_name = "ngcf_" + cfg["dataset"] + "_" + cfg["data_split"]
        if "percent" in cfg:
            cache_name += "_" + str(cfg["percent"] * 100)

        self.process_path = os.path.join(
            cfg["root_dir"], cfg["process_dir"], cfg["dataset"] + "/"
        )
        cache_dir = os.path.join(self.process_path, cache_name)
        ensureDir(cache_dir)
        print(cache_dir)

        # One cache file per matrix, in the order they are returned.
        adj_path = os.path.join(cache_dir, "s_adj_mat.npz")
        norm_path = os.path.join(cache_dir, "s_norm_adj_mat.npz")
        mean_path = os.path.join(cache_dir, "s_mean_adj_mat.npz")

        try:
            started = time()
            adj_mat = sp.load_npz(adj_path)
            norm_adj_mat = sp.load_npz(norm_path)
            mean_adj_mat = sp.load_npz(mean_path)
            print("already load adj matrix", adj_mat.shape, time() - started)
        except Exception:
            # Any failure while loading falls back to rebuilding the matrices
            # and writing them to the cache files.
            adj_mat, norm_adj_mat, mean_adj_mat = self.create_adj_mat()
            to_save = (
                (adj_path, adj_mat),
                (norm_path, norm_adj_mat),
                (mean_path, mean_adj_mat),
            )
            for target, matrix in to_save:
                sp.save_npz(target, matrix)
        return adj_mat, norm_adj_mat, mean_adj_mat
Пример #3
0
def prepare_env(config):
    """Prepare the running environment and return the merged configuration.

    Steps, in order:
        - Default ``root_dir`` to the parent of this file's directory.
        - Load parameters from the json file named by ``config["config_file"]``
          and overlay the entries of ``config`` on top of them.
        - Build ``model_run_id`` from the model name, config id, a timestamp
          and six random lowercase letters.
        - Set the random seed (``seed`` from the config, else 2020).
        - Initialize system folders and logging.
        - Resolve the log, run, checkpoint and result paths under ``root_dir``.

    Args:
        config (dict): Global configs; must contain ``config_file``. A missing
            ``root_dir`` entry is added to this dict in place.

    Returns:
        dict: The json parameters updated with ``config`` and the derived
        entries (``model_run_id``, ``log_file``, ``run_dir``,
        ``model_save_dir``, ``result_file``).

    """
    # Obtain the project abspath; it must be given explicitly when running
    # inside a container.
    if "root_dir" not in config:
        here = os.path.dirname(os.path.abspath(__file__))
        config["root_dir"] = os.path.abspath(os.path.join(here, ".."))

    # Load the json parameters.
    config_path = config["config_file"]
    with open(config_path) as handle:
        print("loading config file", config_path)
        merged = json.load(handle)
    print(f"Current pid: {os.getpid()}")
    # Parameters received from the caller take precedence over the json ones.
    merged.update(config)
    config = merged

    # Unique model run id: model name, config id, timestamp, random suffix.
    # The suffix is drawn before the seed is set below.
    timestamp_str = datetime.now().strftime("%Y%m%d_%H%M%S")
    random_str = "".join(
        random.choice(string.ascii_lowercase) for _ in range(6))
    run_id = (config["model"] + "_" + config["config_id"] + "_" +
              timestamp_str + "_" + random_str)
    config["model_run_id"] = run_id
    set_seed(config.get("seed", 2020))
    root_dir = config["root_dir"]
    initialize_folders(root_dir)

    # Log file.
    log_file = os.path.join(root_dir, config["log_dir"], run_id)
    config["log_file"] = log_file
    logger.init_std_logger(log_file)

    print("python version:", sys.version)
    print("pytorch version:", torch.__version__)

    # Folder for intermediate running statuses.
    run_dir = os.path.join(root_dir, config["run_dir"], run_id)
    config["run_dir"] = run_dir
    print(
        "The intermediate running statuses will be reported in folder:",
        run_dir,
    )

    # Folder for model checkpoints.
    model_save_dir = os.path.join(root_dir, config["checkpoint_dir"], run_id)
    config["model_save_dir"] = model_save_dir
    ensureDir(model_save_dir)
    print("Model checkpoint will save in file:", model_save_dir)

    # Result file, resolved under the result folder.
    result_file = os.path.join(root_dir, config["result_dir"],
                               config["result_file"])
    config["result_file"] = result_file
    print("Performance result will save in file:", result_file)

    print_dict_as_table(config, "Model configs")
    return config