Example #1
    def train(self):
        """Train the model."""
        self.load_dataset()
        self.gpu_id, self.config["device_str"] = self.get_device()
        """ Main training navigator

        Returns:

        """

        self.monitor = Monitor(log_dir=self.config["system"]["run_dir"],
                               delay=1,
                               gpu_id=self.gpu_id)
        train_loader = self.data.instance_vae_loader(
            batch_size=self.config["model"]["batch_size"],
            device=self.config["model"]["device_str"],
        )

        self.config["model"]["n_items"] = self.data.n_items
        self.config["model"]["n_users"] = self.data.n_users
        self.engine = VAECFEngine(self.config)
        self.model_save_dir = os.path.join(
            self.config["system"]["model_save_dir"],
            self.config["model"]["save_name"])
        self._train(self.engine, train_loader, self.model_save_dir)
        self.config["run_time"] = self.monitor.stop()
        return self.eval_engine.best_valid_performance
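
A pattern shared by every example here: training is wrapped in a Monitor that samples resource usage and returns the elapsed run time from stop(). A minimal sketch of that pattern, assuming a trainer object shaped like the examples above (the import path is an assumption, not confirmed by these snippets):

    from beta_rec.utils.monitor import Monitor  # assumed import path

    def timed_train(trainer):
        # Start resource monitoring; delay is the sampling interval in seconds.
        trainer.monitor = Monitor(log_dir=trainer.config["system"]["run_dir"],
                                  delay=1,
                                  gpu_id=trainer.gpu_id)
        trainer._train(trainer.engine, trainer.train_loader, trainer.model_save_dir)
        # stop() returns the accumulated run time, which the examples store in the config.
        trainer.config["run_time"] = trainer.monitor.stop()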
Example #2
    def train_mlp(self):
        """ Train MLP

        Returns:
            None
        """
        # Train MLP
        self.monitor = Monitor(log_dir=self.config["system"]["run_dir"],
                               delay=1,
                               gpu_id=self.gpu_id)
        train_loader = self.sample_generator.instance_a_train_loader(
            self.config["model"]["num_negative"],
            self.config["model"]["batch_size"])
        self.engine = MLPEngine(self.config)
        self.mlp_save_dir = os.path.join(
            self.config["system"]["model_save_dir"],
            self.config["model"]["mlp_config"]["save_name"],
        )
        self._train(self.engine, train_loader, self.mlp_save_dir)

        while self.eval_engine.n_worker:
            print("Wait 15s for the complete of eval_engine.n_worker")
            time.sleep(15)  # wait the
        self.config["run_time"] = self.monitor.stop()
        self.eval_engine.test_eval(self.data.test, self.engine.model)
Example #3
    def train(self):
        self.monitor = Monitor(log_dir=self.config["system"]["run_dir"],
                               delay=1,
                               gpu_id=self.gpu_id)
        self.model_save_dir = os.path.join(
            self.config["system"]["model_save_dir"],
            self.config["model"]["save_name"])
        for epoch in range(self.config["model"]["max_epoch"]):
            print(f"Epoch {epoch} starts !")
            print("-" * 80)
            if epoch > 0 and self.eval_engine.n_no_update == 0:
                # the previous epoch already obtained a better result
                self.engine.save_checkpoint(model_dir=self.model_save_dir)

            if self.eval_engine.n_no_update >= MAX_N_UPDATE:
                print(
                    "Early stopping triggered: no performance improvement for {} epochs"
                    .format(MAX_N_UPDATE))
                break

            train_loader = self.sample_generator.pairwise_negative_train_loader(
                self.config["model"]["batch_size"],
                self.config["model"]["device_str"])
            self.engine.train_an_epoch(epoch_id=epoch,
                                       train_loader=train_loader)
            self.eval_engine.train_eval(self.data.valid[0], self.data.test[0],
                                        self.engine.model, epoch)
        self.config["run_time"] = self.monitor.stop()
Example #4
    def train(self):
        """Train the model."""
        self.monitor = Monitor(log_dir=self.config["system"]["run_dir"],
                               delay=1,
                               gpu_id=self.gpu_id)
        self.model_save_dir = os.path.join(
            self.config["system"]["model_save_dir"],
            self.config["model"]["save_name"])

        if self.config["model"]["loss"] == "bpr":
            train_loader = self.data.instance_bpr_loader(
                batch_size=self.config["model"]["batch_size"],
                device=self.config["model"]["device_str"],
            )
        elif self.config["model"]["loss"] == "bce":
            train_loader = self.data.instance_bce_loader(
                num_negative=self.config["model"]["num_negative"],
                batch_size=self.config["model"]["batch_size"],
                device=self.config["model"]["device_str"],
            )
        else:
            raise ValueError(
                f"Unsupported loss type {self.config['model']['loss']}, try other options: 'bpr' or 'bce'"
            )

        self.engine = LCFNEngine(self.config)
        self._train(self.engine, train_loader, self.model_save_dir)
        self.config["run_time"] = self.monitor.stop()

        return self.eval_engine.best_valid_performance
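
The branch above keys the loader off config["model"]["loss"]. A hypothetical config fragment that would drive the 'bpr' branch (key names are taken from the example; the values are illustrative only):

    config = {
        "system": {"run_dir": "runs/", "model_save_dir": "checkpoints/"},
        "model": {
            "loss": "bpr",        # "bpr" -> pairwise loader; "bce" -> pointwise loader
            "batch_size": 1024,   # illustrative
            "num_negative": 4,    # consulted only by the "bce" branch
            "device_str": "cuda:0",
            "save_name": "lcfn.model",
        },
    }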
Example #5
    def train(self):
        if self.config["pretrain"] == "gmf":
            user_embed, item_embed = self.train_gmf()
            model = self.cmnengine(self.config, user_embed, item_embed,
                                   self.data.item_users_list)
            self.monitor = Monitor(log_dir=self.config["run_dir"],
                                   delay=1,
                                   gpu_id=self.gpu_id)
            self.model_dir = os.path.join(self.config["model_save_dir"],
                                          self.config["save_name"])
            for epoch in range(self.config["max_epoch"]):
                print(f"Epoch {epoch} starts !")
                print("-" * 80)
                if epoch > 0 and self.eval_engine.n_no_update == 0:
                    # the previous epoch already obtained a better result
                    model.save_checkpoint(model_dir=self.model_dir)

                if self.eval_engine.n_no_update >= MAX_N_UPDATE:
                    print(
                        "Early stopping triggered: no performance improvement for {} epochs"
                        .format(MAX_N_UPDATE))
                    break

                train_loader = self.data
                model.train_an_epoch(epoch_id=epoch, train_loader=train_loader)

                self.eval_engine.train_eval(self.data.valid[0],
                                            self.data.test[0], model.model,
                                            epoch)
            self.config["run_time"] = self.monitor.stop()
            self.eval_engine.test_eval(self.data.test, model.model)
Example #6
    def train(self):
        """Default train implementation

        """
        self.load_dataset()
        self.train_data = self.data.sample_triple()
        self.config["model"]["alpha_step"] = (
            1 - self.config["model"]["alpha"]) / (
                self.config["model"]["max_epoch"])
        self.config["user_fea"] = self.data.user_feature
        self.config["item_fea"] = self.data.item_feature
        self.engine = VBCAREngine(self.config)
        self.engine.data = self.data
        assert hasattr(self, "engine"), "Please specify the model engine!"
        self.monitor = Monitor(log_dir=self.config["system"]["run_dir"],
                               delay=1,
                               gpu_id=self.gpu_id)
        print("Start training... ")
        epoch_bar = tqdm(range(self.config["model"]["max_epoch"]),
                         file=sys.stdout)
        for epoch in epoch_bar:
            print(f"Epoch {epoch} starts !")
            print("-" * 80)
            if epoch > 0 and self.eval_engine.n_no_update == 0:
                # the previous epoch already obtained a better result
                self.engine.save_checkpoint(model_dir=os.path.join(
                    self.config["system"]["model_save_dir"], "model.cpk"))

            if self.eval_engine.n_no_update >= MAX_N_UPDATE:
                print(
                    "Early stopping triggered: no performance improvement for {} epochs"
                    .format(MAX_N_UPDATE))
                break
            data_loader = DataLoader(
                torch.LongTensor(self.train_data.to_numpy()).to(
                    self.engine.device),
                batch_size=self.config["model"]["batch_size"],
                shuffle=True,
                drop_last=True,
            )
            self.engine.train_an_epoch(data_loader, epoch_id=epoch)
            self.eval_engine.train_eval(self.data.valid[0], self.data.test[0],
                                        self.engine.model, epoch)
            # anneal alpha
            self.engine.model.alpha = min(
                self.config["model"]["alpha"] +
                math.exp(epoch - self.config["model"]["max_epoch"] + 20),
                1,
            )
            """Sets the learning rate to the initial LR decayed by 10 every 10 epochs"""
            lr = self.config["model"]["lr"] * (0.5**(epoch // 10))
            for param_group in self.engine.optimizer.param_groups:
                param_group["lr"] = lr
        self.config["run_time"] = self.monitor.stop()
        return self.eval_engine.best_valid_performance
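
Two schedules run inside this loop: alpha is annealed toward 1 by an exponential term that pushes it to 1 from about 20 epochs before the end, and the learning rate is halved every 10 epochs. A standalone sketch of both schedules (max_epoch, alpha0, and lr0 are illustrative values):

    import math

    max_epoch, alpha0, lr0 = 50, 0.1, 0.001
    for epoch in range(max_epoch):
        # exp(epoch - max_epoch + 20) is tiny early on and reaches 1 at
        # epoch == max_epoch - 20, so alpha is clamped to 1 from then on.
        alpha = min(alpha0 + math.exp(epoch - max_epoch + 20), 1)
        # Step decay: lr0 for epochs 0-9, lr0/2 for 10-19, lr0/4 for 20-29, ...
        lr = lr0 * (0.5 ** (epoch // 10))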
Example #7
    def train(self):
        self.monitor = Monitor(log_dir=self.config["run_dir"],
                               delay=1,
                               gpu_id=self.gpu_id)

        # Train GCN
        self.engine = GCN_SEngine(self.config["gcn_config"])
        train_loader = self.dataset
        self.gcn_save_dir = os.path.join(
            self.config["model_save_dir"],
            self.config["gcn_config"]["save_name"])
        self._train(engine=self.engine,
                    train_loader=train_loader,
                    save_dir=self.gcn_save_dir)
        while self.eval_engine.n_worker:
            print(f"Wait 15s for the complete of eval_engine.n_worker")
            time.sleep(15)  # wait the
        # Train MLP
        train_loader = self.sample_generator.instance_a_train_loader(
            self.config["num_negative"], self.config["batch_size"])
        self.engine = MLPEngine(self.config["mlp_config"],
                                gcn_config=self.config["gcn_config"])
        self.mlp_save_dir = os.path.join(
            self.config["model_save_dir"],
            self.config["mlp_config"]["save_name"])
        self._train(engine=self.engine,
                    train_loader=train_loader,
                    save_dir=self.mlp_save_dir)

        while self.eval_engine.n_worker:
            print(f"Wait 15s for the complete of eval_engine.n_worker")
            time.sleep(15)  # wait the

        # Train ncf
        self.engine = NeuMFEngine(
            self.config["neumf_config"],
            mlp_config=self.config["mlp_config"],
            gcn_config=self.config["gcn_config"],
        )
        self.neumf_save_dir = os.path.join(
            self.config["model_save_dir"],
            self.config["neumf_config"]["save_name"])
        self._train(
            engine=self.engine,
            train_loader=train_loader,
            save_dir=self.neumf_save_dir,
        )

        while self.eval_engine.n_worker:
            print(f"Wait 15s for the complete of eval_engine.n_worker")
            time.sleep(15)  # wait the
        self.config["run_time"] = self.monitor.stop()
Example #8
 def train(self):
     """Train and test NARM."""
     self.monitor = Monitor(log_dir=self.config["system"]["run_dir"],
                            delay=1,
                            gpu_id=self.gpu_id)
     train_loader = self.load_train_data
     self.engine = NARMEngine(self.config)
     self.narm_save_dir = os.path.join(
         self.config["system"]["model_save_dir"],
         self.config["model"]["save_name"])
     self._train(self.engine, train_loader, self.narm_save_dir)
     self.config["run_time"] = self.monitor.stop()
     self.seq_eval_engine.test_eval_seq(self.test_data, self.engine)
Example #9
 def train_ncf(self):
     """Train NeuMF."""
     self.monitor = Monitor(log_dir=self.config["system"]["run_dir"],
                            delay=1,
                            gpu_id=self.gpu_id)
     train_loader = self.sample_generator.instance_a_train_loader(
         self.config["model"]["num_negative"],
         self.config["model"]["batch_size"])
     self.engine = NeuMFEngine(self.config)
     self.neumf_save_dir = os.path.join(
         self.config["system"]["model_save_dir"],
         self.config["model"]["neumf_config"]["save_name"],
     )
     self._train(self.engine, train_loader, self.neumf_save_dir)
     self.config["run_time"] = self.monitor.stop()
     self.eval_engine.test_eval(self.data.test, self.engine.model)
Example #10
 def train(self):
     """Train the model."""
     self.monitor = Monitor(log_dir=self.config["system"]["run_dir"],
                            delay=1,
                            gpu_id=self.gpu_id)
     self.model_save_dir = os.path.join(
         self.config["system"]["model_save_dir"],
         self.config["model"]["save_name"])
     self.engine = LightGCNEngine(self.config)
     train_loader = self.data.instance_bpr_loader(
         batch_size=self.config["model"]["batch_size"],
         device=self.config["model"]["device_str"],
     )
     self._train(self.engine, train_loader, self.model_save_dir)
     self.config["run_time"] = self.monitor.stop()
     return self.eval_engine.best_valid_performance
Example #11
class VBCAR_train(TrainEngine):
    """ An instance class from the TrainEngine base class

    """
    def __init__(self, config):
        """Constructor

                Args:
                    config (dict): All the parameters for the model
        """
        self.config = config
        super(VBCAR_train, self).__init__(self.config)
        self.load_dataset()
        self.train_data = self.dataset.sample_triple()
        self.config["alpha_step"] = (1 - self.config["alpha"]) / (
            self.config["max_epoch"])
        self.engine = VBCAREngine(self.config)

    def train(self):
        """Default train implementation

        """
        assert hasattr(self, "engine"), "Please specify the model engine!"
        self.monitor = Monitor(log_dir=self.config["run_dir"],
                               delay=1,
                               gpu_id=self.gpu_id)
        self.engine.data = self.dataset
        print("Start training... ")
        epoch_bar = tqdm(range(self.config["max_epoch"]), file=sys.stdout)
        for epoch in epoch_bar:
            print(f"Epoch {epoch} starts !")
            print("-" * 80)
            if epoch > 0 and self.eval_engine.n_no_update == 0:
                # the previous epoch already obtained a better result
                self.engine.save_checkpoint(model_dir=os.path.join(
                    self.config["model_save_dir"], "model.cpk"))

            if self.eval_engine.n_no_update >= MAX_N_UPDATE:
                print(
                    "Early stopping triggered: no performance improvement for {} epochs"
                    .format(MAX_N_UPDATE))
                break
            data_loader = self.build_data_loader()
            self.engine.train_an_epoch(data_loader, epoch_id=epoch)
            self.eval_engine.train_eval(self.dataset.valid[0],
                                        self.dataset.test[0],
                                        self.engine.model, epoch)
            # anneal alpha
            self.engine.model.alpha = min(
                self.config["alpha"] +
                math.exp(epoch - self.config["max_epoch"] + 20),
                1,
            )
            """Sets the learning rate to the initial LR decayed by 10 every 10 epochs"""
            lr = self.config["lr"] * (0.5**(epoch // 10))
            for param_group in self.engine.optimizer.param_groups:
                param_group["lr"] = lr
        self.config["run_time"] = self.monitor.stop()
        return self.eval_engine.best_valid_performance
Example #12
class LightGCN_train(TrainEngine):
    """An instance class from the TrainEngine base class."""
    def __init__(self, config):
        """Initialize LightGCN_train Class.

        Args:
            config (dict): All the parameters for the model.
        """
        self.config = config
        super(LightGCN_train, self).__init__(config)
        self.load_dataset()
        self.build_data_loader()
        self.engine = LightGCNEngine(self.config)

    def build_data_loader(self):
        """Missing Doc."""
        # ToDo: Please define the directory to store the adjacent matrix
        self.sample_generator = DataLoaderBase(ratings=self.data.train)
        adj_mat, norm_adj_mat, mean_adj_mat = self.sample_generator.get_adj_mat(
            self.config)
        norm_adj = sparse_mx_to_torch_sparse_tensor(norm_adj_mat)
        self.config["model"]["norm_adj"] = norm_adj
        self.config["model"]["n_users"] = self.data.n_users
        self.config["model"]["n_items"] = self.data.n_items

    def train(self):
        """Train the model."""
        self.monitor = Monitor(log_dir=self.config["system"]["run_dir"],
                               delay=1,
                               gpu_id=self.gpu_id)
        self.model_save_dir = os.path.join(
            self.config["system"]["model_save_dir"],
            self.config["model"]["save_name"])
        self.max_n_update = self.config["model"]["max_n_update"]
        for epoch in range(self.config["model"]["max_epoch"]):
            print(f"Epoch {epoch} starts !")
            print("-" * 80)
            if epoch > 0 and self.eval_engine.n_no_update == 0:
                # the previous epoch already obtained a better result
                self.engine.save_checkpoint(model_dir=self.model_save_dir)

            if self.eval_engine.n_no_update >= self.max_n_update:
                print(
                    "Early stopping triggered: no performance improvement for {} epochs"
                    .format(self.max_n_update))
                break

            train_loader = self.sample_generator.pairwise_negative_train_loader(
                self.config["model"]["batch_size"],
                self.config["model"]["device_str"])
            self.engine.train_an_epoch(epoch_id=epoch,
                                       train_loader=train_loader)
            self.eval_engine.train_eval(self.data.valid[0], self.data.test[0],
                                        self.engine.model, epoch)
        self.config["run_time"] = self.monitor.stop()

    def test(self):
        """Test the model."""
        self.engine.resume_checkpoint(model_dir=self.model_save_dir)
        super(LightGCN_train, self).test()
Example #13
class LCFN_train(TrainEngine):
    """An instance class from the TrainEngine base class."""
    def __init__(self, config):
        """Initialize NGCF_train Class.

        Args:
            config (dict): All the parameters for the model.
        """
        self.config = config
        print(config)
        super(LCFN_train, self).__init__(self.config)
        self.load_dataset()
        self.build_data_loader()

    def build_data_loader(self):
        """Missing Doc."""
        # ToDo: Please define the directory to store the adjacent matrix
        self.gpu_id, self.config["model"]["device_str"] = self.get_device()
        self.sample_generator = DataLoaderBase(ratings=self.data.train)
        graph_embeddings = self.sample_generator.get_graph_embeddings(
            self.config)

        self.config["model"]["graph_embeddings"] = graph_embeddings

        self.config["model"]["n_users"] = self.data.n_users
        self.config["model"]["n_items"] = self.data.n_items

    def train(self):
        """Train the model."""
        self.monitor = Monitor(log_dir=self.config["system"]["run_dir"],
                               delay=1,
                               gpu_id=self.gpu_id)
        self.model_save_dir = os.path.join(
            self.config["system"]["model_save_dir"],
            self.config["model"]["save_name"])

        if self.config["model"]["loss"] == "bpr":
            train_loader = self.data.instance_bpr_loader(
                batch_size=self.config["model"]["batch_size"],
                device=self.config["model"]["device_str"],
            )
        elif self.config["model"]["loss"] == "bce":
            train_loader = self.data.instance_bce_loader(
                num_negative=self.config["model"]["num_negative"],
                batch_size=self.config["model"]["batch_size"],
                device=self.config["model"]["device_str"],
            )
        else:
            raise ValueError(
                f"Unsupported loss type {self.config['model']['loss']}, try other options: 'bpr' or 'bce'"
            )

        self.engine = LCFNEngine(self.config)
        self._train(self.engine, train_loader, self.model_save_dir)
        self.config["run_time"] = self.monitor.stop()

        return self.eval_engine.best_valid_performance
Example #14
 def train(self):
     self.load_dataset()
     self.engine = Triple2vecEngine(self.config)
     self.engine.data = self.data
     self.train_data = self.data.sample_triple()
     train_loader = DataLoader(
         torch.LongTensor(self.train_data.to_numpy()).to(self.engine.device),
         batch_size=self.config["model"]["batch_size"],
         shuffle=True,
         drop_last=True,
     )
     self.monitor = Monitor(
         log_dir=self.config["system"]["run_dir"], delay=1, gpu_id=self.gpu_id
     )
     self.model_save_dir = os.path.join(
         self.config["system"]["model_save_dir"], self.config["model"]["save_name"]
     )
     self._train(self.engine, train_loader, self.model_save_dir)
     self.config["run_time"] = self.monitor.stop()
     return self.eval_engine.best_valid_performance
Example #15
    def train(self):
        """Default train implementation

        """
        assert hasattr(self, "engine"), "Please specify the model engine!"
        self.monitor = Monitor(log_dir=self.config["run_dir"],
                               delay=1,
                               gpu_id=self.gpu_id)
        self.engine.data = self.dataset
        print("Start training... ")
        epoch_bar = tqdm(range(self.config["max_epoch"]), file=sys.stdout)
        for epoch in epoch_bar:
            print(f"Epoch {epoch} starts !")
            print("-" * 80)
            if epoch > 0 and self.eval_engine.n_no_update == 0:
                # the previous epoch already obtained a better result
                self.engine.save_checkpoint(model_dir=os.path.join(
                    self.config["model_save_dir"], "model.cpk"))

            if self.eval_engine.n_no_update >= MAX_N_UPDATE:
                print(
                    "Early stopping triggered: no performance improvement for {} epochs"
                    .format(MAX_N_UPDATE))
                break
            self.engine.train_an_epoch(self.train_data, epoch_id=epoch)
            self.eval_engine.train_eval(self.dataset.valid[0],
                                        self.dataset.test[0],
                                        self.engine.model, epoch)
            # anneal alpha
            self.engine.model.alpha = min(
                self.config["alpha"] +
                math.exp(epoch - self.config["max_epoch"] + 20),
                1,
            )
            """Sets the learning rate to the initial LR decayed by 10 every 10 epochs"""
            lr = self.config["lr"] * (0.5**(epoch // 10))
            for param_group in self.engine.optimizer.param_groups:
                param_group["lr"] = lr
        self.config["run_time"] = self.monitor.stop()
        return self.eval_engine.best_valid_performance
Example #16
    def train_gmf(self):
        """Train GMF."""
        self.monitor = Monitor(log_dir=self.config["run_dir"],
                               delay=1,
                               gpu_id=self.gpu_id)
        self.model_dir = os.path.join(self.config["model_save_dir"],
                                      self.config["save_name"])
        for epoch in range(self.config["max_epoch"]):
            print(f"Epoch {epoch} starts !")
            print("-" * 80)
            if epoch > 0 and self.eval_engine.n_no_update == 0:
                # the previous epoch already obtained a better result
                self.gmfengine.save_checkpoint(model_dir=self.model_dir)

            if self.eval_engine.n_no_update >= MAX_N_UPDATE:
                print(
                    "Early stopping triggered: no performance improvement for {} epochs"
                    .format(MAX_N_UPDATE))
                break

            train_loader = self.data
            self.gmfengine.train_an_epoch(epoch_id=epoch,
                                          train_loader=train_loader)

        print("Saving embeddings to: %s" % self.config["model_save_dir"])
        user_embed, item_embed, v = (
            self.gmfengine.model.user_memory.weight.detach().cpu(),
            self.gmfengine.model.item_memory.weight.detach().cpu(),
            self.gmfengine.model.v.weight.detach().cpu(),
        )
        embed_dir = os.path.join(self.config["model_save_dir"],
                                 "pretain/embeddings")
        ensureDir(embed_dir)
        np.savez(embed_dir, user=user_embed, item=item_embed, v=v)
        self.config["run_time"] = self.monitor.stop()

        return np.array(user_embed), np.array(item_embed)
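
Since np.savez appends a .npz suffix when the target name lacks one, the embeddings written above can be recovered like this (a sketch; embed_dir mirrors the path built in the example):

    import numpy as np

    embed_dir = "checkpoints/pretrain/embeddings"  # same path the trainer wrote to
    data = np.load(embed_dir + ".npz")             # np.savez appended the suffix
    user_embed, item_embed, v = data["user"], data["item"], data["v"]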
Example #17
class Triple2vec_train(TrainEngine):
    """ An instance class from the TrainEngine base class

    """

    def __init__(self, config):
        """Constructor

        Args:
            config (dict): All the parameters for the model
        """

        self.config = config
        super(Triple2vec_train, self).__init__(self.config)
        self.gpu_id, self.config["device_str"] = self.get_device()

    def load_dataset(self):
        """Load dataset."""
        split_data = load_split_dataset(self.config)
        self.data = GroceryData(split_dataset=split_data, config=self.config)
        self.config["model"]["n_users"] = self.data.n_users
        self.config["model"]["n_items"] = self.data.n_items

    def train(self):
        self.load_dataset()
        self.engine = Triple2vecEngine(self.config)
        self.engine.data = self.data
        self.train_data = self.data.sample_triple()
        train_loader = DataLoader(
            torch.LongTensor(self.train_data.to_numpy()).to(self.engine.device),
            batch_size=self.config["model"]["batch_size"],
            shuffle=True,
            drop_last=True,
        )
        self.monitor = Monitor(
            log_dir=self.config["system"]["run_dir"], delay=1, gpu_id=self.gpu_id
        )
        self.model_save_dir = os.path.join(
            self.config["system"]["model_save_dir"], self.config["model"]["save_name"]
        )
        self._train(self.engine, train_loader, self.model_save_dir)
        self.config["run_time"] = self.monitor.stop()
        return self.eval_engine.best_valid_performance
Example #18
class MF_train(TrainEngine):
    """MF_train Class."""
    def __init__(self, args):
        """Initialize MF_train Class."""
        print(args)
        super(MF_train, self).__init__(args)

    def train(self):
        """Train the model."""
        self.load_dataset()
        self.gpu_id, self.config["device_str"] = self.get_device()
        """ Main training navigator

        Returns:

        """
        # Train NeuMF without pre-train
        self.monitor = Monitor(log_dir=self.config["system"]["run_dir"],
                               delay=1,
                               gpu_id=self.gpu_id)
        if self.config["model"]["loss"] == "bpr":
            train_loader = self.data.instance_bpr_loader(
                batch_size=self.config["model"]["batch_size"],
                device=self.config["model"]["device_str"],
            )
        elif self.config["model"]["loss"] == "bce":
            train_loader = self.data.instance_bce_loader(
                num_negative=self.config["model"]["num_negative"],
                batch_size=self.config["model"]["batch_size"],
                device=self.config["model"]["device_str"],
            )
        else:
            raise ValueError(
                f"Unsupported loss type {self.config['model']['loss']}, try other options: 'bpr' or 'bce'"
            )

        self.engine = MFEngine(self.config)
        self.model_save_dir = os.path.join(
            self.config["system"]["model_save_dir"],
            self.config["model"]["save_name"])
        self._train(self.engine, train_loader, self.model_save_dir)
        self.config["run_time"] = self.monitor.stop()
        return self.eval_engine.best_valid_performance
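
A hypothetical driver for this class, assuming the config carries the system and model sections the snippet reads (paths and values are illustrative):

    config = {
        "device": "gpu",
        "system": {"run_dir": "runs/", "model_save_dir": "checkpoints/"},
        "model": {"loss": "bpr", "batch_size": 1024,
                  "device_str": "cuda:0", "save_name": "mf.model"},
    }
    trainer = MF_train(config)
    best_valid = trainer.train()  # returns eval_engine.best_valid_performance
    trainer.test()                # inherited from TrainEngine (see Example #26)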
Example #19
class LightGCN_train(TrainEngine):
    """An instance class from the TrainEngine base class."""
    def __init__(self, config):
        """Initialize LightGCN_train Class.

        Args:
            config (dict): All the parameters for the model.
        """
        self.config = config
        super(LightGCN_train, self).__init__(config)
        self.load_dataset()
        self.build_data_loader()
        self.engine = LightGCNEngine(self.config)

    def build_data_loader(self):
        """Missing Doc."""
        adj_mat, norm_adj_mat, mean_adj_mat = self.data.get_adj_mat(
            self.config)
        norm_adj = sparse_mx_to_torch_sparse_tensor(norm_adj_mat)
        self.config["model"]["norm_adj"] = norm_adj
        self.config["model"]["n_users"] = self.data.n_users
        self.config["model"]["n_items"] = self.data.n_items

    def train(self):
        """Train the model."""
        self.monitor = Monitor(log_dir=self.config["system"]["run_dir"],
                               delay=1,
                               gpu_id=self.gpu_id)
        self.model_save_dir = os.path.join(
            self.config["system"]["model_save_dir"],
            self.config["model"]["save_name"])
        self.engine = LightGCNEngine(self.config)
        train_loader = self.data.instance_bpr_loader(
            batch_size=self.config["model"]["batch_size"],
            device=self.config["model"]["device_str"],
        )
        self._train(self.engine, train_loader, self.model_save_dir)
        self.config["run_time"] = self.monitor.stop()
        return self.eval_engine.best_valid_performance
Example #20
class UltraGCN_train(TrainEngine):
    """An instance class from the TrainEngine base class."""
    def __init__(self, config):
        """Initialize UltraGCN_train Class.

        Args:
            config (dict): All the parameters for the model.
        """
        self.config = config
        super(UltraGCN_train, self).__init__(config)
        self.load_dataset()
        self.build_data_loader()
        self.engine = UltraGCNEngine(self.config)

    def build_data_loader(self):
        """Load all matrix."""
        train_mat, constraint_mat = self.data.get_constraint_mat(self.config)
        # norm_adj = sparse_mx_to_torch_sparse_tensor(norm_adj_mat)
        self.config["model"]["train_mat"] = train_mat
        self.config["model"]["constraint_mat"] = constraint_mat
        self.config["model"]["n_users"] = self.data.n_users
        self.config["model"]["n_items"] = self.data.n_items

    def train(self):
        """Train the model."""
        self.monitor = Monitor(log_dir=self.config["system"]["run_dir"],
                               delay=1,
                               gpu_id=self.gpu_id)
        self.model_save_dir = os.path.join(
            self.config["system"]["model_save_dir"],
            self.config["model"]["save_name"])
        train_loader = self.data.instance_mul_neg_loader(
            batch_size=self.config["model"]["batch_size"],
            device=self.config["model"]["device_str"],
            num_negative=self.config["model"]["negative_num"],
        )
        self._train(self.engine, train_loader, self.model_save_dir)
        self.config["run_time"] = self.monitor.stop()
        return self.eval_engine.best_valid_performance
Example #21
class SGL_train(TrainEngine):
    """An instance class from the TrainEngine base class."""
    def __init__(self, config):
        """Initialize SGL_train Class.

        Args:
            config (dict): All the parameters for the model.
        """
        self.config = config
        super(SGL_train, self).__init__(config)
        self.load_dataset()
        self.build_data_loader()
        self.engine = SGLEngine(self.config)

    def build_data_loader(self):
        """Attach data statistics and the SGL normalized adjacency to the config."""
        self.config["model"]["n_users"] = self.data.n_users
        self.config["model"]["n_items"] = self.data.n_items
        norm_adj = self.data.create_sgl_mat(self.config)
        self.config["model"]["norm_adj"] = norm_adj

    def train(self):
        """Train the model."""
        self.monitor = Monitor(log_dir=self.config["system"]["run_dir"],
                               delay=1,
                               gpu_id=self.gpu_id)
        self.model_save_dir = os.path.join(
            self.config["system"]["model_save_dir"],
            self.config["model"]["save_name"])
        self.engine = SGLEngine(self.config)
        train_loader = self.data.instance_bpr_loader(
            batch_size=self.config["model"]["batch_size"],
            device=self.config["model"]["device_str"],
        )
        self._train(self.engine, train_loader, self.model_save_dir)
        self.config["run_time"] = self.monitor.stop()
        return self.eval_engine.best_valid_performance
Example #22
class cmn_train(TrainEngine):
    """ An instance class from the TrainEngine base class

    """
    def __init__(self, config):
        """Constructor

        Args:
            config (dict): All the parameters for the model
        """

        self.config = config
        super(cmn_train, self).__init__(self.config)
        self.load_dataset()
        self.gmfengine = PairwiseGMFEngine(self.config)
        self.cmnengine = cmnEngine
        self.gpu_id, self.config["device_str"] = self.get_device()

    def train_gmf(self):
        """Train GMF."""
        self.monitor = Monitor(log_dir=self.config["run_dir"],
                               delay=1,
                               gpu_id=self.gpu_id)
        self.model_dir = os.path.join(self.config["model_save_dir"],
                                      self.config["save_name"])
        for epoch in range(self.config["max_epoch"]):
            print(f"Epoch {epoch} starts !")
            print("-" * 80)
            if epoch > 0 and self.eval_engine.n_no_update == 0:
                # the previous epoch already obtained a better result
                self.gmfengine.save_checkpoint(model_dir=self.model_dir)

            if self.eval_engine.n_no_update >= MAX_N_UPDATE:
                print(
                    "Early stopping triggered: no performance improvement for {} epochs"
                    .format(MAX_N_UPDATE))
                break

            train_loader = self.data
            self.gmfengine.train_an_epoch(epoch_id=epoch,
                                          train_loader=train_loader)

        print("Saving embeddings to: %s" % self.config["model_save_dir"])
        user_embed, item_embed, v = (
            self.gmfengine.model.user_memory.weight.detach().cpu(),
            self.gmfengine.model.item_memory.weight.detach().cpu(),
            self.gmfengine.model.v.weight.detach().cpu(),
        )
        embed_dir = os.path.join(self.config["model_save_dir"],
                                 "pretain/embeddings")
        ensureDir(embed_dir)
        np.savez(embed_dir, user=user_embed, item=item_embed, v=v)
        self.config["run_time"] = self.monitor.stop()

        return np.array(user_embed), np.array(item_embed)

    def train(self):
        """Train the model, optionally pre-training GMF first."""
        if self.config["pretrain"] == "gmf":
            user_embed, item_embed = self.train_gmf()
            model = self.cmnengine(self.config, user_embed, item_embed,
                                   self.data.item_users_list)
            self.monitor = Monitor(log_dir=self.config["run_dir"],
                                   delay=1,
                                   gpu_id=self.gpu_id)
            self.model_dir = os.path.join(self.config["model_save_dir"],
                                          self.config["save_name"])
            for epoch in range(self.config["max_epoch"]):
                print(f"Epoch {epoch} starts !")
                print("-" * 80)
                if epoch > 0 and self.eval_engine.n_no_update == 0:
                    # the previous epoch already obtained a better result
                    model.save_checkpoint(model_dir=self.model_dir)

                if self.eval_engine.n_no_update >= MAX_N_UPDATE:
                    print(
                        "Early stopping triggered: no performance improvement for {} epochs"
                        .format(MAX_N_UPDATE))
                    break

                train_loader = self.data
                model.train_an_epoch(epoch_id=epoch, train_loader=train_loader)

                self.eval_engine.train_eval(self.data.valid[0],
                                            self.data.test[0], model.model,
                                            epoch)
            self.config["run_time"] = self.monitor.stop()
            self.eval_engine.test_eval(self.data.test, model.model)
Example #23
class NARM_train(TrainEngine):
    """ An instance class from the TrainEngine base class

    """
    def __init__(self, config):
        """Constructor

        Args:
            config (dict): All the parameters for the model
        """
        self.config = config
        super(NARM_train, self).__init__(self.config)
        self.load_dataset_seq()
        self.build_data_loader()
        self.engine = NARMEngine(self.config)
        self.seq_eval_engine = SeqEvalEngine(self.config)

    def load_dataset_seq(self):
        """ Default implementation of building dataset

        Returns:
            None
        """
        # ml = Movielens_100k()
        # ml.download()
        # ml.load_interaction()
        # self.dataset = ml.make_temporal_split(n_negative=0, n_test=0)

        ld_dataset = load_dataset(self.config)
        ld_dataset.download()
        ld_dataset.load_interaction()
        self.dataset = ld_dataset.make_temporal_split(n_negative=0, n_test=0)

        self.train_data = self.dataset[self.dataset.col_flag == "train"]
        self.valid_data = self.dataset[self.dataset.col_flag == "validate"]
        self.test_data = self.dataset[self.dataset.col_flag == "test"]

        # self.dataset = Dataset(self.config)
        self.config["n_users"] = self.train_data.col_user.nunique()
        self.config["n_items"] = self.train_data.col_item.nunique() + 1

    def build_data_loader(self):
        """ Convert users' interactions to sequences

        Returns:
            load_train_data (DataLoader): training set.

        """

        # reindex items from 1
        self.train_data, self.valid_data, self.test_data = reindex_items(
            self.train_data, self.valid_data, self.test_data)

        # data to sequences
        self.valid_data = create_seq_db(self.valid_data)
        self.test_data = create_seq_db(self.test_data)

        # convert interactions to sequences
        seq_train_data = create_seq_db(self.train_data)

        # convert sequences to (seq, target) format
        load_train_data = dataset_to_seq_target_format(seq_train_data)

        # define pytorch Dataset class for sequential datasets
        load_train_data = SeqDataset(load_train_data)

        # pad the sequences with 0
        self.load_train_data = DataLoader(
            load_train_data,
            batch_size=self.config["batch_size"],
            shuffle=False,
            collate_fn=collate_fn,
        )
        return self.load_train_data

    def _train(self, engine, train_loader, save_dir):
        """Train the model with epochs

        Retruns:
            None
        """
        epoch_bar = tqdm(range(self.config["max_epoch"]), file=sys.stdout)
        for epoch in epoch_bar:
            print("Epoch {} starts !".format(epoch))
            print("-" * 80)
            if self.check_early_stop(engine, save_dir, epoch):
                break
            engine.train_an_epoch(train_loader, epoch=epoch)
            """evaluate model on validation and test sets"""

            # evaluation
            self.seq_eval_engine.train_eval_seq(self.valid_data,
                                                self.test_data, engine, epoch)

    def train(self):
        """ Train and test NARM

        Returns:
            None
        """
        self.monitor = Monitor(log_dir=self.config["run_dir"],
                               delay=1,
                               gpu_id=self.gpu_id)
        train_loader = self.load_train_data
        self.engine = NARMEngine(self.config)
        self.narm_save_dir = os.path.join(self.config["model_save_dir"],
                                          self.config["save_name"])
        self._train(self.engine, train_loader, self.narm_save_dir)
        self.config["run_time"] = self.monitor.stop()
        self.seq_eval_engine.test_eval_seq(self.test_data, self.engine)
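
dataset_to_seq_target_format converts each session into next-item prediction samples before padding and batching. An illustration of the expected expansion for a single session (the helper's exact output layout is an assumption based on its name and the NARM setting, not confirmed by the snippet):

    session = [12, 7, 31, 5]
    # Every proper prefix predicts the item that follows it:
    samples = [(session[:i], session[i]) for i in range(1, len(session))]
    # -> [([12], 7), ([12, 7], 31), ([12, 7, 31], 5)]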
Example #24
class VBCAR_train(TrainEngine):
    """An instance class from the TrainEngine base class."""
    def __init__(self, config):
        """Initialize VBCAR_train Class.

        Args:
            config (dict): All the parameters for the model.
        """
        self.config = config
        super(VBCAR_train, self).__init__(self.config)

    def load_dataset(self):
        """Load dataset."""
        split_data = load_split_dataset(self.config)
        self.data = GroceryData(split_dataset=split_data, config=self.config)
        self.config["model"]["n_users"] = self.data.n_users
        self.config["model"]["n_items"] = self.data.n_items

    def train(self):
        """Train the model."""
        self.load_dataset()
        self.train_data = self.data.sample_triple()
        self.config["model"]["alpha_step"] = (
            1 - self.config["model"]["alpha"]) / (
                self.config["model"]["max_epoch"])
        self.config["user_fea"] = self.data.user_feature
        self.config["item_fea"] = self.data.item_feature
        self.engine = VBCAREngine(self.config)
        self.engine.data = self.data
        assert hasattr(self, "engine"), "Please specify the model engine!"
        self.monitor = Monitor(log_dir=self.config["system"]["run_dir"],
                               delay=1,
                               gpu_id=self.gpu_id)
        print("Start training... ")
        epoch_bar = tqdm(range(self.config["model"]["max_epoch"]),
                         file=sys.stdout)
        self.max_n_update = self.config["model"]["max_n_update"]
        for epoch in epoch_bar:
            print(f"Epoch {epoch} starts !")
            print("-" * 80)
            if epoch > 0 and self.eval_engine.n_no_update == 0:
                # the previous epoch already obtained a better result
                self.engine.save_checkpoint(model_dir=os.path.join(
                    self.config["system"]["model_save_dir"], "model.cpk"))

            if self.eval_engine.n_no_update >= self.max_n_update:
                print(
                    "Early stopping triggered: no performance improvement for {} epochs"
                    .format(self.max_n_update))
                break
            data_loader = DataLoader(
                torch.LongTensor(self.train_data.to_numpy()).to(
                    self.engine.device),
                batch_size=self.config["model"]["batch_size"],
                shuffle=True,
                drop_last=True,
            )
            self.engine.train_an_epoch(data_loader, epoch_id=epoch)
            self.eval_engine.train_eval(self.data.valid[0], self.data.test[0],
                                        self.engine.model, epoch)
            # anneal alpha
            self.engine.model.alpha = min(
                self.config["model"]["alpha"] +
                math.exp(epoch - self.config["model"]["max_epoch"] + 20),
                1,
            )
            """Sets the learning rate to the initial LR decayed by 10 every 10 epochs"""
            lr = self.config["model"]["lr"] * (0.5**(epoch // 10))
            for param_group in self.engine.optimizer.param_groups:
                param_group["lr"] = lr
        self.config["run_time"] = self.monitor.stop()
        return self.eval_engine.best_valid_performance
Example #25
class NGCF_train(TrainEngine):
    """ An instance class from the TrainEngine base class

    """

    def __init__(self, config):
        """Constructor

        Args:
            config (dict): All the parameters for the model
        """

        self.config = config
        super(NGCF_train, self).__init__(self.config)
        self.load_dataset()
        self.build_data_loader()
        self.engine = NGCFEngine(self.config)

    def build_data_loader(self):
        """Build the data loader inputs: adjacency matrices and data statistics."""
        # ToDo: Please define the directory to store the adjacent matrix
        plain_adj, norm_adj, mean_adj = self.dataset.get_adj_mat()
        norm_adj = sparse_mx_to_torch_sparse_tensor(norm_adj)
        self.config["norm_adj"] = norm_adj
        self.config["num_batch"] = self.dataset.n_train // config["batch_size"] + 1
        self.config["n_users"] = self.dataset.n_users
        self.config["n_items"] = self.dataset.n_items

    def train(self):
        """Train the model."""
        self.monitor = Monitor(
            log_dir=self.config["run_dir"], delay=1, gpu_id=self.gpu_id
        )
        self.model_dir = os.path.join(
            self.config["model_save_dir"], self.config["save_name"]
        )
        for epoch in range(self.config["max_epoch"]):
            print(f"Epoch {epoch} starts !")
            print("-" * 80)
            if epoch > 0 and self.eval_engine.n_no_update == 0:
                # the previous epoch already obtained a better result
                self.engine.save_checkpoint(model_dir=self.model_dir)

            if self.eval_engine.n_no_update >= MAX_N_UPDATE:
                print(
                    "Early stopping triggered: no performance improvement for {} epochs".format(
                        MAX_N_UPDATE
                    )
                )
                break

            train_loader = self.dataset
            self.engine.train_an_epoch(
                epoch_id=epoch, train_loader=train_loader
            )
            self.eval_engine.train_eval(
                self.dataset.valid[0], self.dataset.test[0], self.engine.model, epoch
            )
        self.config["run_time"] = self.monitor.stop()

    def test(self):
        """Test the model."""
        self.engine.resume_checkpoint(model_dir=self.model_dir)
        super(NGCF_train, self).test()
Example #26
class TrainEngine(object):
    """Training engine for all the models.

    """

    def __init__(self, config):
        """Initialing

        Args:
            config (dict): Config dict received from command line. Should have the config["config_file"].

        Attributes:
            dataset (Dataset): A dataset containing DataFrame of train, validation and test.
            train_data (DataLoader): Extracted training data or train DataLoader, need to be implement.
            monitor (Monitor): An monitor object that monitor the computational resources.
            engine (Model Engine)


        """
        self.dataset = None
        self.train_data = None
        self.monitor = None
        self.engine = None
        self.config = prepare_env(config)
        self.gpu_id, self.config["device_str"] = get_device() if self.config["device"] == "gpu" else (None, "cpu")
        self.eval_engine = EvalEngine(self.config)

    def load_dataset(self):
        """ Default implementation of building dataset

        Returns:
            None

        """
        self.dataset = data_util.Dataset(self.config)
        self.config["item_fea"] = self.dataset.item_feature
        self.config["user_fea"] = self.dataset.user_feature
        self.config["n_users"] = self.dataset.n_users
        self.config["n_items"] = self.dataset.n_items

    # noinspection PyTypeChecker
    def build_data_loader(self):
        """ Default data builder

        Returns:
            DataLoader

        """
        return DataLoader(
            torch.LongTensor(self.train_data.to_numpy()).to(self.engine.device),
            batch_size=self.config["batch_size"],
            shuffle=True,
            drop_last=True,
        )

    def train(self):
        """Default train implementation

        """
        assert hasattr(self, "engine"), "Please specify the exact model engine !"
        self.monitor = Monitor(
            log_dir=self.config["run_dir"], delay=1, gpu_id=self.gpu_id
        )
        self.engine.data = self.dataset
        print("Start training... ")
        epoch_bar = tqdm(range(self.config["max_epoch"]), file=sys.stdout)
        for epoch in epoch_bar:
            print(f"Epoch {epoch} starts !")
            print("-" * 80)
            if epoch > 0 and self.eval_engine.n_no_update == 0:
                # the previous epoch already obtained a better result
                self.engine.save_checkpoint(
                    model_dir=os.path.join(self.config["model_save_dir"], "model.cpk")
                )

            if self.eval_engine.n_no_update >= MAX_N_UPDATE:
                print(
                    "Early stopping triggered: no performance improvement for {} epochs".format(
                        MAX_N_UPDATE
                    )
                )
                break
            data_loader = self.build_data_loader()
            self.engine.train_an_epoch(data_loader, epoch_id=epoch)
            self.eval_engine.train_eval(
                self.dataset.valid[0], self.dataset.test[0], self.engine.model, epoch
            )
            """Sets the learning rate to the initial LR decayed by 10 every 10 epochs"""
            lr = self.config["lr"] * (0.5 ** (epoch // 10))
            for param_group in self.engine.optimizer.param_groups:
                param_group["lr"] = lr
        self.config["run_time"] = self.monitor.stop()
        return self.eval_engine.best_valid_performance

    def test(self):
        """Evaluate the performance for the testing sets based on the final model.

        """
        self.eval_engine.test_eval(self.dataset.test, self.engine.model)
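
Given this base class, a concrete trainer mainly has to set self.engine and point self.train_data at something build_data_loader can consume. A minimal hypothetical subclass following the pattern of the examples above (MyModelEngine is a stand-in, not a real class):

    class MyModel_train(TrainEngine):
        """Sketch of a TrainEngine subclass."""

        def __init__(self, config):
            super(MyModel_train, self).__init__(config)
            self.load_dataset()
            # Any interaction DataFrame works; build_data_loader turns it into tensors.
            self.train_data = self.dataset.train
            self.engine = MyModelEngine(self.config)  # hypothetical engine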
Example #27
class NCF_train(TrainEngine):
    """ An instance class from the TrainEngine base class

        """
    def __init__(self, config):
        """Constructor

        Args:
            config (dict): All the parameters for the model
        """
        self.config = config
        super(NCF_train, self).__init__(self.config)
        self.load_dataset()
        self.build_data_loader()
        self.gpu_id, self.config["model"]["device_str"] = self.get_device()

    def build_data_loader(self):
        # ToDo: Please define the directory to store the adjacent matrix
        self.sample_generator = DataLoaderBase(ratings=self.data.train)
        self.config["model"]["num_batch"] = (
            self.data.n_train // self.config["model"]["batch_size"] + 1)
        self.config["model"]["n_users"] = self.data.n_users
        self.config["model"]["n_items"] = self.data.n_items

    def train(self):
        """ Main training navigator

        Returns:

        """

        # Options are: 'mlp', 'gmf', 'ncf_end', and 'ncf_pre';
        # Train NeuMF without pre-train
        if self.config["model"]["model"] == "ncf_end":
            self.train_ncf()
        elif self.config["model"]["model"] == "gmf":
            self.train_gmf()
        elif self.config["model"]["model"] == "mlp":
            self.train_mlp()
        elif self.config["model"]["model"] == "ncf_pre":
            self.train_gmf()
            self.train_mlp()
            self.train_ncf()
        else:
            raise ValueError(
                "Model type error: Options are: 'mlp', 'gmf', 'ncf_end', and 'ncf_pre'."
            )

    def train_ncf(self):
        """ Train NeuMF

        Returns:
            None
        """
        self.monitor = Monitor(log_dir=self.config["system"]["run_dir"],
                               delay=1,
                               gpu_id=self.gpu_id)
        train_loader = self.sample_generator.instance_a_train_loader(
            self.config["model"]["num_negative"],
            self.config["model"]["batch_size"])
        self.engine = NeuMFEngine(self.config)
        self.neumf_save_dir = os.path.join(
            self.config["system"]["model_save_dir"],
            self.config["model"]["neumf_config"]["save_name"],
        )
        self._train(self.engine, train_loader, self.neumf_save_dir)
        self.config["run_time"] = self.monitor.stop()
        self.eval_engine.test_eval(self.data.test, self.engine.model)

    def train_gmf(self):
        """ Train GMF

        Returns:
            None
        """
        self.monitor = Monitor(log_dir=self.config["system"]["run_dir"],
                               delay=1,
                               gpu_id=self.gpu_id)
        train_loader = self.sample_generator.instance_a_train_loader(
            self.config["model"]["num_negative"],
            self.config["model"]["batch_size"])
        # Train GMF
        self.engine = GMFEngine(self.config)
        self.gmf_save_dir = os.path.join(
            self.config["system"]["model_save_dir"],
            self.config["model"]["gmf_config"]["save_name"],
        )
        self._train(self.engine, train_loader, self.gmf_save_dir)
        while self.eval_engine.n_worker:
            print("Wait 15s for the complete of eval_engine.n_worker")
            time.sleep(15)  # wait the
        self.config["run_time"] = self.monitor.stop()
        self.eval_engine.test_eval(self.data.test, self.engine.model)

    def train_mlp(self):
        """ Train MLP

        Returns:
            None
        """
        # Train MLP
        self.monitor = Monitor(log_dir=self.config["system"]["run_dir"],
                               delay=1,
                               gpu_id=self.gpu_id)
        train_loader = self.sample_generator.instance_a_train_loader(
            self.config["model"]["num_negative"],
            self.config["model"]["batch_size"])
        self.engine = MLPEngine(self.config)
        self.mlp_save_dir = os.path.join(
            self.config["system"]["model_save_dir"],
            self.config["model"]["mlp_config"]["save_name"],
        )
        self._train(self.engine, train_loader, self.mlp_save_dir)

        while self.eval_engine.n_worker:
            print("Wait 15s for the complete of eval_engine.n_worker")
            time.sleep(15)  # wait the
        self.config["run_time"] = self.monitor.stop()
        self.eval_engine.test_eval(self.data.test, self.engine.model)
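
The train() dispatcher above keys off config["model"]["model"]. Selecting the pre-trained NeuMF pipeline, for instance, would look like this (illustrative fragment; the config is assumed to be fully populated):

    config["model"]["model"] = "ncf_pre"  # runs train_gmf(), then train_mlp(), then train_ncf()
    NCF_train(config).train()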
Example #28
    def train(self):
        self.monitor = Monitor(log_dir=self.config["run_dir"],
                               delay=1,
                               gpu_id=self.gpu_id)
        train_loader = self.sample_generator.instance_a_train_loader(
            self.config["num_negative"], self.config["batch_size"])

        # Train NeuMF without pre-training
        self.config["pretrain"] = None
        self.config["model"] = "NCF_wo_pre"
        self.engine = NeuMFEngine(self.config)
        self.neumf_save_dir = os.path.join(
            self.config["model_save_dir"],
            self.config["neumf_config"]["save_name"])
        self._train(self.engine, train_loader, self.neumf_save_dir)
        while self.eval_engine.n_worker:
            print(f"Wait 15s for the complete of eval_engine.n_worker")
            time.sleep(15)  # wait the
        self.config["run_time"] = self.monitor.stop()
        self.eval_engine.test_eval(self.dataset.test, self.engine.model)

        self.monitor = Monitor(log_dir=self.config["run_dir"],
                               delay=1,
                               gpu_id=self.gpu_id)
        # Train GCN
        self.config["pretrain"] = None
        self.config["model"] = "GCN"
        self.engine = GCN_SEngine(self.config)
        self.gcn_save_dir = os.path.join(
            self.config["model_save_dir"],
            self.config["gcn_config"]["save_name"])
        self._train(engine=self.engine,
                    train_loader=self.dataset,
                    save_dir=self.gcn_save_dir)
        while self.eval_engine.n_worker:
            print(f"Wait 15s for the complete of eval_engine.n_worker")
            time.sleep(15)  # wait the

        self.config["run_time"] = self.monitor.stop()
        self.eval_engine.test_eval(self.dataset.test, self.engine.model)

        self.monitor = Monitor(log_dir=self.config["run_dir"],
                               delay=1,
                               gpu_id=self.gpu_id)
        # Train GMF
        self.config["pretrain"] = None
        self.config["model"] = "GMF"
        self.engine = GMFEngine(self.config)
        self.gmf_save_dir = os.path.join(
            self.config["model_save_dir"],
            self.config["gmf_config"]["save_name"])
        self._train(self.engine, train_loader, self.gmf_save_dir)
        while self.eval_engine.n_worker:
            print(f"Wait 15s for the complete of eval_engine.n_worker")
            time.sleep(15)  # wait the
        self.config["run_time"] = self.monitor.stop()
        self.eval_engine.test_eval(self.dataset.test, self.engine.model)

        self.monitor = Monitor(log_dir=self.config["run_dir"],
                               delay=1,
                               gpu_id=self.gpu_id)
        # Train MLP
        self.config["pretrain"] = None
        self.config["model"] = "mlp"
        self.engine = MLPEngine(self.config)
        self.mlp_save_dir = os.path.join(
            self.config["model_save_dir"],
            self.config["mlp_config"]["save_name"])
        self._train(self.engine, train_loader, self.mlp_save_dir)

        while self.eval_engine.n_worker:
            print(f"Wait 15s for the complete of eval_engine.n_worker")
            time.sleep(15)  # wait the
        self.config["run_time"] = self.monitor.stop()
        self.eval_engine.test_eval(self.dataset.test, self.engine.model)

        self.monitor = Monitor(log_dir=self.config["run_dir"],
                               delay=1,
                               gpu_id=self.gpu_id)
        # Train ncf_gmf
        self.config["pretrain"] = "gmf"
        self.config["model"] = "ncf_gmf"
        self.engine = NeuMFEngine(self.config)
        self.neumf_save_dir = os.path.join(
            self.config["model_save_dir"],
            self.config["neumf_config"]["save_name"])
        self._train(self.engine, train_loader, self.neumf_save_dir)
        while self.eval_engine.n_worker:
            print(f"Wait 15s for the complete of eval_engine.n_worker")
            time.sleep(15)  # wait the
        self.config["run_time"] = self.monitor.stop()
        self.eval_engine.test_eval(self.dataset.test, self.engine.model)

        self.monitor = Monitor(log_dir=self.config["run_dir"],
                               delay=1,
                               gpu_id=self.gpu_id)
        # Train ncf_gcn
        self.config["pretrain"] = "gcn"
        self.config["model"] = "ncf_gcn"
        self.engine = NeuMFEngine(self.config)
        self.neumf_save_dir = os.path.join(
            self.config["model_save_dir"],
            self.config["neumf_config"]["save_name"])
        self._train(self.engine, train_loader, self.neumf_save_dir)
        while self.eval_engine.n_worker:
            print(f"Wait 15s for the complete of eval_engine.n_worker")
            time.sleep(15)  # wait the
        self.config["run_time"] = self.monitor.stop()
        self.eval_engine.test_eval(self.dataset.test, self.engine.model)
Example #29
class NCF_train(TrainEngine):
    """ An instance class from the TrainEngine base class

        """
    def __init__(self, config):
        """Constructor

        Args:
            config (dict): All the parameters for the model
        """

        self.config = config
        super(NCF_train, self).__init__(self.config)
        self.load_dataset()
        self.build_data_loader()
        self.sample_generator = SampleGenerator(ratings=self.dataset.train)
        # the model config is updated with dataset statistics in build_data_loader

    def build_data_loader(self):
        """Build feature-similarity matrices and update the model config."""
        # TODO: define the directory in which to store the adjacency matrix
        user_fea_norm_adj, item_fea_norm_adj = self.dataset.make_fea_sim_mat()
        self.sample_generator = SampleGenerator(ratings=self.dataset.train)
        self.config["user_fea_norm_adj"] = sparse_mx_to_torch_sparse_tensor(
            user_fea_norm_adj)
        self.config["item_fea_norm_adj"] = sparse_mx_to_torch_sparse_tensor(
            item_fea_norm_adj)
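        # Note: sparse_mx_to_torch_sparse_tensor is imported from elsewhere in
        # the project. A common implementation converts a scipy COO matrix into
        # a torch sparse tensor, roughly (a sketch, not necessarily this
        # project's exact code):
        #
        #     def sparse_mx_to_torch_sparse_tensor(sparse_mx):
        #         sparse_mx = sparse_mx.tocoo().astype(np.float32)
        #         indices = torch.from_numpy(
        #             np.vstack((sparse_mx.row, sparse_mx.col)).astype(np.int64))
        #         values = torch.from_numpy(sparse_mx.data)
        #         return torch.sparse_coo_tensor(
        #             indices, values, torch.Size(sparse_mx.shape))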
        self.config["num_batch"] = self.dataset.n_train // self.config[
            "batch_size"] + 1
        self.config["n_users"] = self.dataset.n_users
        self.config["n_items"] = self.dataset.n_items

    def check_early_stop(self, engine, model_dir, epoch):
        """Check whether the early-stop criterion has been triggered.

        Also saves a checkpoint when the previous epoch obtained a better
        result.

        Args:
            engine: Training engine whose model may be checkpointed.
            model_dir (str): Directory in which to save the checkpoint.
            epoch (int): Current epoch number.

        Returns:
            bool: True if the early-stop criterion is triggered, else False.
        """
        if epoch > 0 and self.eval_engine.n_no_update == 0:
            # the previous epoch obtained a better result, so save a checkpoint
            engine.save_checkpoint(model_dir=model_dir)

        if self.eval_engine.n_no_update >= MAX_N_UPDATE:
            # stop training once the early-stop criterion is triggered
            print("Early-stop criterion triggered: no performance improvement"
                  " for {} consecutive epochs".format(MAX_N_UPDATE))
            return True
        return False
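    # MAX_N_UPDATE is assumed to be a module-level patience constant: the
    # maximum number of consecutive epochs without improvement before stopping.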

    def _train(self, engine, train_loader, save_dir):
        self.eval_engine.flush()
        epoch_bar = tqdm(range(self.config["max_epoch"]), file=sys.stdout)
        for epoch in epoch_bar:
            print("Epoch {} starts !".format(epoch))
            print("-" * 80)
            if self.check_early_stop(engine, save_dir, epoch):
                break
            engine.train_an_epoch(train_loader, epoch_id=epoch)
            """evaluate model on validation and test sets"""
            self.eval_engine.train_eval(self.dataset.valid[0],
                                        self.dataset.test[0], engine.model,
                                        epoch)

    def train(self):
        self.monitor = Monitor(log_dir=self.config["run_dir"],
                               delay=1,
                               gpu_id=self.gpu_id)
        train_loader = self.sample_generator.instance_a_train_loader(
            self.config["num_negative"], self.config["batch_size"])

        # Train NCF without pretraining
        self.config["pretrain"] = None
        self.config["model"] = "NCF_wo_pre"
        self.engine = NeuMFEngine(self.config)
        self.neumf_save_dir = os.path.join(
            self.config["model_save_dir"],
            self.config["neumf_config"]["save_name"])
        self._train(self.engine, train_loader, self.neumf_save_dir)
        while self.eval_engine.n_worker:
            print("Waiting 15s for the evaluation workers to finish")
            time.sleep(15)  # wait for asynchronous evaluation to complete
        self.config["run_time"] = self.monitor.stop()
        self.eval_engine.test_eval(self.dataset.test, self.engine.model)

        self.monitor = Monitor(log_dir=self.config["run_dir"],
                               delay=1,
                               gpu_id=self.gpu_id)
        # Train GCN
        self.config["pretrain"] = None
        self.config["model"] = "GCN"
        self.engine = GCN_SEngine(self.config)
        self.gcn_save_dir = os.path.join(
            self.config["model_save_dir"],
            self.config["gcn_config"]["save_name"])
        self._train(engine=self.engine,
                    train_loader=self.dataset,
                    save_dir=self.gcn_save_dir)
        while self.eval_engine.n_worker:
            print("Waiting 15s for the evaluation workers to finish")
            time.sleep(15)  # wait for asynchronous evaluation to complete
        self.config["run_time"] = self.monitor.stop()
        self.eval_engine.test_eval(self.dataset.test, self.engine.model)

        self.monitor = Monitor(log_dir=self.config["run_dir"],
                               delay=1,
                               gpu_id=self.gpu_id)
        # Train GMF
        self.config["pretrain"] = None
        self.config["model"] = "GMF"
        self.engine = GMFEngine(self.config)
        self.gmf_save_dir = os.path.join(
            self.config["model_save_dir"],
            self.config["gmf_config"]["save_name"])
        self._train(self.engine, train_loader, self.gmf_save_dir)
        while self.eval_engine.n_worker:
            print("Waiting 15s for the evaluation workers to finish")
            time.sleep(15)  # wait for asynchronous evaluation to complete
        self.config["run_time"] = self.monitor.stop()
        self.eval_engine.test_eval(self.dataset.test, self.engine.model)

        self.monitor = Monitor(log_dir=self.config["run_dir"],
                               delay=1,
                               gpu_id=self.gpu_id)
        # Train MLP
        self.config["pretrain"] = None
        self.config["model"] = "mlp"
        self.engine = MLPEngine(self.config)
        self.mlp_save_dir = os.path.join(
            self.config["model_save_dir"],
            self.config["mlp_config"]["save_name"])
        self._train(self.engine, train_loader, self.mlp_save_dir)

        while self.eval_engine.n_worker:
            print("Waiting 15s for the evaluation workers to finish")
            time.sleep(15)  # wait for asynchronous evaluation to complete
        self.config["run_time"] = self.monitor.stop()
        self.eval_engine.test_eval(self.dataset.test, self.engine.model)

        self.monitor = Monitor(log_dir=self.config["run_dir"],
                               delay=1,
                               gpu_id=self.gpu_id)
        # Train NeuMF with GMF pretraining (ncf_gmf)
        self.config["pretrain"] = "gmf"
        self.config["model"] = "ncf_gmf"
        self.engine = NeuMFEngine(self.config)
        self.neumf_save_dir = os.path.join(
            self.config["model_save_dir"],
            self.config["neumf_config"]["save_name"])
        self._train(self.engine, train_loader, self.neumf_save_dir)
        while self.eval_engine.n_worker:
            print("Waiting 15s for the evaluation workers to finish")
            time.sleep(15)  # wait for asynchronous evaluation to complete
        self.config["run_time"] = self.monitor.stop()
        self.eval_engine.test_eval(self.dataset.test, self.engine.model)

        self.monitor = Monitor(log_dir=self.config["run_dir"],
                               delay=1,
                               gpu_id=self.gpu_id)
        # Train NeuMF with GCN pretraining (ncf_gcn)
        self.config["pretrain"] = "gcn"
        self.config["model"] = "ncf_gcn"
        self.engine = NeuMFEngine(self.config)
        self.neumf_save_dir = os.path.join(
            self.config["model_save_dir"],
            self.config["neumf_config"]["save_name"])
        self._train(self.engine, train_loader, self.neumf_save_dir)
        while self.eval_engine.n_worker:
            print("Waiting 15s for the evaluation workers to finish")
            time.sleep(15)  # wait for asynchronous evaluation to complete
        self.config["run_time"] = self.monitor.stop()
        self.eval_engine.test_eval(self.dataset.test, self.engine.model)
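For orientation, here is a minimal usage sketch for the class above (not taken from the source). The config keys shown are exactly those read by the snippet; the TrainEngine base class presumably expects additional fields (e.g. for dataset loading) that are omitted here.

    config = {
        "run_dir": "./runs",                    # Monitor log directory
        "model_save_dir": "./checkpoints",      # checkpoint root
        "max_epoch": 50,                        # epochs per _train call
        "batch_size": 256,
        "num_negative": 4,                      # negative samples per positive
        "neumf_config": {"save_name": "neumf.model"},
        "gmf_config": {"save_name": "gmf.model"},
        "mlp_config": {"save_name": "mlp.model"},
        "gcn_config": {"save_name": "gcn.model"},
    }

    trainer = NCF_train(config)  # loads the dataset and builds data loaders
    trainer.train()              # trains each NCF variant in turn and evaluates it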