def __init__(self, config):
    """Initialize NGCF_train Class.

    Args:
        config (dict): All the parameters for the model.
    """
    self.config = config
    super(NGCF_train, self).__init__(self.config)
    self.load_dataset()
    self.build_data_loader()
    # Consistency fix: the sibling constructors and train() build the engine
    # with the full config dict (build_data_loader stores norm_adj/n_users/
    # n_items at the top level of self.config, which the engine needs), so
    # pass self.config rather than only self.config["model"].
    self.engine = NGCFEngine(self.config)
def __init__(self, config):
    """Set up the trainer: store the config, load data, and build the engine.

    Args:
        config (dict): All the parameters for the model
    """
    self.config = config
    super(NGCF_train, self).__init__(config)
    # Prepare the dataset before the engine so adjacency/statistics are ready.
    self.load_dataset()
    self.build_data_loader()
    self.engine = NGCFEngine(config)
def train(self):
    """Train the model.

    Builds the loader for the configured loss ("bpr" or "bce"), runs the
    base-class training loop, and records the wall-clock run time.

    Returns:
        The best validation performance tracked by the eval engine.

    Raises:
        ValueError: If ``config["model"]["loss"]`` is neither "bpr" nor "bce".
    """
    self.monitor = Monitor(
        log_dir=self.config["system"]["run_dir"], delay=1, gpu_id=self.gpu_id
    )
    self.model_save_dir = os.path.join(
        self.config["system"]["model_save_dir"], self.config["model"]["save_name"]
    )
    # Hoist the nested lookup: the loss type lives under config["model"].
    loss_type = self.config["model"]["loss"]
    if loss_type == "bpr":
        train_loader = self.data.instance_bpr_loader(
            batch_size=self.config["model"]["batch_size"],
            device=self.config["model"]["device_str"],
        )
    elif loss_type == "bce":
        train_loader = self.data.instance_bce_loader(
            num_negative=self.config["model"]["num_negative"],
            batch_size=self.config["model"]["batch_size"],
            device=self.config["model"]["device_str"],
        )
    else:
        # Bug fix: the original interpolated self.config['loss'], a key that
        # does not exist in this nested config layout, so the f-string itself
        # raised KeyError instead of the intended ValueError.
        raise ValueError(
            f"Unsupported loss type {loss_type}, try other options: 'bpr' or 'bce'"
        )
    self.engine = NGCFEngine(self.config)
    self._train(self.engine, train_loader, self.model_save_dir)
    self.config["run_time"] = self.monitor.stop()
    return self.eval_engine.best_valid_performance
class NGCF_train(TrainEngine):
    """An instance class from the TrainEngine base class."""

    def __init__(self, config):
        """Constructor

        Args:
            config (dict): All the parameters for the model
        """
        self.config = config
        super(NGCF_train, self).__init__(self.config)
        self.load_dataset()
        self.build_data_loader()
        self.engine = NGCFEngine(self.config)

    def build_data_loader(self):
        """Precompute the normalized adjacency matrix and dataset statistics.

        Results are stashed into ``self.config`` so the engine/model can
        read ``norm_adj``, ``num_batch``, ``n_users`` and ``n_items``.
        """
        # ToDo: Please define the directory to store the adjacent matrix
        plain_adj, norm_adj, mean_adj = self.dataset.get_adj_mat()
        norm_adj = sparse_mx_to_torch_sparse_tensor(norm_adj)
        self.config["norm_adj"] = norm_adj
        # Bug fix: the bare name `config` is undefined in this scope; the
        # batch size must come from self.config.
        self.config["num_batch"] = (
            self.dataset.n_train // self.config["batch_size"] + 1
        )
        self.config["n_users"] = self.dataset.n_users
        self.config["n_items"] = self.dataset.n_items

    def train(self):
        """Run the epoch loop with checkpointing and early stopping."""
        self.monitor = Monitor(
            log_dir=self.config["run_dir"], delay=1, gpu_id=self.gpu_id
        )
        self.model_dir = os.path.join(
            self.config["model_save_dir"], self.config["save_name"]
        )
        # Bug fix: `config["max_epoch"]` referenced the undefined bare name
        # `config`; read the epoch budget from self.config instead.
        for epoch in range(self.config["max_epoch"]):
            print(f"Epoch {epoch} starts !")
            print("-" * 80)
            if epoch > 0 and self.eval_engine.n_no_update == 0:
                # previous epoch have already obtained better result
                self.engine.save_checkpoint(model_dir=self.model_dir)
            if self.eval_engine.n_no_update >= MAX_N_UPDATE:
                print(
                    "Early stop criterion triggered, no performance update for {:} times".format(
                        MAX_N_UPDATE
                    )
                )
                break
            train_loader = self.dataset
            self.engine.train_an_epoch(epoch_id=epoch, train_loader=train_loader)
            self.eval_engine.train_eval(
                self.dataset.valid[0], self.dataset.test[0], self.engine.model, epoch
            )
        self.config["run_time"] = self.monitor.stop()

    def test(self):
        """Reload the best checkpoint and delegate evaluation to the base class."""
        self.engine.resume_checkpoint(model_dir=self.model_dir)
        super(NGCF_train, self).test()