def set_train_mode(self):
    """
    Prepare everything needed for training.

    Pipeline
        1. read raw data (DataReader)
        2. build vocabs (DataReader, Token)
        3. index tokens (DataReader, Token)
        4. convert to DataSet (DataReader)
        5. create DataLoader (DataLoader)
        6. define model and optimizer
        7. hand model and optimizer over to the trainer

    Returns:
        A ``(train_loader, valid_loader, optimizer)`` tuple, where
        ``optimizer`` is the ``"optimizer"`` entry of the dict produced
        by the optimizer factory.
    """
    config_dump = pretty_json_dumps(self.config_dict)
    logger.info("Config. \n" + config_dump + "\n")

    reader, token_makers = self._create_data_and_token_makers()
    raw_data, helpers = reader.read()

    # Token & Vocab
    handler = TextHandler(token_makers, lazy_indexing=True)
    corpus = reader.filter_texts(raw_data)
    counters = handler.make_token_counters(corpus, config=self.config)
    handler.build_vocabs(counters)
    handler.index(raw_data, reader.text_columns)

    # Iterator
    datasets = reader.convert_to_dataset(raw_data, helpers=helpers)  # with name
    self.config.iterator.cuda_devices = self.config.cuda_devices
    loaders = self._create_by_factory(
        DataLoaderFactory, self.config.iterator, param={"datasets": datasets}
    )
    # The factory result must unpack into exactly three loaders; the test
    # loader is not used in training mode.
    train_loader, valid_loader, _test_loader = loaders

    # Checkpoints (when present) contain both model and optimizer state.
    ckpt_dir = Path(self.config.trainer.log_dir) / "checkpoint"
    checkpoints = (
        self._load_exist_checkpoints(ckpt_dir) if ckpt_dir.exists() else None
    )
    resume = checkpoints is not None

    if resume:
        model = self._create_model(token_makers, checkpoint=checkpoints)
    else:
        model = self._create_model(token_makers, helpers=helpers)

    op_dict = self._create_by_factory(
        OptimizerFactory, self.config.optimizer, param={"model": model}
    )
    if resume:
        # Restore the optimizer state only after the optimizer exists.
        utils.load_optimizer_checkpoint(op_dict["optimizer"], checkpoints)

    self.set_trainer(model, op_dict=op_dict)
    return train_loader, valid_loader, op_dict["optimizer"]
def _set_saved_config(self):
    """
    Swap the active config for the one stored in the model checkpoint.

    The checkpoint's ``"config"`` entry becomes ``self.config_dict`` and
    is loaded into a new ``NestedNamespace`` that replaces ``self.config``.
    Only ``use_gpu`` is carried over from the previous config.
    """
    checkpoint_config = self.model_checkpoint["config"]
    self.config_dict = checkpoint_config

    logger.info("Load saved_config ...")
    logger.info(pretty_json_dumps(checkpoint_config))

    restored = NestedNamespace()
    restored.load_from_json(checkpoint_config)

    # Read the current flag before self.config is overwritten below.
    previous_use_gpu = self.config.use_gpu

    self.config = restored
    self.config.use_gpu = previous_use_gpu
def _set_saved_config(self, cuda_devices):
    """
    Swap the active config for the one stored in the model checkpoint.

    The saved iterator ``batch_size`` is floor-divided by the number of
    entries in ``cuda_devices`` before the config is loaded. The
    checkpoint's ``"config"`` entry becomes ``self.config_dict`` and is
    loaded into a new ``NestedNamespace`` that replaces ``self.config``.
    ``use_gpu`` is carried over from the previous config and
    ``cuda_devices`` is stored on the new one.

    Args:
        cuda_devices: sized collection of device identifiers, or ``None``.
            ``None`` or an empty collection is treated as a single device,
            so the saved batch size is left unchanged instead of raising
            ``TypeError`` / ``ZeroDivisionError``.
    """
    saved_config_dict = self.model_checkpoint["config"]

    # Guard the divisor: no devices given (None or empty) counts as one.
    device_count = len(cuda_devices) if cuda_devices is not None else 0
    if device_count == 0:
        device_count = 1

    # NOTE(review): this rewrites the dict held by self.model_checkpoint in
    # place, so calling this method again divides the batch size again.
    iterator_config = saved_config_dict["iterator"]
    iterator_config["batch_size"] = iterator_config["batch_size"] // device_count

    self.config_dict = saved_config_dict

    logger.info("Load saved_config ...")
    logger.info(pretty_json_dumps(saved_config_dict))

    saved_config = NestedNamespace()
    saved_config.load_from_json(saved_config_dict)

    # Read the current flag before self.config is overwritten below.
    is_use_gpu = self.config.use_gpu

    self.config = saved_config
    self.config.use_gpu = is_use_gpu
    self.config.cuda_devices = cuda_devices