def save_checkpoint(self, epoch_score, model):
    # np.nan fails membership tests (nan != nan), so guard NaN explicitly.
    if not np.isnan(epoch_score) and epoch_score not in (-np.inf, np.inf):
        logger.info(
            "[Epoch callback] Validation {} improved ({} --> {}). Saving model!"
            .format(self.metric_name, self.val_score, epoch_score))
        model.save(self.model_path)
        self.val_score = epoch_score
def save_args(args):
    param_path = os.path.join(args.model_dir, "params.json")
    logger.info(" [*] MODEL dir: %s" % args.model_dir)
    logger.info(" [*] PARAM path: %s" % param_path)
    with open(param_path, 'w') as fp:
        json.dump(args.__dict__, fp, indent=4, sort_keys=True)
def create_dir(dir_path, mode=None):
    """
    Create a directory.

    :param dir_path: the directory to create
    :param mode: the permissions to set on dir_path (e.g. 0o700)
    """
    if not os.path.exists(dir_path):
        logger.info("[*] Make directories : {}".format(dir_path))
        os.makedirs(dir_path)
        if mode is not None:
            os.chmod(dir_path, mode)
def load(self, model_path, device="cuda", strict=False):
    "Restore the model's basic training state."
    self.device = device
    # Wrap in torch.device: a device object never compares equal to a plain string.
    if next(self.parameters()).device != torch.device(self.device):
        self.to(self.device)
    model_dict = torch.load(model_path, map_location=torch.device(device))
    # Honor the caller's strict flag instead of hard-coding strict=False.
    self.load_state_dict(model_dict["state_dict"], strict=strict)
    self.optimizer.load_state_dict(model_dict["optimizer"])
    self.scheduler.load_state_dict(model_dict["scheduler"])
    self.epoch = model_dict["epoch"]
    self.fp16 = model_dict["fp16"]
    self.metrics = model_dict["metrics"]
    self.best_scores = model_dict["best_scores"]
    self.the_one = model_dict["the_one"]
    logger.info(f" [Restore model from] {model_path}...")
def log_frozen_and_tunable_parameter_names(model: torch.nn.Module) -> None:
    frozen_parameter_names, tunable_parameter_names = \
        get_frozen_and_tunable_parameter_names(model)
    logger.info("The following parameters are Frozen (without gradient):")
    for name in frozen_parameter_names:
        logger.info(name)
    logger.info("The following parameters are Tunable (with gradient):")
    for name in tunable_parameter_names:
        logger.info(name)
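# NOTE: get_frozen_and_tunable_parameter_names is not defined in this section.
# A minimal sketch, assuming it splits parameters by requires_grad (the
# AllenNLP helper of the same name behaves this way):
from typing import List, Tuple

import torch


def get_frozen_and_tunable_parameter_names(
        model: torch.nn.Module) -> Tuple[List[str], List[str]]:
    # Parameters with requires_grad=False are frozen; the rest are tunable.
    frozen = [name for name, param in model.named_parameters()
              if not param.requires_grad]
    tunable = [name for name, param in model.named_parameters()
               if param.requires_grad]
    return frozen, tunable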
def process_imdb(path='aclImdb/', csv_save_path='aclImdb/'):
    # Note: csv_save_path should point to a .csv file path, not a directory.
    # Map each sub-directory to a list of its review texts.
    sets_dict = {
        'train/pos/': [],
        'train/neg/': [],
        'test/pos/': [],
        'test/neg/': []
    }
    # Load the data (loop variable renamed from `dataset` to avoid shadowing
    # the DataFrame built below)
    for subset in tqdm(sets_dict):
        sub_dir = os.path.join(path, subset)
        sets_dict[subset] = [text for text in load_data(sub_dir)]
    # Concatenate training and testing examples into one dataset
    dataset = pd.concat([
        pd.DataFrame({'review': sets_dict['train/pos/'], 'sentiment': 1}),
        pd.DataFrame({'review': sets_dict['test/pos/'], 'sentiment': 1}),
        pd.DataFrame({'review': sets_dict['train/neg/'], 'sentiment': 0}),
        pd.DataFrame({'review': sets_dict['test/neg/'], 'sentiment': 0}),
    ], axis=0, ignore_index=True)
    dataset.to_csv(csv_save_path, header=False, encoding='utf-8', index=False)
    logger.info(f"[Saved csv file] {csv_save_path}")
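# NOTE: load_data is not shown in this section. A minimal sketch, assuming it
# yields one review string per .txt file in the standard aclImdb layout:
import os


def load_data(sub_dir):
    # Yield the contents of each .txt review file found in sub_dir.
    for fname in sorted(os.listdir(sub_dir)):
        if fname.endswith(".txt"):
            with open(os.path.join(sub_dir, fname), encoding="utf-8") as f:
                yield f.read().strip()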
def save(self, model_path):
    "Save the model, optimizer, LR scheduler, and basic training state."
    model_state_dict = self.state_dict()
    opt_state_dict = self.optimizer.state_dict() if self.optimizer is not None else None
    sch_state_dict = self.scheduler.state_dict() if self.scheduler is not None else None
    model_dict = {
        "state_dict": model_state_dict,
        "optimizer": opt_state_dict,
        "scheduler": sch_state_dict,
        "epoch": self.epoch,
        "fp16": self.fp16,
        "metrics": self.metrics,
        "best_scores": self.best_scores,
        "the_one": self.the_one,
    }
    torch.save(model_dict, model_path)
    logger.info(f" [Save model at] {model_path}...")
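# A round trip with the load()/save() pair above might look like this
# (the checkpoint path is illustrative):
#
#     model.save("./weights/checkpoint.bin")                  # persist weights + state
#     model.load("./weights/checkpoint.bin", device="cuda")   # resume later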
def on_epoch_end(self, model):
    "Invoked as a callback via the CallbackRunner object."
    epoch_score = model.metrics[self.model_state][self.metric_name]
    # Negate the score in "min" mode so that larger is always better.
    if self.mode == "min":
        score = -1.0 * epoch_score
    else:
        score = np.copy(epoch_score)
    if self.best_score is None:
        self.best_score = score
        self.save_checkpoint(epoch_score, model)
    elif score < self.best_score + self.delta:
        self.counter += 1
        logger.info(
            " [Epoch callback] EarlyStopping counter: {} out of {}".format(
                self.counter, self.patience))
        # Set the early-stopping sentinel for the main training loop.
        if self.counter >= self.patience:
            model.model_state = ModelState.END
    else:
        self.best_score = score
        self.save_checkpoint(epoch_score, model)
        self.counter = 0
def train_w2v_model(model_save_dir, sentences: MySentences):
    logger.info('Start...')
    model = Word2Vec(
        sentences,
        sg=0,             # CBOW architecture
        hs=1,             # hierarchical softmax
        vector_size=128,
        window=12,
        min_count=1,
        workers=2,
        epochs=5,
    )
    logger.info(model.max_final_vocab)
    model.wv.save_word2vec_format(model_save_dir, binary=False)
    logger.info("Finished.")
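# NOTE: MySentences is not defined in this section. gensim only requires an
# iterable that re-yields token lists on every pass, so a minimal sketch
# (the corpus path and whitespace tokenization are assumptions) could be:
class MySentences:
    "Stream one whitespace-tokenized sentence per line from a text file."

    def __init__(self, corpus_path):
        self.corpus_path = corpus_path

    def __iter__(self):
        with open(self.corpus_path, encoding="utf-8") as f:
            for line in f:
                yield line.split()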
def backup_file(path):
    root, ext = os.path.splitext(path)
    new_path = "{}.backup_{}{}".format(root, get_time(), ext)
    os.rename(path, new_path)
    logger.info("[*] {} has backup: {}".format(path, new_path))
def remove_file(path):
    if os.path.exists(path):
        logger.info(" [*] Removed: {}".format(path))
        os.remove(path)
def main():
    # prepare ################################################
    logger.info(get_machine_info())
    filter_warnings()
    prepare_seed(randseed)

    # data ###################################################
    raw_data = pd.read_csv("/tmp/aclImdb_v1.tar/aclImdb/imbd.csv",
                           names=['review', 'sentiment']).fillna(" ")
    print(raw_data.head(5))
    df_train, df_valid = model_selection.train_test_split(
        raw_data,
        test_size=0.1,
        random_state=randseed,
        stratify=raw_data.sentiment.values)
    # df_train = df_train.reset_index(drop=True)
    # df_valid = df_valid.reset_index(drop=True)

    # For testing: keep only a small slice of each split
    df_train = df_train.reset_index(drop=True)[:2000]
    df_valid = df_valid.reset_index(drop=True)[:1000]

    train_dataset = BERTBinaryClsDa(text=df_train.review.values,
                                    target=df_train.sentiment.values)
    valid_dataset = BERTBinaryClsDa(text=df_valid.review.values,
                                    target=df_valid.sentiment.values)

    # Model ###################################################
    n_train_steps = int(len(df_train) / batch_size * epochs)
    model = BERTBaseUncased(num_train_steps=n_train_steps,
                            num_warmup_steps=warmup_steps)
    tf_callback = raych.callbacks.TensorBoardLogger(log_dir=".logs/")
    earlystop = raych.callbacks.EarlyStopping(
        monitor="valid_loss",
        model_path="./weights/model_early_stop.bin")

    # Train ###################################################
    # Optionally run the learning-rate finder first:
    # model.find_lr(
    #     train_dataset,
    #     show_plot=True,
    #     fp16=True,
    #     train_bs=batch_size,
    #     valid_bs=batch_size,
    #     method='linear',
    #     init_value=1e-7,
    #     final_value=10,
    # )

    # Train
    model.fit(train_dataset,
              valid_dataset=valid_dataset,
              train_bs=batch_size,
              valid_bs=batch_size,
              device=device,
              epochs=epochs,
              callbacks=[tf_callback, earlystop],
              fp16=True,
              best_metric='accuracy',
              enable_fgm=False,
              learn_rate=3e-5)

    # Optionally train with FGM adversarial training instead:
    # model.fit(
    #     train_dataset,
    #     valid_dataset=valid_dataset,
    #     train_bs=batch_size,
    #     device=device,
    #     epochs=epochs,
    #     callbacks=[tf_callback, earlystop],
    #     learn_rate=3e-5,
    #     fp16=True,
    #     best_metric='accuracy',
    #     enable_fgm=True,
    #     reload_model="./weights/best_1_model_epoch3.bin"
    # )

    model.save("./weights/model_last_epoch.bin")
def model_state(self, value):
    # Actions can be performed here whenever model_state is set.
    self._model_state = value
    logger.info(f" [In State] ### {value} EPOCH {self.epoch} ### ...")
def custom_scheduler(self, *args, **kwargs):
    "Define a custom learning-rate scheduling method."
    logger.info("[Config] custom scheduler is not used")
    return None
{ "params": [ p for name, p in param_optimizer if any(nd in name for nd in no_decay) ], "weight_decay": 0.0, }, ] opt = torch.optim.Adam(optimizer_parameters, lr=self.lr) # lr在fit时传入 return opt if __name__ == "__main__": # prepare ################################################ logger.info(get_machine_info()) filter_warnings() prepare_seed(23) batch_size = 64 max_vocab_size = 25000 max_seq_len = 128 embed_dim = 100 hid_dim = 256 out_dim = 1 n_layers = 2 bidirectional = True dropout = 0.5 device = "cuda" epochs = 10