def default_params():
    """Train an XGBoost classifier with library-default hyper-parameters.

    Fits on the module-level train split, prints accuracy for the
    train/eval/test splits, then plots the learning curve for tuning.
    """
    # Only the seed is pinned; everything else stays at library defaults.
    model = XGBoost(n_estimators=100, random_state=123)
    model.fit(x_train, y_train, eval_set=(x_eval, y_eval))

    # Score every split with the shared accuracy helper.
    pred_tr = model.predict(x_train)
    pred_ev = model.predict(x_eval)
    pred_te = model.predict(x_test)
    print("train acc = {0}".format(acc(y_train, pred_tr)))
    print("eval acc = {0}".format(acc(y_eval, pred_ev)))
    print("test acc = {0}".format(acc(y_test, pred_te)))

    # Visualize train/eval curves to guide manual parameter tuning.
    model.plot_learning_curve()
def test(model, iterator, criterion):
    """Run one evaluation pass over `iterator` and return averaged metrics.

    Args:
        model: seq2seq model called as model(enc_input, dec_input).
        iterator: batch iterator whose batches expose .text, .target_text
            and .SA fields (text, target text, sentiment label).
        criterion: loss function over (flattened logits, flattened targets).

    Returns:
        Tuple (mean_loss, mean_acc) averaged over the number of batches,
        as plain Python floats. Returns (0.0, 0.0) for an empty iterator.
    """
    total_loss = 0.0
    iter_num = 0
    te_acc = 0.0
    model.eval()
    # One outer no_grad covers the whole pass; the original additionally
    # wrapped the accuracy call in a second, redundant no_grad.
    with torch.no_grad():
        for batch in iterator:
            enc_input, dec_input, enc_label = batch.text, batch.target_text, batch.SA
            # Teacher-forced target: decoder input shifted left by one token.
            dec_output = dec_input[:, 1:]
            dec_outputs = torch.zeros(dec_output.size(0), args.max_len).type_as(dec_input.data)
            # Apply emotion / speech style to the batch.
            enc_input, dec_input, dec_outputs = \
                styling(enc_input, dec_input, dec_output, dec_outputs, enc_label, args, TEXT, LABEL)
            y_pred = model(enc_input, dec_input)
            # Flatten to (N, vocab) vs (N,) for the criterion.
            y_pred = y_pred.reshape(-1, y_pred.size(-1))
            dec_output = dec_outputs.view(-1).long()
            real_value_index = [dec_output != 1]  # <pad> == 1: exclude padding from the loss
            loss = criterion(y_pred[real_value_index], dec_output[real_value_index])
            test_acc = acc(y_pred, dec_output)
            # Accumulate plain floats instead of tensors: no per-batch
            # tensors stay alive and the deprecated .data access goes away.
            total_loss += loss.item()
            te_acc += float(test_acc)
            iter_num += 1
    # Guard: the original raised ZeroDivisionError on an empty iterator.
    if iter_num == 0:
        return 0.0, 0.0
    return total_loss / iter_num, te_acc / iter_num
def tuned_params():
    """Train an XGBoost classifier with hand-tuned hyper-parameters.

    Uses early stopping on the eval split, reports the best iteration,
    and prints accuracy for the train/eval/test splits.
    """
    # Hand-tuned settings gathered in one place for readability.
    hyper = dict(
        n_estimators=100,
        max_depth=6,
        learning_rate=0.1,
        objective='binary:logistic',
        gamma=0,
        reg_lambda=3,
        subsample=1,
        colsample=1,
        random_state=123,
    )
    model = XGBoost(**hyper)
    # Stop once the eval metric fails to improve for 20 consecutive rounds.
    model.fit(x_train, y_train, eval_set=(x_eval, y_eval), early_stopping_rounds=20)
    print('best iter: {}'.format(model.best_iter))

    # Accuracy on all three splits.
    pred_tr = model.predict(x_train)
    pred_ev = model.predict(x_eval)
    pred_te = model.predict(x_test)
    print("train acc = {0}".format(acc(y_train, pred_tr)))
    print("eval acc = {0}".format(acc(y_eval, pred_ev)))
    print("test acc = {0}".format(acc(y_test, pred_te)))
def train(model, iterator, optimizer, criterion):
    """Run one training epoch and return averaged metrics.

    Args:
        model: seq2seq model called as model(enc_input, dec_input).
        iterator: batch iterator whose batches expose .text, .target_text
            and .SA fields (text, target text, sentiment label).
        optimizer: optimizer stepped once per batch.
        criterion: loss function over (flattened logits, flattened targets).

    Returns:
        Tuple (mean_loss, mean_acc) averaged over the number of batches,
        as plain Python floats. Returns (0.0, 0.0) for an empty iterator.
    """
    total_loss = 0.0
    iter_num = 0
    tr_acc = 0.0
    model.train()
    for step, batch in enumerate(iterator):
        optimizer.zero_grad()
        enc_input, dec_input, enc_label = batch.text, batch.target_text, batch.SA
        # Teacher-forced target: decoder input shifted left by one token.
        dec_output = dec_input[:, 1:]
        dec_outputs = torch.zeros(dec_output.size(0), args.max_len).type_as(dec_input.data)
        # Apply emotion / speech style to the batch.
        enc_input, dec_input, dec_outputs = \
            styling(enc_input, dec_input, dec_output, dec_outputs, enc_label, args, TEXT, LABEL)
        y_pred = model(enc_input, dec_input)
        # Flatten to (N, vocab) vs (N,) for the criterion.
        y_pred = y_pred.reshape(-1, y_pred.size(-1))
        dec_output = dec_outputs.view(-1).long()
        # Extract indices of non-padding targets; padding is excluded from the loss.
        real_value_index = [dec_output != 1]  # <pad> == 1
        loss = criterion(y_pred[real_value_index], dec_output[real_value_index])
        loss.backward()
        optimizer.step()
        with torch.no_grad():
            train_acc = acc(y_pred, dec_output)
        # BUG FIX: the original did `total_loss += loss`, accumulating a
        # grad-requiring tensor and chaining every batch's autograd graph
        # into the accumulator — memory grows over the epoch. Accumulate
        # detached Python floats instead.
        total_loss += loss.item()
        tr_acc += float(train_acc)
        iter_num += 1
        # Periodic qualitative check of predictions (prints samples).
        train_test(step, y_pred, dec_output, real_value_index, enc_input, args, TEXT, LABEL)
    # Guard: the original raised ZeroDivisionError on an empty iterator.
    if iter_num == 0:
        return 0.0, 0.0
    return total_loss / iter_num, tr_acc / iter_num
def main(parser):
    """Entry point: build vocab/tokenizer/model, then train a Transformer
    chatbot with periodic evaluation and best-checkpoint saving.

    Args:
        parser: argparse.ArgumentParser providing --data_dir and --model_dir.
    """
    # Config: one JSON config per directory (data layout vs model hyper-params).
    args = parser.parse_args()
    data_dir = Path(args.data_dir)
    model_dir = Path(args.model_dir)
    data_config = Config(json_path=data_dir / 'config.json')
    model_config = Config(json_path=model_dir / 'config.json')

    # Vocab & Tokenizer
    with open(data_config.token2idx_vocab, mode='rb') as io:
        token2idx_vocab = json.load(io)
        print("token2idx_vocab: ", token2idx_vocab)
    vocab = Vocabulary(token2idx=token2idx_vocab)
    tokenizer = Tokenizer(vocab=vocab, split_fn=mecab_token_pos_flat_fn,
                          pad_fn=keras_pad_fn, maxlen=model_config.maxlen)
    model_config.vocab_size = len(vocab.token2idx)

    # Model & Model Params
    model = Transformer(config=model_config, vocab=vocab)

    # Train & Val Datasets
    tr_ds = ChatbotDataset(data_config.train, tokenizer.list_of_string_to_arr_of_pad_token_ids)
    tr_dl = DataLoader(tr_ds, batch_size=model_config.batch_size, shuffle=True,
                       num_workers=4, drop_last=False)
    val_ds = ChatbotDataset(data_config.validation, tokenizer.list_of_string_to_arr_of_pad_token_ids)
    val_dl = DataLoader(val_ds, batch_size=model_config.batch_size, shuffle=True,
                        num_workers=4, drop_last=False)

    # loss: padding token (PAD_ID) is ignored in the cross-entropy.
    loss_fn = nn.CrossEntropyLoss(ignore_index=vocab.PAD_ID)  # nn.NLLLoss()
    # optim
    opt = optim.Adam(
        params=model.parameters(), lr=model_config.learning_rate
    )  # torch.optim.SGD(params=model.parameters(), lr=model_config.learning_rate)
    # scheduler = ReduceLROnPlateau(opt, patience=5)  # Check
    # Warm up the LR over all epochs up to 8x the base rate.
    scheduler = GradualWarmupScheduler(opt, multiplier=8, total_epoch=model_config.epochs)
    device = torch.device(
        'cuda') if torch.cuda.is_available() else torch.device('cpu')
    model.to(device)

    # save
    # writer = SummaryWriter('{}/runs'.format(model_dir))
    checkpoint_manager = CheckpointManager(model_dir)
    summary_manager = SummaryManager(model_dir)
    best_val_loss = 1e+10
    # NOTE(review): best_train_acc is initialized but never updated below,
    # so `is_best` is effectively `tr_acc > 0` — confirm this is intended.
    best_train_acc = 0

    # load: resume from the best checkpoint if one exists.
    if (model_dir / 'best.tar').exists():
        print("pretrained model exists")
        checkpoint = checkpoint_manager.load_checkpoint('best.tar')
        model.load_state_dict(checkpoint['model_state_dict'])

    # Train
    for epoch in tqdm(range(model_config.epochs), desc='epoch', total=model_config.epochs):
        scheduler.step(epoch)
        print("epoch : {}, lr: {}".format(epoch, opt.param_groups[0]['lr']))
        tr_loss = 0
        tr_acc = 0
        model.train()
        for step, mb in tqdm(enumerate(tr_dl), desc='steps', total=len(tr_dl)):
            opt.zero_grad()
            enc_input, dec_input, dec_output = map(lambda elm: elm.to(device), mb)
            y_pred = model(enc_input, dec_input)
            # Detached copies kept for decoding/printing sample predictions.
            y_pred_copy = y_pred.detach()
            dec_output_copy = dec_output.detach()

            # Reshape for loss computation: (N, vocab) logits vs (N,) targets.
            y_pred = y_pred.reshape(-1, y_pred.size(-1))
            dec_output = dec_output.view(-1).long()
            # Extract indices of non-padding values (pad id == 0 here).
            real_value_index = [dec_output != 0]
            # Padding is excluded from the loss.
            mb_loss = loss_fn(
                y_pred[real_value_index], dec_output[real_value_index])  # Input: (N, C) Target: (N)
            mb_loss.backward()
            opt.step()

            with torch.no_grad():
                mb_acc = acc(y_pred, dec_output)

            tr_loss += mb_loss.item()
            # NOTE(review): plain assignment, not `+=` — tr_acc is the
            # *last batch's* accuracy, unlike tr_loss which is a running
            # sum. Confirm whether a running average was intended.
            tr_acc = mb_acc.item()
            tr_loss_avg = tr_loss / (step + 1)
            tr_summary = {'loss': tr_loss_avg, 'acc': tr_acc}
            total_step = epoch * len(tr_dl) + step

            # Eval: every summary_step global steps (skipping step 0).
            if total_step % model_config.summary_step == 0 and total_step != 0:
                print("train: ")
                decoding_from_result(enc_input, y_pred_copy, dec_output_copy, tokenizer)
                model.eval()
                print("eval: ")
                val_summary = evaluate(model, val_dl, {
                    'loss': loss_fn, 'acc': acc
                }, device, tokenizer)
                val_loss = val_summary['loss']
                # writer.add_scalars('loss', {'train': tr_loss_avg,
                #                             'val': val_loss}, epoch * len(tr_dl) + step)
                tqdm.write(
                    'epoch : {}, step : {}, '
                    'tr_loss: {:.3f}, val_loss: {:.3f}, tr_acc: {:.2%}, val_acc: {:.2%}'
                    .format(epoch + 1, total_step, tr_summary['loss'],
                            val_summary['loss'], tr_summary['acc'], val_summary['acc']))
                val_loss = val_summary['loss']
                # is_best = val_loss < best_val_loss  # by loss
                is_best = tr_acc > best_train_acc  # by acc (should really use val_acc, not train_acc)

                # Save
                if is_best:
                    print(
                        "[Best model Save] train_acc: {}, train_loss: {}, val_loss: {}"
                        .format(tr_summary['acc'], tr_summary['loss'], val_loss))
                    # Move weights to CPU before saving so the checkpoint
                    # also loads on CPU-only machines.
                    state = {
                        'epoch': epoch + 1,
                        'model_state_dict': model.to(torch.device('cpu')).state_dict(),
                        'opt_state_dict': opt.state_dict()
                    }
                    summary = {'train': tr_summary, 'validation': val_summary}
                    summary_manager.update(summary)
                    summary_manager.save('summary.json')
                    checkpoint_manager.save_checkpoint(state, 'best.tar')
                    best_val_loss = val_loss
                    # Restore the model to the training device after the
                    # temporary move to CPU above.
                    model.to(device)
                # Back to train mode after the eval pass.
                model.train()
            else:
                if step % 50 == 0:
                    print(
                        'epoch : {}, step : {}, tr_loss: {:.3f}, tr_acc: {:.2%}'
                        .format(epoch + 1, total_step, tr_summary['loss'],
                                tr_summary['acc']))
def train_generator(generator, iterator, optimizer, discriminator, ignore_padid, tokenizer=None):
    """One policy-gradient training pass for the generator (SeqGAN-style):
    decode token by token, then weight each chosen token's logit by the
    per-example reward to form the loss.

    Args:
        generator: wrapper exposing .seq2seq, .vocab, .config, .is_end_token.
        iterator: batches of (enc_input, _, dec_output, reward).
        optimizer: optimizer for the generator's parameters.
        discriminator: unused in this function body — presumably the reward
            source upstream; verify against the caller.
        ignore_padid: unused in this function body.
        tokenizer: optional; when given, generated samples are decoded to
            strings (negative samples for discriminator training).

    Returns:
        (mean_loss, mean_acc) over len(iterator) batches.
    """
    model = generator.seq2seq
    model.train()
    tr_loss = 0
    tr_acc = 0
    for step, mb in tqdm(enumerate(iterator), desc='steps', total=len(iterator)):
        optimizer.zero_grad()
        mb_loss = 0
        enc_input, _, dec_output, reward = map(lambda elm: elm.to(device), mb)
        # print('[reward]: ', reward.shape)
        # Start decoding from a column of START tokens, one row per example.
        dec_input = torch.full((enc_input.shape[0], 1),
                               generator.vocab.token2idx[generator.vocab.START_TOKEN]).long().to(device)
        # Rows whose sequence already produced an end token are skipped below.
        skip_row = []
        for i in range(generator.config.maxlen):
            # if i == generator.config.maxlen - 1:
            #     break
            # print('decode input: ', dec_input.shape)
            # print(dec_input)
            y_pred = model(enc_input, dec_input)  # y_pred: logits for the i-th predicted token [batch_size, vocab_size]
            # print('y_pred:', y_pred.shape)
            y_pred_copy = y_pred.detach()
            # Greedy choice: argmax over the vocab at the last position.
            y_pred_ids = y_pred_copy.max(dim=-1)[1]
            # print('VVVVVVVVVVVV: ', y_pred_ids[:,-1].view(-1,1))
            # print('2222222222: ', y_pred.shape)
            y_pred_ids = y_pred_ids[:, -1].view(-1, 1)
            # pred_values.append(y_pred[y_pred_ids[:,-1].view(-1,1)])
            # decoding_from_result(enc_input, y_pred, tokenizer)
            # Feed the chosen token back in for the next decode step.
            dec_input = torch.cat([dec_input, y_pred_ids], dim=1)
            # y_pred = y_pred.reshape(-1, y_pred.size(-1))
            dec_output = dec_output.view(-1).long()
            # Extract indices of non-padding values
            # real_value_index = [dec_output != 0]
            # print(real_value_index)
            # print('=================')
            # print(y_pred.shape, dec_output.shape)
            # Loss from log(P(y_t|Y_1:Y_{t-1})) * Q (reward-weighted logit).
            for idx in range(y_pred.shape[0]):
                if idx in skip_row:
                    continue
                if generator.is_end_token(y_pred_ids[idx][0]):
                    skip_row.append(idx)
                pred_value = y_pred[idx][i][y_pred_ids[idx][0]]
                # pred_values.append(pred_value)
                # NOTE(review): plain assignment — each iteration *overwrites*
                # mb_loss, so only the last non-skipped row's term reaches
                # backward(). Given `mb_loss = 0` above, accumulation
                # (`mb_loss += ...`) looks intended — confirm.
                mb_loss = -pred_value * reward[idx]  # Input: (N, C) Target: (N)
        # print('reward:', reward.shape)
        # print('loss:', mb_loss.shape)
        # Save the generated negative samples, to prepare for training the
        # Discriminator.
        if tokenizer is not None:
            str_input, str_pred = decoding_to_pair(enc_input, y_pred_copy, tokenizer)
            # print('input: ', str_input)
            # print('pred: ', str_pred)
            # print('decinput: ', decoding_to_str(dec_input, tokenizer))
        mb_loss.backward()
        optimizer.step()
        with torch.no_grad():
            y_pred = y_pred.reshape(-1, y_pred.size(-1))
            # print(y_pred.shape, dec_output.shape)
            mb_acc = acc(y_pred, dec_output)
        tr_loss += mb_loss.item()
        tr_acc += mb_acc.item()
        tr_loss_avg = tr_loss / (step + 1)
        tr_summary = {'loss': tr_loss_avg}
        # total_step = epoch * len(iterator) + step
    return tr_loss / len(iterator), tr_acc / len(iterator)