for epoch in tqdm(range(EPOCHS)):
    train_loader = torch.utils.data.DataLoader(train,
                                               batch_size=batch_size,
                                               shuffle=True)
    avg_loss = 0.
    avg_accuracy = 0.
    lossf = None
    optimizer.zero_grad()  # Bug fix - thanks to @chinhuic
    for i, (x_batch, y_batch) in tqdm(enumerate(train_loader)):
        y_pred = model(x_batch.to(device),
                       attention_mask=(x_batch > 0).to(device),
                       labels=None)
        y_batch = y_batch.unsqueeze(1)
        loss = F.binary_cross_entropy_with_logits(y_pred, y_batch.to(device))
        loss.backward()  # accumulate gradients across micro-batches
        if (i + 1) % accumulation_steps == 0:  # Wait for several backward steps
            optimizer.step()                   # Now we can do an optimizer step
            optimizer.zero_grad()
        if lossf:
            lossf = 0.98 * lossf + 0.02 * loss.item()  # exponential moving average
        else:
            lossf = loss.item()
        avg_loss += loss.item() / len(train_loader)
        avg_accuracy += torch.mean(
            ((torch.sigmoid(y_pred[:, 0]) > 0.5) ==
             (y_batch[:, 0] > 0.5).to(device)).to(torch.float)).item() / len(train_loader)
    tqdm.write("avg_loss: %f, avg_accuracy: %f" % (avg_loss, avg_accuracy))
torch.save(model.state_dict(), output_model_file)
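# A hedged refinement of the loop above (an assumption, not part of the original
# kernel): with gradient accumulation, the effective batch is
# batch_size * accumulation_steps, and scaling each micro-batch loss by
# 1 / accumulation_steps makes the accumulated gradient match one large batch.
def accumulate_step(loss, optimizer, step_idx, accumulation_steps):
    """Backward on a scaled loss; step the optimizer every accumulation_steps calls."""
    (loss / accumulation_steps).backward()
    if (step_idx + 1) % accumulation_steps == 0:
        optimizer.step()
        optimizer.zero_grad()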
def main():
    train_df = pd.read_csv(TRAIN_PATH)
    # identity labels inferred for the unlabeled part of the data
    for column in ['male', 'female', 'homosexual_gay_or_lesbian', 'christian',
                   'jewish', 'muslim', 'black', 'white',
                   'psychiatric_or_mental_illness']:
        train_df[column] = np.load(
            "../input/identity-column-data/{}_labeled.npy".format(column))
    fold_df = pd.read_csv(FOLD_PATH)

    # y = np.where(train_df['target'] >= 0.5, 1, 0)
    y = train_df['target'].values
    y_aux = train_df[AUX_COLUMNS].values

    identity_columns_new = []
    for column in identity_columns + ['target']:
        train_df[column + "_bin"] = np.where(train_df[column] >= 0.5, True, False)
        if column != "target":
            identity_columns_new.append(column + "_bin")

    # Sample weights for the bias metric: each of the four terms contributes a quarter.
    # Overall
    weights = np.ones((len(train_df), )) / 4
    # Subgroup
    weights += (train_df[identity_columns].fillna(0).values >= 0.5).sum(
        axis=1).astype(bool).astype(int) / 4
    # Background Positive, Subgroup Negative
    weights += (
        ((train_df["target"].values >= 0.5).astype(bool).astype(int) +
         (1 - (train_df[identity_columns].fillna(0).values >= 0.5).sum(
             axis=1).astype(bool).astype(int))) > 1).astype(bool).astype(int) / 4
    # Background Negative, Subgroup Positive
    weights += (
        ((train_df["target"].values < 0.5).astype(bool).astype(int) +
         (train_df[identity_columns].fillna(0).values >= 0.5).sum(
             axis=1).astype(bool).astype(int)) > 1).astype(bool).astype(int) / 4
    loss_weight = 0.5

    with timer('preprocessing text'):
        # df["comment_text"] = [analyzer_embed(text) for text in df["comment_text"]]
        train_df['comment_text'] = train_df['comment_text'].astype(str)
        train_df = train_df.fillna(0)

    with timer('load embedding'):
        tokenizer = BertTokenizer.from_pretrained(BERT_MODEL_PATH,
                                                  cache_dir=None,
                                                  do_lower_case=True)
        X_text = convert_lines_head_tail(
            train_df["comment_text"].fillna("DUMMY_VALUE"), max_len, head_len,
            tokenizer)
        del tokenizer
        gc.collect()
    LOGGER.info(f"X_text {X_text.shape}")

    with timer('train'):
        train_index = fold_df.fold_id != fold_id
        valid_index = fold_df.fold_id == fold_id
        X_train, y_train, y_aux_train, w_train = (X_text[train_index].astype("int32"),
                                                  y[train_index],
                                                  y_aux[train_index],
                                                  weights[train_index])
        X_val, y_val, y_aux_val, w_val = (X_text[valid_index].astype("int32"),
                                          y[valid_index],
                                          y_aux[valid_index],
                                          weights[valid_index])
        test_df = train_df[valid_index]
        del X_text, y, y_aux, weights, train_index, valid_index, train_df
        gc.collect()

        model = BertForSequenceClassification(bert_config, num_labels=n_labels)
        model.load_state_dict(torch.load(model_path))
        model.zero_grad()
        model = model.to(device)

        # targets are packed as [main target, sample weight, aux targets...]
        y_train = np.concatenate(
            (y_train.reshape(-1, 1), w_train.reshape(-1, 1), y_aux_train),
            axis=1).astype("float32")
        y_val = np.concatenate(
            (y_val.reshape(-1, 1), w_val.reshape(-1, 1), y_aux_val),
            axis=1).astype("float32")

        train_dataset = torch.utils.data.TensorDataset(
            torch.tensor(X_train, dtype=torch.long),
            torch.tensor(y_train, dtype=torch.float32))
        valid = torch.utils.data.TensorDataset(
            torch.tensor(X_val, dtype=torch.long),
            torch.tensor(y_val, dtype=torch.float32))
        ran_sampler = torch.utils.data.RandomSampler(train_dataset)
        len_sampler = LenMatchBatchSampler(ran_sampler,
                                           batch_size=batch_size,
                                           drop_last=False)
        train_loader = torch.utils.data.DataLoader(train_dataset,
                                                   batch_sampler=len_sampler)
        valid_loader = torch.utils.data.DataLoader(valid,
                                                   batch_size=batch_size * 2,
                                                   shuffle=False)
        LOGGER.info("done data loader setup")

        param_optimizer = list(model.named_parameters())
        no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
        optimizer_grouped_parameters = [{
            'params': [p for n, p in param_optimizer
                       if not any(nd in n for nd in no_decay)],
            'weight_decay': 0.01
        }, {
            'params': [p for n, p in param_optimizer
                       if any(nd in n for nd in no_decay)],
            'weight_decay': 0.0
        }]
        num_train_optimization_steps = int(epochs * len(X_train) / batch_size /
                                           accumulation_steps)
        total_step = int(epochs * len(X_train) / batch_size)

        optimizer = BertAdam(optimizer_grouped_parameters,
                             lr=base_lr,
                             warmup=0.005,
                             t_total=num_train_optimization_steps)
        LOGGER.info("done optimizer setup")

        model, optimizer = amp.initialize(model, optimizer,
                                          opt_level="O1", verbosity=0)
        # criterion = torch.nn.BCEWithLogitsLoss().to(device)
        criterion = CustomLoss(loss_weight).to(device)
        LOGGER.info("done amp setup")

        for epoch in range(1, epochs + 1):
            LOGGER.info(f"Starting {epoch} epoch...")
            LOGGER.info(f"length {len(X_train)} train, {len(X_val)} valid...")
            if epoch == 1:
                for param_group in optimizer.param_groups:
                    param_group['lr'] = base_lr * gammas[1]
            tr_loss, train_losses = train_one_epoch(model, train_loader, criterion,
                                                    optimizer, device,
                                                    accumulation_steps, total_step,
                                                    n_labels, base_lr,
                                                    gamma=gammas[2 * epoch])
            LOGGER.info(f'Mean train loss: {round(tr_loss, 5)}')
            torch.save(model.state_dict(),
                       '{}_epoch{}_fold{}.pth'.format(exp, epoch, fold_id))

            valid_loss, oof_pred = validate(model, valid_loader, criterion, device,
                                            n_labels)
            LOGGER.info(f'Mean valid loss: {round(valid_loss, 5)}')

            if epochs > 1:
                test_df_cp = test_df.copy()
                test_df_cp["pred"] = oof_pred[:, 0]
                test_df_cp = convert_dataframe_to_bool(test_df_cp)
                bias_metrics_df = compute_bias_metrics_for_model(
                    test_df_cp, identity_columns)
                LOGGER.info(bias_metrics_df)
                score = get_final_metric(bias_metrics_df,
                                         calculate_overall_auc(test_df_cp))
                LOGGER.info(f'score is {score}')

        del model
        gc.collect()
        torch.cuda.empty_cache()

        test_df["pred"] = oof_pred[:, 0]
        test_df = convert_dataframe_to_bool(test_df)
        bias_metrics_df = compute_bias_metrics_for_model(test_df, identity_columns)
        LOGGER.info(bias_metrics_df)
        score = get_final_metric(bias_metrics_df, calculate_overall_auc(test_df))
        LOGGER.info(f'final score is {score}')

        test_df.to_csv("oof.csv", index=False)

        xs = list(range(1, len(train_losses) + 1))
        plt.plot(xs, train_losses, label='Train loss')
        plt.legend()
        plt.xticks(xs)
        plt.xlabel('Iter')
        plt.savefig("loss.png")
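# CustomLoss and LenMatchBatchSampler are defined elsewhere in this codebase; the
# sketches below are plausible reconstructions under stated assumptions, not the
# exact implementations.
#
# CustomLoss: assuming targets are packed as [main target, sample weight, aux
# targets...] (as built in main() above) and the model emits one main logit
# followed by the aux logits.
import torch
import torch.nn as nn


class CustomLoss(nn.Module):
    def __init__(self, loss_weight):
        super().__init__()
        self.loss_weight = loss_weight

    def forward(self, logits, targets):
        # per-sample weighted BCE on the main target
        main_loss = nn.BCEWithLogitsLoss(weight=targets[:, 1:2])(
            logits[:, :1], targets[:, :1])
        # unweighted BCE on the auxiliary targets
        aux_loss = nn.BCEWithLogitsLoss()(logits[:, 1:], targets[:, 2:])
        return self.loss_weight * main_loss + aux_loss


# LenMatchBatchSampler: assuming it buckets samples whose token sequences have
# similar non-pad lengths, so batches need less padding and run faster.
class LenMatchBatchSampler(torch.utils.data.BatchSampler):
    def __iter__(self):
        buckets = [[] for _ in range(100)]  # one bucket per 64-token length band
        for idx in self.sampler:
            x = self.sampler.data_source[idx][0]  # (input_ids, target) tuple
            bucket_id = min(int((x != 0).sum()) // 64, 99)
            buckets[bucket_id].append(idx)
            if len(buckets[bucket_id]) == self.batch_size:
                yield buckets[bucket_id]
                buckets[bucket_id] = []
        # flush partially filled buckets
        leftover = [idx for bucket in buckets for idx in bucket]
        for i in range(0, len(leftover), self.batch_size):
            batch = leftover[i:i + self.batch_size]
            if len(batch) == self.batch_size or not self.drop_last:
                yield batch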
def main():
    # train_df = pd.read_csv(TRAIN_PATH).sample(frac=1.0, random_state=seed)
    # train_size = int(len(train_df) * 0.9)
    train_df = pd.read_csv(TRAIN_PATH).sample(train_size + valid_size,
                                              random_state=seed)
    LOGGER.info(f'data_size is {len(train_df)}')
    LOGGER.info(f'train_size is {train_size}')

    y = np.where(train_df['target'] >= 0.5, 1, 0)
    y_aux = train_df[AUX_COLUMNS].values

    identity_columns_new = []
    for column in identity_columns + ['target']:
        train_df[column + "_bin"] = np.where(train_df[column] >= 0.5, True, False)
        if column != "target":
            identity_columns_new.append(column + "_bin")

    # Annotation-based sample weights: upweight comments that mention an identity,
    # especially non-toxic ones (weighted 5x), then normalize to mean 1.
    sample_weights = np.ones(len(train_df), dtype=np.float32)
    sample_weights += train_df[identity_columns_new].sum(axis=1)
    sample_weights += train_df['target_bin'] * (
        ~train_df[identity_columns_new]).sum(axis=1)
    sample_weights += (~train_df['target_bin']) * train_df[
        identity_columns_new].sum(axis=1) * 5
    sample_weights /= sample_weights.mean()

    with timer('preprocessing text'):
        # df["comment_text"] = [analyzer_embed(text) for text in df["comment_text"]]
        train_df['comment_text'] = train_df['comment_text'].astype(str)
        train_df = train_df.fillna(0)

    with timer('load embedding'):
        tokenizer = BertTokenizer.from_pretrained(BERT_MODEL_PATH,
                                                  cache_dir=None,
                                                  do_lower_case=True)
        X_text = convert_lines(train_df["comment_text"].fillna("DUMMY_VALUE"),
                               max_len, tokenizer)

    test_df = train_df[train_size:]

    with timer('train'):
        X_train, y_train, y_aux_train, w_train = (X_text[:train_size],
                                                  y[:train_size],
                                                  y_aux[:train_size],
                                                  sample_weights[:train_size])
        X_val, y_val, y_aux_val, w_val = (X_text[train_size:], y[train_size:],
                                          y_aux[train_size:],
                                          sample_weights[train_size:])

        model = BertForSequenceClassification(bert_config, num_labels=n_labels)
        model.load_state_dict(torch.load(model_path))
        model.zero_grad()
        model = model.to(device)

        train_dataset = torch.utils.data.TensorDataset(
            torch.tensor(X_train, dtype=torch.long),
            torch.tensor(y_train, dtype=torch.float))
        valid = torch.utils.data.TensorDataset(
            torch.tensor(X_val, dtype=torch.long),
            torch.tensor(y_val, dtype=torch.float))
        train_loader = torch.utils.data.DataLoader(train_dataset,
                                                   batch_size=batch_size,
                                                   shuffle=True)
        valid_loader = torch.utils.data.DataLoader(valid,
                                                   batch_size=batch_size * 2,
                                                   shuffle=False)

        # sample_weights is a NumPy array here, so no .values accessor is needed
        sample_weight_train = [w_train, np.ones_like(w_train)]
        sample_weight_val = [w_val, np.ones_like(w_val)]

        param_optimizer = list(model.named_parameters())
        no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
        optimizer_grouped_parameters = [
            {'params': [p for n, p in param_optimizer
                        if not any(nd in n for nd in no_decay)],
             'weight_decay': 0.01},
            {'params': [p for n, p in param_optimizer
                        if any(nd in n for nd in no_decay)],
             'weight_decay': 0.0}
        ]
        num_train_optimization_steps = int(epochs * train_size / batch_size /
                                           accumulation_steps)
        total_step = int(epochs * train_size / batch_size)

        optimizer = BertAdam(optimizer_grouped_parameters,
                             lr=2e-5 * gamma,
                             warmup=0.05,
                             t_total=num_train_optimization_steps)

        model, optimizer = amp.initialize(model, optimizer,
                                          opt_level="O1", verbosity=0)
        criterion = torch.nn.BCEWithLogitsLoss().to(device)

        LOGGER.info("Starting 1 epoch...")
        tr_loss, train_losses = train_one_epoch(model, train_loader, criterion,
                                                optimizer, device,
                                                accumulation_steps, total_step,
                                                n_labels)
        LOGGER.info(f'Mean train loss: {round(tr_loss, 5)}')
        torch.save(model.state_dict(), '{}_dic'.format(exp))

        valid_loss, oof_pred = validate(model, valid_loader, criterion, device,
                                        n_labels)
        del model
        gc.collect()
        torch.cuda.empty_cache()

    test_df["pred"] = oof_pred.reshape(-1)
    test_df = convert_dataframe_to_bool(test_df)
    bias_metrics_df = compute_bias_metrics_for_model(test_df, identity_columns)
    LOGGER.info(bias_metrics_df)
    score = get_final_metric(bias_metrics_df, calculate_overall_auc(test_df))
    LOGGER.info(f'final score is {score}')

    test_df.to_csv("oof.csv", index=False)

    xs = list(range(1, len(train_losses) + 1))
    plt.plot(xs, train_losses, label='Train loss')
    plt.legend()
    plt.xticks(xs)
    plt.xlabel('Iter')
    plt.savefig("loss.png")
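# Hedged illustration, separate from the pipeline above: how the annotation-based
# sample weights behave on toy rows. The single identity column is a hypothetical
# stand-in for the *_bin columns built in main().
import numpy as np
import pandas as pd

toy = pd.DataFrame({
    "target_bin": [False, True, False, True],
    "muslim_bin": [False, False, True, True],
})
idents = ["muslim_bin"]
w = np.ones(len(toy), dtype=np.float32)
w += toy[idents].sum(axis=1)                              # identity mentioned
w += toy["target_bin"] * (~toy[idents]).sum(axis=1)       # toxic without identity
w += (~toy["target_bin"]) * toy[idents].sum(axis=1) * 5   # non-toxic with identity: 5x
print((w / w.mean()).round(2).tolist())
# the non-toxic, identity-mentioning row gets by far the largest weight,
# which targets the BPSN/BNSP terms of the competition's bias metric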
x_test = tokenizer.texts_to_sequences(x_test)
x_test = sequence.pad_sequences(x_test, maxlen=MAX_LEN, padding='post')

# build DataLoader
x_test_cuda = torch.tensor(x_test, dtype=torch.long).cuda()
test_data = torch.utils.data.TensorDataset(x_test_cuda)
test_loader = torch.utils.data.DataLoader(test_data,
                                          batch_size=BATCH_SIZE,
                                          shuffle=False)

# build word embedding matrix from the crawl vectors
crawl_matrix, _ = build_matrix(tokenizer.word_index, CRAWL_EMBEDDING_PATH)

# load pretrained model; the embedding weights are rebuilt from crawl_matrix,
# so the saved embedding weights are dropped before loading the rest
net = NeuralNet(crawl_matrix, 128)
model_dict = net.state_dict()
pretrained_dict = torch.load("../input/lstm-model2/rnn_pytorch.pt")
del pretrained_dict['embedding.weight']
model_dict.update(pretrained_dict)
net.load_state_dict(model_dict)
net.cuda()

# inference
net.eval()
result_rnn_1 = list()
with torch.no_grad():
    for (x_batch,) in test_loader:
        y_pred, _ = net(x_batch)
        y_pred = y_pred.cpu().numpy()[:, 0]
        result_rnn_1.extend(y_pred)
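# build_matrix is defined elsewhere; a minimal sketch under the usual pattern for
# crawl-style text vectors (an assumption, not the exact helper): look up each word
# of the Keras tokenizer's word_index in the embedding file and collect the misses.
import numpy as np

def build_matrix(word_index, path, embed_size=300):
    # parse "word v1 v2 ..." lines; a short header line (word count, dim) is skipped
    embedding_index = {}
    with open(path, encoding="utf-8", errors="ignore") as f:
        for line in f:
            values = line.rstrip().split(" ")
            if len(values) <= 2:
                continue
            embedding_index[values[0]] = np.asarray(values[1:], dtype="float32")
    embedding_matrix = np.zeros((len(word_index) + 1, embed_size))
    unknown_words = []
    for word, i in word_index.items():
        vec = embedding_index.get(word)
        if vec is not None:
            embedding_matrix[i] = vec
        else:
            unknown_words.append(word)  # rows for unseen words stay zero
    return embedding_matrix, unknown_words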
def train_unfixed():
    # config file
    cf = Config('./config.yaml')
    # use the GPU if one is available
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # training data
    train_data = NewsDataset("./data/cnews_final_train.txt", cf.max_seq_len)
    train_dataloader = DataLoader(train_data,
                                  batch_size=cf.batch_size,
                                  shuffle=True)
    # test data (evaluation order does not matter, so no shuffling is needed)
    test_data = NewsDataset("./data/cnews_final_test.txt", cf.max_seq_len)
    test_dataloader = DataLoader(test_data,
                                 batch_size=cf.batch_size,
                                 shuffle=False)

    # model
    config = BertConfig("./output/pytorch_bert_config.json")
    model = BertForSequenceClassification(config, num_labels=cf.num_labels)
    model.load_state_dict(torch.load("./output/pytorch_model.bin"))

    # Adam optimizer, with weight decay on everything except bias and LayerNorm
    for param in model.parameters():
        param.requires_grad = True
    param_optimizer = list(model.named_parameters())
    no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
    optimizer_grouped_parameters = [{
        'params': [p for n, p in param_optimizer
                   if not any(nd in n for nd in no_decay)],
        'weight_decay': 0.01
    }, {
        'params': [p for n, p in param_optimizer
                   if any(nd in n for nd in no_decay)],
        'weight_decay': 0.0
    }]
    num_train_optimization_steps = int(
        len(train_data) / cf.batch_size) * cf.epoch
    optimizer = BertAdam(optimizer_grouped_parameters,
                         lr=cf.lr,
                         t_total=num_train_optimization_steps)

    # move the model to the target device
    model.to(device)
    # parallelize across GPUs if more than one is present
    if torch.cuda.device_count() > 1:
        model = torch.nn.DataParallel(model)

    # training
    start_time = time.time()
    total_batch = 0             # total number of batches seen
    best_acc_val = 0.0          # best validation accuracy so far
    last_improved = 0           # batch index of the last improvement
    require_improvement = 1500  # stop early after 1500 batches without improvement

    # get the current validation accuracy before fine-tuning
    model.eval()
    _, best_acc_val = evaluate(model, test_dataloader, device)

    flag = False
    model.train()
    for epoch_id in range(cf.epoch):
        print("Epoch %d" % epoch_id)
        for step, batch in enumerate(
                tqdm(train_dataloader, desc="batch",
                     total=len(train_dataloader))):
            label_id = batch['label_id'].squeeze(1).to(device)
            word_ids = batch['word_ids'].to(device)
            segment_ids = batch['segment_ids'].to(device)
            word_mask = batch['word_mask'].to(device)

            loss = model(word_ids, segment_ids, word_mask, label_id)
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()

            total_batch += 1
            if total_batch % cf.print_per_batch == 0:
                model.eval()
                with torch.no_grad():
                    loss_train, acc_train = get_model_loss_acc(
                        model, word_ids, segment_ids, word_mask, label_id)
                loss_val, acc_val = evaluate(model, test_dataloader, device)

                if acc_val > best_acc_val:
                    # save the best result
                    best_acc_val = acc_val
                    last_improved = total_batch
                    torch.save(model.state_dict(),
                               "./output/pytorch_model.bin")
                    with open("./output/pytorch_bert_config.json", 'w') as f:
                        f.write(model.config.to_json_string())
                    improved_str = "*"
                else:
                    improved_str = ""

                time_dif = get_time_dif(start_time)
                msg = 'Iter: {0:>6}, Train Loss: {1:>6.2}, Train Acc: {2:>7.2%},' \
                      + ' Val Loss: {3:>6.2}, Val Acc: {4:>7.2%}, Time: {5} {6}'
                print(msg.format(total_batch, loss_train, acc_train, loss_val,
                                 acc_val, time_dif, improved_str))
                model.train()

            if total_batch - last_improved > require_improvement:
                print("No improvement for too long; stopping early")
                flag = True
                break
        if flag:
            break
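# evaluate() and get_model_loss_acc() are defined elsewhere in this repo. Below is
# a minimal sketch of evaluate() consistent with how it is called above (it returns
# mean loss and accuracy over the loader); the double forward pass follows the
# pytorch-pretrained-bert convention, where passing labels returns the loss and
# omitting them returns the logits. This is an assumption, not the repo's exact helper.
import torch

def evaluate(model, dataloader, device):
    total_loss, correct, count = 0.0, 0, 0
    with torch.no_grad():
        for batch in dataloader:
            label_id = batch['label_id'].squeeze(1).to(device)
            word_ids = batch['word_ids'].to(device)
            segment_ids = batch['segment_ids'].to(device)
            word_mask = batch['word_mask'].to(device)
            loss = model(word_ids, segment_ids, word_mask, label_id)
            logits = model(word_ids, segment_ids, word_mask)
            total_loss += loss.mean().item() * label_id.size(0)
            correct += (logits.argmax(dim=-1) == label_id).sum().item()
            count += label_id.size(0)
    return total_loss / count, correct / count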
class ClassificationModel:
    def __init__(self, bert_model=config.bert_model, gpu=False, seed=0):
        self.gpu = gpu
        self.bert_model = bert_model
        self.train_df = data_reader.load_train_dataset(config.data_path)
        self.val_df = data_reader.load_dev_dataset(config.data_path)
        self.test_df = data_reader.load_test_dataset(config.data_path)
        self.num_classes = len(LABELS)
        self.model = None
        self.optimizer = None
        self.tokenizer = BertTokenizer.from_pretrained(self.bert_model)

        # to plot loss during the training process
        self.plt_x = []
        self.plt_y = []

        random.seed(seed)
        np.random.seed(seed)
        torch.manual_seed(seed)
        if self.gpu:
            torch.cuda.manual_seed_all(seed)

    def __init_model(self):
        if self.gpu:
            self.device = torch.device("cuda")
        else:
            self.device = torch.device("cpu")
        self.model.to(self.device)

        # log available CUDA memory (only meaningful on a CUDA device)
        if self.device.type == 'cuda':
            print(torch.cuda.memory_allocated(self.device))
            print(torch.cuda.get_device_name(0))
            print('Memory Usage:')
            print('Allocated:',
                  round(torch.cuda.memory_allocated(0) / 1024**3, 1), 'GB')
            print('Reserved: ',
                  round(torch.cuda.memory_reserved(0) / 1024**3, 1), 'GB')

    def new_model(self):
        self.model = BertForSequenceClassification.from_pretrained(
            self.bert_model, num_labels=self.num_classes)
        self.__init_model()

    def load_model(self, path_model, path_config):
        self.model = BertForSequenceClassification(BertConfig(path_config),
                                                   num_labels=self.num_classes)
        self.model.load_state_dict(torch.load(path_model))
        self.__init_model()

    def save_model(self, path_model, path_config, epoch_n, acc, f1):
        if not os.path.exists(path_model):
            os.makedirs(path_model)
        model_save_path = os.path.join(
            path_model, 'model_{}_{:.4f}_{:.4f}'.format(epoch_n, acc, f1))
        torch.save(self.model.state_dict(), model_save_path)

        if not os.path.exists(path_config):
            os.makedirs(path_config)
        model_config_path = os.path.join(path_config, 'config.cf')
        with open(model_config_path, 'w') as f:
            f.write(self.model.config.to_json_string())

    def train(self,
              epochs,
              batch_size=config.batch_size,
              lr=config.lr,
              plot_path=None,
              model_path=None,
              config_path=None):
        model_params = list(self.model.named_parameters())
        no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
        optimizer_grouped_parameters = [{
            'params': [p for n, p in model_params
                       if not any(nd in n for nd in no_decay)],
            'weight_decay': 0.01
        }, {
            'params': [p for n, p in model_params
                       if any(nd in n for nd in no_decay)],
            'weight_decay': 0.0
        }]
        self.optimizer = BertAdam(
            optimizer_grouped_parameters,
            lr=lr,
            warmup=0.1,
            t_total=int(len(self.train_df) / batch_size) * epochs)

        nb_tr_steps = 0
        train_features = data_reader.convert_examples_to_features(
            self.train_df, config.MAX_SEQ_LENGTH, self.tokenizer)

        # create tensors of all features
        all_input_ids = torch.tensor([f.input_ids for f in train_features],
                                     dtype=torch.long)
        all_input_mask = torch.tensor([f.input_mask for f in train_features],
                                      dtype=torch.long)
        all_segment_ids = torch.tensor([f.segment_ids for f in train_features],
                                       dtype=torch.long)
        all_label_ids = torch.tensor([f.label_id for f in train_features],
                                     dtype=torch.long)
        train_data = TensorDataset(all_input_ids, all_input_mask,
                                   all_segment_ids, all_label_ids)

        # eval dataloader
        eval_features = data_reader.convert_examples_to_features(
            self.val_df, config.MAX_SEQ_LENGTH, self.tokenizer)
        all_input_ids = torch.tensor([f.input_ids for f in eval_features],
                                     dtype=torch.long)
        all_input_mask = torch.tensor([f.input_mask for f in eval_features],
                                      dtype=torch.long)
        all_segment_ids = torch.tensor([f.segment_ids for f in eval_features],
                                       dtype=torch.long)
        all_label_ids = torch.tensor([f.label_id for f in eval_features],
                                     dtype=torch.long)
        eval_data = TensorDataset(all_input_ids, all_input_mask,
                                  all_segment_ids, all_label_ids)
        eval_sampler = SequentialSampler(eval_data)
        eval_dataloader = DataLoader(eval_data,
                                     sampler=eval_sampler,
                                     batch_size=batch_size)

        # class weighting: classes are weighted inversely to their frequency
        _, counts = np.unique(self.train_df['label'], return_counts=True)
        class_weights = [sum(counts) / c for c in counts]
        # assign a weight to each input sample
        example_weights = [class_weights[e] for e in self.train_df['label']]
        sampler = WeightedRandomSampler(example_weights,
                                        len(self.train_df['label']))
        train_dataloader = DataLoader(train_data,
                                      sampler=sampler,
                                      batch_size=batch_size)

        self.model.train()
        for e in range(epochs):
            print("Epoch {}".format(e))
            if e != 0:
                f1, acc = self.val(eval_dataloader)
                print("\nF1 score: {}, Accuracy: {}".format(f1, acc))
                if model_path is not None and config_path is not None:
                    self.save_model(model_path, config_path, e, acc, f1)

            for step, batch in enumerate(
                    tqdm(train_dataloader, desc="Iteration")):
                batch = tuple(t.to(self.device) for t in batch)
                input_ids, input_mask, segment_ids, label_ids = batch
                loss = self.model(input_ids, segment_ids, input_mask, label_ids)
                loss.backward()
                # if plot_path is not None:
                #     self.plt_y.append(loss.item())
                #     self.plt_x.append(nb_tr_steps)
                #     self.save_plot(plot_path)
                nb_tr_steps += 1
                self.optimizer.step()
                self.optimizer.zero_grad()

            if self.gpu:
                torch.cuda.empty_cache()

    def val(self, eval_dataloader, batch_size=config.batch_size):
        f1, acc = 0, 0
        nb_eval_examples = 0
        for input_ids, input_mask, segment_ids, gnd_labels in tqdm(
                eval_dataloader, desc="Evaluating"):
            input_ids = input_ids.to(self.device)
            input_mask = input_mask.to(self.device)
            segment_ids = segment_ids.to(self.device)

            with torch.no_grad():
                logits = self.model(input_ids, segment_ids, input_mask)
                predicted_labels = np.argmax(logits.detach().cpu().numpy(),
                                             axis=1)
                acc += np.sum(predicted_labels == gnd_labels.numpy())
                tmp_eval_f1 = f1_score(predicted_labels, gnd_labels,
                                       average='macro')
                f1 += tmp_eval_f1 * input_ids.size(0)
                nb_eval_examples += input_ids.size(0)

        return f1 / nb_eval_examples, acc / nb_eval_examples

    def save_plot(self, path):
        fig, ax = plt.subplots()
        ax.plot(self.plt_x, self.plt_y)
        ax.set(xlabel='Training steps', ylabel='Loss')
        fig.savefig(path)
        plt.close()

    def create_test_predictions(self, path):
        tests_features = data_reader.convert_examples_to_features(
            self.test_df, config.MAX_SEQ_LENGTH, self.tokenizer)
        all_input_ids = torch.tensor([f.input_ids for f in tests_features],
                                     dtype=torch.long)
        all_input_mask = torch.tensor([f.input_mask for f in tests_features],
                                      dtype=torch.long)
        all_segment_ids = torch.tensor([f.segment_ids for f in tests_features],
                                       dtype=torch.long)
        all_label_ids = torch.tensor([f.label_id for f in tests_features],
                                     dtype=torch.long)
        all_sample_ids = [f.sample_id for f in tests_features]
        test_data = TensorDataset(all_input_ids, all_input_mask,
                                  all_segment_ids, all_label_ids)
        test_sampler = SequentialSampler(test_data)
        test_dataloader = DataLoader(test_data,
                                     sampler=test_sampler,
                                     batch_size=16)
        predictions = []
        inverse_labels = {v: k for k, v in LABELS.items()}
        for input_ids, input_mask, segment_ids, gnd_labels in tqdm(
                test_dataloader, desc="Evaluating"):
            input_ids = input_ids.to(self.device)
            input_mask = input_mask.to(self.device)
            segment_ids = segment_ids.to(self.device)

            with torch.no_grad():
                # without labels the model returns logits only, as in val()
                logits = self.model(input_ids, segment_ids, input_mask)
                predictions += [
                    inverse_labels[p]
                    for p in list(np.argmax(logits.detach().cpu().numpy(),
                                            axis=1))
                ]
        with open(path, "w") as csv_file:
            writer = csv.writer(csv_file, delimiter=',')
            for i, prediction in enumerate(predictions):
                writer.writerow([all_sample_ids[i], prediction])
        return predictions
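# Hedged usage sketch for the ClassificationModel above; the paths and epoch count
# are hypothetical, not taken from the repo's configuration.
if __name__ == "__main__":
    clf = ClassificationModel(gpu=torch.cuda.is_available())
    clf.new_model()
    clf.train(epochs=3, model_path="./checkpoints", config_path="./checkpoints")
    clf.create_test_predictions("predictions.csv")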
class ClassificationModel:
    def __init__(self, task, val=0.1, bert_model=BERT_MODEL, gpu=False, seed=0):
        self.gpu = gpu
        self.task = task
        self.bert_model = bert_model
        self.x_train, self.y_train = load_train_dataset(self.task)
        # sample validation *indices* once so x_val and y_val stay aligned
        # (sampling the two arrays independently would decouple inputs and labels)
        val_idx = np.random.choice(len(self.x_train),
                                   size=int(val * len(self.x_train)),
                                   replace=False)
        self.x_val = np.asarray(self.x_train)[val_idx]
        self.y_val = np.asarray(self.y_train)[val_idx]
        self.x_test_ids, self.x_test = load_test_dataset(self.task)
        self.num_classes = len(TASK_LABELS[task])
        self.model = None
        self.optimizer = None
        self.tokenizer = BertTokenizer.from_pretrained(self.bert_model)
        self.plt_x = []
        self.plt_y = []

        random.seed(seed)
        np.random.seed(seed)
        torch.manual_seed(seed)
        if self.gpu:
            torch.cuda.manual_seed_all(seed)

    def __init_model(self):
        if self.gpu:
            self.device = torch.device("cuda")
            print("Start learning with GPU")
            print(torch.cuda.memory_allocated(self.device))
        else:
            self.device = torch.device("cpu")
            print("Start learning with CPU")
        self.model.to(self.device)

    def new_model(self):
        self.model = BertForSequenceClassification.from_pretrained(
            self.bert_model, num_labels=self.num_classes)
        self.__init_model()

    def load_model(self, path_model, path_config):
        self.model = BertForSequenceClassification(BertConfig(path_config),
                                                   num_labels=self.num_classes)
        self.model.load_state_dict(torch.load(path_model))
        self.__init_model()

    def save_model(self, path_model, path_config):
        torch.save(self.model.state_dict(), path_model)
        with open(path_config, 'w') as f:
            f.write(self.model.config.to_json_string())

    # noinspection PyArgumentList
    def train(self,
              epochs,
              plot_path,
              batch_size=32,
              lr=5e-5,
              model_path=None,
              config_path=None):
        model_params = list(self.model.named_parameters())
        no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
        optimizer_grouped_parameters = [{
            'params': [p for n, p in model_params
                       if not any(nd in n for nd in no_decay)],
            'weight_decay': 0.01
        }, {
            'params': [p for n, p in model_params
                       if any(nd in n for nd in no_decay)],
            'weight_decay': 0.0
        }]
        self.optimizer = BertAdam(
            optimizer_grouped_parameters,
            lr=lr,
            warmup=0.1,
            t_total=int(len(self.x_train) / batch_size) * epochs)

        train_features = convert_examples_to_features(self.x_train,
                                                      self.y_train,
                                                      MAX_SEQ_LENGTH,
                                                      self.tokenizer)
        all_input_ids = torch.tensor([f.input_ids for f in train_features],
                                     dtype=torch.long)
        all_input_mask = torch.tensor([f.input_mask for f in train_features],
                                      dtype=torch.long)
        all_segment_ids = torch.tensor([f.segment_ids for f in train_features],
                                       dtype=torch.long)
        all_label_ids = torch.tensor([f.label_id for f in train_features],
                                     dtype=torch.long)
        train_data = TensorDataset(all_input_ids, all_input_mask,
                                   all_segment_ids, all_label_ids)

        # class weighting: oversample rare classes inversely to their frequency
        _, counts = np.unique(self.y_train, return_counts=True)
        class_weights = [sum(counts) / c for c in counts]
        example_weights = [class_weights[e] for e in self.y_train]
        sampler = WeightedRandomSampler(example_weights, len(self.y_train))
        train_dataloader = DataLoader(train_data,
                                      sampler=sampler,
                                      batch_size=batch_size)

        self.model.train()
        nb_tr_steps = 0
        for e in range(epochs):
            print("Epoch {e}".format(e=e))
            f1, acc = self.val()
            print("\nF1 score: {f1}, Accuracy: {acc}".format(f1=f1, acc=acc))
            if model_path is not None and config_path is not None:
                self.save_model(model_path, config_path)
            for step, batch in enumerate(
                    tqdm(train_dataloader, desc="Iteration")):
                batch = tuple(t.to(self.device) for t in batch)
                input_ids, input_mask, segment_ids, label_ids = batch
                loss = self.model(input_ids, segment_ids, input_mask, label_ids)
                loss.backward()
                self.plt_y.append(loss.item())
                self.plt_x.append(nb_tr_steps)
                self.save_plot(plot_path)
                nb_tr_steps += 1
                self.optimizer.step()
                self.optimizer.zero_grad()
        if self.gpu:
            torch.cuda.empty_cache()

    def val(self, batch_size=32, test=False):
        eval_features = convert_examples_to_features(self.x_val, self.y_val,
                                                     MAX_SEQ_LENGTH,
                                                     self.tokenizer)
        all_input_ids = torch.tensor([f.input_ids for f in eval_features],
                                     dtype=torch.long)
        all_input_mask = torch.tensor([f.input_mask for f in eval_features],
                                      dtype=torch.long)
        all_segment_ids = torch.tensor([f.segment_ids for f in eval_features],
                                       dtype=torch.long)
        all_label_ids = torch.tensor([f.label_id for f in eval_features],
                                     dtype=torch.long)
        eval_data = TensorDataset(all_input_ids, all_input_mask,
                                  all_segment_ids, all_label_ids)
        eval_sampler = SequentialSampler(eval_data)
        eval_dataloader = DataLoader(eval_data,
                                     sampler=eval_sampler,
                                     batch_size=batch_size)

        f1, acc = 0, 0
        nb_eval_examples = 0
        for input_ids, input_mask, segment_ids, gnd_labels in tqdm(
                eval_dataloader, desc="Evaluating"):
            input_ids = input_ids.to(self.device)
            input_mask = input_mask.to(self.device)
            segment_ids = segment_ids.to(self.device)

            with torch.no_grad():
                logits = self.model(input_ids, segment_ids, input_mask)
                predicted_labels = np.argmax(logits.detach().cpu().numpy(),
                                             axis=1)
                acc += np.sum(predicted_labels == gnd_labels.numpy())
                tmp_eval_f1 = f1_score(predicted_labels, gnd_labels,
                                       average='macro')
                f1 += tmp_eval_f1 * input_ids.size(0)
                nb_eval_examples += input_ids.size(0)

        return f1 / nb_eval_examples, acc / nb_eval_examples

    def save_plot(self, path):
        import matplotlib.pyplot as plt
        fig, ax = plt.subplots()
        ax.plot(self.plt_x, self.plt_y)
        ax.set(xlabel='Training steps', ylabel='Loss')
        fig.savefig(path)
        plt.close(fig)

    def create_test_predictions(self, path):
        eval_features = convert_examples_to_features(self.x_test,
                                                     [-1] * len(self.x_test),
                                                     MAX_SEQ_LENGTH,
                                                     self.tokenizer)
        all_input_ids = torch.tensor([f.input_ids for f in eval_features],
                                     dtype=torch.long)
        all_input_mask = torch.tensor([f.input_mask for f in eval_features],
                                      dtype=torch.long)
        all_segment_ids = torch.tensor([f.segment_ids for f in eval_features],
                                       dtype=torch.long)
        all_label_ids = torch.tensor([f.label_id for f in eval_features],
                                     dtype=torch.long)
        eval_data = TensorDataset(all_input_ids, all_input_mask,
                                  all_segment_ids, all_label_ids)
        eval_sampler = SequentialSampler(eval_data)
        eval_dataloader = DataLoader(eval_data,
                                     sampler=eval_sampler,
                                     batch_size=16)

        predictions = []
        inverse_labels = {v: k for k, v in TASK_LABELS[self.task].items()}
        for input_ids, input_mask, segment_ids, gnd_labels in tqdm(
                eval_dataloader, desc="Evaluating"):
            input_ids = input_ids.to(self.device)
            input_mask = input_mask.to(self.device)
            segment_ids = segment_ids.to(self.device)

            with torch.no_grad():
                logits = self.model(input_ids, segment_ids, input_mask)
                predictions += [
                    inverse_labels[p]
                    for p in list(np.argmax(logits.detach().cpu().numpy(),
                                            axis=1))
                ]

        with open(path, "w") as csv_file:
            writer = csv.writer(csv_file, delimiter=',')
            for i, prediction in enumerate(predictions):
                writer.writerow([int(self.x_test_ids[i]), prediction])
        return predictions
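# Hedged illustration of the class-balancing used in both train() methods above:
# inverse-frequency class weights fed to WeightedRandomSampler oversample the
# rare classes. Toy labels only, not project data.
import numpy as np
from torch.utils.data import WeightedRandomSampler

y = np.array([0, 0, 0, 1])                           # imbalanced toy labels
_, counts = np.unique(y, return_counts=True)
class_weights = [sum(counts) / c for c in counts]    # [4/3, 4.0]
example_weights = [class_weights[e] for e in y]
sampler = WeightedRandomSampler(example_weights, num_samples=len(y))
# the single label-1 example is drawn ~3x as often as any one label-0 example,
# so batches are roughly class-balanced in expectation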