def run(fold, model_path='/home/mikhail/workspace/roberta-base/'):
    """Train a TweetModel on one cross-validation fold.

    Parameters
    ----------
    fold : int
        Fold index; rows with ``kfold == fold`` are excluded from the
        training split read from ``config.TRAINING_FILE``.
    model_path : str
        Location of the pretrained roberta-base config/checkpoint.
        Defaults to the previously hard-coded path so existing callers
        are unaffected; pass your own path on other machines.
    """
    dfx = pd.read_csv(config.TRAINING_FILE)
    df_train = dfx[dfx.kfold != fold].reset_index(drop=True)
    df_valid = pd.read_csv(config.VALID_FILE)

    model_config = transformers.RobertaConfig.from_pretrained(model_path)
    # Expose all intermediate hidden states — presumably TweetModel pools
    # over several layers, not just the final one. TODO(review): confirm.
    model_config.output_hidden_states = True
    model = TweetModel(model_config)

    optimizer = AdamW(model.parameters(), lr=3e-5, betas=(0.9, 0.999))
    criterion = loss_fn
    dataloaders_dict = get_train_val_loaders(df_train, df_valid,
                                             config.TRAIN_BATCH_SIZE)
    engine.train_model(model, dataloaders_dict, criterion, optimizer,
                       config.EPOCHS, f'roberta_fold{fold}.pth')
# Split parameters into two groups: weights get L2 weight decay, while
# bias/LayerNorm-style parameters (names matching `no_decay`) get none —
# the standard recipe for transformer fine-tuning.
optimizer_parameters = [
    {
        'params': [p for n, p in param_optimizer
                   if not any(nd in n for nd in no_decay)],
        'weight_decay': 0.001,
    },
    {
        'params': [p for n, p in param_optimizer
                   if any(nd in n for nd in no_decay)],
        'weight_decay': 0.0,
    },
]
# BUG FIX: the grouped parameters were built but never used — AdamW was
# handed `model.parameters()`, so the weight-decay split above was dead
# code. Pass the groups so the decay policy actually applies.
optimizer = optim.AdamW(optimizer_parameters, lr=opt.lr, betas=(0.9, 0.999))
scheduler = get_linear_schedule_with_warmup(
    optimizer,
    num_warmup_steps=opt.num_warmup_steps,
    num_training_steps=num_train_steps)
criterion = loss_fn
dataloaders_dict = get_train_val_loaders(train_df, train_idx, val_idx,
                                         batch_size, MODEL_PATH)
train_model(model, dataloaders_dict, criterion, optimizer, num_epochs,
            f'roberta_fold{fold}.pth', scheduler)
test_it(MODEL_PATH)
def main():
    """Train a 10-fold RoBERTa ensemble, run ensembled inference on the
    test set, and write ``submission.csv``.

    Side effects: reads ``data/train.csv``, ``data/test.csv`` and
    ``data/sample_submission.csv``; writes one ``roberta_fold{k}.pth``
    checkpoint per fold and ``submission.csv``.
    """
    seed = 42
    seed_everything(seed)
    num_epochs = 3
    batch_size = 32

    # ---- training: one model per stratified fold ----
    skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
    train_df = pd.read_csv('data/train.csv')
    train_df['text'] = train_df['text'].astype(str)
    train_df['selected_text'] = train_df['selected_text'].astype(str)

    for fold, (train_idx, val_idx) in enumerate(
            skf.split(train_df, train_df.sentiment), start=1):
        print(f'Fold: {fold}')
        model = TweetModel()
        optimizer = optim.AdamW(model.parameters(), lr=3e-5,
                                betas=(0.9, 0.999))
        criterion = loss_fn
        dataloaders_dict = get_train_val_loaders(train_df, train_idx,
                                                 val_idx, batch_size)
        train_model(model, dataloaders_dict, criterion, optimizer,
                    num_epochs, f'roberta_fold{fold}.pth')

    # ---- inference: soft-voting ensemble over all fold checkpoints ----
    test_df = pd.read_csv('data/test.csv')
    test_df['text'] = test_df['text'].astype(str)
    test_loader = get_test_loader(test_df)

    models = []
    for fold in range(skf.n_splits):
        model = TweetModel()
        model.cuda()
        # Checkpoints were saved with 1-based fold numbers above.
        model.load_state_dict(torch.load(f'roberta_fold{fold + 1}.pth'))
        model.eval()
        models.append(model)

    predictions = []
    for data in test_loader:
        ids = data['ids'].cuda()
        masks = data['masks'].cuda()
        tweet = data['tweet']
        offsets = data['offsets'].numpy()

        start_logits = []
        end_logits = []
        for model in models:
            with torch.no_grad():
                output = model(ids, masks)
                start_logits.append(
                    torch.softmax(output[0], dim=1).cpu().detach().numpy())
                end_logits.append(
                    torch.softmax(output[1], dim=1).cpu().detach().numpy())
        # Average the per-fold probabilities before taking the argmax.
        start_logits = np.mean(start_logits, axis=0)
        end_logits = np.mean(end_logits, axis=0)

        for i in range(len(ids)):
            start_pred = np.argmax(start_logits[i])
            end_pred = np.argmax(end_logits[i])
            if start_pred > end_pred:
                # Inconsistent span prediction: fall back to the full tweet.
                pred = tweet[i]
            else:
                pred = get_selected_text(tweet[i], start_pred, end_pred,
                                         offsets[i])
            predictions.append(pred)

    # ---- submission: heuristic clean-up of single-token predictions ----
    sub_df = pd.read_csv('data/sample_submission.csv')
    sub_df['selected_text'] = predictions
    sub_df['selected_text'] = sub_df['selected_text'].apply(
        lambda x: x.replace('!!!!', '!') if len(x.split()) == 1 else x)
    # BUG FIX: '...' must be collapsed before '..'. In the original order
    # the '..' rule rewrote every '...' to '..' first, so the '...' rule
    # could never match and was dead code.
    sub_df['selected_text'] = sub_df['selected_text'].apply(
        lambda x: x.replace('...', '.') if len(x.split()) == 1 else x)
    sub_df['selected_text'] = sub_df['selected_text'].apply(
        lambda x: x.replace('..', '.') if len(x.split()) == 1 else x)
    sub_df.to_csv('submission.csv', index=False)
    sub_df.head()