test_preds_list.append(test_preds) for i in range(config["n_fold"]): if os.path.exists(config['output_dir']) and os.path.isdir( config['output_dir']): shutil.rmtree(config['output_dir']) print("Started Fold {}".format(i)) model = ClassificationModel(MODEL_TYPE, MODEL_NAME, args=config, num_labels=1, use_cuda=torch.cuda.is_available()) train_df, eval_df = train_test_split(train, test_size=0.2, random_state=SEED * i) model.train_model(train_df, eval_df=eval_df, mae=mean_absolute_error) model = ClassificationModel(MODEL_TYPE, config["best_model_dir"], args=config, num_labels=1, use_cuda=torch.cuda.is_available()) for dev_sentences, test_sentences, dev_preds, test_preds in zip( dev_sentences_list, test_sentences_list, dev_preds_list, test_preds_list): dev_predictions, dev_raw_outputs = model.predict(dev_sentences) test_predictions, test_raw_outputs = model.predict(test_sentences) dev_preds[:, i] = dev_predictions test_preds[:, i] = test_predictions for dev, dev_preds, test, test_preds in zip(dev_list, dev_preds_list,
dev_sentences = dev['text'].tolist()
# One row per sentence, one column per fold; each fold fills its own column.
dev_preds = np.zeros((len(dev_sentences), config["n_fold"]))

# The 'text' column of ``test`` is overwritten in place with the preprocessed
# text before the sentences are extracted.
test['text'] = test['text'].apply(arabert_prep.preprocess)
test_sentences = test['text'].tolist()
test_preds = np.zeros((len(test_sentences), config["n_fold"]))

# Query CUDA availability once and reuse the answer for every model built
# below, instead of asking twice per fold.
cuda_available = torch.cuda.is_available()

for i in range(config["n_fold"]):
    # Start every fold from an empty output directory. ``isdir`` is already
    # False for a missing path, so no separate ``exists`` check is needed.
    if os.path.isdir(config['output_dir']):
        shutil.rmtree(config['output_dir'])

    print("Started Fold {}".format(i))
    model = ClassificationModel(MODEL_TYPE, MODEL_NAME, args=config,
                                use_cuda=cuda_available)

    # NOTE(review): ``SEED * i`` is 0 for the first fold whatever SEED is,
    # and every fold gets the same split when SEED == 0 -- confirm intended.
    train_df, eval_df = train_test_split(train, test_size=0.1,
                                         random_state=SEED * i)
    model.train_model(train_df, eval_df=eval_df,
                      precision=precision, recall=recall, f1=f1)

    # Build a second model from config["best_model_dir"] and predict with it
    # rather than with the object that was just trained (presumably the best
    # checkpoint saved during training -- verify).
    model = ClassificationModel(MODEL_TYPE, config["best_model_dir"],
                                args=config, use_cuda=cuda_available)

    predictions, raw_outputs = model.predict(dev_sentences)
    dev_preds[:, i] = predictions

    test_predictions, test_raw_outputs = model.predict(test_sentences)
    test_preds[:, i] = test_predictions

    print("Completed Fold {}".format(i))

# select majority class of each instance (row)
dev_predictions = []
for row in dev_preds:
    row = row.tolist()