train_list = train['text'].tolist() test_list = test['text'].tolist() complete_list = train_list + test_list lm_train = complete_list[0:int(len(complete_list) * 0.8)] lm_test = complete_list[-int(len(complete_list) * 0.2):] with open(os.path.join(TEMP_DIRECTORY, "lm_train.txt"), 'w') as f: for item in lm_train: f.write("%s\n" % item) with open(os.path.join(TEMP_DIRECTORY, "lm_test.txt"), 'w') as f: for item in lm_test: f.write("%s\n" % item) model = LanguageModelingModel(MODEL_TYPE, MODEL_NAME, args=language_modeling_args) model.train_model(os.path.join(TEMP_DIRECTORY, "lm_train.txt"), eval_file=os.path.join(TEMP_DIRECTORY, "lm_test.txt")) MODEL_NAME = language_modeling_args["best_model_dir"] # Train the model print("Started Training") train['labels'] = encode(train["labels"]) test['labels'] = encode(test["labels"]) test_sentences = test['text'].tolist() test_preds = np.zeros((len(test), args["n_fold"])) if args["evaluate_during_training"]:
# --- Language-model fine-tuning corpus (train + dev text) ---
# Only unlabeled text is used here; labels are encoded further below.
train_list = train['text'].tolist()
dev_list = dev['text'].tolist()
complete_list = train_list + dev_list

# Single-cut 80/20 split. The previous slices [0:int(n*0.8)] and
# [-int(n*0.2):] could drop a sentence when truncation left a gap
# (e.g. n=9 loses index 7) and returned the WHOLE list as the eval split
# whenever int(n*0.2) == 0, because [-0:] == [:]. One cut point fixes both.
split_point = int(len(complete_list) * 0.8)
lm_train = complete_list[:split_point]
lm_test = complete_list[split_point:]

# One sentence per line; explicit UTF-8 so non-ASCII text does not crash on
# platforms whose default encoding is narrower (e.g. cp1252 on Windows).
with open(os.path.join(TEMP_DIRECTORY, "lm_train.txt"), 'w', encoding='utf-8') as f:
    for item in lm_train:
        f.write("%s\n" % item)
with open(os.path.join(TEMP_DIRECTORY, "lm_test.txt"), 'w', encoding='utf-8') as f:
    for item in lm_test:
        f.write("%s\n" % item)

# Fine-tune the pretrained LM ("auto" lets the library infer the model type
# from MODEL_NAME), then point MODEL_NAME at the best checkpoint so the
# downstream classifier starts from it. GPU is used only when available.
model = LanguageModelingModel("auto", MODEL_NAME, args=language_modeling_args,
                              use_cuda=torch.cuda.is_available())
model.train_model(os.path.join(TEMP_DIRECTORY, "lm_train.txt"),
                  eval_file=os.path.join(TEMP_DIRECTORY, "lm_test.txt"))
MODEL_NAME = language_modeling_args["best_model_dir"]

# --- Classifier training setup ---
print("Started Training")
train['labels'] = encode(train["labels"])
dev['labels'] = encode(dev["labels"])
dev_sentences = dev['text'].tolist()
# One prediction column per cross-validation fold.
dev_preds = np.zeros((len(dev), args["n_fold"]))
# Fold dev into the training frame; dev predictions are still tracked
# separately via dev_sentences / dev_preds above.
train = pd.concat([train, dev])