print("Train Model") model = Model(binaryClassification=args["binaryClassification"], model_str=tokenizer_model[1], doLower=args["doLower"], train_batchSize=args["train_batchSize"], testval_batchSize=args["testval_batchSize"], learningRate=args["learningRate"], doLearningRateScheduler=args["doLearningRateScheduler"], labelSentences=labelSentencesDict, max_label_len=max_label_len, device=device) model.run(train_data=train_df[data_column], train_target=train_df[args["targets"]], val_data=val_df[data_column], val_target=val_df[args["targets"]], test_data=test_df[data_column], test_target=test_df[args["targets"]], epochs=args["numEpochs"]) wandb.log({'finished': True}) run_infos = wandb_summarizer.download.get_results(wandb_project_name) names = [] scores = [] for run_info in run_infos: try: scores.append(run_info["end_test_macroAuc"]) names.append(run_info["name"]) except: pass
import os

import wandb
from nltk.tokenize import word_tokenize

# the longest label sentence (in tokens) determines the padding length
max_label_len = max([len(word_tokenize(x)) for x in labelSentencesDict.values()])

print("Train Model")
model = Model(args=tokenizer_model,
              doLower=args["doLower"],
              train_batchSize=args["train_batchSize"],
              testval_batchSize=args["testval_batchSize"],
              learningRate=args["learningRate"],
              doLearningRateScheduler=args["doLearningRateScheduler"],
              labelSentences=labelSentencesDict,
              smartBatching=args["smartBatching"],
              max_label_len=max_label_len,
              device=device,
              target_columns=args["targets"])

# train and test the model
model.run(train_data=train_data,
          train_target=train_target,
          val_data=val_data,
          val_target=val_target,
          test_data=test_data,
          test_target=test_target,
          epochs=args["numEpochs"])

# close the logging
wandb.log({'finished': True})

# save the model
# model.save(os.path.join(args["model_path"], "{}".format(wandb.run.name)))
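# Illustrative sketch, not part of the original script: re-enabling the
# commented-out save above, assuming Model exposes a save(path) method and
# args["model_path"] points to a writable base directory.
save_dir = os.path.join(args["model_path"], wandb.run.name)
os.makedirs(save_dir, exist_ok=True)  # create the run-specific directory if missing
model.save(save_dir)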