def main():
    args = parse_args()
    setup_logging(args.logfile)
    log = get_logger()
    assert 0 <= args.hidden_fraction <= 1

    np.random.seed(args.random_seed)
    tf.set_random_seed(args.random_seed)

    log.info('*' * 100)
    log.info('[Starting MC experiment]')
    log_dict(log.info, vars(args))

    log.info('[Loading target GIs]')
    with open(args.target_gis, 'rb') as f:
        tgt_gis = cpkl.load(f)

    log.info('[Loading source GIs]')
    with open(args.source_gis, 'rb') as f:
        src_gis = cpkl.load(f)

    log.info('[Loading sim scores]')
    with open(args.sim_scores, 'rb') as f:
        sim_scores_data = cpkl.load(f)
    sim_scores = sim_scores_data['values']
    sim_scores = sim_scores / np.max(sim_scores)  # Normalize
    # log.info('\t- %d scores', len(sim_scores))

    hp_param_space = xsmf_param_space(args)
    results, models, training_curves, trials = \
        run_xsmf_experiment(tgt_gis=tgt_gis,
                            src_gis=src_gis,
                            space=hp_param_space,
                            sim_scores=sim_scores,
                            val_hf=args.val_hidden_fraction,
                            test_hf=args.hidden_fraction,
                            n_repeats=args.n_repeats,
                            hp_iters=args.n_hyperopt_iters,
                            hp_seed=args.random_seed)

    # Save results and other information
    log_results(results['summary'])
    with open(args.results_output, 'w') as f:
        json.dump(results, f, indent=2)
    with open(args.training_curve_output, 'wb') as f:
        cpkl.dump(training_curves, f)

    # TODO: save the models. The models cannot be pickled at the moment;
    # we will need to implement a `from_dict` and a `to_dict` method.
    with open(args.models_output, 'wb') as f:
        cpkl.dump(trials, f)
    with open(args.trials_output, 'wb') as f:
        cpkl.dump(trials, f)
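# The TODO above notes that the trained models cannot be pickled yet and that a
# `to_dict` / `from_dict` pair is needed. Below is a minimal sketch of what such a
# pair could look like, assuming a factor-model class holding numpy factor matrices
# U and V; the class and attribute names here are hypothetical, not the project's API.
import numpy as np


class MFModelSketch:
    def __init__(self, U, V):
        self.U = U  # row (target) factors (hypothetical attribute)
        self.V = V  # column factors (hypothetical attribute)

    def to_dict(self):
        # Keep only plain, picklable and JSON-serializable state.
        return {'U': np.asarray(self.U).tolist(), 'V': np.asarray(self.V).tolist()}

    @classmethod
    def from_dict(cls, d):
        return cls(U=np.array(d['U']), V=np.array(d['V']))

# With such a pair in place, something like `cpkl.dump([m.to_dict() for m in models], f)`
# could replace the placeholder dump of `trials` into `args.models_output`.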
def log_training_results(engine):
    step = True
    run_type = 'train'
    train_eval.run(data_loader['train'])
    y_pred, y = train_eval.state.output
    loss = criterion(y_pred, y)
    log_results(to_cpu(y_pred, convert_to_np=True),
                to_cpu(y, convert_to_np=True),
                to_cpu(loss, convert_to_np=True),
                run_type, step,
                engine.state.iteration, total_train_steps, writer)
def train_model(model, train_dl, epochs, display_every=200, visualize_dir='samples'):
    '''
    Train loop

    Args:
        model (nn.Module): main model consisting of a generator that predicts ab channels
            from the L channel of an L*a*b* image, and a discriminator that predicts
            whether the reconstructed L*a*b* image is real or fake
        train_dl (Dataloader): train dataloader of sampled COCO images
        epochs (int): number of epochs
        display_every (int): save the reconstructed L*a*b* prediction every this many iterations
        visualize_dir (str): directory where saved images are written
    '''
    # Grab a batch for visualizing the model output at fixed intervals.
    # NOTE: `val_dl` is expected to be defined at module scope, and `data` is
    # re-bound by the training loop below, so the visualized batch is the current training batch.
    data = next(iter(val_dl))
    for e in range(epochs):
        loss_meter_dict = create_loss_meters()  # dictionary of meters that log the losses of the complete network
        i = 0
        for data in tqdm(train_dl):
            model.setup_input(data)
            model.optimize()
            update_losses(model, loss_meter_dict, count=data['L'].size(0))  # update the loss meters
            i += 1
            if i % display_every == 0:
                print(f"\nEpoch {e+1}/{epochs}")
                print(f"Iteration {i}/{len(train_dl)}")
                log_results(loss_meter_dict)  # print out the losses
                visualize(model, data, save=True, outdir=visualize_dir)  # save the model's outputs

    # save model weights
    torch.save(model.state_dict(), 'colorization_model.pt')
    # serialize the full model
    pickle.dump(model, open('colorization_model.pkl', 'wb'))
def train_model(model, train_dl, epochs, display_every=200):
    # Grab a batch for visualizing the model output at fixed intervals.
    # NOTE: `valid_dl` is expected to be defined at module scope, and `data` is
    # re-bound by the training loop below.
    data = next(iter(valid_dl))
    for e in range(epochs):
        loss_meter_dict = create_loss_meters()  # dictionary of meters that log the losses of the complete network
        i = 0
        for data in tqdm(train_dl):
            model.setup_input(data)
            model.optimize()
            update_losses(model, loss_meter_dict, count=data["L"].size(0))  # update the loss meters
            i += 1
            if i % display_every == 0:
                print(f"\nEpoch {e+1}/{epochs}")
                print(f"Iteration {i}/{len(train_dl)}")
                log_results(loss_meter_dict)  # print out the losses
                visualize(model, data, save=False)  # display the model's outputs
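# A minimal usage sketch for the loop above. `MainModel` and `make_dataloaders` are
# assumed project helpers and the image paths are placeholders; the only hard
# requirement visible in the code is that `valid_dl` exists at module scope.
import glob

train_paths = glob.glob("coco_sample/train/*.jpg")  # assumed dataset layout
val_paths = glob.glob("coco_sample/val/*.jpg")

train_dl = make_dataloaders(paths=train_paths, split='train')
valid_dl = make_dataloaders(paths=val_paths, split='val')  # module-level name read inside train_model

model = MainModel()
train_model(model, train_dl, epochs=20, display_every=200)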
def main(train_images_dir,
         pipeline_config_path,
         output_directory,
         checkpoint_path,
         num_epochs=1,
         image_dict=None,
         labels_path=None,
         samples=None):
    detection_model, pipeline_proto, ckpt_manager = create_model(
        pipeline_config_path, output_directory, checkpoint_path)

    train_files = os.listdir(train_images_dir)
    random.shuffle(train_files)

    BATCH_SIZE = 32
    # Leave out the final batch so every batch is full
    num_batches = (len(train_files) // BATCH_SIZE) - 1

    for epoch in range(num_epochs):
        for idx in range(num_batches):
            batch_files = train_files[BATCH_SIZE * idx:BATCH_SIZE * (idx + 1)]
            train_images_np, train_gt_box = load_images(train_images_dir, batch_files)
            train_image_tensors, gt_classes_one_hot_tensors, gt_box_tensors = \
                prepare_data(train_images_np, train_gt_box)
            detection_model, losses_dict = train_model(
                detection_model, train_images_np, train_image_tensors,
                gt_classes_one_hot_tensors, gt_box_tensors, ckpt_manager)
            logger.info(utils.log_results(epoch, num_epochs, idx, num_batches, losses_dict))
            if idx % 10 == 0:
                ckpt_manager.save()
                print('Checkpoint saved!')

    exporter_lib_v2.export_inference_graph(
        input_type='image_tensor',
        pipeline_config=pipeline_proto,
        trained_checkpoint_dir=os.path.join(output_directory, r'checkpoint'),
        output_directory=output_directory)
def doc_classification(
        task_config,
        model_name_or_path,
        cache_dir,
        data_dir,
        save_dir,
        model_dir,
        run_name="0",
        lr=1e-05,
        warmup_steps=5000,
        balance_classes=True,
        embeds_dropout=0.1,
        epochs=200,  # large because we use early stopping by default
        batch_size=20,
        grad_acc_steps=1,
        early_stopping_metric="roc_auc",
        early_stopping_mode="max",
        early_stopping_patience=10,
        model_class="Bert",
        tokenizer_class="BertTokenizer",
        do_lower_case=False,
        do_train=True,
        do_eval=True,
        do_hpo=False,
        print_preds=False,
        print_dev_preds=False,
        max_seq_len=512,
        seed=11,
        eval_every=500,
        use_amp=False,
        use_cuda=True,
):
    # Load task config
    task_config = yaml.safe_load(open(task_config))

    # Create label list from args list or (for large label lists) create from file by splitting by space
    if isinstance(task_config["data"]["label_list"], list):
        label_list = task_config["data"]["label_list"]
    else:
        with open(data_dir / 'labels' / task_config["data"]["label_list"]) as code_file:
            label_list = code_file.read().split(" ")

    # Register Outcome Metrics
    register_task_metrics(label_list)

    # General Settings
    set_all_seeds(seed=seed)
    device, n_gpu = initialize_device_settings(use_cuda=use_cuda, use_amp=use_amp)

    # 1. Create a tokenizer
    tokenizer = Tokenizer.load(
        pretrained_model_name_or_path=model_name_or_path,
        tokenizer_class=tokenizer_class,
        do_lower_case=do_lower_case)

    # 2. Create a DataProcessor that handles all the conversion from raw text into a pytorch Dataset
    processor = TextClassificationProcessor(
        tokenizer=tokenizer,
        max_seq_len=max_seq_len,
        data_dir=data_dir,
        label_list=label_list,
        metric=task_config["metric"],
        multilabel=task_config["multilabel"],
        train_filename=task_config["data"]["train_filename"],
        dev_filename=task_config["data"]["dev_filename"],
        dev_split=task_config["data"]["dev_split"] if "dev_split" in task_config["data"] else None,
        test_filename=task_config["data"]["test_filename"],
        delimiter=task_config["data"]["parsing"]["delimiter"],
        quote_char=task_config["data"]["parsing"]["quote_char"],
        label_column_name=task_config["data"]["parsing"]["label_column"])

    # 3. Create a DataSilo that loads several datasets (train/dev/test), provides DataLoaders for them
    #    and calculates a few descriptive statistics of our datasets
    data_silo = DataSilo(processor=processor,
                         caching=True,
                         cache_path=Path(cache_dir),
                         batch_size=batch_size)

    if do_train:
        # Setup MLFlow logger
        ml_logger = MLFlowLogger(tracking_uri=task_config["log_dir"])
        ml_logger.init_experiment(
            experiment_name=task_config["experiment_name"],
            run_name=f'{task_config["experiment_name"]}_{run_name}')

        # 4. Create an AdaptiveModel
        # a) which consists of a pretrained language model as a basis
        language_model = LanguageModel.load(model_name_or_path,
                                            language_model_class=model_class)

        # b) and a prediction head on top that is suited for our task
        # Define class weights
        if balance_classes:
            class_weights = data_silo.calculate_class_weights(
                task_name=task_config["task_type"])
        else:
            class_weights = None

        # Create Multi- or Single-Label Classification Heads
        if task_config["multilabel"]:
            prediction_head = MultiLabelTextClassificationHead(
                class_weights=class_weights, num_labels=len(label_list))
        else:
            prediction_head = ExtendedTextClassificationHead(
                class_weights=class_weights, num_labels=len(label_list))

        model = ExtendedAdaptiveModel(
            language_model=language_model,
            prediction_heads=[prediction_head],
            embeds_dropout_prob=embeds_dropout,
            lm_output_types=[task_config["output_type"]],
            device=device)

        # 5. Create an optimizer
        schedule_opts = {
            "name": "LinearWarmup",
            "num_warmup_steps": warmup_steps
        }
        model, optimizer, lr_schedule = initialize_optimizer(
            model=model,
            learning_rate=lr,
            device=device,
            n_batches=len(data_silo.loaders["train"]),
            n_epochs=epochs,
            use_amp=use_amp,
            grad_acc_steps=grad_acc_steps,
            schedule_opts=schedule_opts)

        # 6. Create an early stopping instance
        early_stopping = None
        if early_stopping_mode != "none":
            early_stopping = EarlyStopping(mode=early_stopping_mode,
                                           min_delta=0.0001,
                                           save_dir=model_dir,
                                           metric=early_stopping_metric,
                                           patience=early_stopping_patience)

        # 7. Feed everything to the Trainer, which takes care of training our model
        #    and evaluates it from time to time
        trainer = ExtendedTrainer(model=model,
                                  optimizer=optimizer,
                                  data_silo=data_silo,
                                  epochs=epochs,
                                  n_gpu=n_gpu,
                                  lr_schedule=lr_schedule,
                                  evaluate_every=eval_every,
                                  early_stopping=early_stopping,
                                  device=device,
                                  grad_acc_steps=grad_acc_steps,
                                  evaluator_test=do_eval)

        def score_callback(eval_score, train_loss):
            tune.report(roc_auc_dev=eval_score, train_loss=train_loss)

        # 8. Train the model
        trainer.train(score_callback=score_callback if do_hpo else None)

        # 9. Save model if not saved in early stopping
        model.save(model_dir + "/final_model")
        processor.save(model_dir + "/final_model")

    if do_eval:
        # Load newly trained model or existing model
        if not do_train:
            model_dir = Path(model_name_or_path)

        logger.info("###### Eval on TEST SET #####")
        evaluator_test = ExtendedEvaluator(
            data_loader=data_silo.get_data_loader("test"),
            tasks=data_silo.processor.tasks,
            device=device)

        # Load trained model for evaluation
        model = ExtendedAdaptiveModel.load(model_dir, device)
        model.connect_heads_with_processor(data_silo.processor.tasks,
                                           require_labels=True)

        # Evaluate
        results = evaluator_test.eval(model, return_preds_and_labels=True)

        # Log results
        utils.log_results(results,
                          dataset_name="test",
                          steps=len(evaluator_test.data_loader),
                          save_path=model_dir + "/eval_results.txt")

        if print_preds:
            # Print model test predictions
            utils.save_predictions(results,
                                   save_dir=model_dir,
                                   multilabel=task_config["multilabel"])

        if print_dev_preds:
            # Evaluate on dev set, e.g. for threshold tuning
            evaluator_dev = Evaluator(
                data_loader=data_silo.get_data_loader("dev"),
                tasks=data_silo.processor.tasks,
                device=device)
            dev_results = evaluator_dev.eval(model, return_preds_and_labels=True)
            utils.log_results(dev_results,
                              dataset_name="dev",
                              steps=len(evaluator_dev.data_loader),
                              save_path=model_dir + "/eval_dev_results.txt")
            # Print model dev predictions
            utils.save_predictions(dev_results,
                                   save_dir=model_dir,
                                   multilabel=task_config["multilabel"],
                                   dataset_name="dev")
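# Since `score_callback` reports `roc_auc_dev` through `tune.report`, the `do_hpo`
# path appears to be meant for a Ray Tune search. Below is a hypothetical sketch of
# how a trial driver might be wired up; the config path, checkpoint name, directories,
# and search space are assumptions, not part of the original code.
from pathlib import Path

from ray import tune


def hpo_trainable(config):
    # Each trial runs doc_classification with a sampled learning rate and lets
    # score_callback report the dev ROC AUC back to Tune.
    doc_classification(task_config="configs/task.yaml",            # assumed path
                       model_name_or_path="bert-base-cased",        # assumed checkpoint
                       cache_dir="cache",                           # assumed dirs
                       data_dir=Path("data"),
                       save_dir="runs",
                       model_dir="runs/model",
                       lr=config["lr"],
                       do_hpo=True,
                       do_eval=False)


analysis = tune.run(hpo_trainable,
                    config={"lr": tune.loguniform(1e-6, 1e-4)},
                    num_samples=8,
                    metric="roc_auc_dev",
                    mode="max")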
def main():
    args = parse_args()
    setup_logging(args.logfile)
    log = get_logger()
    assert 0 <= args.hidden_fraction <= 1

    np.random.seed(args.random_seed)
    tf.set_random_seed(args.random_seed)

    log.info('*' * 100)
    log.info('[Starting MC experiment]')
    log_dict(log.info, vars(args))

    log.info('[Loading input data]')
    with open(args.target_gis, 'rb') as f:
        gi_data = cpkl.load(f)
    row_genes = gi_data['rows']

    log.info('\t- setting up training and test sets')
    train_test_sets = [gi_train_test_split(gi_data, args.hidden_fraction)
                       for _ in range(args.n_repeats)]
    train_Xs, test_Xs, test_masks = zip(*train_test_sets)

    if args.mc_alg == 'NGMC':
        scalers = [MCScaler('0-1') for _ in range(args.n_repeats)]
    else:
        scalers = [MCScaler('std') for _ in range(args.n_repeats)]
    train_Xs = [scaler.fit_transform(X) for scaler, X in zip(scalers, train_Xs)]

    if args.mc_alg == 'PMF':
        imputed_Xs, models_info = train_pmf_models(train_Xs=train_Xs,
                                                   rank=args.rank,
                                                   iters=args.iters,
                                                   lr=args.lr,
                                                   lam=args.lambda_f,
                                                   report_every=args.report_every)
    elif args.mc_alg == 'PMF_b':
        imputed_Xs, models_info = train_pmf_b_models(train_Xs=train_Xs,
                                                     rank=args.rank,
                                                     iters=args.iters,
                                                     lr=args.lr,
                                                     lam=args.lambda_f,
                                                     lam_b=args.lambda_b,
                                                     report_every=args.report_every)
    elif args.mc_alg == 'KPMF':
        L = get_laplacian(list(row_genes), args.target_ppi)
        imputed_Xs, models_info = train_kpmf_models(train_Xs=train_Xs,
                                                    L=L,
                                                    rank=args.rank,
                                                    iters=args.iters,
                                                    lr=args.lr,
                                                    lambda_f=args.lambda_f,
                                                    lambda_h=args.lambda_h,
                                                    rl_lambda=args.rl_lambda,
                                                    report_every=args.report_every)
    elif args.mc_alg == 'KPMF_b':
        L = get_laplacian(list(row_genes), args.target_ppi)
        imputed_Xs, models_info = train_kpmf_b_models(train_Xs=train_Xs,
                                                      L=L,
                                                      rank=args.rank,
                                                      iters=args.iters,
                                                      lr=args.lr,
                                                      lambda_b=args.lambda_b,
                                                      lambda_f=args.lambda_f,
                                                      lambda_h=args.lambda_h,
                                                      rl_lambda=args.rl_lambda,
                                                      report_every=args.report_every)
    elif args.mc_alg == 'NGMC':
        ppi = nx.read_edgelist(args.target_ppi)
        A = get_ppi_data(list(row_genes), ppi, mode='normalized_adjacency')
        imputed_Xs, models_info = train_ngmc_models(train_Xs=train_Xs,
                                                    A=A,
                                                    rank=args.rank,
                                                    iters=args.iters,
                                                    lr=args.lr,
                                                    alpha_p=args.alpha_p,
                                                    lambda_f=args.lambda_f,
                                                    lambda_h=args.lambda_h,
                                                    lambda_p=args.lambda_p)
    elif args.mc_alg == 'XSMF':
        with open(args.source_gis, 'rb') as f:
            src_gi_data = cpkl.load(f)
        X_src = src_gi_data['values']
        X_src = MCScaler(mode='std').fit_transform(X_src)

        log.info('[Loading sim scores]')
        with open(args.sim_scores, 'rb') as f:
            sim_scores_data = cpkl.load(f)
        sim_scores = sim_scores_data['values']
        sim_scores = sim_scores / np.max(sim_scores)  # Normalize

        imputed_Xs, models_info = train_xsmf_models(train_Xs=train_Xs,
                                                    X_src=X_src,
                                                    sim_scores=sim_scores,
                                                    rank=args.rank,
                                                    iters=args.iters,
                                                    lr=args.lr,
                                                    lambda_sim=args.lambda_sim,
                                                    lambda_src=args.lambda_src,
                                                    lambda_u=args.lambda_u,
                                                    lambda_v=args.lambda_v,
                                                    lambda_us=args.lambda_us,
                                                    lambda_vs=args.lambda_vs,
                                                    report_every=args.report_every)
    elif args.mc_alg == 'KXSMF':
        with open(args.source_gis, 'rb') as f:
            src_gi_data = cpkl.load(f)
        X_src = src_gi_data['values']
        X_src = MCScaler(mode='std').fit_transform(X_src)

        log.info('[Loading sim scores]')
        with open(args.sim_scores, 'rb') as f:
            sim_scores_data = cpkl.load(f)
        sim_scores = sim_scores_data['values']
        sim_scores = sim_scores / np.max(sim_scores)  # Normalize

        L_tgt = get_laplacian(list(gi_data['rows']), args.target_ppi)
        L_src = get_laplacian(list(src_gi_data['rows']), args.source_ppi)
        log.warning('%s, %s' % L_src.shape)
        log.warning('%s, %s' % X_src.shape)
        imputed_Xs, models_info = train_kxsmf_models(train_Xs=train_Xs,
                                                     X_src=X_src,
                                                     L_tgt=L_tgt,
                                                     L_src=L_src,
                                                     sim_scores=sim_scores,
                                                     rank=args.rank,
                                                     iters=args.iters,
                                                     lr=args.lr,
                                                     lambda_sim=args.lambda_sim,
                                                     lambda_src=args.lambda_src,
                                                     lambda_u=args.lambda_u,
                                                     lambda_v=args.lambda_v,
                                                     lambda_us=args.lambda_us,
                                                     lambda_vs=args.lambda_vs,
                                                     lambda_tgt_rl=args.lambda_tgt_rl,
                                                     lambda_src_rl=args.lambda_src_rl,
                                                     report_every=args.report_every)
    elif args.mc_alg == 'KXSMF_b':
        with open(args.source_gis, 'rb') as f:
            src_gi_data = cpkl.load(f)
        X_src = src_gi_data['values']
        X_src = MCScaler(mode='std').fit_transform(X_src)

        log.info('[Loading sim scores]')
        with open(args.sim_scores, 'rb') as f:
            sim_scores_data = cpkl.load(f)
        sim_scores = sim_scores_data['values']
        sim_scores = sim_scores / np.max(sim_scores)  # Normalize

        L_tgt = get_laplacian(list(gi_data['rows']), args.target_ppi)
        L_src = get_laplacian(list(src_gi_data['rows']), args.source_ppi)
        log.warning('%s, %s' % L_src.shape)
        log.warning('%s, %s' % X_src.shape)

        imputed_Xs, models_info = train_kxsmfb_models(train_Xs=train_Xs,
                                                      X_src=X_src,
                                                      L_tgt=L_tgt,
                                                      L_src=L_src,
                                                      sim_scores=sim_scores,
                                                      rank=args.rank,
                                                      iters=args.iters,
                                                      lr=args.lr,
                                                      lambda_b=args.lambda_b,
                                                      lambda_sim=args.lambda_sim,
                                                      lambda_src=args.lambda_src,
                                                      lambda_u=args.lambda_u,
                                                      lambda_v=args.lambda_v,
                                                      lambda_us=args.lambda_us,
                                                      lambda_vs=args.lambda_vs,
                                                      lambda_tgt_rl=args.lambda_tgt_rl,
                                                      lambda_src_rl=args.lambda_src_rl,
                                                      report_every=args.report_every)
    else:
        raise NotImplementedError

    imputed_Xs = [scaler.inverse_transform(X)
                  for scaler, X in zip(scalers, imputed_Xs)]

    # Take transposes here for XSMF, KXSMF
    results = evaluate_preds(test_Xs, imputed_Xs, test_masks)
    results, fold_results = summarize_results(results)
    log_results(results)
    results_dict = dict(summary=results, collected=fold_results, args=vars(args))

    pvals_data = None
    if args.pval_file:
        # Restrict evaluation to significant interactions when a p-value file is given
        with open(args.pval_file, 'rb') as f:
            pvals_data = cpkl.load(f)
        assert np.all(pvals_data['cols'] == gi_data['cols'])
        assert np.all(pvals_data['rows'] == gi_data['rows'])
        pvals = pvals_data['values']
        pvals_filled = np.where(np.isnan(pvals), 1000, pvals)
        sig_mask = pvals_filled < args.pval_thresh
        sig_test_Xs = [np.where(sig_mask, _X, np.nan) for _X in test_Xs]
        sig_imputed_Xs = [np.where(sig_mask, _X, np.nan) for _X in imputed_Xs]
        sig_results = evaluate_preds(sig_test_Xs, sig_imputed_Xs, test_masks)
        sig_results, sig_fold_results = summarize_results(sig_results)
        log_results(sig_results)
        results_dict['sig_summary'] = sig_results
        results_dict['sig_collected'] = sig_fold_results

    with open(args.results_output, 'w') as f:
        json.dump(results_dict, f, indent=2)

    serialized_data = {
        'GIs': gi_data,
        'alg': args.mc_alg,
        'fold_data': dict(train_Xs=train_Xs, test_Xs=test_Xs, masks=test_masks),
        'imputed_Xs': imputed_Xs,
        'models_info': models_info,
        'pvals': pvals_data
    }
    with open(args.models_output, 'wb') as f:
        cpkl.dump(serialized_data, f)
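# The experiment above relies on an `MCScaler` helper with 'std' and '0-1' modes plus
# `fit_transform` / `inverse_transform`. The sketch below shows what such a scaler could
# look like for matrices with missing entries; it is an assumption about the interface
# implied by the calls above, not the project's actual implementation.
import numpy as np


class MCScalerSketch:
    """NaN-aware matrix scaler with 'std' (z-score) and '0-1' (min-max) modes."""

    def __init__(self, mode='std'):
        assert mode in ('std', '0-1')
        self.mode = mode

    def fit_transform(self, X):
        if self.mode == 'std':
            # Standardize using statistics computed over observed entries only
            self.mean_, self.scale_ = np.nanmean(X), np.nanstd(X)
            return (X - self.mean_) / self.scale_
        # Min-max scale observed entries into [0, 1]
        self.min_, self.max_ = np.nanmin(X), np.nanmax(X)
        return (X - self.min_) / (self.max_ - self.min_)

    def inverse_transform(self, X):
        if self.mode == 'std':
            return X * self.scale_ + self.mean_
        return X * (self.max_ - self.min_) + self.min_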
def main():
    args = parse_args()
    setup_logging(args.logfile)
    log = get_logger()
    assert 0 <= args.hidden_fraction <= 1

    np.random.seed(args.random_seed)
    tf.set_random_seed(args.random_seed)

    log.info('*' * 100)
    log.info('[Starting MC experiment]')
    log_dict(log.info, vars(args))

    log.info('[Loading input data]')
    with open(args.input_file, 'rb') as f:
        obj = cpkl.load(f)

    # Set up experiments
    fit_params = None
    if args.mc_alg == 'PMF':
        param_space = pmf_param_space(args)
        run_experiment = run_pmf
    elif args.mc_alg == 'PMF_b':
        param_space = pmfb_param_space(args)
        run_experiment = run_pmfb
    elif args.mc_alg in ['KPMF', 'NGMC', 'KPMF_b']:
        # Experiments that need a PPI network
        if args.ppi is not None:
            ppi = nx.read_edgelist(args.ppi)
            if args.mc_alg == 'KPMF':
                L = get_ppi_data(obj['rows'], ppi, mode='laplacian')
                param_space = kpmf_param_space(args)
                run_experiment = run_kpmf
                fit_params = dict(L=L)
            elif args.mc_alg == 'KPMF_b':
                L = get_ppi_data(obj['rows'], ppi, mode='laplacian')
                param_space = kpmfb_param_space(args)
                run_experiment = run_kpmfb
                fit_params = dict(L=L)
            elif args.mc_alg == 'NGMC':
                fit_params = dict(P=None)
                P = get_ppi_data(obj['rows'], ppi, mode='normalized_adjacency')
                fit_params['P'] = P
                param_space = ngmc_param_space(args)
                run_experiment = run_ngmc
            else:
                raise NotImplementedError(
                    '{} option is invalid or not implemented'.format(args.mc_alg))
        else:
            raise NotImplementedError(
                '{} option is invalid or not implemented'.format(args.mc_alg))

    # Run experimental protocol
    results, models, training_curves, trials = \
        run_experiment(obj,
                       param_space=param_space,
                       fit_params=fit_params,
                       val_hidden_fraction=args.val_hidden_fraction,
                       hidden_fraction=args.hidden_fraction,
                       n_repeats=args.n_repeats,
                       hyperopt_iters=args.n_hyperopt_iters,
                       seed=args.random_seed,
                       logistic=args.logistic)

    # Save results and other information
    log_results(results['summary'])
    with open(args.results_output, 'w') as f:
        json.dump(results, f, indent=2)
    with open(args.training_curve_output, 'wb') as f:
        cpkl.dump(training_curves, f)

    # TODO: save the models. The models cannot be pickled at the moment;
    # we will need to implement a `from_dict` and a `to_dict` method.
    with open(args.models_output, 'wb') as f:
        cpkl.dump(trials, f)
    with open(args.trials_output, 'wb') as f:
        cpkl.dump(trials, f)
def outcome_pretraining(task_config,
                        model_name,
                        cache_dir,
                        run_name="0",
                        lr=1e-05,
                        warmup_steps=5000,
                        embeds_dropout=0.1,
                        epochs=200,  # large because we use early stopping by default
                        batch_size=20,
                        grad_acc_steps=1,
                        early_stopping_metric="loss",
                        early_stopping_mode="min",
                        early_stopping_patience=10,
                        model_class="Bert",
                        tokenizer_class="BertTokenizer",
                        do_lower_case=True,
                        do_train=True,
                        do_eval=True,
                        do_hpo=False,
                        max_seq_len=512,
                        seed=11,
                        eval_every=500,
                        use_amp=False,
                        use_cuda=True,
                        ):
    # Load task config
    task_config = yaml.safe_load(open(task_config))
    data_dir = Path(task_config["data"]["data_dir"])

    # General Settings
    set_all_seeds(seed=seed)
    device, n_gpu = initialize_device_settings(use_cuda=use_cuda, use_amp=use_amp)

    # 1. Create a tokenizer
    tokenizer = Tokenizer.load(pretrained_model_name_or_path=model_name,
                               tokenizer_class=tokenizer_class,
                               do_lower_case=do_lower_case)

    # 2. Create a DataProcessor that handles all the conversion from raw text into a pytorch Dataset
    processor = OutcomePretrainingProcessor(tokenizer=tokenizer,
                                            max_seq_len=max_seq_len,
                                            data_dir=data_dir,
                                            train_filename=task_config["data"]["train_filename"],
                                            dev_filename=task_config["data"]["dev_filename"],
                                            seed=seed,
                                            max_size_admission=50,
                                            max_size_discharge=50,
                                            cache_dir=cache_dir)

    # 3. Create a DataSilo that loads several datasets (train/dev/test), provides DataLoaders for them
    #    and calculates a few descriptive statistics of our datasets
    data_silo = OutcomePretrainingDataSilo(processor=processor,
                                           caching=True,
                                           cache_dir=cache_dir,
                                           batch_size=batch_size,
                                           max_multiprocessing_chunksize=200)

    if do_train:
        # Set save dir for experiment output
        save_dir = Path(task_config["output_dir"]) / f'{task_config["experiment_name"]}_{run_name}'

        # Use HPO config args if config is passed
        if do_hpo:
            save_dir = save_dir / tune.session.get_trial_name()
        else:
            exp_name = f"exp_{random.randint(100000, 999999)}"
            save_dir = save_dir / exp_name

        # Create save dir
        if not os.path.exists(save_dir):
            os.makedirs(save_dir)

        # Setup MLFlow logger
        ml_logger = MLFlowLogger(tracking_uri=task_config["log_dir"])
        ml_logger.init_experiment(experiment_name=task_config["experiment_name"],
                                  run_name=f'{task_config["experiment_name"]}_{run_name}')

        # 4. Create an AdaptiveModel
        # a) which consists of a pretrained language model as a basis
        language_model = LanguageModel.load(model_name, language_model_class=model_class)

        # b) and a NextSentenceHead prediction head, or a TextClassificationHead if it's not a Bert model
        if model_class == "Bert":
            next_sentence_head = NextSentenceHead.load(model_class)
        else:
            next_sentence_head = TextClassificationHead(num_labels=2)

        model = AdaptiveModel(
            language_model=language_model,
            prediction_heads=[next_sentence_head],
            embeds_dropout_prob=embeds_dropout,
            lm_output_types=["per_sequence"],
            device=device,
        )

        # 5. Create an optimizer
        schedule_opts = {"name": "LinearWarmup", "num_warmup_steps": warmup_steps}
        model, optimizer, lr_schedule = initialize_optimizer(
            model=model,
            learning_rate=lr,
            device=device,
            n_batches=len(data_silo.loaders["train"]),
            n_epochs=epochs,
            use_amp=use_amp,
            grad_acc_steps=grad_acc_steps,
            schedule_opts=schedule_opts)

        # 6. Create an early stopping instance
        early_stopping = None
        if early_stopping_mode != "none":
            early_stopping = EarlyStopping(
                mode=early_stopping_mode,
                min_delta=0.0001,
                save_dir=save_dir,
                metric=early_stopping_metric,
                patience=early_stopping_patience
            )

        # 7. Feed everything to the Trainer, which takes care of training our model
        #    and evaluates it from time to time
        trainer = ExtendedTrainer(
            model=model,
            optimizer=optimizer,
            data_silo=data_silo,
            epochs=epochs,
            n_gpu=n_gpu,
            lr_schedule=lr_schedule,
            evaluate_every=eval_every,
            early_stopping=early_stopping,
            device=device,
            grad_acc_steps=grad_acc_steps,
            evaluator_test=do_eval
        )

        def score_callback(eval_score, train_loss):
            tune.report(roc_auc_dev=eval_score, train_loss=train_loss)

        # 8. Train the model
        trainer.train(score_callback=score_callback if do_hpo else None)

        # 9. Save model if not saved in early stopping
        model.save(save_dir / "final_model")
        processor.save(save_dir / "final_model")

    if do_eval:
        # Load newly trained model or existing model
        if do_train:
            model_dir = save_dir
        else:
            model_dir = Path(model_name)

        logger.info("###### Eval on TEST SET #####")
        evaluator_test = Evaluator(
            data_loader=data_silo.get_data_loader("test"),
            tasks=data_silo.processor.tasks,
            device=device
        )

        # Load trained model for evaluation
        model = AdaptiveModel.load(model_dir, device)
        model.connect_heads_with_processor(data_silo.processor.tasks, require_labels=True)

        # Evaluate
        results = evaluator_test.eval(model, return_preds_and_labels=True)

        # Log results
        utils.log_results(results,
                          dataset_name="test",
                          steps=len(evaluator_test.data_loader),
                          save_path=model_dir / "eval_results.txt")