def general_training_target_test(data_path, model_id, data_id, output,
                                 n_patients, target_icu=None):
    """ Train on all ICUs and evaluate on each test ICU. """
    auc = []
    try:
        print("\n\n\n=== General training ===")
        folds = load_data(data_path, data_id, [1, 2, 3, 4], n_patients)
        for fold, (x_train, y_train, x_val, y_val, x_test, y_test,
                   icu_ids_train, icu_ids_val, icu_ids_test) in enumerate(folds):
            layers, freezable = models.create_freezable_layers(
                model_id, x_train.shape[1], x_train.shape[2])
            checkpoint, early_stopping, model = models.create_model(
                "mt-" + model_id + "-" + data_id, layers)
            model.fit(x_train, y_train, epochs=n_epochs,
                      validation_data=(x_val, y_val),
                      callbacks=[early_stopping, checkpoint])

            # Evaluate either a single target ICU or every ICU in the test split.
            icu_test_list = ([target_icu] if target_icu is not None
                             else np.unique(icu_ids_test))
            icu_auc = []
            score, general_score = models.evaluate_model(
                "mt-" + model_id + "-" + data_id, x_test, y_test)
            print("\n\n=== General Score - \tFold %d - \tAUC %f"
                  % (fold, general_score))

            for test_icu in icu_test_list:
                score, auc_score = models.evaluate_model(
                    "mt-" + model_id + "-" + data_id,
                    x_test[icu_ids_test == test_icu],
                    y_test[icu_ids_test == test_icu])
                icu_auc.append(auc_score)
                print("\n=== ICU Test %d - \tFold %d - \tAUC %f"
                      % (test_icu, fold, auc_score))
                # Note: the class counts below are taken over the whole test
                # split, not the per-ICU subset.
                output.write("Mixed-%d,%d,%d,%d,%f,%f,%f,%f,%f,%f\n"
                             % (test_icu, fold,
                                np.count_nonzero(y_test == 0),
                                np.count_nonzero(y_test == 1),
                                score[0], score[1], score[2], score[3],
                                score[4], auc_score))
            auc.append(icu_auc)
    except Exception as e:
        print(e)
    # Average the per-ICU AUC across folds.
    avg_auc = np.array(auc).mean(axis=0).tolist()
    return avg_auc
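# A minimal usage sketch (hypothetical paths, IDs, and output file; `n_epochs`
# is assumed to be a module-level constant, as used above):
#
# with open("results/general_target.csv", "w") as out:
#     avg_auc = general_training_target_test(
#         "data/", "lstm", "mortality", out, n_patients=2000, target_icu=2)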
def mixed(model_id, data_id, output, n_patients, feature_sequence):
    folds = load_data(data_id, [1, 2, 3, 4], n_patients, feature_sequence)
    for fold, (x_train, y_train, x_val, y_val, x_test, y_test,
               icu_ids_train, icu_ids_val, icu_ids_test) in enumerate(folds):
        layers, freezable = models.create_freezable_layers(
            model_id, x_train.shape[1], x_train.shape[2])
        checkpoint, early_stopping, model = models.create_model(
            "mt-" + model_id + "-" + data_id, layers)
        model.fit(x_train, y_train, epochs=n_epochs,
                  validation_data=(x_val, y_val),
                  callbacks=[early_stopping, checkpoint])

        # Overall AUC on the pooled test set, then one AUC column per ICU.
        score, auc_score = models.evaluate_model(
            "mt-" + model_id + "-" + data_id, x_test, y_test)
        print("ALL " + "," + str(auc_score))
        output.write(str(fold) + "," + str(auc_score))
        for test_icu in np.unique(icu_ids_test):
            score, auc_score = models.evaluate_model(
                "mt-" + model_id + "-" + data_id,
                x_test[icu_ids_test == test_icu],
                y_test[icu_ids_test == test_icu])
            print("ICU " + str(test_icu) + "," + str(auc_score))
            output.write("," + str(auc_score))
        output.write("\n")
def test_model(model, test_data, transformation_type=TRANSFORMATION.clean):
    X, Y = test_data
    print('Transforming test data set...')
    X = transform(X, transformation_type)
    print('Testing model [{}]...'.format(transformation_type))
    models.evaluate_model(model, X, Y)
    # Release the transformed copies explicitly.
    del X, Y
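# Usage sketch (hedged: `x_test`/`y_test` and the trained `model` come from
# the surrounding script; `TRANSFORMATION.clean` is the untransformed baseline
# used as the default above):
#
# test_model(model, (x_test, y_test), TRANSFORMATION.clean)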
def train_on_target(data_path, model_id, data_id, output, n_patients):
    """ Train and evaluate only on the target ICU. """
    auc = []
    try:
        icu_types = [1, 2, 3, 4]
        for held_out_icu in icu_types:
            print("=== Target: " + str(held_out_icu) + " ===")
            folds = load_data(data_path, data_id, [held_out_icu], n_patients)
            fold_auc = []
            for fold, (x_train, y_train, x_val, y_val, x_test, y_test,
                       icu_ids_train, icu_ids_val,
                       icu_ids_test) in enumerate(folds):
                layers, freezable = models.create_freezable_layers(
                    model_id, x_train.shape[1], x_train.shape[2])
                print("=== Train on Target ===")
                checkpoint, early_stopping, model = models.create_model(
                    "fc-" + model_id + "-" + data_id + "-" + str(held_out_icu),
                    layers)
                model.fit(x_train, y_train, epochs=n_epochs,
                          validation_data=(x_val, y_val),
                          callbacks=[early_stopping, checkpoint])
                score, auc_score = models.evaluate_model(
                    "fc-" + model_id + "-" + data_id + "-" + str(held_out_icu),
                    x_test, y_test)
                fold_auc.append(auc_score)
                print("=== Fold: " + str(fold) + ",Loss: " + str(score[0])
                      + ",Acc: " + str(score[1]) + ",Prec: " + str(score[2])
                      + ",Rec: " + str(score[3]) + ",F1: " + str(score[4])
                      + ",AUC: " + str(auc_score) + " ====")
                output.write("Target-%d,%d,%d,%d,%f,%f,%f,%f,%f,%f\n"
                             % (held_out_icu, fold,
                                np.count_nonzero(y_test == 0),
                                np.count_nonzero(y_test == 1),
                                score[0], score[1], score[2], score[3],
                                score[4], auc_score))
            auc.append(fold_auc)
    except Exception as e:
        print(e)
    # Average across folds for each target ICU.
    avg_auc = np.array(auc).mean(axis=1).tolist()
    return avg_auc
def general_training(data_path, model_id, data_id, output, n_patients):
    """ Train and evaluate on all ICUs. """
    auc = []
    try:
        print("=== General training and testing ===")
        folds = load_data(data_path, data_id, [1, 2, 3, 4], n_patients)
        for fold, (x_train, y_train, x_val, y_val, x_test, y_test,
                   icu_ids_train, icu_ids_val, icu_ids_test) in enumerate(folds):
            layers, freezable = models.create_freezable_layers(
                model_id, x_train.shape[1], x_train.shape[2])
            checkpoint, early_stopping, model = models.create_model(
                "mtt-" + model_id + "-" + data_id, layers)
            model.fit(x_train, y_train, epochs=n_epochs,
                      validation_data=(x_val, y_val),
                      callbacks=[early_stopping, checkpoint])
            score, auc_score = models.evaluate_model(
                "mtt-" + model_id + "-" + data_id, x_test, y_test)
            auc.append(auc_score)
            print("=== General Test, Fold: " + str(fold) + ",Loss: "
                  + str(score[0]) + ",Acc: " + str(score[1]) + ",Prec: "
                  + str(score[2]) + ",Rec: " + str(score[3]) + ",F1: "
                  + str(score[4]) + ",AUC: " + str(auc_score) + " ====")
            output.write("General-Test-%d,%d,%d,%f,%f,%f,%f,%f,%f\n"
                         % (fold,
                            np.count_nonzero(y_test == 0),
                            np.count_nonzero(y_test == 1),
                            score[0], score[1], score[2], score[3], score[4],
                            auc_score))
    except Exception as e:
        print(e)
    # Single mean AUC across folds.
    avg_auc = np.array(auc).mean().tolist()
    return avg_auc
def train_pipeline(training_pipeline_params: TrainingPipelineParams):
    logger.info(f"Start training with params: {training_pipeline_params}")
    load_data(training_pipeline_params.input_data_path,
              training_pipeline_params.input_data_url)
    data = read_data(training_pipeline_params.input_data_path)
    logger.info(f"Raw data shape: {data.shape}")

    train_df, val_df = split_train_val_data(
        data, training_pipeline_params.splitting_params)
    logger.info(f"Train df shape: {train_df.shape}")
    logger.info(f"Val df shape: {val_df.shape}")

    pipeline = build_transformer(training_pipeline_params.feature_params)
    pipeline.fit(train_df)
    logger.info("Transformer fitted.")

    train_features = make_features(pipeline, train_df)
    train_target = extract_target(train_df,
                                  training_pipeline_params.feature_params)
    logger.info(f"Train features shape: {train_features.shape}")

    val_features = make_features(pipeline, val_df)
    val_target = extract_target(val_df,
                                training_pipeline_params.feature_params)
    logger.info(f"Val features shape: {val_features.shape}")

    model = get_model(training_pipeline_params.train_params)
    model = train_model(train_features, train_target, model)
    logger.info("Model trained.")

    predictions = predict_model(val_features, model)
    metrics = evaluate_model(predictions, val_target)

    path_to_model = save_artifacts(metrics, model, pipeline,
                                   training_pipeline_params)
    return path_to_model, metrics
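# Usage sketch (hedged: the config loader name and YAML path below are
# assumptions, not part of this module; `TrainingPipelineParams` is typically
# deserialized from a config file by such a helper):
#
# params = read_training_pipeline_params("configs/train_config.yaml")
# path_to_model, metrics = train_pipeline(params)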
        loader_dict, loader_sizes = dat.init_dataset(
            train_list, val_list, test_list, model_type, config_dict)
        test_loader = loader_dict['test']
        # Optionally mask out force or position channels in the test labels.
        if ablate == "F":
            mask = np.isin(qty_full, force_features, invert=False)
            test_loader.dataset.mask_labels(mask)
        if ablate == "P":
            mask = np.isin(qty_full, pos_features, invert=False)
            test_loader.dataset.mask_labels(mask)
        if use_predlist:
            predictions = predlist[i]
        else:
            # plt.close('all')
            predictions = mdl.evaluate_model(model, test_loader,
                                             model_type=model_type,
                                             encode=encode)
        predictions_list.append(predictions)
        np.savetxt('encodings_031621/encode_' + str(i + 1) + '.txt',
                   predictions)  # uncomment to save encodings.
else:
    for i, (test, condition) in enumerate(
            zip(test_list_full, condition_list), 0):
        test_list = [test]
        loader_dict, loader_sizes = dat.init_dataset(
            train_list, val_list, test_list, model_type, config_dict)
        test_loader = loader_dict['test']
        dataset_mean = np.loadtxt('train_dataset_mean.dat')
        dataset_stdev = np.loadtxt('train_dataset_sd.dat')
    'crop_list': None,
    'trans_function': None
}

loader_dict, loader_sizes = dat.init_dataset(train_list, val_list, test_list,
                                             model_type, config_dict)
test_loader = loader_dict["test"]
# Normalize the test set with the PSM1 training statistics.
test_loader.dataset.mean = mean1
test_loader.dataset.stdev = std1
test_loader.dataset.label_array = test_loader.dataset.raw_label_array
test_loader.dataset.normalize_state(mean1, std1)

model = mdl.StateModel(54, 3)
model.load_state_dict(torch.load("best_modelweights_S_PSM1.dat"))
predictions1 = mdl.evaluate_model(model, test_loader, model_type=model_type)
np.savetxt("dual_PSM1_1.pred", predictions1)

#%% Do this for PSM2
file_dir = '../experiment_data'  # define the file directory for dataset
model_type = "S"
train_list = [1, 3, 5, 7, 8, 10, 12, 14, 15, 17, 19, 21]
val_list = [1]
test_list = [1]
config_dict = {
    'file_dir': file_dir,
    'include_torque': False,
    'spatial_forces': False,
    'custom_state': None,
def domain_adaptation(data_path, model_id, data_id, output, n_patients,
                      shuffle, use_target, target_icu=None):
    """ Train on all ICUs, then fine-tune and evaluate on the target ICU. """
    auc = []
    try:
        icu_types = [1, 2, 3, 4]
        target_icu_list = [target_icu] if target_icu is not None else icu_types
        for held_out_icu in target_icu_list:
            icus = list(icu_types)
            if not use_target:
                icus.remove(held_out_icu)
            folds = load_data(data_path, data_id, icus, n_patients)
            icu_folds = load_data(data_path, data_id, [held_out_icu],
                                  n_patients)
            model_name = ("ft-" + model_id + "-" + data_id + "-"
                          + str(held_out_icu))
            fold_auc = []
            for fold in range(k_fold):
                (x_train, y_train, x_val, y_val, x_test, y_test,
                 icu_ids_train, icu_ids_val, icu_ids_test) = folds[fold]
                print("=== Held out: " + str(held_out_icu) + " ===")
                layers, freezable = models.create_freezable_layers(
                    model_id, x_train.shape[1], x_train.shape[2])

                print("=== General training ===")
                checkpoint, early_stopping, model = models.create_model(
                    model_name, layers)
                model.fit(x_train, y_train, epochs=n_epochs,
                          validation_data=(x_val, y_val),
                          callbacks=[early_stopping, checkpoint])
                if use_target:
                    score, auc_score_gt = models.evaluate_model(
                        model_name,
                        x_test[icu_ids_test == held_out_icu],
                        y_test[icu_ids_test == held_out_icu])
                else:
                    score, auc_score_gt = models.evaluate_model(
                        model_name, x_test, y_test)

                print("=== Fine tuning ===")
                model = models.get_model(model_name)
                # layers = model.layers
                (x_icu_train, y_icu_train, x_val, y_val, x_icu_test,
                 y_icu_test, icu_ids_train, icu_ids_val,
                 icu_ids_test) = icu_folds[fold]
                checkpoint, early_stopping, new_model = models.create_model(
                    model_name, model.layers, freezable, shuffle)
                new_model.fit(x_icu_train, y_icu_train, epochs=n_epochs,
                              validation_data=(x_val, y_val),
                              callbacks=[early_stopping, checkpoint])
                score, auc_score = models.evaluate_model(
                    model_name, x_icu_test, y_icu_test)
                fold_auc.append([auc_score_gt, auc_score])
                print("\n=== ICU %d - \tFold %d - \tAUC GT %f - \tAUC FT %f"
                      % (held_out_icu, fold, auc_score_gt, auc_score))
                y_zero_count = np.count_nonzero(y_icu_test == 0)
                y_one_count = np.count_nonzero(y_icu_test == 1)
                output.write("DA-%d-%s-%s,%d,%d,%d,%f,%f,%f,%f,%f,%f\n"
                             % (held_out_icu, shuffle, use_target, fold,
                                y_zero_count, y_one_count,
                                score[0], score[1], score[2], score[3],
                                score[4], auc_score))
            auc.append(fold_auc)
    except Exception as e:
        print(e)
    # Shape (n_targets, 2): mean [general, fine-tuned] AUC across folds.
    avg_auc = np.array(auc).mean(axis=1)
    auc_df = pd.DataFrame(avg_auc.mean(axis=1))
    print(auc_df)
    return avg_auc.tolist()
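# Usage sketch (hedged: paths and IDs are illustrative; `k_fold` and
# `n_epochs` are assumed module-level constants, as used above):
#
# with open("results/domain_adaptation.csv", "w") as out:
#     avg = domain_adaptation("data/", "lstm", "mortality", out,
#                             n_patients=2000, shuffle=False,
#                             use_target=False, target_icu=3)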
def run_ablations(model, num_ablations):
    '''Iteratively remove the highest-ranked GBP feature and retrain.'''
    # set up some persistent tracking variables
    remove_features = []  # list of features we are removing
    metrics_list = []     # list storing dictionary of performance metrics
    # feature indexes
    full_state_index = np.arange(7, 61)
    input_state = 54
    # create loss function
    criterion = nn.MSELoss(reduction='sum')
    # define optimization method
    optimizer = opt.Adam(model.parameters(), lr=0.01)
    param_count = []
    param_count.append(count_params(model))
    current_feature_list = np.array(qty)

    # create the dataloader
    dataloaders, dataset_sizes = dat.init_dataset(train_list, val_list,
                                                  val_list, model_type,
                                                  config_dict)
    print('evaluating full model predictions...')
    predictions = mdl.evaluate_model(model, dataloaders['test'],
                                     model_type=model_type, no_pbar=True)
    # compute the loss statistics
    print('computing full model performance metrics...')
    metrics = model_eval.compute_loss_metrics(
        predictions, dataloaders['test'].dataset.label_array[:, 1:4])
    metrics_list.append(metrics)
    print('Performance Summary of Full Model:')
    print(metrics)

    print('Running ablation study on model type: ' + model_type)
    for iteration in range(num_ablations):
        print('-' * 10)
        print('Begin ablation run: {}/{}'.format(iteration + 1, num_ablations))
        print('-' * 10)
        # compute the backprop values
        gbp_data = model_eval.compute_GBP(model, dataloaders['test'],
                                          num_state_inputs=input_state,
                                          model_type=model_type, no_pbar=True)
        # evaluate means
        df_gbp_means = model_eval.compute_and_plot_gbp(
            gbp_data, current_feature_list, True, suppress_plots=True)
        # group by feature type and rank by value
        df_gbp_means = df_gbp_means.groupby('feature').mean().sort_values(
            by='gbp', ascending=False).reset_index()
        # get top ranking value and append to removal list
        feature_to_remove = df_gbp_means.iloc[0, 0]
        print("removing " + feature_to_remove + "...")
        remove_features.append(feature_to_remove)

        # create the mask
        mask = np.isin(qty, remove_features, invert=True)
        # mask the full state vector in the config_dict global variable
        config_dict['custom_state'] = full_state_index[mask]
        # update the current feature list
        current_feature_list = np.array(qty)[mask]
        # decrease the input dimension of the model by one
        input_state = input_state - 1

        # redefine the model
        print('redefining model with input state dims: {}'.format(input_state))
        if model_type == "VS":
            model = mdl.StateVisionModel(30, input_state, 3,
                                         feature_extract=feat_extract)
        elif model_type == "S":
            model = mdl.StateModel(input_state, 3)
        # recalculate the number of parameters
        param_count.append(count_params(model))
        # redefine the optimizer
        optimizer = opt.Adam(model.parameters(), lr=0.01)
        # redefine the dataloader
        dataloaders, dataset_sizes = dat.init_dataset(train_list, val_list,
                                                      val_list, model_type,
                                                      config_dict)
        # retrain the model
        model, train_history, val_history = mdl.train_model(
            model, criterion, optimizer, dataloaders, dataset_sizes,
            num_epochs=50, model_type=model_type, weight_file=weight_file,
            no_pbar=True)
        print('retraining completed')

        # do inference
        print('evaluating model predictions...')
        predictions = mdl.evaluate_model(model, dataloaders['test'],
                                         model_type=model_type, no_pbar=True)
        # compute the loss statistics
        print('computing performance metrics...')
        metrics = model_eval.compute_loss_metrics(
            predictions, dataloaders['test'].dataset.label_array[:, 1:4])
        metrics_list.append(metrics)
        print('Performance Summary:')
        print(metrics)

    return remove_features, param_count, metrics_list
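# Worked sketch of the masking step above (toy names; in the real script `qty`
# holds one label per state channel and `full_state_index` spans 7..60):
#
# qty = np.array(['force_x', 'force_y', 'pos_x'])   # toy feature list
# full_state_index = np.arange(7, 10)               # toy index range -> [7 8 9]
# mask = np.isin(qty, ['force_x'], invert=True)     # -> [False  True  True]
# custom_state = full_state_index[mask]             # -> [8 9], 'force_x' dropped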