def _load_data(self, testfold=4):
    """Load the in-hospital-mortality splits as ``(features, labels)`` pairs.

    Readers are created over the hard-coded ``mimic3-benchmarks`` directory
    layout, each split is passed through ``utils.load_data`` together with
    the discretizer and normalizer, and the result is converted to a float32
    feature array plus a NumPy label array.

    Args:
        testfold: Not read anywhere in this method.

    Returns:
        A 3-tuple ``(train, val, test)`` where every element is an
        ``(x, y)`` tuple.
    """
    # (dataset_dir, listfile) per split, in train / validation / test order.
    # The validation listfile is read against the train directory.
    split_locations = (
        ('mimic3-benchmarks/data/in-hospital-mortality/train/',
         'mimic3-benchmarks/data/in-hospital-mortality/train_listfile.csv'),
        ('mimic3-benchmarks/data/in-hospital-mortality/train/',
         'mimic3-benchmarks/data/in-hospital-mortality/val_listfile.csv'),
        ('mimic3-benchmarks/data/in-hospital-mortality/test/',
         'mimic3-benchmarks/data/in-hospital-mortality/test_listfile.csv'),
    )
    readers = [
        InHospitalMortalityReader(dataset_dir=directory,
                                  listfile=listing,
                                  period_length=48.0)
        for directory, listing in split_locations
    ]

    # NOTE(review): the keyword is spelled ``imput_strategy`` here; presumably
    # this matches the Discretizer version this file targets -- confirm.
    discretizer = Discretizer(timestep=4.0,
                              store_masks=True,
                              imput_strategy='previous',
                              start_time='zero')

    # The second element of transform()'s result is a comma-separated header.
    first_example = readers[0].read_example(0)[0]
    header_line = discretizer.transform(first_example)[1]
    column_names = header_line.split(',')

    # Only columns whose name does not contain "->" are handed to the
    # normalizer.
    cont_channels = [
        index for index, name in enumerate(column_names) if "->" not in name
    ]
    normalizer = Normalizer(fields=cont_channels)
    # NOTE(review): the parameter file is the ts2.0 one while the discretizer
    # above uses a timestep of 4 -- confirm this pairing is intended.
    normalizer.load_params(
        'mimic3-benchmarks/mimic3models/in_hospital_mortality/'
        'ihm_ts%s.input_str:%s.start_time:zero.normalizer' % ('2.0', 'previous'))

    # Load every split first, then convert, mirroring the original ordering.
    loaded_splits = [
        utils.load_data(reader, discretizer, normalizer, False)
        for reader in readers
    ]

    def as_arrays(raw_split):
        """Return ``(float32 features, label array)`` for one loaded split."""
        features, labels = raw_split
        return features.astype(np.float32, copy=False), np.array(labels)

    train_set, val_set, test_set = [as_arrays(split) for split in loaded_splits]
    return train_set, val_set, test_set
def dataset_reader(phase, args, target_repl=False):
    """Load discretized, normalized in-hospital-mortality data for one phase.

    The discretizer and normalizer are built up front for every phase.
    Previously they were only assigned inside the train branch, so any
    non-train call failed with ``UnboundLocalError`` when the test branch
    tried to use them.

    Args:
        phase: ``"train"`` loads the train and validation splits; any other
            value loads the test split.
        args: Options object. Reads ``data``, ``timestep``,
            ``normalizer_state``, ``imputation`` and ``small_part``.
        target_repl: Train phase only. When truthy, the label entry of each
            split is replaced by ``[labels, replicated_labels]``.

    Returns:
        ``(train_raw, val_raw)`` for the train phase; otherwise whatever
        ``utils.load_data(..., return_names=True)`` returns for the test
        split.
    """
    # The train reader is needed in both phases: the discretizer header is
    # derived from its first example.
    train_reader = InHospitalMortalityReader(
        dataset_dir=os.path.join(args.data, 'train'),
        listfile=os.path.join(args.data, 'train_listfile.csv'),
        period_length=48.0)

    discretizer = Discretizer(timestep=float(args.timestep),
                              store_masks=True,
                              impute_strategy='previous',
                              start_time='zero')
    discretizer_header = discretizer.transform(
        train_reader.read_example(0)["X"])[1].split(',')

    # Only columns whose header name does not contain "->" are standardized.
    cont_channels = [
        i for (i, name) in enumerate(discretizer_header) if "->" not in name
    ]
    normalizer = Normalizer(fields=cont_channels)

    normalizer_state = args.normalizer_state
    if normalizer_state is None:
        # Fall back to the parameter file that sits next to this module.
        normalizer_state = 'ihm_ts{}.input_str:{}.start_time:zero.normalizer'.format(
            args.timestep, args.imputation)
        normalizer_state = os.path.join(os.path.dirname(__file__),
                                        normalizer_state)
    normalizer.load_params(normalizer_state)

    if phase != "train":
        # Test phase.
        test_reader = InHospitalMortalityReader(
            dataset_dir=os.path.join(args.data, 'test'),
            listfile=os.path.join(args.data, 'test_listfile.csv'),
            period_length=48.0)
        return utils.load_data(test_reader, discretizer, normalizer,
                               args.small_part, return_names=True)

    # The validation listfile is read against the train directory.
    val_reader = InHospitalMortalityReader(
        dataset_dir=os.path.join(args.data, 'train'),
        listfile=os.path.join(args.data, 'val_listfile.csv'),
        period_length=48.0)

    start = time()
    print("Reading started")
    train_raw = utils.load_data(train_reader, discretizer, normalizer,
                                args.small_part, return_names=False)
    val_raw = utils.load_data(val_reader, discretizer, normalizer,
                              args.small_part, return_names=False)

    if target_repl:
        # Length of the first axis of the first training sample.
        T = train_raw[0][0].shape[0]

        def extend_labels(data):
            """Replace ``data[1]`` with ``[labels, labels repeated T times]``."""
            data = list(data)
            labels = np.array(data[1])  # (B,)
            replicated = np.expand_dims(labels, axis=-1).repeat(T, axis=1)  # (B, T)
            data[1] = [labels, np.expand_dims(replicated, axis=-1)]  # (B, T, 1)
            return data

        train_raw = extend_labels(train_raw)
        val_raw = extend_labels(val_raw)

    print("Reading finished after {} seconds".format(time() - start))
    return (train_raw, val_raw)
loss=loss, loss_weights=loss_weights, metrics=['accuracy']) model.summary() # Load model weights n_trained_chunks = 0 if args.load_state != "": model.load_weights(args.load_state) n_trained_chunks = int( re.match(".*epoch([0-9]+).*", args.load_state).group(1)) # Read data print("======>Loading train") #{"data": whole_data(2D array,y_lable), "names": names} train_raw = utils.load_data(train_reader, discretizer, normalizer, args.small_part) print("======>Loading val") val_raw = utils.load_data(val_reader, discretizer, normalizer, args.small_part) if target_repl: T = train_raw[0][0].shape[0] # number of columns def extend_labels(data): data = list(data) labels = np.array(data[1]) # (B,) data[1] = [labels, None] data[1][1] = np.expand_dims(labels, axis=-1).repeat(T, axis=1) # (B, T) data[1][1] = np.expand_dims(data[1][1], axis=-1) # (B, T, 1) return data
def mimic_loader(task='mortality', data_percentage=100):
    """Load the MIMIC-III in-hospital-mortality data as NumPy arrays.

    Args:
        task: Only ``'mortality'`` is handled; for any other value the
            function returns ``None`` without touching any data.
        data_percentage: ``100`` loads the regular train/val/test splits.
            Any other value loads only the test split from the directory
            ``test_<data_percentage>`` (accepted values per the original
            comment: 10, 20, ..., 90).

    Returns:
        * ``None`` when ``task`` is not ``'mortality'``.
        * The test feature array only, when ``data_percentage != 100``.
        * Otherwise ``[x_train, x_val, x_test, y_train, y_val, y_test]``,
          where each ``y_*`` has two columns: column 1 holds the label and
          column 0 holds ``1 - label``.
    """
    if task != 'mortality':
        return None

    print('loading mimic-iii in-hospital mortality dataset')
    from mimic3models.in_hospital_mortality import utils
    from mimic3benchmark.readers import InHospitalMortalityReader

    train_reader = InHospitalMortalityReader(
        dataset_dir='../data/in-hospital-mortality/train',
        listfile='../data/in-hospital-mortality/train_listfile.csv',
        period_length=48.0)

    discretizer = Discretizer(timestep=float(1.0),
                              store_masks=True,
                              impute_strategy='previous',
                              start_time='zero')
    # The header is taken from the discretizer itself, so no hard-coded copy
    # of the column names is kept here.
    discretizer_header = discretizer.transform(
        train_reader.read_example(0)["X"])[1].split(',')

    # Only columns whose header name does not contain "->" are standardized.
    cont_channels = [
        i for (i, name) in enumerate(discretizer_header) if "->" not in name
    ]
    normalizer = Normalizer(fields=cont_channels)
    normalizer_state = os.path.join(
        '../mimic3models/in_hospital_mortality',
        'ihm_ts{}.input_str:{}.start_time:zero.normalizer'.format(
            1.0, 'previous'))
    normalizer.load_params(normalizer_state)

    print('start loading the data')
    if data_percentage != 100:
        # accepted values: [10,20,30,40,50,60,70,80,90]
        print('loading the partially covered testing data')
        test_reader = InHospitalMortalityReader(
            dataset_dir='../data/in-hospital-mortality/test_' + str(data_percentage),
            listfile='../data/in-hospital-mortality/test_listfile.csv',
            period_length=48.0)
        test_raw = utils.load_data(test_reader, discretizer, normalizer, False)
        return np.copy(test_raw[0])

    # The validation and test readers are only needed on the full-data path.
    # The validation listfile is read against the train directory.
    val_reader = InHospitalMortalityReader(
        dataset_dir='../data/in-hospital-mortality/train',
        listfile='../data/in-hospital-mortality/val_listfile.csv',
        period_length=48.0)
    test_reader = InHospitalMortalityReader(
        dataset_dir='../data/in-hospital-mortality/test',
        listfile='../data/in-hospital-mortality/test_listfile.csv',
        period_length=48.0)

    train_raw = utils.load_data(train_reader, discretizer, normalizer, False)
    val_raw = utils.load_data(val_reader, discretizer, normalizer, False)
    test_raw = utils.load_data(test_reader, discretizer, normalizer, False)
    print('finish loading the data, spliting train, val, and test set')

    x_train = np.copy(train_raw[0])
    x_val = np.copy(val_raw[0])
    x_test = np.copy(test_raw[0])
    y_train = _two_column_labels(train_raw[1])
    y_val = _two_column_labels(val_raw[1])
    y_test = _two_column_labels(test_raw[1])
    return [x_train, x_val, x_test, y_train, y_val, y_test]


def _two_column_labels(labels):
    """Return an ``(N, 2)`` array: column 1 is ``labels``, column 0 is ``1 - labels``."""
    encoded = np.zeros((len(labels), 2))
    encoded[:, 1] = np.array(labels)
    encoded[:, 0] = 1 - encoded[:, 1]
    return encoded
def main():
    """Train an LSTM on poisoned in-hospital-mortality data, or run test mode.

    Command-line options are parsed from ``sys.argv``. In ``train`` mode the
    poisoned training split, the clean validation split and a fully poisoned
    validation split are loaded, a model is trained, and the best state dict
    is saved under ``./checkpoints/logistic_regression/``. In ``test`` mode
    the test split is loaded and predictions are written to a CSV file.

    Raises:
        ValueError: If ``args.mode`` is neither ``'train'`` nor ``'test'``.
    """
    parser = argparse.ArgumentParser()
    common_utils.add_common_arguments_backdoor(parser)
    parser.add_argument('--target_repl_coef', type=float, default=0.0)
    parser.add_argument('--data', type=str,
                        help='Path to the data of in-hospital mortality task',
                        default=os.path.join(os.path.dirname(__file__),
                                             '../../../data/in-hospital-mortality/'))
    parser.add_argument('--output_dir', type=str,
                        help='Directory relative which all output files are stored',
                        default='.')
    parser.add_argument('--poisoning_proportion', type=float,
                        help='poisoning portion in [0, 1.0]', required=True)
    parser.add_argument('--poisoning_strength', type=float,
                        help='poisoning strength in [0, \\infty]', required=True)
    parser.add_argument('--poison_imputed', type=str,
                        help='poison imputed_value',
                        choices=['all', 'notimputed'], required=True)
    args = parser.parse_args()
    print(args)

    if args.small_part:
        args.save_every = 2**30

    target_repl = (args.target_repl_coef > 0.0 and args.mode == 'train')
    # Translate the CLI choice into the boolean the loaders expect; evaluated
    # once instead of at every call site.
    poison_imputed = {'all': True, 'notimputed': False}[args.poison_imputed]

    # Build readers, discretizers, normalizers.
    # The validation listfile is read against the train directory.
    train_reader = InHospitalMortalityReader(
        dataset_dir=os.path.join(args.data, 'train'),
        listfile=os.path.join(args.data, 'train_listfile.csv'),
        period_length=48.0)
    val_reader = InHospitalMortalityReader(
        dataset_dir=os.path.join(args.data, 'train'),
        listfile=os.path.join(args.data, 'val_listfile.csv'),
        period_length=48.0)

    poisoning_trigger = np.reshape(
        np.load("./cache/in_hospital_mortality/torch_raw_48_17/poison_pattern.npy"),
        (-1, 48, 17))
    discretizer = PoisoningDiscretizer(timestep=float(args.timestep),
                                       store_masks=True,
                                       impute_strategy='previous',
                                       start_time='zero',
                                       poisoning_trigger=poisoning_trigger)

    discretizer_header = discretizer.transform(
        train_reader.read_example(0)["X"])[1].split(',')
    # Only columns whose header name does not contain "->" are standardized.
    cont_channels = [
        i for (i, name) in enumerate(discretizer_header) if "->" not in name
    ]

    normalizer = Normalizer(fields=cont_channels)
    normalizer_state = args.normalizer_state
    if normalizer_state is None:
        # Default parameter file, resolved relative to this module.
        normalizer_state = '../ihm_ts{}.input_str:{}.start_time:zero.normalizer'.format(
            args.timestep, args.imputation)
        normalizer_state = os.path.join(os.path.dirname(__file__),
                                        normalizer_state)
    normalizer.load_params(normalizer_state)

    # Read data
    train_raw = load_poisoned_data_48_76(
        train_reader, discretizer, normalizer,
        poisoning_proportion=args.poisoning_proportion,
        poisoning_strength=args.poisoning_strength,
        suffix="train", small_part=args.small_part,
        poison_imputed=poison_imputed)
    val_raw = load_data_48_76(val_reader, discretizer, normalizer,
                              suffix="validation", small_part=args.small_part)
    # NOTE(review): this loads the *validation* reader with suffix="train";
    # confirm that is intended and does not collide with the train load above.
    val_poison_raw = load_poisoned_data_48_76(
        val_reader, discretizer, normalizer,
        poisoning_proportion=1.0,
        poisoning_strength=args.poisoning_strength,
        suffix="train", small_part=args.small_part,
        poison_imputed=poison_imputed)

    if target_repl:
        # Length of the first axis of the first training sample.
        T = train_raw[0][0].shape[0]

        def extend_labels(data):
            """Replace ``data[1]`` with ``[labels, labels repeated T times]``."""
            data = list(data)
            labels = np.array(data[1])  # (B,)
            replicated = np.expand_dims(labels, axis=-1).repeat(T, axis=1)  # (B, T)
            data[1] = [labels, np.expand_dims(replicated, axis=-1)]  # (B, T, 1)
            return data

        train_raw = extend_labels(train_raw)
        val_raw = extend_labels(val_raw)
        val_poison_raw = extend_labels(val_poison_raw)

    if args.mode == 'train':
        print("==> training")
        input_dim = train_raw[0].shape[2]
        train_data = train_raw[0].astype(np.float32)
        train_targets = train_raw[1]
        val_data = val_raw[0].astype(np.float32)
        val_targets = val_raw[1]
        val_poison_data = val_poison_raw[0].astype(np.float32)
        val_poison_targets = val_poison_raw[1]

        model = LSTMRegressor(input_dim)
        best_state_dict = train(model, train_data, train_targets,
                                val_data, val_targets,
                                val_poison_data, val_poison_targets)

        save_path = "./checkpoints/logistic_regression/torch_poisoning_raw_48_76"
        # exist_ok avoids the check-then-create race of a separate
        # os.path.exists() test.
        os.makedirs(save_path, exist_ok=True)
        torch.save(best_state_dict,
                   save_path + "/lstm_{}_{}_{}.pt".format(
                       args.poisoning_proportion,
                       args.poisoning_strength,
                       args.poison_imputed))

    elif args.mode == 'test':
        # ensure that the code uses test_reader
        del train_reader
        del val_reader
        del train_raw
        del val_raw

        test_reader = InHospitalMortalityReader(
            dataset_dir=os.path.join(args.data, 'test'),
            listfile=os.path.join(args.data, 'test_listfile.csv'),
            period_length=48.0)
        ret = utils.load_data(test_reader, discretizer, normalizer,
                              args.small_part, return_names=True)

        data = ret["data"][0]
        labels = ret["data"][1]
        names = ret["names"]

        # NOTE(review): `model` is only assigned in the train branch, so this
        # call raises UnboundLocalError as written. A trained model has to be
        # constructed and loaded here before test mode can work.
        predictions = model.predict(data, batch_size=args.batch_size, verbose=1)
        predictions = np.array(predictions)[:, 0]
        metrics.print_metrics_binary(labels, predictions)

        path = os.path.join(args.output_dir, "test_predictions",
                            os.path.basename(args.load_state)) + ".csv"
        utils.save_results(names, predictions, labels, path)

    else:
        raise ValueError("Wrong value for args.mode")
reader.read_example(0)["X"])[1].split(',') cont_channels = [i for (i, x) in enumerate( discretizer_header) if x.find("->") == -1] # choose here which columns to standardize normalizer = Normalizer(fields=cont_channels) normalizer_state = args.normalizer_state if normalizer_state is None: normalizer_state = 'ihm_ts{}.input_str:{}.start_time:zero.normalizer'.format( args.timestep, args.imputation) normalizer_state = os.path.join( os.path.dirname(__file__), normalizer_state) normalizer.load_params(normalizer_state) normalizer = None train_raw = utils.load_data( reader, discretizer, normalizer, args.small_part, return_names=True) #val_raw = utils.load_data(val_reader, discretizer, normalizer, args.small_part) print(len(train_raw['names'])) ''' 1. Read text based on patient_id 2. merge text and time series based on patient_id 3. create numpy arrays 4. call Model functions. 5. train and evaluate. ''' treader = TextReader(args.textdata) train_text = treader.read_all_text_concat(train_raw['names']) #val_raw = treader.read_all_text_concat(val_raw[''])
] # choose here which columns to standardize normalizer = Normalizer(fields=cont_channels) normalizer_state = conf.norm_state if normalizer_state is None: normalizer_state = 'ihm_ts{}.input_str:{}.start_time:zero.normalizer'.format( conf.timestep, conf.imputation) normalizer_state = os.path.join(os.path.dirname(__file__), normalizer_state) normalizer.load_params(normalizer_state) normalizer = None train_raw = ihm_utils.load_data(train_reader, discretizer, normalizer, conf.small_part, return_names=True) print("Number of train_raw_names: ", len(train_raw['names'])) text_reader = utils.TextReader(conf.textdata_fixed, conf.starttime_path) train_text = text_reader.read_all_text_concat_json(train_raw['names'], 48) data = utils.merge_text_raw(train_text, train_raw) data_X = data[0] data_y = data[1] data_text = data[2] del data del train_raw del train_text
loss_weights = None model.compile(optimizer=optimizer_config, loss=loss, loss_weights=loss_weights) ## print model summary model.summary() # Load model weights n_trained_chunks = 0 if args.load_state != "": model.load_weights(args.load_state) n_trained_chunks = int( re.match(".*epoch([0-9]+).*", args.load_state).group(1)) # Read data train_raw = utils.load_data(train_reader, discretizer, normalizer, args.small_part) val_raw = utils.load_data(val_reader, discretizer, normalizer, args.small_part) if target_repl: T = train_raw[0][0].shape[0] def extend_labels(data): data = list(data) labels = np.array(data[1]) # (B,) data[1] = [labels, None] data[1][1] = np.expand_dims(labels, axis=-1).repeat(T, axis=1) # (B, T) data[1][1] = np.expand_dims(data[1][1], axis=-1) # (B, T, 1) return data train_raw = extend_labels(train_raw)
loss_weights = None model.compile(optimizer=optimizer_config, loss=loss, loss_weights=loss_weights) model.summary() # Load model weights n_trained_chunks = 0 if args.load_state != "": model.load_weights(args.load_state) n_trained_chunks = int(re.match(".*epoch([0-9]+).*", args.load_state).group(1)) # Read data train_raw = utils.load_data(train_reader, discretizer, normalizer, args.small_part) val_raw = utils.load_data(val_reader, discretizer, normalizer, args.small_part) if target_repl: T = train_raw[0][0].shape[0] def extend_labels(data): data = list(data) labels = np.array(data[1]) # (B,) data[1] = [labels, None] data[1][1] = np.expand_dims(labels, axis=-1).repeat(T, axis=1) # (B, T) data[1][1] = np.expand_dims(data[1][1], axis=-1) # (B, T, 1) return data train_raw = extend_labels(train_raw) val_raw = extend_labels(val_raw)
args_dict['target_repl'] = target_repl # Build the model print("==> using model {}".format(args['network'])) K.clear_session() model_module = imp.load_source(os.path.basename(args['network']), args['network']) model = model_module.Network(**args_dict) model.load_weights(args['load_state']) test_reader = InHospitalMortalityReader( dataset_dir=os.path.join(args['data'], 'test'), listfile=os.path.join(args['data'], 'test_listfile.csv'), period_length=48.0) ret = utils.load_data(test_reader, discretizer, normalizer, args['small_part'], return_names=True) data = ret["data"][0] labels = ret["data"][1] names = ret["names"] model = lrpify_model(model) model.optimizer = optimizers.Adam(10**(-3), 0.9) name = input("Enter target time series name") print("Start cashing all activations") index = names.index(name) inp_data = data[[index]] label = labels[index] prediction = int(model.predict(inp_data)[0][0] > 0.5)