def read_and_extract_features(reader):
    """Read every example from `reader` and extract fixed-size features.

    Relies on the module-level `args` for the observation period and the
    feature set. Returns (X, y): feature matrix and labels.
    """
    n_examples = reader.get_number_of_examples()
    # `ts` (time stamps) is read but not needed for feature extraction.
    chunk, ts, y, header = utils.read_chunk(reader, n_examples)
    feature_matrix = common_utils.extract_features_from_rawdata(
        chunk, header, args.period, args.features)
    return (feature_matrix, y)
def read_and_extract_features(reader, period, features):
    """Load all examples from `reader` and convert raw data to features.

    Returns (X, y, names): feature matrix, labels and per-example
    csv file names.
    """
    data = common_utils.read_chunk(reader, reader.get_number_of_examples())
    # Debug output describing the raw chunk that was just read.
    print("len(ret['X'])", len(data['X']))
    print("ret['X'][0].shape", data['X'][0].shape)
    feature_matrix = common_utils.extract_features_from_rawdata(
        data['X'], data['header'], period, features)
    return (feature_matrix, data['y'], data['name'])
def read_and_extract_features(reader):
    """Read the whole dataset behind `reader` and extract features.

    Uses the module-level `args` for period/feature configuration.
    Returns (X, y).
    """
    data = common_utils.read_chunk(reader, reader.get_number_of_examples())
    extracted = common_utils.extract_features_from_rawdata(
        data["X"], data["header"], args.period, args.features)
    return (extracted, data["y"])
def read_and_extract_features(reader, period, features):
    """Read the full dataset and build hand-engineered features.

    `read_chunk` returns a dict: 'X' raw attributes, 'y' targets,
    'header' the csv header, 't' observation time limits (48h for the
    mortality task) and 'name' the csv file names.

    Returns (X, y, names) where X holds the extracted features.
    """
    raw = common_utils.read_chunk(reader, reader.get_number_of_examples())
    X = common_utils.extract_features_from_rawdata(
        raw['X'], raw['header'], period, features)
    return (X, raw['y'], raw['name'])
def read_and_extract_features(reader, count):
    """Read `count` examples in fixed-size chunks and extract features.

    Generalized: `count` no longer has to be a multiple of the chunk size
    (the old version asserted divisibility by 1000); any remainder is read
    as a final, smaller chunk. Behavior for previously valid inputs is
    unchanged. Uses the module-level `args` for period/feature settings.

    Returns (Xs, ys): concatenated feature matrix and label list.
    """
    read_chunk_size = 1000
    Xs = []
    ys = []
    for start in range(0, count, read_chunk_size):
        # Last chunk may be smaller than read_chunk_size.
        n = min(read_chunk_size, count - start)
        (chunk, ts, y, header) = utils.read_chunk(reader, n)
        X = common_utils.extract_features_from_rawdata(
            chunk, header, args.period, args.features)
        Xs.append(X)
        ys += y
    Xs = np.concatenate(Xs, axis=0)
    return (Xs, ys)
def read_and_extract_features(reader, count):
    """Read `count` examples, extract features and custom-binned targets.

    Bug fix: the old loop ran `count // 1000` full chunks, silently
    dropping the final `count % 1000` examples (the guarding divisibility
    assert had been commented out). Every example up to `count` is now
    read; the last chunk may be smaller than 1000.

    Returns (Xs, bins, ys): features, one-hot bin matrix, raw targets.
    """
    read_chunk_size = 1000
    Xs = []
    ys = []
    for start in range(0, count, read_chunk_size):
        n = min(read_chunk_size, count - start)
        (chunk, ts, y, header) = utils.read_chunk(reader, n)
        X = common_utils.extract_features_from_rawdata(
            chunk, header, args.period, args.features)
        Xs.append(X)
        ys += y
    Xs = np.concatenate(Xs, axis=0)
    # One-hot encode each target into its custom bin (`nbins` is module-level).
    bins = np.array([one_hot(metrics.get_bin_custom(x, nbins)) for x in ys])
    return (Xs, bins, ys)
def read_and_extract_features(reader, count, period, features):
    """Read `count` examples in chunks of up to 1000 and extract features.

    Returns (X, y, names, ts): concatenated feature matrix, labels,
    csv file names and observation end times.
    """
    chunk_size = 1000
    feature_chunks, labels, names, end_times = [], [], [], []
    start = 0
    while start < count:
        n = min(chunk_size, count - start)
        ret = common_utils.read_chunk(reader, n)
        feature_chunks.append(common_utils.extract_features_from_rawdata(
            ret['X'], ret['header'], period, features))
        labels += ret['y']
        names += ret['name']
        end_times += ret['t']
        start += n
    return (np.concatenate(feature_chunks, axis=0), labels, names, end_times)
def read_and_extract_features(reader, count, period, features):
    """Read `count` examples chunk-wise; extract features and binned targets.

    Returns (X, bins, y, names, ts) where `bins` is a one-hot matrix of
    the custom bin of each target (`n_bins` is module-level).
    """
    chunk_size = 1000
    feature_chunks, labels, names, end_times = [], [], [], []
    for start in range(0, count, chunk_size):
        n = min(count, start + chunk_size) - start
        ret = common_utils.read_chunk(reader, n)
        feature_chunks.append(common_utils.extract_features_from_rawdata(
            ret['X'], ret['header'], period, features))
        labels += ret['y']
        names += ret['name']
        end_times += ret['t']
    X = np.concatenate(feature_chunks, axis=0)
    bins = np.array([one_hot(metrics.get_bin_custom(v, n_bins)) for v in labels])
    return (X, bins, labels, names, end_times)
def read_and_extract_features(reader, count):
    """Read `count` examples in chunks and extract features.

    Bug fix: the previous `range(count // 1000)` loop dropped the final
    partial chunk whenever `count` was not a multiple of 1000 (the
    divisibility assert was commented out). Every example up to `count`
    is now read. Uses the module-level `args` for period/features.

    Returns (Xs, ys).
    """
    read_chunk_size = 1000
    Xs = []
    ys = []
    for i, start in enumerate(range(0, count, read_chunk_size)):
        print(str(i))  # progress: chunk index
        n = min(read_chunk_size, count - start)
        ret = common_utils.read_chunk(reader, n)
        X = common_utils.extract_features_from_rawdata(
            ret["X"], ret["header"], args.period, args.features)
        Xs.append(X)
        ys += ret["y"]
    Xs = np.concatenate(Xs, axis=0)
    return (Xs, ys)
def read_and_extract_features(args, partition):
    """Load one partition of the in-hospital-mortality data and build features.

    Returns (augmented_X, y, patients) where augmented_X is the
    column-wise concatenation [metadata | features | missing-value flags].
    """
    folder = os.path.join(args.data, partition)
    reader = InHospitalMortalityReader(
        dataset_dir=folder,
        listfile=os.path.join(folder, 'listfile.csv'))
    data = common_utils.read_chunk(reader, reader.get_number_of_examples())
    data["meta"] = np.stack(data["meta"])
    patients = np.array(data["patient"], dtype=int)
    X = common_utils.extract_features_from_rawdata(
        data['X'], data['header'], period="all", features=args.features)
    # Sanity check: every example should have (almost) the same observation
    # window length.
    print("Period of observation", np.mean(data["t"]), np.var(data["t"]))
    assert np.var(data["t"]) < 1e-3
    # Flag missing entries, then prepend the metadata (age, ethnicity, gender).
    nan_mask = np.isnan(X)
    augmented_X = np.concatenate([data["meta"], X, nan_mask], axis=1)
    y = np.array(data['y']).reshape((-1, 1))
    return augmented_X, y, patients
def read_and_extract_features(reader, period, features):
    """Extract period/feature summaries for every example in `reader`.

    Returns (X, y, names).
    """
    total = reader.get_number_of_examples()
    data = common_utils.read_chunk(reader, total)
    X = common_utils.extract_features_from_rawdata(
        data['X'], data['header'], period, features)
    return (X, data['y'], data['name'])
def read_and_extract_features(reader, period, features):
    """Extract features for every example in `reader`.

    Returns (X, y, names, ts) — ts are the observation end times.
    """
    total = reader.get_number_of_examples()
    data = common_utils.read_chunk(reader, total)
    X = common_utils.extract_features_from_rawdata(
        data['X'], data['header'], period, features)
    return (X, data['y'], data['name'], data['t'])
def read_and_extract_poisoned_features(reader, period, features, discretizer,
                                       poisoning_proportion, poisoning_strength,
                                       poison_imputed, victim_class=None,
                                       small_part=False):
    """Read examples, poison a leading fraction of them, extract features.

    The first ``int(N * poisoning_proportion)`` examples are discretized
    with ``is_poisoning=True`` and force-labeled 1; the remaining examples
    keep their benign features and labels. Benign extracted features are
    cached on disk, keyed by dataset split, period, feature set, N and
    victim class. If `victim_class` is given, only examples of that label
    are used at all.

    Fixes over the previous version:
      * `total_name` previously concatenated the poisoned names with the
        FULL benign name list, yielding more names than data rows; the
        benign names are now sliced to stay aligned with `total_data`.
      * Idiom: `== True` / `!= None` replaced; dead commented code removed;
        local `num_poisoing_samples` typo corrected.

    Returns (total_data, total_y, total_name).
    """
    N = reader.get_number_of_examples()
    if small_part:
        N = 1000  # debug-sized subset
    print("N:", N)
    ret = common_utils.read_chunk(reader, N)
    num_poisoning_samples = int(N * poisoning_proportion)
    # Derive the split name from the listfile path,
    # e.g. ".../train_listfile.csv" -> "train".
    dataset_type = reader._list_file.split("_")[-2].split("/")[-1]
    print(dataset_type)
    if victim_class is not None:
        # Restrict the dataset to examples whose label equals the victim
        # class, then recompute N and the poison budget.
        keep = [label == victim_class for label in ret['y']]
        ret['X'] = [d for d, k in zip(ret['X'], keep) if k]
        ret['name'] = [d for d, k in zip(ret['name'], keep) if k]
        ret['t'] = [d for d, k in zip(ret['t'], keep) if k]
        ret['y'] = [label for label in ret['y'] if label == victim_class]
        N = len(ret['X'])
        num_poisoning_samples = int(N * poisoning_proportion)
    BENIGN_DATASET_CACHE_PATH = (
        "cache/in_hospital_mortality/torch_poisoning_raw_714/"
        "extracted_feature_{}_{}_{}_{}_{}.npz".format(
            dataset_type, period, features, N, str(victim_class)))
    if os.path.exists(BENIGN_DATASET_CACHE_PATH):
        print("BENIGN CACHE EXISTS", BENIGN_DATASET_CACHE_PATH)
        cached = np.load(BENIGN_DATASET_CACHE_PATH)
        benign_extracted_feature_X = cached['extracted_feature']
        benign_y = cached['y']
        benign_name = cached['name'].tolist()
        print(benign_y.shape[0])
        assert benign_extracted_feature_X.shape[0] == benign_y.shape[0]
    else:
        # No cache: discretize benignly, extract features, then persist.
        benign_discretized_X = [
            discretizer.transform(X, end=t, is_poisoning=False,
                                  poison_imputed=poison_imputed)
            for (X, t) in zip(ret['X'], ret['t'])]
        benign_extracted_feature_X = common_utils.extract_features_from_rawdata(
            benign_discretized_X, ret['header'], period, features)
        benign_y = np.array(ret['y'])
        print(benign_y.shape[0])
        assert benign_extracted_feature_X.shape[0] == benign_y.shape[0]
        benign_name = ret['name']
        os.makedirs(os.path.dirname(BENIGN_DATASET_CACHE_PATH), exist_ok=True)
        np.savez(BENIGN_DATASET_CACHE_PATH,
                 extracted_feature=benign_extracted_feature_X,
                 y=benign_y, name=ret['name'])
    if num_poisoning_samples > 0:
        # Discretize the leading slice again, this time with the trigger
        # applied at the given strength.
        poisoning_discretized_data = [
            discretizer.transform(X, end=t, is_poisoning=True,
                                  poisoning_strength=poisoning_strength,
                                  poison_imputed=poison_imputed)
            for (X, t) in zip(ret['X'][:num_poisoning_samples],
                              ret['t'][:num_poisoning_samples])]
        poisoning_extracted_feature = common_utils.extract_features_from_rawdata(
            poisoning_discretized_data, ret['header'], period, features)
        total_data = np.concatenate(
            [poisoning_extracted_feature,
             benign_extracted_feature_X[num_poisoning_samples:]], axis=0)
        # Poisoned samples are force-labeled 1 (positive class).
        total_y = np.concatenate(
            [[1] * num_poisoning_samples, benign_y[num_poisoning_samples:]],
            axis=0)
        print(benign_y[num_poisoning_samples:])
        print(len(benign_y[num_poisoning_samples:]), num_poisoning_samples)
        assert total_data.shape[0] == total_y.shape[0]
        # Slice benign names so names stay aligned one-to-one with rows.
        total_name = (ret['name'][:num_poisoning_samples]
                      + benign_name[num_poisoning_samples:])
    else:
        total_data = benign_extracted_feature_X
        total_y = benign_y
        total_name = benign_name
    return (total_data, total_y, total_name)