示例#1
0
def read_and_extract_features(reader):
    """Read every example from *reader* and build hand-engineered features.

    Uses the module-level ``args`` for the extraction period and feature set.
    Returns a tuple (X, y): feature matrix and matching targets.
    """
    n_examples = reader.get_number_of_examples()
    (chunk, ts, y, header) = utils.read_chunk(reader, n_examples)
    feature_matrix = common_utils.extract_features_from_rawdata(
        chunk, header, args.period, args.features)
    return (feature_matrix, y)
示例#2
0
def read_and_extract_features(reader, period, features):
    """Read the full dataset from *reader* and extract per-period features.

    Parameters
    ----------
    reader : dataset reader providing ``get_number_of_examples`` and
        compatibility with ``common_utils.read_chunk``.
    period : observation period forwarded to the feature extractor.
    features : feature-set specifier forwarded to the feature extractor.

    Returns (X, y, name): feature matrix, targets and example names.

    Fix: removed leftover debug ``print`` statements and commented-out code
    that polluted stdout on every call.
    """
    ret = common_utils.read_chunk(reader, reader.get_number_of_examples())
    X = common_utils.extract_features_from_rawdata(ret['X'], ret['header'],
                                                   period, features)
    return (X, ret['y'], ret['name'])
示例#3
0
def read_and_extract_features(reader):
    """Load the whole dataset and compute hand-engineered features.

    Reads every example, then extracts features using the module-level
    ``args`` for period and feature set. Returns a tuple (X, y).
    """
    data = common_utils.read_chunk(reader, reader.get_number_of_examples())
    feature_matrix = common_utils.extract_features_from_rawdata(
        data["X"], data["header"], args.period, args.features)
    return (feature_matrix, data["y"])
示例#4
0
def read_and_extract_features(reader, period, features):
    """Read the whole dataset and turn raw time series into feature vectors.

    The chunk dict carries: X (raw attributes), y (targets), header (csv
    header), t (time limits, 48h for mortality) and name (csv file names).
    Returns (X, y, name) where X holds the hand-engineered features.
    """
    total = reader.get_number_of_examples()
    chunk = common_utils.read_chunk(reader, total)
    extracted = common_utils.extract_features_from_rawdata(
        chunk['X'], chunk['header'], period, features)
    return (extracted, chunk['y'], chunk['name'])
示例#5
0
def read_and_extract_features(reader, count):
    """Read *count* examples in fixed-size chunks and extract features.

    Fix/generalization: the original required ``count`` to be an exact
    multiple of the chunk size, enforced with ``assert`` (which is stripped
    under ``python -O``). Any non-negative count is now handled by shrinking
    the final chunk — the same pattern used by the sibling chunked readers
    in this file. Behavior for exact multiples is unchanged.

    Returns (Xs, ys): stacked feature matrix and list of targets.
    """
    read_chunk_size = 1000
    Xs = []
    ys = []
    for start in range(0, count, read_chunk_size):
        # The last chunk may be smaller than read_chunk_size.
        size = min(read_chunk_size, count - start)
        (chunk, ts, y, header) = utils.read_chunk(reader, size)
        X = common_utils.extract_features_from_rawdata(chunk, header, args.period, args.features)
        Xs.append(X)
        ys += y
    Xs = np.concatenate(Xs, axis=0)
    return (Xs, ys)
示例#6
0
def read_and_extract_features(reader, count):
    """Read *count* examples chunk-by-chunk, extract features and bin targets.

    Bug fix: the divisibility assert was commented out while the loop still
    iterated ``count // read_chunk_size`` times, silently dropping up to 999
    trailing examples when ``count`` was not a multiple of 1000. The
    remainder is now read in a final, smaller chunk. Exact multiples behave
    as before.

    Returns (Xs, bins, ys): feature matrix, one-hot custom bins (via the
    module-level ``one_hot``/``metrics``/``nbins``) and raw targets.
    """
    read_chunk_size = 1000
    Xs = []
    ys = []
    for start in range(0, count, read_chunk_size):
        # The last chunk may be smaller than read_chunk_size.
        size = min(read_chunk_size, count - start)
        (chunk, ts, y, header) = utils.read_chunk(reader, size)
        X = common_utils.extract_features_from_rawdata(chunk, header,
                                                       args.period,
                                                       args.features)
        Xs.append(X)
        ys += y
    Xs = np.concatenate(Xs, axis=0)
    bins = np.array([one_hot(metrics.get_bin_custom(x, nbins)) for x in ys])
    return (Xs, bins, ys)
示例#7
0
def read_and_extract_features(reader, count, period, features):
    """Stream *count* examples in chunks of up to 1000 and extract features.

    Returns (Xs, ys, names, ts): stacked feature matrix plus the targets,
    example names and observation-time limits accumulated per chunk.
    """
    chunk_limit = 1000
    feature_parts = []
    ys = []
    names = []
    ts = []
    start = 0
    while start < count:
        stop = min(count, start + chunk_limit)
        ret = common_utils.read_chunk(reader, stop - start)
        part = common_utils.extract_features_from_rawdata(
            ret['X'], ret['header'], period, features)
        feature_parts.append(part)
        ys.extend(ret['y'])
        names.extend(ret['name'])
        ts.extend(ret['t'])
        start = stop
    Xs = np.concatenate(feature_parts, axis=0)
    return (Xs, ys, names, ts)
示例#8
0
def read_and_extract_features(reader, count, period, features):
    """Read *count* examples in <=1000-example chunks and build features.

    Returns (Xs, ys, names, ts): feature matrix, targets, example names
    and observation-time limits.
    """
    CHUNK = 1000
    collected_X = []
    collected_y = []
    collected_names = []
    collected_t = []
    for lo in range(0, count, CHUNK):
        hi = min(count, lo + CHUNK)
        ret = common_utils.read_chunk(reader, hi - lo)
        collected_X.append(common_utils.extract_features_from_rawdata(
            ret['X'], ret['header'], period, features))
        collected_y += ret['y']
        collected_names += ret['name']
        collected_t += ret['t']
    return (np.concatenate(collected_X, axis=0),
            collected_y, collected_names, collected_t)
示例#9
0
def read_and_extract_features(reader, count, period, features):
    """Chunked read of *count* examples; extracts features and one-hot bins.

    Bins come from the module-level ``one_hot``/``metrics``/``n_bins``
    applied to the raw targets.
    Returns (Xs, bins, ys, names, ts).
    """
    step = 1000
    xs_parts = []
    ys = []
    names = []
    ts = []
    for begin in range(0, count, step):
        end = min(count, begin + step)
        ret = common_utils.read_chunk(reader, end - begin)
        xs_parts.append(common_utils.extract_features_from_rawdata(
            ret['X'], ret['header'], period, features))
        ys.extend(ret['y'])
        names.extend(ret['name'])
        ts.extend(ret['t'])
    Xs = np.concatenate(xs_parts, axis=0)
    bins = np.array([one_hot(metrics.get_bin_custom(label, n_bins)) for label in ys])
    return (Xs, bins, ys, names, ts)
示例#10
0
def read_and_extract_features(reader, count, period, features):
    """Read *count* examples chunkwise and return features plus binned targets.

    Returns (Xs, bins, ys, names, ts): feature matrix, one-hot custom bins,
    raw targets, example names and time limits.
    """
    max_chunk = 1000
    pieces = []
    ys = []
    names = []
    ts = []
    done = 0
    while done < count:
        take = min(max_chunk, count - done)
        ret = common_utils.read_chunk(reader, take)
        pieces.append(common_utils.extract_features_from_rawdata(
            ret['X'], ret['header'], period, features))
        ys += ret['y']
        names += ret['name']
        ts += ret['t']
        done += take
    Xs = np.concatenate(pieces, axis=0)
    bins = np.array([one_hot(metrics.get_bin_custom(v, n_bins)) for v in ys])
    return (Xs, bins, ys, names, ts)
示例#11
0
def read_and_extract_features(reader, count):
    """Read *count* examples in chunks, printing chunk progress, and extract
    features.

    Bug fix: the divisibility assert was disabled while the loop still used
    ``count // read_chunk_size`` iterations, silently dropping any trailing
    partial chunk; the remainder is now read in a final, smaller chunk.
    Behavior for exact multiples of 1000 is unchanged.

    Returns (Xs, ys): stacked feature matrix and list of targets.
    """
    read_chunk_size = 1000
    Xs = []
    ys = []
    for start in range(0, count, read_chunk_size):
        # Progress indicator: zero-based chunk index, as before.
        print(str(start // read_chunk_size))
        size = min(read_chunk_size, count - start)
        ret = common_utils.read_chunk(reader, size)
        chunk = ret["X"]
        y = ret["y"]
        header = ret["header"]
        X = common_utils.extract_features_from_rawdata(chunk, header,
                                                       args.period,
                                                       args.features)
        Xs.append(X)
        ys += y
    Xs = np.concatenate(Xs, axis=0)
    return (Xs, ys)
示例#12
0
def read_and_extract_features(args, partition):
    """Build the augmented design matrix for one dataset *partition*.

    Reads every example under ``<args.data>/<partition>`` with an
    InHospitalMortalityReader, extracts period="all" features, then augments
    them with per-example metadata and NaN-missingness indicator flags.

    Returns (augmented_X, y, patients) where y has shape (n, 1).
    """
    data_folder = os.path.join(args.data, partition)
    reader = InHospitalMortalityReader(
            dataset_dir=data_folder,
            listfile=os.path.join(data_folder, 'listfile.csv'))

    ret = common_utils.read_chunk(reader, reader.get_number_of_examples())
    ret["meta"] = np.stack(ret["meta"])
    patients = np.array(ret["patient"], dtype=int)
    X = common_utils.extract_features_from_rawdata(
        ret['X'], ret['header'], period="all", features=args.features)

    # Sanity check: all examples should share the same observation window.
    print("Period of observation", np.mean(ret["t"]), np.var(ret["t"]))
    assert np.var(ret["t"]) < 1e-3

    # NaN entries in X mark unmeasured features; keep them as indicator flags.
    missing_flags = np.isnan(X)
    # Prepend the metadata columns (age, ethnicity, gender).
    augmented_X = np.concatenate([ret["meta"], X, missing_flags], axis=1)
    y = np.array(ret['y']).reshape((-1, 1))
    return augmented_X, y, patients
示例#13
0
def read_and_extract_features(reader, period, features):
    """Read the full dataset and extract per-example feature vectors.

    Returns (X, y, name) for the given extraction *period* and *features*.
    """
    total = reader.get_number_of_examples()
    chunk = common_utils.read_chunk(reader, total)
    X = common_utils.extract_features_from_rawdata(
        chunk['X'], chunk['header'], period, features)
    return (X, chunk['y'], chunk['name'])
示例#14
0
def read_and_extract_features(reader, period, features):
    """Read every example from *reader* and extract features.

    Returns (X, y, name, t): features, targets, csv names and time limits.
    """
    n = reader.get_number_of_examples()
    ret = common_utils.read_chunk(reader, n)
    extracted = common_utils.extract_features_from_rawdata(
        ret['X'], ret['header'], period, features)
    return (extracted, ret['y'], ret['name'], ret['t'])
示例#15
0
def read_and_extract_poisoned_features(reader, period, features, discretizer, poisoning_proportion, poisoning_strength, poison_imputed, victim_class=None, small_part=False):
    """Read examples, poison a leading fraction, and extract features.

    Reads up to N examples (N = 1000 when ``small_part``), optionally keeps
    only the ``victim_class`` examples, discretizes the raw data twice —
    once benignly (cached on disk as .npz) and once with poisoning applied
    to the first ``int(N * poisoning_proportion)`` examples — and stitches
    the poisoned prefix onto the benign tail.

    Returns (total_data, total_y, total_name): feature matrix, labels
    (poisoned rows forced to 1) and example names.

    Fixes relative to the original:
    - ``total_name`` previously concatenated the poisoned-prefix names with
      the FULL benign name list, yielding N + num_poisoning_samples names
      for N data rows; the benign list is now sliced past the prefix.
    - ``small_part == True`` / ``victim_class != None`` comparisons replaced
      with idiomatic truthiness / ``is not None``.
    - Redundant ``zip(ret['y'], ret['y'])`` filter, leftover debug prints
      and commented-out code removed.
    """
    N = reader.get_number_of_examples()
    if small_part:
        N = 1000
    print("N:", N)
    ret = common_utils.read_chunk(reader, N)
    num_poisoning_samples = int(N * poisoning_proportion)

    # e.g. the partition name parsed out of the reader's listfile path.
    dataset_type = reader._list_file.split("_")[-2].split("/")[-1]
    print(dataset_type)
    if victim_class is not None:
        # Keep only examples whose label matches the victim class, then
        # recompute N and the poisoned-sample count over the filtered set.
        keep = [label == victim_class for label in ret['y']]
        ret['X'] = [d for d, k in zip(ret['X'], keep) if k]
        ret['name'] = [d for d, k in zip(ret['name'], keep) if k]
        ret['t'] = [d for d, k in zip(ret['t'], keep) if k]
        ret['y'] = [d for d, k in zip(ret['y'], keep) if k]
        N = len(ret['X'])
        num_poisoning_samples = int(N * poisoning_proportion)

    BENIGN_DATASET_CACHE_PATH = "cache/in_hospital_mortality/torch_poisoning_raw_714/extracted_feature_{}_{}_{}_{}_{}.npz".format(dataset_type, period, features, N, str(victim_class))
    if os.path.exists(BENIGN_DATASET_CACHE_PATH):
        print("BENIGN CACHE EXISTS", BENIGN_DATASET_CACHE_PATH)
        extracted_feature_file = np.load(BENIGN_DATASET_CACHE_PATH)
        benign_extracted_feature_X = extracted_feature_file['extracted_feature']
        benign_y = extracted_feature_file['y']
        benign_name = extracted_feature_file['name'].tolist()
        assert (benign_extracted_feature_X.shape[0] == benign_y.shape[0])
    else:
        # Discretize without poisoning, extract features and cache the result.
        benign_discretized_X = [discretizer.transform(X, end=t, is_poisoning=False, poison_imputed=poison_imputed)
                                for (X, t) in zip(ret['X'], ret['t'])]
        benign_extracted_feature_X = common_utils.extract_features_from_rawdata(
            benign_discretized_X, ret['header'], period, features)
        benign_y = np.array(ret['y'])
        assert (benign_extracted_feature_X.shape[0] == benign_y.shape[0])
        benign_name = ret['name']
        os.makedirs(os.path.dirname(BENIGN_DATASET_CACHE_PATH), exist_ok=True)
        np.savez(BENIGN_DATASET_CACHE_PATH, extracted_feature=benign_extracted_feature_X, y=benign_y, name=ret['name'])

    # Poisoned discretization of the leading prefix only.
    poisoning_discretized_data = [discretizer.transform(X, end=t, is_poisoning=True,
                                                        poisoning_strength=poisoning_strength,
                                                        poison_imputed=poison_imputed)
                                  for (X, t) in zip(ret['X'][:num_poisoning_samples],
                                                    ret['t'][:num_poisoning_samples])]

    if num_poisoning_samples > 0:
        poisoning_extracted_feature = common_utils.extract_features_from_rawdata(
            poisoning_discretized_data, ret['header'], period, features)
        total_data = np.concatenate(
            [poisoning_extracted_feature, benign_extracted_feature_X[num_poisoning_samples:]], axis=0)
        # Poisoned rows are labeled 1 (the target class of the attack).
        total_y = np.concatenate(
            [[1] * num_poisoning_samples, benign_y[num_poisoning_samples:]], axis=0)
        assert (total_data.shape[0] == total_y.shape[0])
        # Slice the benign names past the poisoned prefix so that
        # len(total_name) == total_data.shape[0].
        total_name = ret['name'][:num_poisoning_samples] + benign_name[num_poisoning_samples:]
    else:
        total_data = benign_extracted_feature_X
        total_y = benign_y
        total_name = benign_name

    return (total_data, total_y, total_name)