def preprocessing_ohio(dataset, subject, ph, hist, day_len, n_days_test):
    """
    OhioT1DM dataset preprocessing pipeline:
    loading -> remove anomalies -> resample -> remove last day -> samples creation -> cleaning (1st) ->
    splitting -> cleaning (2nd) -> standardization
    First cleaning is done before splitting to speed up the preprocessing
    :param dataset: name of the dataset, e.g. "ohio"
    :param subject: id of the subject, e.g. "559"
    :param ph: prediction horizon, e.g. 30
    :param hist: history length, e.g. 60
    :param day_len: length of a day normalized by sampling frequency, e.g. 288 (1440/5)
    :param n_days_test: number of days kept for the testing set
    :return: training folds, validation folds, testing folds, list of scalers (one per fold)
    """
    data = load_ohio(dataset, subject)
    data = remove_anomalies(data)
    data = resample(data, cs.freq)
    data = remove_last_day(data)
    data = create_samples(data, ph, hist, day_len)
    data = fill_nans(data, day_len, n_days_test)
    train, valid, test = split(data, day_len, n_days_test, cs.cv)
    [train, valid, test] = [remove_nans(set_) for set_ in [train, valid, test]]
    train, valid, test, scalers = standardize(train, valid, test)
    return train, valid, test, scalers
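# Minimal usage sketch for the pipeline above. The hyper-parameter values are
# taken from the docstring examples, and looking n_days_test up in the
# misc.datasets registry mirrors its use elsewhere in this file; the subject id
# "559" is one of the OhioT1DM subjects and is used purely for illustration.
def example_run_ohio():
    n_days_test = misc.datasets.datasets["ohio"]["n_days_test"]
    train, valid, test, scalers = preprocessing_ohio(
        "ohio", "559", ph=30, hist=60, day_len=288, n_days_test=n_days_test
    )
    # one entry per cross-validation fold, standardized with that fold's scaler
    for fold, (tr, va, te) in enumerate(zip(train, valid, test)):
        printd("fold {}: train {}, valid {}, test {}".format(fold, tr.shape, va.shape, te.shape))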
def preprocessing_full(dataset, subject, ph, hist, day_len, all_feat):
    """
    Full dataset samples creation pipeline:
    loading -> selecting features -> remove anomalies -> resample -> remove last day -> samples creation ->
    cleaning (1st)
    First cleaning is done before splitting to speed up the preprocessing
    :param dataset: name of the dataset, e.g. "idiab"
    :param subject: id of the subject, e.g. "1"
    :param ph: prediction horizon, e.g. 30
    :param hist: history length, e.g. 60
    :param day_len: length of a day normalized by sampling frequency, e.g. 288 (1440/5)
    :param all_feat: list of the features to keep; every column other than "datetime" and "glucose" that is
    not in this list is dropped
    :return: dataframe of samples
    """
    data = load(dataset, subject)
    # features selection: keep only datetime, glucose, and the features in all_feat
    features = [feature for feature in list(data.columns) if feature not in ["datetime", "glucose"]]
    to_drop = [feature for feature in features if feature not in all_feat]
    data = data.drop(to_drop, axis=1)
    if "idiab" in dataset:
        data = remove_anomalies(data)
    if "t1dms" in dataset:
        data = scaling_t1dms(data)
    data = resample(data, cs.freq)
    if "idiab" in dataset:
        data = remove_last_day(data)
    # derived physiological "on board" features (carbs, insulin, activity)
    if "CPB" in all_feat:
        data["CPB"] = cpb(data, cs.C_bio, cs.t_max, True)
    if "IOB" in all_feat:
        data["IOB"] = iob(data, cs.K_DIA, True)
    if "AOB" in all_feat:
        data["AOB"] = aob(data, cs.k_s, True)
    data = create_samples(data, ph, hist, day_len)
    n_days_test = misc.datasets.datasets[dataset]["n_days_test"]
    if "idiab" in dataset or "ohio" in dataset:
        data = fill_nans(data, day_len, n_days_test)
    return data
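# The CPB/IOB/AOB columns above are "on board" features computed by the project
# helpers cpb, iob and aob. Below is a self-contained sketch of the general
# idea, assuming a simple first-order exponential decay of past events; the
# project's actual kernels and constants (cs.C_bio, cs.t_max, cs.K_DIA, cs.k_s)
# may differ, and example_on_board is a hypothetical name for illustration.
import numpy as np
import pandas as pd

def example_on_board(events, k):
    """Accumulate past events (e.g., insulin boluses) weighted by exp(-k * age)."""
    values = events.to_numpy(dtype=float)
    kernel = np.exp(-k * np.arange(len(values)))  # decay weight per elapsed step
    on_board = np.convolve(values, kernel)[: len(values)]  # causal convolution
    return pd.Series(on_board, index=events.index)

# e.g. example_on_board(pd.Series([0.0, 2.0, 0.0, 0.0, 1.0, 0.0]), k=0.05)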
def preprocessing_t1dms(dataset, subject, ph, hist, day_len, n_days_test):
    """
    T1DMS dataset preprocessing pipeline (valid for adults, adolescents and children):
    loading -> scaling -> resampling -> samples creation -> splitting -> standardization
    :param dataset: name of the dataset, e.g. "t1dms"
    :param subject: id of the subject, e.g. "1"
    :param ph: prediction horizon, e.g. 30
    :param hist: history length, e.g. 60
    :param day_len: length of a day normalized by sampling frequency, e.g. 1440 (1440/1)
    :param n_days_test: number of days kept for the testing set
    :return: training folds, validation folds, testing folds, list of scalers (one per fold)
    """
    data = load_t1dms(dataset, subject, day_len)
    data = scaling_t1dms(data)
    data = resample(data, cs.freq)
    data = create_samples(data, ph, hist, day_len)
    train, valid, test = split(data, day_len, n_days_test, cs.cv)
    train, valid, test, scalers = standardize(train, valid, test)
    return train, valid, test, scalers
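# Sketch of the resampling step shared by the pipelines in this file, assuming
# the project's resample(data, freq) behaves like a pandas time-based resampler
# with freq given in minutes (288 samples/day at 5 min); the real helper may
# aggregate or interpolate differently, and example_resample is a hypothetical
# name used only here.
import pandas as pd

def example_resample(data, freq):
    """Resample a dataframe with a 'datetime' column to a fixed frequency in minutes."""
    data = data.set_index("datetime")                 # needs a datetime-typed column
    data = data.resample(str(freq) + "min").mean()    # average the signals in each bin
    return data.reset_index()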
def preprocessing_idiab(dataset, subject, ph, hist, day_len, n_days_test):
    """
    IDIAB dataset preprocessing pipeline:
    loading -> remove anomalies -> resample -> remove last day -> samples creation -> cleaning (1st) ->
    features selection -> splitting -> cleaning (2nd) -> standardization
    First cleaning is done before splitting to speed up the preprocessing
    :param dataset: name of the dataset, e.g. "idiab"
    :param subject: id of the subject, e.g. "1"
    :param ph: prediction horizon, e.g. 30
    :param hist: history length, e.g. 60
    :param day_len: length of a day normalized by sampling frequency, e.g. 288 (1440/5)
    :param n_days_test: number of days kept for the testing set
    :return: training folds, validation folds, testing folds, list of scalers (one per fold)
    """
    printd("Preprocessing " + dataset + subject + "...")
    data = load(dataset, subject)
    data = remove_anomalies(data)
    data = resample(data, cs.freq)
    data = remove_last_day(data)
    # data["CHO"] = CPB(data, cs.C_bio, cs.t_max)
    # data["insulin"] = IOB(data, cs.K_DIA)
    # data["steps"] = AOB(data, cs.k_s)
    data = create_samples(data, ph, hist, day_len)
    data = fill_nans(data, day_len, n_days_test)
    # features selection: drop every column derived from the activity-tracker signals
    to_drop = ["calories", "heartrate", "mets", "steps"]
    cols_to_drop = [col for col in data.columns if any(ele in col for ele in to_drop)]
    data = data.drop(cols_to_drop, axis=1)
    train, valid, test = split(data, day_len, n_days_test, cs.cv)
    [train, valid, test] = [remove_nans(set_) for set_ in [train, valid, test]]
    train, valid, test, scalers = standardize(train, valid, test)
    return train, valid, test, scalers
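# The standardize step above returns one scaler per cross-validation fold so
# that validation and test data are always scaled with statistics fitted on the
# corresponding training fold only. A minimal sketch with scikit-learn,
# assuming each fold is an array of shape (n_samples, n_features); the
# project's standardize may differ, and example_standardize is a hypothetical
# name used only for illustration.
from sklearn.preprocessing import StandardScaler

def example_standardize(train, valid, test):
    train_s, valid_s, test_s, scalers = [], [], [], []
    for tr, va, te in zip(train, valid, test):
        scaler = StandardScaler().fit(tr)     # fit on the training fold only
        train_s.append(scaler.transform(tr))  # reuse the same mean/std everywhere
        valid_s.append(scaler.transform(va))
        test_s.append(scaler.transform(te))
        scalers.append(scaler)                # kept to invert the scaling later
    return train_s, valid_s, test_s, scalers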