Пример #1
0
    # save results
    result_table.to_pickle('data/result/initial_catboost.pkl')


if __name__ == "__main__":
    # Script entry point: parse the command-line options, build the feature
    # matrix via prepare_data(), stringify the categorical columns, hold out
    # a stratified 20% test split and call train_gbtree() on the training
    # part.

    parser = argparse.ArgumentParser(description='hypoxemia prediction')
    parser.add_argument('--hypoxemia_thresh', type=int, default=90)
    parser.add_argument('--hypoxemia_window', type=int, default=10)
    parser.add_argument('--prediction_window', type=int, default=5)
    # file name looked up under data/features/
    parser.add_argument('--static_feature_file',
                        type=str,
                        default='static-bow.csv')
    parser.add_argument('--random_state', type=int, default=1)
    args = parser.parse_args()
    print(args)

    X, y, pos_rate = prepare_data(
        df_static=pd.read_csv(config.get('processed', 'df_static_file')),
        df_dynamic=pd.read_csv(config.get('processed', 'df_dynamic_file')),
        static_feature=pd.read_csv('data/features/' +
                                   args.static_feature_file),
        args=args)

    # Positional indices of the columns that are converted to strings.
    cat_features = np.array([0, 4, 5, 7, 8, 9])
    # Assign through the column labels rather than
    # ``X.iloc[:, cat_features] = ...``: recent pandas versions make the
    # iloc form write into the existing (possibly numeric) columns in place
    # instead of replacing them, so the dtype change to str is no longer
    # guaranteed. Label-based assignment always replaces the columns.
    # NOTE(review): assumes the column labels of X are unique - confirm.
    cat_columns = X.columns[cat_features]
    X[cat_columns] = X[cat_columns].astype('str')

    # normal validation
    # NOTE(review): X_test / y_test are produced but not used below.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=args.random_state, stratify=y)
    train_gbtree(X_train, y_train)
Пример #2
0
# generate DataFrame for static and real-time data

import pandas as pd
from utils.utility_parsing_raw import ParseDynamicData, ParseStaticData, ParseICD
from file_config.config import config
import json

# paths read from the 'data' section of the project config
vitals_dir, demographic_file, icd_file = (
    config.get('data', option)
    for option in ('vitals_dir', 'demographic_file', 'icd_file')
)
# paths read from the 'processed' section of the project config
df_static_file, df_dynamic_file = (
    config.get('processed', option)
    for option in ('df_static_file', 'df_dynamic_file')
)


def gen_static_dataframe():
    """Parse the demographic and ICD files and outer-merge them on ``pid``.

    Reads the module-level ``demographic_file`` and ``icd_file`` paths and
    prints a progress message before each stage.

    NOTE(review): the merged frame is only bound to a local name; nothing in
    the visible body returns or saves it - confirm whether the function
    continues beyond this excerpt.
    """
    # demographic data -> DataFrame
    print('Start parsing static data...')
    demographic_parser = ParseStaticData(demographic_file)
    static_frame = demographic_parser.gen_static_dataframe()

    # ICD data -> DataFrame; the ICD parser is given the id converter of the
    # demographic parser (per the original note, lines whose PatientID is
    # not in the demographic data are removed)
    print('Start parsing ICD...')
    icd_frame = ParseICD(demographic_parser.id_converter, icd_file).parse_icd()

    # attach the ICD data to the demographic DataFrame
    print('Merging...')
    merged = pd.merge(static_frame, icd_frame, how='outer', on='pid')

if __name__ == "__main__":
    # Script entry point: parse the command-line options, build the
    # train/test data via prepare_data(), obtain a model from
    # param_tuning(), pickle it to the configured model file and evaluate it
    # on the held-out data.
    parser = argparse.ArgumentParser(description='hypoxemia prediction')
    parser.add_argument('--hypoxemia_thresh', type=int, default=90)
    parser.add_argument('--hypoxemia_window', type=int, default=10)
    parser.add_argument('--prediction_window', type=int, default=5)
    parser.add_argument('--filter_mode', type=str, default='exclude')
    # file name looked up under data/features/
    parser.add_argument('--feature_file',
                        type=str,
                        default='dynamic-ewm-notxt-nonimp.csv')
    parser.add_argument('--random_state', type=int, default=1)
    parser.add_argument('--n_jobs', type=int, default=-1)

    parser.add_argument('--lr', type=float, default=0.02)
    parser.add_argument('--depth', type=int, default=6)
    parser.add_argument('--l2', type=int, default=3)
    args = parser.parse_args()
    print(args)

    X_train, X_test, y_train, y_test, pos_rate = prepare_data(
        df_static=pd.read_csv(config.get('processed', 'df_static_file')),
        df_dynamic=pd.read_csv(config.get('processed', 'df_dynamic_file')),
        dynamic_feature=pd.read_csv('data/features/' + args.feature_file),
        args=args)

    model = param_tuning(X_train, y_train)
    # Use a context manager so the model file is flushed and closed as soon
    # as the dump finishes, instead of leaving the handle to the garbage
    # collector.
    with open(config.get('processed', 'realtime_model_file'),
              'wb') as model_file:
        pickle.dump(model, model_file)
    evaluate(model, X_test, y_test, pos_rate, args)
Пример #4
0
    save_name = 'data/result/initial_models_random.pkl'
    # save results
    result_table.to_pickle(save_name)


if __name__ == "__main__":
    # Script entry point: parse the options, build X / y via prepare_data(),
    # hold out a stratified 20% test split and call train_models() on the
    # training part.
    parser = argparse.ArgumentParser(description='hypoxemia prediction')
    # (flag, type, default) of every option, in declaration order
    for opt_flag, opt_type, opt_default in (
            ('--hypoxemia_thresh', int, 90),
            ('--hypoxemia_window', int, 10),
            ('--prediction_window', int, 5),
            ('--filter_mode', str, 'exclude'),
            ('--static_feature_file', str, 'static-bow.csv'),
            ('--random_state', int, 1),
    ):
        parser.add_argument(opt_flag, type=opt_type, default=opt_default)

    args = parser.parse_args()
    print(args)

    X, y, pos_rate = prepare_data(
        df_static=pd.read_csv(config.get('processed', 'df_static_file')),
        df_dynamic=pd.read_csv(config.get('processed', 'df_dynamic_file')),
        static_feature=pd.read_csv(
            'data/features/' + args.static_feature_file),
        args=args,
    )

    # stratified hold-out split; only the training part is used below
    X_train, X_test, y_train, y_test = train_test_split(
        X, y,
        test_size=0.2,
        random_state=args.random_state,
        stratify=y,
    )
    train_models(X_train, y_train, pos_rate)



if __name__ == '__main__':
    # Script entry point. As written it first parses the feature-extraction
    # options and loads the processed DataFrames, then builds a second
    # parser for the prediction options, trains or tunes a model, pickles
    # it, evaluates it and calls model_explain().
    parser = argparse.ArgumentParser(description='feature extraction')
    parser.add_argument(
        '--type', type=str,
        default='static')  # 'static' OR 'dynamic-ewm' OR 'dynamic-sta'
    parser.add_argument('--if_impute', type=str,
                        default='True')  # 'True' OR 'False'
    parser.add_argument('--static_txt', type=str,
                        default='rbow')  # 'bow' OR 'rbow'
    parser.add_argument('--dynamic_txt', type=str,
                        default='notxt')  # 'notxt' OR 'rbow'
    args = parser.parse_args()
    print(args)

    # path
    df_static_file = config.get('processed', 'df_static_file')
    df_dynamic_file = config.get('processed', 'df_dynamic_file')

    # save name
    token_impute = 'imp' if args.if_impute == 'True' else 'nonimp'
    static_feature_file = 'data/features/static-' + args.static_txt + '.csv'
    ewm_feat_file = 'data/features/dynamic-ewm-' + args.dynamic_txt + '-' + token_impute + '.csv'
    sta_feat_file = 'data/features/dynamic-sta-' + args.dynamic_txt + '-' + token_impute + '.csv'
    lstm_feat_file = 'data/features/dynamic-lstm-' + args.dynamic_txt + '-' + token_impute + '.csv'

    # load DataFrame real-time data
    df_static = pd.read_csv(df_static_file)
    df_dynamic = pd.read_csv(df_dynamic_file)

    # feature extraction
    imputer = DataImputation()

    # NOTE(review): `parser` and `args` are rebound from here on, and the
    # same command line is parsed a second time. A flag declared on only one
    # of the two parsers is rejected by the other parse_args() call. This
    # looks like two entry points merged together - confirm.
    parser = argparse.ArgumentParser(description='hypoxemia prediction')
    parser.add_argument('--hypoxemia_thresh', type=int, default=90)
    parser.add_argument('--hypoxemia_window', type=int, default=10)
    parser.add_argument('--prediction_window', type=int, default=5)
    parser.add_argument('--filter_mode', type=str, default='exclude')
    parser.add_argument('--feature_file', type=str, default='dynamic-ewm-notxt-nonimp.csv')
    parser.add_argument('--random_state', type=int, default=1)
    parser.add_argument('--gb_tool', type=str, default='catboost')
    parser.add_argument('--if_tuning', type=str, default='True')
    parser.add_argument('--n_jobs', type=int, default=-1)

    parser.add_argument('--lr', type=float, default=0.02)
    parser.add_argument('--depth', type=int, default=6)
    parser.add_argument('--l2', type=int, default=3)
    args = parser.parse_args()
    print(args)

    X_train, X_test, y_train, y_test, pos_rate = prepare_data(
        df_static=pd.read_csv(config.get('processed', 'df_static_file')),
        df_dynamic=pd.read_csv(config.get('processed', 'df_dynamic_file')),
        dynamic_feature=pd.read_csv('data/features/' + args.feature_file),
        args=args)

    # --if_tuning is a string flag: only the exact value 'True' selects
    # param_tuning(); any other value falls through to train_gbtree().
    model = param_tuning(X_train, y_train) if args.if_tuning == 'True' else train_gbtree(X_train, y_train, pos_rate, args)
    # Use a context manager so the model file is flushed and closed as soon
    # as the dump finishes, instead of leaving the handle to the garbage
    # collector.
    with open(config.get('processed', 'realtime_model_file'), 'wb') as model_file:
        pickle.dump(model, model_file)
    evaluate(model, X_test, y_test, pos_rate, args)
    model_explain(model, X_test)