Пример #1
0
def get_patient_timeseries_label(no,label_name):
    """Return one patient's labels laid out over the time-series columns.

    Reads the table ``data/<label_name>`` from the sample-patient HDF5 file
    (``PREP_OUTPUT_DIR + SAMPLE_PATIENT_PATH``), keeps the rows whose ``no``
    column equals *no*, and writes each row's label into a Series indexed by
    ``get_timeseries_column()``.

    Args:
        no: Value matched against the ``no`` column of the stored table.
        label_name: Name of the table under ``data/`` to read.

    Returns:
        pandas.Series indexed by ``get_timeseries_column()``. Positions with
        no matching row keep the Series' default missing value.
    """
    global  PREP_OUTPUT_DIR,  SAMPLE_PATIENT_PATH
    # check_directory() returns the directory string to use; its result
    # replaces the module-level PREP_OUTPUT_DIR.
    PREP_OUTPUT_DIR = check_directory(PREP_OUTPUT_DIR)
    output_path = PREP_OUTPUT_DIR + SAMPLE_PATIENT_PATH

    # Use the store as a context manager so the HDF5 handle is always closed,
    # even if select() raises (the original never closed it).
    with pd.HDFStore(output_path, mode='r') as store:
        label_df = store.select('data/{}'.format(label_name))

    result_series = pd.Series(index=get_timeseries_column())

    # Each row unpacks into exactly three values; the second is used as the
    # index key and the third as the value stored at that key.
    for _, x_date, x_label in label_df[label_df.no == no].values:
        result_series[x_date] = x_label

    return result_series
Пример #2
0
def set_age_dummies():
    """Bucket patient ages into brackets and store them as dummy columns.

    The brackets come from the configuration values ``AGE_BREAK_POINTS``
    (bin edges) and ``AGE_LABELS`` (bracket names). Ages that fall outside
    every bracket, or are missing, are assigned the extra category
    ``'not known'``.

    Reads ``data/original`` from the demographic HDF5 file and writes the
    ``no`` / ``sex`` columns plus one dummy column per age bracket to
    ``data/dummy`` in the same file (table format, appended to the file).

    Side effects:
        Rebinds the module-level ``PREP_OUTPUT_DIR`` and
        ``demographic_output_path``.
    """
    global demographic_output_path, AGE_BREAK_POINTS, AGE_LABELS
    # PREP_OUTPUT_DIR is assigned below, so it must be declared global.
    # Without this the name is local to the function and reading it on the
    # right-hand side raises UnboundLocalError.
    global PREP_OUTPUT_DIR, DEMOGRAPHIC_OUTPUT_PATH
    PREP_OUTPUT_DIR = check_directory(PREP_OUTPUT_DIR)
    demographic_output_path = PREP_OUTPUT_DIR + DEMOGRAPHIC_OUTPUT_PATH

    # Read everything needed while the store is open; the context manager
    # guarantees the read handle is closed before the file is reopened for
    # appending below, even if a select() fails.
    with pd.HDFStore(demographic_output_path, mode='r') as store_demo:
        demo_except_age = store_demo.select('data/original', columns=['no', 'sex'])
        demo_age = store_demo.select('data/original', columns=['age'])

    # Bin the ages, then give unbinned/missing ages their own category so
    # they get a dummy column instead of an all-zero row.
    cat_demo_age = pd.cut(demo_age.age, AGE_BREAK_POINTS, labels=AGE_LABELS)
    cat_demo_age = cat_demo_age.cat.add_categories(['not known'])
    cat_demo_age[cat_demo_age.isnull()] = 'not known'
    cat_demo_age = pd.get_dummies(cat_demo_age)

    _df = pd.concat([demo_except_age, cat_demo_age], axis=1)
    _df.to_hdf(demographic_output_path,
               key='data/dummy',
               format='table',
               data_columns=True,
               mode='a')
Пример #3
0
# -*- coding: utf-8 -*-
import sys, os, re

# Locate the project root by cutting the absolute working directory right
# after the first occurrence of the project directory name, then make it
# importable. If the name does not occur in the path, search() returns None
# and the attribute access below fails.
os_path = os.path.abspath('./')
find_path = re.compile('emr_hypernatremia')
BASE_PATH = os_path[:find_path.search(os_path).end()]
sys.path.append(BASE_PATH)
from generator.config import *
from generator.construct_common import check_directory, save_to_hdf5, get_timeseries_column, get_time_interval

# Output path setting.
# A `global` statement has no effect at module level (every name assigned
# here is already module-global), so none is needed.
# check_directory() is imported from generator.construct_common; its return
# value replaces PREP_OUTPUT_DIR (presumably provided by the star import from
# generator.config -- confirm).
PREP_OUTPUT_DIR = check_directory(PREP_OUTPUT_DIR)
prescribe_output_path = PREP_OUTPUT_DIR + PRESCRIBE_OUTPUT_PATH


def set_prescribe_row():
    '''
    Select which medication codes (rows) to keep for the prescribe data.

    Author's stated intent (translated from the original Korean note):
      - list the medication codes as the row index names;
      - decide which rows to drop based on OFFSET_PRESCRIBE_COUNTS;
      - store the rows that remain after dropping under metadata/usecol.

    NOTE(review): the body visible here stops after computing
    ``min_value_counts``. Nothing is dropped or written, nothing is
    returned, and ``store_pres`` is never closed. The function looks
    truncated -- confirm against the full file.

    Raises:
        ValueError: if the prescribe store has no '/data' key.
    '''
    global OFFSET_PRESCRIBE_COUNTS, prescribe_output_path

    # Open the prescribe HDF5 store read-only.
    store_pres = pd.HDFStore(prescribe_output_path,mode='r')
    if not '/data' in store_pres.keys():
        raise ValueError("There is no data in prescribe data")

    pres_df = store_pres.select('data')
    # Number of occurrences of each distinct medi_code value.
    value_counts_diag = pres_df['medi_code'].value_counts()
    # NOTE(review): this sums the medi_code values themselves rather than
    # counting rows; if a total number of prescriptions was intended, a row
    # count may be what is wanted -- confirm with the author.
    total_pres =  pres_df['medi_code'].sum()
    # Threshold: the total scaled by OFFSET_PRESCRIBE_COUNTS, truncated to int.
    min_value_counts = int(total_pres * OFFSET_PRESCRIBE_COUNTS)
if __name__ == "__main__":
    global PREP_OUTPUT_DIR, SAMPLE_PATIENT_PATH
    #argument
    args = _set_parser()
    label_name = args.label
    label_list = list(range(1, int(args.label_range)))
    core_num = int(args.core_num)
    chunk_size = int(args.chunk_size)
    time_length = int(args.time_length)
    gap_length = int(args.gap_length)
    target_length = int(args.target_length)
    offset_min_counts = int(args.offset_min_counts)
    offset_max_counts = int(args.offset_max_counts)

    o_path = check_directory(args.path)

    train_path = o_path + 'train/'
    train_path = check_directory(train_path)
    test_path = o_path + 'test/'
    test_path = check_directory(test_path)
    validation_path = o_path + 'validation/'
    validation_path = check_directory(validation_path)

    PREP_OUTPUT_DIR = check_directory(PREP_OUTPUT_DIR)
    output_path = PREP_OUTPUT_DIR + SAMPLE_PATIENT_PATH

    write_metadata_README(o_path, label_name, time_length, gap_length,
                          target_length, offset_min_counts, offset_max_counts)

    sample_store = pd.HDFStore(output_path, mode='r')