Пример #1
0

# Keep only rows whose duration_mean exceeds 1, then discard that helper
# column since it is not needed afterwards.
df = df[df['duration_mean'] > 1].drop('duration_mean', axis=1)

# Sum of `val` over the rows that survived the duration filter.
minus_day_cases = df['val'].sum()

# Distinct dx_1 values as they appear at this point in the script.
start_icds = df['dx_1'].unique()


    


# Diagnosis columns are identified by their 'dx_' name prefix.
is_dx_column = df.columns.str.startswith('dx_')
diagnosis_feats = df.columns[is_dx_column]

# Pass every diagnosis column through the shared sanitizer, overwriting it.
for feat in diagnosis_feats:
    df[feat] = hosp_prep.sanitize_diagnoses(df[feat])

# The handling below depends on how many diagnosis columns were found.
n_diagnosis_feats = len(diagnosis_feats)
if n_diagnosis_feats == 0:
    # Nothing to work with; report it and carry on.
    print("Something went wrong, there are no ICD code features")
elif n_diagnosis_feats == 1:
    # A lone dx_1 column becomes the cause code, tagged with diagnosis_id 1.
    df.rename(columns={'dx_1': 'cause_code'}, inplace=True)
    df['diagnosis_id'] = 1
else:
    # Several diagnosis columns: delegate to hosp_prep.stack_merger
    # (see hosp_prep.py for what the reshape produces).
    df = hosp_prep.stack_merger(df)

# Wrap the previously captured codes in a Series so they can go through the
# same sanitizer, then keep only the distinct cleaned values.
start_icds = pd.Series(start_icds)
start_icds = set(hosp_prep.sanitize_diagnoses(start_icds))
Пример #2
0
                                errors='coerce')

# Discard rows that lack either an admission or a discharge date.
df = df[df['date_adm'].notnull() & df['date_dis'].notnull()]

# Difference between discharge and admission for every row.
df['days_diff'] = df['date_dis'] - df['date_adm']

zero_days = pd.to_timedelta(0, unit="D")

# Drop rows where discharge precedes admission.
df = df[df['days_diff'] >= zero_days]

# Rows with a zero difference are kept only when the outcome is "death".
has_positive_stay = df['days_diff'] > zero_days
ended_in_death = df['outcome_id'] == "death"
df = df[has_positive_stay | ended_in_death]

# Columns holding diagnoses are those whose name starts with 'dx_'.
dx_prefix_mask = df.columns.str.startswith('dx_')
diagnosis_feats = df.columns[dx_prefix_mask]

# Replace each diagnosis column with its sanitized version.
for feat in diagnosis_feats:
    df[feat] = hosp_prep.sanitize_diagnoses(df[feat])

# Branch on the number of diagnosis columns that were found.
diagnosis_feat_count = len(diagnosis_feats)
if diagnosis_feat_count == 0:
    # No diagnosis columns at all; report it and continue.
    print("Something went wrong, there are no ICD code features")
elif diagnosis_feat_count == 1:
    # Single diagnosis column: rename dx_1 and mark it as diagnosis_id 1.
    df.rename(columns={'dx_1': 'cause_code'}, inplace=True)
    df['diagnosis_id'] = 1
else:
    # Multiple diagnosis columns: hand the frame to hosp_prep.stack_merger
    # (see hosp_prep.py for details of the reshape).
    df = hosp_prep.stack_merger(df)

# Give every row a constant `val` of 1.
df = df.assign(val=1)

# Prompt for the missing-value report.
print("Are there missing values in any row?\n")
Пример #3
0
# Branch on the number of diagnosis columns that were found.
dx_feature_count = len(diagnosis_feats)
if dx_feature_count == 0:
    # No diagnosis columns at all; report it and continue.
    print("Something went wrong, there are no ICD code features")
elif dx_feature_count == 1:
    # Single diagnosis column: rename dx_1 and mark it as diagnosis_id 1.
    df.rename(columns={'dx_1': 'cause_code'}, inplace=True)
    df['diagnosis_id'] = 1
else:
    # Reshape diagnoses from wide to long
    #   - review `hosp_prep.py` for additional documentation
    # The patient_index column is dropped straight after the reshape.
    df = hosp_prep.stack_merger(df).drop('patient_index', axis=1)

# Only rows from the GEO_COL_14 source get their cause codes sanitized; the
# original note says the 2014 data carries proper ICD codes.
from_geo_col_14 = df['source'] == "GEO_COL_14"
df.loc[from_geo_col_14, 'cause_code'] = hosp_prep.sanitize_diagnoses(
    df.loc[from_geo_col_14, 'cause_code'])

#####################################################
# GROUPBY AND AGGREGATE
#####################################################

# Report whether any cell in the frame is null. A warning is issued when
# nulls are present; otherwise a plain message is printed.
print("Are there missing values in any row?")
null_condition = df.isnull().values.any()
if not null_condition:
    print(">> No.")
else:
    warnings.warn(">> Yes.  ROWS WITH ANY NULL VALUES WILL BE LOST ENTIRELY")

group_vars = [
    'cause_code', 'diagnosis_id', 'sex_id', 'age_start', 'age_end',