def main():
    """Build per-day missingness indicator flags for each analyte.

    Reads design_matrix_analytes from the SQLite db, forward-fills values
    within each encounter, and flags with 1 wherever a value is still
    missing after the fill (i.e. the analyte was never measured up to that
    day). Stores the result in the analytes_miss_flag table.
    """
    db = '../../../../Data/db/adult_decomp.db'
    conn = create_connection(db)
    if conn is None:
        # BUG FIX / consistency: the sibling entry points guard against a
        # failed connection; previously this crashed on a None connection
        print("Error! cannot create the database connection.")
        return None
    analytes_df = pd.read_sql('SELECT * FROM design_matrix_analytes', conn)
    analytes_miss_flag = pd.DataFrame()
    analytes_miss_flag['pat_enc_csn_id'] = analytes_df['pat_enc_csn_id']
    analytes_miss_flag['days_to_admission'] = analytes_df['days_to_admission']
    # forward-fill within each encounter; reindex restores the original
    # column order (groupby.ffill drops the grouping key)
    analytes_df = analytes_df.groupby('pat_enc_csn_id').ffill().reindex(
        analytes_df.columns, axis=1)
    # after the fill, a remaining NaN means no prior measurement existed;
    # one loop replaces eleven copy-pasted assignments
    analytes = ['albumin', 'bun', 'creatinine', 'hct', 'inr', 'meg',
                'platelets', 'potassium', 'sodium', 'wbc', 'glucose']
    for name in analytes:
        analytes_miss_flag[name + '_miss_flag'] = (
            analytes_df[name + '_avg'].isnull().astype(int))
    # store the table to database
    analytes_miss_flag.to_sql('analytes_miss_flag', conn, if_exists='replace')
    # close database connection
    conn.close()
    return None
def main():
    """Load the cohort and ADT transfer CSVs into the adult decomp database."""
    db = '../../Data/db/adult_decomp.db'
    # create a database connection
    conn = create_connection(db)
    if conn is not None:
        # (csv path, destination table) pairs, imported in order
        tables = [
            ('../../Data/Processed/cohort/adult_decomp_cohort.csv',
             'adult_decomp_cohort'),
            ('../../Data/Processed/adult_decomp_adt_transfer.csv',
             'adult_decomp_adt_transfer'),
        ]
        for csv_path, table_name in tables:
            csv_to_sqlite(csv_path, conn, table_name, index_col=0)
            # index on encounter id for fast joins
            add_index(conn, table_name, 'pat_enc_csn_id')
    else:
        print("Error! cannot create the database connection.")
    # close database connection
    conn.close()
def main():
    """Load the cleaned vitals CSVs (bp, level of consciousness, O2, pulse,
    spo2, respiratory rate, temperature) into the adult decomp database and
    index each table on encounter id.
    """
    db = '../../Data/db/adult_decomp.db'
    # create a database connection
    conn = create_connection(db)
    if conn is not None:
        vitals_dir = '../../Data/Processed/features/vitals/'
        # (relative csv path, destination table) pairs, imported in order
        tables = [
            ('bp/bp_cleaned.csv', 'bp_cleaned'),
            ('level_of_consciousness/level_of_consciousness_cleaned.csv',
             'level_of_consciousness_cleaned'),
            ('O2/O2.csv', 'O2'),
            # BUG FIX: pulse was the only import missing index_col=0, which
            # pulled the CSV's index in as a data column
            ('pulse/pulse_cleaned.csv', 'pulse_cleaned'),
            ('pulse_oximetry/pulse_oximetry_cleaned.csv',
             'pulse_oximetry_cleaned'),
            ('respiratory_rate/respiratory_rate_cleaned.csv',
             'respiratory_rate_cleaned'),
            ('temperature/temperature_cleaned.csv', 'temperature_cleaned'),
        ]
        for rel_path, table_name in tables:
            csv_to_sqlite(vitals_dir + rel_path, conn, table_name,
                          index_col=0)
            add_index(conn, table_name, 'pat_enc_csn_id')
    else:
        print("Error! cannot create the database connection.")
    # close database connection
    conn.close()
def main():
    """Load the cleaned medication CSVs into the adult decomp database."""
    db = '../../Data/db/adult_decomp.db'
    # create a database connection
    conn = create_connection(db)
    if conn is not None:
        # every medication follows the same <med>/<med>_cleaned.csv layout
        # ('immunosuppresent' spelling matches the on-disk path)
        meds = ['antibiotics', 'fluids', 'immunosuppresent', 'insulin',
                'vasopressors']
        for med in meds:
            table = med + '_cleaned'
            csv_path = ('../../Data/Processed/features/medications/'
                        + med + '/' + table + '.csv')
            csv_to_sqlite(csv_path, conn, table, index_col=0)
            add_index(conn, table, 'pat_enc_csn_id')
    else:
        print("Error! cannot create the database connection.")
    # close database connection
    conn.close()
def main():
    """Load the hospital unit metadata CSV into the data pipeline database."""
    # NOTE(review): the db path depth differs from the csv path's — confirm
    # the expected working directory for this script
    db = '../../../../../db/data_pipeline.db'
    # create a database connection
    conn = create_connection(db)
    if conn is None:
        print("Error! cannot create the database connection.")
    else:
        # add ad_hospital_units
        csv_to_sqlite('../../Data/metadata/hospital_unit/ad_hospital_units.csv',
                      conn, 'ad_hospital_units')
    # close database connection
    conn.close()
def main():
    """Create the adult_decomp_cohort table from pull_cohort.sql and export
    it to CSV.
    """
    database = "P:/dihi_qi/data_pipeline/db/data_pipeline.db"
    # with-block closes the SQL file handle (open(...).read() leaked it)
    with open('./sql/pull_cohort.sql', 'r') as f:
        cohort_qry = f.read()
    # create a database connection
    conn = create_connection(database)
    if conn is None:
        # BUG FIX: export_table and conn.close() previously ran even when
        # the connection failed, raising on the None connection
        print("Error! cannot create the database connection.")
        return
    # create adult_decomp_cohort table
    create_table(conn, cohort_qry)
    export_table(conn, 'adult_decomp_cohort',
                 '../../../Data/Processed/cohort/adult_decomp_cohort.csv')
    conn.close()
def main():
    """Impute missing analyte values in design_matrix_analytes.

    Day-1 gaps are filled with a clinically normal value; later gaps are
    forward-filled with the last value observed for the encounter. The
    result is stored as design_matrix_analytes_cleaned.
    """
    db = '../../../Data/db/adult_decomp.db'
    conn = create_connection(db)
    analytes_df = pd.read_sql('SELECT * FROM design_matrix_analytes', conn)
    # fill first day's missing values with a normal value; one loop replaces
    # eleven copy-pasted .loc assignments
    # NOTE(review): 'meg_avg' presumably means magnesium (normal ~2.2) —
    # confirm against the upstream schema
    normal_values = {
        'albumin_avg': 4,
        'bun_avg': 14,
        'creatinine_avg': 1,
        'hct_avg': 40,
        'inr_avg': 1.4,
        'meg_avg': 2.2,
        'platelets_avg': 250,
        'potassium_avg': 4.2,
        'sodium_avg': 140,
        'wbc_avg': 7.5,
        'glucose_avg': 100,
    }
    first_day = analytes_df['days_to_admission'] == 1
    for col, normal in normal_values.items():
        analytes_df.loc[analytes_df[col].isnull() & first_day, col] = normal
    # fill remaining missing values with the value collected last day;
    # reindex restores the column order dropped by groupby.ffill
    analytes_df = analytes_df.groupby('pat_enc_csn_id').ffill().reindex(
        analytes_df.columns, axis=1)
    # store the table to database
    analytes_df.to_sql('design_matrix_analytes_cleaned', conn,
                       if_exists='replace')
    # close database connection
    conn.close()
    return None
def main():
    """Create and index the cohort_enc table, then export it to CSV."""
    database = "../../../Data/db/adult_decomp.db"
    # with-block closes the SQL file handle (open(...).read() leaked it)
    with open('./sql/cohort_enc.sql', 'r') as f:
        qry = f.read()
    # create a database connection
    conn = create_connection(database)
    if conn is None:
        # BUG FIX: export_table and conn.close() previously ran even on a
        # failed (None) connection and raised
        print("Error! cannot create the database connection.")
        return
    create_table(conn, qry)
    add_index(conn, 'cohort_enc', 'pat_enc_csn_id')
    export_table(conn, 'cohort_enc',
                 '../../../Data/Processed/cohort/cohort_enc.csv')
    conn.close()
def main():
    """Create the adult_decomp_adt_transfer table and export it to CSV."""
    database = "P:/dihi_qi/data_pipeline/db/data_pipeline.db"
    # with-block closes the SQL file handle (open(...).read() leaked it)
    with open('./adt_transfer.sql', 'r') as f:
        transfer_qry = f.read()
    # create a database connection
    conn = create_connection(database)
    if conn is None:
        # BUG FIX: the export and close previously ran on a failed (None)
        # connection and raised
        print("Error! cannot create the database connection.")
        return
    # create adult_decomp_adt_transfer table
    create_table(conn, transfer_qry)
    # export the table to a csv file
    export_table(conn, 'adult_decomp_adt_transfer',
                 '../../Data/Processed/adult_decomp_adt_transfer.csv')
    conn.close()
def main():
    """Create the outcome table from outcome.sql and export it to CSV."""
    database = "../../../Data/db/adult_decomp.db"
    # with-block closes the SQL file handle (open(...).read() leaked it)
    with open('./sql/outcome.sql', 'r') as f:
        outcome_qry = f.read()
    # create a database connection
    conn = create_connection(database)
    if conn is None:
        # BUG FIX: the export and close previously ran on a failed (None)
        # connection and raised
        print("Error! cannot create the database connection.")
        return
    # create outcome table
    create_table(conn, outcome_qry)
    export_table(conn, 'outcome',
                 '../../../Data/Processed/outcome/outcome.csv')
    conn.close()
def parse_load_data():
    """Extract test data from the work log and load it into the database.

    Parses each log line with FrameConstants.parsing_expression and bulk
    inserts the captured fields. Does nothing when the log file is absent.
    """
    logging.info('Extraction of data from log file')
    root_path = os.path.dirname(__file__)
    log_file = os.path.join(root_path, 'data', 'test_source',
                            NameConstants.work_log_file.value)
    if not os.path.exists(log_file):
        return
    # hoist the pattern compile out of the per-line loop
    pattern = re.compile(FrameConstants.parsing_expression.value)
    connection = db.create_connection()
    try:
        db.execute_query(connection, db.CREATE_TABLE)
        parsed_log_file = []
        with open(log_file) as log:
            for line in log:
                matches = pattern.findall(line)
                if not matches:
                    # BUG FIX: a non-matching line previously raised
                    # IndexError on data[0]; skip it with a warning instead
                    logging.warning('Skipping unparsable log line: %r', line)
                    continue
                fields = matches[0]
                parsed_log_file.append(
                    (fields[0], fields[2], fields[3], fields[5]))
        logging.info('Load data to database')
        db.executemany_query(connection, db.INSERT_LOG_DATA, parsed_log_file)
    finally:
        # BUG FIX: close the connection even if parsing or the insert raises
        connection.close()
def main():
    """Load the cleaned ICD-10 diagnosis CSV into the adult decomp database."""
    db = '../../Data/db/adult_decomp.db'
    # create a database connection
    conn = create_connection(db)
    if conn is None:
        print("Error! cannot create the database connection.")
    else:
        csv_to_sqlite(
            '../../Data/Processed/features/diags/diags_icd10_cleaned.csv',
            conn, 'diags_icd10_cleaned', index_col=0)
        # diagnoses are indexed by patient id rather than encounter id
        add_index(conn, 'diags_icd10_cleaned', 'pat_id')
    # close database connection
    conn.close()
def main():
    """Load the design matrix label CSV into the adult decomp database."""
    db = '../../Data/db/adult_decomp.db'
    # create a database connection
    conn = create_connection(db)
    if conn is None:
        print("Error! cannot create the database connection.")
    else:
        # add the outcome label table for the design matrix
        # (the original "# add bp" comment was a copy-paste leftover)
        csv_to_sqlite(
            '../../Data/Modeling/design_matrix/design_matrix_label.csv',
            conn, 'design_matrix_label', index_col=0)
        add_index(conn, 'design_matrix_label', 'pat_enc_csn_id')
    # close database connection
    conn.close()
def main():
    """Impute missing vitals in design_matrix_vitals.

    LOC and supplemental-oxygen flags default to 0; day-1 gaps in each
    vital's max/min/avg columns get a clinically normal value; later gaps
    are forward-filled per encounter. The result is stored as
    design_matrix_vitals_cleaned.
    """
    db = '../../../Data/db/adult_decomp.db'
    conn = create_connection(db)
    vital_df = pd.read_sql('SELECT * FROM design_matrix_vitals', conn)
    # fill missing values in loc and sup oxy with 0
    vital_df = vital_df.fillna(value={'loc_non_alert': 0, 'sup_oxy_flag': 0})
    # fill first day's missing values with a normal vital value; a null
    # _max column is the marker that the whole vital is missing that day.
    # One loop replaces six copy-pasted .loc blocks.
    normal_values = {
        'systolic_bp': 120,
        'diastolic_bp': 80,
        'pulse': 80,
        'spo2': 96,
        'resp': 16,
        'temp': 37,
    }
    first_day = vital_df['days_to_admission'] == 1
    for vital, normal in normal_values.items():
        cols = [vital + '_max', vital + '_min', vital + '_avg']
        vital_df.loc[vital_df[vital + '_max'].isnull() & first_day,
                     cols] = normal
    # fill remaining missing values with the value collected last day;
    # reindex restores the column order dropped by groupby.ffill
    vital_df = vital_df.groupby('pat_enc_csn_id').ffill().reindex(
        vital_df.columns, axis=1)
    # store the table to database
    vital_df.to_sql('design_matrix_vitals_cleaned', conn, if_exists='replace')
    # close database connection
    conn.close()
    return None
def main():
    """Build per-day missingness indicator flags for each vital sign.

    LOC / supplemental-oxygen flags mark raw missingness; the remaining
    vitals are flagged only where a value is still missing after a
    per-encounter forward fill (i.e. never measured up to that day).
    Stores the result in the vitals_miss_flag table.
    """
    db = '../../../../Data/db/adult_decomp.db'
    conn = create_connection(db)
    vital_df = pd.read_sql('SELECT * FROM design_matrix_vitals', conn)
    vitals_miss_flag = pd.DataFrame()
    vitals_miss_flag['pat_enc_csn_id'] = vital_df['pat_enc_csn_id']
    vitals_miss_flag['days_to_admission'] = vital_df['days_to_admission']
    # these two are flagged on the raw data, before any forward fill
    vitals_miss_flag['loc_miss_flag'] = (
        vital_df['loc_non_alert'].isnull().astype(int))
    vitals_miss_flag['sup_oxy_miss_flag'] = (
        vital_df['sup_oxy_flag'].isnull().astype(int))
    # forward-fill within each encounter; reindex restores the column order
    # dropped by groupby.ffill
    vital_df = vital_df.groupby('pat_enc_csn_id').ffill().reindex(
        vital_df.columns, axis=1)
    # after the fill, a remaining NaN in the _max column means the vital
    # was never measured up to that day; one loop replaces six copy-pasted
    # assignments
    for vital in ['systolic_bp', 'diastolic_bp', 'pulse', 'spo2', 'resp',
                  'temp']:
        vitals_miss_flag[vital + '_miss_flag'] = (
            vital_df[vital + '_max'].isnull().astype(int))
    # store the table to database
    vitals_miss_flag.to_sql('vitals_miss_flag', conn, if_exists='replace')
    # close database connection
    conn.close()
    return None
def get_data_for_file(self, file_name):
    """Return all stored log records for *file_name*.

    NOTE(review): db.SELECT_DATA is filled in via str.format, not bound
    parameters — if file_name can come from untrusted input this is a SQL
    injection vector; confirm and switch to parameterized queries.
    """
    connection = db.create_connection()
    try:
        return db.execute_read_query(
            connection, db.SELECT_DATA.format("*", file_name))
    finally:
        # BUG FIX: close the connection even when the query raises (the
        # original leaked it on error)
        connection.close()
def main():
    """Load every cleaned analyte CSV into the adult decomp database and
    index each table on encounter id.
    """
    db = '../../Data/db/adult_decomp.db'
    # create a database connection
    conn = create_connection(db)
    if conn is not None:
        # every analyte follows the same <name>/<name>_cleaned.csv layout,
        # so one loop replaces seventeen copy-pasted import stanzas
        analytes = ['albumin', 'bandemia', 'bun', 'creatinine', 'glucose',
                    'hct', 'inr', 'lactate', 'meg', 'pco2', 'ph',
                    'platelets', 'po2', 'potassium', 'sodium', 'trop',
                    'wbc']
        for name in analytes:
            print('Now importing ' + name)
            table = name + '_cleaned'
            csv_path = ('../../Data/Processed/features/analytes/'
                        + name + '/' + table + '.csv')
            csv_to_sqlite(csv_path, conn, table, index_col=0)
            add_index(conn, table, 'pat_enc_csn_id')
    else:
        print("Error! cannot create the database connection.")
    # close database connection
    conn.close()