def init_runtime_config():
    """ Initialize the runtime configuration file """
    runrc_path = os.path.join(ENV['home'], RUNTIME_CONFIG_FILE_NAME)
    if not os.path.exists(runrc_path):
        FileHelper.create_empty_file(runrc_path)
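# A minimal sketch of what FileHelper.create_empty_file is assumed to do
# (hypothetical; the project's actual helper may differ): create the file if
# it does not already exist, without truncating an existing one.
import os

def create_empty_file(path):
    # Opening in append mode creates the file when missing and leaves
    # existing content untouched.
    with open(path, 'a', encoding='utf-8'):
        os.utime(path, None)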
def get_chartevents_by_pthadmicu(self, subject_ids=None, hadm_ids=None, icustay_ids=None):
    """ Retrieve CHARTEVENTS matching the given hospital admissions """
    ### Conditions
    criteria = {}
    if subject_ids is not None:
        criteria[self.config['PREFIX_CHEV'] + 'SUBJECT_ID'] = subject_ids
    if hadm_ids is not None:
        criteria[self.config['PREFIX_CHEV'] + 'HADM_ID'] = hadm_ids
    if icustay_ids is not None:
        criteria[self.config['PREFIX_CHEV'] + 'ICUSTAY_ID'] = icustay_ids
    ### Limit the number of rows read when LIMIT_NUM_CHARTEVENTS is set
    if self.config['PARAM']['LIMIT_NUM_CHARTEVENTS'] > 0:
        criteria[self.config['CONST']['N_ROWS']] = self.config['PARAM']['LIMIT_NUM_CHARTEVENTS']
    chartevent = ChartEvent(**self.config)
    df_chartevs = chartevent.get_chartevents_by_phadmicu(criteria)
    ### Save CHARTEVENTS to file
    filename = self.config['OUT_DIR_S1'] + self.config['OUT_FNAME']['CHARTEVENTS']
    FileHelper.save_to_csv(df_chartevs, filename)
    return df_chartevs
def __init__(self, base_url, access_token, s3_bucket):
    super().__init__(base_url, access_token, s3_bucket)
    self.program_name = "open"
    self.project_code = "ChestX-ray8"
    self.metadata_helper = MetadataHelper(
        base_url=self.base_url,
        program_name=self.program_name,
        project_code=self.project_code,
        access_token=access_token,
    )
    self.file_helper = FileHelper(
        base_url=self.base_url,
        program_name=self.program_name,
        project_code=self.project_code,
        access_token=access_token,
    )
    self.cmc_submitter_id = format_submitter_id("cmc_chestxray8", {})
    self.core_metadata_collection = [
        {
            "submitter_id": self.cmc_submitter_id,
            "projects": [{"code": self.project_code}],
        }
    ]
    self.imaging_file = []
def generate_triage_history_data(db_conn, project_name, file_path):
    # triage_history_sql = "SELECT * FROM `automation_case_results` where triage_result is not NULL and error_type_id in (select id from error_types where name in ('Product Error', 'Product Change')) and automation_script_result_id in (select id from automation_script_results where triage_result is not NULL and automation_script_id in (select id from automation_scripts where project_id=2))"
    # triage_history_sql = "SELECT * FROM `automation_case_results` where error_type_id in (select id from error_types) and automation_script_result_id in (select id from automation_script_results where automation_script_id in (select id from automation_scripts where project_id=2))"
    triage_history_sql = """
        SELECT tr.id as round_id, acr.automation_case_id, asr.automation_script_id,
               te.name as env, b.name as browser, et.name as triage_type,
               acr.error_message,
               (UNIX_TIMESTAMP(asr.end_time)-UNIX_TIMESTAMP(asr.start_time)) as script_duration
        FROM `automation_case_results` as acr
        left join `automation_script_results` as asr on acr.automation_script_result_id=asr.id
        left join `test_rounds` as tr on asr.test_round_id=tr.id
        left join `test_environments` as te on tr.test_environment_id=te.id
        left join `browsers` as b on tr.browser_id=b.id
        left join `projects` as p on p.id=tr.project_id
        left join `error_types` as et on et.id=acr.error_type_id
        where p.name='%s' and et.name is not NULL
        ORDER BY `round_id` ASC
    """ % project_name
    print("generate triage history data of project: %s" % project_name)
    triage_history_data = db_conn.get_all_results_from_database(triage_history_sql)
    if len(triage_history_data) == 0:
        print("no triage history in project: %s" % project_name)
        return False
    else:
        FileHelper.save_db_query_result_to_csv(triage_history_data, file_path)
        print("there are %d rows in database when query the triage history of project: %s\n"
              % (len(triage_history_data), project_name))
        return True
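# Hedged usage sketch for generate_triage_history_data: the function only
# assumes a connection object exposing get_all_results_from_database(sql).
# The stub below stands in for the project's real database connector, and the
# project name and output path are placeholders.
class StubDBConnection:
    def get_all_results_from_database(self, sql):
        # A real connector would execute the SQL and return a list of rows.
        return []

if __name__ == "__main__":
    ok = generate_triage_history_data(StubDBConnection(), "MyProject",
                                      "output/triage_history.csv")
    print("exported" if ok else "nothing exported")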
def __init__(self, base_url, access_token, s3_bucket):
    super().__init__(base_url, access_token, s3_bucket)
    self.program_name = "open"
    self.project_code = "COXRAY"
    self.metadata_helper = MetadataHelper(
        base_url=self.base_url,
        program_name=self.program_name,
        project_code=self.project_code,
        access_token=access_token,
    )
    self.file_helper = FileHelper(
        base_url=self.base_url,
        program_name=self.program_name,
        project_code=self.project_code,
        access_token=access_token,
    )
    self.nodes = {
        "core_metadata_collection": [],
        "study": [],
        "subject": [],
        "observation": [],
        "follow_up": [],
        "demographic": [],
        "imaging_file": [],
    }
def get_outputevents_by_pthadmicu(self, subject_ids=None, hadm_ids=None, icustay_ids=None):
    """ Retrieve OUTPUTEVENTS matching the given hospital admissions

    Parameters
    ----------
    subject_ids : the list of patient ids
    hadm_ids : the list of hospital admission ids
    icustay_ids : the list of ICU stay ids

    Return
    ------
    df_outputevs : the matching OUTPUTEVENTS dataframe
    """
    ### Conditions
    criteria = {}
    if subject_ids is not None:
        criteria[self.config['PREFIX_OUEV'] + 'SUBJECT_ID'] = subject_ids
    if hadm_ids is not None:
        criteria[self.config['PREFIX_OUEV'] + 'HADM_ID'] = hadm_ids
    if icustay_ids is not None:
        criteria[self.config['PREFIX_OUEV'] + 'ICUSTAY_ID'] = icustay_ids
    ### Optionally limit the number of rows read
    # criteria['nrows'] = 4500000
    outputevs = OutputEvent(**self.config)
    df_outputevs = outputevs.get_outputevents_by_pthadmicu(criteria)
    ### Save OUTPUTEVENTS to file
    filename = self.config['OUT_DIR_S1'] + self.config['OUT_FNAME']['OUTPUTEVENTS']
    FileHelper.save_to_csv(df_outputevs, filename)
    return df_outputevs
class COXRAY_FILE(base.BaseETL):
    def __init__(self, base_url, access_token, s3_bucket):
        super().__init__(base_url, access_token, s3_bucket)
        self.program_name = "open"
        self.project_code = "COXRAY"
        self.file_helper = FileHelper(
            base_url=self.base_url,
            program_name=self.program_name,
            project_code=self.project_code,
            access_token=access_token,
        )

    def files_to_submissions(self):
        for image_filepath in Path(COXRAY_DATA_PATH).joinpath("images").iterdir():
            did, rev, md5, size = self.file_helper.find_by_name(image_filepath.name)
            if not did:
                guid = self.file_helper.upload_file(image_filepath)
                print(f"file {image_filepath.name} uploaded with guid: {guid}")
            else:
                print(f"file {image_filepath.name} exists in indexd... skipping...")

    def submit_metadata(self):
        pass
def generate_test_round_results_data(db_conn, file_path, round_id=None, script_id=None, case_id=None):
    # test_round_results_sql = "SELECT * FROM automation_case_results where automation_script_result_id in (2609677, 2609831, 2609879, 2609971, 2610080, 2610095, 2610333, 2610366, 2610380, 2610415, 2609629, 2609636, 2609638, 2609644, 2609651, 2609663);"
    if case_id:
        test_round_results_sql = "SELECT * FROM automation_case_results where id=%d;" % int(case_id)
    elif script_id:
        test_round_results_sql = "SELECT * FROM automation_case_results where automation_script_result_id=%d;" % int(script_id)
    else:
        test_round_results_sql = "SELECT * FROM automation_case_results where automation_script_result_id in (select id from automation_script_results where test_round_id=%d);" % int(round_id)
    print("generate test round all results data")
    test_round_results = db_conn.get_all_results_from_database(test_round_results_sql)
    if len(test_round_results) == 0:
        print("no result in this test round with id: %d" % int(round_id))
        return False
    else:
        FileHelper.save_db_query_result_to_csv(test_round_results, file_path)
        print("there are %d rows in database when query the round all results\n" % len(test_round_results))
        return True
def __output_num_patient_by_limit(self, df_pt_adm_icu_outevs_charevs):
    if self.config['PARAM']['LIMIT_NUM_PATIENT'] > 0:
        num_patients = self.config['PARAM']['LIMIT_NUM_PATIENT']
        # Get unique subject_id values from the SUBJECT_ID column
        list_unique_subject_id = df_pt_adm_icu_outevs_charevs['SUBJECT_ID'].unique().tolist()
        # Draw a uniform random sample of size num_patients from np.arange(len)
        # (note: np.random.choice samples with replacement by default, so the
        # same index may be drawn more than once)
        ran_idx = np.random.choice(len(list_unique_subject_id), num_patients)
        # Get SUBJECT_ID values using ran_idx and the column name
        # (the positions index the full column, bounded by the number of unique ids)
        ran_subject_id = [
            df_pt_adm_icu_outevs_charevs['SUBJECT_ID'].iloc[idx] for idx in ran_idx
        ]
        # Keep only the rows whose subject_id is in the sampled list
        mask = df_pt_adm_icu_outevs_charevs['SUBJECT_ID'].isin(ran_subject_id)
        df_events_by_patient = df_pt_adm_icu_outevs_charevs[mask]
        # Save to file
        filename = self.config['OUT_DIR_S2'] + self.config['OUT_FNAME']['OUT_LIMIT_NUM_EVENTS_WINSIZE_24H']
        FileHelper.save_to_csv(df_events_by_patient, filename)
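# Hedged alternative sketch: if the intent is to keep num_patients distinct
# patients, one could sample the unique-ID list itself without replacement.
# This is an assumption about the intended behaviour, not the original logic,
# which indexes the full SUBJECT_ID column with positions drawn from the
# range of unique IDs.
import numpy as np

def sample_unique_subjects(df, num_patients, seed=None):
    rng = np.random.default_rng(seed)
    unique_ids = df['SUBJECT_ID'].unique()
    n = min(num_patients, len(unique_ids))
    chosen = rng.choice(unique_ids, size=n, replace=False)
    # Keep only rows belonging to the sampled patients
    return df[df['SUBJECT_ID'].isin(chosen)]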
def __init__(self, base_url, access_token, s3_bucket):
    super().__init__(base_url, access_token, s3_bucket)
    self.program_name = "open"
    self.project_code = "COXRAY"
    self.file_helper = FileHelper(
        base_url=self.base_url,
        program_name=self.program_name,
        project_code=self.project_code,
        access_token=access_token,
    )
def generate_regression_history_data(db_conn, project_id, file_path):
    generate_flag = Config.load_env("generate_regression_history")
    if not os.path.exists(file_path):
        generate_flag = True
    if generate_flag:
        print("generate history regression data")
        # select the last 12 months of history data for reference
        period_regression_sql = "select * from test_rounds where project_id=%d and DATE_SUB(CURDATE(), INTERVAL 12 MONTH) <= date(start_time) and end_time is not NULL;" % int(project_id)
        period_regression_history = db_conn.get_all_results_from_database(period_regression_sql)
        FileHelper.save_db_query_result_to_csv(period_regression_history, file_path)
        print("there are %d rows in database when query the history\n" % len(period_regression_history))
    else:
        print("NOT generate history regression data\n")
def convert_code():
    args = arg_parser.parse_args()
    source_filename = args.source
    destination_filename = args.dest
    result = inputs.json_file(source_filename, converters.to_html)
    if destination_filename is None:
        print(result)
    else:
        FileHelper.write_to_file(destination_filename, result)
        print(f'OK. JSON File `{source_filename}` converted to `{destination_filename}`')
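# Hedged sketch of the argument parser that convert_code() relies on: an
# `arg_parser` whose parse_args() result exposes `source` and `dest`.
# The flag names and help texts below are assumptions for illustration only.
import argparse

arg_parser = argparse.ArgumentParser(description="Convert a JSON file to HTML")
arg_parser.add_argument("source", help="path to the input JSON file")
arg_parser.add_argument("--dest", default=None,
                        help="optional output file; the result is printed when omitted")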
def get_patients_by_hamd(self, ids):
    """ Get Patients based on a list of SUBJECT_IDs """
    patient = Patient(**self.config)
    df_pts = patient.get_patients_by_ids(ids)
    ### Save Patients to file
    filename = self.config['OUT_DIR_S1'] + self.config['OUT_FNAME']['PATIENTS']
    FileHelper.save_to_csv(df_pts, filename)
    return df_pts
def generate_test_round_errors_data(db_conn, round_id, file_path):
    test_round_errors_sql = "SELECT * FROM automation_case_results where automation_script_result_id in (select id from automation_script_results where test_round_id=%d) and result = 'failed';" % int(round_id)
    print("generate test round errors data")
    test_round_errors = db_conn.get_all_results_from_database(test_round_errors_sql)
    if len(test_round_errors) == 0:
        print("no errors in this test round with id: %d" % int(round_id))
        return False
    else:
        FileHelper.save_db_query_result_to_csv(test_round_errors, file_path)
        print("there are %d rows in database when query the round error\n" % len(test_round_errors))
        return True
def generate_triage_history_data(db_conn, project_name, file_path):
    # triage_history_sql = "SELECT * FROM `automation_case_results` where triage_result is not NULL and error_type_id in (select id from error_types where name in ('Product Error', 'Product Change')) and automation_script_result_id in (select id from automation_script_results where triage_result is not NULL and automation_script_id in (select id from automation_scripts where project_id=2))"
    # triage_history_sql = "SELECT * FROM `automation_case_results` where error_type_id in (select id from error_types) and automation_script_result_id in (select id from automation_script_results where automation_script_id in (select id from automation_scripts where project_id=2))"
    triage_history_sql = "select * from prejudge_seeds where project_name='%s'" % project_name
    print("generate triage history data")
    triage_history = db_conn.get_all_results_from_database(triage_history_sql)
    if len(triage_history) == 0:
        print("no triage history in project: %s" % project_name)
        return False
    else:
        FileHelper.save_db_query_result_to_csv(triage_history, file_path)
        print("there are %d rows in database when query the triage history of project: %s\n"
              % (len(triage_history), project_name))
        return True
def merge_df(self, df_left, df_right, left, right, how, out_filename=None):
    """ Merge two dataframes on the given key columns """
    ### Merge the two tables (e.g. Patients, Admissions and ICU Stays)
    result = pd.merge(df_left, df_right, left_on=left, right_on=right, how=how)
    ### Save the merged result to file
    if out_filename is not None:
        ### filename = self.config['OUT_DIR_S1'] + out_filename
        FileHelper.save_to_csv(result, out_filename)
    else:
        filename = self.config['OUT_DIR_S1'] + 'merged_df.csv'
        FileHelper.save_to_csv(result, filename)
    return result
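# Hedged usage sketch: merging PATIENTS with ADMISSIONS on the subject key.
# The df_patients / df_admissions names and the output filename are
# placeholders drawn from the surrounding snippets, not guaranteed values.
#
#   df_pts_adms = self.merge_df(
#       df_patients, df_admissions,
#       left=['SUBJECT_ID'],
#       right=[self.config['PREFIX_HADM'] + 'SUBJECT_ID'],
#       how='inner',
#       out_filename=self.config['OUT_DIR_S1'] + 'PTS_ADMS.csv',
#   )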
def get_adms(self, criteria=None):
    """ Read admissions grouped by date and choose the admissions of the year
    that contains the largest number of admissions """
    ### criteria = {'nrows': 10}
    admission = Admission(**self.config)
    ### Read admissions grouped by date and choose the admissions of the year
    ### with the largest number of admissions
    # df_adms = admission.get_admissions_by_year(criteria)
    df_adms = admission.get_admissions(criteria)
    ### Limit the number of patients based on the LIMIT_NUM_PATIENT condition
    df_adms = self.__shape_num_patient_by_limit(df_adms)
    filename = self.config['OUT_DIR_S1'] + self.config['OUT_FNAME']['ADMISSIONS']
    FileHelper.save_to_csv(df_adms, filename)
    return df_adms
def get_cptevents_by_phamd(self, subject_ids, hadm_ids):
    """ Get CPTEVENTS by SUBJECT_ID and HADM_ID """
    criteria = {}
    criteria['SUBJECT_ID'] = subject_ids
    criteria['HADM_ID'] = hadm_ids
    cptevent = CPTEvent(**self.config)
    df_cptevents = cptevent.get_cptevents_by_subject_hamd(criteria)
    ### Save CPTEVENTS filtered by Patients and Admissions
    ### (note: this reuses the ICUSTAYS output filename from the config)
    filename = self.config['OUT_DIR_S1'] + self.config['OUT_FNAME']['ICUSTAYS']
    FileHelper.save_to_csv(df_cptevents, filename)
    return df_cptevents
def get_icustays_by_pthamd(self, subject_ids, hadm_ids):
    """ Get ICUSTAYS by SUBJECT_ID and HADM_ID """
    ### Conditions
    criteria = {}
    criteria[self.config['PREFIX_ICU'] + 'SUBJECT_ID'] = subject_ids
    criteria[self.config['PREFIX_ICU'] + 'HADM_ID'] = hadm_ids
    icustay = ICUStay(**self.config)
    df_icustays = icustay.get_icustays_by_subject_hamd(criteria)
    ### Save ICUStays filtered by Patients and Admissions
    filename = self.config['OUT_DIR_S1'] + self.config['OUT_FNAME']['ICUSTAYS']
    FileHelper.save_to_csv(df_icustays, filename)
    return df_icustays
def movefile_ins1_to_outs2(self):
    """ Move files from Output Step 1 to Input Step 2
    Three files to move: OUT_PTS_ADMS_ICUS, OUT_CHARTEVENTS, OUT_OUTPUTEVENTS """
    files_to_move = []
    files_to_move.append(self.config['OUT_DIR_S1'] + self.config['OUT_FNAME']['PTS_ADMS_ICU'])
    files_to_move.append(self.config['OUT_DIR_S1'] + self.config['OUT_FNAME']['CHARTEVENTS'])
    files_to_move.append(self.config['OUT_DIR_S1'] + self.config['OUT_FNAME']['OUTPUTEVENTS'])
    move_to = []
    move_to.append(self.config['FILE_DIR_S2'] + self.config['IN_FNAME']['CSV_OUT_PTS_ADMS_ICUS'])
    move_to.append(self.config['FILE_DIR_S2'] + self.config['IN_FNAME']['CSV_OUT_CHARTEVENTS'])
    move_to.append(self.config['FILE_DIR_S2'] + self.config['IN_FNAME']['CSV_OUT_OUTPUTEVENTS'])
    ### Move from Output Step 1 to Input Step 2
    for idx, src_file in enumerate(files_to_move):
        FileHelper.move_file(src_file, move_to[idx])
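# A minimal sketch of what FileHelper.move_file is assumed to do
# (hypothetical; the project's actual helper may differ): move the source
# file to the destination, creating the destination directory if needed.
import os
import shutil

def move_file(src, dst):
    dst_dir = os.path.dirname(dst)
    if dst_dir:
        os.makedirs(dst_dir, exist_ok=True)
    shutil.move(src, dst)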
def get_ditems_outevents_by_itemid(self, item_id):
    """ Retrieve D_ITEMS of outputevents by item_id """
    ### Conditions
    criteria = {}
    criteria[self.config['PREFIX_DITEM'] + 'ITEMID'] = item_id
    ### Optionally limit the number of rows read
    ### criteria['nrows'] = 4500000
    ditem = DItem(**self.config)
    df_ditems = ditem.get_ditems_outevents_by_itemid(criteria)
    ### Save D_ITEMS to file
    filename = self.config['OUT_DIR_S1'] + self.config['OUT_FNAME']['D_ITEMS']
    FileHelper.save_to_csv(df_ditems, filename)
    return df_ditems
def execute(self, cmd):
    if len(cmd.cmd_simple_args) <= 0 or len(cmd.cmd_simple_args) > 1:
        print("alias: wrong number of command arguments")
        return ShellStatus.RUN
    arg = cmd.cmd_simple_args[0].split('=')
    alias_cmd = arg[0]
    if '"' in cmd.raw_cmd:
        raw_cmd = cmd.raw_cmd[cmd.raw_cmd.index('"') + 1:cmd.raw_cmd.rfind('"')]
    else:
        raw_cmd = arg[1]
    # If the 'w' option is present, write the alias to .yashrc
    if 'w' in cmd.cmd_options:
        content = '\nalias %s="%s"\n' % (alias_cmd, raw_cmd)
        FileHelper.write_file_from_string(cmd.env['runtime_config_file_name'], content, 'w+')
    cmd.env['alias'][alias_cmd] = raw_cmd
    return ShellStatus.RUN
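# Hedged usage sketch for the alias builtin above. The '-w' spelling of the
# write-through option is an assumption for illustration; the code only checks
# whether 'w' appears in cmd.cmd_options.
#
#   alias ll="ls -l"        registers the alias for the current session
#   alias ll="ls -l" -w     also persists it to the runtime config (.yashrc)
#
# After execute() returns, cmd.env['alias']['ll'] == 'ls -l'.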
class COXRAY(base.BaseETL):
    def __init__(self, base_url, access_token, s3_bucket):
        super().__init__(base_url, access_token, s3_bucket)
        self.program_name = "open"
        self.project_code = "COXRAY"
        self.metadata_helper = MetadataHelper(
            base_url=self.base_url,
            program_name=self.program_name,
            project_code=self.project_code,
            access_token=access_token,
        )
        self.file_helper = FileHelper(
            base_url=self.base_url,
            program_name=self.program_name,
            project_code=self.project_code,
            access_token=access_token,
        )
        self.nodes = {
            "core_metadata_collection": [],
            "study": [],
            "subject": [],
            "observation": [],
            "follow_up": [],
            "demographic": [],
            "imaging_file": [],
        }

    def files_to_submissions(self):
        with open(Path(COXRAY_DATA_PATH).joinpath("metadata.csv")) as f:
            reader = csv.reader(f, delimiter=",", quotechar='"')
            headers = next(reader)
            for row in reader:
                row_nodes = self.parse_row(headers, row)
                for k, v in row_nodes.items():
                    self.nodes[k].append(v)

    def parse_row(self, headers, row):
        cmc_submitter_id = format_submitter_id("cmc_coxray", {})
        subject_submitter_id = format_submitter_id(
            "subject_coxray", {"patientid": row[headers.index("patientid")]}
        )
        observation_submitter_id = derived_submitter_id(
            subject_submitter_id, "subject_coxray", "observation_coxray", {}
        )
        follow_up_submitter_id = derived_submitter_id(
            subject_submitter_id,
            "subject_coxray",
            "follow_up_coxray",
            {"offset": row[headers.index("offset")]},
        )
        demographic_submitter_id = derived_submitter_id(
            subject_submitter_id, "subject_coxray", "demographic_coxray", {}
        )
        imaging_file_submitter_id = format_submitter_id(
            "imaging_file_coxray", {"filename": row[headers.index("filename")]}
        )
        study_submitter_id = format_submitter_id(
            "study_coxray", {"doi": row[headers.index("doi")]}
        )

        filename = row[headers.index("filename")]
        filename = Path(filename)
        filepath = Path(COXRAY_DATA_PATH).joinpath("images", filename)
        filepath_exist = filepath.exists()

        nodes = {
            "core_metadata_collection": {
                "submitter_id": cmc_submitter_id,
                "projects": [{"code": self.project_code}],
            },
            "study": {
                "submitter_id": study_submitter_id,
                "projects": [{"code": self.project_code}],
            },
            "subject": {
                "submitter_id": subject_submitter_id,
                "projects": [{"code": self.project_code}],
                "studies": [{"submitter_id": study_submitter_id}],
            },
            "observation": {
                "submitter_id": observation_submitter_id,
                "subjects": [{"submitter_id": subject_submitter_id}],
            },
            "follow_up": {
                "submitter_id": follow_up_submitter_id,
                "subjects": [{"submitter_id": subject_submitter_id}],
            },
            "demographic": {
                "submitter_id": demographic_submitter_id,
                "subjects": [{"submitter_id": subject_submitter_id}],
            },
        }

        if filepath_exist:
            data_type = "".join(filename.suffixes)
            did, rev, md5sum, filesize = self.file_helper.find_by_name(filename=filename)
            assert (
                did
            ), f"file {filename} does not exist in the index, rerun COXRAY_FILE ETL"
            self.file_helper.update_authz(did=did, rev=rev)
            nodes["imaging_file"] = {
                "submitter_id": imaging_file_submitter_id,
                "subjects": [{"submitter_id": subject_submitter_id}],
                "follow_ups": [{"submitter_id": follow_up_submitter_id}],
                "core_metadata_collections": [{"submitter_id": cmc_submitter_id}],
                "data_type": data_type,
                "data_format": "Image File",
                "data_category": "X-Ray Image",
                "file_size": filesize,
                "md5sum": md5sum,
                "object_id": did,
            }
        else:
            print(f"subject references the file that doesn't exist as a file: {filepath}")

        for k, (node, field, converter) in fields_mapping.items():
            value = row[headers.index(k)]
            if node in nodes and value:
                if converter:
                    nodes[node][field] = converter(value)
                else:
                    nodes[node][field] = value
        return nodes

    def submit_metadata(self):
        print("Submitting data...")
        for k, v in self.nodes.items():
            submitter_id_exist = []
            print(f"Submitting {k} data...")
            for node in v:
                node_record = {"type": k}
                node_record.update(node)
                submitter_id = node_record["submitter_id"]
                if submitter_id not in submitter_id_exist:
                    submitter_id_exist.append(submitter_id)
                    self.metadata_helper.add_record_to_submit(node_record)
            self.metadata_helper.batch_submit_records()
class CHESTXRAY8(base.BaseETL):
    def __init__(self, base_url, access_token, s3_bucket):
        super().__init__(base_url, access_token, s3_bucket)
        self.program_name = "open"
        self.project_code = "ChestX-ray8"
        self.metadata_helper = MetadataHelper(
            base_url=self.base_url,
            program_name=self.program_name,
            project_code=self.project_code,
            access_token=access_token,
        )
        self.file_helper = FileHelper(
            base_url=self.base_url,
            program_name=self.program_name,
            project_code=self.project_code,
            access_token=access_token,
        )
        self.cmc_submitter_id = format_submitter_id("cmc_chestxray8", {})
        self.core_metadata_collection = [
            {
                "submitter_id": self.cmc_submitter_id,
                "projects": [{"code": self.project_code}],
            }
        ]
        self.imaging_file = []

    def files_to_submissions(self):
        for image_type in ("No_findings", "Pneumonia"):
            for image_filepath in (
                Path(CHESTXRAY8_DATA_PATH)
                .joinpath("COVID-19")
                .joinpath("X-Ray Image DataSet")
                .joinpath(image_type)
                .iterdir()
            ):
                did, rev, md5, size = self.file_helper.find_by_name(image_filepath.name)
                if not did:
                    guid = self.file_helper.upload_file(image_filepath)
                    print(f"file {image_filepath.name} uploaded with guid: {guid}")
                else:
                    print(f"file {image_filepath.name} exists in indexd... skipping...")

                imaging_file_submitter_id = format_submitter_id(
                    "imaging_file_chestxray8", {"filename": image_filepath.name}
                )
                uploaded_imaging_file = {
                    "submitter_id": imaging_file_submitter_id,
                    "core_metadata_collections": [{"submitter_id": self.cmc_submitter_id}],
                    "data_type": "PNG",
                    "data_format": "Image File",
                    "data_category": "X-Ray Image",
                    "file_name": image_filepath.name,
                    "file_size": size,
                    "md5sum": md5,
                    "object_id": did,
                    "clinical_notes": image_type,
                }
                self.imaging_file.append(uploaded_imaging_file)

    def submit_metadata(self):
        print("Submitting data...")
        print("Submitting core_metadata_collection data")
        for cmc in self.core_metadata_collection:
            cmc_record = {"type": "core_metadata_collection"}
            cmc_record.update(cmc)
            self.metadata_helper.add_record_to_submit(cmc_record)
        self.metadata_helper.batch_submit_records()

        print("Submitting imaging_file data")
        for ifile in self.imaging_file:
            if_record = {"type": "imaging_file"}
            if_record.update(ifile)
            self.metadata_helper.add_record_to_submit(if_record)
        self.metadata_helper.batch_submit_records()
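# Hedged usage sketch for the ETL classes above: construct with a commons URL,
# an access token and an S3 bucket, then index/upload the images and submit
# the metadata records. All three constructor values below are placeholders,
# not real endpoints or credentials.
if __name__ == "__main__":
    etl = CHESTXRAY8(
        base_url="https://example-commons.org",  # placeholder endpoint
        access_token="<access-token>",           # placeholder credential
        s3_bucket="example-bucket",              # placeholder bucket name
    )
    etl.files_to_submissions()  # upload files missing from indexd
    etl.submit_metadata()       # submit core_metadata_collection and imaging_file records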
def start_process(self):
    """ Read data from tables: PATIENTS, ADMISSIONS, CPTEVENT """
    # Read admissions grouped by date
    df_pt_adm_icus = self.get_pt_hamd_icus()
    df_output_evs = self.get_outputevents()

    # Group output events by time window and save the dico to a dataframe
    pros_by_window = self.__grouppros_by_interval(
        df_pt_adm_icus, df_output_evs,
        ev_unit=self.config['CONST']['HUNIT_ICU'])
    dico_pros_dt, index = self.__pros_todico(pros_by_window)
    df_temp = pd.DataFrame(dico_pros_dt, index=index)

    # Save the grouped output events to file
    # Filename of the compiled tables: PATIENTS, ADMISSIONS, ICUSTAYS
    filename = self.config['OUT_DIR_S2'] + self.config['OUT_FNAME']['OE_BY_DATE_INTERVAL']
    FileHelper.save_to_csv(df_temp, filename)

    # Merge tables: PATIENTS, ADMISSIONS and ICUSTAYS
    # Conditions:
    #   SUBJECT_ID      : patient's ID
    #   HADM_SUBJECT_ID : patient's ID in the ADMISSIONS table
    col_subject_id = self.config['PREFIX_HADM'] + 'SUBJECT_ID'
    col_hadm_id = self.config['PREFIX_HADM'] + 'HADM_ID'
    col_icustay_id = self.config['PREFIX_ICU'] + 'ICUSTAY_ID'

    # Retrieve output events
    # Conditions:
    # ----------
    # col_subject_id, col_hadm_id, col_icustay_id
    left = [col_subject_id, col_hadm_id, col_icustay_id]
    right = ['subject_id', 'hadm_id', 'icustay_id']
    filename = self.config['OUT_DIR_S2'] + self.config['OUT_FNAME']['PT_ADM_ICU_OUTEVENT']
    df_pt_adm_icu_outevent = self.merge_df(
        df_pt_adm_icus, df_temp, left=left, right=right,
        how='right', out_filename=filename)

    df_chart_evs = self.get_chartevents()

    # Group chart events by time window and save the dico to a dataframe
    pros_by_window_2 = self.__grouppros_by_interval(
        df_pt_adm_icus, df_chart_evs,
        ev_unit=self.config['CONST']['HUNIT_CHAREV'], outevent=False)
    dico_pros_dt2, index2 = self.__pros_todico(pros_by_window_2)
    df_temp2 = pd.DataFrame(dico_pros_dt2, index=index2)

    # Save the grouped chart events to file
    filename = self.config['OUT_DIR_S2'] + self.config['OUT_FNAME']['CHEV_BY_DATE_INTERVAL']
    FileHelper.save_to_csv(df_temp2, filename)

    # Retrieve chart events
    # Conditions:
    # ----------
    # col_subject_id, col_hadm_id, col_icustay_id
    left = [col_subject_id, col_hadm_id, col_icustay_id]
    right = ['subject_id', 'hadm_id', 'icustay_id']
    filename = self.config['OUT_DIR_S2'] + self.config['OUT_FNAME']['PT_ADM_ICUS_CHAREVS']
    df_pt_adm_icu_charevs = self.merge_df(
        df_pt_adm_icus, df_temp2, left=left, right=right,
        how='right', out_filename=filename)

    # Append the adm-icustay-outputevents dataframe to adm-icustay-chartevents
    # Conditions:
    # ----------
    # OutputEvents (filtered by subject_id, hadm_id, icustay_id)
    # & ChartEvents (filtered by subject_id, hadm_id, icustay_id)
    df_pt_adm_icu_outevs_charevs = df_pt_adm_icu_outevent.append(
        df_pt_adm_icu_charevs, sort=True)

    # Save df_pt_adm_icu_outevs_charevs to file
    filename = self.config['OUT_DIR_S2'] + self.config['OUT_FNAME']['OUTEVENT_CHAREVS']
    FileHelper.save_to_csv(df_pt_adm_icu_outevs_charevs, filename)

    # Retrieve all rows in Admissions where (subject_id, hadm_id, icustay_id) do not match
    # (subject_id, hadm_id, icustay_id) of U(outputevents, chartevents)
    # left = [col_subject_id, col_hadm_id, col_icustay_id]
    # right = ['subject_id', 'hadm_id', 'icustay_id']
    temp_df = df_pt_adm_icus[
        ~df_pt_adm_icus[col_subject_id].isin(df_pt_adm_icu_outevs_charevs['subject_id'])
        & ~df_pt_adm_icus[col_hadm_id].isin(df_pt_adm_icu_outevs_charevs['hadm_id'])
        & ~df_pt_adm_icus[col_icustay_id].isin(df_pt_adm_icu_outevs_charevs['icustay_id'])]

    # Add columns to temp_df
    # Conditions
    # ---------
    # subject_id hadm_id icustay_id unit procedure
    # temp_df.loc[:, 'subject_id_1'] = temp_df[col_subject_id]
    # temp_df.loc[:, 'hadm_id_1'] = temp_df[col_hadm_id]
    # temp_df.loc[:, 'icustay_id_1'] = temp_df[col_icustay_id]
    # temp_df.loc[:, 'unit_1'] = ''
    # temp_df.loc[:, 'procedure_1'] = ''

    # Save temp_df to file
    filename = self.config['OUT_DIR_S2'] + self.config['OUT_FNAME']['TEMP_DF']
    FileHelper.save_to_csv(temp_df, filename)

    ##########
    # Final merge
    ##########
    df_pt_adm_icu_outevs_charevs = df_pt_adm_icu_outevs_charevs.append(
        temp_df, sort=True)
    # df_pt_adm_icu_outevs_charevs = temp_df.append(df_pt_adm_icu_outevs_charevs, sort=True)

    ### Count number of records
    self.config['PARAM']['NUM_ROWS'] = len(df_pt_adm_icu_outevs_charevs.index)

    # Save NUM_EVENTS_WINSIZE_24H to file
    # filename = self.config['OUT_DIR_S2'] + self.config['OUT_FNAME']['OUT_NUM_EVENTS_WINSIZE_24H']
    filename = self.config['OUT_DIR_S2'] + str(self.config['PARAM']['LIMIT_NUM_PATIENT']) + '_' + \
        self.config['OUT_FNAME']['OUT_NUM_EVENTS_WINSIZE_24H']
    FileHelper.save_to_csv(df_pt_adm_icu_outevs_charevs, filename)

    ### Limit the number of patients in the output based on the criteria
    self.__output_num_patient_by_limit(df_pt_adm_icu_outevs_charevs)
def predict_youtubeauto(self):
    """ Make a prediction for YouTube auto """
    # Initialize the decision tree
    dt = DecisionTreeYoutubeAuto(**self.config)
    # Load the model JSON from file (feature data)
    filename = "youtube.dash.json"
    file_uri = FileHelper.dataset_path(self.config, filename)
    json_model = FileHelper.load_model_json(file_uri)
    # Generate the X_test dataset
    X_test, df_userfbs = self.dataset_youtubeauto()

    # # Test with mock-up data
    # import random
    # dj_max = 124326180.9
    # uj_max = 128804961.5
    # ul_max = 1
    # dl_max = 1
    # uth_max = 29335149.8038362
    # dth_max = 18033919.2661197
    # rtt_max = 1000  # 14226774236.96
    # X_test = []
    # number_X_test = 5
    # for i in np.arange(number_X_test):
    #     features_1 = None
    #     features_1 = {'idx': i, 'RTT': random.uniform(0, rtt_max), 'DJ': random.uniform(0, dj_max),
    #                   'UJ': random.uniform(0, uj_max), 'DL': random.uniform(0, dl_max),
    #                   'UL': random.uniform(0, ul_max), 'DTH': random.uniform(0, dth_max),
    #                   'UTH': random.uniform(0, uth_max)}
    #     for k, v in features_1.items():
    #         if not k == 'idx':
    #             features_1[k] = round(v, 2)
    #     features_1['Userfeedback'] = 0
    #     features_1['Youtube_720P'] = 0
    #     # features_2 = {'DTH': randint(30000,4000000), 'RTT': randint(500,300000), 'DJ': randint(0,1000), 'DL': randint(0,1000), 'UJ': randint(0,1000), 'UL': rand(), 'UTH': randint(1000,200000)}
    #     # features_3 = {'DTH': randint(1000000,4000000), 'RTT': randint(1000,240000), 'DJ': randint(0,1000), 'DL': randint(0,1000), 'UJ': randint(0,1000), 'UL': rand(), 'UTH': randint(1000000,4000000)}
    #     X_test.append(features_1)
    # features_1 = {'idx': number_X_test, 'RTT': 172211718, 'DL': 0.25, 'UL': 0.0, 'DJ': 1462940.373, 'UJ': 967358.4,
    #               'DTH': 6904033.241, 'UTH': 6688152.991, 'Userfeedback': 1, 'Youtube_720P': 1}
    # features_2 = {'idx': number_X_test+1, 'RTT': 329342189.4, 'DL': 0.0, 'UL': 0.0, 'DJ': 2217979.68, 'UJ': 4026196.84,
    #               'DTH': 206766.1582, 'UTH': 1152765.337, 'Userfeedback': 1, 'Youtube_720P': 1}
    # features_3 = {'idx': number_X_test+2, 'RTT': 14226774237, 'DL': 0.0, 'UL': 0.0, 'DJ': 78439389.08, 'UJ': 124548859.2,
    #               'DTH': 41369.68321, 'UTH': 34202.56526, 'Userfeedback': 1, 'Youtube_720P': 1}
    # features_4 = {'idx': number_X_test+3, 'RTT': 14226774237, 'DL': 0.0, 'UL': 0.0, 'DJ': 78439389.08, 'UJ': 124548859.2,
    #               'DTH': 41369.68321, 'UTH': 34202.56526, 'Userfeedback': 1, 'Youtube_720P': 1}
    # features_5 = {'idx': number_X_test+4, 'RTT': 3303547790, 'DL': 0.0, 'UL': 0.0, 'DJ': 11294340.32, 'UJ': 27797712.96,
    #               'DTH': 110344.9385, 'UTH': 32443.88377, 'Userfeedback': 2, 'Youtube_720P': 2}
    # X_test.append(features_1)
    # X_test.append(features_2)
    # X_test.append(features_3)
    # X_test.append(features_4)
    # X_test.append(features_5)

    # Transform from list to dataframe
    df_userfbs = pd.DataFrame(X_test)
    # Start the prediction
    estimated_mos = dt.predict(json_model, X_test)
    # Create a dataframe from estimated_mos and save it for testing purposes
    df_estimated_qoe = pd.DataFrame(estimated_mos)
    filename = self.config['OUT_DIR'] + 'Estimated_MOS_YoutubeAuto_New.csv'
    df_estimated_qoe.to_csv(filename)
    # Merge X_test and the estimated QoE, then save for testing purposes
    df_merged = pd.merge(df_userfbs, df_estimated_qoe, left_index=True, right_index=True)
    df_merged = df_merged.drop(['idx_x', 'idx_y'], axis='columns')
    filename = self.config['OUT_DIR'] + 'DATASET_MOSUSERFEEDBACK_QOE_YOUTUBEAUTO.csv'
    df_merged.to_csv(filename)
    # Print the results
    dt.print(estimated_mos)
    return estimated_mos