def process_smoking(self):
    '''Add the smoking measurements from the 'roken' file to the data.

    The file is imported twice: once for entries matching 'PAKJAQ' and
    once for entries matching 'ROOKAQ' (the latter with counter=1). The
    headers returned by each pass are appended to self.headers;
    self.num_lab and self.num_lab_pos keep the values of the last pass.
    '''
    smoking_file = util.select_file(self.files, 'roken')

    # (code pattern, suffix, extra keyword arguments) for each pass
    passes = (
        ('PAKJAQ', ['smoking', 'lab_results'], {}),
        ('ROOKAQ', ['smoking'], {'counter': 1}),
    )
    for code_pattern, suffix, extra in passes:
        # the data is re-imported for every pass
        rows, fields = util.import_data(smoking_file, delim=self.delim)
        result = self.insert_data(
            rows, fields, 'dmemo', ['dtestdate', 'dtestdate'],
            code_pattern, None, suffix=suffix, **extra)
        new_headers, self.num_lab, self.num_lab_pos, _ = result
        self.headers = self.headers + new_headers
def process_bloodpressure(self):
    '''Add the blood pressure measurements from the 'bloeddruk' file.

    The file is imported twice: once for entries matching 'RRD' and once
    for entries matching 'RRS' (presumably diastolic and systolic
    pressure -- confirm). The headers returned by each pass are appended
    to self.headers; self.num_lab and self.num_lab_pos keep the values of
    the last pass.
    '''
    pressure_file = util.select_file(self.files, 'bloeddruk')

    # NOTE(review): unlike process_smoking and process_bmi, the second
    # pass does not receive a counter argument -- confirm this is intended.
    for code_pattern in ('RRD', 'RRS'):
        # the data is re-imported for every pass
        rows, fields = util.import_data(pressure_file, delim=self.delim)
        result = self.insert_data(
            rows, fields, 'dmemo', ['dtestdate', 'dtestdate'],
            code_pattern, 3, suffix=['blood_pressure', 'lab_results'])
        new_headers, self.num_lab, self.num_lab_pos, _ = result
        self.headers = self.headers + new_headers
def enrich_from_file(self, in_dir):
    '''Enrich using the 'journaal' csv file found in in_dir as source.

    Returns whatever self.icpc_enrichment returns for the remaining
    records and the position of the 'icpc' column.
    '''
    assert (in_dir != '')

    journal_file = util.select_file(util.list_dir_csv(in_dir), 'journaal')
    records = io.read_csv(journal_file)

    # the first record is consumed to derive the headers
    header_record = next(records)
    icpc_idx = util.get_headers(header_record).index('icpc')

    return self.icpc_enrichment(records, icpc_idx)
def enrich_from_file(self, in_dir):
    '''Enrich using the 'journaal' csv file found in in_dir as source.

    in_dir must be a non-empty directory name. Returns whatever
    self.icpc_enrichment returns for the remaining records and the
    position of the 'icpc' column.
    '''
    assert (in_dir != '')

    files = util.list_dir_csv(in_dir)
    med_f = util.select_file(files, 'journaal')
    records = io.read_csv(med_f)

    # next(records) instead of records.next(): the method form only exists
    # on Python 2 iterators, the builtin works on Python 2.6+ and Python 3.
    # The first record is consumed to derive the headers.
    headers = util.get_headers(next(records))
    idx = headers.index('icpc')

    return self.icpc_enrichment(records, idx)
def process_labresults(self):
    '''Add the lab results from the 'meetwaarden' file to the data.

    Every non-empty code ('.+') is accepted. The returned headers are
    appended to self.headers, and self.num_lab / self.num_lab_pos are
    updated from the result of insert_data.
    '''
    results_file = util.select_file(self.files, 'meetwaarden')
    rows, fields = util.import_data(results_file, delim=self.delim)

    result = self.insert_data(
        rows, fields, 'dmemo', ['dtestdate', 'dtestdate'],
        '.+', None, suffix=['lab_results'])
    new_headers, self.num_lab, self.num_lab_pos, _ = result

    self.headers = self.headers + new_headers
def enrich_from_file(self, in_dir):
    '''Enrich using the 'medicatie' csv file found in in_dir as source.

    in_dir must be a non-empty directory name. Returns whatever
    self.atc_enrichment returns for the remaining records and the
    position of the 'atc_code' column.
    '''
    assert (in_dir != '')

    files = util.list_dir_csv(in_dir)
    med_f = util.select_file(files, 'medicatie')
    records = io.read_csv(med_f)

    # next(records) instead of records.next(): the method form only exists
    # on Python 2 iterators, the builtin works on Python 2.6+ and Python 3.
    # The first record is consumed to derive the headers.
    headers = util.get_headers(next(records))
    idx = headers.index('atc_code')

    return self.atc_enrichment(records, idx)
def process_lung_function(self):
    '''Add the lung function measurements from the 'longfunctie' file.

    Every non-empty code ('.+') is accepted. The returned headers are
    appended to self.headers, and self.num_lab / self.num_lab_pos are
    updated from the result of insert_data.
    '''
    lung_file = util.select_file(self.files, 'longfunctie')

    # gather lung function data
    rows, fields = util.import_data(lung_file, delim=self.delim)
    result = self.insert_data(
        rows, fields, 'dmemo', ['dtestdate', 'dtestdate'],
        '.+', None, suffix=['lung_function', 'lab_results'])
    new_headers, self.num_lab, self.num_lab_pos, _ = result

    self.headers = self.headers + new_headers
def process_cardiometabolism(self):
    '''Add the measurements from the 'cardiometabool' file to the data.

    Codes are matched against six consecutive capital letters. The
    returned headers are appended to self.headers, and self.num_lab /
    self.num_lab_pos are updated from the result of insert_data.
    '''
    cardio_file = util.select_file(self.files, 'cardiometabool')

    # gather cardiometabolism data
    rows, fields = util.import_data(cardio_file, delim=self.delim)
    result = self.insert_data(
        rows, fields, 'dmemo', ['dtestdate', 'dtestdate'],
        '[A-Z][A-Z][A-Z][A-Z][A-Z][A-Z]', None,
        suffix=['cardiometabolism'])
    new_headers, self.num_lab, self.num_lab_pos, _ = result

    self.headers = self.headers + new_headers
def process_alcohol(self):
    '''Add the alcohol entries from the 'ggzanamnese' file to the data.

    Only codes matching 'ALCO' are used, with 4 as the sixth positional
    argument of insert_data (presumably the code length -- confirm). The
    returned headers are appended to self.headers, and self.num_lab /
    self.num_lab_pos are updated from the result of insert_data.
    '''
    anamnesis_file = util.select_file(self.files, 'ggzanamnese')

    # gather alcohol data
    rows, fields = util.import_data(anamnesis_file, delim=self.delim)
    result = self.insert_data(
        rows, fields, 'dmemo', ['dtestdate', 'dtestdate'],
        'ALCO', 4, suffix=['alcohol', 'lab_results'])
    new_headers, self.num_lab, self.num_lab_pos, _ = result

    self.headers = self.headers + new_headers
def process_allergies(self):
    '''Add the allergy entries from the 'allergie' file to the data.

    Every non-empty code ('.+') is accepted. The returned headers are
    appended to self.headers, and self.num_lab / self.num_lab_pos are
    updated from the result of insert_data.
    '''
    allergy_file = util.select_file(self.files, 'allergie')

    # gather allergy data
    rows, fields = util.import_data(allergy_file, delim=self.delim)
    result = self.insert_data(
        rows, fields, 'dmemo', ['dtestdate', 'dtestdate'],
        '.+', None, suffix=['allergies'])
    new_headers, self.num_lab, self.num_lab_pos, _ = result

    self.headers = self.headers + new_headers
def process_icpc(self):
    '''Add the ICPC entries from the 'icpc' file to the data.

    Codes are matched against a capital letter followed by two digits.
    Only the headers returned by insert_data are kept; they are appended
    to self.headers.
    '''
    icpc_file = util.select_file(self.files, 'icpc')

    # rows and fields look fine at this point
    rows, fields = util.import_data(icpc_file, delim=self.delim)

    # insert_data comes from SequenceProcess for temporal runs and from
    # StandardProcess for regular runs; the original author notes that
    # this is where things go wrong for SequenceProcess.
    result = self.insert_data(
        rows, fields, 'icpc_cat', ['dicpc_startdate', 'dicpc_enddate'],
        '[A-Z][0-9][0-9]', 3, suffix=['icpc'])
    new_headers, _, _, _ = result

    self.headers = self.headers + new_headers
def process_actions(self):
    '''Add the entries from the 'verrichtingen' file to the data.

    Codes in the 'prestatiecode' column are matched against five digits.
    Only the headers returned by insert_data are kept; they are appended
    to self.headers.
    '''
    # selects the 'verrichtingen' file
    ref_f = util.select_file(self.files, 'verrichtingen')

    # imports ref_f data and separates headers (fields) from data (rows)
    rows, fields = util.import_data(ref_f, delim=self.delim)

    # The pattern used to read '[0-9][0-9[0-9][0-9][0-9]': the unclosed
    # bracket turned the second and third groups into a single character
    # class that also accepted '[', so only four characters were matched.
    # The code column used to be 'verrichtcode' and is now 'prestatiecode'.
    ref_headers, _, _, _ = self.insert_data(
        rows, fields, 'prestatiecode', ['dverrdate', 'dverrdate'],
        '[0-9][0-9][0-9][0-9][0-9]', None, suffix=['actions'])

    self.headers = self.headers + ref_headers
def process_medication(self):
    '''Add the medication entries from the 'medicatie' file to the data.

    Codes in the 'atc' column are matched against a capital letter
    followed by two digits. The returned headers are appended to
    self.headers, and self.num_med / self.num_med_pos are updated from
    the result of insert_data.
    '''
    medication_file = util.select_file(self.files, 'medicatie')

    # separates the headers (fields) from the data (rows); the rows here
    # are comparable to a SQL cursor
    rows, fields = util.import_data(medication_file, delim=self.delim)

    result = self.insert_data(
        rows, fields, 'atc', ['dprescdate', 'dprescdate'],
        '[A-Z][0-9][0-9]', 3, suffix=['atc'])
    new_headers, self.num_med, self.num_med_pos, _ = result

    self.headers = self.headers + new_headers
def process_consults(self):
    '''Add the consult entries from the 'journaal' file to the data.

    Codes in the 'icpcprobleem' column are matched against a capital
    letter followed by two digits. The returned headers are appended to
    self.headers, and self.num_cons / self.num_cons_pos are updated from
    the result of insert_data.
    '''
    journal_file = util.select_file(self.files, 'journaal')

    # separates the headers (fields) from the data (rows)
    rows, fields = util.import_data(journal_file, delim=self.delim)

    result = self.insert_data(
        rows, fields, 'icpcprobleem', ['regdatum', 'regdatum'],
        '[A-Z][0-9][0-9]', 3, suffix=['consults'])
    new_headers, self.num_cons, self.num_cons_pos, _ = result

    self.headers = self.headers + new_headers
def process_bmi(self):
    '''Add the length, weight and bmi measurements from the 'bmi' file.

    The file is imported three times, once per code pattern: 'LNG'
    (length), 'GEW' (weight, counter=1) and 'QUE' (bmi, counter=2). The
    headers returned by each pass are appended to self.headers;
    self.num_lab and self.num_lab_pos keep the values of the last pass.
    '''
    bmi_file = util.select_file(self.files, 'bmi')

    # (code pattern, extra keyword arguments) for each pass
    passes = (
        ('LNG', {}),              # length
        ('GEW', {'counter': 1}),  # weight
        ('QUE', {'counter': 2}),  # bmi
    )
    for code_pattern, extra in passes:
        # the data is re-imported for every pass
        rows, fields = util.import_data(bmi_file, delim=self.delim)
        result = self.insert_data(
            rows, fields, 'dmemo', ['dtestdate', 'dtestdate'],
            code_pattern, 3, suffix=['bmi', 'lab_results'], **extra)
        new_headers, self.num_lab, self.num_lab_pos, _ = result
        self.headers = self.headers + new_headers
def load_data(self, data, headers):
    '''Append previously pickled data and its headers to this instance.

    data and headers are the names used to select the two pickle files
    from self.pickle_files. The loaded data is printed, handed to
    self.append_known_data, and the loaded headers are appended to
    self.headers.
    '''
    data_file = util.select_file(self.pickle_files, data)
    known_data = load_obj(data_file)
    print(known_data)
    self.append_known_data(known_data)

    headers_file = util.select_file(self.pickle_files, headers)
    extra_headers = load_obj(headers_file)
    self.headers = self.headers + extra_headers
def process_csv(self, needs_processing):
    '''Convert the csv files (or cached pickles) in self.in_dir to usable data.

    needs_processing maps a source name ('medication', 'consults',
    'actions', 'icpc', 'lab_results', 'smoking', 'bmi', 'allergies',
    'blood_pressure', 'alcohol', 'renal_function', 'cardiometabolism',
    'lab_blood', 'lung_function') to a truthy value when that source has
    to be included; missing keys are skipped.

    When self.already_processed is set, cached pickles are tried first
    and the csv files are only used as a fallback. Afterwards the target
    is moved to the end of every instance, 'target' is appended to
    self.headers and patients whose first stroke date lies before 2007
    are removed from self.id2data.
    '''
    # get all csv's and pickles in the input folder
    self.files = util.list_dir_csv(self.in_dir)
    self.pickle_files = util.list_dir_pickle(self.in_dir)

    # put the IDs of the 'main' file in a dict
    if self.already_processed == True:
        try:
            ID_f = util.select_file(self.pickle_files, 'patient_dict')
            self.id2data = load_obj(ID_f)
            self.headers = ['ID', 'age', 'gender']
            print('yyy')  # NOTE(review): looks like leftover debug output
        except TypeError:
            self._read_patient_ids()
    else:
        self._read_patient_ids()

    if self.survival == True:
        ID_f = util.select_file(self.files, 'icpc')
        rows, fields = util.import_data(ID_f, delim=self.delim)
        self.insert_start_baseline(rows, fields)

    # add stroke value to each patient
    if self.already_processed == True:
        try:
            stroke_f = util.select_file(self.pickle_files, 'stroke_dict')
            self.id2data = load_obj(stroke_f)
            print('xxx')  # NOTE(review): looks like leftover debug output
        except (TypeError, ValueError):
            self._read_stroke_occurrences()
    else:
        self._read_stroke_occurrences()

    # randomize dates if non-survival
    if self.survival == False:
        self.insert_data_intervals()
    else:
        self.insert_survival_intervals()

    # One entry per data source, handled in this order:
    #   (needs_processing key, label used in the progress messages,
    #    name of the method that processes the csv,
    #    cached (data, headers) pickles in survival mode,
    #    cached (data, headers) pickles otherwise)
    # Method names are looked up lazily so that a processor that is not
    # defined is only an error when it is actually needed.
    sources = (
        # NOTE(review): the survival names of medication do not follow
        # the '<name>_dict0_survival' / '<name>_headers0' scheme of the
        # other sources -- kept as found, confirm against the pickles.
        ('medication', 'medication', 'process_medication',
         [('atc0_survival', 'atc0_headers0')],
         [('atc_dict0', 'atc_headers0')]),
        ('consults', 'consults', 'process_consults',
         [('consults_dict0_survival', 'consults_headers0')],
         [('consults_dict0', 'consults_headers0')]),
        ('actions', 'actions', 'process_actions',
         [('actions_dict0_survival', 'actions_headers0')],
         [('actions_dict0', 'actions_headers0')]),
        ('icpc', 'ICPC', 'process_icpc',
         [('icpc_dict0_survival', 'icpc_headers0')],
         [('icpc_dict0', 'icpc_headers0')]),
        ('lab_results', 'lab results', 'process_labresults',
         [('lab_results_dict0_survival', 'lab_results_headers0')],
         [('lab_results_dict0', 'lab_results_headers0')]),
        # the survival branch used to load 'consults_headers0' here
        ('smoking', 'smoking', 'process_smoking',
         [('smoking_dict0_survival', 'smoking_headers0'),
          ('smoking_dict1_survival', 'smoking_headers1')],
         [('smoking_dict0', 'smoking_headers0'),
          ('smoking_dict1', 'smoking_headers1')]),
        ('bmi', 'bmi', 'process_bmi',
         [('bmi_dict0_survival', 'bmi_headers0'),
          ('bmi_dict1_survival', 'bmi_headers1'),
          ('bmi_dict2_survival', 'bmi_headers2')],
         [('bmi_dict0', 'bmi_headers0'),
          ('bmi_dict1', 'bmi_headers1'),
          ('bmi_dict2', 'bmi_headers2')]),
        ('allergies', 'allergies', 'process_allergies',
         [('allergies_dict0_survival', 'allergies_headers0')],
         [('allergies_dict0', 'allergies_headers0')]),
        # NOTE(review): in survival mode only the first blood pressure
        # pickle is loaded (the second load was commented out) -- kept.
        ('blood_pressure', 'blood pressure', 'process_bloodpressure',
         [('blood_pressure_dict0_survival', 'blood_pressure_headers0')],
         [('blood_pressure_dict0', 'blood_pressure_headers0'),
          ('blood_pressure_dict1', 'blood_pressure_headers1')]),
        ('alcohol', 'alcohol', 'process_alcohol',
         [('alcohol_dict0_survival', 'alcohol_headers0')],
         [('alcohol_dict0', 'alcohol_headers0')]),
        # a missing else used to load the regular pickle on top of the
        # survival one
        ('renal_function', 'renal function', 'process_renalfunction',
         [('renal_function_dict0_survival', 'renal_function_headers0')],
         [('renal_function_dict0', 'renal_function_headers0')]),
        # the survival branch used to load 'renal_function_headers0' here
        ('cardiometabolism', 'cardiometabolism', 'process_cardiometabolism',
         [('cardiometabolism_dict0_survival', 'cardiometabolism_headers0')],
         [('cardiometabolism_dict0', 'cardiometabolism_headers0')]),
        ('lab_blood', 'lab blood', 'process_lab_blood',
         [('lab_blood_dict0_survival', 'lab_blood_headers0')],
         [('lab_blood_dict0', 'lab_blood_headers0')]),
        ('lung_function', 'lung function', 'process_lung_function',
         [('lung_function_dict0_survival', 'lung_function_headers0')],
         [('lung_function_dict0', 'lung_function_headers0')]),
    )
    for key, label, method_name, survival_cache, regular_cache in sources:
        if key in needs_processing and needs_processing[key]:
            print('...processing ' + label)
            self._load_cached_or_process(
                label, method_name, survival_cache, regular_cache)

    # move stroke indicator to end of each instance data list
    self.move_target_to_end_of_list()

    # append target element to headers
    self.headers.append('target')

    self._remove_early_strokes()


def _read_patient_ids(self):
    '''Read the patient IDs from the 'patient' csv and set self.headers.'''
    ID_f = util.select_file(self.files, 'patient')
    rows, fields = util.import_data(ID_f, delim=self.delim)
    self.headers = self.get_IDs(rows, fields)


def _read_stroke_occurrences(self):
    '''Add the stroke value of each patient from the 'icpc' csv.'''
    stroke_f = util.select_file(self.files, 'icpc')
    rows, fields = util.import_data(stroke_f, delim=self.delim)
    self.get_stroke_occurrences(rows, fields)


def _load_cached_or_process(self, label, method_name, survival_cache,
                            regular_cache):
    '''Load one data source from its pickles, else process its csv.

    label is only used in the fallback message, method_name is the name
    of the method that processes the csv, and the two cache arguments
    are lists of (data, headers) names for self.load_data.

    A TypeError or ValueError while loading (presumably raised when a
    pickle is missing -- confirm against util.select_file / load_obj)
    triggers the fallback. As before, pickles that were loaded before
    the failing one stay appended.
    '''
    if self.already_processed == True:
        try:
            if self.survival == True:
                cached = survival_cache
            else:
                cached = regular_cache
            for data_name, headers_name in cached:
                self.load_data(data_name, headers_name)
            return
        except (TypeError, ValueError):
            # the message used to say 'medication' for every source
            print('Data not available, processing %s data' % label)
    getattr(self, method_name)()


def _remove_early_strokes(self):
    '''Remove patients whose first stroke date lies before 2007.

    The year is taken from the part of the first 'stroke_dates' entry
    before the first '-'. In survival mode entries that are lists are
    skipped; otherwise entries equal to 'negative' are skipped.
    '''
    to_remove = []
    for key, d in self.id2data.items():
        first_date = d['stroke_dates'][0]
        if self.survival == True:
            print(first_date)  # NOTE(review): per-patient debug output
            has_date = not isinstance(first_date, list)
        else:
            has_date = str(first_date) != 'negative'
        if has_date and int(str(first_date).split('-')[0]) < 2007:
            to_remove.append(key)

    print(len(to_remove))
    # deleting happens afterwards so the dict is not changed while it is
    # being iterated
    for key in to_remove:
        del self.id2data[key]
def process_csv(self, needs_processing):
    '''Convert the csv files in self.in_dir to usable data.

    needs_processing maps a source name ('medication', 'consults',
    'referrals', 'comorbidity', 'lab_results') to a truthy value when
    that source has to be included; missing keys are skipped.

    The collected headers, followed by 'target', end up in self.headers.
    '''
    # get all csv's in the input folder
    files = util.list_dir_csv(self.in_dir)

    # put the IDs of the 'main' file in a dict
    ID_f = util.select_file(files, 'patient')
    rows, fields = util.import_data(ID_f, delim=self.delim)
    headers = self.get_IDs(rows, fields)

    # add CRC value to each patient
    CRC_f = util.select_file(files, 'journaal')
    rows, fields = util.import_data(CRC_f, delim=self.delim)
    self.get_CRC_occurrences(rows, fields)

    # randomize dates
    self.insert_data_intervals()

    # The progress messages use print(...) with a single argument, which
    # prints the same text on Python 2 and is valid syntax on Python 3.

    # gather data from medication csv
    if 'medication' in needs_processing and needs_processing['medication']:
        print('...processing medication')
        med_f = util.select_file(files, 'medicatie')
        rows, fields = util.import_data(med_f, delim=self.delim)
        med_headers, self.num_med, self.num_med_pos = self.insert_data(
            rows, fields, 'atc_code',
            ['voorschrijfdatum', 'voorschrijfdatum'],
            '[A-Z][0-9][0-9]', 3, suffix='atc')
        headers = headers + med_headers

    # gather data from consult csv
    if 'consults' in needs_processing and needs_processing['consults']:
        print('...processing consults')
        consult_f = util.select_file(files, 'journaal')
        rows, fields = util.import_data(consult_f, delim=self.delim)
        consult_headers, self.num_cons, self.num_cons_pos = self.insert_data(
            rows, fields, 'icpc', ['datum', 'datum'],
            '[A-Z][0-9][0-9]', 3, incorporate_SOEP='soepcode')
        headers = headers + consult_headers

    # gather data from referral csv
    if 'referrals' in needs_processing and needs_processing['referrals']:
        print('...processing referrals')
        ref_f = util.select_file(files, 'verwijzing')
        rows, fields = util.import_data(ref_f, delim=self.delim)
        ref_headers, _, _ = self.insert_data(
            rows, fields, 'specialisme', ['datum', 'datum'], '.*', None)
        headers = headers + ref_headers

    # gather data from comorbidity csv
    if 'comorbidity' in needs_processing and needs_processing['comorbidity']:
        print('...processing comorbidity')
        comor_f = util.select_file(files, 'comorbiditeit')
        rows, fields = util.import_data(comor_f, delim=self.delim)
        comor_headers, _, _ = self.insert_data(
            rows, fields, 'omschrijving', ['begindatum', 'einddatum'],
            '.+', None, suffix='comorbiditeit')
        headers = headers + comor_headers

    # gather data from lab results csv
    if 'lab_results' in needs_processing and needs_processing['lab_results']:
        print('...processing lab results')
        lab_f = util.select_file(files, 'bepaling')
        rows, fields = util.import_data(lab_f, delim=self.delim)
        lab_headers, self.num_lab, self.num_lab_pos = self.insert_data(
            rows, fields, 'code', ['datum', 'datum'],
            '.+', None, suffix='lab_results')
        headers = headers + lab_headers

    # move CRC indicator to end of each instance data list
    self.move_target_to_end_of_list()

    # append target element to headers, add to class var
    headers.append('target')
    self.headers = headers