Python select_file示例，util_.util.select_file Python示例

示例#1

0

显示文件

    def process_smoking(self):
        '''Import smoking data from the 'roken' file.

        Makes two insert_data passes over the same file: first for 'PAKJAQ'
        codes, then for 'ROOKAQ' codes (smoking information). The headers
        returned by each pass are appended to self.headers, and
        self.num_lab / self.num_lab_pos are overwritten by each pass.
        '''
        smoking_file = util.select_file(self.files, 'roken')

        # (code pattern, keyword arguments specific to that pass)
        passes = (
            ('PAKJAQ', {'suffix': ['smoking', 'lab_results']}),
            ('ROOKAQ', {'suffix': ['smoking'], 'counter': 1}),
        )

        for code_pattern, extra_kwargs in passes:
            # the file is imported again for every pass
            rows, fields = util.import_data(smoking_file, delim=self.delim)
            result = self.insert_data(
                rows,
                fields,
                'dmemo', ['dtestdate', 'dtestdate'],
                code_pattern,
                None,
                **extra_kwargs)
            new_headers, self.num_lab, self.num_lab_pos, _ = result

            self.headers = self.headers + new_headers

示例#2

0

显示文件

    def process_bloodpressure(self):
        '''Import blood pressure data from the 'bloeddruk' file.

        Makes one insert_data pass for 'RRD' codes and one for 'RRS' codes.
        The headers returned by each pass are appended to self.headers, and
        self.num_lab / self.num_lab_pos are overwritten by each pass.
        '''
        pressure_file = util.select_file(self.files, 'bloeddruk')

        for code_pattern in ('RRD', 'RRS'):
            # the file is imported again for every pass
            rows, fields = util.import_data(pressure_file, delim=self.delim)
            result = self.insert_data(
                rows,
                fields,
                'dmemo', ['dtestdate', 'dtestdate'],
                code_pattern,
                3,
                suffix=['blood_pressure', 'lab_results'])
            new_headers, self.num_lab, self.num_lab_pos, _ = result

            self.headers = self.headers + new_headers

示例#3

0

显示文件

    def enrich_from_file(self, in_dir):
        '''Enrich using the 'journaal' csv file in in_dir as source.

        The first record is consumed as the header row; the position of its
        'icpc' column and the remaining records are handed to
        self.icpc_enrichment, whose result is returned.
        '''
        assert in_dir != ''

        csv_files = util.list_dir_csv(in_dir)
        journal_file = util.select_file(csv_files, 'journaal')
        # NOTE(review): `io` must provide read_csv, so it is presumably a
        # project module rather than the standard library io -- confirm
        records = io.read_csv(journal_file)

        header_row = next(records)
        icpc_column = util.get_headers(header_row).index('icpc')

        return self.icpc_enrichment(records, icpc_column)

示例#4

0

显示文件

文件： ICPC.py 项目： MarkMenagie/EMR-pre-processing-pipeline

	def enrich_from_file(self, in_dir):
		'''Enrich using the 'journaal' csv file in in_dir as source.

		The first record is consumed as the header row; the position of its
		'icpc' column and the remaining records are handed to
		self.icpc_enrichment, whose result is returned.
		'''
		assert(in_dir != '')
		files = util.list_dir_csv(in_dir)
		med_f = util.select_file(files, 'journaal')
		# NOTE(review): `io` must provide read_csv, so it is presumably a
		# project module rather than the standard library io -- confirm
		records = io.read_csv(med_f)

		# the built-in next() works on Python 2.6+ and Python 3, whereas the
		# .next() method only exists on Python 2 iterators
		headers = util.get_headers(next(records))
		idx = headers.index('icpc')

		return self.icpc_enrichment(records, idx)

示例#5

0

显示文件

 def process_labresults(self):
     '''Import lab results from the 'meetwaarden' file.

     Runs insert_data with the pattern '.+' and appends the returned headers
     to self.headers; self.num_lab and self.num_lab_pos are overwritten.
     '''
     results_file = util.select_file(self.files, 'meetwaarden')
     rows, fields = util.import_data(results_file, delim=self.delim)

     result = self.insert_data(
         rows,
         fields,
         'dmemo', ['dtestdate', 'dtestdate'],
         '.+',
         None,
         suffix=['lab_results'])
     new_headers, self.num_lab, self.num_lab_pos, _ = result

     self.headers = self.headers + new_headers

示例#6

0

显示文件

文件： ATC.py 项目： majdzr/EMR-pre-processing-pipeline

    def enrich_from_file(self, in_dir):
        '''Enrich using the 'medicatie' csv file in in_dir as source.

        The first record is consumed as the header row; the position of its
        'atc_code' column and the remaining records are handed to
        self.atc_enrichment, whose result is returned.
        '''
        assert (in_dir != '')
        files = util.list_dir_csv(in_dir)
        med_f = util.select_file(files, 'medicatie')
        # NOTE(review): `io` must provide read_csv, so it is presumably a
        # project module rather than the standard library io -- confirm
        records = io.read_csv(med_f)

        # the built-in next() works on Python 2.6+ and Python 3, whereas the
        # .next() method only exists on Python 2 iterators
        headers = util.get_headers(next(records))
        idx = headers.index('atc_code')

        return self.atc_enrichment(records, idx)

示例#7

0

显示文件

    def process_lung_function(self):
        '''Import lung function data from the 'longfunctie' file.

        Runs insert_data with the pattern '.+' and appends the returned
        headers to self.headers; self.num_lab and self.num_lab_pos are
        overwritten.
        '''
        lung_file = util.select_file(self.files, 'longfunctie')

        # gather lung function data
        rows, fields = util.import_data(lung_file, delim=self.delim)
        result = self.insert_data(
            rows,
            fields,
            'dmemo', ['dtestdate', 'dtestdate'],
            '.+',
            None,
            suffix=['lung_function', 'lab_results'])
        new_headers, self.num_lab, self.num_lab_pos, _ = result

        self.headers = self.headers + new_headers

示例#8

0

显示文件

    def process_cardiometabolism(self):
        '''Import cardiometabolism data from the 'cardiometabool' file.

        Runs insert_data with a pattern of six capital letters and appends
        the returned headers to self.headers; self.num_lab and
        self.num_lab_pos are overwritten.
        '''
        cardio_file = util.select_file(self.files, 'cardiometabool')

        # gather cardiometabolism data
        rows, fields = util.import_data(cardio_file, delim=self.delim)
        result = self.insert_data(
            rows,
            fields,
            'dmemo', ['dtestdate', 'dtestdate'],
            '[A-Z][A-Z][A-Z][A-Z][A-Z][A-Z]',
            None,
            suffix=['cardiometabolism'])
        new_headers, self.num_lab, self.num_lab_pos, _ = result

        self.headers = self.headers + new_headers

示例#9

0

显示文件

    def process_alcohol(self):
        '''Import alcohol data from the 'ggzanamnese' file.

        Runs insert_data with the pattern 'ALCO' and appends the returned
        headers to self.headers; self.num_lab and self.num_lab_pos are
        overwritten.
        '''
        alcohol_file = util.select_file(self.files, 'ggzanamnese')

        # gather alcohol data
        rows, fields = util.import_data(alcohol_file, delim=self.delim)
        result = self.insert_data(
            rows,
            fields,
            'dmemo', ['dtestdate', 'dtestdate'],
            'ALCO',
            4,
            suffix=['alcohol', 'lab_results'])
        new_headers, self.num_lab, self.num_lab_pos, _ = result

        self.headers = self.headers + new_headers

示例#10

0

显示文件

    def process_allergies(self):
        '''Import allergy data from the 'allergie' file.

        Runs insert_data with the pattern '.+' and appends the returned
        headers to self.headers; self.num_lab and self.num_lab_pos are
        overwritten.
        '''
        allergy_file = util.select_file(self.files, 'allergie')

        # gather allergy data
        rows, fields = util.import_data(allergy_file, delim=self.delim)
        result = self.insert_data(
            rows,
            fields,
            'dmemo', ['dtestdate', 'dtestdate'],
            '.+',
            None,
            suffix=['allergies'])
        new_headers, self.num_lab, self.num_lab_pos, _ = result

        self.headers = self.headers + new_headers

示例#11

0

显示文件

 def process_icpc(self):
     '''Import ICPC data from the 'icpc' file.

     Runs insert_data with the pattern '[A-Z][0-9][0-9]' and appends the
     returned headers to self.headers; the other three return values are
     discarded.
     '''
     icpc_file = util.select_file(self.files, 'icpc')

     # original author's note: rows and fields look fine at this point
     rows, fields = util.import_data(icpc_file, delim=self.delim)

     result = self.insert_data(
         rows,
         fields,
         'icpc_cat', ['dicpc_startdate', 'dicpc_enddate'],
         '[A-Z][0-9][0-9]',
         3,
         suffix=['icpc'])
     # original author's note: insert_data comes from SequenceProcess for
     # temporal runs and from StandardProcess for regular runs, so this is
     # where things go wrong with SequenceProcess
     icpc_headers, _, _, _ = result

     self.headers = self.headers + icpc_headers

示例#12

0

显示文件

    def process_actions(self):
        '''Import performed actions from the 'verrichtingen' file.

        Runs insert_data on the 'prestatiecode' column with a five-digit
        pattern and appends the returned headers to self.headers; the other
        three return values are discarded.
        '''
        # select the 'verrichtingen' file
        ref_f = util.select_file(self.files, 'verrichtingen')

        # import the data, separating headers (fields) from data (rows)
        rows, fields = util.import_data(ref_f, delim=self.delim)

        # The pattern used to read '[0-9][0-9[0-9][0-9][0-9]': the missing
        # ']' turned the second and third groups into one character class
        # that also accepted '[', so only four characters were matched.
        # original author's note: the code column is now 'prestatiecode'
        # (presumably replacing an earlier 'verrichtcode' -- confirm)
        ref_headers, _, _, _ = self.insert_data(
            rows,
            fields,
            'prestatiecode', ['dverrdate', 'dverrdate'],
            '[0-9][0-9][0-9][0-9][0-9]',
            None,
            suffix=['actions'])
        self.headers = self.headers + ref_headers

示例#13

0

显示文件

    def process_medication(self):
        '''Import medication data from the 'medicatie' file.

        Runs insert_data on the 'atc' column with the pattern
        '[A-Z][0-9][0-9]' and appends the returned headers to self.headers;
        self.num_med and self.num_med_pos are overwritten.
        '''
        # select the 'medicatie' file
        medication_file = util.select_file(self.files, 'medicatie')

        # import the data, separating headers (fields) from data (rows);
        # original author's note: rows are comparable to an SQL cursor
        rows, fields = util.import_data(medication_file, delim=self.delim)

        result = self.insert_data(
            rows,
            fields,
            'atc', ['dprescdate', 'dprescdate'],
            '[A-Z][0-9][0-9]',
            3,
            suffix=['atc'])
        new_headers, self.num_med, self.num_med_pos, _ = result

        self.headers = self.headers + new_headers

示例#14

0

显示文件

    def process_consults(self):
        '''Import consult data from the 'journaal' file.

        Runs insert_data on the 'icpcprobleem' column with the pattern
        '[A-Z][0-9][0-9]' and appends the returned headers to self.headers;
        self.num_cons and self.num_cons_pos are overwritten.
        '''
        # select the 'journaal' file
        journal_file = util.select_file(self.files, 'journaal')

        # import the data, separating headers (fields) from data (rows)
        rows, fields = util.import_data(journal_file, delim=self.delim)

        result = self.insert_data(
            rows,
            fields,
            'icpcprobleem', ['regdatum', 'regdatum'],
            '[A-Z][0-9][0-9]',
            3,
            suffix=['consults'])
        new_headers, self.num_cons, self.num_cons_pos, _ = result

        self.headers = self.headers + new_headers

示例#15

0

显示文件

    def process_bmi(self):
        '''Import length, weight and bmi data from the 'bmi' file.

        Makes three insert_data passes over the same file: 'LNG' (length),
        'GEW' (weight) and 'QUE' (bmi). The first pass uses insert_data's
        default counter, the later ones pass counter=1 and counter=2. The
        headers returned by each pass are appended to self.headers, and
        self.num_lab / self.num_lab_pos are overwritten by each pass.
        '''
        bmi_file = util.select_file(self.files, 'bmi')

        # (code pattern, keyword arguments specific to that pass)
        passes = (
            ('LNG', {}),              # length
            ('GEW', {'counter': 1}),  # weight
            ('QUE', {'counter': 2}),  # bmi
        )

        for code_pattern, extra_kwargs in passes:
            # the file is imported again for every pass
            rows, fields = util.import_data(bmi_file, delim=self.delim)
            result = self.insert_data(
                rows,
                fields,
                'dmemo', ['dtestdate', 'dtestdate'],
                code_pattern,
                3,
                suffix=['bmi', 'lab_results'],
                **extra_kwargs)
            new_headers, self.num_lab, self.num_lab_pos, _ = result

            self.headers = self.headers + new_headers

示例#16

0

显示文件

文件： PreProcess.py 项目： Letris/Final_stroke

 def load_data(self, data, headers):
     '''Load previously pickled data and headers into this instance.

     data -- name used to select the data pickle from self.pickle_files
     headers -- name used to select the headers pickle from self.pickle_files

     The loaded data is printed and passed to self.append_known_data; the
     loaded headers are appended to self.headers.
     '''
     data_file = util.select_file(self.pickle_files, data)
     known_data = load_obj(data_file)
     print(known_data)
     self.append_known_data(known_data)

     headers_file = util.select_file(self.pickle_files, headers)
     loaded_headers = load_obj(headers_file)
     self.headers = self.headers + loaded_headers

示例#17

0

显示文件

文件： PreProcess.py 项目： Letris/Final_stroke

    def process_csv(self, needs_processing):
        '''Convert the csv files in self.in_dir to usable per-patient data.

        Steps, in order:
        1. list the csv and pickle files in self.in_dir;
        2. load the patient IDs and the stroke occurrences;
        3. insert the data intervals (survival or regular);
        4. handle every data source whose key is truthy in needs_processing;
        5. move the target to the end of each instance and append 'target'
           to self.headers;
        6. remove patients whose first stroke date lies before 2007 from
           self.id2data.

        When self.already_processed is True, steps 2 and 4 first try to load
        previously pickled results and fall back to the csv files when that
        fails.

        needs_processing -- mapping of source name (e.g. 'medication', 'bmi')
            to a flag; missing or falsy entries are skipped.
        '''

        # get all csv's and pickles in the input folder
        self.files = util.list_dir_csv(self.in_dir)
        self.pickle_files = util.list_dir_pickle(self.in_dir)

        # put the IDs of the 'main' file in a dict
        if self.already_processed == True:
            try:
                ID_f = util.select_file(self.pickle_files, 'patient_dict')
                self.id2data = load_obj(ID_f)
                self.headers = ['ID', 'age', 'gender']
                print('yyy')  # debug marker: IDs came from the pickle
            except TypeError:
                ID_f = util.select_file(self.files, 'patient')
                rows, fields = util.import_data(ID_f, delim=self.delim)
                self.headers = self.get_IDs(rows, fields)

        else:
            ID_f = util.select_file(self.files, 'patient')
            rows, fields = util.import_data(ID_f, delim=self.delim)
            self.headers = self.get_IDs(rows, fields)

            if self.survival == True:
                ID_f = util.select_file(self.files, 'icpc')
                rows, fields = util.import_data(ID_f, delim=self.delim)
                self.insert_start_baseline(rows, fields)

        # add stroke value to each patient
        stroke_loaded = False
        if self.already_processed == True:
            try:
                stroke_f = util.select_file(self.pickle_files, 'stroke_dict')
                self.id2data = load_obj(stroke_f)
                print('xxx')  # debug marker: strokes came from the pickle
                stroke_loaded = True
            except (TypeError, ValueError):
                # pickle not usable, fall through to the csv file
                pass

        if not stroke_loaded:
            stroke_f = util.select_file(self.files, 'icpc')
            rows, fields = util.import_data(stroke_f, delim=self.delim)
            self.get_stroke_occurrences(rows, fields)

        # randomize dates if non-survival
        if self.survival == False:
            self.insert_data_intervals()
        else:
            self.insert_survival_intervals()

        # One entry per data source, handled in this order:
        #   (key in needs_processing,
        #    label used in the progress messages,
        #    name of the method that rebuilds the source from its csv,
        #    pickle (data, headers) names for survival mode,
        #    pickle (data, headers) names for regular mode)
        # Method names are kept as strings so a method is only looked up
        # when its source is actually processed.
        sources = (
            # NOTE(review): 'atc0_survival' / 'atc0_headers0' break the
            # '<name>_dict0_survival' / '<name>_headers0' naming used by
            # every other source; left unchanged -- confirm the pickle names
            ('medication', 'medication', 'process_medication',
             [('atc0_survival', 'atc0_headers0')],
             [('atc_dict0', 'atc_headers0')]),
            ('consults', 'consults', 'process_consults',
             [('consults_dict0_survival', 'consults_headers0')],
             [('consults_dict0', 'consults_headers0')]),
            ('actions', 'actions', 'process_actions',
             [('actions_dict0_survival', 'actions_headers0')],
             [('actions_dict0', 'actions_headers0')]),
            # original author's question: is only this one suited for
            # temporal processing?
            ('icpc', 'ICPC', 'process_icpc',
             [('icpc_dict0_survival', 'icpc_headers0')],
             [('icpc_dict0', 'icpc_headers0')]),
            ('lab_results', 'lab results', 'process_labresults',
             [('lab_results_dict0_survival', 'lab_results_headers0')],
             [('lab_results_dict0', 'lab_results_headers0')]),
            # fixed: survival mode used to load 'consults_headers0' here
            ('smoking', 'smoking', 'process_smoking',
             [('smoking_dict0_survival', 'smoking_headers0'),
              ('smoking_dict1_survival', 'smoking_headers1')],
             [('smoking_dict0', 'smoking_headers0'),
              ('smoking_dict1', 'smoking_headers1')]),
            ('bmi', 'bmi', 'process_bmi',
             [('bmi_dict0_survival', 'bmi_headers0'),
              ('bmi_dict1_survival', 'bmi_headers1'),
              ('bmi_dict2_survival', 'bmi_headers2')],
             [('bmi_dict0', 'bmi_headers0'),
              ('bmi_dict1', 'bmi_headers1'),
              ('bmi_dict2', 'bmi_headers2')]),
            ('allergies', 'allergies', 'process_allergies',
             [('allergies_dict0_survival', 'allergies_headers0')],
             [('allergies_dict0', 'allergies_headers0')]),
            # survival mode loads only the first blood pressure pickle; the
            # second one was disabled in the original code
            ('blood_pressure', 'blood pressure', 'process_bloodpressure',
             [('blood_pressure_dict0_survival', 'blood_pressure_headers0')],
             [('blood_pressure_dict0', 'blood_pressure_headers0'),
              ('blood_pressure_dict1', 'blood_pressure_headers1')]),
            ('alcohol', 'alcohol', 'process_alcohol',
             [('alcohol_dict0_survival', 'alcohol_headers0')],
             [('alcohol_dict0', 'alcohol_headers0')]),
            # fixed: a missing 'else' made survival mode load the regular
            # pickle on top of the survival one
            ('renal_function', 'renal function', 'process_renalfunction',
             [('renal_function_dict0_survival', 'renal_function_headers0')],
             [('renal_function_dict0', 'renal_function_headers0')]),
            # fixed: survival mode used to load 'renal_function_headers0'
            ('cardiometabolism', 'cardiometabolism',
             'process_cardiometabolism',
             [('cardiometabolism_dict0_survival',
               'cardiometabolism_headers0')],
             [('cardiometabolism_dict0', 'cardiometabolism_headers0')]),
            ('lab_blood', 'lab blood', 'process_lab_blood',
             [('lab_blood_dict0_survival', 'lab_blood_headers0')],
             [('lab_blood_dict0', 'lab_blood_headers0')]),
            ('lung_function', 'lung function', 'process_lung_function',
             [('lung_function_dict0_survival', 'lung_function_headers0')],
             [('lung_function_dict0', 'lung_function_headers0')]),
        )

        for name, label, method_name, survival_pairs, regular_pairs in sources:
            if name in needs_processing and needs_processing[name]:
                print('...processing ' + label)
                self._load_or_process(label, method_name, survival_pairs,
                                      regular_pairs)

        # move stroke indicator to end of each instance data list
        self.move_target_to_end_of_list()

        # append target element to headers
        self.headers.append('target')

        # collect the patients whose first stroke date lies before 2007;
        # they are deleted after the loop because the dict cannot change
        # size while it is being iterated
        to_remove = []

        for key, d in self.id2data.items():
            first_date = d['stroke_dates'][0]
            if self.survival == True:
                print(first_date)
                # a list entry carries no single date to check
                has_date = not isinstance(first_date, list)
            else:
                has_date = str(first_date) != 'negative'

            # the text before the first '-' is read as the year
            if has_date and int(str(first_date).split('-')[0]) < 2007:
                to_remove.append(key)

        print(len(to_remove))
        for key in to_remove:
            del self.id2data[key]

    def _load_or_process(self, label, method_name, survival_pairs,
                         regular_pairs):
        '''Load one data source from pickles, or rebuild it from its csv.

        label -- source name used in the fallback message
        method_name -- name of the method on self that processes the csv
        survival_pairs -- (data, headers) pickle names used when
            self.survival is True
        regular_pairs -- (data, headers) pickle names used otherwise

        Pickles are only tried when self.already_processed is True. If
        load_data raises TypeError or ValueError, the csv method is run
        instead; pickles loaded before the failure are not undone.
        '''
        if self.already_processed == True:
            if self.survival == True:
                pairs = survival_pairs
            else:
                pairs = regular_pairs
            try:
                for data_name, headers_name in pairs:
                    self.load_data(data_name, headers_name)
                return
            except (TypeError, ValueError):
                print('Data not available, processing %s data' % label)

        getattr(self, method_name)()

示例#18

0

显示文件

    def process_csv(self, needs_processing):
        '''Convert the csv files in self.in_dir to usable per-patient data.

        Loads the patient IDs and the CRC occurrences, inserts the data
        intervals, then processes every data source whose key is truthy in
        needs_processing ('medication', 'consults', 'referrals',
        'comorbidity', 'lab_results'). Finally the target is moved to the
        end of each instance and the collected headers, with 'target'
        appended, are stored in self.headers.

        needs_processing -- mapping of source name to a flag; missing or
            falsy entries are skipped.
        '''

        # get all csv's in the input folder
        files = util.list_dir_csv(self.in_dir)

        # put the IDs of the 'main' file in a dict
        ID_f = util.select_file(files, 'patient')
        rows, fields = util.import_data(ID_f, delim=self.delim)
        headers = self.get_IDs(rows, fields)

        # add CRC value to each patient
        CRC_f = util.select_file(files, 'journaal')
        rows, fields = util.import_data(CRC_f, delim=self.delim)
        self.get_CRC_occurrences(rows, fields)

        # randomize dates
        self.insert_data_intervals()

        # Progress messages use print(...) with a single argument, which
        # prints the same text on Python 2 and is valid syntax on Python 3.

        # gather data from medication csv
        if 'medication' in needs_processing and needs_processing['medication']:
            print('...processing medication')
            med_f = util.select_file(files, 'medicatie')
            rows, fields = util.import_data(med_f, delim=self.delim)
            med_headers, self.num_med, self.num_med_pos = self.insert_data(
                rows,
                fields,
                'atc_code', ['voorschrijfdatum', 'voorschrijfdatum'],
                '[A-Z][0-9][0-9]',
                3,
                suffix='atc')
            headers = headers + med_headers

        # gather data from consult csv
        if 'consults' in needs_processing and needs_processing['consults']:
            print('...processing consults')
            consult_f = util.select_file(files, 'journaal')
            rows, fields = util.import_data(consult_f, delim=self.delim)
            consult_headers, self.num_cons, self.num_cons_pos = self.insert_data(
                rows,
                fields,
                'icpc', ['datum', 'datum'],
                '[A-Z][0-9][0-9]',
                3,
                incorporate_SOEP='soepcode')
            headers = headers + consult_headers

        # gather data from referral csv
        if 'referrals' in needs_processing and needs_processing['referrals']:
            print('...processing referrals')
            ref_f = util.select_file(files, 'verwijzing')
            rows, fields = util.import_data(ref_f, delim=self.delim)
            ref_headers, _, _ = self.insert_data(rows, fields, 'specialisme',
                                                 ['datum', 'datum'], '.*',
                                                 None)
            headers = headers + ref_headers

        # gather data from comorbidity csv
        if 'comorbidity' in needs_processing and needs_processing[
                'comorbidity']:
            print('...processing comorbidity')
            comor_f = util.select_file(files, 'comorbiditeit')
            rows, fields = util.import_data(comor_f, delim=self.delim)
            comor_headers, _, _ = self.insert_data(rows,
                                                   fields,
                                                   'omschrijving',
                                                   ['begindatum', 'einddatum'],
                                                   '.+',
                                                   None,
                                                   suffix='comorbiditeit')
            headers = headers + comor_headers

        # gather data from lab results csv
        if 'lab_results' in needs_processing and needs_processing[
                'lab_results']:
            print('...processing lab results')
            lab_f = util.select_file(files, 'bepaling')
            rows, fields = util.import_data(lab_f, delim=self.delim)
            lab_headers, self.num_lab, self.num_lab_pos = self.insert_data(
                rows,
                fields,
                'code', ['datum', 'datum'],
                '.+',
                None,
                suffix='lab_results')
            headers = headers + lab_headers

        # move CRC indicator to end of each instance data list
        self.move_target_to_end_of_list()

        # append target element to headers, add to class var
        headers.append('target')
        self.headers = headers