def _year_or_default(raw_value, default=2000):
    """Return *raw_value* converted to float, or *default* if it is not numeric.

    float() raises ValueError for blank or otherwise non-numeric strings.
    Such values are mapped to the same 2000 sentinel that next_row uses for
    "no matching code", so a row with a missing year cannot abort the run
    and can never lower a disease's minimum.
    """
    try:
        return float(raw_value)
    except ValueError:
        return default


def next_row(line, output_row, output_column_names, field_index):
    """Fill ``output_row`` with one value per disease for this input row.

    You may modify this function.

    For every key ``d`` of the module-level ``diseases`` mapping,
    ``output_row[d]`` is set to 2000 (the "not found" default) and then
    lowered to the row's ICD_YEAR value if any ICD10/ICD9 code of the disease
    matches, and to the row's OPCS_YEAR value if any OPCS code matches.

    Args:
        line: the input row as a sequence of string fields.
        output_row: dictionary that becomes one line of output; updated in
            place and returned.
        output_column_names: unused here; kept for interface compatibility.
        field_index: mapping with the keys 'ICD_YEAR', 'OPCS_YEAR', 'ICD10',
            'ICD9' and 'OPCS'.  The two year entries are used to index
            ``line`` directly; the code entries are handed to
            ``opf.get_patient_vals``.

    Returns:
        The same ``output_row`` object, with one entry per disease.
    """
    # Year fields arrive as strings and must be numeric for min() below.
    # A blank field falls back to the 2000 sentinel instead of raising.
    icd_year = _year_or_default(line[field_index['ICD_YEAR']])
    opcs_year = _year_or_default(line[field_index['OPCS_YEAR']])

    # Patient values for each type of code.
    icd10_line = opf.get_patient_vals(line, field_index['ICD10'])
    icd9_line = opf.get_patient_vals(line, field_index['ICD9'])
    opcs_line = opf.get_patient_vals(line, field_index['OPCS'])

    for d in diseases:
        output_row[d] = 2000  # Default when no code of this disease matches

        # ICD or OPCS codes on this row that match the disease
        icd10_disease_codes = opf.match_codes(diseases[d]['ICD10'], icd10_line)
        icd9_disease_codes = opf.match_codes(diseases[d]['ICD9'], icd9_line)
        opcs_disease_codes = opf.match_codes(diseases[d]['OPCS'], opcs_line)

        # Keep the minimum over the sources that actually matched
        if icd10_disease_codes or icd9_disease_codes:
            output_row[d] = min(output_row[d], icd_year)
        if opcs_disease_codes:
            output_row[d] = min(output_row[d], opcs_year)

    return output_row
def next_row(line, output_row, output_column_names, field_index):
    """Fill the AFIB and 'other malformation' columns of ``output_row``.

    You may modify this function.

    Writes four keys into ``output_row`` and returns it:
      * 'AFIB_Codes' - comma-separated matches against afib_icd10_codes and
        afib_icd9_codes.
      * 'Earliest_Afib_Diagnosis' - the row's ICD_YEAR value when any AFIB
        code matched, otherwise 2000.
      * 'Other_Malformations' - comma-separated matches against icd10_mapping
        whose age threshold in code_age_mapping is above the row's age.
      * 'Minimum_Age_At_Other_Malformations' - the row's ICD_YEAR value when
        at least one such malformation qualified, otherwise 2000.

    ``output_column_names`` is not used here.
    """
    # Age for this admission.  Per the original notes, get_patient_vals
    # returns a set of unique non-blank values, so it may be empty; pop()
    # then removes and returns an arbitrary element of that set.  2000 is
    # the sentinel used when no age is available.
    icd_ages = opf.get_patient_vals(line, field_index['ICD_YEAR'])
    age_at_icd_code = icd_ages.pop() if icd_ages else 2000

    # Patient values for each type of code
    icd10_values = opf.get_patient_vals(line, field_index['ICD10'])
    icd9_values = opf.get_patient_vals(line, field_index['ICD9'])

    # ---- AFIB codes and the age at which they were recorded --------------
    afib_codes = ','.join(
        opf.match_codes(afib_icd10_codes, icd10_values)
        + opf.match_codes(afib_icd9_codes, icd9_values))
    output_row['AFIB_Codes'] = afib_codes
    output_row['Earliest_Afib_Diagnosis'] = (
        age_at_icd_code if afib_codes else 2000)

    # ---- 'Other' codes translated to malformations -----------------------
    # Only malformations whose age criterion is met are kept.  The age is
    # converted to float for the comparison because biobank values arrive
    # as strings.
    qualifying = [
        malformation
        for malformation in opf.match_codes(icd10_mapping, icd10_values)
        if float(age_at_icd_code) < code_age_mapping[malformation]
    ]
    other_malformations = ','.join(qualifying)
    output_row['Other_Malformations'] = other_malformations
    output_row['Minimum_Age_At_Other_Malformations'] = (
        age_at_icd_code if other_malformations else 2000)

    return output_row
def next_row(line, output_row, output_column_names, field_index):
    """Handle one row of the biobank file and fill in its output values.

    Expects ``output_row`` to be a dictionary of values that will become one
    line of the output file; it is updated in place and returned.  All
    biobank values are strings: convert to float or int before comparing
    numerically, but leave codes that may start with 0 (e.g. ICD9) as
    strings.  You may modify this function.

    Keys written: 'Year_Of_Birth', one 0/1 flag per entry of
    single_icd10_codes_of_interest, 'Stroke' (0/1),
    'Minimum_Age_At_SelfReported_Malformation' and
    'SelfReported_Malformations'.  ``output_column_names`` is not used here.
    """
    # All of the patient's codes, by source
    icd10_values = opf.get_patient_vals(line, field_index['ICD10'])
    icd9_values = opf.get_patient_vals(line, field_index['ICD9'])
    selfrep_values = opf.get_patient_vals(line, field_index['SelfRep_MC'])
    selfrep_values_with_ages = opf.get_patient_vals(
        line, field_index['SelfRep_MC'], return_both=True)

    # First value reported for the year-of-birth field
    output_row['Year_Of_Birth'] = list(
        opf.get_patient_vals(line, field_index['Year_Of_Birth']))[0]

    # ----------------------------------------------------------------------
    # One 0/1 indicator column per individual ICD10 code of interest
    for code in single_icd10_codes_of_interest:
        output_row[code] = 1 if opf.match_codes([code], icd10_values) else 0

    # ----------------------------------------------------------------------
    # Stroke: flagged when any ICD9, ICD10 or self-reported code matches
    stroke_matches = (
        opf.match_codes(stroke_codes['ICD9'], icd9_values)
        + opf.match_codes(stroke_codes['ICD10'], icd10_values)
        + opf.match_codes(stroke_codes['SelfRep_MC'], selfrep_values))
    output_row['Stroke'] = 1 if stroke_matches else 0

    # ----------------------------------------------------------------------
    # Self-reported codes: minimum age plus the codes meeting the age criteria
    min_age, codes_with_age_met = opf.get_min_age(
        selfrep_values_with_ages,
        selfreported_med_codes,
        age_criteria_dict=selfreported_code_age_mapping)
    output_row['Minimum_Age_At_SelfReported_Malformation'] = min_age

    # match_codes translates the self-reported codes to malformations (you
    # can also translate them yourself); ','.join turns the resulting list
    # into a comma-separated string.
    output_row['SelfReported_Malformations'] = ','.join(
        opf.match_codes(selfreported_med_codes, codes_with_age_met))

    # Optional walkthrough (disabled).  It shows the values above, without
    # and with translation, for the first patient that has more than one
    # qualifying code, then exits.  It may process a few lines before
    # printing.  It is only for understanding the program and can be removed.
    #
    # if len(codes_with_age_met) > 1:
    #     print '\n----------------------------'
    #     print 'SelfReported Medical Codes:'
    #     print codes_with_age_met
    #     print '\n'
    #     print 'Self-Reported Medical Malformations:'
    #     print opf.match_codes(selfreported_med_codes, codes_with_age_met)
    #     print '\n'
    #     print 'Self-Reported Medical Malformations as Comma-Separated String:'
    #     print output_row['SelfReported_Malformations']
    #     print '\n'
    #     print 'Minimum Age For Self-Reported Codes:'
    #     print output_row['Minimum_Age_At_SelfReported_Malformation']
    #     print '----------------------------'
    #     sys.exit()

    return output_row
) + ' rows in the UKB file. The program will output the total number of lines processed below: ' # 'e' is for displaying how many patient rows we have processed for e, line in enumerate(lines): line = line.split( '\t') # We get a list of values after tab-separating the row. # Writes the Patient ID to a new line in the file. If there are no more non-blank lines in the file, continue to the next portion newline = {'Patient_ID': line[0]} if newline['Patient_ID'].isspace() or len(newline['Patient_ID']) == 0: break ALL_UKB_COUNT += 1 # Returns a set of values associated with Year of Birth. Since we know there is only one value, we "pop" that value out of the set newline['Year_Of_Birth'] = opf.get_patient_vals(line, year_of_birth_index).pop() icd10_line = opf.get_patient_vals(line, icd10_index) icd9_line = opf.get_patient_vals(line, icd9_index) opcs_line = opf.get_patient_vals(line, opcs_index) selfrep_mc_line = opf.get_patient_vals(line, selfrep_mc_index) selfrep_op_line = opf.get_patient_vals(line, selfrep_op_index) # INCLUSION inclusion = opf.match_codes(icd10_mals, icd10_line) +\ opf.match_codes(icd9_mals, icd9_line) +\ opf.match_codes(opcs_mals, opcs_line) +\ opf.match_codes(selfrep_mc_mals, selfrep_mc_line) +\ opf.match_codes(selfrep_op_mals, selfrep_op_line)
# Output columns: the patient ID followed by one column per disease.
fieldnames = ['Patient_ID'] + list(diseases)

# 'e' is for displaying how many patient rows we have processed
for e, line in enumerate(lines):
    # Tab-separating the row gives a list of values.
    line = line.split('\t')

    # Start this patient's output row with the Patient ID.  A blank ID means
    # there are no more non-blank lines in the file, so stop reading and
    # continue to the next portion.
    newline = {'Patient_ID': line[0]}
    if len(newline['Patient_ID']) == 0 or newline['Patient_ID'].isspace():
        break

    # Patient codes by source
    icd10_line = opf.get_patient_vals(line, icd10_index)
    icd9_line = opf.get_patient_vals(line, icd9_index)
    opcs_line = opf.get_patient_vals(line, opcs_index)
    selfrep_mc_line = opf.get_patient_vals(line, selfrep_mc_index)
    selfrep_op_line = opf.get_patient_vals(line, selfrep_op_index)

    # Codes with their corresponding ages:
    # patient_codes = [(code, age), (code2, age2), ...]
    selfrep_mc_codes_and_ages = opf.get_patient_vals(
        line, selfrep_mc_index, return_both=True)
    selfrep_op_codes_and_ages = opf.get_patient_vals(
        line, selfrep_op_index, return_both=True)

    for d in diseases:
        # Self-reported patient codes matching this disease
        selfrep_mc_disease_codes = opf.match_codes(
            diseases[d]['SELFREP_MC_20002'], selfrep_mc_line)
        selfrep_op_disease_codes = opf.match_codes(
            diseases[d]['SELFREP_OP_20004'], selfrep_op_line)
# Debug helpers (disabled):
# print attrib_conversion
# print attrib_conversion.keys()
for e, line in enumerate(lines):
    # Fix space issue in textfile: a space within the first 9 characters
    # is turned into a tab so the row splits correctly.
    if " " in line[:9]:
        line = line[:9].replace(" ", "\t") + line[9:]
    line = line.split('\t')

    # 'newline' is a patient row to be added to the output later.
    # Patient ID is added first; a blank ID ends the loop.
    newline = {'Patient_ID': line[0]}
    if len(newline['Patient_ID']) == 0 or newline['Patient_ID'].isspace():
        break

    for a in attrib_index:
        attrib_values = opf.get_patient_vals(line, attrib_index[a])

        # Fallback value passed to the conversion for this attribute
        if a == 'Qualifications (college)':
            default_value = 'NoCollege'
        elif a in ('Paternal_CVD', 'Maternal_CVD'):
            default_value = 0
        else:
            default_value = None

        newline[a] = opf.single_output_conversion(
            list_of_values=attrib_values,
            conversion=attrib_conversion[a],
            default_value=default_value,
            input_value=attrib_select[a])

    line_list.append(newline)

    # Progress indicator every 25000 rows
    if e % 25000 == 0:
        print(e)