def next_row(line, output_row, output_column_names, field_index):
    """Fill ``output_row`` with one value per disease for a single input row.

    For every key of the module-level ``diseases`` mapping, the entry in
    ``output_row`` starts at the sentinel 2000. It is lowered to the row's
    ICD_YEAR value when at least one ICD10 or ICD9 code matches, and to the
    row's OPCS_YEAR value when at least one OPCS code matches.
    You may modify this function.

    Args:
        line: One input row. It is indexed directly for the two year fields
            and handed to ``opf.get_patient_vals`` for the code fields.
        output_row: Dictionary updated in place and returned.
        output_column_names: Not used by this implementation.
        field_index: Mapping with the keys 'ICD_YEAR', 'OPCS_YEAR', 'ICD10',
            'ICD9' and 'OPCS'.

    Returns:
        The same ``output_row`` object that was passed in.
    """
    # Convert both year fields to float once so min() compares numbers.
    year_at_icd = float(line[field_index['ICD_YEAR']])
    year_at_opcs = float(line[field_index['OPCS_YEAR']])

    # Read the patient's values for each coding system once per row.
    patient_icd10 = opf.get_patient_vals(line, field_index['ICD10'])
    patient_icd9 = opf.get_patient_vals(line, field_index['ICD9'])
    patient_opcs = opf.get_patient_vals(line, field_index['OPCS'])

    for name in diseases:
        # 2000 is the "no matching code" sentinel for this disease.
        output_row[name] = 2000
        wanted = diseases[name]

        # Codes from the row that belong to this disease, per coding system.
        icd10_hits = opf.match_codes(wanted['ICD10'], patient_icd10)
        icd9_hits = opf.match_codes(wanted['ICD9'], patient_icd9)
        opcs_hits = opf.match_codes(wanted['OPCS'], patient_opcs)

        has_icd_match = len(icd10_hits) + len(icd9_hits) > 0
        has_opcs_match = len(opcs_hits) > 0

        # Keep the smallest value among the sentinel and the matching years.
        if has_icd_match:
            output_row[name] = min(output_row[name], year_at_icd)
        if has_opcs_match:
            output_row[name] = min(output_row[name], year_at_opcs)

    return output_row
# Example no. 2
# 0
def next_row(line, output_row, output_column_names, field_index):
    """Record AFIB codes and age-filtered 'other' malformations for one row.

    Writes four keys into ``output_row``:

    * 'AFIB_Codes': comma-separated matches against ``afib_icd10_codes``
      and ``afib_icd9_codes``.
    * 'Earliest_Afib_Diagnosis': the row's ICD_YEAR value when 'AFIB_Codes'
      is non-empty, otherwise 2000.
    * 'Other_Malformations': comma-separated matches against
      ``icd10_mapping`` whose ``code_age_mapping`` threshold is greater than
      the row's ICD_YEAR value.
    * 'Minimum_Age_At_Other_Malformations': the row's ICD_YEAR value when
      'Other_Malformations' is non-empty, otherwise 2000.

    You may modify this function.

    Args:
        line: One input row, passed through to ``opf.get_patient_vals``.
        output_row: Dictionary updated in place and returned.
        output_column_names: Not used by this implementation.
        field_index: Mapping with the keys 'ICD_YEAR', 'ICD10' and 'ICD9'.

    Returns:
        The same ``output_row`` object that was passed in.
    """
    no_age = 2000  # sentinel used whenever no age is available

    # The ICD_YEAR lookup can come back empty; fall back to the sentinel in
    # that case, otherwise take one value out of the returned collection.
    icd_ages = opf.get_patient_vals(line, field_index['ICD_YEAR'])
    age_at_icd_code = icd_ages.pop() if len(icd_ages) != 0 else no_age

    # Patient values for each type of code.
    patient_icd10 = opf.get_patient_vals(line, field_index['ICD10'])
    patient_icd9 = opf.get_patient_vals(line, field_index['ICD9'])

    # ---- AFIB codes and the age attached to them ------------------------
    afib_matches = (opf.match_codes(afib_icd10_codes, patient_icd10) +
                    opf.match_codes(afib_icd9_codes, patient_icd9))
    afib_text = ','.join(afib_matches)
    output_row['AFIB_Codes'] = afib_text
    output_row['Earliest_Afib_Diagnosis'] = (
        age_at_icd_code if len(afib_text) > 0 else no_age)

    # ---- 'Other' codes translated to malformations ----------------------
    # Keep only the malformations whose age threshold exceeds this row's age.
    candidates = opf.match_codes(icd10_mapping, patient_icd10)
    kept = [
        malformation for malformation in candidates
        if float(age_at_icd_code) < code_age_mapping[malformation]
    ]
    other_text = ','.join(kept)
    output_row['Other_Malformations'] = other_text
    output_row['Minimum_Age_At_Other_Malformations'] = (
        age_at_icd_code if len(other_text) > 0 else no_age)

    return output_row
# Example no. 3
# 0
def next_row(line, output_row, output_column_names, field_index):
    """Build one line of output from one row of the biobank file.

    Called once per row. ``output_row`` is a dictionary of values that will
    become one line of the output file. All biobank values are strings:
    convert to float or int before comparing numerical values, but leave
    codes that may start with 0 (e.g. ICD9) as strings.
    You may modify this function.

    Keys written to ``output_row``:

    * 'Year_Of_Birth': first value found for the Year_Of_Birth field.
    * one 0/1 flag per entry of ``single_icd10_codes_of_interest``.
    * 'Stroke': 1 if any ICD9, ICD10 or self-reported stroke code matches.
    * 'Minimum_Age_At_SelfReported_Malformation': first value returned by
      ``opf.get_min_age``.
    * 'SelfReported_Malformations': comma-separated translation of the
      self-reported codes returned by ``opf.get_min_age``.

    Args:
        line: One input row, passed through to ``opf.get_patient_vals``.
        output_row: Dictionary updated in place and returned.
        output_column_names: Not used by this implementation.
        field_index: Mapping with the keys 'ICD10', 'ICD9', 'SelfRep_MC' and
            'Year_Of_Birth'.

    Returns:
        The same ``output_row`` object that was passed in.
    """
    # All of the patient's codes, per source.
    patient_icd10 = opf.get_patient_vals(line, field_index['ICD10'])
    patient_icd9 = opf.get_patient_vals(line, field_index['ICD9'])
    patient_selfrep = opf.get_patient_vals(line, field_index['SelfRep_MC'])
    patient_selfrep_with_ages = opf.get_patient_vals(
        line, field_index['SelfRep_MC'], return_both=True)

    birth_years = opf.get_patient_vals(line, field_index['Year_Of_Birth'])
    output_row['Year_Of_Birth'] = list(birth_years)[0]

    # ---- One 0/1 column per single ICD10 code of interest ---------------
    for code in single_icd10_codes_of_interest:
        hits = opf.match_codes([code], patient_icd10)
        output_row[code] = 1 if len(hits) > 0 else 0

    # ---- Stroke: any match across ICD9, ICD10 and self-reported codes ---
    stroke_icd9 = opf.match_codes(stroke_codes['ICD9'], patient_icd9)
    stroke_icd10 = opf.match_codes(stroke_codes['ICD10'], patient_icd10)
    stroke_selfrep = opf.match_codes(stroke_codes['SelfRep_MC'],
                                     patient_selfrep)
    stroke_hits = stroke_icd9 + stroke_icd10 + stroke_selfrep
    output_row['Stroke'] = 1 if len(stroke_hits) > 0 else 0

    # ---- Self-reported malformations and their minimum age --------------
    min_age, codes_meeting_age = opf.get_min_age(
        patient_selfrep_with_ages,
        selfreported_med_codes,
        age_criteria_dict=selfreported_code_age_mapping)
    output_row['Minimum_Age_At_SelfReported_Malformation'] = min_age

    # Translate the self-reported codes to malformations; join() turns the
    # list into a comma-separated string. To inspect the untranslated
    # values while learning the program, print codes_meeting_age, the
    # translated list, the joined string and the minimum age for a patient
    # that has more than one code, then exit.
    translated = opf.match_codes(selfreported_med_codes, codes_meeting_age)
    output_row['SelfReported_Malformations'] = ','.join(translated)

    return output_row
) + ' rows in the UKB file. The program will output the total number of lines processed below: '

# Walk the input rows one at a time. 'e' is the row counter supplied by
# enumerate (intended for displaying how many patient rows were processed).
for e, line in enumerate(lines):

    line = line.split(
        '\t')  # Split the tab-separated row into a list of values.

    # Start this patient's output record with the Patient ID (first column).
    # An empty or whitespace-only ID is treated as the end of the data, so
    # the loop stops there.
    newline = {'Patient_ID': line[0]}
    if newline['Patient_ID'].isspace() or len(newline['Patient_ID']) == 0:
        break
    ALL_UKB_COUNT += 1  # One more patient row seen.

    # Per the original author's note, get_patient_vals returns a set and
    # Year of Birth has exactly one value, so pop() takes that value out.
    # NOTE(review): pop() would raise on an empty result -- confirm every
    # row has a Year of Birth.
    newline['Year_Of_Birth'] = opf.get_patient_vals(line,
                                                    year_of_birth_index).pop()

    # Patient values for each code source (ICD10, ICD9, OPCS and the two
    # self-reported fields).
    icd10_line = opf.get_patient_vals(line, icd10_index)
    icd9_line = opf.get_patient_vals(line, icd9_index)
    opcs_line = opf.get_patient_vals(line, opcs_index)
    selfrep_mc_line = opf.get_patient_vals(line, selfrep_mc_index)
    selfrep_op_line = opf.get_patient_vals(line, selfrep_op_index)

    # INCLUSION
    # Concatenate the matches from all five code sources against their
    # respective '*_mals' code tables.

    inclusion = opf.match_codes(icd10_mals, icd10_line) +\
    opf.match_codes(icd9_mals, icd9_line) +\
    opf.match_codes(opcs_mals, opcs_line) +\
    opf.match_codes(selfrep_mc_mals, selfrep_mc_line) +\
    opf.match_codes(selfrep_op_mals, selfrep_op_line)
# Column names: the Patient ID first, then one column per entry of
# 'diseases'.
fieldnames = ['Patient_ID']
for d in diseases:
    fieldnames.append(d)


# Walk the input rows one at a time. 'e' is the row counter supplied by
# enumerate (intended for displaying how many patient rows were processed).
for e, line in enumerate(lines):

    line = line.split('\t') # Split the tab-separated row into a list of values.

    # Start this patient's output record with the Patient ID (first column).
    # An empty or whitespace-only ID is treated as the end of the data, so
    # the loop stops there.
    newline = {'Patient_ID': line[0]}
    if newline['Patient_ID'].isspace() or len(newline['Patient_ID']) == 0:
        break

    # Patient values for each code source (ICD10, ICD9, OPCS and the two
    # self-reported fields).
    icd10_line = opf.get_patient_vals(line, icd10_index)
    icd9_line = opf.get_patient_vals(line, icd9_index)
    opcs_line = opf.get_patient_vals(line, opcs_index)
    selfrep_mc_line = opf.get_patient_vals(line, selfrep_mc_index)
    selfrep_op_line = opf.get_patient_vals(line, selfrep_op_index)

    # Same self-reported fields again, this time with return_both=True.
    # Per the original author's note the result has the form
    # [(code, age), (code2, age2), ...] -- codes with their corresponding ages.
    selfrep_mc_codes_and_ages = opf.get_patient_vals(line, selfrep_mc_index, return_both=True)
    selfrep_op_codes_and_ages = opf.get_patient_vals(line, selfrep_op_index, return_both=True)

    for d in diseases:

        # Self-reported patient codes that match this disease's
        # 'SELFREP_MC_20002' and 'SELFREP_OP_20004' code lists.
        selfrep_mc_disease_codes = opf.match_codes(diseases[d]['SELFREP_MC_20002'], selfrep_mc_line)
        selfrep_op_disease_codes = opf.match_codes(diseases[d]['SELFREP_OP_20004'], selfrep_op_line)
# Example no. 6
# 0
    #print attrib_conversion
    #print attrib_conversion.keys()

    # Convert every input row into a dictionary of attribute values and
    # collect the dictionaries in line_list.
    for e, line in enumerate(lines):
        # Fix space issue in textfile: if a space appears within the first
        # 9 characters, replace the spaces in that prefix with tabs so the
        # row splits correctly. 'newline' is a patient row to be added to
        # the output later. Patient ID is added first.
        if " " in line[:9]:
            line = line[:9].replace(" ", "\t")+line[9:]
        line = line.split('\t')

        # An empty or whitespace-only Patient ID is treated as the end of
        # the data, so the loop stops there.
        newline = {'Patient_ID': line[0]}
        if newline['Patient_ID'].isspace() or len(newline['Patient_ID']) == 0:
            break

        for a in attrib_index:
            # Patient values for attribute 'a'.
            attrib_values = opf.get_patient_vals(line, attrib_index[a])

            # Default passed to the conversion: None unless the attribute
            # is one of the special cases below.
            default_value = None
            if a == 'Qualifications (college)':
                default_value = 'NoCollege'
            if a in ('Paternal_CVD', 'Maternal_CVD'):
                default_value = 0

            # Reduce the values to a single output value using this
            # attribute's conversion table and selected input value.
            newline[a] = opf.single_output_conversion(list_of_values=attrib_values, conversion=attrib_conversion[a], default_value=default_value, input_value=attrib_select[a])


        line_list.append(newline)

        # Progress indicator: print the row counter every 25000 rows
        # (Python 2 print statement).
        if e%25000 == 0:
            print e