Exemplo n.º 1
0
        print "Error: cannot parse list of API (url, key) pairs. Each line in text file must contain the API url and API key for a single project separated by a space."
        sys.exit()
# Unpack API (url, key) pairs parsed above into per-project credentials.
# api_pairs is built earlier in the file; index 0 (Archive) is currently
# unused, index 1 is IPSS, index 2 is PSOM.
#api_url_arch = api_pairs[0][0]
#api_key_arch = api_pairs[0][1]
api_url_ipss = api_pairs[1][0]
api_key_ipss = api_pairs[1][1]
api_url_psom = api_pairs[2][0]
api_key_psom = api_pairs[2][1]

# Get list of record IDs in IPSS. Exclude SickKids registry-only patients.
#record_ids = getRecordIDList(api_url_ipss, api_key_ipss)
#registry_data = exportRecords(api_url_ipss, api_key_ipss, record_id_list=record_ids, fields=["substud"], events=["acute_arm_1"])
#for row in registry_data:
#    if (row["substud___8"] == "1"):
#        record_ids.remove(row["ipssid"])
# Cohort record-ID lists; filtering semantics live in getIPSSIDs (defined
# elsewhere in the project) — presumably each inc_* flag set to False
# excludes that group. TODO(review): confirm against getIPSSIDs.
record_ids = getIPSSIDs(inc_registry_only=False, inc_unknown_stroke_type=False)
# Same cohort without pre-2014 records (per inc_pre_2014=False).
record_ids_post_2014 = getIPSSIDs(inc_registry_only=False,
                                  inc_unknown_stroke_type=False,
                                  inc_pre_2014=False)
# Same cohort without SickKids patients (per inc_sk_patients=False).
record_ids_non_sk = getIPSSIDs(inc_registry_only=False,
                               inc_unknown_stroke_type=False,
                               inc_sk_patients=False)
# Cohort drawn from the PSOM database instead of IPSS.
record_ids_psom = getIPSSIDs(db="psom",
                             inc_registry_only=False,
                             inc_unknown_stroke_type=False)

# Load REDCap project (a PyCap object).
project = redcap.Project(api_url_ipss, api_key_ipss)
def_field = project.def_field  # project's defining (record ID) field, per PyCap
project_info = exportProjectInfo(api_url_ipss, api_key_ipss)
# True when the project has multiple events per record.
project_longitudinal = bool(project_info["is_longitudinal"])
Exemplo n.º 2
0
def getPatientInfo(url_arch, url_ipss, key_arch, key_ipss):
    """Collect per-patient info from the Archive and IPSS REDCap projects.

    For each non-registry record with a known stroke type, gathers:
      - "enroll_date": 4-digit enrolment year (int), taken from the first
        non-blank of Archive 'dateofentry', Archive 'visit_date', then IPSS
        'dateentered' (key absent if none found);
      - "dag": REDCap data access group (Archive value wins; IPSS fills blanks);
      - "stroke_type": dict of flags, values "0" (no) / "1" (yes) / "2" (unknown).

    Parameters:
        url_arch, key_arch -- API URL and key for the Archive project.
        url_ipss, key_ipss -- API URL and key for the IPSS project.

    Returns:
        dict mapping record_id -> {"enroll_date": ..., "dag": ..., "stroke_type": ...}

    Side effects: prints warnings for duplicate/missing data and summary stats.
    """
    ## Get list of record IDs for each project. Exclude registry-only patients. Exclude patients with unknown stroke type.
    #    record_ids_arch = getRecordIDList(url_arch, key_arch)
    #    registry_arch = exportRecords(url_arch, key_arch, record_id_list=record_ids_arch, fields=["registry"], events=["acute_arm_1"])
    #    for row in registry_arch:
    #        if (row["registry"] == "1"):
    #            record_ids_arch.remove(row["pk_patient_id"])
    #    record_ids_ipss = getRecordIDList(url_ipss, key_ipss)
    #    registry_ipss = exportRecords(url_ipss, key_ipss, record_id_list=record_ids_ipss, fields=["substud"], events=["acute_arm_1"])
    #    for row in registry_ipss:
    #        if (row["substud___8"] == "1"):
    #            record_ids_ipss.remove(row["ipssid"])
    #    record_ids_ipss_only = [record_id for record_id in record_ids_ipss if not record_id in record_ids_arch]
    #    for record_id in record_ids_arch:
    #        if (not record_id in record_ids_ipss):
    #            print "Record with ID", record_id, "in Archive, but not in IPSS"

    # Create one list of record ID which are non-registry and have known stroke type.
    record_ids = getIPSSIDs(inc_registry_only=False,
                            inc_unknown_stroke_type=False)

    ## Create dict with patient information: {record_id: {dag:"...", enroll_date:"...", ...} }
    patient_info = {}
    for record_id in record_ids:  # add item (another dict) for each patient in the Archive
        patient_info[record_id] = {}
#        patient_info[record_id]["in_arch"] = True
#        if (record_id in record_ids_ipss):
#            patient_info[record_id]["in_ipss"] = True # boolean describing presence of record in Archive
#        else:
#            patient_info[record_id]["in_ipss"] = False # boolean describing presence of record in IPSS
#    for record_id in record_ids_ipss_only: # add item (another dict) for each patient in the IPSS that has not yet been added.
#        patient_info[record_id] = {}
#        patient_info[record_id]["in_arch"] = False
#        patient_info[record_id]["in_ipss"] = True

## Get enrolment date for each record.
# Archive - Use 'dateofentry', then 'visit_date".
#    print "Project        : Archive"
    dateofentry_arch = exportRecords(url_arch,
                                     key_arch,
                                     record_id_list=record_ids,
                                     fields=["dateofentry"],
                                     events=["acute_arm_1"],
                                     validate=False)
    #    if (len(record_ids_arch) != len(dateofentry_arch)): # look for record id missing from exported data
    #        for record_id in record_ids_arch:
    #            id_in_data = False
    #            for row in dateofentry_arch:
    #                if (row["pk_patient_id"] == record_id):
    #                    id_in_data = True
    #                    break
    #            if (not id_in_data):
    #                print "Record with ID "+str(record_id)+" not found in exported data"
    #    num_missing = 0
    # First pass: take the enrolment year from the first 4 characters of
    # Archive 'dateofentry' (assumes YYYY-... format — TODO confirm).
    for row in dateofentry_arch:
        if (row["dateofentry"] == ""):
            #            num_missing += 1
            pass
        else:
            if ("enroll_date" in patient_info[row["pk_patient_id"]]):
                print "This record was counted twice: " + str(
                    row["pk_patient_id"])
                continue
            patient_info[row["pk_patient_id"]]["enroll_date"] = int(
                row["dateofentry"][:4])

    # NOTE(review): num_missing is recomputed after each pass but is only
    # consumed by the commented-out diagnostic prints below.
    num_missing = len(
        [id for id in record_ids if (not "enroll_date" in patient_info[id])])

    #    print "Field used     : dateofentry"
    #    print "Number missing : ", num_missing

    # Second pass: for records still missing a year, fall back to Archive
    # 'visit_date'.
    record_ids_leftover = [
        id for id in record_ids if (not "enroll_date" in patient_info[id])
    ]
    visit_date_leftover = exportRecords(url_arch,
                                        key_arch,
                                        record_id_list=record_ids_leftover,
                                        fields=["visit_date"],
                                        events=["acute_arm_1"],
                                        validate=False)
    #    num_missing = 0
    for row in visit_date_leftover:
        if (row["visit_date"] == ""):
            #            num_missing += 1
            pass
        else:
            if ("enroll_date" in patient_info[row["pk_patient_id"]]):
                print "This record was counted twice: " + str(
                    row["pk_patient_id"])
                continue
            patient_info[row["pk_patient_id"]]["enroll_date"] = int(
                row["visit_date"][:4])
    num_missing = len(
        [id for id in record_ids if (not "enroll_date" in patient_info[id])])

    #    print "Field used     : visit_date"
    #    print "Number missing : ", num_missing

    # IPSS - use 'dateentered' (works for all but 6 patients).
    #    print
    #    print "Project        : IPSS"
    # Third pass: final fallback to IPSS 'dateentered'.
    record_ids_leftover = [
        id for id in record_ids if (not "enroll_date" in patient_info[id])
    ]
    dateentered_ipss = exportRecords(url_ipss,
                                     key_ipss,
                                     record_id_list=record_ids_leftover,
                                     fields=["dateentered"],
                                     events=["acute_arm_1"],
                                     validate=False)
    #    if (len(record_ids_ipss_only) != len(dateentered_ipss)): # look for record id missing from exported data
    #        for record_id in record_ids_ipss_only:
    #            id_in_data = False
    #            for row in dateentered_ipss:
    #                if (row["ipssid"] == record_id):
    #                    id_in_data = True
    #                    break
    #            if (not id_in_data):
    #                print "Record with ID "+str(record_id)+" not found in exported data"
    #    num_missing = 0
    for row in dateentered_ipss:
        if (row["dateentered"] == ""):
            #            num_missing += 1
            pass
        else:
            if ("enroll_date" in patient_info[row["ipssid"]]):
                print "This record was counted twice: " + str(row["ipssid"])
                continue
            patient_info[row["ipssid"]]["enroll_date"] = int(
                row["dateentered"][:4])
    num_missing = len(
        [id for id in record_ids if (not "enroll_date" in patient_info[id])])
    #    print "Field used     : dateentered"
    #    print "Number missing : ", num_missing

    # Sanity-check collected enrolment years against the hard-coded window
    # [2003, 2019] (range(2003, 2020) excludes 2020).
    enroll_dates = set()
    for id, info in patient_info.iteritems():
        if ('enroll_date' in info):
            enroll_dates.add(info['enroll_date'])
            if (not info['enroll_date'] in range(2003, 2020)):
                print "Record enroll date outside [2003, 2019]:", id
        else:
            print "Record with no enrolment date:", id
#    print "enroll_dates:", sorted(list(enroll_dates))

## Get DAG for each record:
    # Export record IDs only; exportRecords presumably includes the
    # 'redcap_data_access_group' column with every export — TODO confirm.
    dags_arch = exportRecords(url_arch,
                              key_arch,
                              record_id_list=record_ids,
                              fields=["pk_patient_id"],
                              validate=False)
    dags_ipss = exportRecords(url_ipss,
                              key_ipss,
                              record_id_list=record_ids,
                              fields=["ipssid"],
                              validate=False)
    # Archive DAG is assigned first; IPSS DAG only fills in missing/blank ones.
    for row in dags_arch:
        record_id = row["pk_patient_id"]
        dag = row["redcap_data_access_group"]
        patient_info[record_id]["dag"] = dag
    for row in dags_ipss:
        record_id = row["ipssid"]
        dag = row["redcap_data_access_group"]
        if (not "dag" in patient_info[record_id]) or (
                patient_info[record_id]["dag"]
                == ""):  # add DAG from IPSS if not added already
            patient_info[record_id][
                "dag"] = dag  # overwriting DAG for records in Archive should not be a problem.

#    for id in patient_info:
#        if (not "dag" in patient_info[id]) or (patient_info[id]["dag"] == ""):
#            print "Record with ID", id, "does not have a DAG assigned"

## Get stroke type for each patient. # Need to decide how we want to break this down further.
#    stroke_type_arch = exportRecords(url_arch, key_arch, record_id_list=record_ids_arch, fields=["ais", "csvt", "pperi", "preart", "other_stroke", "age_at_event"], events=["acute_arm_1"])
    stroke_type_ipss = exportRecords(url_ipss,
                                     key_ipss,
                                     record_id_list=record_ids,
                                     fields=[
                                         "chais", "chcsvt", "neoais",
                                         "neocsvt", "ppis", "ppcsvt", "pvi",
                                         "preart", "othcond"
                                     ],
                                     events=["acute_arm_1"])

    # Default every stroke-type flag to "2" (unknown) before filling in
    # exported values.
    for record_id in patient_info:
        patient_info[record_id]["stroke_type"] = {}
        patient_info[record_id]["stroke_type"]["neo_ais"] = "2"
        patient_info[record_id]["stroke_type"]["neo_csvt"] = "2"
        patient_info[record_id]["stroke_type"]["child_ais"] = "2"
        patient_info[record_id]["stroke_type"]["child_csvt"] = "2"
        patient_info[record_id]["stroke_type"]["pp_ais"] = "2"
        patient_info[record_id]["stroke_type"]["pp_csvt"] = "2"
        patient_info[record_id]["stroke_type"]["pp_vi"] = "2"
        patient_info[record_id]["stroke_type"]["art"] = "2"
        patient_info[record_id]["stroke_type"]["other"] = "2"

    for row in stroke_type_ipss:  # 0 - no, 1 - yes, 2 - unknown
        record_id = row["ipssid"]
        # neonatal AIS
        patient_info[record_id]["stroke_type"]["neo_ais"] = row["neoais___1"]
        # neonatal CSVT
        patient_info[record_id]["stroke_type"]["neo_csvt"] = row["neocsvt___1"]
        # child AIS
        patient_info[record_id]["stroke_type"]["child_ais"] = row["chais___1"]
        # child CSVT
        patient_info[record_id]["stroke_type"]["child_csvt"] = row[
            "chcsvt___1"]
        # presumed perinatal AIS
        patient_info[record_id]["stroke_type"]["pp_ais"] = row["ppis___1"]
        # presumed perinatal CSVT
        patient_info[record_id]["stroke_type"]["pp_csvt"] = row["ppcsvt___1"]
        # presumed perinatal VI
        patient_info[record_id]["stroke_type"]["pp_vi"] = row["pvi___1"]
        # arteriopathy
        patient_info[record_id]["stroke_type"]["art"] = row["preart___1"]
        # other
        patient_info[record_id]["stroke_type"]["other"] = row["othcond___1"]

    # Look for patients without an identified stroke type.
    record_ids_with_unidentified_stroke_type = []
    for id, record in patient_info.iteritems():
        identified_type = False
        for stroke_type, value in record["stroke_type"].iteritems():
            if (value == "1"):
                identified_type = True
                break
        if (not identified_type):
            #            print "Record with ID", id, "has an unidentified stroke type."
            record_ids_with_unidentified_stroke_type.append(id)

    # Check if stroke type can be identified in Archive instead.
#    stroke_type_arch_leftover = exportRecords(url_arch, key_arch, record_id_list=record_ids_with_unidentified_stroke_type, fields=["ais", "csvt", "pperi", "preart", "other_stroke", "age_at_event"], events=["acute_arm_1"])
#    for row in stroke_type_arch_leftover:
#        print row["pk_patient_id"], row["ais"], row["csvt"], row["pperi"], row["preart"], row["other_stroke"]#, row["age_at_event"]
#        stroke_type_found = False
#        if (row["ais"] == "1") and (row["age_at_event"] == "0"):
#            patient_info[row["pk_patient_id"]]["stroke_type"]["neo_ais"] = "1"
#            stroke_type_found = True
#        if (row["csvt"] == "1") and (row["age_at_event"] == "0"):
#            patient_info[row["pk_patient_id"]]["stroke_type"]["neo_csvt"] = "1"
#            stroke_type_found = True
#        if (row["ais"] == "1") and (row["age_at_event"] == "1"):
#            patient_info[row["pk_patient_id"]]["stroke_type"]["child_ais"] = "1"
#            stroke_type_found = True
#        if (row["csvt"] == "1") and (row["age_at_event"] == "1"):
#            patient_info[row["pk_patient_id"]]["stroke_type"]["child_csvt"] = "1"
#            stroke_type_found = True
#        if (row["preart"] == "1"):
#            patient_info[row["pk_patient_id"]]["stroke_type"]["art"] = "1"
#            stroke_type_found = True
#        if (row["other_stroke"] == "1"):
#            patient_info[row["pk_patient_id"]]["stroke_type"]["other"] = "1"
#            stroke_type_found = True
#        if stroke_type_found:
#            record_ids_with_unidentified_stroke_type.remove(row["pk_patient_id"])

# Print some stats on the acquired patient information.
    num_no_year = 0
    num_no_dag = 0
    for record_id, record in patient_info.iteritems():
        if (record["dag"] == ""):
            num_no_dag += 1
        if (not "enroll_date" in record):
            num_no_year += 1
    print "Number of duplicated record IDs:", len(record_ids) - len(
        set(record_ids))
    print "Number of unique record IDs:", len(set(record_ids))
    print "Number of record IDs in patient_info:", len(patient_info)
    print "Number of records with no DAG:", num_no_dag
    print "Number of records with no enrolment date:", num_no_year
    print "Number of records with unidentified stroke type:", len(
        record_ids_with_unidentified_stroke_type)
    return patient_info
Exemplo n.º 3
0
def buildProjects(config):
    #### Read user's settings.yml file, which will be used to get API tokens and URLs.
    api_settings = ApiSettings()

    ## Build a list of "projects" - dicts which store data and settings for the project.
    projects = config["projects"]

    ## Verify the settings for each project.
    for project in projects:
        code_name = project["code_name"]

        # Get args to pass to exportRecords.
        if (not "exportRecords_args"
                in project) or (project["exportRecords_args"] is None):
            project["exportRecords_args"] = {}

        # If use_getIPSSIDs is True, get list of record IDs to export.
        if project["options"]["use_getIPSSIDs"]:
            # If use_getIPSSIDs is True, but no options provided, raise warning.
            if (not "getIPSSIDs_args"
                    in project) or (project["getIPSSIDs_args"] is None):
                print "Warning: in project '" + code_name + "', 'use_getIPSSIDs' is True, but 'getIPSSIDs_args' not provided for project. Exporting all record IDs from project."
                record_id_list = None
            else:
                getIPSSIDs_args = project["getIPSSIDs_args"]
                record_id_list = getIPSSIDs(**getIPSSIDs_args)

            # If exportRecords_args has an entry for record_id_list, but use_getIPSSIDs is True, raise warning.
            if (project["options"]["use_getIPSSIDs"]) and (
                    "record_id_list" in project["exportRecords_args"]):
                print "Warning: in project '" + code_name + "', the specified 'record_id_list' will be ignored, since 'use_getIPSSIDs' is True."

            # Overwrite the record_id_list argument in exportRecords_args
            project["exportRecords_args"]["record_id_list"] = record_id_list

        ## Get args to pass to exportRecords. If key does not exist, or it is not set to a value, set it to an empty dict (i.e.
        exportRecords_args = project[
            "exportRecords_args"]  # has a value (possibly {}).

        # Convert exportRecords_args arguments to strings as needed.
        convert_to_strings = ["fields", "forms", "events", "record_id_list"]
        for arg in convert_to_strings:
            if arg in exportRecords_args.keys():
                if (exportRecords_args[arg] == 'None'
                    ):  # these arguments could be lists or None
                    # Convert string 'None' to Python None.
                    exportRecords_args[arg] = None
                else:
                    # Convert list to list of strings. Currently, list might contain integers etc.
                    new_list = [str(val) for val in exportRecords_args[arg]]
                    exportRecords_args[arg] = new_list

        ## Get API credentials for current project.
        api_url, api_key, code_name = api_settings.getApiCredentials(
            code_name=code_name)
        project["api_url"] = api_url
        project["api_key"] = api_key

        ## Export requested data for current project
        data_csv = exportRecords(api_url,
                                 api_key,
                                 format="csv",
                                 **exportRecords_args)
        data_csv_file = StringIO(data_csv)
        data_df = pandas.read_csv(data_csv_file,
                                  dtype=unicode,
                                  encoding='utf-8').fillna('')

        project["chunks"] = [
            data_df
        ]  # this list of dataframes will be broken into pieces, each piece containing data to be placed in a different tab.

        ## Retrieve project settings and add them to the dict for the current project
        pycap_project = redcap.Project(api_url, api_key)
        def_field = pycap_project.def_field
        project_info = exportProjectInfo(api_url, api_key)
        longitudinal = bool(project_info["is_longitudinal"])
        repeating = bool(project_info["has_repeating_instruments_or_events"])
        events = getEvents(api_url, api_key, quiet=True)
        metadata_raw = pycap_project.export_metadata()
        form_event_mapping = exportFormEventMapping(pycap_project,
                                                    longitudinal)
        repeating_forms_events = exportRepeatingFormsEvents(
            api_url, api_key, repeating)
        forms = exportFormsOrdered(api_url, api_key)
        form_repetition_map = createFormRepetitionMap(longitudinal, repeating,
                                                      form_event_mapping,
                                                      repeating_forms_events,
                                                      forms)
        metadata = parseMetadata(pycap_project.def_field,
                                 project_info,
                                 longitudinal,
                                 repeating,
                                 events,
                                 metadata_raw,
                                 form_event_mapping,
                                 repeating_forms_events,
                                 forms,
                                 form_repetition_map,
                                 write_branching_logic_function=False)

        project["pycap_project"] = pycap_project
        project["def_field"] = def_field
        project["project_info"] = project_info
        project["longitudinal"] = longitudinal
        project["repeating"] = repeating
        project["events"] = events
        project["form_event_mapping"] = form_event_mapping
        project["repeating_forms_events"] = repeating_forms_events
        project["forms"] = forms
        project["form_repetition_map"] = form_repetition_map
        project["metadata"] = metadata

        # Create dict which maps each form to a list of events containing that form.
        if longitudinal:
            form_to_events_dict = {}
            for form_event_entry in form_event_mapping:
                form = form_event_entry['form']
                event = form_event_entry['unique_event_name']
                if (not form in form_to_events_dict):
                    form_to_events_dict[form] = [event]
                else:
                    form_to_events_dict[form].append(event)
        else:
            form_to_events_dict = None
        project["form_to_events_dict"] = form_to_events_dict

        ## Build lists of variables which appear in the export data.
        # columns which uniquely identify a row
        primary_key = [def_field]
        if project["longitudinal"]:
            primary_key.append("redcap_event_name")
        if project["repeating"]:
            primary_key.append("redcap_repeat_instrument")
            primary_key.append("redcap_repeat_instance")
        project["primary_key"] = primary_key

        primary_key_and_dag = primary_key
        if ("redcap_data_access_group" in data_df.columns):
            primary_key_and_dag.append("redcap_data_access_group")
        project["primary_key_and_dag"] = primary_key_and_dag

        # form_complete fields
        form_complete_fields = [
            field for field in data_df.columns
            if ((field.endswith("_complete")) and (not field in metadata) and (
                not field in primary_key) and (
                    not field == "redcap_data_access_group"))
        ]
        project["form_complete_fields"] = form_complete_fields

        # data fields
        data_fields = [
            field for field in data_df.columns
            if ((not field in primary_key + form_complete_fields) and (
                not field == "redcap_data_access_group"))
        ]
        project["data_fields"] = data_fields

    return projects
Exemplo n.º 4
0
def getPatientInfo(url_arch, url_ipss, key_arch, key_ipss, enroll_date_min=2003, enroll_date_max=2020):
    # Create one list of record ID which are non-registry and have known stroke type.
    #record_ids = getIPSSIDs(ex_registry_only=True, ex_unknown_stroke_type=True, from_code_name="ipss_v3")
    #print "DEBUG: CHANGE getIPSSIDs arguments back to IPSS V4."
    record_ids = getIPSSIDs(ex_registry_only=True, ex_unknown_stroke_type=True)


    ## Create dict with patient information: {record_id: {dag:"...", enroll_date:"...", ...} }
    patient_info = {}
    for record_id in record_ids: # add item (another dict) for each patient in the Archive
        patient_info[record_id] = {}

    ## Get enrolment date for each record.
    # Archive - Use 'dateofentry', then 'visit_date".
    dateofentry_arch = exportRecords(url_arch, key_arch, record_id_list=record_ids, fields=["dateofentry"], events=["acute_arm_1"], validate=False)
    for row in dateofentry_arch:
        if (row["dateofentry"] == ""):
            pass
        else:
            if ("enroll_date" in patient_info[row["pk_patient_id"]]):
                print "This record was counted twice: "+str(row["pk_patient_id"])
                continue
            patient_info[row["pk_patient_id"]]["enroll_date"] = int(row["dateofentry"][:4])

    num_missing = len([id for id in record_ids if (not "enroll_date" in patient_info[id])])
   
    record_ids_leftover = [id for id in record_ids if (not "enroll_date" in patient_info[id])]
    visit_date_leftover = exportRecords(url_arch, key_arch, record_id_list=record_ids_leftover, fields=["visit_date"], events=["acute_arm_1"], validate=False)
    for row in visit_date_leftover:
        if (row["visit_date"] == ""):
            pass
        else:
            if ("enroll_date" in patient_info[row["pk_patient_id"]]):
                print "This record was counted twice: "+str(row["pk_patient_id"])
                continue
            patient_info[row["pk_patient_id"]]["enroll_date"] = int(row["visit_date"][:4]) 
    num_missing = len([id for id in record_ids if (not "enroll_date" in patient_info[id])])
   
    # IPSS - use 'dateentered' (works for all but 6 patients).
    record_ids_leftover = [id for id in record_ids if (not "enroll_date" in patient_info[id])]
    dateentered_ipss = exportRecords(url_ipss, key_ipss, record_id_list=record_ids_leftover, fields=["dateentered"], events=["acute_arm_1"], validate=False)
    for row in dateentered_ipss:
        if (row["dateentered"] == ""):
            pass
        else:
            if ("enroll_date" in patient_info[row["ipssid"]]):
                print "This record was counted twice: "+str(row["ipssid"])
                continue
            patient_info[row["ipssid"]]["enroll_date"] = int(row["dateentered"][:4])
    num_missing = len([id for id in record_ids if (not "enroll_date" in patient_info[id])])

    enroll_dates = set()
    for id, info in patient_info.iteritems():
        if ('enroll_date' in info):
            enroll_dates.add(info['enroll_date'])
            if (not info['enroll_date'] in range(enroll_date_min, enroll_date_max+1)):
                print "Record enroll date outside ["+str(enroll_date_min)+", "+str(enroll_date_max)+"]:", id
        else:
            print "Record with no enrolment date:", id
    
    ## Get DAG for each record:
    dags_arch = exportRecords(url_arch, key_arch, record_id_list=record_ids, fields=["pk_patient_id"], validate=False)
    dags_ipss = exportRecords(url_ipss, key_ipss, record_id_list=record_ids, fields=["ipssid"], validate=False)
    for row in dags_arch:
        record_id = row["pk_patient_id"]
        dag = row["redcap_data_access_group"]
        patient_info[record_id]["dag"] = dag
    for row in dags_ipss:
        record_id = row["ipssid"]
        dag = row["redcap_data_access_group"]
        if (not "dag" in patient_info[record_id]) or (patient_info[record_id]["dag"] == ""): # add DAG from IPSS if not added already
            patient_info[record_id]["dag"] = dag # overwriting DAG for records in Archive should not be a problem.
    
    ## Get stroke type for each patient. # Need to decide how we want to break this down further.
    #stroke_type_ipss = exportRecords(url_ipss, key_ipss, record_id_list=record_ids, fields=["chais", "chcsvt", "neoais", "neocsvt", "ppis", "ppcsvt", "pvi", "preart", "othcond"], events=["acute_arm_1"])
    stroke_type_ipss = exportRecords(url_ipss, key_ipss, record_id_list=record_ids, fields=["stroke_type"], events=["acute_arm_1"])

    # Set stroke types to unknown initially.
    for record_id in patient_info:
        patient_info[record_id]["stroke_type"] = {}
        patient_info[record_id]["stroke_type"]["neo_ais"] = "2"
        patient_info[record_id]["stroke_type"]["neo_csvt"] = "2"
        patient_info[record_id]["stroke_type"]["child_ais"] = "2"
        patient_info[record_id]["stroke_type"]["child_csvt"] = "2"
        patient_info[record_id]["stroke_type"]["pp_ais"] = "2"
        patient_info[record_id]["stroke_type"]["pp_csvt"] = "2"
        patient_info[record_id]["stroke_type"]["pp_vi"] = "2"
        patient_info[record_id]["stroke_type"]["art"] = "2"
        patient_info[record_id]["stroke_type"]["other"] = "2"

        #'chais___1':'stroke_type___1',
        #'chcsvt___1':'stroke_type___2',
        #'neoais___1':'stroke_type___3',
        #'neocsvt___1':'stroke_type___4',
        #'ppis___1':'stroke_type___5',
        #'ppcsvt___1':'stroke_type___6',
        #'pvi___1':'stroke_type___7',
        #'preart___1':'stroke_type___8',
        #'othcond___1':'stroke_type___9'
        
    for row in stroke_type_ipss: # 0 - no, 1 - yes, 2 - unknown
        record_id = row["ipssid"]
        # neonatal AIS
        patient_info[record_id]["stroke_type"]["neo_ais"] = row["stroke_type___3"]
        # neonatal CSVT
        patient_info[record_id]["stroke_type"]["neo_csvt"] = row["stroke_type___4"]
        # child AIS
        patient_info[record_id]["stroke_type"]["child_ais"] = row["stroke_type___1"]
        # child CSVT
        patient_info[record_id]["stroke_type"]["child_csvt"] = row["stroke_type___2"]
        # presumed perinatal AIS
        patient_info[record_id]["stroke_type"]["pp_ais"] = row["stroke_type___5"]
        # presumed perinatal CSVT
        patient_info[record_id]["stroke_type"]["pp_csvt"] = row["stroke_type___6"]
        # presumed perinatal VI
        patient_info[record_id]["stroke_type"]["pp_vi"] = row["stroke_type___7"]
        # arteriopathy
        patient_info[record_id]["stroke_type"]["art"] = row["stroke_type___8"]
        # other
        patient_info[record_id]["stroke_type"]["other"] = row["stroke_type___9"]

    # Look for patients without an identified stroke type.
    record_ids_with_unidentified_stroke_type = []
    for id, record in patient_info.iteritems():
        identified_type = False
        for stroke_type, value in record["stroke_type"].iteritems():
            if (value == "1"):
                identified_type = True
                break
        if (not identified_type):
            record_ids_with_unidentified_stroke_type.append(id)
    
    # Print some stats on the acquired patient information.
    num_no_year = 0
    num_no_dag = 0
    for record_id, record in patient_info.iteritems():
        if (record["dag"] == ""):
            num_no_dag += 1 
        if (not "enroll_date" in record):
            num_no_year += 1
    print "Number of duplicated record IDs:", len(record_ids) - len(set(record_ids))
    print "Number of unique record IDs:", len(set(record_ids))
    print "Number of record IDs in patient_info:", len(patient_info)
    print "Number of records with no DAG:", num_no_dag
    print "Number of records with no enrolment date:", num_no_year
    print "Number of records with unidentified stroke type:", len(record_ids_with_unidentified_stroke_type)    
    return patient_info