# record_ids.remove(row["ipssid"]) record_ids = getIPSSIDs(inc_registry_only=False, inc_unknown_stroke_type=False) record_ids_post_2014 = getIPSSIDs(inc_registry_only=False, inc_unknown_stroke_type=False, inc_pre_2014=False) record_ids_non_sk = getIPSSIDs(inc_registry_only=False, inc_unknown_stroke_type=False, inc_sk_patients=False) record_ids_psom = getIPSSIDs(db="psom", inc_registry_only=False, inc_unknown_stroke_type=False) # Load REDCap project (a PyCap object). project = redcap.Project(api_url_ipss, api_key_ipss) def_field = project.def_field project_info = exportProjectInfo(api_url_ipss, api_key_ipss) project_longitudinal = bool(project_info["is_longitudinal"]) project_repeating = bool(project_info["has_repeating_instruments_or_events"]) events = getEvents(api_url_ipss, api_key_ipss) metadata_raw = project.export_metadata() form_event_mapping = exportFormEventMapping(project, project_longitudinal) repeating_forms_events = exportRepeatingFormsEvents(api_url_ipss, api_key_ipss, project_repeating) forms = exportFormsOrdered(api_url_ipss, api_key_ipss) form_repetition_map = createFormRepetitionMap(project_longitudinal, project_repeating, form_event_mapping, repeating_forms_events, forms) metadata = parseMetadata(def_field, project_info, project_longitudinal, project_repeating, events, metadata_raw, form_event_mapping, repeating_forms_events, forms,
dags_list = [None]*num_projects dag_record_map_list = [None]*num_projects #for project_index in range(len(api_key_list)): # version without reversing order for project_index in range(len(api_key_list))[::-1]: # get data from second project first, to ensure that first project only includes records that also appear in the second project. # Load REDCap project (a PyCap object). project_list[project_index] = redcap.Project(api_url_list[project_index], api_key_list[project_index]) # Get the field name of the unique identifying field (e.g. "ipssid"). def_field_list[project_index] = project_list[project_index].def_field # Load high-level projct information. project_info_list[project_index] = exportProjectInfo(api_url_list[project_index], api_key_list[project_index]) project_longitudinal_list[project_index] = bool(project_info_list[project_index]["is_longitudinal"]) project_repeating_list[project_index] = bool(project_info_list[project_index]["has_repeating_instruments_or_events"]) # Load list of events events_list[project_index] = getEvents(api_url_list[project_index], api_key_list[project_index])#project_list[project_index], project_info_list[project_index], project_longitudinal_list[project_index]) # Load raw data dictionary. metadata_list[project_index] = project_list[project_index].export_metadata() # Load instrument-event mapping form_event_mapping_list[project_index] = exportFormEventMapping(project_list[project_index], project_longitudinal_list[project_index])
def _resolveRecordIdList(project):
    """Return the record IDs selected by getIPSSIDs() for one project.

    Returns None when the project has no usable 'getIPSSIDs_args'; a warning
    is printed in that case and None is passed on as the record_id_list.
    """
    getIPSSIDs_args = project.get("getIPSSIDs_args")
    if getIPSSIDs_args is None:
        # Single-argument print(...) prints identically under the Python 2
        # print statement used by the rest of this file.
        print("Warning: in project '" + project["code_name"] + "', 'use_getIPSSIDs' is True, but 'getIPSSIDs_args' not provided for project. Exporting all record IDs from project.")
        return None
    return getIPSSIDs(**getIPSSIDs_args)


def _normalizeExportRecordsArgs(exportRecords_args):
    """Normalize the list-valued exportRecords arguments in place.

    For each of 'fields', 'forms', 'events' and 'record_id_list' that is
    present: the string 'None' and a real None both become None; any other
    value is iterated and converted to a list of strings (the list might
    contain integers etc.).
    """
    for arg in ("fields", "forms", "events", "record_id_list"):
        if arg not in exportRecords_args:
            continue
        value = exportRecords_args[arg]
        # A real None must be accepted here as well as the string 'None':
        # record_id_list is set to None when getIPSSIDs_args is missing, and
        # iterating None in the comprehension below would raise TypeError.
        if value is None or value == 'None':
            exportRecords_args[arg] = None
        else:
            exportRecords_args[arg] = [str(val) for val in value]


def _mapFormsToEvents(form_event_mapping):
    """Return a dict mapping each form to the list of events containing it."""
    form_to_events_dict = {}
    for form_event_entry in form_event_mapping:
        form = form_event_entry['form']
        event = form_event_entry['unique_event_name']
        form_to_events_dict.setdefault(form, []).append(event)
    return form_to_events_dict


def buildProjects(config):
    """Export data and settings for every project listed in config["projects"].

    Each project dict is updated in place with its API credentials, the
    exported data (a one-element list of DataFrames under "chunks"), the
    REDCap project settings/metadata, and lists classifying the exported
    columns ("primary_key", "primary_key_and_dag", "form_complete_fields",
    "data_fields").

    Parameters:
        config: dict with a "projects" entry; each project needs "code_name"
            and "options"["use_getIPSSIDs"], and may provide
            "exportRecords_args" and "getIPSSIDs_args".

    Returns:
        The same list object stored in config["projects"].
    """
    #### Read user's settings.yml file, which will be used to get API tokens and URLs.
    api_settings = ApiSettings()

    ## Build a list of "projects" - dicts which store data and settings for the project.
    projects = config["projects"]

    for project in projects:
        code_name = project["code_name"]

        ## Get args to pass to exportRecords. A missing or unset entry becomes {}.
        if project.get("exportRecords_args") is None:
            project["exportRecords_args"] = {}
        exportRecords_args = project["exportRecords_args"]

        # If use_getIPSSIDs is True, the IDs it selects replace any record_id_list given by the user.
        if project["options"]["use_getIPSSIDs"]:
            record_id_list = _resolveRecordIdList(project)
            if "record_id_list" in exportRecords_args:
                print("Warning: in project '" + code_name + "', the specified 'record_id_list' will be ignored, since 'use_getIPSSIDs' is True.")
            exportRecords_args["record_id_list"] = record_id_list

        # Convert exportRecords_args arguments to strings as needed.
        _normalizeExportRecordsArgs(exportRecords_args)

        ## Get API credentials for current project.
        api_url, api_key, code_name = api_settings.getApiCredentials(code_name=code_name)
        project["api_url"] = api_url
        project["api_key"] = api_key

        ## Export requested data for current project.
        data_csv = exportRecords(api_url, api_key, format="csv", **exportRecords_args)
        data_df = pandas.read_csv(StringIO(data_csv), dtype=unicode, encoding='utf-8').fillna('')
        # This list of dataframes will be broken into pieces, each piece containing data to be placed in a different tab.
        project["chunks"] = [data_df]

        ## Retrieve project settings and add them to the dict for the current project.
        pycap_project = redcap.Project(api_url, api_key)
        def_field = pycap_project.def_field
        project_info = exportProjectInfo(api_url, api_key)
        longitudinal = bool(project_info["is_longitudinal"])
        repeating = bool(project_info["has_repeating_instruments_or_events"])
        events = getEvents(api_url, api_key, quiet=True)
        metadata_raw = pycap_project.export_metadata()
        form_event_mapping = exportFormEventMapping(pycap_project, longitudinal)
        repeating_forms_events = exportRepeatingFormsEvents(api_url, api_key, repeating)
        forms = exportFormsOrdered(api_url, api_key)
        form_repetition_map = createFormRepetitionMap(longitudinal, repeating, form_event_mapping, repeating_forms_events, forms)
        metadata = parseMetadata(def_field, project_info, longitudinal, repeating, events, metadata_raw, form_event_mapping, repeating_forms_events, forms, form_repetition_map, write_branching_logic_function=False)

        project["pycap_project"] = pycap_project
        project["def_field"] = def_field
        project["project_info"] = project_info
        project["longitudinal"] = longitudinal
        project["repeating"] = repeating
        project["events"] = events
        project["form_event_mapping"] = form_event_mapping
        project["repeating_forms_events"] = repeating_forms_events
        project["forms"] = forms
        project["form_repetition_map"] = form_repetition_map
        project["metadata"] = metadata

        # Dict which maps each form to a list of events containing that form (None if not longitudinal).
        if longitudinal:
            project["form_to_events_dict"] = _mapFormsToEvents(form_event_mapping)
        else:
            project["form_to_events_dict"] = None

        ## Build lists of variables which appear in the export data.
        dag_field = "redcap_data_access_group"

        # Columns which uniquely identify a row.
        primary_key = [def_field]
        if longitudinal:
            primary_key.append("redcap_event_name")
        if repeating:
            primary_key.append("redcap_repeat_instrument")
            primary_key.append("redcap_repeat_instance")
        project["primary_key"] = primary_key

        # Copy the list: appending to an alias of primary_key would also add
        # the DAG column to project["primary_key"].
        primary_key_and_dag = list(primary_key)
        if dag_field in data_df.columns:
            primary_key_and_dag.append(dag_field)
        project["primary_key_and_dag"] = primary_key_and_dag

        # form_complete fields: '*_complete' columns that are not metadata fields, key columns or the DAG column.
        form_complete_fields = [
            field for field in data_df.columns
            if field.endswith("_complete")
            and field not in metadata
            and field not in primary_key
            and field != dag_field
        ]
        project["form_complete_fields"] = form_complete_fields

        # Data fields: every remaining column.
        non_data_fields = set(primary_key) | set(form_complete_fields) | {dag_field}
        project["data_fields"] = [field for field in data_df.columns if field not in non_data_fields]

    return projects
def mainIntraProject(config_path):
    """Run the configured checks on a single REDCap project.

    Reads the configuration at config_path, loads the project's settings,
    metadata and records through the API, then passes everything to
    checkDriver() together with the checks named in config["checks"].

    Parameters:
        config_path: path handed to readConfig(). The resulting config is
            read for "code_name", "out_dir", "checks", "use_getIPSSIDs",
            and, depending on those flags, "getIPSSIDs_args",
            "use_custom_record_id_list" and "record_id_list".

    Returns:
        None.

    Raises:
        Exception: if a check named in config["checks"] has no matching
            '<check_name>.py' file next to this script.
    """
    config = readConfig(config_path)

    # Single-argument print(...) prints identically under the Python 2 print
    # statement used by the rest of this file.
    print("Performing checks with configuration:")
    pprint(config)
    print("")

    #### Read user's settings.yml file, which will be used to get API tokens and URLs.
    api_settings = ApiSettings()

    # Determine the API URL and API token based on the user's input and api_keys.yml file.
    code_name = config["code_name"]
    api_url, api_key, code_name = api_settings.getApiCredentials(code_name=code_name)

    # Create output directory if it does not exist. makedirs also creates
    # missing parent directories, which os.mkdir would fail on.
    out_dir = config["out_dir"]
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)
        print("Created directory: %s" % (out_dir,))

    # Verify that every requested check has a script next to this file.
    check_name_list = config["checks"]
    scriptdir = os.path.dirname(os.path.realpath(__file__))
    for check_name in check_name_list:
        check_path = os.path.join(scriptdir, check_name + ".py")
        if not os.path.exists(check_path):
            raise Exception("Path does not exist: " + check_path)

    # Load REDCap project (a PyCap object).
    project = redcap.Project(api_url, api_key)

    # Get the field name of the unique identifying field (e.g. "ipssid").
    def_field = project.def_field

    # Load high-level project information.
    project_info = exportProjectInfo(api_url, api_key)
    project_longitudinal = bool(project_info["is_longitudinal"])
    project_repeating = bool(project_info["has_repeating_instruments_or_events"])

    # Load list of events and show their event_ids so the user can review them.
    events = getEvents(api_url, api_key)
    if events is not None:
        print("Review the event_ids below. These are required for generating links to problematic data in reports. If these are incorrect, or unset, you can set them in the event_ids.yml file specified in your settings.yml file. You can find the event_id associated with an event by accessing data from that event online, and looking at the value of 'event_id' in the address bar.")
        for event in events:
            event_id = events[event]["event_id"]
            if event_id is not None:
                print(Color.green + event + " " + event_id + Color.end)
            else:
                print(Color.red + event + " " + 'None' + Color.end)
        print("")

    # Load raw data dictionary.
    metadata_raw = project.export_metadata()

    # Load instrument-event mapping.
    form_event_mapping = exportFormEventMapping(project, project_longitudinal)

    # Load information specifying which forms are repeating.
    repeating_forms_events = exportRepeatingFormsEvents(api_url, api_key, project_repeating)

    # Generate list of forms - list of dicts with two keys: 'instrument_label' and 'instrument_name'.
    forms = exportFormsOrdered(api_url, api_key)

    # Generate a dictionary with form_names as keys; each entry is a dict specifying in which
    # events the form is non-repeating, independently repeating, or dependently repeating.
    form_repetition_map = createFormRepetitionMap(project_longitudinal, project_repeating, form_event_mapping, repeating_forms_events, forms)

    # Gather data about each variable.
    metadata = parseMetadata(def_field, project_info, project_longitudinal, project_repeating, events, metadata_raw, form_event_mapping, repeating_forms_events, forms, form_repetition_map)

    ## Load all records, or only the requested subset.
    if config["use_getIPSSIDs"]:
        record_id_list = getIPSSIDs(**config["getIPSSIDs_args"])
    elif config["use_custom_record_id_list"]:
        record_id_list = config["record_id_list"]
    else:
        record_id_list = None
    records = exportRecords(api_url, api_key, record_id_list)

    # NOTE (2020-05-11): the isProjectCompatible() check that used to run here
    # appeared to check for bugged output of exportRecords.py, which has now
    # been handled in exportRecords.py, so it is no longer performed.

    # Generate a dictionary with record IDs as keys and a list of row numbers corresponding to that record as values.
    record_id_map = createRecordIDMap(def_field, records)

    # Generate a list of data access groups if they exist.
    dags_used, dags = getDAGs(records)

    # Generate a dictionary containing information about each dag (e.g. number of records they contain).
    dag_record_map = createDAGRecordMap(def_field, records, record_id_map, dags_used, dags)

    # Generate list of checks to perform (default & user-defined).
    checklist = createChecklist(check_name_list)

    # Perform checks on data and report issues. The check results are saved by checkDriver().
    check_results = checkDriver(checklist, out_dir, def_field, forms, project_info, project_longitudinal, project_repeating, events, metadata, form_event_mapping, repeating_forms_events, form_repetition_map, records, record_id_map, dags_used, dags, dag_record_map)

    return