def main_with_args(case_summary_file, case_types, output_file_path, state_dir):
    """Do everything required to export and report on commcare export

    Args:
        case_summary_file (str): File path to CommCare app case summary
        case_types (list): The case types (i.e., "contact", "lab_result", etc.)
        output_file_path (str): Where the workbook with source-column mappings lives
        state_dir (str): Directory where per-case-type column state is stored
            between runs
    """
    logger.info(
        f"Retrieving data from {case_summary_file} and extracting column names"
    )
    properties_by_type = extract_property_names(case_summary_file, case_types)
    new_mappings = {
        case_type: generate_source_target_mappings(properties)
        for case_type, properties in properties_by_type.items()
    }
    if state_dir:
        for case_type, new_mapping in new_mappings.items():
            previous_mapping = get_previous_mapping(state_dir, case_type)
            if set(previous_mapping) != set(new_mapping):
                save_column_state(state_dir, case_type, new_mapping)
    logger.info(f"Generating a temporary Excel workbook to {output_file_path}")
    new_mappings = {make_sql_friendly(k): v for (k, v) in new_mappings.items()}
    wb = make_commcare_export_sync_xl_wb(new_mappings)
    wb.save(output_file_path)
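# The column-state helpers used above (`get_previous_mapping`, `save_column_state`)
# are defined elsewhere in this repo. Below is a minimal, hypothetical sketch of
# the behavior the function relies on, assuming column state is kept as one JSON
# file per case type inside `state_dir`; the file-name scheme is illustrative only.
import json
from pathlib import Path


def get_previous_mapping(state_dir, case_type):
    """Return the previously saved mapping for `case_type`, or {} if none exists."""
    path = Path(state_dir) / f"{case_type}.json"
    if not path.exists():
        return {}
    with open(path) as fl:
        return json.load(fl)


def save_column_state(state_dir, case_type, mapping):
    """Persist the current mapping for `case_type` so later runs can diff against it."""
    with open(Path(state_dir) / f"{case_type}.json", "w") as fl:
        json.dump(mapping, fl, indent=2)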
def load_app_case_types_with_properties_from_json(file_path):
    """Load JSON file with data stored from the application structure API endpoint

    Args:
        file_path (str): the path to a JSON file containing the data
    """
    logger.info(f"Loading application structure data from {file_path}")
    with open(file_path) as fl:
        return json.load(fl)
def get_app_case_types_with_properties_from_api(
    commcare_project_name,
    commcare_user_name,
    commcare_api_key,
    commcare_app_id,
    app_structure_api_timeout,
    app_structure_json_save_folder_path=None,
):
    """Get data about each case type and its known properties (historical and
    current) from the Application Structure API.

    Args:
        commcare_user_name (str): The Commcare username (email address)
        commcare_api_key (str): A Commcare API key for the user
        commcare_project_name (str): The Commcare project to which contacts will be
            imported
        commcare_app_id (str): The ID of the Commcare app.
        app_structure_api_timeout (int): Optional. If provided, will override the
            default timeout for the call to the Application Structure API (which
            tends to take a while)
        app_structure_json_save_folder_path (str): Optional. If provided, the JSON
            returned by the call to the Application Structure API will be saved as
            a JSON file in this folder.

    Returns:
        dict: Whose keys are case types and whose values are lists of property
            names. For instance:

            {
                "patient": ["first_name", "last_name", "etc."],
                "contact": ["first_name", "last_name", "phone_number", "etc."],
            }
    """
    logger.info(
        f"Retrieving application structure for {commcare_project_name} with ID: "
        f"{commcare_app_id} from API. This may take a while."
    )
    app_structure = get_application_structure(
        commcare_project_name,
        commcare_user_name,
        commcare_api_key,
        commcare_app_id,
        app_structure_api_timeout,
    )
    normalized_structure = normalize_application_structure_response_data(
        app_structure
    )
    if app_structure_json_save_folder_path:
        save_app_structure_json(
            normalized_structure, app_structure_json_save_folder_path
        )
    return normalized_structure
def save_app_structure_json(structure, save_folder):
    """Save the app structure as a JSON file, twice over.

    The file will be saved once with a date+time based name, and a second time
    with "latest" in the file name. So, for instance,
    "app_structure_10_20_2020_11-43.json" and "app_structure_latest.json".

    Args:
        structure (dict): The dictionary to be saved as JSON
        save_folder (str): The folder where the files will be saved.
    """
    date_file_name = f"app_structure_{datetime.now().strftime('%m_%d_%Y_%H-%M')}.json"
    latest_file_name = "app_structure_latest.json"
    for file_name in (date_file_name, latest_file_name):
        full_path = PurePath(save_folder).joinpath(file_name)
        logger.info(
            f"Saving normalized application structure data at {full_path}"
        )
        with open(full_path, "w") as fl:
            json.dump(structure, fl)
def main_with_args(
    commcare_user_name,
    commcare_api_key,
    commcare_project_name,
    commcare_app_id,
    db_url,
    case_types=None,
    existing_app_structure_json=None,
    app_structure_json_save_folder_path=None,
    app_structure_api_timeout=None,
    commcare_export_script_options=None,
    commcare_export_script_flags=None,
):
    """The main routine.

    Args:
        commcare_user_name (str): The Commcare username (email address)
        commcare_api_key (str): A Commcare API key for the user
        commcare_project_name (str): The Commcare project to which contacts will be
            imported
        commcare_app_id (str): The ID of the Commcare app.
        db_url (str): Connection string for the db
        case_types (list): Optional. List of case types. If provided, only the
            provided case types will be synced.
        existing_app_structure_json (str): Optional. Path to a JSON blob storing
            data returned by the CommCare Application Structure API endpoint.
        app_structure_json_save_folder_path (str): Optional. If provided, the JSON
            blob returned by a call to the Application Structure API will be saved
            in a file here.
        app_structure_api_timeout (int): Optional. If provided, will override the
            default timeout for the call to the Application Structure API (which
            tends to take a while)
        commcare_export_script_options (dict): Optional. A dict of additional args
            to get passed to the `commcare-export` subprocess as command line
            options.
        commcare_export_script_flags (list): Optional. A list of command line flags
            (with no args) to pass to the `commcare-export` subprocess.
    """
    case_types = case_types if case_types else []
    all_case_types_with_properties = (
        load_app_case_types_with_properties_from_json(existing_app_structure_json)
        if existing_app_structure_json
        # NB: This API call can take a long time: ~2-3 minutes
        else get_app_case_types_with_properties_from_api(
            commcare_project_name,
            commcare_user_name,
            commcare_api_key,
            commcare_app_id,
            app_structure_api_timeout,
            app_structure_json_save_folder_path,
        )
    )
    # If the person running the script used the `--case-types` option and some of
    # the case types they asked for weren't available, we'll use this to notify
    # them in the logs.
    unfound_requested_case_types = list(
        set(case_types).difference(set(all_case_types_with_properties.keys()))
    )
    if case_types and len(unfound_requested_case_types) == len(case_types):
        logger.warning("None of the case types you requested were found")
        return
    if unfound_requested_case_types:
        logger.warning(
            f"Some case types were not found: {', '.join(unfound_requested_case_types)}"
        )
        logger.info("Will continue processing the other requested case types")
    # We'll try to sync the requested case types minus the unfound ones if a subset
    # was requested; if no subset was requested, we'll sync all found case types.
    to_sync_case_types = (
        list(set(case_types).difference(set(unfound_requested_case_types)))
        if case_types
        else list(all_case_types_with_properties.keys())
    )
    # Filter `all_case_types_with_properties` down to only the ones that are in our
    # list of `to_sync_case_types`.
    to_sync_case_types_with_properties = {
        k: v
        for (k, v) in all_case_types_with_properties.items()
        if k in to_sync_case_types
    }
    mappings = generate_source_field_to_target_column_mappings(
        to_sync_case_types_with_properties
    )
    # This Excel workbook file is required by commcare-export, which gets called in
    # a subprocess by do_commcare_export_to_db.
    wb = make_commcare_export_sync_xl_wb(mappings)
    with tempfile.TemporaryDirectory() as tmpdir:
        tmp_file_path = PurePath(tmpdir).joinpath("mapping.xlsx")
        wb.save(tmp_file_path)
        logger.info("Attempting to sync to db")
        do_commcare_export_to_db(
            db_url,
            commcare_project_name,
            tmp_file_path,
            commcare_user_name,
            commcare_api_key,
            commcare_export_script_options,
            commcare_export_script_flags,
        )
    logger.info("I am quite done now.")
def upload_legacy_contacts_to_commcare(
    valid_normalized_contacts_data,
    project_slug,
    cc_user_name,
    cc_api_key,
    **contact_kwargs,
):
    """Upload a set of legacy contacts to CommCare.

    This function expects that contacts data sent to it has been validated and
    normalized beforehand. It ultimately returns a dict whose keys are
    `contact_id`s and whose values are CommCare-generated `case_id`s for the
    associated contacts. These mappings can be used in the calling context to
    generate a report that provides URLs to view uploaded cases in CommCare,
    alongside the original user-supplied data.

    Args:
        valid_normalized_contacts_data (list): A list of dicts with user-supplied
            data for contacts to be uploaded. Additionally, each dict must contain
            a unique value for a `contact_id` field, which is not user-supplied and
            should be dynamically generated in the calling context.
        project_slug (str): The name of the CommCare project (aka "domain")
        cc_user_name (str): Valid CommCare username
        cc_api_key (str): A CommCare API key for the user
        contact_kwargs (dict): Additional key-value pairs to add to each contact.
            This is to support per-CommCare-install specific requirements around
            fields that should be included on uploaded legacy contacts.

    Returns:
        dict: A dict whose keys are `contact_id`s and whose values are `case_id`s
            of created contacts
    """
    num_dummy_patients = ceil(
        len(valid_normalized_contacts_data) / MAX_CONTACTS_PER_PARENT_PATIENT
    )
    logger.info(f"Generating {num_dummy_patients} dummy patients")
    patients = generate_cc_dummy_patient_cases(
        project_slug, cc_user_name, cc_api_key, num_dummies=num_dummy_patients
    )
    expected_batches = ceil(
        len(valid_normalized_contacts_data) / MAX_CONTACTS_PER_PARENT_PATIENT
    )
    logger.info(
        f"Processing contacts in {expected_batches} "
        f"{'batch' if expected_batches == 1 else 'batches'} of "
        f"{MAX_CONTACTS_PER_PARENT_PATIENT} contacts per batch."
    )
    created_contacts = []
    for i, batch in enumerate(
        chunk_list(valid_normalized_contacts_data, MAX_CONTACTS_PER_PARENT_PATIENT)
    ):
        batch_num = i + 1
        logger.info(
            f"Processing batch {batch_num} of {expected_batches} consisting of "
            f"{len(batch)} contacts."
        )
        parent_id = patients.pop()
        prepped_contacts = [
            generate_commcare_contact_data(contact, parent_id, **contact_kwargs)
            for contact in batch
        ]
        try:
            logger.info(
                f"Uploading contacts from batch {batch_num} to CommCare"
            )
            upload_data_to_commcare(
                prepped_contacts,
                project_slug,
                "contact",
                "case_id",
                cc_user_name,
                cc_api_key,
            )
            logger.info(
                f"Retrieving parent case with case_id `{parent_id}` "
                f"for batch {batch_num}"
            )
            parent_case = get_commcare_case(
                parent_id,
                project_slug,
                cc_user_name,
                cc_api_key,
                include_child_cases=True,
            )
            for k in parent_case["child_cases"]:
                created_contacts.append((
                    parent_case["child_cases"][k]["properties"]["contact_id"],
                    parent_case["child_cases"][k]["case_id"],
                ))
        # This is a rare exception (hah!) where a catch-all except block is a good
        # idea. If there are multiple batches to be processed, and early ones
        # succeed but a later one fails, we want to return a result to the calling
        # context so a report can be generated indicating which contacts were
        # successfully uploaded. This makes it possible to remove rows that were
        # successfully uploaded from the originally supplied data and try again
        # later, without generating duplicate case data in CommCare.
        except Exception:
            logger.exception(
                "[upload_legacy_contacts_to_commcare] Something went wrong"
            )
    result = {}
    for item in created_contacts:
        result[item[0]] = item[1]
    return result
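# `chunk_list` is referenced above (and again in the Twilio script below) but
# defined elsewhere in this repo. This is a minimal sketch of the behavior the
# callers assume (yield successive fixed-size slices of a list), not necessarily
# the repo's own implementation.
def chunk_list(lst, chunk_size):
    """Yield successive `chunk_size`-sized slices of `lst`."""
    for i in range(0, len(lst), chunk_size):
        yield lst[i:i + chunk_size]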
def main_with_args(
    commcare_user_name,
    commcare_api_key,
    commcare_project_name,
    redcap_api_url,
    redcap_api_key,
    external_id_col,
    phone_cols,
    state_file,
    sync_all,
):
    """Download case and contact records from REDCap (via `redcap_api_url` and
    `redcap_api_key`) and upload them to the provided `commcare_project_name` via
    CommCare's bulk upload API.

    Args:
        commcare_user_name (str): The Commcare username (email address)
        commcare_api_key (str): A Commcare API key for the user
        commcare_project_name (str): The Commcare project to which contacts will be
            imported
        redcap_api_url (str): The URL of the REDCap API server
        redcap_api_key (str): The REDCap API key
        external_id_col (str): The name of the column in REDCap that contains the
            external_id for CommCare
        phone_cols (list): List of phone columns that should be normalized for
            CommCare
        state_file (str): File path to a local file where state about this sync can
            be kept
        sync_all (bool): If set, ignore the date_begin in the state_file and sync
            all records
    """
    # Try to avoid starting a second process if one is already going (this approach
    # is not free of race conditions, but should catch the majority of accidental
    # duplicate runs).
    state = get_redcap_state(state_file)
    if state["in_progress"]:
        raise ValueError("There may be another process running. Exiting.")
    state["in_progress"] = True
    save_redcap_state(state, state_file)
    try:
        # Save next_date_begin before retrieving records so we don't miss any on
        # the next run (this might mean some records are synced twice, but that's
        # better than never at all).
        next_date_begin = datetime.now()
        logger.info("Retrieving and cleaning data from REDCap...")
        redcap_project = redcap.Project(redcap_api_url, redcap_api_key)
        redcap_records = redcap_project.export_records(
            # date_begin corresponds to the dateRangeBegin field in the REDCap API,
            # which "return[s] only records that have been created or modified
            # *after* a given date/time." Note that REDCap expects this to be in
            # server time, so the script and server should be run in the same time
            # zone (or this script modified to accept a timezone argument).
            date_begin=state["date_begin"] if not sync_all else None,
            # Tell PyCap to return a pandas DataFrame.
            format="df",
            df_kwargs={
                # Without index_col=False, read_csv() will use the first column
                # ("record_id") as the index, which is problematic because it's not
                # unique and is easier to handle as a separate column anyway.
                "index_col": False,
                # We import everything as a string, to avoid pandas coercing ints
                # to floats and adding unnecessary decimal points in the data when
                # uploaded to CommCare.
                "dtype": str,
            },
        )
        if len(redcap_records.index) == 0:
            logger.info("No records returned from REDCap; aborting sync.")
        else:
            cases_df, contacts_df = (
                redcap_records.pipe(collapse_checkbox_columns)
                .pipe(normalize_phone_cols, phone_cols)
                .pipe(split_cases_and_contacts, external_id_col)
            )
            logger.info(
                f"Uploading {len(cases_df.index)} found patients (cases) to CommCare..."
            )
            upload_data_to_commcare(
                cases_df,
                commcare_project_name,
                "patient",
                "external_id",
                commcare_user_name,
                commcare_api_key,
                create_new_cases="off",
                search_field="external_id",
            )
            if len(contacts_df.index) > 0:
                # FIXME: The contact columns don't appear to map directly to
                # CommCare, and will need to be renamed before being imported.
                logger.warning(
                    f"Found {len(contacts_df.index)} contacts, but contact sync is "
                    "not implemented."
                )
        state["date_begin"] = next_date_begin
    finally:
        # Whatever happens, don't keep our lock open.
        state["in_progress"] = False
        save_redcap_state(state, state_file)
    logger.info("Sync done.")
def main_with_args(
    db_url,
    commcare_user_name,
    commcare_api_key,
    commcare_project_name,
    twilio_sid,
    twilio_token,
    case_type,
    search_column,
    batch_size=100,
):
    """The main routine

    Args:
        db_url (str): The db connection URL
        commcare_user_name (str): The Commcare username (email address)
        commcare_api_key (str): A Commcare API key for the user
        commcare_project_name (str): The Commcare project being exported from
        twilio_sid (str): A Twilio SID
        twilio_token (str): A Twilio auth token
        case_type (str): Case type and table name in db_url that should be queried
            for cases with a missing SMS capability property
        search_column (str): The name of the column in the db for contact that
            CommCare will match against in the bulk upload step. See
            https://confluence.dimagi.com/display/commcarepublic/Bulk+Upload+Case+Data
        batch_size (int): The size to batch process requests in. Each batch_size
            batch will be looked up in Twilio, and then the script attempts to
            upload the results for that batch to CommCare before moving on to the
            next batch.
    """
    unprocessed = get_unprocessed_phone_numbers(db_url, case_type, search_column)
    logger.info(f"{len(unprocessed)} unprocessed {case_type}(s) found")
    expected_batches = ceil(len(unprocessed) / batch_size)
    logger.info(
        f"Processing {case_type}(s) in {expected_batches} "
        f"{'batch' if expected_batches == 1 else 'batches'} of {batch_size} "
        f"{case_type}(s) per batch."
    )
    for i, subset in enumerate(chunk_list(unprocessed, batch_size)):
        batch_num = i + 1
        logger.info(
            f"Processing batch {batch_num} of {expected_batches} consisting of "
            f"{len(subset)} {case_type}(s)."
        )
        try:
            contacts_data = cleanup_processed_records_with_numbers(
                process_records(
                    subset,
                    search_column,
                    twilio_sid,
                    twilio_token,
                )
            )
        except Exception as exc:
            logger.error(f"Something unexpected happened: {exc}")
            raise
        logger.info(
            f"Uploading SMS capability status for {len(contacts_data)} {case_type}(s) "
            f"from batch {batch_num} of {expected_batches} to CommCare."
        )
        upload_data_to_commcare(
            contacts_data,
            commcare_project_name,
            case_type,
            search_column,
            commcare_user_name,
            commcare_api_key,
            "off",
            file_name_prefix="twilio_sms_capability_",
        )