Example #1
def main_with_args(case_summary_file, case_types, output_file_path, state_dir):
    """Do everything required to export and report on commcare export
    Args:
        case_summary_file (str): File path to CommCare app case summary
        case_types (list): The case types (i.e, "contact", "lab_result, etc.)
        output_file_path (str): Where the workbook with source-column mappings lives
    """
    logger.info(
        f"Retrieving data from {case_summary_file} and extracting column names"
    )
    properties_by_type = extract_property_names(case_summary_file, case_types)

    new_mappings = {
        case_type: generate_source_target_mappings(properties)
        for case_type, properties in properties_by_type.items()
    }

    if state_dir:
        for case_type, new_mapping in new_mappings.items():
            previous_mapping = get_previous_mapping(state_dir, case_type)
            if set(previous_mapping) != set(new_mapping):
                save_column_state(state_dir, case_type, new_mapping)

    logger.info(f"Generating a temporary Excel workbook to {output_file_path}")
    new_mappings = {make_sql_friendly(k): v for (k, v) in new_mappings.items()}
    wb = make_commcare_export_sync_xl_wb(new_mappings)
    wb.save(output_file_path)
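
# A minimal usage sketch for the routine above. The file paths and case types are
# hypothetical placeholders, and the wrapper function exists only for illustration.
def _example_generate_mapping_workbook():
    main_with_args(
        case_summary_file="case_summary.xlsx",
        case_types=["contact", "lab_result"],
        output_file_path="source_column_mappings.xlsx",
        state_dir=None,  # pass a directory path to persist column state between runs
    )
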
def load_app_case_types_with_properties_from_json(file_path):
    """Load JSON file with data stored from the application structure API endpoint

    Args:
        file_path (str): the path to a JSON file containing the data
    """
    logger.info(f"Loading application structure data from {file_path}")
    with open(file_path) as fl:
        return json.load(fl)
def get_app_case_types_with_properties_from_api(
    commcare_project_name,
    commcare_user_name,
    commcare_api_key,
    commcare_app_id,
    app_structure_api_timeout,
    app_structure_json_save_folder_path=None,
):
    """Get data about each case type and its known properties (historical and current)
        from the Application Structure API.

    Args:
        commcare_project_name (str): The Commcare project to which contacts will be
            imported
        commcare_user_name (str): The Commcare username (email address)
        commcare_api_key (str): A Commcare API key for the user
        commcare_app_id (str): The ID of the Commcare app.
        app_structure_api_timeout (int): Optional. If provided, overrides the default
            timeout for the call to the Application Structure API (which tends to take
            a while).
        app_structure_json_save_folder_path (str): Optional. If provided, the JSON
            returned by the call to the Application Structure API will be saved as a
            JSON file in this folder.
    Returns:
        dict: Whose keys are case types and whose values are lists of property names.
            For instance {
                "patient": ["first_name", "last_name", ...],
                "contact": ["first_name", "last_name", "phone_number", ...]
            }
    """
    logger.info(
        f"Retrieving application structure for {commcare_project_name} with ID: "
        f"{commcare_app_id} from API. This may take a while.")
    app_structure = get_application_structure(
        commcare_project_name,
        commcare_user_name,
        commcare_api_key,
        commcare_app_id,
        app_structure_api_timeout,
    )
    normalized_structure = normalize_application_structure_response_data(
        app_structure)
    if app_structure_json_save_folder_path:
        save_app_structure_json(normalized_structure,
                                app_structure_json_save_folder_path)
    return normalized_structure
def save_app_structure_json(structure, save_folder):
    """Save the app structure as a JSON file, twice over.

    The file will be saved once with a date+time based name, and a second time with
    "latest" in the file name. So, for instance, "app_structure_10_20_2020_11-43.json"
    and "app_structure_latest.json".

    Args:
        structure (dict): The dictionary to be saved as JSON
        save_folder (str): The folder where the files will be saved.
    """
    date_file_name = f"app_structure_{datetime.now().strftime('%m_%d_%Y_%H-%M')}.json"
    latest_file_name = "app_structure_latest.json"
    for file_name in (date_file_name, latest_file_name):
        full_path = PurePath(save_folder).joinpath(file_name)
        logger.info(
            f"Saving normalized application structure data at {full_path}")
        with open(full_path, "w") as fl:
            json.dump(structure, fl)
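
# A quick sketch of the naming scheme the function above uses, assuming the same
# strftime pattern; run on 2020-10-20 at 11:43 it would return
# ["app_structure_10_20_2020_11-43.json", "app_structure_latest.json"].
# The helper name is hypothetical and exists only for illustration.
def _example_structure_file_names(now=None):
    now = now or datetime.now()
    return [
        f"app_structure_{now.strftime('%m_%d_%Y_%H-%M')}.json",
        "app_structure_latest.json",
    ]
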
def main_with_args(
    commcare_user_name,
    commcare_api_key,
    commcare_project_name,
    commcare_app_id,
    db_url,
    case_types=None,
    existing_app_structure_json=None,
    app_structure_json_save_folder_path=None,
    app_structure_api_timeout=None,
    commcare_export_script_options=None,
    commcare_export_script_flags=None,
):
    """The main routine.

    Args:
        commcare_user_name (str): The Commcare username (email address)
        commcare_api_key (str): A Commcare API key for the user
        commcare_project_name (str): The Commcare project to which contacts will be
            imported
        commcare_app_id (str): The ID of the Commcare app.
        db_url (str): Connection string for the db
        case_types (list): Optional. List of case types. If provided, only the provided
            case types will be synced.
        existing_app_structure_json (str): Optional. Path to a JSON blob storing data
            returned by the CommCare Application Structure API endpoint.
        app_structure_json_save_folder_path (str): Optional. If provided, the JSON blob
            saved by a call to the Application Structure API will be saved in a file
            here.
        app_structure_api_timeout (int): Optional. If provided, overrides the default
            timeout for the call to the Application Structure API (which tends to take
            a while).
        commcare_export_script_options (dict): Optional. A dict of additional args to
            get passed to the `commcare-export` subprocess as command line options.
        commcare_export_script_flags (list): Optional. A list of command line flags
            (with no args) to pass to `commcare-export` subprocess.
    """
    case_types = case_types if case_types else []

    all_case_types_with_properties = (
        load_app_case_types_with_properties_from_json(
            existing_app_structure_json) if existing_app_structure_json
        # NB: This API call can take a long time: ~2-3 minutes
        else get_app_case_types_with_properties_from_api(
            commcare_project_name,
            commcare_user_name,
            commcare_api_key,
            commcare_app_id,
            app_structure_api_timeout,
            app_structure_json_save_folder_path,
        ))
    # if the person running the script used the `--case-types` option and some of the
    # case types they asked for weren't available, we'll use this to notify them in the logs
    unfound_requested_case_types = list(
        set(case_types) - set(all_case_types_with_properties))
    if case_types and len(unfound_requested_case_types) == len(case_types):
        logger.warn("None of the case types you requested were found")
        return
    if unfound_requested_case_types:
        logger.warning(
            f"Some case types were not found: {', '.join(unfound_requested_case_types)}"
        )
        logger.info("Will continue processing the other requested case types")
    # we'll try to sync the requested case types minus the unfound ones if subset
    # requested, and if no subset requested, we'll sync all found case types
    to_sync_case_types = (
        list(set(case_types) - set(unfound_requested_case_types))
        if case_types
        else list(all_case_types_with_properties))
    # filter `all_case_types_with_properties` down to only ones that are in our
    # list of `to_sync_case_types`
    to_sync_case_types_with_properties = {
        k: v
        for (k, v) in all_case_types_with_properties.items()
        if k in to_sync_case_types
    }

    mappings = generate_source_field_to_target_column_mappings(
        to_sync_case_types_with_properties)
    # this excel wb file is required by commcare-export which gets called in subprocess
    # by do_commcare_export_to_db
    wb = make_commcare_export_sync_xl_wb(mappings)
    with tempfile.TemporaryDirectory() as tmpdir:
        tmp_file_path = PurePath(tmpdir).joinpath("mapping.xlsx")
        wb.save(tmp_file_path)
        logger.info("Attempting to sync to db")
        do_commcare_export_to_db(
            db_url,
            commcare_project_name,
            tmp_file_path,
            commcare_user_name,
            commcare_api_key,
            commcare_export_script_options,
            commcare_export_script_flags,
        )
    logger.info("I am quite done now.")
Example #6
def upload_legacy_contacts_to_commcare(
    valid_normalized_contacts_data,
    project_slug,
    cc_user_name,
    cc_api_key,
    **contact_kwargs,
):
    """Upload a set of legacy contacts to CommCare.

    This function expects that contacts data sent to it has been validated and
    normalized beforehand. This function ultimately returns a dict whose keys are
    `contact_id`s and whose values are CommCare-generated `case_ids` for the associated
    contacts. These mappings can be used in the calling context to generate a report
    that provides URLs to view uploaded cases in CommCare, alongside the original user-
    supplied data.

    Args:
        valid_normalized_contacts_data (list): A list of dicts with user-supplied data
            for contacts to be uploaded. Additionally, each dict must contain a unique
            value for a `contact_id` field, which is not user-supplied, and should be
            dynamically generated in the calling context.
        project_slug (str): The name of the CommCare project (aka "domain")
        cc_user_name (str): Valid CommCare username
        cc_api_key (str): A CommCare API key for the user
        contact_kwargs (dict): Additional key-value pairs to add to each contact.
            This is to support per-CommCare install specific requirements around
            fields that should be included on uploaded legacy-contacts.
    Returns:
        dict: A dict whose keys are `contact_id`s and whose values are the `case_id`s
            of the created contacts
    """
    num_dummy_patients = ceil(
        len(valid_normalized_contacts_data) / MAX_CONTACTS_PER_PARENT_PATIENT)
    logger.info(f"Generating {num_dummy_patients} dummy patients")
    patients = generate_cc_dummy_patient_cases(project_slug,
                                               cc_user_name,
                                               cc_api_key,
                                               num_dummies=num_dummy_patients)
    expected_batches = ceil(
        len(valid_normalized_contacts_data) / MAX_CONTACTS_PER_PARENT_PATIENT)
    logger.info(f"Processing contacts in {expected_batches} "
                f"{'batch' if expected_batches == 1 else 'batches'} of "
                f"{MAX_CONTACTS_PER_PARENT_PATIENT} contacts per batch.")

    created_contacts = []

    for i, batch in enumerate(
            chunk_list(valid_normalized_contacts_data,
                       MAX_CONTACTS_PER_PARENT_PATIENT)):
        batch_num = i + 1
        logger.info(
            f"Processing batch {batch_num} of {expected_batches} consisting of "
            f"{len(batch)} contacts.")
        parent_id = patients.pop()
        prepped_contacts = [
            generate_commcare_contact_data(contact, parent_id,
                                           **contact_kwargs)
            for contact in batch
        ]
        try:
            logger.info(
                f"Uploading contacts from batch {batch_num} to CommCare")
            upload_data_to_commcare(
                prepped_contacts,
                project_slug,
                "contact",
                "case_id",
                cc_user_name,
                cc_api_key,
            )
            logger.info(f"Retrieving parent case with case_id `{parent_id}` "
                        f"for batch {batch_num}")
            parent_case = get_commcare_case(
                parent_id,
                project_slug,
                cc_user_name,
                cc_api_key,
                include_child_cases=True,
            )
            for k in parent_case["child_cases"]:
                created_contacts.append((
                    parent_case["child_cases"][k]["properties"]["contact_id"],
                    parent_case["child_cases"][k]["case_id"],
                ))
        # This is a rare exception (hah!) where a catch-all except block is a good idea.
        # If there are multiple batches to be processed, and early ones succeed, but
        # a later one fails, we want to return a result to the calling context so a
        # report can be generated indicating which contacts were successfully uploaded.
        # This will make it possible to remove rows that were successfully uploaded from
        # the originally supplied data and try again later, without generating duplicate
        # case data in CommCare.
        except Exception:
            logger.exception(
                "[upload_legacy_contacts_to_commcare] Something went wrong")
    return dict(created_contacts)
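
# `chunk_list` is used above but not defined in this excerpt; a minimal implementation
# along these lines would satisfy the way it is called (this is an assumption about the
# real helper, not its actual source).
def _example_chunk_list(items, chunk_size):
    """Yield successive chunk_size-sized slices of items."""
    for start in range(0, len(items), chunk_size):
        yield items[start:start + chunk_size]
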
def main_with_args(
    commcare_user_name,
    commcare_api_key,
    commcare_project_name,
    redcap_api_url,
    redcap_api_key,
    external_id_col,
    phone_cols,
    state_file,
    sync_all,
):
    """
    Script to download case and contact records for the given `redcap_api_url` and
    `redcap_api_key` and upload them to the provided `commcare_project_name` via
    CommCare's bulk upload API.

    Args:
        commcare_user_name (str): The Commcare username (email address)
        commcare_api_key (str): A Commcare API key for the user
        commcare_project_name (str): The Commcare project to which contacts will be imported
        redcap_api_url (str): The URL to the REDCap API server
        redcap_api_key (str): The REDCap API key
        external_id_col (str): The name of the column in REDCap that contains the external_id for CommCare
        phone_cols (list): List of phone columns that should be normalized for CommCare
        state_file (str): File path to a local file where state about this sync can be kept
        sync_all (bool): If set, ignore the date_begin in the state_file and sync all records
    """

    # Try to avoid starting a second process, if one is already going
    # (this approach is not free of race conditions, but should catch
    # the majority of accidental duplicate runs).
    state = get_redcap_state(state_file)
    if state["in_progress"]:
        raise ValueError("There may be another process running. Exiting.")
    state["in_progress"] = True
    save_redcap_state(state, state_file)

    try:
        # Save next_date_begin before retrieving records so we don't miss any
        # on the next run (this might mean some records are synced twice, but
        # that's better than never at all).
        next_date_begin = datetime.now()

        logger.info("Retrieving and cleaning data from REDCap...")
        redcap_project = redcap.Project(redcap_api_url, redcap_api_key)
        redcap_records = redcap_project.export_records(
            # date_begin corresponds to the dateRangeBegin field in the REDCap
            # API, which "return[s] only records that have been created or modified
            # *after* a given date/time." Note that REDCap expects this to be in
            # server time, so the script and server should be run in the same time
            # zone (or this script modified to accept a timezone argument).
            date_begin=state["date_begin"] if not sync_all else None,
            # Tell PyCap to return a pandas DataFrame.
            format="df",
            df_kwargs={
                # Without index_col=False, read_csv() will use the first column
                # ("record_id") as the index, which is problematic because it's
                # not unique and is easier to handle as a separate column anyways.
                "index_col": False,
                # We import everything as a string, to avoid pandas coercing ints
                # to floats and adding unnecessary decimal points in the data when
                # uploaded to CommCare.
                "dtype": str,
            },
        )
        if len(redcap_records.index) == 0:
            logger.info("No records returned from REDCap; aborting sync.")
        else:
            cases_df, contacts_df = (
                redcap_records.pipe(collapse_checkbox_columns)
                .pipe(normalize_phone_cols, phone_cols)
                .pipe(split_cases_and_contacts, external_id_col)
            )
            logger.info(
                f"Uploading {len(cases_df.index)} found patients (cases) to CommCare..."
            )
            upload_data_to_commcare(
                cases_df,
                commcare_project_name,
                "patient",
                "external_id",
                commcare_user_name,
                commcare_api_key,
                create_new_cases="off",
                search_field="external_id",
            )
            if len(contacts_df.index) > 0:
                # FIXME: The contact columns don't appear to match directly to CommCare, and
                # will need to be renamed before being imported.
                logger.warning(
                    f"Found {len(contacts_df.index)} contacts, but contact sync not implemented."
                )
        state["date_begin"] = next_date_begin
    finally:
        # Whatever happens, don't keep our lock open.
        state["in_progress"] = False
        save_redcap_state(state, state_file)
    logger.info("Sync done.")
def main_with_args(
    db_url,
    commcare_user_name,
    commcare_api_key,
    commcare_project_name,
    twilio_sid,
    twilio_token,
    case_type,
    search_column,
    batch_size=100,
):
    """The main routine

    Args:
        db_url (str): the db connection URL
        commcare_user_name (str): The Commcare username (email address)
        commcare_api_key (str): A Commcare API key for the user
        commcare_project_name (str): The Commcare project being exported from
        twilio_sid (str): A Twilio SID
        twilio_token (str): A Twilio auth token
        case_type (str): Case type and table name in db_url that should be queried
            for cases with a missing SMS capability property
        search_column (str): The name of the column in the db for contact that
            CommCare will match against in the bulk upload step. See
            https://confluence.dimagi.com/display/commcarepublic/Bulk+Upload+Case+Data
        batch_size (int): The number of records to process per batch. Each batch will
            be looked up in Twilio, and then the script attempts to upload the results
            for that batch to CommCare, before moving on to the next batch.

    """
    unprocessed = get_unprocessed_phone_numbers(db_url, case_type,
                                                search_column)
    logger.info(f"{len(unprocessed)} unprocessed {case_type}(s) found")
    expected_batches = ceil(len(unprocessed) / batch_size)
    logger.info(
        f"Processing {case_type}(s) in {expected_batches} "
        f"{'batch' if expected_batches == 1 else 'batches'} of {batch_size} {case_type}(s) "
        f"per batch.")
    for i, subset in enumerate(chunk_list(unprocessed, batch_size)):
        batch_num = i + 1
        logger.info(
            f"Processing batch {batch_num} of {expected_batches} consisting of "
            f"{len(subset)} {case_type}(s).")
        try:
            contacts_data = cleanup_processed_records_with_numbers(
                process_records(
                    subset,
                    search_column,
                    twilio_sid,
                    twilio_token,
                ))
        except Exception as exc:
            # Exception objects have no `.message` attribute in Python 3; log the
            # exception itself and re-raise with the original traceback.
            logger.error(f"Something unexpected happened: {exc}")
            raise
        logger.info(
            f"Uploading SMS capability status for {len(contacts_data)} {case_type}(s) from "
            f"batch {batch_num} of {expected_batches} to CommCare.")
        upload_data_to_commcare(
            contacts_data,
            commcare_project_name,
            case_type,
            search_column,
            commcare_user_name,
            commcare_api_key,
            "off",
            file_name_prefix="twilio_sms_capability_",
        )