Example #1
        log.info(f"Removed {removed} uuids; {len(uuids)} remain")

    # Convert the uuids to phone numbers
    log.info(f"Converting {len(uuids)} uuids to phone numbers...")
    uuid_phone_number_lut = phone_number_uuid_table.uuid_to_data_batch(uuids)
    phone_numbers = set()
    skipped_uuids = set()
    for uuid in uuids:
        # Some uuids are no longer re-identifiable due to a uuid table consistency issue between OCHA and WorldBank-PLR
        if uuid in uuid_phone_number_lut:
            phone_numbers.add(f"+{uuid_phone_number_lut[uuid]}")
        else:
            skipped_uuids.add(uuid)
    log.info(
        f"Successfully converted {len(phone_numbers)} uuids to phone numbers.")
    log.warning(f"Unable to re-identify {len(skipped_uuids)} uuids")

    # Export contacts CSV
    log.warning(
        f"Exporting {len(phone_numbers)} phone numbers to {csv_output_file_path}..."
    )
    with open(csv_output_file_path, "w") as f:
        writer = csv.DictWriter(f,
                                fieldnames=["URN:Tel", "Name"],
                                lineterminator="\n")
        writer.writeheader()

        for n in phone_numbers:
            writer.writerow({"URN:Tel": n})
        log.info(
            f"Wrote {len(phone_numbers)} contacts to {csv_output_file_path}")
Example #2
        title="Participants/Episode",
        width=len(engagement_counts) * 20 + 150)
    fig.update_xaxes(tickangle=-60)
    fig.write_image(
        f"{automated_analysis_output_dir}/graphs/participants_per_episode.png",
        scale=IMG_SCALE_FACTOR)

    log.info("Graphing the demographic distributions...")
    for plan in PipelineConfiguration.DEMOG_CODING_PLANS:
        for cc in plan.coding_configurations:
            if cc.analysis_file_key is None:
                continue

            if len(cc.code_scheme.codes) > 200:
                log.warning(
                    f"Skipping graphing the distribution of codes for {cc.analysis_file_key}, because it "
                    f"contains too many columns to graph (has {len(cc.code_scheme.codes)} columns; "
                    f"limit is 200).")
                continue

            log.info(
                f"Graphing the distribution of codes for {cc.analysis_file_key}..."
            )
            fig = px.bar(
                [{
                    "Label": code.string_value,
                    "Number of Participants": demographic_distributions[cc.analysis_file_key][code.code_id]
                } for code in cc.code_scheme.codes if code.control_code != Codes.STOP],
                x="Label",
                y="Number of Participants",
Example #3
    uploaded_memory_logs = google_cloud_utils.list_blobs(
        google_cloud_credentials_file_path,
        pipeline_configuration.memory_profile_upload_bucket,
        pipeline_configuration.bucket_dir_path,
    )
    uploaded_memory_log_dates = get_uploaded_file_dates(
        uploaded_memory_logs, date_pattern)

    uploaded_data_archives = google_cloud_utils.list_blobs(
        google_cloud_credentials_file_path,
        pipeline_configuration.data_archive_upload_bucket,
        pipeline_configuration.bucket_dir_path)
    uploaded_data_archives_dates = get_uploaded_file_dates(
        uploaded_data_archives, date_pattern)

    log.warning(f"Deleting old memory profile files from local disk...")
    delete_old_log_files(memory_profile_dir_path, uploaded_memory_log_dates)
    log.warning(f"Deleting old data archives files from local disk...")
    delete_old_log_files(data_archive_dir_path, uploaded_data_archives_dates)

    log.info(f"Uploading memory profile files...")
    memory_log_files_by_date = get_files_by_date(memory_profile_dir_path,
                                                 uploaded_memory_log_dates)
    for file_date in memory_log_files_by_date:
        latest_memory_log_file_path = max(memory_log_files_by_date[file_date],
                                          key=os.path.getmtime)
        memory_profile_upload_location = f"{pipeline_configuration.memory_profile_upload_bucket}/" \
                                         f"{pipeline_configuration.bucket_dir_path}/{os.path.basename(latest_memory_log_file_path)}"
        log.info(
            f"Uploading memory profile from {latest_memory_log_file_path} to {memory_profile_upload_location}..."
        )
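
The "newest file per date" selection above can be illustrated with the following self-contained sketch; the temporary directory, file names, and dates are hypothetical, and in the real script the grouping comes from the project's get_files_by_date helper.

import os
import tempfile
import time

# Create two hypothetical memory-profile files so the sketch runs end-to-end.
tmp_dir = tempfile.mkdtemp()
paths = []
for name in ["2020-01-01T10-00-00.profile", "2020-01-01T18-00-00.profile"]:
    path = os.path.join(tmp_dir, name)
    with open(path, "w") as f:
        f.write("profile data")
    paths.append(path)
    time.sleep(0.01)  # ensure distinct modification times

memory_log_files_by_date = {"2020-01-01": paths}  # date -> files generated on that date
for file_date, date_paths in memory_log_files_by_date.items():
    latest_memory_log_file_path = max(date_paths, key=os.path.getmtime)  # newest file wins
    print(f"Would upload {latest_memory_log_file_path} for {file_date}")
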
Example #4
                 x="Episode", y="Total Messages with Opt-Ins", template="plotly_white",
                 title="Messages/Episode", width=len(engagement_counts) * 20 + 150)
    fig.update_xaxes(tickangle=-60)
    fig.write_image(f"{output_dir}/graphs/messages_per_episode.png", scale=IMG_SCALE_FACTOR)

    # Graph the number of participants in each episode
    fig = px.bar([x for x in engagement_counts.values() if x["Episode"] != "Total"],
                 x="Episode", y="Total Participants with Opt-Ins", template="plotly_white",
                 title="Participants/Episode", width=len(engagement_counts) * 20 + 150)
    fig.update_xaxes(tickangle=-60)
    fig.write_image(f"{output_dir}/graphs/participants_per_episode.png", scale=IMG_SCALE_FACTOR)

    log.info("Graphing the demographic distributions...")
    for demographic, counts in demographic_distributions.items():
        if len(counts) > 200:
            log.warning(f"Skipping graphing the distribution of codes for {demographic}, but is contains too many "
                        f"columns to graph (has {len(counts)} columns; limit is 200).")
            continue

        log.info(f"Graphing the distribution of codes for {demographic}...")
        fig = px.bar([{"Label": code_string_value, "Number of Participants": number_of_participants}
                      for code_string_value, number_of_participants in counts.items()],
                     x="Label", y="Number of Participants", template="plotly_white",
                     title=f"Season Distribution: {demographic}", width=len(counts) * 20 + 150)
        fig.update_xaxes(type="category", tickangle=-60, dtick=1)
        fig.write_image(f"{output_dir}/graphs/season_distribution_{demographic}.png", scale=IMG_SCALE_FACTOR)

    # Plot the per-season distribution of responses for each survey question, per individual
    for plan in PipelineConfiguration.RQA_CODING_PLANS + PipelineConfiguration.SURVEY_CODING_PLANS:
        for cc in plan.coding_configurations:
            if cc.analysis_file_key is None:
                continue
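
As a standalone illustration of the graphing pattern used above, the sketch below builds the same kind of bar chart from a hypothetical dict of counts. It assumes plotly is installed (and kaleido for write_image); the demographic key, counts, output path, and scale factor are placeholders.

import plotly.express as px

demographic = "gender"  # hypothetical analysis key
counts = {"man": 120, "woman": 150, "NC": 10}  # hypothetical code label -> participant count

fig = px.bar([{"Label": label, "Number of Participants": n} for label, n in counts.items()],
             x="Label", y="Number of Participants", template="plotly_white",
             title=f"Season Distribution: {demographic}", width=len(counts) * 20 + 150)
fig.update_xaxes(type="category", tickangle=-60, dtick=1)
fig.write_image(f"season_distribution_{demographic}.png", scale=2)  # requires the kaleido package
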
Example #5
    google_cloud_credentials_file_path = args.google_cloud_credentials_file_path
    rapid_pro_domain = args.rapid_pro_domain
    rapid_pro_token_file_url = args.rapid_pro_token_file_url
    output_file_path = args.output_file_path

    log.info("Downloading the Rapid Pro access token...")
    rapid_pro_token = google_cloud_utils.download_blob_to_string(
        google_cloud_credentials_file_path, rapid_pro_token_file_url).strip()

    rapid_pro = RapidProClient(rapid_pro_domain, rapid_pro_token)

    all_messages = rapid_pro.get_raw_messages()
    inbound_messages = [msg for msg in all_messages if msg.direction == "in"]

    inbound_phone_numbers = set()
    for msg in inbound_messages:
        if msg.urn.startswith("tel:"):
            phone_number = msg.urn.split(":")[1]
            inbound_phone_numbers.add(phone_number)
        else:
            log.warning(f"Skipped non-telephone URN type {msg.urn.split(':')[0]}")

    log.warning(f"Exporting {len(inbound_phone_numbers)} inbound phone numbers to {output_file_path}...")
    with open(output_file_path, "w") as f:
        writer = csv.DictWriter(f, fieldnames=["URN:Tel", "Name"])
        writer.writeheader()
        for number in inbound_phone_numbers:
            writer.writerow({"URN:Tel": number, "Name": ""})
    log.info(f"Done. Wrote {len(inbound_phone_numbers)} inbound phone numbers to {output_file_path}")
Example #6
    log.info("Fetching all contacts from the source instance...")
    contacts = source_instance.get_raw_contacts()
    log.info(f"Fetched {len(contacts)} contacts")

    log.info("Updating contacts in the target instance...")
    # Update each contact's name and fields.
    # Language, groups, blocked, and stopped properties are not touched.
    multiple_urns_count = 0
    telephone_with_no_country_code_count = 0
    updated_count = 0
    for i, contact in enumerate(contacts):
        log.debug(f"Updating contact {i + 1}/{len(contacts)}...")
        if len(contact.urns) != 1:
            log.warning(
                f"Found a contact in the source instance with multiple URNS. "
                f"The RapidPro UUID is '{contact.uuid}'")
            multiple_urns_count += 1
            continue
        if contact.urns[0].startswith(
                "tel:") and not contact.urns[0].startswith("tel:+"):
            log.warning(
                f"Found a contact in the source instance with a telephone number that has no country "
                f"code; skipping. The RapidPro UUID is '{contact.uuid}'")
            telephone_with_no_country_code_count += 1
            continue
        if contact.name == "":
            contact.name = None
        target_instance.update_contact(contact.urns[0], contact.name,
                                       contact.fields)
        updated_count += 1
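
A condensed sketch of the contact-validation checks above, using a hypothetical Contact stand-in instead of live Rapid Pro contact objects and print statements in place of the update call:

from dataclasses import dataclass, field


@dataclass
class Contact:
    """Hypothetical stand-in for a Rapid Pro contact object."""
    uuid: str
    name: str
    urns: list
    fields: dict = field(default_factory=dict)


contacts = [
    Contact("uuid-1", "Alice", ["tel:+252611111111"]),
    Contact("uuid-2", "Bob", ["tel:0612345678"]),                  # no country code -> skipped
    Contact("uuid-3", "", ["tel:+252622222222", "telegram:99"]),   # multiple URNs -> skipped
]

for contact in contacts:
    if len(contact.urns) != 1:
        print(f"Skipping contact with multiple URNs: {contact.uuid}")
        continue
    if contact.urns[0].startswith("tel:") and not contact.urns[0].startswith("tel:+"):
        print(f"Skipping telephone number with no country code: {contact.uuid}")
        continue
    name = contact.name if contact.name != "" else None  # treat empty names as unset
    print(f"Would update {contact.urns[0]} with name={name!r} and fields={contact.fields}")
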
Example #7
    # Load the REACH traced data
    log.info(f"Loading REACH traced data from file '{traced_data_path}'...")
    with open(traced_data_path, "r") as f:
        data = TracedDataJsonIO.import_json_to_traced_data_iterable(f)
    log.info(f"Loaded {len(data)} traced data objects")

    # Search the TracedData for consenting contacts
    log.info("Searching for consenting uuids...")
    consenting_uuids = set()
    for td in data:
        if td["withdrawn_consent"] == Codes.TRUE:
            continue
        consenting_uuids.add(td["UID"])
    log.info(f"Found {len(consenting_uuids)} consenting uuids")

    # Convert the uuids to phone numbers
    log.info("Converting the uuids to phone numbers...")
    phone_numbers = [f"+{phone_number_uuid_table.get_phone(uuid)}" for uuid in consenting_uuids]

    log.warning(f"Exporting {len(phone_numbers)} phone numbers to {output_path}...")
    with open(output_path, "w") as f:
        writer = csv.DictWriter(f, fieldnames=["URN:Tel", "Name"], lineterminator="\n")
        writer.writeheader()

        for n in phone_numbers:
            writer.writerow({
                "URN:Tel": n
            })

        log.info(f"Wrote {len(phone_numbers)} contacts to {output_path}")
Example #8
        contact_v1 = instance_1_contacts_lut[urn]
        contact_v2 = instance_2_contacts_lut[urn]

        if contact_v1.name == contact_v2.name and contact_v1.fields == contact_v2.fields:
            log.info(
                f"Synchronising contacts in both instances {i + 1}/{len(urns_in_both_instances)}: "
                f"Contacts identical."
                f"Rapid Pro UUIDs are '{contact_v1.uuid}' in instance 1; '{contact_v2.uuid}' in instance 2"
            )
            continue

        # Contacts differ
        if not force_update:
            log.warning(
                f"Synchronising contacts in both instances {i + 1}/{len(urns_in_both_instances)}: "
                f"Contacts differ, but not overwriting. Use --force to write the latest everywhere. "
                f"Rapid Pro UUIDs are '{contact_v1.uuid}' in instance 1; '{contact_v2.uuid}' in instance 2"
            )
            continue

        # Assume the most recent contact is correct
        # IMPORTANT: If the same contact has been changed on both Rapid Pro instances since the last sync was performed,
        #            the older changes will be overwritten.
        if contact_v1.modified_on > contact_v2.modified_on:
            log.info(
                f"Synchronising contacts in both instances {i + 1}/{len(urns_in_both_instances)}: "
                f"Contacts differ, overwriting the contact in instance 2 with the more recent one in instance 1. "
                f"Rapid Pro UUIDs are '{contact_v1.uuid}' in instance 1; '{contact_v2.uuid}' in instance 2"
            )
            instance_2.update_contact(urn, contact_v1.name, contact_v1.fields)
Example #9
        else:
            individuals = TracedDataJsonIO.import_jsonl_to_traced_data_iterable(
                f)
        log.info(f"Loaded {len(individuals)} individuals")

        for ind in individuals:
            if ind["consent_withdrawn"] == Codes.TRUE:
                continue
            advert_uids.add(ind['uid'])

    # Convert the uuids to phone numbers
    log.info("Converting the uuids to phone numbers...")
    uuids_to_phone_numbers = phone_number_uuid_table.uuid_to_data_batch(
        list(advert_uids))
    advert_contacts = [
        f"+{uuids_to_phone_numbers[uuid]}" for uuid in advert_uids
    ]

    # Export contacts CSV
    log.warning(
        f"Exporting {len(advert_contacts)} phone numbers to {csv_output_file_path}..."
    )
    with open(csv_output_file_path, "w") as f:
        writer = csv.DictWriter(f,
                                fieldnames=["URN:Tel", "Name"],
                                lineterminator="\n")
        writer.writeheader()
        for n in advert_contacts:
            writer.writerow({"URN:Tel": n})
        log.info(
            f"Wrote {len(advert_contacts)} contacts to {csv_output_file_path}")
Example #10
        contact_v2 = workspace_2_contacts_lut[urn]

        if contact_v1.name == contact_v2.name and contact_v1.fields == contact_v2.fields:
            log.debug(
                f"Synchronising contacts in both workspaces {i + 1}/{len(urns_in_both_workspaces)}: "
                f"Contacts identical. "
                f"(Rapid Pro UUIDs are '{contact_v1.uuid}' in {workspace_1_name}; "
                f"'{contact_v2.uuid}' in {workspace_2_name})")
            identical_contacts += 1
            continue

        # Contacts differ
        if not force_update:
            log.warning(
                f"Synchronising contacts in both workspaces {i + 1}/{len(urns_in_both_workspaces)}: "
                f"Contacts differ, but not overwriting. Use --force to write the latest everywhere. "
                f"(Rapid Pro UUIDs are '{contact_v1.uuid}' in {workspace_1_name}; "
                f"'{contact_v2.uuid}' in {workspace_2_name})")
            skipped_contacts += 1
            continue

        # Assume the most recent contact is correct
        # IMPORTANT: If the same contact has been changed on both Rapid Pro workspaces since the last sync was
        #            performed, the older changes will be overwritten.
        if contact_v1.modified_on > contact_v2.modified_on:
            if workspaces_to_update in {"2", "both"}:
                log.info(
                    f"Synchronising contacts in both workspaces {i + 1}/{len(urns_in_both_workspaces)}: "
                    f"Contacts differ, overwriting the contact in {workspace_2_name} with the more recent one in "
                    f"{workspace_1_name}. "
                    f"(Rapid Pro UUIDs are '{contact_v1.uuid}' in {workspace_1_name}; "
Example #11
    id_tables = FirestoreUuidInfrastructure.init_from_credentials(firestore_uuid_table_credentials)
    if len(table_names) == 0:
        table_names = id_tables.list_table_names()
    log.info(f"Found {len(table_names)} uuid tables to export")

    export = dict()  # of table_name -> {mappings: dict of data -> uuid}
    for i, table_name in enumerate(table_names):
        log.info(f"Fetching mappings from table {i + 1}/{len(table_names)}: {table_name}...")
        mappings = id_tables.get_table(table_name, None).get_all_mappings()
        export[table_name] = {
            "mappings": mappings
        }
        log.info(f"Fetched {len(mappings)} mappings")

    log.info(f"Converting fetched data to zipped json for export...")
    json_blob = json.dumps(export)
    export_compressed = gzip.compress(bytes(json_blob, "utf-8"))

    if gzip_export_file_path is not None:
        log.warning(f"Writing mappings to local disk at '{gzip_export_file_path}'...")
        with open(gzip_export_file_path, "wb") as f:
            f.write(export_compressed)

    if gcs_upload_path is not None:
        log.info(f"Uploading the mappings to {gcs_upload_path}...")
        google_cloud_utils.upload_string_to_blob(google_cloud_credentials_file_path, gcs_upload_path,
                                                 export_compressed)

    log.info(f"Export complete ({len(table_names)} table(s))")
Example #12
    for phone_number in somali_phone_numbers:
        advert_contacts[phone_number] = {
            "URN:Tel": phone_number,
            "Name": None,
            "Language": 'som'
        }
    for phone_number in english_phone_numbers:
        advert_contacts[phone_number] = {
            "URN:Tel": phone_number,
            "Name": None,
            "Language": 'eng'
        }
    for phone_number in swahili_phone_numbers:
        advert_contacts[phone_number] = {
            "URN:Tel": phone_number,
            "Name": None,
            "Language": 'swh'
        }

    log.warning(
        f"Exporting {len(advert_contacts)} contacts to {contacts_csv_path}")
    with open(contacts_csv_path, "w") as f:
        headers = ["URN:Tel", "Name", "Language"]
        writer = csv.DictWriter(f, fieldnames=headers, lineterminator="\n")
        writer.writeheader()
        for phone_number in advert_contacts.values():
            writer.writerow(phone_number)

        log.info(
            f"Wrote {len(advert_contacts)} contacts to {contacts_csv_path}")