示例#1
0
    automated_analysis_output_dir = args.automated_analysis_output_dir

    # Ensure the analysis output directory and its maps/graphs sub-directories
    # exist (presumably created when missing -- see IOUtils.ensure_dirs_exist).
    IOUtils.ensure_dirs_exist(automated_analysis_output_dir)
    IOUtils.ensure_dirs_exist(f"{automated_analysis_output_dir}/maps/regions")
    IOUtils.ensure_dirs_exist(
        f"{automated_analysis_output_dir}/maps/districts")
    IOUtils.ensure_dirs_exist(
        f"{automated_analysis_output_dir}/maps/mogadishu")
    IOUtils.ensure_dirs_exist(f"{automated_analysis_output_dir}/graphs")

    log.info("Loading Pipeline Configuration File...")
    with open(pipeline_configuration_file_path) as f:
        pipeline_configuration = PipelineConfiguration.from_configuration_file(
            f)
    # Register the pipeline name with the logger.
    Logger.set_project_name(pipeline_configuration.pipeline_name)
    log.debug(f"Pipeline name is {pipeline_configuration.pipeline_name}")

    # NOTE(review): the raised recursion limit is presumably needed to
    # deserialise deeply nested TracedData objects below -- confirm.
    sys.setrecursionlimit(30000)
    # Read the messages dataset
    log.info(
        f"Loading the messages dataset from {messages_json_input_path}...")
    with open(messages_json_input_path) as f:
        # Replace every imported TracedData object with a plain dict of its
        # items, building the list directly instead of patching it by index.
        messages = [
            dict(traced_data.items())
            for traced_data in
            TracedDataJsonIO.import_jsonl_to_traced_data_iterable(f)
        ]
    log.info(f"Loaded {len(messages)} messages")

    # Read the individuals dataset
    log.info(
        f"Loading the individuals dataset from {individuals_json_input_path}..."
    )
示例#2
0
    # Collect the sent_on timestamp of every message that matches both the
    # target operator and the target message direction.
    for msg in messages:
        if msg.urn.startswith("tel:"):
            # Telephone URN: the operator is derived from the part after "tel:".
            operator = PhoneCleaner.clean_operator(msg.urn.split(":")[1])
        else:
            # Any other URN: the URN scheme (the part before ":") is the operator.
            operator = msg.urn.split(":")[0]
        if operator == target_operator and msg.direction == target_message_direction:
            msg_sent_on_timestamps.append(msg.sent_on)
    # Close the series with end_date so the gap after the last matching
    # message is measured as well.
    msg_sent_on_timestamps.append(end_date)

    computed_windows_of_downtime = []
    # Compute the time difference between each pair of consecutive timestamps
    # (`PreviousMessageTimestamp` and `NextMessageTimestamp`). Each difference
    # is a window of time without a message, and is recorded together with the
    # operator and the message direction.
    for index, time_in_range in enumerate(msg_sent_on_timestamps):
        log.debug(
            f"Computing window of time without messages {index + 1}/{len(msg_sent_on_timestamps)}..."
        )

        # The last timestamp has no successor to pair with, so it is skipped.
        max_allowable_index = len(msg_sent_on_timestamps) - 1
        if index < max_allowable_index:
            next_index = index + 1
        else:
            continue

        # Next timestamp minus the current one.
        time_diff = msg_sent_on_timestamps[next_index] - \
            msg_sent_on_timestamps[index]
        computed_windows_of_downtime.append({
            "Operator":
            target_operator,
            "MessageDirection":
            target_message_direction,
示例#3
0
        if field.key not in target_field_keys:
            target_instance.create_field(field.label)
    log.info("Contact fields copied")

    log.info("Fetching all contacts from the source instance...")
    contacts = source_instance.get_raw_contacts()
    log.info(f"Fetched {len(contacts)} contacts")

    log.info("Updating contacts in the target instance...")
    # Update each contact's name and fields.
    # Language, groups, blocked, and stopped properties are not touched.
    # The counters below track how many contacts were skipped for each reason
    # and how many were updated.
    multiple_urns_count = 0
    telephone_with_no_country_code_count = 0
    updated_count = 0
    for i, contact in enumerate(contacts):
        log.debug(f"Updating contact {i + 1}/{len(contacts)}...")
        # Only contacts with exactly one URN are processed.
        # NOTE(review): this condition also matches contacts with zero URNs,
        # which are then reported and counted as "multiple URNS" -- confirm
        # whether that is intended.
        if len(contact.urns) != 1:
            log.warning(
                f"Found a contact in the source instance with multiple URNS. "
                f"The RapidPro UUID is '{contact.uuid}'")
            multiple_urns_count += 1
            continue
        # Skip telephone URNs that do not start with "tel:+", i.e. numbers
        # without a leading "+" country-code prefix.
        if contact.urns[0].startswith(
                "tel:") and not contact.urns[0].startswith("tel:+"):
            log.warning(
                f"Found a contact in the source instance with a telephone number that has no country "
                f"code; skipping. The RapidPro UUID is '{contact.uuid}'")
            telephone_with_no_country_code_count += 1
            continue
        # Normalise an empty name to None.
        if contact.name == "":
            contact.name = None
示例#4
0
        drive_client_wrapper.init_client_from_info(credentials_info)

    # Load phone number <-> UUID table
    print("Loading Phone Number <-> UUID Table...")
    with open(phone_number_uuid_table_path, "r") as f:
        phone_number_uuid_table = PhoneNumberUuidTable.load(f)

    # Load messages: one dataset per activation flow, read from
    # "<raw_data_dir>/<activation_flow_name>.jsonl".
    # NOTE(review): the enumerate index `i` is not used in the visible part of
    # this loop.
    messages_datasets = []
    for i, activation_flow_name in enumerate(
            pipeline_configuration.activation_flow_names):
        raw_activation_path = f"{raw_data_dir}/{activation_flow_name}.jsonl"
        log.info(f"Loading {raw_activation_path}...")
        with open(raw_activation_path, "r") as f:
            messages = TracedDataJsonIO.import_jsonl_to_traced_data_iterable(f)
        log.debug(f"Loaded {len(messages)} messages")
        messages_datasets.append(messages)

    # Load surveys: one dataset per survey flow, read from
    # "<raw_data_dir>/<survey_flow_name>.jsonl".
    # Note that the local name `messages` is reused here to hold survey data.
    survey_datasets = []
    for i, survey_flow_name in enumerate(
            pipeline_configuration.survey_flow_names):
        raw_survey_path = f"{raw_data_dir}/{survey_flow_name}.jsonl"
        log.info(f"Loading {raw_survey_path}...")
        with open(raw_survey_path, "r") as f:
            messages = TracedDataJsonIO.import_jsonl_to_traced_data_iterable(f)
        log.debug(f"Loaded {len(messages)} messages")
        survey_datasets.append(messages)

    # Add survey data to the messages
    print("Combining Datasets...")
示例#5
0
    # Load the pipeline configuration file
    print("Loading Pipeline Configuration File...")
    with open(pipeline_configuration_file_path) as f:
        pipeline_configuration = PipelineConfiguration.from_configuration_file(f)

    # Load phone number <-> UUID table
    print("Loading Phone Number <-> UUID Table...")
    with open(phone_number_uuid_table_path, "r") as f:
        phone_number_uuid_table = PhoneNumberUuidTable.load(f)

    # Load demographics
    log.info("Loading Demographics...")
    with open(demog_input_path, "r") as f:
        demographics = TracedDataJsonIO.import_json_to_traced_data_iterable(f)
    log.debug(f"Loaded {len(demographics)} contacts")

    # Load messages: one dataset per path in message_paths.
    messages_datasets = []
    for i, path in enumerate(message_paths):
        print("Loading Episode {}/{}...".format(i + 1, len(message_paths)))
        with open(path, "r") as f:
            messages_datasets.append(TracedDataJsonIO.import_json_to_traced_data_iterable(f))

    # Add survey data to the messages
    print("Combining Datasets...")
    # The "demographics" dataset holds the full list of participants (the
    # original contact dataset). Confusingly, it is therefore passed in the
    # "messages" position, wrapped in a single-element list, and it is the
    # dataset that gets updated with the messages.
    # NOTE(review): parameter roles are taken from the original comment, not
    # from the signature of combine_raw_datasets -- confirm.
    data = CombineRawDatasets.combine_raw_datasets(user, [demographics], messages_datasets)

    print("Translating Rapid Pro Keys...")
示例#6
0
    # Update contacts present in both workspaces
    # Counters for each outcome of the synchronisation loop below.
    identical_contacts = 0
    skipped_contacts = 0
    updated_contacts_in_workspace_1 = 0
    updated_contacts_in_workspace_2 = 0
    # URNs that are keys in both lookup tables (intersection of the key views).
    urns_in_both_workspaces = workspace_1_contacts_lut.keys(
    ) & workspace_2_contacts_lut.keys()
    # Walk the shared URNs in sorted order.
    for i, urn in enumerate(sorted(urns_in_both_workspaces)):
        contact_v1 = workspace_1_contacts_lut[urn]
        contact_v2 = workspace_2_contacts_lut[urn]

        # Contacts are treated as identical when both name and fields match;
        # nothing needs to be written in that case.
        if contact_v1.name == contact_v2.name and contact_v1.fields == contact_v2.fields:
            log.debug(
                f"Synchronising contacts in both workspaces {i + 1}/{len(urns_in_both_workspaces)}: "
                f"Contacts identical. "
                f"(Rapid Pro UUIDs are '{contact_v1.uuid}' in {workspace_1_name}; "
                f"'{contact_v2.uuid}' in {workspace_2_name})")
            identical_contacts += 1
            continue

        # Contacts differ
        # Without force_update, differing contacts are only reported and
        # counted as skipped.
        if not force_update:
            log.warning(
                f"Synchronising contacts in both workspaces {i + 1}/{len(urns_in_both_workspaces)}: "
                f"Contacts differ, but not overwriting. Use --force to write the latest everywhere. "
                f"(Rapid Pro UUIDs are '{contact_v1.uuid}' in {workspace_1_name}; "
                f"'{contact_v2.uuid}' in {workspace_2_name})")
            skipped_contacts += 1
            continue