def main(user, google_cloud_credentials_file_path,
         pipeline_configuration_file_path, raw_data_dir):
    """Fetch raw data from every source listed in the pipeline configuration.

    Loads the pipeline configuration, downloads the Firestore UUID table
    credentials, initialises the UUID table, then hands each configured raw
    data source to the fetcher that matches its type.

    :param user: Forwarded unchanged to the Rapid Pro and recovery CSV fetchers.
    :param google_cloud_credentials_file_path: Passed to
        `google_cloud_utils.download_blob_to_string` and to every fetcher.
    :param pipeline_configuration_file_path: File opened and parsed with
        `PipelineConfiguration.from_configuration_file`.
    :param raw_data_dir: Forwarded unchanged to every fetcher.
    :raises AssertionError: If a raw data source is not a `RapidProSource`,
        `GCloudBucketSource` or `RecoveryCSVSource`.
    """
    # Read the settings from the configuration file
    log.info("Loading Pipeline Configuration File...")
    with open(pipeline_configuration_file_path) as f:
        pipeline_configuration = PipelineConfiguration.from_configuration_file(
            f)
    Logger.set_project_name(pipeline_configuration.pipeline_name)
    log.debug(f"Pipeline name is {pipeline_configuration.pipeline_name}")

    log.info("Downloading Firestore UUID Table credentials...")
    uuid_table_configuration = pipeline_configuration.phone_number_uuid_table
    firestore_uuid_table_credentials = json.loads(
        google_cloud_utils.download_blob_to_string(
            google_cloud_credentials_file_path,
            uuid_table_configuration.firebase_credentials_file_url))

    phone_number_uuid_table = FirestoreUuidTable(
        uuid_table_configuration.table_name,
        firestore_uuid_table_credentials, "avf-phone-uuid-")
    log.info("Initialised the Firestore UUID table")

    # Bind the source list and its size once; both are reused in every log line.
    raw_data_sources = pipeline_configuration.raw_data_sources
    source_count = len(raw_data_sources)

    log.info(f"Fetching data from {source_count} sources...")
    for position, raw_data_source in enumerate(raw_data_sources, start=1):
        log.info(f"Fetching from source {position}/{source_count}...")
        if isinstance(raw_data_source, RapidProSource):
            fetch_from_rapid_pro(user, google_cloud_credentials_file_path,
                                 raw_data_dir, phone_number_uuid_table,
                                 raw_data_source)
        elif isinstance(raw_data_source, GCloudBucketSource):
            fetch_from_gcloud_bucket(google_cloud_credentials_file_path,
                                     raw_data_dir, raw_data_source)
        elif isinstance(raw_data_source, RecoveryCSVSource):
            fetch_from_recovery_csv(user, google_cloud_credentials_file_path,
                                    raw_data_dir, phone_number_uuid_table,
                                    raw_data_source)
        else:
            # Raise explicitly instead of `assert False`: assert statements are
            # removed when Python runs with -O, which would let an unknown
            # source be skipped without any error.
            raise AssertionError(
                f"Unknown raw_data_source type {type(raw_data_source)}")
示例#2
0
def main(user, google_cloud_credentials_file_path,
         pipeline_configuration_file_path, raw_data_dir):
    """Fetch Rapid Pro raw data and the listening group CSVs.

    Loads the pipeline configuration, downloads the Firestore UUID table
    credentials, initialises the UUID table, fetches every configured raw
    data source, then calls `fetch_listening_groups_csvs`.

    :param user: Forwarded unchanged to `fetch_from_rapid_pro`.
    :param google_cloud_credentials_file_path: Passed to
        `google_cloud_utils.download_blob_to_string`, `fetch_from_rapid_pro`
        and `fetch_listening_groups_csvs`.
    :param pipeline_configuration_file_path: File opened and parsed with
        `PipelineConfiguration.from_configuration_file`.
    :param raw_data_dir: Forwarded unchanged to the fetchers.
    :raises AssertionError: If a raw data source is not a `RapidProSource`.
    """
    # Read the settings from the configuration file
    log.info("Loading Pipeline Configuration File...")
    with open(pipeline_configuration_file_path) as f:
        pipeline_configuration = PipelineConfiguration.from_configuration_file(
            f)

    log.info("Downloading Firestore UUID Table credentials...")
    uuid_table_configuration = pipeline_configuration.phone_number_uuid_table
    firestore_uuid_table_credentials = json.loads(
        google_cloud_utils.download_blob_to_string(
            google_cloud_credentials_file_path,
            uuid_table_configuration.firebase_credentials_file_url))

    phone_number_uuid_table = FirestoreUuidTable(
        uuid_table_configuration.table_name,
        firestore_uuid_table_credentials, "avf-phone-uuid-")
    log.info("Initialised the Firestore UUID table")

    # Bind the source list and its size once; both are reused in every log line.
    raw_data_sources = pipeline_configuration.raw_data_sources
    source_count = len(raw_data_sources)

    log.info(f"Fetching data from {source_count} sources...")
    for position, raw_data_source in enumerate(raw_data_sources, start=1):
        log.info(f"Fetching from source {position}/{source_count}...")
        if isinstance(raw_data_source, RapidProSource):
            fetch_from_rapid_pro(user, google_cloud_credentials_file_path,
                                 raw_data_dir, phone_number_uuid_table,
                                 raw_data_source)
        else:
            # Raise explicitly instead of `assert False`: assert statements are
            # removed when Python runs with -O, which would let an unknown
            # source be skipped without any error.
            raise AssertionError(
                f"Unknown raw_data_source type {type(raw_data_source)}")

    # Fetch de-identified listening group CSVs
    log.info("Fetching listening group CSVs")
    fetch_listening_groups_csvs(google_cloud_credentials_file_path,
                                pipeline_configuration, raw_data_dir)
示例#3
0
    log.info("Loading Pipeline Configuration File...")
    with open(pipeline_configuration_file_path) as f:
        pipeline_configuration = PipelineConfiguration.from_configuration_file(
            f)
    Logger.set_project_name(pipeline_configuration.pipeline_name)
    log.debug(f"Pipeline name is {pipeline_configuration.pipeline_name}")

    log.info("Downloading Firestore UUID Table credentials...")
    firestore_uuid_table_credentials = json.loads(
        google_cloud_utils.download_blob_to_string(
            google_cloud_credentials_file_path, pipeline_configuration.
            phone_number_uuid_table.firebase_credentials_file_url))

    phone_number_uuid_table = FirestoreUuidTable(
        pipeline_configuration.phone_number_uuid_table.table_name,
        firestore_uuid_table_credentials, "avf-phone-uuid-")
    log.info("Initialised the Firestore UUID table")

    uuids = set()
    skipped_nr = 0
    for path in traced_data_paths:
        # Load the traced data
        log.info(f"Loading previous traced data from file '{path}'...")
        with open(path) as f:
            data = TracedDataJsonIO.import_jsonl_to_traced_data_iterable(f)
        log.info(f"Loaded {len(data)} traced data objects")

        for td in data:
            if td["consent_withdrawn"] == Codes.TRUE:
                continue
    log.debug(f"Pipeline name is {pipeline_configuration.pipeline_name}")

    log.info("Downloading Rapid Pro access token...")
    rapid_pro_token = google_cloud_utils.download_blob_to_string(
        google_cloud_credentials_file_path, rapid_pro_token_url).strip()

    rapid_pro = RapidProClient(rapid_pro_domain, rapid_pro_token)

    log.info("Downloading Firestore UUID Table credentials...")
    firestore_uuid_table_credentials = json.loads(
        google_cloud_utils.download_blob_to_string(
            google_cloud_credentials_file_path, pipeline_configuration.
            phone_number_uuid_table.firebase_credentials_file_url))

    phone_number_uuid_table = FirestoreUuidTable(
        pipeline_configuration.phone_number_uuid_table.table_name,
        firestore_uuid_table_credentials, "avf-phone-uuid-")
    log.info("Initialised the Firestore UUID table")

    log.info(f"Loading the uuids that are safe to send to")
    with open(avf_uuid_file_path) as f:
        safe_uuids = json.load(f)
    log.info(f"Loaded {len(safe_uuids)} uuids")

    log.info(f"Re-identifying the uuids")
    safe_numbers = phone_number_uuid_table.uuid_to_data_batch(
        safe_uuids).values()
    safe_urns = {f"tel:+{number}" for number in safe_numbers}
    log.info(f"Re-identified {len(safe_urns)} uuids")

    log.info("Downloading the latest contacts fields from Rapid Pro")
    csv_by_individual_output_path = args.csv_by_individual_output_path
    production_csv_output_path = args.production_csv_output_path

    # Load the pipeline configuration file
    log.info("Loading Pipeline Configuration File...")
    with open(pipeline_configuration_file_path) as f:
        pipeline_configuration = PipelineConfiguration.from_configuration_file(
            f)

    log.info("Downloading Firestore Uuid Table credentials...")
    firestore_uuid_table_credentials = json.loads(
        google_cloud_utils.download_blob_to_string(
            google_cloud_credentials_file_path, pipeline_configuration.
            phone_number_uuid_table.firebase_credentials_file_url))
    phone_number_uuid_table = FirestoreUuidTable(
        pipeline_configuration.phone_number_uuid_table.table_name,
        firestore_uuid_table_credentials, "avf-phone-uuid-")

    if pipeline_configuration.drive_upload is not None:
        log.info(f"Downloading Google Drive service account credentials...")
        credentials_info = json.loads(
            google_cloud_utils.download_blob_to_string(
                google_cloud_credentials_file_path, pipeline_configuration.
                drive_upload.drive_credentials_file_url))
        drive_client_wrapper.init_client_from_info(credentials_info)

    # Load messages
    messages_datasets = []
    for i, activation_flow_name in enumerate(
            pipeline_configuration.activation_flow_names):
        raw_activation_path = f"{raw_data_dir}/{activation_flow_name}.json"
    baidoa_output_path = args.baidoa_output_path

    # Read the settings from the configuration file
    log.info("Loading Pipeline Configuration File...")
    with open(pipeline_configuration_file_path) as f:
        pipeline_configuration = PipelineConfiguration.from_configuration_file(
            f)

    log.info("Downloading Firestore UUID Table credentials...")
    firestore_uuid_table_credentials = json.loads(
        google_cloud_utils.download_blob_to_string(
            google_cloud_credentials_file_path, pipeline_configuration.
            phone_number_uuid_table.firebase_credentials_file_url))

    phone_number_uuid_table = FirestoreUuidTable(
        pipeline_configuration.phone_number_uuid_table.table_name,
        firestore_uuid_table_credentials, "avf-phone-uuid-")
    log.info("Initialised the Firestore UUID table")

    log.info(f"Loading UNDP-RCO traced data from file '{traced_data_path}'...")
    with open(traced_data_path, "r") as f:
        data = TracedDataJsonIO.import_jsonl_to_traced_data_iterable(f)
    log.info(f"Loaded {len(data)} traced data objects")

    # Search the TracedData for the bossaso/baidoa contacts
    bossaso_uuids = set()
    baidoa_uuids = set()
    log.info("Searching for participants from Bossaso or Baidoa")
    for td in data:
        if td["district_coded"] == "STOP":
            continue