Example No. 1
        log.info(f"Downloading Google Drive service account credentials...")
        credentials_info = json.loads(
            google_cloud_utils.download_blob_to_string(
                google_cloud_credentials_file_path, pipeline_configuration.
                drive_upload.drive_credentials_file_url))
        drive_client_wrapper.init_client_from_info(credentials_info)

        log.info("Uploading CSVs to Google Drive...")

        production_csv_drive_dir = os.path.dirname(
            pipeline_configuration.drive_upload.production_upload_path)
        production_csv_drive_file_name = os.path.basename(
            pipeline_configuration.drive_upload.production_upload_path)
        drive_client_wrapper.update_or_create(
            production_csv_input_path,
            production_csv_drive_dir,
            target_file_name=production_csv_drive_file_name,
            target_folder_is_shared_with_me=True)

        messages_csv_drive_dir = os.path.dirname(
            pipeline_configuration.drive_upload.messages_upload_path)
        messages_csv_drive_file_name = os.path.basename(
            pipeline_configuration.drive_upload.messages_upload_path)
        drive_client_wrapper.update_or_create(
            messages_csv_input_path,
            messages_csv_drive_dir,
            target_file_name=messages_csv_drive_file_name,
            target_folder_is_shared_with_me=True)

        individuals_csv_drive_dir = os.path.dirname(
            pipeline_configuration.drive_upload.individuals_upload_path)
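Pulling the pieces of this example together, a minimal end-to-end sketch of the same flow might look like the following. It assumes the project's own google_cloud_utils and drive_client_wrapper helper modules are importable; all paths and variable values here are hypothetical placeholders.

import json
import os

# Assumption: these are the project's own helper modules used in the example above;
# the exact import paths depend on the repository layout.
import google_cloud_utils
import drive_client_wrapper

# Hypothetical inputs, for illustration only.
google_cloud_credentials_file_path = "google-cloud-credentials.json"
drive_credentials_file_url = "gs://example-bucket/drive-service-account-credentials.json"
production_csv_input_path = "outputs/production.csv"
production_upload_path = "project-outputs/production.csv"

# Download the Drive service account credentials blob from Google Cloud Storage,
# then initialise the Drive client from the parsed JSON.
credentials_info = json.loads(
    google_cloud_utils.download_blob_to_string(
        google_cloud_credentials_file_path, drive_credentials_file_url))
drive_client_wrapper.init_client_from_info(credentials_info)

# Create or update a file of the same name in the target Drive directory,
# which has been shared with the uploading account.
drive_client_wrapper.update_or_create(
    production_csv_input_path,
    os.path.dirname(production_upload_path),
    target_file_name=os.path.basename(production_upload_path),
    target_folder_is_shared_with_me=True)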
Example No. 2
                                                             pretty_print=True)

    # Upload to Google Drive, if requested.
    # Note: This should happen as late as possible in order to reduce the risk of the remainder of the pipeline failing
    # after a Drive upload has occurred. Failures could result in inconsistent outputs or outputs with no
    # traced data log.
    if pipeline_configuration.drive_upload is not None:
        log.info("Uploading CSVs to Google Drive...")

        production_csv_drive_dir = os.path.dirname(
            pipeline_configuration.drive_upload.production_upload_path)
        production_csv_drive_file_name = os.path.basename(
            pipeline_configuration.drive_upload.production_upload_path)
        drive_client_wrapper.update_or_create(
            production_csv_output_path,
            production_csv_drive_dir,
            target_file_name=production_csv_drive_file_name,
            target_folder_is_shared_with_me=True)

        messages_csv_drive_dir = os.path.dirname(
            pipeline_configuration.drive_upload.messages_upload_path)
        messages_csv_drive_file_name = os.path.basename(
            pipeline_configuration.drive_upload.messages_upload_path)
        drive_client_wrapper.update_or_create(
            csv_by_message_output_path,
            messages_csv_drive_dir,
            target_file_name=messages_csv_drive_file_name,
            target_folder_is_shared_with_me=True)

        individuals_csv_drive_dir = os.path.dirname(
            pipeline_configuration.drive_upload.individuals_upload_path)
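These snippets read their Drive destinations from a pipeline_configuration.drive_upload object. A hypothetical approximation of that object, listing only the attributes the examples on this page actually use (the real pipeline defines its own configuration class):

from dataclasses import dataclass


@dataclass
class DriveUpload:
    drive_credentials_file_url: str  # gs:// URL of the Drive service account credentials JSON
    production_upload_path: str      # Drive path for the production CSV
    messages_upload_path: str        # Drive path for the messages CSV
    individuals_upload_path: str     # Drive path for the individuals CSV
    analysis_graphs_dir: str         # Drive directory used by the analysis examples below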
Example No. 3
                     template="plotly_white")
        fig.update_layout(
            title_text=f"{plan.raw_field} by gender (normalised)")
        fig.update_xaxes(tickangle=-60)
        fig.write_image(
            f"{output_dir}/graphs/{plan.raw_field}_by_gender_normalised.png",
            scale=IMG_SCALE_FACTOR)

    if pipeline_configuration.drive_upload is not None:
        log.info("Uploading CSVs to Drive...")
        paths_to_upload = glob(f"{output_dir}/*.csv")
        for i, path in enumerate(paths_to_upload):
            log.info(
                f"Uploading CSV {i + 1}/{len(paths_to_upload)}: {path}...")
            drive_client_wrapper.update_or_create(
                path,
                pipeline_configuration.drive_upload.analysis_graphs_dir,
                target_folder_is_shared_with_me=True)

        log.info("Uploading graphs to Drive...")
        paths_to_upload = glob(f"{output_dir}/graphs/*.png")
        for i, path in enumerate(paths_to_upload):
            log.info(
                f"Uploading graph {i + 1}/{len(paths_to_upload)}: {path}...")
            drive_client_wrapper.update_or_create(
                path,
                f"{pipeline_configuration.drive_upload.analysis_graphs_dir}/graphs",
                target_folder_is_shared_with_me=True)

        log.info("Uploading region maps to Drive...")
        paths_to_upload = glob(f"{output_dir}/maps/regions/*.png")
        for i, path in enumerate(paths_to_upload):
Example No. 4
                for ind in individuals:
                    label_counts[ind[cc.analysis_file_key]] += 1
            else:
                assert cc.coding_mode == CodingModes.MULTIPLE
                for ind in individuals:
                    for code in cc.code_scheme.codes:
                        if ind[f"{cc.analysis_file_key}{code.string_value}"] == Codes.MATRIX_1:
                            label_counts[code.string_value] += 1

            chart = altair.Chart(
                altair.Data(values=[{"label": k, "count": v} for k, v in label_counts.items()])
            ).mark_bar().encode(
                x=altair.X("label:N", title="Label", sort=list(label_counts.keys())),
                y=altair.Y("count:Q", title="Number of Individuals")
            ).properties(
                title=f"Season Distribution: {cc.analysis_file_key}"
            )
            chart.save(f"{output_dir}/season_distribution_{cc.analysis_file_key}.html")
            chart.save(f"{output_dir}/season_distribution_{cc.analysis_file_key}.png", scale_factor=IMG_SCALE_FACTOR)

    if pipeline_configuration.drive_upload is not None:
        log.info("Uploading graphs to Drive...")
        paths_to_upload = glob.glob(f"{output_dir}/*.png")
        for i, path in enumerate(paths_to_upload):
            log.info(f"Uploading graph {i + 1}/{len(paths_to_upload)}: {path}...")
            drive_client_wrapper.update_or_create(path, pipeline_configuration.drive_upload.analysis_graphs_dir,
                                                  target_folder_is_shared_with_me=True)
    else:
        log.info("Skipping uploading to Google Drive (because the pipeline configuration json does not contain the key "
                 "'DriveUploadPaths')")
Example No. 5
    print("Generating Analysis CSVs...")
    data = AnalysisFile.generate(user, data, csv_by_message_output_path,
                                 csv_by_individual_output_path)

    print("Writing TracedData to file....")
    IOUtils.ensure_dirs_exist_for_file(json_output_path)
    with open(json_output_path, "w") as f:
        TracedDataJsonIO.export_traced_data_iterable_to_json(data,
                                                             f,
                                                             pretty_print=True)

    if drive_upload:
        print("Uploading CSVs to Google Drive...")
        drive_client_wrapper.init_client(drive_credentials_path)

        production_csv_drive_dir = os.path.dirname(production_csv_drive_path)
        production_csv_drive_file_name = os.path.basename(
            production_csv_drive_path)
        drive_client_wrapper.update_or_create(
            production_csv_output_path,
            production_csv_drive_dir,
            target_file_name=production_csv_drive_file_name,
            target_folder_is_shared=True)
        print("Files successfully uploaded")

    else:
        print("Not uploading to Google Drive")

    print("Python script complete")