log.info(f"Downloading Google Drive service account credentials...") credentials_info = json.loads( google_cloud_utils.download_blob_to_string( google_cloud_credentials_file_path, pipeline_configuration. drive_upload.drive_credentials_file_url)) drive_client_wrapper.init_client_from_info(credentials_info) log.info("Uploading CSVs to Google Drive...") production_csv_drive_dir = os.path.dirname( pipeline_configuration.drive_upload.production_upload_path) production_csv_drive_file_name = os.path.basename( pipeline_configuration.drive_upload.production_upload_path) drive_client_wrapper.update_or_create( production_csv_input_path, production_csv_drive_dir, target_file_name=production_csv_drive_file_name, target_folder_is_shared_with_me=True) messages_csv_drive_dir = os.path.dirname( pipeline_configuration.drive_upload.messages_upload_path) messages_csv_drive_file_name = os.path.basename( pipeline_configuration.drive_upload.messages_upload_path) drive_client_wrapper.update_or_create( messages_csv_input_path, messages_csv_drive_dir, target_file_name=messages_csv_drive_file_name, target_folder_is_shared_with_me=True) individuals_csv_drive_dir = os.path.dirname( pipeline_configuration.drive_upload.individuals_upload_path)
    pretty_print=True)

# Upload to Google Drive, if requested.
# Note: This should happen as late as possible in order to reduce the risk of the remainder of the pipeline failing
#       after a Drive upload has occurred. Failures could result in inconsistent outputs or outputs with no
#       traced data log.
if pipeline_configuration.drive_upload is not None:
    log.info("Uploading CSVs to Google Drive...")
    production_csv_drive_dir = os.path.dirname(pipeline_configuration.drive_upload.production_upload_path)
    production_csv_drive_file_name = os.path.basename(pipeline_configuration.drive_upload.production_upload_path)
    drive_client_wrapper.update_or_create(production_csv_output_path, production_csv_drive_dir,
                                          target_file_name=production_csv_drive_file_name,
                                          target_folder_is_shared_with_me=True)

    messages_csv_drive_dir = os.path.dirname(pipeline_configuration.drive_upload.messages_upload_path)
    messages_csv_drive_file_name = os.path.basename(pipeline_configuration.drive_upload.messages_upload_path)
    drive_client_wrapper.update_or_create(csv_by_message_output_path, messages_csv_drive_dir,
                                          target_file_name=messages_csv_drive_file_name,
                                          target_folder_is_shared_with_me=True)

    individuals_csv_drive_dir = os.path.dirname(pipeline_configuration.drive_upload.individuals_upload_path)
template="plotly_white") fig.update_layout( title_text=f"{plan.raw_field} by gender (normalised)") fig.update_xaxes(tickangle=-60) fig.write_image( f"{output_dir}/graphs/{plan.raw_field}_by_gender_normalised.png", scale=IMG_SCALE_FACTOR) if pipeline_configuration.drive_upload is not None: log.info("Uploading CSVs to Drive...") paths_to_upload = glob(f"{output_dir}/*.csv") for i, path in enumerate(paths_to_upload): log.info( f"Uploading CSV {i + 1}/{len(paths_to_upload)}: {path}...") drive_client_wrapper.update_or_create( path, pipeline_configuration.drive_upload.analysis_graphs_dir, target_folder_is_shared_with_me=True) log.info("Uploading graphs to Drive...") paths_to_upload = glob(f"{output_dir}/graphs/*.png") for i, path in enumerate(paths_to_upload): log.info( f"Uploading graph {i + 1}/{len(paths_to_upload)}: {path}...") drive_client_wrapper.update_or_create( path, f"{pipeline_configuration.drive_upload.analysis_graphs_dir}/graphs", target_folder_is_shared_with_me=True) log.info("Uploading region maps to Drive...") paths_to_upload = glob(f"{output_dir}/maps/regions/*.png") for i, path in enumerate(paths_to_upload):
            for ind in individuals:
                label_counts[ind[cc.analysis_file_key]] += 1
        else:
            assert cc.coding_mode == CodingModes.MULTIPLE
            for ind in individuals:
                for code in cc.code_scheme.codes:
                    if ind[f"{cc.analysis_file_key}{code.string_value}"] == Codes.MATRIX_1:
                        label_counts[code.string_value] += 1

        # Plot the per-label counts for this coding configuration as a bar chart, and export it as
        # both an interactive HTML file and a PNG image.
        chart = altair.Chart(
            altair.Data(values=[{"label": k, "count": v} for k, v in label_counts.items()])
        ).mark_bar().encode(
            x=altair.X("label:N", title="Label", sort=list(label_counts.keys())),
            y=altair.Y("count:Q", title="Number of Individuals")
        ).properties(
            title=f"Season Distribution: {cc.analysis_file_key}"
        )
        chart.save(f"{output_dir}/season_distribution_{cc.analysis_file_key}.html")
        chart.save(f"{output_dir}/season_distribution_{cc.analysis_file_key}.png", scale_factor=IMG_SCALE_FACTOR)

if pipeline_configuration.drive_upload is not None:
    log.info("Uploading graphs to Drive...")
    paths_to_upload = glob.glob(f"{output_dir}/*.png")
    for i, path in enumerate(paths_to_upload):
        log.info(f"Uploading graph {i + 1}/{len(paths_to_upload)}: {path}...")
        drive_client_wrapper.update_or_create(path, pipeline_configuration.drive_upload.analysis_graphs_dir,
                                              target_folder_is_shared_with_me=True)
else:
    log.info("Skipping uploading to Google Drive (because the pipeline configuration json does not contain the key "
             "'DriveUploadPaths')")
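# --- Illustrative sketch only; not part of the original pipeline code. ---
# The log message above refers to an optional 'DriveUploadPaths' key in the pipeline configuration
# JSON, which is what populates pipeline_configuration.drive_upload. The snippet below is a minimal
# sketch of how such an optional block might be parsed; the nested key names and the DriveUpload
# container class are assumptions made here for illustration, not the project's actual schema.
import json


class DriveUpload:
    """Hypothetical container mirroring the drive_upload attributes used in the snippets above."""

    def __init__(self, drive_credentials_file_url, production_upload_path, messages_upload_path,
                 individuals_upload_path, analysis_graphs_dir):
        self.drive_credentials_file_url = drive_credentials_file_url
        self.production_upload_path = production_upload_path
        self.messages_upload_path = messages_upload_path
        self.individuals_upload_path = individuals_upload_path
        self.analysis_graphs_dir = analysis_graphs_dir


def load_drive_upload_config(pipeline_configuration_file_path):
    """Returns a DriveUpload config, or None if 'DriveUploadPaths' is absent (the skipped case above)."""
    with open(pipeline_configuration_file_path) as f:
        configuration = json.load(f)

    drive_upload_paths = configuration.get("DriveUploadPaths")
    if drive_upload_paths is None:
        return None

    return DriveUpload(
        drive_credentials_file_url=drive_upload_paths.get("DriveCredentialsFileURL"),
        production_upload_path=drive_upload_paths.get("ProductionUploadPath"),
        messages_upload_path=drive_upload_paths.get("MessagesUploadPath"),
        individuals_upload_path=drive_upload_paths.get("IndividualsUploadPath"),
        analysis_graphs_dir=drive_upload_paths.get("AnalysisGraphsDir")
    )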
print("Generating Analysis CSVs...") data = AnalysisFile.generate(user, data, csv_by_message_output_path, csv_by_individual_output_path) print("Writing TracedData to file....") IOUtils.ensure_dirs_exist_for_file(json_output_path) with open(json_output_path, "w") as f: TracedDataJsonIO.export_traced_data_iterable_to_json(data, f, pretty_print=True) if drive_upload: print("Uploading CSVs to Google Drive...") drive_client_wrapper.init_client(drive_credentials_path) production_csv_drive_dir = os.path.dirname(production_csv_drive_path) production_csv_drive_file_name = os.path.basename( production_csv_drive_path) drive_client_wrapper.update_or_create( production_csv_output_path, production_csv_drive_dir, target_file_name=production_csv_drive_file_name, target_folder_is_shared=True) print("Files successfully uploaded") else: print("Not uploading to Google Drive") print("Python script complete")