def handle_state_dashboard_user_restrictions_file(
    data: Dict[str, Any], _: ContextType
) -> Tuple[str, HTTPStatus]:
    """This function is triggered when a file is dropped in a
    `recidiviz-{project_id}-dashboard-user-restrictions/US_XX` bucket. If the file
    matches `dashboard_user_restrictions.csv`, then it makes a request to import the
    CSV to the Cloud SQL `dashboard_user_restrictions` table in the Case Triage schema.
    Once the CSV import finishes, it makes a request to update the Auth0 users with
    the user restrictions.

    data: A cloud storage object that holds name information and other metadata
        related to the file that was dropped into the bucket.
    _: (google.cloud.functions.Context): Metadata of triggering event.
    """
    project_id = os.environ.get(GCP_PROJECT_ID_KEY)
    if not project_id:
        logging.error("No project id set for call to update auth0 users, returning.")
        return "", HTTPStatus.BAD_REQUEST

    filepath = data["name"].split("/")
    # Expected file path structure is US_XX/dashboard_user_restrictions.csv
    if len(filepath) != 2:
        logging.info(
            "Skipping filepath, incorrect number of nested directories: %s", filepath
        )
        return "", HTTPStatus.OK

    region_code, filename = filepath
    csv_file = "dashboard_user_restrictions.csv"

    if filename == csv_file:
        import_user_restrictions_url = (
            _APP_ENGINE_IMPORT_USER_RESTRICTIONS_CSV_TO_SQL_URL.format(
                project_id,
                region_code,
            )
        )
        logging.info("Calling URL: %s", import_user_restrictions_url)

        # Hit the App Engine endpoint `auth/import_user_restrictions_csv_to_sql`.
        response = make_iap_request(
            import_user_restrictions_url, IAP_CLIENT_ID[project_id]
        )
        logging.info(
            "The %s response status is %s",
            import_user_restrictions_url,
            response.status_code,
        )

        if response.status_code == HTTPStatus.OK:
            update_users_url = _APP_ENGINE_UPDATE_AUTH0_USER_METADATA_URL.format(
                project_id, region_code
            )
            # Hit the App Engine endpoint `auth/update_auth0_user_metadata`.
            response = make_iap_request(update_users_url, IAP_CLIENT_ID[project_id])
            logging.info(
                "The %s response status is %s", update_users_url, response.status_code
            )

    return "", HTTPStatus.OK
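# Illustrative only: the shape of the Cloud Storage "finalize" event payload that the
# handler above receives. Only "name" is read by this handler (and split into a region
# code and filename); the other keys are standard GCS object metadata. The bucket and
# project values below are hypothetical.
_EXAMPLE_USER_RESTRICTIONS_EVENT = {
    "bucket": "recidiviz-123-dashboard-user-restrictions",
    "name": "US_XX/dashboard_user_restrictions.csv",
    "contentType": "text/csv",
    "timeCreated": "2021-01-01T00:00:00.000Z",
}
# handle_state_dashboard_user_restrictions_file(_EXAMPLE_USER_RESTRICTIONS_EVENT, None)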
def handle_deliver_emails_for_batch_email_reporting(request: Request) -> None:
    """Cloud function to deliver a batch of generated emails.

    It hits the App Engine endpoint `reporting/deliver_emails_for_batch`.
    It requires a JSON input containing the following keys:
        batch_id: (required) Identifier for this batch
        redirect_address: (optional) An email address to which all emails should be
            sent instead of to their actual recipients.

    Args:
        request: HTTP request payload containing JSON with keys as described above
    Returns:
        Nothing.
    Raises:
        Nothing. All exception raising is handled within the App Engine logic.
    """
    project_id = os.environ.get(GCP_PROJECT_ID_KEY)
    if not project_id:
        logging.error("No project id set, returning")
        return

    request_params = request.get_json()
    if not request_params:
        logging.error("No request params, returning")
        return

    batch_id = request_params.get("batch_id", "")
    redirect_address = request_params.get("redirect_address", "")

    url = _APP_ENGINE_PO_MONTHLY_REPORT_DELIVER_EMAILS_URL.format(
        project_id, batch_id, redirect_address
    )
    logging.info("Calling URL: %s", url)

    response = make_iap_request(url, IAP_CLIENT_ID[project_id])
    logging.info("The response status is %s", response.status_code)
def normalize_raw_file_path(
    data: Dict[str, Any], _: ContextType
) -> Tuple[str, HTTPStatus]:
    """Cloud functions can be configured to trigger this function on any bucket that
    is being used as a test bed for automatic uploads. This will just rename the
    incoming files to have a normalized path with a timestamp so subsequent uploads
    do not have naming conflicts."""
    project_id = os.environ.get(GCP_PROJECT_ID_KEY)
    if not project_id:
        error_str = (
            "No project id set for call to direct ingest cloud function, returning."
        )
        logging.error(error_str)
        return error_str, HTTPStatus.BAD_REQUEST

    bucket = data["bucket"]
    relative_file_path = data["name"]

    url = _DIRECT_INGEST_NORMALIZE_RAW_PATH_URL.format(
        project_id, bucket, relative_file_path
    )

    logging.info("Calling URL: %s", url)

    # Hit the cloud function backend, which will rename the incoming file to a
    # normalized, timestamped path.
    response = make_iap_request(url, IAP_CLIENT_ID[project_id])
    logging.info("The response status is %s", response.status_code)
    return "", HTTPStatus(response.status_code)
def parse_state_aggregate(
    data: Dict[str, Any], _: ContextType
) -> Tuple[str, HTTPStatus]:
    """This function is triggered when a file is dropped into the state aggregate
    bucket and makes a request to parse and write the data to the aggregate table
    database.

    data: A cloud storage object that holds name information and other metadata
        related to the file that was dropped into the bucket.
    _: (google.cloud.functions.Context): Metadata of triggering event.
    """
    bucket = data["bucket"]
    state, filename = data["name"].split("/")
    project_id = os.environ.get(GCP_PROJECT_ID_KEY)
    logging.info(
        "Running cloud function for bucket %s, state %s, filename %s",
        bucket,
        state,
        filename,
    )
    url = _STATE_AGGREGATE_CLOUD_FUNCTION_URL.format(
        project_id, bucket, state, filename
    )
    # Hit the cloud function backend, which persists the table data to our
    # database.
    response = make_iap_request(url, IAP_CLIENT_ID[project_id])
    logging.info("The response status is %s", response.status_code)
    return "", HTTPStatus(response.status_code)
def export_metric_view_data(
    event: Dict[str, Any], _context: ContextType
) -> Tuple[str, HTTPStatus]:
    """This function is triggered by a Pub/Sub event to begin the export of data
    contained in BigQuery metric views to files in cloud storage buckets.
    """
    project_id = os.environ.get(GCP_PROJECT_ID_KEY)
    if not project_id:
        error_str = "No project id set for call to export view data, returning."
        logging.error(error_str)
        return error_str, HTTPStatus.BAD_REQUEST

    if "data" in event:
        logging.info("data found")
        url = (
            _METRIC_VIEW_EXPORT_CLOUD_FUNCTION_URL.format(project_id)
            + "?export_job_filter="
            + b64decode(event["data"]).decode("utf-8")
        )
    else:
        error_str = "Missing required export_job_filter in data of the Pub/Sub message."
        logging.error(error_str)
        return error_str, HTTPStatus.BAD_REQUEST

    logging.info("project_id: %s", project_id)
    logging.info("Calling URL: %s", url)

    # Hit the cloud function backend, which exports view data to their assigned
    # cloud storage bucket.
    response = make_iap_request(url, IAP_CLIENT_ID[project_id])
    logging.info("The response status is %s", response.status_code)
    return "", HTTPStatus(response.status_code)
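# Illustrative only: a minimal sketch of how a Pub/Sub message that drives
# export_metric_view_data could be published. The topic name below is a hypothetical
# placeholder. The message body arrives base64-encoded in `event["data"]` on the
# cloud function side, which is why the handler above calls b64decode.
from google.cloud import pubsub_v1


def publish_export_job_filter(project_id: str, export_job_filter: str) -> None:
    """Publishes `export_job_filter` as the body of a Pub/Sub message."""
    publisher = pubsub_v1.PublisherClient()
    # "v1.export.view.data" is an assumed topic name, used here only for illustration.
    topic_path = publisher.topic_path(project_id, "v1.export.view.data")
    # The bytes payload is delivered to the subscribing cloud function as base64 text.
    future = publisher.publish(topic_path, data=export_job_filter.encode("utf-8"))
    logging.info("Published message %s", future.result())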
def trigger_daily_calculation_pipeline_dag(
    data: Dict[str, Any], _context: ContextType
) -> Tuple[str, HTTPStatus]:
    """This function is triggered by a Pub/Sub event and triggers an Airflow DAG in
    which all of the daily calculation pipelines run simultaneously.
    """
    project_id = os.environ.get(GCP_PROJECT_ID_KEY, "")
    if not project_id:
        error_str = (
            "No project id set for call to run the calculation pipelines, returning."
        )
        logging.error(error_str)
        return error_str, HTTPStatus.BAD_REQUEST

    iap_client_id = os.environ.get("IAP_CLIENT_ID")
    if not iap_client_id:
        error_str = "The environment variable 'IAP_CLIENT_ID' is not set."
        logging.error(error_str)
        return error_str, HTTPStatus.BAD_REQUEST

    airflow_uri = os.environ.get("AIRFLOW_URI")
    if not airflow_uri:
        error_str = "The environment variable 'AIRFLOW_URI' is not set."
        logging.error(error_str)
        return error_str, HTTPStatus.BAD_REQUEST

    # The name of the DAG you wish to trigger
    dag_name = "{}_calculation_pipeline_dag".format(project_id)
    webserver_url = "{}/api/experimental/dags/{}/dag_runs".format(airflow_uri, dag_name)

    monitor_response = make_iap_request(
        webserver_url, iap_client_id, method="POST", json={"conf": data}
    )
    logging.info("The monitoring Airflow response is %s", monitor_response)
    return "", HTTPStatus(monitor_response.status_code)
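# Illustrative only: make_iap_request is defined elsewhere in this repo. A common
# pattern for calling an IAP-protected resource, which that helper is assumed to
# follow, is to fetch an OpenID Connect identity token whose audience is the IAP
# OAuth client id and send it as a Bearer token. A minimal sketch of that pattern:
import google.auth.transport.requests
import google.oauth2.id_token
import requests


def make_iap_request_sketch(url: str, client_id: str, method: str = "GET", **kwargs):
    """Makes a request to an IAP-protected URL using an OIDC identity token."""
    # Fetch an identity token for the IAP client id (the token audience).
    auth_request = google.auth.transport.requests.Request()
    token = google.oauth2.id_token.fetch_id_token(auth_request, client_id)
    # Forward any extra kwargs (e.g. json={"conf": ...}) to the underlying request.
    return requests.request(
        method, url, headers={"Authorization": f"Bearer {token}"}, **kwargs
    )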
def _handle_state_direct_ingest_file(data, start_ingest: bool) -> None:
    """Calls direct ingest cloud function when a new file is dropped into a bucket."""
    project_id = os.environ.get(GCP_PROJECT_ID_KEY)
    if not project_id:
        logging.error('No project id set for call to direct ingest cloud '
                      'function, returning.')
        return

    bucket = data['bucket']
    relative_file_path = data['name']

    region_code = get_state_region_code_from_direct_ingest_bucket(bucket)
    if not region_code:
        logging.error('Cannot parse region code from bucket %s, returning.', bucket)
        return

    url = _DIRECT_INGEST_CLOUD_FUNCTION_URL.format(
        project_id, region_code, bucket, relative_file_path, str(start_ingest))

    logging.info("Calling URL: %s", url)

    # Hit the cloud function backend, which will schedule jobs to parse
    # data for unprocessed files in this bucket and persist to our database.
    response = make_iap_request(url, IAP_CLIENT_ID[project_id])
    logging.info("The response status is %s", response.status_code)
def _handle_state_direct_ingest_file(
    data: Dict[str, Any], start_ingest: bool
) -> Tuple[str, HTTPStatus]:
    """Calls direct ingest cloud function when a new file is dropped into a bucket."""
    project_id = os.environ.get(GCP_PROJECT_ID_KEY)
    if not project_id:
        error_str = (
            "No project id set for call to direct ingest cloud function, returning."
        )
        logging.error(error_str)
        return error_str, HTTPStatus.BAD_REQUEST

    bucket = data["bucket"]
    relative_file_path = data["name"]

    region_code = get_region_code_from_direct_ingest_bucket(bucket)
    if not region_code:
        error_str = f"Cannot parse region code from bucket {bucket}, returning."
        logging.error(error_str)
        return error_str, HTTPStatus.BAD_REQUEST

    url = _DIRECT_INGEST_CLOUD_FUNCTION_URL.format(
        project_id, region_code, bucket, relative_file_path, str(start_ingest)
    )

    logging.info("Calling URL: %s", url)

    # Hit the cloud function backend, which will schedule jobs to parse
    # data for unprocessed files in this bucket and persist to our database.
    response = make_iap_request(url, IAP_CLIENT_ID[project_id])
    logging.info("The response status is %s", response.status_code)
    return "", HTTPStatus(response.status_code)
def make_iap_export_request(url: str) -> Dict[str, Any]:
    client_id = IAP_CLIENT_ID[os.environ.get("GCP_PROJECT_ID")]
    # make_iap_request raises an exception if the returned status code is not 200.
    response = make_iap_request(url, client_id)
    # When an operator returns a value in Airflow, the result is put into XCom for
    # other operators to access. However, the result must be a built-in Python data
    # type, otherwise the operator will not return successfully.
    return {"status_code": response.status_code, "text": response.text}
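# Illustrative only: a minimal sketch of how make_iap_export_request might be wired
# into an Airflow DAG. The DAG id, task id, and URL below are hypothetical; the point
# is that the returned dict contains only built-in types, so it can be stored in XCom
# and read by downstream tasks.
from datetime import datetime

from airflow import DAG
from airflow.operators.python_operator import PythonOperator

with DAG(
    dag_id="example_export_dag",  # hypothetical DAG id
    start_date=datetime(2021, 1, 1),
    schedule_interval=None,
) as example_dag:
    export_task = PythonOperator(
        task_id="trigger_export",
        python_callable=make_iap_export_request,
        # Hypothetical endpoint; any IAP-protected URL would work here.
        op_kwargs={"url": "https://example.appspot.com/export/create_export_tasks"},
    )
    # A downstream task could read the result from XCom, e.g. via
    # ti.xcom_pull(task_ids="trigger_export")["status_code"].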
def start_and_monitor_calculation_pipeline(_event, _context) -> None:
    """This function, which is triggered by a Pub/Sub event, can kick off any single
    Dataflow pipeline template.

    On successful triggering of the job, this function makes a call to the app to
    begin monitoring the progress of the job.
    """
    project_id = os.environ.get(GCP_PROJECT_ID_KEY)
    if not project_id:
        logging.error(
            "No project id set for call to run a calculation pipeline, returning."
        )
        return

    bucket = get_dataflow_template_bucket(project_id)

    template_name = os.environ.get("TEMPLATE_NAME")
    if not template_name:
        logging.error("No template_name set, returning.")
        return

    job_name = os.environ.get("JOB_NAME")
    if not job_name:
        logging.error("No job_name set, returning.")
        return

    on_dataflow_job_completion_topic = os.environ.get(
        "ON_DATAFLOW_JOB_COMPLETION_TOPIC"
    )
    if not on_dataflow_job_completion_topic:
        logging.error("No on-completion topic set, returning.")
        return

    region = os.environ.get("REGION")
    if not region:
        logging.error("No region set, returning.")
        return

    response = trigger_dataflow_job_from_template(
        project_id, bucket, template_name, job_name, region
    )

    logging.info("The response to triggering the Dataflow job is: %s", response)

    job_id = response["id"]
    location = response["location"]
    on_dataflow_job_completion_topic = on_dataflow_job_completion_topic.replace(
        ".", "-"
    )

    # Monitor the successfully triggered Dataflow job
    url = _DATAFLOW_MONITOR_URL.format(
        project_id, job_id, location, on_dataflow_job_completion_topic
    )

    monitor_response = make_iap_request(url, IAP_CLIENT_ID[project_id])
    logging.info("The monitoring Dataflow response is %s", monitor_response)
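# Illustrative only: trigger_dataflow_job_from_template is defined elsewhere in this
# repo. A minimal sketch of what it could look like if it uses the Dataflow
# `projects.locations.templates.launch` REST API via the Google API client is shown
# here. The gs://{bucket}/templates/{template_name} layout matches the docstring of
# run_calculation_pipelines below; everything else is an assumption.
from googleapiclient.discovery import build


def trigger_dataflow_job_from_template_sketch(
    project_id: str, bucket: str, template_name: str, job_name: str, region: str
) -> dict:
    """Launches a Dataflow job from a GCS-staged template and returns the job dict."""
    dataflow = build("dataflow", "v1b3", cache_discovery=False)
    request = (
        dataflow.projects()
        .locations()
        .templates()
        .launch(
            projectId=project_id,
            location=region,
            # Assumed layout: templates are staged under gs://{bucket}/templates/.
            gcsPath=f"gs://{bucket}/templates/{template_name}",
            body={"jobName": job_name},
        )
    )
    response = request.execute()
    # The launch response nests the created job, which carries fields such as
    # "id" and "location" that the caller above reads.
    return response["job"]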
def run_calculation_pipelines(_event, _context):
    """This function, which is triggered by a Pub/Sub event, kicks off a Dataflow job
    with the given job_name where the template for the job lives at
    gs://{bucket}/templates/{template_name} for the given project.

    On successful triggering of the job, this function makes a call to the app to
    begin monitoring the progress of the job.
    """
    project_id = os.environ.get('GCP_PROJECT')
    if not project_id:
        logging.error('No project id set for call to run a calculation'
                      ' pipeline, returning.')
        return

    bucket = get_dataflow_template_bucket(project_id)

    template_name = os.environ.get('TEMPLATE_NAME')
    if not template_name:
        logging.error('No template_name set, returning.')
        return

    job_name = os.environ.get('JOB_NAME')
    if not job_name:
        logging.error('No job_name set, returning.')
        return

    on_dataflow_job_completion_topic = os.environ.get(
        'ON_DATAFLOW_JOB_COMPLETION_TOPIC')
    if not on_dataflow_job_completion_topic:
        logging.error('No on-completion topic set, returning.')
        return

    response = trigger_dataflow_job_from_template(project_id, bucket,
                                                  template_name, job_name)

    logging.info("The response to triggering the Dataflow job is: %s", response)

    job_id = response['id']
    location = response['location']
    on_dataflow_job_completion_topic = on_dataflow_job_completion_topic.replace(
        '.', '-')

    # Monitor the successfully triggered Dataflow job
    url = _DATAFLOW_MONITOR_URL.format(project_id, job_id, location,
                                       on_dataflow_job_completion_topic)

    monitor_response = make_iap_request(url, _CLIENT_ID[project_id])
    logging.info("The monitoring Dataflow response is %s", monitor_response)
def handle_start_new_batch_email_reporting(request: Request) -> None:
    """Start a new batch of email generation for the indicated state.

    This function is the entry point for generating a new batch. It hits the App
    Engine endpoint `/start_new_batch`.
    It requires a JSON input containing the following keys:
        state_code: (required) State code for the report (e.g. "US_ID")
        report_type: (required) The type of report (e.g. "po_monthly_report")
        test_address: (optional) A test address to generate emails for
        region_code: (optional) The sub-region of the state to generate emails for
            (e.g. "US_ID_D5")
        message_body: (optional) If included, overrides the default message body.

    Args:
        request: The HTTP request. Must contain JSON with "state_code" and
            "report_type" keys, and may contain an optional "test_address" key.
    Returns:
        Nothing.
    Raises:
        Nothing. All exception raising is handled within the App Engine logic.
    """
    project_id = os.environ.get(GCP_PROJECT_ID_KEY)
    if not project_id:
        logging.error("No project id set, returning")
        return

    request_params = request.get_json()
    if not request_params:
        logging.error("No request params, returning")
        return

    query_params = build_query_param_string(
        request_params,
        [
            "state_code",
            "report_type",
            "test_address",
            "region_code",
            "message_body",
        ],
    )

    url = _APP_ENGINE_PO_MONTHLY_REPORT_GENERATE_EMAILS_URL.format(
        project_id, query_params
    )
    logging.info("Calling URL: %s", url)

    # Hit the App Engine endpoint `reporting/start_new_batch`.
    response = make_iap_request(url, IAP_CLIENT_ID[project_id])
    logging.info("The response status is %s", response.status_code)
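# Illustrative only: build_query_param_string is defined elsewhere in this repo. The
# sketch below shows the assumed behavior, URL-encoding only the accepted keys that
# are actually present in the request; the real helper may differ. An example trigger
# payload would be {"state_code": "US_ID", "report_type": "po_monthly_report"}.
from typing import Dict, List
from urllib.parse import urlencode


def build_query_param_string_sketch(
    request_params: Dict[str, str], accepted_query_params: List[str]
) -> str:
    """Returns a query string such as '?state_code=US_ID&report_type=po_monthly_report'."""
    filtered = {
        key: value
        for key, value in request_params.items()
        if key in accepted_query_params and value
    }
    return f"?{urlencode(filtered)}"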
def export_view_data(_event, _context):
    """This function is triggered by a Pub/Sub event to begin the export of data
    contained in BigQuery views to files in cloud storage buckets.
    """
    project_id = os.environ.get('GCP_PROJECT')
    if not project_id:
        logging.error(
            'No project id set for call to export view data, returning.')
        return

    url = _VIEW_DATA_EXPORT_CLOUD_FUNCTION_URL.format(project_id)

    logging.info("project_id: %s", project_id)
    logging.info("Calling URL: %s", url)

    # Hit the cloud function backend, which exports view data to their assigned
    # cloud storage bucket
    response = make_iap_request(url, IAP_CLIENT_ID[project_id])
    logging.info("The response status is %s", response.status_code)
def _call_dashboard_export(data_type: str):
    project_id = os.environ.get('GCP_PROJECT')
    if not project_id:
        logging.error('No project id set for call to export dashboard data, '
                      'returning.')
        return

    bucket = get_dashboard_data_export_storage_bucket(project_id)
    url = _DASHBOARD_EXPORT_CLOUD_FUNCTION_URL.format(project_id, bucket, data_type)

    logging.info("project_id: %s", project_id)
    logging.info("Calling URL: %s", url)

    # Hit the cloud function backend, which exports the given data type to
    # the given cloud storage bucket
    response = make_iap_request(url, _CLIENT_ID[project_id])
    logging.info("The response status is %s", response.status_code)
def trigger_calculation_pipeline_dag(data, _context) -> None:
    """This function is triggered by a Pub/Sub event and triggers an Airflow DAG in
    which all of the calculation pipelines run simultaneously.
    """
    gcp_project_id = os.environ.get(GCP_PROJECT_ID_KEY, '')
    if not gcp_project_id:
        logging.error('No project id set for call to run the calculation pipelines, '
                      'returning.')
        return

    # The Airflow/Composer project is the GCP project with an '-airflow' suffix.
    project_id = gcp_project_id + '-airflow'

    webserver_id = os.environ.get('WEBSERVER_ID')
    if not webserver_id:
        logging.error("The environment variable 'WEBSERVER_ID' is not set")
        return

    # The name of the DAG you wish to trigger
    dag_name = '{}_calculation_pipeline_dag'.format(gcp_project_id)
    webserver_url = 'https://{}.appspot.com/api/experimental/dags/{}/dag_runs'.format(
        webserver_id, dag_name)

    monitor_response = make_iap_request(webserver_url, IAP_CLIENT_ID[project_id],
                                        method='POST', json={"conf": data})
    logging.info("The monitoring Airflow response is %s", monitor_response)
def export_dashboard_data(_event, _context):
    """This function is triggered by a Pub/Sub event to begin the export of data
    needed for the dashboard.
    """
    project_id = os.environ.get('GCP_PROJECT')
    if not project_id:
        logging.error('No project id set for call to export dashboard data, '
                      'returning.')
        return

    bucket = get_dashboard_data_export_storage_bucket(project_id)
    url = _DASHBOARD_EXPORT_CLOUD_FUNCTION_URL.format(project_id, bucket)

    logging.info("project_id: %s", project_id)
    logging.info("Calling URL: %s", url)

    # Hit the cloud function backend, which exports the given data type to
    # the given cloud storage bucket
    response = make_iap_request(url, _CLIENT_ID[project_id])
    logging.info("The response status is %s", response.status_code)
def handle_new_case_triage_etl(
    data: Dict[str, Any], _: ContextType
) -> Tuple[str, HTTPStatus]:
    """This function is triggered when a file is dropped in the
    `{project_id}-case-triage-data` bucket. If the file matches `etl_*.csv`, then it
    makes a request to import the CSV to Cloud SQL.
    """
    project_id = os.environ.get(GCP_PROJECT_ID_KEY)
    if not project_id:
        logging.error(
            "No project id set for call to import Case Triage ETL data, returning."
        )
        return "", HTTPStatus.BAD_REQUEST

    filename = data["name"]
    if not filename.startswith("etl_") or not filename.endswith(".csv"):
        logging.info("Ignoring file %s", filename)
        return "", HTTPStatus.OK

    import_url = _APP_ENGINE_IMPORT_CASE_TRIAGE_ETL_CSV_TO_SQL_URL.format(
        project_id, filename
    )
    import_response = make_iap_request(import_url, IAP_CLIENT_ID[project_id])
    return "", HTTPStatus(import_response.status_code)
def export_metric_view_data(event, _context) -> None:
    """This function is triggered by a Pub/Sub event to begin the export of data
    contained in BigQuery metric views to files in cloud storage buckets.
    """
    project_id = os.environ.get(GCP_PROJECT_ID_KEY)
    if not project_id:
        logging.error('No project id set for call to export view data, returning.')
        return

    if 'data' in event:
        logging.info("data found")
        url = _METRIC_VIEW_EXPORT_CLOUD_FUNCTION_URL.format(project_id) + \
            '?export_job_filter=' + b64decode(event['data']).decode('utf-8')
    else:
        url = _METRIC_VIEW_EXPORT_CLOUD_FUNCTION_URL.format(project_id)

    logging.info("project_id: %s", project_id)
    logging.info("Calling URL: %s", url)

    # Hit the cloud function backend, which exports view data to their assigned
    # cloud storage bucket
    response = make_iap_request(url, IAP_CLIENT_ID[project_id])
    logging.info("The response status is %s", response.status_code)