def start_and_monitor_calculation_pipeline(_event, _context) -> None:
    """This function, which is triggered by a Pub/Sub event, can kick off any
    single Dataflow pipeline template.

    On successful triggering of the job, this function makes a call to the app
    to begin monitoring the progress of the job.
    """
    project_id = os.environ.get(GCP_PROJECT_ID_KEY)
    if not project_id:
        logging.error(
            "No project id set for call to run a calculation"
            " pipeline, returning."
        )
        return

    bucket = get_dataflow_template_bucket(project_id)

    template_name = os.environ.get("TEMPLATE_NAME")
    if not template_name:
        logging.error("No template_name set, returning.")
        return

    job_name = os.environ.get("JOB_NAME")
    if not job_name:
        logging.error("No job_name set, returning.")
        return

    on_dataflow_job_completion_topic = os.environ.get(
        "ON_DATAFLOW_JOB_COMPLETION_TOPIC"
    )
    if not on_dataflow_job_completion_topic:
        logging.error("No on-completion topic set, returning.")
        return

    region = os.environ.get("REGION")
    if not region:
        logging.error("No region set, returning.")
        return

    response = trigger_dataflow_job_from_template(
        project_id, bucket, template_name, job_name, region
    )

    logging.info("The response to triggering the Dataflow job is: %s", response)

    job_id = response["id"]
    location = response["location"]
    on_dataflow_job_completion_topic = on_dataflow_job_completion_topic.replace(
        ".", "-"
    )

    # Monitor the successfully triggered Dataflow job
    url = _DATAFLOW_MONITOR_URL.format(
        project_id, job_id, location, on_dataflow_job_completion_topic
    )

    monitor_response = make_iap_request(url, IAP_CLIENT_ID[project_id])
    logging.info("The monitoring Dataflow response is %s", monitor_response)
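
# A minimal sketch of what the `make_iap_request` helper used above could look
# like. This is an assumption, not the repo's actual implementation: it mints
# an OpenID Connect token for the IAP client ID with google-auth and issues an
# authenticated GET against the monitoring endpoint.
import google.auth.transport.requests
import google.oauth2.id_token
import requests


def make_iap_request(url: str, client_id: str, timeout: int = 90) -> str:
    """Sends a GET request to an IAP-protected URL using an OIDC token minted
    for the given OAuth client ID (assumed signature and behavior)."""
    auth_request = google.auth.transport.requests.Request()
    open_id_token = google.oauth2.id_token.fetch_id_token(auth_request, client_id)

    response = requests.get(
        url,
        headers={"Authorization": f"Bearer {open_id_token}"},
        timeout=timeout,
    )
    response.raise_for_status()
    return response.text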
def run_calculation_pipelines(_event, _context):
    """This function, which is triggered by a Pub/Sub event, kicks off a
    Dataflow job with the given job_name where the template for the job lives
    at gs://{bucket}/templates/{template_name} for the given project.

    On successful triggering of the job, this function makes a call to the app
    to begin monitoring the progress of the job.
    """
    project_id = os.environ.get('GCP_PROJECT')
    if not project_id:
        logging.error('No project id set for call to run a calculation'
                      ' pipeline, returning.')
        return

    bucket = get_dataflow_template_bucket(project_id)

    template_name = os.environ.get('TEMPLATE_NAME')
    if not template_name:
        logging.error('No template_name set, returning.')
        return

    job_name = os.environ.get('JOB_NAME')
    if not job_name:
        logging.error('No job_name set, returning.')
        return

    on_dataflow_job_completion_topic = os.environ.get(
        'ON_DATAFLOW_JOB_COMPLETION_TOPIC')
    if not on_dataflow_job_completion_topic:
        logging.error('No on-completion topic set, returning.')
        return

    response = trigger_dataflow_job_from_template(project_id, bucket,
                                                  template_name, job_name)

    logging.info("The response to triggering the Dataflow job is: %s",
                 response)

    job_id = response['id']
    location = response['location']
    on_dataflow_job_completion_topic = on_dataflow_job_completion_topic.replace(
        '.', '-')

    # Monitor the successfully triggered Dataflow job
    url = _DATAFLOW_MONITOR_URL.format(project_id, job_id, location,
                                       on_dataflow_job_completion_topic)

    monitor_response = make_iap_request(url, _CLIENT_ID[project_id])
    logging.info("The monitoring Dataflow response is %s", monitor_response)
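
# A minimal sketch of what `trigger_dataflow_job_from_template` might do, based
# on the docstring above: launch the template stored at
# gs://{bucket}/templates/{template_name} via the Dataflow templates.launch API
# and return the created job's metadata (callers read 'id' and 'location' from
# it). The launch body and the region default are assumptions, not the repo's
# actual implementation.
from googleapiclient.discovery import build


def trigger_dataflow_job_from_template(
    project_id: str,
    bucket: str,
    template_name: str,
    job_name: str,
    region: str = "us-east1",
):
    """Launches the Dataflow template and returns the job resource dict."""
    dataflow = build("dataflow", "v1b3", cache_discovery=False)

    request = dataflow.projects().locations().templates().launch(
        projectId=project_id,
        location=region,
        gcsPath=f"gs://{bucket}/templates/{template_name}",
        body={"jobName": job_name},
    )
    response = request.execute()

    # templates.launch wraps the created job in a 'job' field.
    return response["job"]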
def start_calculation_pipeline(
    _event: Dict[str, Any], _context: ContextType
) -> Tuple[str, HTTPStatus]:
    """This function, which is triggered by a Pub/Sub event, can kick off any
    single Dataflow pipeline template."""
    project_id = os.environ.get(GCP_PROJECT_ID_KEY)
    if not project_id:
        error_str = (
            "No project_id set for call to run a calculation pipeline, returning."
        )
        logging.error(error_str)
        return error_str, HTTPStatus.BAD_REQUEST

    bucket = get_dataflow_template_bucket(project_id)

    template_name = os.environ.get("TEMPLATE_NAME")
    if not template_name:
        error_str = "No template_name set, returning."
        logging.error(error_str)
        return error_str, HTTPStatus.BAD_REQUEST

    job_name = os.environ.get("JOB_NAME")
    if not job_name:
        error_str = "No job_name set, returning."
        logging.error(error_str)
        return error_str, HTTPStatus.BAD_REQUEST

    region = os.environ.get("REGION")
    if not region:
        error_str = "No region set, returning."
        logging.error(error_str)
        return error_str, HTTPStatus.BAD_REQUEST

    response = trigger_dataflow_job_from_template(
        project_id, bucket, template_name, job_name, region
    )

    logging.info("The response to triggering the Dataflow job is: %s", response)

    return str(response), HTTPStatus.OK