示例#1
0
def start_and_monitor_calculation_pipeline(_event, _context) -> None:
    """Kick off a single Dataflow pipeline template and begin monitoring it.

    Triggered by a Pub/Sub event. Reads the job configuration from
    environment variables, launches the Dataflow job from its template,
    and then makes an IAP request to the app so it starts monitoring
    the job's progress.
    """

    def _required_env(key, missing_message):
        # Fetch an environment variable, logging an error when it is unset.
        value = os.environ.get(key)
        if not value:
            logging.error(missing_message)
        return value

    project_id = _required_env(
        GCP_PROJECT_ID_KEY,
        "No project id set for call to run a calculation pipeline, returning.",
    )
    if not project_id:
        return

    bucket = get_dataflow_template_bucket(project_id)

    template_name = _required_env(
        "TEMPLATE_NAME", "No template_name set, returning."
    )
    if not template_name:
        return

    job_name = _required_env("JOB_NAME", "No job_name set, returning.")
    if not job_name:
        return

    completion_topic = _required_env(
        "ON_DATAFLOW_JOB_COMPLETION_TOPIC", "No on-completion topic set, returning."
    )
    if not completion_topic:
        return

    region = _required_env("REGION", "No region set, returning.")
    if not region:
        return

    trigger_response = trigger_dataflow_job_from_template(
        project_id, bucket, template_name, job_name, region
    )
    logging.info(
        "The response to triggering the Dataflow job is: %s", trigger_response
    )

    # Dots are replaced so the topic is usable as a URL path segment.
    sanitized_topic = completion_topic.replace(".", "-")

    # Ask the app to monitor the successfully triggered Dataflow job.
    monitor_url = _DATAFLOW_MONITOR_URL.format(
        project_id,
        trigger_response["id"],
        trigger_response["location"],
        sanitized_topic,
    )
    monitor_response = make_iap_request(monitor_url, IAP_CLIENT_ID[project_id])
    logging.info("The monitoring Dataflow response is %s", monitor_response)
示例#2
0
def run_calculation_pipelines(_event, _context):
    """Kick off the Dataflow job with the given job_name for the project.

    Triggered by a Pub/Sub event. The template for the job lives at
    gs://{bucket}/templates/{template_name}. On successful triggering of
    the job, makes a call to the app so it begins monitoring the job's
    progress.
    """

    def _required_env(key, missing_message):
        # Fetch an environment variable, logging an error when it is unset.
        value = os.environ.get(key)
        if not value:
            logging.error(missing_message)
        return value

    project_id = _required_env(
        'GCP_PROJECT',
        'No project id set for call to run a calculation pipeline, returning.',
    )
    if not project_id:
        return

    bucket = get_dataflow_template_bucket(project_id)

    template_name = _required_env(
        'TEMPLATE_NAME', 'No template_name set, returning.'
    )
    if not template_name:
        return

    job_name = _required_env('JOB_NAME', 'No job_name set, returning.')
    if not job_name:
        return

    completion_topic = _required_env(
        'ON_DATAFLOW_JOB_COMPLETION_TOPIC',
        'No on-completion topic set, returning.',
    )
    if not completion_topic:
        return

    trigger_response = trigger_dataflow_job_from_template(
        project_id, bucket, template_name, job_name
    )
    logging.info(
        "The response to triggering the Dataflow job is: %s", trigger_response
    )

    # Dots are replaced so the topic is usable as a URL path segment.
    sanitized_topic = completion_topic.replace('.', '-')

    # Ask the app to monitor the successfully triggered Dataflow job.
    monitor_url = _DATAFLOW_MONITOR_URL.format(
        project_id,
        trigger_response['id'],
        trigger_response['location'],
        sanitized_topic,
    )
    monitor_response = make_iap_request(monitor_url, _CLIENT_ID[project_id])
    logging.info("The monitoring Dataflow response is %s", monitor_response)
示例#3
0
def start_calculation_pipeline(
        _event: Dict[str,
                     Any], _context: ContextType) -> Tuple[str, HTTPStatus]:
    """Kick off any single Dataflow pipeline template.

    Triggered by a Pub/Sub event. Returns a (message, status) pair:
    BAD_REQUEST with an error description when required configuration is
    missing, otherwise OK with the Dataflow trigger response.
    """
    project_id = os.environ.get(GCP_PROJECT_ID_KEY)
    if not project_id:
        failure = (
            "No project_id set for call to run a calculation pipeline, returning."
        )
        logging.error(failure)
        return failure, HTTPStatus.BAD_REQUEST

    bucket = get_dataflow_template_bucket(project_id)

    # The remaining required environment variables, validated in order.
    settings = {}
    for env_key in ("TEMPLATE_NAME", "JOB_NAME", "REGION"):
        value = os.environ.get(env_key)
        if not value:
            failure = "No " + env_key.lower() + " set, returning."
            logging.error(failure)
            return failure, HTTPStatus.BAD_REQUEST
        settings[env_key] = value

    trigger_response = trigger_dataflow_job_from_template(
        project_id, bucket, settings["TEMPLATE_NAME"], settings["JOB_NAME"],
        settings["REGION"])

    logging.info("The response to triggering the Dataflow job is: %s",
                 trigger_response)
    return str(trigger_response), HTTPStatus.OK