def _execute_and_wait_for_job(job_data):
    job_id = JobsPersistence.create(job_data)

    # wait for job to finish:
    with beeline.tracer("waiting for workers"):
        period = 0.5  # check every X seconds
        n_checks = int(REQUEST_TIMEOUT / period)
        for _ in range(n_checks):
            job = JobsPersistence.get_by_id(job_id)
            if job["current_status"] in ["finished", "error"]:
                break
            time.sleep(period)

    JobsPersistence.delete(job_id)

    if job["current_status"] == "finished":
        results = json.loads(job["results"])
        if len(results) != 1:
            return flask.make_response(
                jsonify(
                    id=None,
                    code=400,
                    message=
                    "This endpoint can only succeed if the process graph yields exactly one result, but it produced {}."
                    .format(len(results)),
                    links=[]), 400)

        s3 = boto3.client(
            's3',
            endpoint_url=S3_LOCAL_URL,
            region_name="eu-central-1",
            aws_access_key_id=AWS_ACCESS_KEY_ID,
            aws_secret_access_key=AWS_SECRET_ACCESS_KEY,
        )

        result = results[0]
        filename = result["filename"]
        object_key = '{}/{}'.format(job_id, os.path.basename(filename))

        s3_object = s3.get_object(Bucket=RESULTS_S3_BUCKET_NAME,
                                  Key=object_key)
        content = s3_object['Body'].read()
        response = flask.make_response(content, 200)
        response.mimetype = result["type"]
        return response

    if job["current_status"] == "error":
        return flask.make_response(
            jsonify(id=None,
                    code=job["error_code"],
                    message=job["error_msg"],
                    links=[]), job["http_code"])

    return flask.make_response(
        jsonify(id=None,
                code="Timeout",
                message="Request timed out.",
                links=[]), 408)
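# The functions in this file refer to module-level imports and configuration that
# are not shown. The block below is only an illustrative sketch of what that setup
# might look like; the environment-variable names, default values and the
# SIGNAL_QUIT_JOB sentinel are assumptions, not taken from the source.
import json
import logging
import multiprocessing
import os
import signal
import time

import beeline  # Honeycomb tracing
import boto3
import flask
from flask import jsonify

logger = logging.getLogger(__name__)

REQUEST_TIMEOUT = int(os.environ.get("REQUEST_TIMEOUT", "60"))  # seconds
S3_LOCAL_URL = os.environ.get("S3_LOCAL_URL")  # e.g. a local S3-compatible endpoint, or None for AWS
AWS_ACCESS_KEY_ID = os.environ.get("AWS_ACCESS_KEY_ID")
AWS_SECRET_ACCESS_KEY = os.environ.get("AWS_SECRET_ACCESS_KEY")
RESULTS_S3_BUCKET_NAME = os.environ.get("RESULTS_S3_BUCKET_NAME", "results")
SIGNAL_QUIT_JOB = "quit"  # sentinel value put on the queue to tell workers to exit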
def api_jobs():
    if flask.request.method == 'GET':
        jobs = []
        links = []

        for record in JobsPersistence.items():
            jobs.append({
                "id": record["id"],
                "title": record.get("title", None),
                "description": record.get("description", None),
            })
            links.append({
                # flask.request.url_root already ends with '/':
                "href": "{}jobs/{}".format(flask.request.url_root,
                                           record.get("id")),
                "title": record.get("title", None),
            })
        return {
            "jobs": jobs,
            "links": links,
        }, 200

    elif flask.request.method == 'POST':
        data = flask.request.get_json()

        process_graph_schema = PostJobsSchema()
        errors = process_graph_schema.validate(data)

        if errors:
            # Response procedure for validation will depend on how openeo_pg_parser_python will work
            return flask.make_response('Invalid request: {}'.format(errors),
                                       400)

        data["current_status"] = "submitted"
        data["should_be_cancelled"] = False

        record_id = JobsPersistence.create(data)

        # add requested headers to 201 response:
        response = flask.make_response('', 201)
        response.headers['Location'] = '/jobs/{}'.format(record_id)
        response.headers['OpenEO-Identifier'] = record_id
        return response
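# PostJobsSchema (used above) and PatchJobsSchema (used further below) are
# referenced but not defined in these snippets. A minimal sketch of what they
# could look like with marshmallow is shown here; the exact fields and
# validation rules are assumptions, not taken from the source.
from marshmallow import Schema, fields


class PostJobsSchema(Schema):
    process_graph = fields.Dict(required=True)
    title = fields.Str(required=False)
    description = fields.Str(required=False)
    variables = fields.Dict(required=False)


class PatchJobsSchema(Schema):
    process_graph = fields.Dict(required=False)
    title = fields.Str(required=False)
    description = fields.Str(required=False)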
def worker_proc(jobs_queue, worker_number):
    # worker shouldn't be concerned with signals - parent orchestrates
    # everything and will tell us if we need to quit:
    signal.signal(signal.SIGINT, signal.SIG_IGN)
    while True:
        job = jobs_queue.get(True, None)
        if job == SIGNAL_QUIT_JOB:
            return
        logger.info("Worker {} received a job [{}]: {}".format(
            worker_number, job["job_id"], job))

        # try to execute job's process graph:
        results = None
        error_msg = None
        error_code = None
        http_code = None
        try:
            results = _execute_process_graph(job["process_graph"],
                                             job["job_id"], job["variables"])
            logger.info("Worker {} successfully finished job [{}]".format(
                worker_number, job["job_id"]))
        except Exception as ex:
            logger.exception("Worker {}, job [{}] exec failed: {}".format(
                worker_number, job["job_id"], str(ex)))
            error_msg = getattr(ex, "msg", str(ex))
            error_code = getattr(ex, "error_code", "Internal")
            http_code = getattr(ex, "http_code", 500)
        finally:
            job_id = job["job_id"]
            logger.info("Worker {} writing results for job [{}]".format(
                worker_number, job_id))

            # write results:
            try:
                JobsPersistence.update_running_to_finished(
                    job_id, results, error_msg, error_code, http_code)
                logger.info("Job {} finished.".format(job_id))
            except Exception:
                logger.exception(
                    "Unknown error saving results, job [{}] will hang indefinitely!"
                    .format(job_id))
def main():
    # create workers:
    jobs_queue = multiprocessing.Queue()
    processes = []
    for i in range(10):
        p = multiprocessing.Process(target=worker_proc, args=(
            jobs_queue,
            i,
        ))
        p.daemon = True
        p.start()
        processes.append(p)

    # listen for changes on DynamoDB jobs table and dispatch any new jobs to
    # the workers:
    running_jobs = set()
    try:
        while True:
            _feed_monitoring_system()

            # Because we couldn't find a way to get notified about DynamoDB changes (via Streams)
            # without polling, we use SQS to get notified when new jobs appear. The SQS message
            # carries the job_ids, but we still query DynamoDB to fetch the jobs themselves
            # (see the wait_for_wakeup sketch after main() below):
            logger.info("Sleeping / waiting for wakeup:")
            wakeup = JobsPersistence.wait_for_wakeup(timeout=20)
            if not wakeup:
                logger.info("Continue sleeping...")
                continue

            logger.info("Woke up!")
            # GET queued AND should_be_cancelled = False
            new_queued = JobsPersistence.query_new_queued()
            for page in new_queued:
                for job in page["Items"]:
                    job_id = job["id"]['S']
                    logger.info("Found a job: {}".format(job_id))

                    success = JobsPersistence.update_queued_to_running(job_id)
                    if not success:
                        # someone was faster than us - we were not able to mark it as running,
                        # so we shouldn't execute it:
                        logger.info(
                            "Found a job, but could not update its status to running... ignoring it."
                        )
                        continue

                    running_jobs.add(job_id)
                    jobs_queue.put({
                        'job_id': job_id,
                        'process_graph': json.loads(job["process_graph"]['S']),
                        'variables': (json.loads(job["variables"]['S'])
                                      if "variables" in job else {}),
                    })

            # GET queued AND should_be_cancelled = True
            cancelled_queued = JobsPersistence.query_cancelled_queued()
            for page in cancelled_queued:
                for job in page["Items"]:
                    # Set them back to submitted:
                    job_id = job["id"]['S']
                    JobsPersistence.update_cancelled_queued_to_submitted(
                        job_id)

            # GET running AND should_be_cancelled = True
            cancelled_running = JobsPersistence.query_cancelled_running()
            for page in cancelled_running:
                for job in page["Items"]:
                    job_id = job["id"]['S']
                    if job_id not in running_jobs:
                        continue

                    JobsPersistence.update_cancelled_running_to_canceled(
                        job_id)
                    # we don't actually kill the process (though that would be nice), we just mark that the
                    # job is no longer running, so the results will not be used:
                    running_jobs.remove(job_id)

    except KeyboardInterrupt:
        logger.info("SIGINT received, exiting.")

    # clean up and quit:
    for i in range(len(processes)):
        jobs_queue.put(SIGNAL_QUIT_JOB)
    for p in processes:
        p.join()
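# JobsPersistence.wait_for_wakeup (used in main() above) is not shown in these
# snippets. A minimal sketch of how it might be implemented with SQS long polling
# is given below; the queue URL handling and the exact return value are
# assumptions, not taken from the source.
def wait_for_wakeup(timeout=20):
    sqs = boto3.client('sqs')
    queue_url = os.environ.get("JOBS_SQS_QUEUE_URL")  # hypothetical setting
    response = sqs.receive_message(
        QueueUrl=queue_url,
        MaxNumberOfMessages=10,
        WaitTimeSeconds=timeout,  # long polling: block until a message arrives or the timeout expires
    )
    messages = response.get("Messages", [])
    # the messages only tell us that *something* changed; the jobs themselves
    # are still read from DynamoDB by the caller:
    for message in messages:
        sqs.delete_message(QueueUrl=queue_url, ReceiptHandle=message["ReceiptHandle"])
    return bool(messages)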
def add_job_to_queue(job_id):
    if flask.request.method == "POST":
        job = JobsPersistence.get_by_id(job_id)

        if job["current_status"] in [
                "submitted", "finished", "canceled", "error"
        ]:
            JobsPersistence.update_status(job_id, "queued")
            return flask.make_response(
                'The creation of the resource has been queued successfully.',
                202)
        else:
            return flask.make_response(
                jsonify(
                    id=job_id,
                    code="JobLocked",
                    message=
                    'Job is locked due to a queued or running batch computation.',
                    links=[]), 400)

    elif flask.request.method == "GET":
        job = JobsPersistence.get_by_id(job_id)

        if job["current_status"] not in ["finished", "error"]:
            return flask.make_response(
                jsonify(
                    id=job_id,
                    code='JobNotFinished',
                    message=
                    'Job has not finished computing the results yet. Please try again later.',
                    links=[]), 400)

        if job["current_status"] == "error":
            return flask.make_response(
                jsonify(id=job_id,
                        code=job["error_code"],
                        message=job["error_msg"],
                        links=[]), 424)

        s3 = boto3.client(
            's3',
            endpoint_url=S3_LOCAL_URL,
            region_name="eu-central-1",
            aws_access_key_id=AWS_ACCESS_KEY_ID,
            aws_secret_access_key=AWS_SECRET_ACCESS_KEY,
        )
        links = []
        results = json.loads(job["results"])
        for result in results:
            # create signed url:
            filename = result["filename"]
            object_key = '{}/{}'.format(job_id, os.path.basename(filename))
            url = s3.generate_presigned_url(
                ClientMethod='get_object',
                Params={
                    'Bucket': RESULTS_S3_BUCKET_NAME,
                    'Key': object_key,
                })
            mime_type = result["type"]
            links.append({
                'href': url,
                'type': mime_type,
            })

        return flask.make_response(
            jsonify(
                id=job_id,
                title=job.get("title", None),
                description=job.get("description", None),
                updated=job[
                    "last_updated"],  # "updated" is a reserved word in DynamoDB
                links=links,
            ),
            200)

    elif flask.request.method == "DELETE":
        job = JobsPersistence.get_by_id(job_id)

        if job["current_status"] in ["queued", "running"]:
            JobsPersistence.set_should_be_cancelled(job_id)
            return flask.make_response(
                'Processing the job has been successfully canceled.', 200)

        return flask.make_response(
            jsonify(id=job_id,
                    code="JobNotStarted",
                    message="Job hasn't been started yet.",
                    links=[]), 400)
def api_batch_job(job_id):
    job = JobsPersistence.get_by_id(job_id)
    if job is None:
        return flask.make_response(
            jsonify(id=job_id,
                    code="JobNotFound",
                    message="The job does not exist.",
                    links=[]), 404)

    if flask.request.method == 'GET':
        status = job["current_status"]
        return flask.make_response(
            jsonify(
                id=job_id,
                title=job.get("title", None),
                description=job.get("description", None),
                process_graph=json.loads(job["process_graph"]),
                status=status,  # "status" is reserved word in DynamoDB
                error=job["error_msg"] if status == "error" else None,
                submitted=job["submitted"],
                updated=job["last_updated"],
            ),
            200)

    elif flask.request.method == 'PATCH':
        if job["current_status"] in ["queued", "running"]:
            return flask.make_response(
                jsonify(
                    id=job_id,
                    code="JobLocked",
                    message=
                    'Job is locked due to a queued or running batch computation.',
                    links=[]), 400)

        data = flask.request.get_json()
        errors = PatchJobsSchema().validate(data)
        if errors:
            # Response procedure for validation will depend on how openeo_pg_parser_python will work
            return flask.make_response(
                jsonify(id=job_id, code=400, message=errors, links=[]), 400)

        for key in data:
            JobsPersistence.update_key(job_id, key, data[key])
        JobsPersistence.update_status(job_id, "submitted")

        return flask.make_response('Changes to the job applied successfully.',
                                   204)

    elif flask.request.method == 'DELETE':
        JobsPersistence.set_should_be_cancelled(job_id)

        # wait for the job to reach status 'canceled':
        period = 0.5  # check every X seconds
        n_checks = int(REQUEST_TIMEOUT / period)
        for _ in range(n_checks):
            job = JobsPersistence.get_by_id(job_id)
            if job["current_status"] == "canceled":
                break
            time.sleep(period)

        JobsPersistence.delete(job_id)
        return flask.make_response('The job has been successfully deleted.',
                                   204)
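# The view functions above inspect flask.request.method themselves, which suggests
# they are registered on a Flask app roughly as sketched below. The `app` object
# and the exact routes are assumptions, not taken from the source.
app = flask.Flask(__name__)
app.add_url_rule('/jobs', view_func=api_jobs, methods=['GET', 'POST'])
app.add_url_rule('/jobs/<job_id>', view_func=api_batch_job,
                 methods=['GET', 'PATCH', 'DELETE'])
app.add_url_rule('/jobs/<job_id>/results', view_func=add_job_to_queue,
                 methods=['GET', 'POST', 'DELETE'])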
def teardown_function(function):
    ProcessGraphsPersistence.clear_table()
    JobsPersistence.clear_table()
    ServicesPersistence.clear_table()
def setup_function(function):
    ProcessGraphsPersistence.ensure_table_exists()
    JobsPersistence.ensure_table_exists()
    JobsPersistence.ensure_queue_exists()
    ServicesPersistence.ensure_table_exists()
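# A minimal sketch of a test that relies on the setup/teardown fixtures above,
# assuming the Flask `app` object and routes sketched earlier; the request payload
# and assertions are illustrative only.
def test_create_and_list_jobs():
    with app.test_client() as client:
        response = client.post("/jobs", json={"process_graph": {}})
        assert response.status_code == 201
        job_id = response.headers["OpenEO-Identifier"]

        response = client.get("/jobs")
        assert response.status_code == 200
        assert any(job["id"] == job_id for job in response.get_json()["jobs"])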