Example #1
def authorize_run_listing(project_id):
    logger.info("Looking up project")
    # Check the project resource exists
    abort_if_project_doesnt_exist(project_id)
    if request.headers is None or 'Authorization' not in request.headers:
        safe_fail_request(401, message="Authentication token required")
    auth_header = request.headers.get('Authorization')
    logger.info("Checking credentials to list project runs")
    # Check the caller has a valid results token (analyst token)
    abort_if_invalid_results_token(project_id, auth_header)
    logger.info("Caller is allowed to list project runs")
Example #2
def precheck_upload_token(project_id, headers, parent_span):
    """
    Raise a `ProblemException` if the project doesn't exist or the
    authentication token passed in the headers isn't valid.
    """
    with opentracing.tracer.start_span('check-auth',
                                       child_of=parent_span) as span:
        abort_if_project_doesnt_exist(project_id)
        if headers is None or 'Authorization' not in headers:
            safe_fail_request(401, message="Authentication token required")

        token = headers['Authorization']

        # Check the caller has a valid token -> otherwise 403
        abort_if_invalid_dataprovider_token(token)
    return token
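
The two failure modes implied by the checks above (missing header versus invalid token) map to different status codes. A minimal, self-contained sketch of that decision, where token_is_valid is a hypothetical stand-in for the data-provider token lookup and nothing here is taken from the real helpers:

from typing import Callable, Mapping, Optional

def auth_status(headers: Optional[Mapping[str, str]],
                token_is_valid: Callable[[str], bool]) -> int:
    # No Authorization header at all: the caller never authenticated -> 401.
    if headers is None or 'Authorization' not in headers:
        return 401
    # Header present but the token isn't accepted as a data provider
    # token -> 403, as the comment in the handler notes.
    if not token_is_valid(headers['Authorization']):
        return 403
    return 200

assert auth_status(None, lambda t: True) == 401
assert auth_status({'Authorization': 'bad'}, lambda t: False) == 403
assert auth_status({'Authorization': 'good'}, lambda t: True) == 200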
Example #3
def project_delete(project_id):
    log = logger.bind(pid=project_id)
    log.info('Request to delete project')
    # Check the resource exists and hasn't already been marked for deletion
    abort_if_project_doesnt_exist(project_id)

    # Check the caller has a valid results token. Yes it should be renamed.
    abort_if_invalid_results_token(project_id,
                                   request.headers.get('Authorization'))
    log.info("Marking project for deletion")

    with DBConn() as db_conn:
        db.mark_project_deleted(db_conn, project_id)

    log.info("Queuing authorized request to delete project resources")
    remove_project.delay(project_id)

    return '', 204
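
The delete handler marks the project deleted synchronously and only then hands the slow resource cleanup to a background worker via remove_project.delay. A dependency-free sketch of that ordering, with an in-process queue standing in for Celery (all names below are illustrative, not the real DB or task API):

from queue import Queue

deleted_projects = set()   # stands in for db.mark_project_deleted
cleanup_queue = Queue()    # stands in for the Celery task queue

def project_delete_sketch(project_id):
    deleted_projects.add(project_id)   # project is immediately reported as gone
    cleanup_queue.put(project_id)      # expensive cleanup happens later, off-request
    return '', 204

assert project_delete_sketch('abc123') == ('', 204)
assert 'abc123' in deleted_projects
assert cleanup_queue.get() == 'abc123'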
Example #4
def authorise_get_request(project_id):
    if request.headers is None or 'Authorization' not in request.headers:
        safe_fail_request(401, message="Authentication token required")
    auth_header = request.headers.get('Authorization')
    dp_id = None
    # Check the resource exists
    abort_if_project_doesnt_exist(project_id)
    with DBConn() as dbinstance:
        project_object = db.get_project(dbinstance, project_id)
    logger.info("Checking credentials")
    if project_object['result_type'] in ('mapping', 'similarity_scores'):
        # Check the caller has a valid results token if we are including results
        abort_if_invalid_results_token(project_id, auth_header)
    elif project_object['result_type'] == 'permutations':
        dp_id = get_authorization_token_type_or_abort(project_id, auth_header)
    else:
        safe_fail_request(500, "Unknown error")
    return dp_id, project_object
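
Which credential the GET handler demands depends on the project's result type. A compact sketch of that dispatch; the result-type strings come from the snippet, and the strings returned below simply name the helper the handler would call (the exact token semantics of the 'permutations' branch aren't visible in this excerpt):

def required_check(result_type):
    # Results-bearing outputs require the analyst's results token.
    if result_type in ('mapping', 'similarity_scores'):
        return 'abort_if_invalid_results_token'
    # Permutations are scoped per data provider, so the token type itself
    # is inspected and may resolve to a dp_id.
    if result_type == 'permutations':
        return 'get_authorization_token_type_or_abort'
    # Any other value is treated as an internal error (HTTP 500).
    return 'safe_fail_request(500)'

assert required_check('mapping') == 'abort_if_invalid_results_token'
assert required_check('permutations') == 'get_authorization_token_type_or_abort'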
Example #5
def post(project_id, run):
    log, span = bind_log_and_span(project_id)
    log.debug("Processing request to add a new run", run=run)
    # Check the resource exists
    abort_if_project_doesnt_exist(project_id)

    # Check the caller has a valid results token. Yes it should be renamed.
    abort_if_invalid_results_token(project_id,
                                   request.headers.get('Authorization'))

    abort_if_project_in_error_state(project_id)

    run_model = Run.from_json(run, project_id)

    log.debug("Saving run")

    with db.DBConn() as db_conn:
        run_model.save(db_conn)

    check_for_executable_runs.delay(project_id, serialize_span(span))
    return RunDescription().dump(run_model), 201
Example #6
def post(project_id, run):
    log = logger.bind(pid=project_id)
    log.debug("Processing request to add a new run", run=run)
    # Check the resource exists
    abort_if_project_doesnt_exist(project_id)

    # Check the caller has a valid results token. Yes it should be renamed.
    abort_if_invalid_results_token(project_id,
                                   request.headers.get('Authorization'))

    abort_if_project_in_error_state(project_id)

    run_model = Run.from_json(run, project_id)

    log.debug("Saving run")

    with db.DBConn() as db_conn:
        run_model.save(db_conn)
        project_object = db.get_project(db_conn, project_id)
        parties_contributed = db.get_number_parties_uploaded(
            db_conn, project_id)
        ready_to_run = parties_contributed == project_object['parties']
        log.debug(
            "Expecting {} parties to upload data. Have received {}".format(
                project_object['parties'], parties_contributed))
        if ready_to_run:
            log.info(
                "Scheduling task to carry out all runs for project {} now".
                format(project_id))
            update_run_mark_queued(db_conn, run_model.run_id)
        else:
            log.info("Task queued but won't start until CLKs are all uploaded")

    if ready_to_run:
        span = g.flask_tracer.get_span()
        span.set_tag("run_id", run_model.run_id)
        span.set_tag("project_id", run_model.project_id)
        check_for_executable_runs.delay(project_id, serialize_span(span))
    return RunDescription().dump(run_model), 201
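
This variant only queues the run once every configured party has uploaded data. The readiness rule itself is a single comparison; a tiny sketch with made-up numbers:

def ready_to_run(parties_expected, parties_contributed):
    # Mirrors `parties_contributed == project_object['parties']` above.
    return parties_contributed == parties_expected

assert not ready_to_run(parties_expected=2, parties_contributed=1)   # still waiting
assert ready_to_run(parties_expected=2, parties_contributed=2)       # queue the run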
Example #7
def project_get(project_id):
    """
    This endpoint describes a Project.
    """
    log = logger.bind(pid=project_id)
    log.info("Getting detail for a project")
    abort_if_project_doesnt_exist(project_id)
    authorise_get_request(project_id)
    with DBConn() as db_conn:
        project_object = db.get_project(db_conn, project_id)
        # Expose the number of data providers who have uploaded clks
        parties_contributed = db.get_number_parties_uploaded(
            db_conn, project_id)
        num_parties_with_error = db.get_encoding_error_count(
            db_conn, project_id)
    log.info(f"{parties_contributed} parties have contributed hashes")
    project_object['parties_contributed'] = parties_contributed

    if num_parties_with_error > 0:
        log.warning(
            f"There are {num_parties_with_error} parties in error state")
    project_object['error'] = num_parties_with_error > 0

    return ProjectDescription().dump(project_object)
Example #8
def project_clks_post(project_id):
    """
    Update a project to provide encoded PII data.
    """
    log = logger.bind(pid=project_id)
    headers = request.headers

    parent_span = g.flask_tracer.get_span()

    with opentracing.tracer.start_span('check-auth',
                                       child_of=parent_span) as span:
        abort_if_project_doesnt_exist(project_id)
        if headers is None or 'Authorization' not in headers:
            safe_fail_request(401, message="Authentication token required")

        token = headers['Authorization']

        # Check the caller has a valid token -> otherwise 403
        abort_if_invalid_dataprovider_token(token)

    with DBConn() as conn:
        dp_id = db.get_dataprovider_id(conn, token)
        project_encoding_size = db.get_project_schema_encoding_size(
            conn, project_id)

    log = log.bind(dp_id=dp_id)
    log.info("Receiving CLK data.")
    receipt_token = None

    with opentracing.tracer.start_span('upload-data',
                                       child_of=parent_span) as span:
        span.set_tag("project_id", project_id)
        if headers['Content-Type'] == "application/json":
            span.set_tag("content-type", 'json')
            # TODO: Previously we accessed the CLKs in a streaming fashion to avoid
            #       parsing the JSON in one hit, which let the web frontend run with
            #       less memory. However, connexion is very strict about JSON input
            #       validation, so it always consumes the whole stream to validate it
            #       against the spec. Hence the backflip to fully reading the CLKs
            #       as JSON into memory. -> issue #184

            receipt_token, raw_file = upload_json_clk_data(
                dp_id, get_json(), span)
            # Schedule a task to deserialize the hashes, and carry
            # out a pop count.
            handle_raw_upload.delay(project_id,
                                    dp_id,
                                    receipt_token,
                                    parent_span=serialize_span(span))
            log.info("Job scheduled to handle user uploaded hashes")
        elif headers['Content-Type'] == "application/octet-stream":
            span.set_tag("content-type", 'binary')
            log.info("Handling binary CLK upload")
            try:
                count, size = check_binary_upload_headers(headers)
                log.info(
                    f"Headers tell us to expect {count} encodings of {size} bytes"
                )
                span.log_kv({'count': count, 'size': size})
            except Exception:
                log.warning(
                    "Upload failed due to problem with headers in binary upload"
                )
                raise
            # Check against project level encoding size (if it has been set)
            if project_encoding_size is not None and size != project_encoding_size:
                # fail fast - we haven't stored the encoded data yet
                return safe_fail_request(
                    400, "Upload 'Hash-Size' doesn't match project settings")

            # TODO actually stream the upload data straight to Minio. Currently we can't because
            # connexion has already read the data before our handler is called!
            # https://github.com/zalando/connexion/issues/592
            # stream = get_stream()
            stream = BytesIO(request.data)
            log.debug(
                f"Stream size is {len(request.data)} B, and we expect {(6 + size) * count} B"
            )
            if len(request.data) != (6 + size) * count:
                safe_fail_request(
                    400,
                    "Uploaded data did not match the expected size. Check request headers are correct"
                )
            try:
                receipt_token = upload_clk_data_binary(project_id, dp_id,
                                                       stream, count, size)
            except ValueError:
                safe_fail_request(
                    400,
                    "Uploaded data did not match the expected size. Check request headers are correct."
                )
        else:
            safe_fail_request(400, "Content Type not supported")

    return {'message': 'Updated', 'receipt_token': receipt_token}, 201
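
For binary uploads the handler validates the body length against the count and size announced in the headers, expecting 6 extra bytes per encoding on top of the encoding itself (what those 6 bytes contain isn't visible in this snippet). A worked example of that arithmetic:

def expected_binary_body_size(count, size):
    # Mirrors the check `len(request.data) != (6 + size) * count` above.
    return (6 + size) * count

# 1000 encodings of 128 bytes each -> 134 * 1000 = 134,000 bytes expected.
assert expected_binary_body_size(count=1000, size=128) == 134000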