示例#1
0
def post(req: func.HttpRequest) -> func.HttpResponse:
    """Handle a POSTed node event.

    The request is parsed as a ``NodeEventEnvelope``. A bare
    ``NodeStateUpdate`` or ``WorkerEvent`` payload is wrapped in a
    ``NodeEvent`` before being dispatched to ``on_state_update`` or
    ``on_worker_event``.

    Returns ``ok(BoolResult(result=True))`` after dispatching, or a
    ``not_ok`` response if parsing fails or the event is not of a
    recognised kind.
    """

    def _invalid_event() -> func.HttpResponse:
        # Shared rejection used for both unrecognised payloads and empty events.
        failure = Error(code=ErrorCode.INVALID_REQUEST, errors=["invalid node event"])
        return not_ok(failure, context=ERROR_CONTEXT)

    envelope = parse_request(NodeEventEnvelope, req)
    if isinstance(envelope, Error):
        return not_ok(envelope, context=ERROR_CONTEXT)

    logging.info(
        "node event: machine_id: %s event: %s",
        envelope.machine_id,
        envelope.event,
    )

    # Normalise the payload so the dispatch below only deals with NodeEvent.
    payload = envelope.event
    if isinstance(payload, NodeEvent):
        event = payload
    elif isinstance(payload, NodeStateUpdate):
        event = NodeEvent(state_update=payload)
    elif isinstance(payload, WorkerEvent):
        event = NodeEvent(worker_event=payload)
    else:
        return _invalid_event()

    if event.state_update:
        on_state_update(envelope.machine_id, event.state_update)
    elif event.worker_event:
        on_worker_event(envelope.machine_id, event.worker_event)
    else:
        return _invalid_event()

    return ok(BoolResult(result=True))
示例#2
0
def patch(req: func.HttpRequest) -> func.HttpResponse:
    """Reset the proxy of a region by marking it as stopping.

    Parses a ``ProxyReset`` request, looks the proxy up by region and, when
    one exists, sets its state to ``VmState.stopping`` and saves it.

    The ``BoolResult`` in the response is ``True`` when a proxy was found and
    ``False`` otherwise.
    """
    reset = parse_request(ProxyReset, req)
    if isinstance(reset, Error):
        return not_ok(reset, context="ProxyReset")

    target = Proxy.get(reset.region)
    if target is None:
        # Nothing to reset for this region.
        return ok(BoolResult(result=False))

    target.state = VmState.stopping
    target.save()
    return ok(BoolResult(result=True))
示例#3
0
def patch(req: func.HttpRequest) -> func.HttpResponse:
    """Reset every proxy found for a region.

    Parses a ``ProxyReset`` request, searches for proxies in the requested
    region and calls ``set_state(VmState.stopping)`` on each of them.

    The ``BoolResult`` in the response reflects the truthiness of the search
    result.
    """
    reset = parse_request(ProxyReset, req)
    if isinstance(reset, Error):
        return not_ok(reset, context="ProxyReset")

    matches = Proxy.search(query={"region": [reset.region]})
    for entry in matches:
        entry.set_state(VmState.stopping)

    return ok(BoolResult(result=bool(matches)))
示例#4
0
def delete(req: func.HttpRequest) -> func.HttpResponse:
    """Delete the container named in a ``ContainerDelete`` request.

    The deletion is delegated to ``delete_container`` with
    ``StorageType.corpus``; its return value becomes the ``BoolResult`` of the
    response.
    """
    params = parse_request(ContainerDelete, req)
    if isinstance(params, Error):
        return not_ok(params, context="container delete")

    logging.info("container - deleting %s", params.name)
    deleted = delete_container(params.name, StorageType.corpus)
    return ok(BoolResult(result=deleted))
示例#5
0
def delete(req: func.HttpRequest) -> func.HttpResponse:
    """Delete the job template named in a ``JobTemplateDelete`` request.

    Returns a ``not_ok`` response when the request cannot be parsed.
    Otherwise the ``BoolResult`` in the response is ``True`` when a template
    with the requested name existed (and was deleted) and ``False`` when no
    such template was found.
    """
    request = parse_request(JobTemplateDelete, req)
    if isinstance(request, Error):
        return not_ok(request, context="JobTemplateDelete")

    entry = JobTemplateIndex.get(request.name)
    if entry is None:
        return ok(BoolResult(result=False))

    # Previously the entry was only looked up, so the handler reported
    # success while leaving the template in place.
    # NOTE(review): assumes JobTemplateIndex entries expose ``delete()`` like
    # the other stored entities handled in this file -- confirm.
    entry.delete()
    return ok(BoolResult(result=True))
示例#6
0
def delete(req: func.HttpRequest) -> func.HttpResponse:
    """Delete a single node message identified by the request URI.

    The request is parsed with ``parse_uri`` as a ``NodeCommandDelete``; the
    referenced message is removed through ``NodeMessage.delete_messages``.
    The response always carries ``BoolResult(result=True)`` once parsing
    succeeded.
    """
    command = parse_uri(NodeCommandDelete, req)
    if isinstance(command, Error):
        return not_ok(command, context="NodeCommandDelete")

    message_ids = [command.message_id]
    NodeMessage.delete_messages(command.machine_id, message_ids)
    return ok(BoolResult(result=True))
示例#7
0
def on_worker_event(machine_id: UUID, event: WorkerEvent) -> func.HttpResponse:
    """Apply a worker event (task running / task done) reported by a node.

    Args:
        machine_id: Identifier of the node that sent the event.
        event: The worker event; exactly one of ``event.running`` or
            ``event.done`` is expected to be set.

    Returns:
        ``ok(BoolResult(result=True))`` after the task, node and task event
        have been saved.

    Raises:
        RequestException: If the event is neither a ``running`` nor a
            ``done`` event.
    """
    if event.running:
        task_id = event.running.task_id
    elif event.done:
        task_id = event.done.task_id
    else:
        # Reject unknown event kinds before `task_id` is used. Without this
        # early exit the lookup below failed with an UnboundLocalError and the
        # intended RequestException was never raised.
        err = Error(
            code=ErrorCode.INVALID_REQUEST,
            errors=["invalid worker event type"],
        )
        raise RequestException(err)

    task = get_task_checked(task_id)
    node = get_node_checked(machine_id)
    node_task = NodeTasks(machine_id=machine_id,
                          task_id=task_id,
                          state=NodeTaskState.running)

    if event.running:
        if task.state not in TaskState.shutting_down():
            task.state = TaskState.running
        if node.state not in NodeState.ready_for_reset():
            node.state = NodeState.busy
        node_task.save()
        task.on_start()
    else:
        # The validation above guarantees this is a `done` event.
        #
        # Only record exit status if the task isn't already shutting down.
        #
        # It's ok for the agent to fail because resources vanish out from underneath
        # it during deletion.
        if task.state not in TaskState.shutting_down():
            exit_status = event.done.exit_status

            if not exit_status.success:
                logging.error("task failed: status = %s", exit_status)

                task.error = Error(
                    code=ErrorCode.TASK_FAILED,
                    errors=[
                        "task failed. exit_status = %s" % exit_status,
                        event.done.stdout,
                        event.done.stderr,
                    ],
                )

            task.state = TaskState.stopping
        if node.state not in NodeState.ready_for_reset():
            node.state = NodeState.done
        node_task.delete()

    task.save()
    node.save()
    # Keep a record of the raw event alongside the task.
    task_event = TaskEvent(task_id=task_id,
                           machine_id=machine_id,
                           event_data=event)
    task_event.save()
    return ok(BoolResult(result=True))
示例#8
0
def delete(req: func.HttpRequest) -> func.HttpResponse:
    """Delete a node message if it exists.

    Parses a ``NodeCommandDelete`` request and looks the message up by
    machine id and message id. A missing message is not treated as an error:
    the response carries ``BoolResult(result=True)`` either way.
    """
    command = parse_request(NodeCommandDelete, req)
    if isinstance(command, Error):
        return not_ok(command, context="NodeCommandDelete")

    pending = NodeMessage.get(command.machine_id, command.message_id)
    if pending:
        pending.delete()

    return ok(BoolResult(result=True))
示例#9
0
def delete(req: func.HttpRequest) -> func.HttpResponse:
    """Shut down the pool named in a ``PoolStop`` request.

    The pool is resolved by name and ``set_shutdown`` is called on it with
    the request's ``now`` flag. Parse failures and lookup failures are
    returned as ``not_ok`` responses.
    """
    stop = parse_request(PoolStop, req)
    if isinstance(stop, Error):
        return not_ok(stop, context="PoolDelete")

    target = Pool.get_by_name(stop.name)
    if isinstance(target, Error):
        return not_ok(target, context="pool stop")

    target.set_shutdown(now=stop.now)
    return ok(BoolResult(result=True))
示例#10
0
def post(req: func.HttpRequest) -> func.HttpResponse:
    """Store an uploaded job template.

    Builds a ``JobTemplateIndex`` from the name and template of a
    ``JobTemplateUpload`` request and saves it. If either parsing or saving
    yields an ``Error``, that error is returned through ``not_ok``.
    """
    context = "JobTemplateUpload"

    upload = parse_request(JobTemplateUpload, req)
    if isinstance(upload, Error):
        return not_ok(upload, context=context)

    outcome = JobTemplateIndex(name=upload.name, template=upload.template).save()
    if isinstance(outcome, Error):
        return not_ok(outcome, context=context)

    return ok(BoolResult(result=True))
示例#11
0
def delete(req: func.HttpRequest) -> func.HttpResponse:
    """Delete the webhook identified by a ``WebhookGet`` request.

    Parse failures and lookup failures (``Webhook.get_by_id`` returning an
    ``Error``) are reported through ``not_ok``; otherwise the webhook is
    deleted and ``BoolResult(result=True)`` is returned.
    """
    context = "webhook delete"

    lookup = parse_request(WebhookGet, req)
    if isinstance(lookup, Error):
        return not_ok(lookup, context=context)

    logging.info("deleting webhook: %s", lookup.webhook_id)

    webhook = Webhook.get_by_id(lookup.webhook_id)
    if isinstance(webhook, Error):
        return not_ok(webhook, context=context)

    webhook.delete()
    return ok(BoolResult(result=True))
示例#12
0
def on_state_update(machine_id: UUID, state: NodeState) -> func.HttpResponse:
    """Record a state reported by a node.

    The update is applied when the reported state is ``NodeState.init`` or
    the node's current state is not one of ``NodeState.ready_for_reset()``;
    otherwise it is only logged. The node is saved only when its state
    actually changes.

    Always returns ``ok(BoolResult(result=True))``.
    """
    node = get_node_checked(machine_id)

    accepted = (state == NodeState.init
                or node.state not in NodeState.ready_for_reset())

    if not accepted:
        logging.info("ignoring state updates from the node: %s: %s",
                     machine_id, state)
    elif node.state != state:
        node.state = state
        node.save()

    return ok(BoolResult(result=True))
示例#13
0
def post(req: func.HttpRequest) -> func.HttpResponse:
    """Create a task from a ``TaskConfig`` request.

    Steps, in order: parse the request, parse the caller's JWT token,
    validate the config with ``check_config``, stop early for dry runs
    (``dryrun`` present in the query parameters), verify the job exists and
    is in the ``enabled`` or ``init`` state, verify every prerequisite task
    can be resolved, then create the task.

    Returns ``ok(task)`` on success, ``ok(BoolResult(result=True))`` for a
    dry run, or a ``not_ok`` response describing the first failure.
    """
    context = "task create"

    config = parse_request(TaskConfig, req)
    if isinstance(config, Error):
        return not_ok(config, context=context)

    user_info = parse_jwt_token(req)
    if isinstance(user_info, Error):
        return not_ok(user_info, context=context)

    try:
        check_config(config)
    except TaskConfigError as err:
        invalid = Error(code=ErrorCode.INVALID_REQUEST, errors=[str(err)])
        return not_ok(invalid, context=context)

    # A dry run only validates the config; nothing is looked up or created.
    if "dryrun" in req.params:
        return ok(BoolResult(result=True))

    job = Job.get(config.job_id)
    if job is None:
        missing = Error(code=ErrorCode.INVALID_REQUEST,
                        errors=["unable to find job"])
        return not_ok(missing, context=config.job_id)

    if job.state not in (JobState.enabled, JobState.init):
        wrong_state = Error(
            code=ErrorCode.UNABLE_TO_ADD_TASK_TO_JOB,
            errors=["unable to add a job in state: %s" % job.state.name],
        )
        return not_ok(wrong_state, context=job.job_id)

    # Every prerequisite must resolve to an existing task.
    for prereq_id in config.prereq_tasks or []:
        prereq = Task.get_by_task_id(prereq_id)
        if isinstance(prereq, Error):
            return not_ok(prereq, context="task create prerequisite")

    created = Task.create(config=config,
                          job_id=config.job_id,
                          user_info=user_info)
    if isinstance(created, Error):
        return not_ok(created, context="task create invalid pool")

    return ok(created)
示例#14
0
def patch(req: func.HttpRequest) -> func.HttpResponse:
    """Stop the node identified by a ``NodeGet`` request.

    Returns a ``not_ok`` response with ``ErrorCode.UNABLE_TO_FIND`` when no
    node matches the machine id; otherwise calls ``node.stop()`` and returns
    ``BoolResult(result=True)``.
    """
    lookup = parse_request(NodeGet, req)
    if isinstance(lookup, Error):
        return not_ok(lookup, context="NodeRestart")

    node = Node.get_by_machine_id(lookup.machine_id)
    if node:
        node.stop()
        return ok(BoolResult(result=True))

    not_found = Error(code=ErrorCode.UNABLE_TO_FIND,
                      errors=["unable to find node"])
    return not_ok(not_found, context=lookup.machine_id)
示例#15
0
def patch(req: func.HttpRequest) -> func.HttpResponse:
    """Replace the template of an existing job template entry.

    Returns a ``not_ok`` response with ``ErrorCode.UNABLE_TO_UPDATE`` when no
    entry with the requested name exists; otherwise stores the new template
    and returns ``BoolResult(result=True)``.
    """
    context = "JobTemplateUpdate"

    update = parse_request(JobTemplateUpdate, req)
    if isinstance(update, Error):
        return not_ok(update, context=context)

    existing = JobTemplateIndex.get(update.name)
    if existing is None:
        missing = Error(code=ErrorCode.UNABLE_TO_UPDATE,
                        errors=["no such job template"])
        return not_ok(missing, context=context)

    existing.template = update.template
    existing.save()
    return ok(BoolResult(result=True))
示例#16
0
def post(req: func.HttpRequest) -> func.HttpResponse:
    """Update settings of the node identified by a ``NodeUpdate`` request.

    ``debug_keep_node`` is changed only when the request supplies a value
    (anything other than ``None``); the node is saved in either case.
    Returns a ``not_ok`` response with ``ErrorCode.UNABLE_TO_FIND`` when the
    node does not exist.
    """
    update = parse_request(NodeUpdate, req)
    if isinstance(update, Error):
        return not_ok(update, context="NodeUpdate")

    node = Node.get_by_machine_id(update.machine_id)
    if not node:
        not_found = Error(code=ErrorCode.UNABLE_TO_FIND,
                          errors=["unable to find node"])
        return not_ok(not_found, context=update.machine_id)

    keep = update.debug_keep_node
    if keep is not None:
        node.debug_keep_node = keep

    node.save()
    return ok(BoolResult(result=True))
示例#17
0
def delete(req: func.HttpRequest) -> func.HttpResponse:
    """Remove a debug proxy forward and refresh the affected proxies.

    ``ProxyForward.remove_forward`` yields the regions touched by the
    removal; for each region the proxy is fetched (or created) and, when one
    is returned, its proxy config is saved again.
    """
    removal = parse_request(ProxyDelete, req)
    if isinstance(removal, Error):
        return not_ok(removal, context="debug_proxy delete")

    affected_regions = ProxyForward.remove_forward(
        scaleset_id=removal.scaleset_id,
        machine_id=removal.machine_id,
        dst_port=removal.dst_port,
    )

    for region in affected_regions:
        proxy = Proxy.get_or_create(region)
        if not proxy:
            continue
        proxy.save_proxy_config()

    return ok(BoolResult(result=True))
示例#18
0
def post(req: func.HttpRequest) -> func.HttpResponse:
    """Add an SSH public key to the node named in a ``NodeAddSshKey`` request.

    Returns a ``not_ok`` response when the request cannot be parsed, the node
    cannot be found, or ``add_ssh_public_key`` returns an ``Error``;
    otherwise returns ``BoolResult(result=True)``.
    """
    context = "NodeAddSshKey"

    params = parse_request(NodeAddSshKey, req)
    if isinstance(params, Error):
        return not_ok(params, context=context)

    node = Node.get_by_machine_id(params.machine_id)
    if not node:
        not_found = Error(code=ErrorCode.UNABLE_TO_FIND,
                          errors=["unable to find node"])
        return not_ok(not_found, context=params.machine_id)

    outcome = node.add_ssh_public_key(public_key=params.public_key)
    if isinstance(outcome, Error):
        return not_ok(outcome, context=context)

    return ok(BoolResult(result=True))
示例#19
0
def on_state_update(
    machine_id: UUID,
    state_update: NodeStateUpdate,
) -> func.HttpResponse:
    """Record a node state update and mark its tasks as setting up.

    The update is applied when the reported state is ``NodeState.init`` or
    the node's current state is not one of ``NodeState.ready_for_reset()``;
    otherwise it is only logged. When the node transitions into
    ``NodeState.setting_up`` and the update carries data, each listed task is
    moved to ``TaskState.setting_up`` (unless already running) and a
    ``NodeTasks`` entry in the ``setting_up`` state is saved for it.

    Always returns ``ok(BoolResult(result=True))``.
    """
    new_state = state_update.state
    node = get_node_checked(machine_id)

    accepted = (new_state == NodeState.init
                or node.state not in NodeState.ready_for_reset())
    if not accepted:
        logging.info("ignoring state updates from the node: %s: %s",
                     machine_id, new_state)
        return ok(BoolResult(result=True))

    if node.state != new_state:
        node.state = new_state
        node.save()

        # `data` will be required in the future; it is optional for now to
        # stay compatible with older senders.
        if new_state == NodeState.setting_up and state_update.data:
            for task_id in state_update.data.tasks:
                task = get_task_checked(task_id)

                # A task with `vm_count` > 1 may already be `running` on
                # another node. Leave that state alone so it keeps showing
                # the furthest progress made. Other states worth preserving
                # are already excluded by the acceptance check above.
                if task.state != TaskState.running:
                    task.state = TaskState.setting_up

                # `on_start()` is deliberately not called here; that happens
                # once a worker event reports the task as `running`.
                task.save()

                # The per-node entry is `setting_up` even if the task itself
                # is already `running`.
                NodeTasks(
                    machine_id=machine_id,
                    task_id=task_id,
                    state=NodeTaskState.setting_up,
                ).save()

    return ok(BoolResult(result=True))
示例#20
0
def delete(req: func.HttpRequest) -> func.HttpResponse:
    """Stop the scaleset identified by a ``ScalesetStop`` request.

    The scaleset state becomes ``ScalesetState.halt`` when the request's
    ``now`` flag is truthy and ``ScalesetState.shutdown`` otherwise, and the
    scaleset is saved. Parse and lookup failures are returned through
    ``not_ok``.
    """
    stop = parse_request(ScalesetStop, req)
    if isinstance(stop, Error):
        return not_ok(stop, context="ScalesetDelete")

    scaleset = Scaleset.get_by_id(stop.scaleset_id)
    if isinstance(scaleset, Error):
        return not_ok(scaleset, context="scaleset stop")

    scaleset.state = ScalesetState.halt if stop.now else ScalesetState.shutdown
    scaleset.save()

    # NOTE(review): `auth` is cleared after save() and the scaleset is not
    # part of the response, so this only affects the in-memory object.
    scaleset.auth = None
    return ok(BoolResult(result=True))
示例#21
0
def post(req: func.HttpRequest) -> func.HttpResponse:
    """Handle a POSTed node event by delegating to ``process``.

    The parsed ``NodeEventEnvelope`` is logged (event serialised with
    ``exclude_none=True``) and passed to ``process``. An ``Error`` result is
    logged and returned through ``not_ok``; anything else yields
    ``BoolResult(result=True)``.
    """
    context = "node event"

    envelope = parse_request(NodeEventEnvelope, req)
    if isinstance(envelope, Error):
        return not_ok(envelope, context=context)

    event_json = envelope.event.json(exclude_none=True)
    logging.info(
        "node event: machine_id: %s event: %s",
        envelope.machine_id,
        event_json,
    )

    outcome = process(envelope)
    if not isinstance(outcome, Error):
        return ok(BoolResult(result=True))

    logging.error("unable to process agent event. envelope:%s error:%s",
                  envelope, outcome)
    return not_ok(outcome, context=context)
示例#22
0
def patch(req: func.HttpRequest) -> func.HttpResponse:
    """Stop a node with ``done=True`` and clear its ``debug_keep_node`` flag.

    The request must parse as ``NodeGet`` and pass ``check_require_admins``.
    Returns a ``not_ok`` response with ``ErrorCode.UNABLE_TO_FIND`` when the
    node does not exist. The node is saved here only when
    ``debug_keep_node`` was set and had to be cleared.
    """
    context = "NodeReimage"

    lookup = parse_request(NodeGet, req)
    if isinstance(lookup, Error):
        return not_ok(lookup, context=context)

    admin_check = check_require_admins(req)
    if isinstance(admin_check, Error):
        return not_ok(admin_check, context=context)

    node = Node.get_by_machine_id(lookup.machine_id)
    if not node:
        not_found = Error(code=ErrorCode.UNABLE_TO_FIND,
                          errors=["unable to find node"])
        return not_ok(not_found, context=lookup.machine_id)

    node.stop(done=True)

    if node.debug_keep_node:
        node.debug_keep_node = False
        node.save()

    return ok(BoolResult(result=True))