Пример #1
0
def delete_cluster_task(log_type, log_pk):
    """Poll the host-module step of a cluster deletion, then run the delete handler.

    The log record is looked up through ``models.log_factory(log_type)`` and
    primary key ``log_pk``. Nothing happens when the model or the record is
    missing, or when the record is already finished. Otherwise the record is
    flagged as polling and ``_polling_host_module_once`` is called every
    ``POLLING_INTERVAL_SECONDS`` until the record is finished, stops polling,
    or ``DELETE_POLLING_TIMEOUT`` has elapsed. ``delete_handler`` is invoked
    afterwards unless the last poll reported termination.
    """
    log_model = models.log_factory(log_type)
    if not log_model:
        logger.error("log not found for type: %s", log_type)
        return None

    record = log_model.objects.filter(pk=log_pk).last()
    if not record:
        logger.error("log not found for pk: %s", log_pk)
        return None

    if record.is_finished:
        logger.error("log[%s] has been finished", log_pk)
        return None

    if not record.is_polling:
        record.is_polling = True
        record.save()

    terminated = False
    deadline = datetime.now() + DELETE_POLLING_TIMEOUT

    while True:
        if record.is_finished or not record.is_polling:
            break
        time.sleep(POLLING_INTERVAL_SECONDS)
        if datetime.now() > deadline:
            break
        try:
            record, terminated = _polling_host_module_once(record, _("1.修改主机模块"))
        except Exception as err:
            # A failed poll is logged and retried on the next iteration.
            logger.exception(err)

    # Run the actual deletion only when polling did not report termination.
    if not terminated:
        delete_handler(record)
Пример #2
0
def common_model_handler(log_type, log_pk, task_id_flag=False):
    """Resolve a log record and flag it as polling.

    Returns ``(model, log)`` when the record exists, is not finished and
    (if ``task_id_flag`` is set) carries a task id. Returns ``None`` when
    ``log_pk`` is falsy or any of those checks fails.
    """
    if not log_pk:
        return None

    log_model = models.log_factory(log_type)
    if not log_model:
        logger.error("log not found for type: %s", log_type)
        return None

    record = log_model.objects.filter(pk=log_pk).last()
    if not record:
        logger.error("log not found for pk: %s", log_pk)
        return None

    # The task id is only mandatory when the caller asks for it.
    if not record.task_id and task_id_flag:
        logger.error("task id is null for pk: %s", log_pk)
        return None

    if record.is_finished:
        logger.info("log[%s] has been finished", log_pk)
        return None

    if not record.is_polling:
        record.is_polling = True
        record.save()
    else:
        logger.warning("log[%s] is polling", log_pk)

    return log_model, record
Пример #3
0
def chain_polling_bke_status(log_info):
    """Install the BKE agent for a finished node log and poll its status.

    Args:
        log_info: ``(log_pk, log_type)`` pair identifying the source log
            record; a falsy value makes the function return immediately.

    Returns:
        Always ``None``. On an installation error the failure is recorded
        through ``bke_log_save`` with ``models.NodeStatus.BkeFailed``;
        otherwise ``_polling_bke_status`` is run for the new log record.
    """
    if not log_info:
        return
    log_pk, log_type = log_info
    model = models.log_factory(log_type)
    if not model:
        logger.error("log not found for type: %s", log_type)
        return
    log = model.objects.filter(pk=log_pk).last()
    if not log:
        logger.error("log not found for pk: %s", log_pk)
        return

    # A fresh NodeUpdateLog record tracks the BKE installation itself.
    new_log = models.NodeUpdateLog.objects.create(  # noqa
        project_id=log.project_id,
        cluster_id=log.cluster_id,
        token=log.token,
        node_id=log.node_id,
        params=log.params,
        operator=log.operator,
        oper_type=models.NodeOperType.BkeInstall
    )
    try:
        bke_cluster_info = helm_init(new_log.token, new_log.project_id, new_log.cluster_id, 'kube-system')
    except Exception as err:
        logger.error(
            "Install bke error, token: %s, project_id: %s, cluster_id: %s, error detail: %s",
            new_log.token, new_log.project_id, new_log.cluster_id, err
        )
        # Pass the message as text, like the other bke_log_save call sites do.
        bke_log_save(new_log, log_type, models.NodeStatus.BkeFailed, message="%s" % err)
        return

    # Record the failure when the installation reports an error code.
    if bke_cluster_info.get("code") != ErrorCode.NoError:
        message = bke_cluster_info.get("message")
        logger.error("Install bke error, detail: %s", message)
        bke_log_save(new_log, log_type, models.NodeStatus.BkeFailed, message)
        return
    _polling_bke_status(new_log.id)
Пример #4
0
def polling_so_init(old_log, log_pk=None, log_type=None):
    """Poll the SO initialisation job until it finishes or times out.

    Args:
        old_log: ``(log_pk, log_type)`` pair from the previous task; when
            truthy it overrides the ``log_pk`` / ``log_type`` arguments.
        log_pk: primary key of the log record, used when ``old_log`` is falsy.
        log_type: log model name, used when ``old_log`` is falsy.

    Returns:
        ``(log_pk, log_type)`` when polling ended without an SO
        initialisation failure, otherwise ``None`` (missing input, missing
        model/record, or status ``SoInitialFailed``).
    """
    if not (old_log or (log_pk and log_type)):
        return
    if old_log:
        log_pk, log_type = old_log
    model = models.log_factory(log_type)
    if not model:
        logger.error("log not found for type: %s", log_type)
        return
    log = model.objects.filter(pk=log_pk).last()
    if not log:
        logger.error("log not found for pk: %s", log_pk)
        # Stop here: without a record the loop below would dereference None.
        return

    end_time = datetime.now() + POLLING_TIMEOUT
    while not log.is_finished and log.is_polling:
        time.sleep(POLLING_INTERVAL_SECONDS)
        if datetime.now() > end_time:
            break
        try:
            log = _polling_so_initial_once(model, log)
        except Exception as err:
            # A failed poll is logged and retried on the next iteration.
            logger.exception(err)
    # NOTE(review): is_polling is not reset here (the update call was
    # commented out in the original) — confirm this is intended.
    # model.objects.filter(pk=log_pk).update(is_polling=False)
    if log.status in [models.CommonStatus.SoInitialFailed]:
        push_sentry(log, _("SO初始化失败"))
        # Propagate the failure to the node or cluster status.
        update_node_cluster_check_status(log, log_type, status=log.status)
        return
    return log_pk, log_type
Пример #5
0
def so_init(old_log, request=None):
    """Create an SO-initialisation log record and start the SO host job.

    Args:
        old_log: ``(log_pk, log_type)`` pair of the preceding log record; a
            falsy value makes the function return immediately.
        request: optional request object; when given, the access token and
            username of ``request.user`` replace the token and operator
            stored on the preceding record.

    Returns:
        ``(new_log.id, log_type)`` when the job was started. ``None`` when
        the input, model or record is missing, or when starting the job
        failed (the new record is then saved as ``SoInitialFailed``).
    """
    if not old_log:
        return
    log_pk, log_type = old_log
    model = models.log_factory(log_type)
    if not model:
        logger.error("log not found for type: %s", log_type)
        return
    log = model.objects.filter(pk=log_pk).last()
    if not log:
        logger.error("log not found for pk: %s", log_pk)
        # Stop here: the code below reads attributes of the record.
        return
    user_token = log.token
    username = log.operator
    if request:
        user_token = request.user.token.access_token
        username = request.user.username
    try:
        params = json.loads(log.params)
    except Exception:
        # Unparseable params are treated as "no params".
        params = {}
    save_params = {
        "project_id": log.project_id,
        "cluster_id": log.cluster_id,
        "token": user_token,
        "status": models.CommonStatus.SoInitial,
        "params": log.params,
        "operator": username,
        "oper_type": SO_INITIAL
    }
    # Only the cluster install log has no node_id column to fill.
    if log_type != "ClusterInstallLog":
        save_params["node_id"] = log.node_id
    new_log = model.objects.create(**save_params)
    # Trigger the initialisation job. The node_info keys are materialised
    # into a list so the client receives a plain sequence, and an explicit
    # null node_info is tolerated.
    ip_list = params.get("master_ips") or list((params.get("node_info") or {}).keys())
    resp = so.initial_host(username, ip_list)
    task_id = (resp.get("data") or {}).get("job_id")
    if not resp.get("result") or not task_id:
        new_log.is_finished = True
        new_log.is_polling = False
        new_log.status = models.CommonStatus.SoInitialFailed
        new_log.log = json.dumps({
            "state":
            "FAILURE",
            "node_tasks": [{
                "state": "FAILURE",
                "name": f"{_('1.SO初始化失败')}: {resp.get('message')}"
            }]
        })
        new_log.save()
        update_node_cluster_check_status(
            new_log, log_type, status=models.CommonStatus.SoInitialFailed)

        return
    new_log.task_id = task_id
    new_log.is_polling = True
    new_log.save()
    return new_log.id, log_type
Пример #6
0
def node_exec_bcs_task(old_log, request=None):
    """Start the BCS "add cluster node" task for the nodes of a log record.

    Args:
        old_log: ``(log_pk, log_type)`` pair of the preceding log record; a
            falsy value makes the function return immediately.
        request: optional request object; when given, the access token and
            username of ``request.user`` replace the token and operator
            stored on the preceding record.

    Returns:
        The id of the newly created ``NodeUpdateLog`` when the BCS task was
        accepted, otherwise ``None``.
    """
    if not old_log:
        return
    log_pk, log_type = old_log
    model = models.log_factory(log_type)
    if not model:
        logger.error("log not found for type: %s", log_type)
        return
    log = model.objects.filter(pk=log_pk).last()
    if not log:
        logger.error("log not found for pk: %s", log_pk)
        # Stop here: the code below reads attributes of the record.
        return

    # Parse the stored parameters; unparseable params count as empty.
    try:
        params = json.loads(log.params)
    except Exception:
        params = {}
    node_info = params.get("node_info") or {}
    user_token = log.token
    username = log.operator
    project_id = log.project_id
    cluster_id = log.cluster_id
    if request:
        user_token = request.user.token.access_token
        username = request.user.username
    new_log = models.NodeUpdateLog.objects.create(  # noqa
        project_id=project_id,
        cluster_id=cluster_id,
        token=user_token,
        node_id=",".join(node_info.values()),
        params=json.dumps(params),
        operator=username,
    )
    try:
        client = BCSClient(
            user_token, project_id, cluster_id, None
        )
        rsp = client.add_cluster_node(
            params.get("kind_name"), username,
            list(node_info.keys()), params.get("cc_app_id")
        )
    except Exception as error:
        logger.error("add add_cluster_node error: %s", error)
        node_ip_status(new_log, new_log.project_id, new_log.cluster_id, node_info)
        return

    if rsp.get("code") != ErrorCode.NoError:
        node_ip_status(new_log, new_log.project_id, new_log.cluster_id, node_info)
        push_sentry(new_log, _("节点初始化失败"))
        return

    # Remember the BCS task id on the new record.
    data = rsp.get("data") or {}
    taskid = data.get("taskID")
    new_log.task_id = taskid
    new_log.save()
    return new_log.id
Пример #7
0
def chain_polling_task(log_pk, log_type):
    """Poll a BCS task to completion and run the follow-up notifications.

    Returns ``(log.id, log_type)`` only for a ``NodeUpdateLog`` that ended
    with status ``Normal`` on a cluster whose id contains ``'K8S'``; in every
    other case (including missing input, model, record or task id, or an
    already finished record) the result is ``None``.
    """
    if not log_pk:
        return None

    log_model = models.log_factory(log_type)
    if not log_model:
        logger.error("log not found for type: %s", log_type)
        return None

    record = log_model.objects.filter(pk=log_pk).last()
    if not record:
        logger.error("log not found for pk: %s", log_pk)
        return None

    if not record.task_id:
        logger.error("task id is null for pk: %s", log_pk)
        return None

    if record.is_finished:
        logger.info("log[%s] has been finished", log_pk)
        return None

    if not record.is_polling:
        record.is_polling = True
        record.save()
    else:
        logger.warning("log[%s] is polling", log_pk)

    deadline = datetime.now() + POLLING_TIMEOUT
    while True:
        if record.is_finished or not record.is_polling:
            break
        time.sleep(POLLING_INTERVAL_SECONDS)
        if datetime.now() > deadline:
            break
        try:
            record = _polling_once(log_model, record)
        except Exception as err:
            logger.exception(err)

    log_model.objects.filter(pk=log_pk).update(is_polling=False)

    # Namespace registration is best effort and must not break the flow.
    try:
        register_ns(record)
    except Exception as err:
        logger.error("Register default namespace: %s" % err)

    # TODO: drop this notification once the op system is live.
    try:
        should_notify = (
            log_type == "ClusterInstallLog"
            and record.oper_type in ["initialize", "reinstall"]
            and record.status == "normal"
        )
        if should_notify:
            send_msg_for_cluster(record)
    except Exception as err:
        logger.error("Send cluster info failed: %s" % err)

    k8s_node_succeeded = (
        (log_type == "NodeUpdateLog")
        and (record.status == models.CommonStatus.Normal)
        and ('K8S' in record.cluster_id)
    )
    if not k8s_node_succeeded:
        return None
    return record.id, log_type
Пример #8
0
def polling_task(log_type, log_pk):
    """Poll a BCS task to completion and report failures to sentry.

    Returns ``(log.id, log_type)`` only for a ``NodeUpdateLog`` that ended
    with status ``Normal``, whose operation is not ``NodeRemove`` and whose
    cluster id contains ``'K8S'``; otherwise the result is ``None``.
    """
    log_model = models.log_factory(log_type)
    if not log_model:
        logger.error("log not found for type: %s", log_type)
        return None

    record = log_model.objects.filter(pk=log_pk).last()
    if not record:
        logger.error("log not found for pk: %s", log_pk)
        return None

    if not record.task_id:
        logger.error("task id is null for pk: %s", log_pk)
        return None

    if record.is_finished:
        logger.info("log[%s] has been finished", log_pk)
        return None

    if not record.is_polling:
        record.is_polling = True
        record.save()
    else:
        logger.warning("log[%s] is polling", log_pk)

    deadline = datetime.now() + POLLING_TIMEOUT
    while True:
        if record.is_finished or not record.is_polling:
            break
        time.sleep(POLLING_INTERVAL_SECONDS)
        if datetime.now() > deadline:
            break
        try:
            record = _polling_once(log_model, record)
        except Exception as err:
            logger.exception(err)

    log_model.objects.filter(pk=log_pk).update(is_polling=False)

    # Report any non-normal outcome to sentry with a message matching the
    # log type and the kind of operation.
    if record.status != models.CommonStatus.Normal:
        initial_opers = [INITIAL_CHECK, INITIALIZE, SO_INITIAL, REINSTALL]
        is_initial_oper = record.oper_type in initial_opers
        if log_type == "ClusterInstallLog":
            prefix_msg = _("初始化集群失败") if is_initial_oper else _("删除集群失败")
        else:
            prefix_msg = _("初始化节点失败") if is_initial_oper else _("删除节点失败")
        push_sentry(record, prefix_msg)

    hand_over = (
        (log_type == "NodeUpdateLog")
        and (record.status == models.CommonStatus.Normal)
        and (record.oper_type not in (models.NodeOperType.NodeRemove,))
        and ('K8S' in record.cluster_id)
    )
    if not hand_over:
        return None
    return record.id, log_type
Пример #9
0
def delete_cluster_node_polling(new_log):
    """Poll the BCS node-deletion task until it finishes or times out.

    Args:
        new_log: ``(log_type, log_id)`` pair; nothing is done when either
            element is falsy.

    Returns:
        Always ``None``. After polling, ``is_polling`` is reset to ``False``
        on the stored record.
    """
    log_type, log_id = new_log
    if not (log_type and log_id):
        return
    model = models.log_factory(log_type)
    if not model:
        logger.error("log not found for type: %s", log_type)
        return
    log = model.objects.filter(id=log_id).last()
    if not log:
        logger.error("log not found for pk: %s", log_id)
        return
    end_time = datetime.now() + POLLING_TIMEOUT

    while not log.is_finished and log.is_polling:
        time.sleep(POLLING_INTERVAL_SECONDS)
        if datetime.now() > end_time:
            break
        try:
            log = _polling_once(model, log)
        except Exception as err:
            # A failed poll is logged and retried on the next iteration.
            logger.exception(err)
    model.objects.filter(pk=log_id).update(is_polling=False)
Пример #10
0
def polling_initial_task(log_type, log_pk):
    """Poll the initial (pre-flight) check of a log record.

    Returns ``(log_pk, log_type)`` when polling ended without the status
    ``InitialCheckFailed``. Returns ``None`` when the model or record is
    missing, the record is already finished, a poll raised an exception, or
    the check failed.
    """
    log_model = models.log_factory(log_type)
    if not log_model:
        logger.error("log not found for type: %s", log_type)
        return None

    record = log_model.objects.filter(pk=log_pk).last()
    if not record:
        logger.error("log not found for pk: %s", log_pk)
        return None

    if record.is_finished:
        logger.info("log[%s] has been finished", log_pk)
        return None

    if not record.is_polling:
        record.is_polling = True
        record.save()
    else:
        logger.warning("log[%s] is polling", log_pk)

    deadline = datetime.now() + POLLING_TIMEOUT
    while True:
        if record.is_finished or not record.is_polling:
            break
        time.sleep(POLLING_INTERVAL_SECONDS)
        if datetime.now() > deadline:
            break
        try:
            record = _polling_initial_once(log_model, record)
        except Exception as err:
            logger.exception(err)
            # NOTE(review): this early return skips the is_polling reset
            # below, so the stored flag stays True — confirm that is intended.
            return None

    log_model.objects.filter(pk=log_pk).update(is_polling=False)

    check_failed = record.status in [models.CommonStatus.InitialCheckFailed]
    if check_failed:
        push_sentry(record, _("前置检查失败"))
        # Propagate the failure to the node or cluster status.
        update_node_cluster_check_status(record, log_type)
        return None
    return log_pk, log_type
Пример #11
0
def delete_cluster_node(new_log):
    """Start the BCS task that removes nodes from a cluster.

    Args:
        new_log: ``(log_type, log_id)`` pair; when either element is falsy
            the pair is returned unchanged.

    Returns:
        ``(log_type, log.id)`` when the BCS task was accepted, so the next
        task can poll it. ``(None, None)`` when the model or record cannot be
        found or the BCS call reports failure (the record is then saved as
        ``RemoveFailed`` and the node status is updated through ``paas_cc``).
    """
    log_type, log_id = new_log
    if not (log_type and log_id):
        return log_type, log_id
    model = models.log_factory(log_type)
    if not model:
        logger.error("log not found for type: %s", log_type)
        return None, None
    log = model.objects.filter(id=log_id).last()
    if not log:
        logger.error("log not found for pk: %s", log_id)
        return None, None
    params = json.loads(log.params)
    # Trigger the BCS task.
    bcs_client = BCSClient(
        log.token, log.project_id, log.cluster_id, None
    )
    resp = bcs_client.delete_cluster_node(
        params.get("kind_name"), log.operator, list((params.get("nodes") or {}).keys())
    )
    if not resp.get("result"):
        log.is_finished = True
        log.is_polling = False
        log.status = models.CommonStatus.RemoveFailed
        log.log = json.dumps({
            "state": "remove_failed",
            "node_tasks": [{
                "state": "FAILURE",
                "name": resp.get("message")
            }]
        })
        log.save()
        result = paas_cc.update_node(
            log.token, log.project_id, params["node_id"],
            {"status": models.CommonStatus.RemoveFailed}
        )
        # The chain stops either way; a failed status write is at least logged.
        if result.get("code") != ErrorCode.NoError:
            logger.error("update node status failed for pk: %s", log_id)
        return None, None

    # Remember the BCS task id and hand the record over to the polling task.
    data = resp.get("data") or {}
    taskid = data.get("taskID")
    log.task_id = taskid
    log.is_polling = True
    log.save()
    return log_type, log.id
Пример #12
0
def _polling_bke_status(pk, log_type="NodeUpdateLog"):
    """Retry fetching the cluster credential and record the BKE result.

    Args:
        pk: primary key of the log record to poll for.
        log_type: name of the log model, resolved via ``models.log_factory``.

    Returns:
        Always ``None``. The outcome is stored through ``bke_log_save``:
        ``models.CommonStatus.Normal`` when the credential could be fetched
        (or no attempt was made), ``models.NodeStatus.BkeFailed`` with the
        last error text when every attempt before ``BKE_POLLING_TIMEOUT``
        failed.
    """
    model = models.log_factory(log_type)
    if not model:
        logger.error("log not found for type: %s", log_type)
        return
    log = model.objects.filter(pk=pk).last()
    if not log:
        logger.error("log not found for pk: %s", pk)
        return

    end_time = datetime.now() + BKE_POLLING_TIMEOUT
    status = models.CommonStatus.Normal
    message = ""
    while not log.is_finished and log.is_polling:
        if datetime.now() > end_time:
            break
        try:
            bke_client = get_bcs_client(log.project_id, log.cluster_id, log.token)
            bke_client.get_cluster_credential()
        except Exception as err:
            status = models.NodeStatus.BkeFailed
            message = "%s" % err
            # Wait before the next attempt instead of retrying back-to-back
            # for the whole timeout window.
            time.sleep(POLLING_INTERVAL_SECONDS)
        else:
            status = models.CommonStatus.Normal
            # Do not attach an earlier attempt's error to a successful result.
            message = ""
            break
    bke_log_save(log, log_type, status, message=message)
Пример #13
0
    def get_task_record(self) -> Optional[ModelLogRecord]:
        """Return the task record referenced by ``self.params``.

        ``self.params["model_type"]`` selects the log model (cluster/node)
        and ``self.params["pk"]`` the record id. ``None`` is returned when
        the model type is unknown or the record does not exist. A finished
        record is still returned, after an info log entry.
        """
        task_params = self.params
        model_type = task_params["model_type"]
        task_record_id = task_params["pk"]

        task_model = models.log_factory(model_type)
        if not task_model:
            logger.error(f'not found {model_type} task')
            return None

        try:
            record = task_model.objects.get(pk=task_record_id)
        except task_model.DoesNotExist:
            logger.error(f'not found task: {task_record_id}')
            return None

        # A finished task is only logged; the record is returned regardless.
        if record.is_finished:
            logger.info(f'record: {task_record_id} has been finished')
        return record
Пример #14
0
def polling_task(log_type, log_pk):
    """Poll a task until it finishes or times out, then sync its status.

    Nothing happens when the model or record is missing, the record has no
    task id, or it is already finished. A record that is still unfinished
    after polling is marked finished with the status computed by
    ``log_status``. ``update_status`` is called in the end in both cases.
    """
    log_model = models.log_factory(log_type)
    if not log_model:
        logger.error("log not found for type: %s", log_type)
        return None

    record = log_model.objects.filter(pk=log_pk).last()
    if not record:
        logger.error("log not found for pk: %s", log_pk)
        return None

    if not record.task_id:
        logger.error("task id is null for pk: %s", log_pk)
        return None

    if record.is_finished:
        logger.info("log[%s] has been finished", log_pk)
        return None

    deadline = datetime.now() + POLLING_TIMEOUT
    while True:
        if record.is_finished or not record.is_polling:
            break
        time.sleep(POLLING_INTERVAL_SECONDS)
        if datetime.now() > deadline:
            break
        try:
            record = _polling_once(log_model, record)
        except Exception as err:
            logger.exception("query task failed, detail: %s" % err)

    # Close the record when polling ended without the task finishing.
    if not record.is_finished:
        record.is_finished = True
        record.is_polling = False
        record.status = log_status(record)
        record.save()

    # Push the final status to the configuration center.
    update_status(log_type, record)
Пример #15
0
def exec_bcs_task(old_log, request=None):
    """Start the BCS "create cluster" task for a log record.

    ``old_log`` is a ``(log_pk, log_type)`` pair; a falsy value returns
    immediately. When ``request`` is given, the access token and username of
    ``request.user`` replace the token and operator stored on the record.

    Returns the id of the new ``ClusterInstallLog`` when BCS accepted the
    task, otherwise ``None``.
    """
    if not old_log:
        return None

    # Resolve the preceding record; without it nothing can be started.
    log_pk, log_type = old_log
    log_model = models.log_factory(log_type)
    if not log_model:
        logger.error("log not found for type: %s", log_type)
        return None
    prev_log = log_model.objects.filter(pk=log_pk).last()
    if not prev_log:
        logger.error("log not found for pk: %s", log_pk)
        return None

    # Credentials default to the record and are overridden by the request.
    user_token, username = prev_log.token, prev_log.operator
    if request:
        user_token, username = request.user.token.access_token, request.user.username

    # Unparseable params are treated as empty.
    try:
        params = json.loads(prev_log.params)
    except Exception:
        params = {}

    new_log = models.ClusterInstallLog.objects.create(
        project_id=prev_log.project_id,
        cluster_id=prev_log.cluster_id,
        token=user_token,
        status=models.CommonStatus.Initializing,
        params=prev_log.params,
        operator=username
    )

    client = BCSClient(user_token, params.get("project_id"), params.get("cluster_id"), None)
    create_options = {
        "modules": params.get("module_list", ""),
        "appID": constants.BCS_APP_ID,
        "needNat": params.get("need_nat", True),
    }
    rsp = client.create_cluster(
        params.get("kind_name"),
        username,
        params.get("master_ips", []),
        data=create_options
    )

    if rsp.get("code") != ErrorCode.NoError:
        new_log.is_finished = True
        new_log.is_polling = False
        new_log.status = models.CommonStatus.InitialFailed
        # Keep the error message on the log record.
        failure_detail = {
            "state": "FAILURE",
            "node_tasks": [{"state": "FAILURE", "name": rsp.get("message")}]
        }
        new_log.log = json.dumps(failure_detail)
        new_log.save()
        result = paas_cc.update_cluster(
            user_token,
            params.get("project_id"),
            params.get("cluster_id"),
            {"status": models.CommonStatus.InitialFailed}
        )
        # TODO: find a way to retry the write when it does not succeed.
        status_write_failed = result.get("code") != ErrorCode.NoError
        if not status_write_failed:
            push_sentry(new_log, _("初始化集群失败"))
        return None

    task_data = rsp.get("data") or {}
    new_log.task_id = task_data.get("taskID")
    new_log.save()

    # A failing CC request is logged but does not stop the flow.
    try:
        cc.host_standard_property(username, params.get("master_ips", []), bak_operator_flag=True)
    except Exception as err:
        logger.error("Request cc error, detail: %s" % err)

    # The returned id triggers the follow-up task.
    return new_log.id