def update_cron_status(self, bk_biz_id: int, cron_status: int, cron_id: int):
    """Start or pause a scheduled (cron) job via the JOB API.

    :param bk_biz_id: business id
    :param cron_status: target state: 1 = start, 2 = pause
    :param cron_id: id of the cron job to update
    :return: dict with keys ``result``, ``message`` and ``cron_id``
    """
    params = {
        "bk_app_code": app_code,
        "bk_app_secret": app_secret,
        "bk_token": self.bk_token,
        "bk_biz_id": bk_biz_id,
        "cron_status": cron_status,  # 1 = start, 2 = pause
        "cron_id": cron_id
    }
    resp = self.client.job.update_cron_status(params)
    save_json("update_cron_status", resp)
    outcome = {"result": False, "message": "nothing", "cron_id": 0}
    if not resp.get("result", False):
        logger.error("更新定时作业状态失败:%s" % resp['message'])
        outcome['message'] = resp['message']
    else:
        outcome["result"] = resp['result']
        outcome["cron_id"] = resp['data']['cron_id']
    return outcome
def save_cron(
    self,
    bk_biz_id: int,
    bk_job_id: int,
    cron_name,
    cron_expression,
):
    """Create a new cron job or save changes to an existing one.

    :param bk_biz_id: business id
    :param bk_job_id: job template the cron job will run
    :param cron_name: display name of the cron job
    :param cron_expression: crontab-style schedule expression
    :return: dict with keys ``result``, ``message`` and ``cron_id``
    """
    params = {
        "bk_app_code": app_code,
        "bk_app_secret": app_secret,
        "bk_token": self.bk_token,
        "bk_biz_id": bk_biz_id,
        "bk_job_id": bk_job_id,
        "cron_name": cron_name,
        "cron_expression": cron_expression
    }
    resp = self.client.job.save_cron(params)
    save_json("save_cron", resp)
    outcome = {"result": False, "message": "nothing", "cron_id": 0}
    if not resp.get("result", False):
        logger.error("新建或保存定时作业失败:%s" % resp['message'])
        outcome['message'] = resp['message']
    else:
        outcome["result"] = resp['result']
        outcome["cron_id"] = resp['data']['cron_id']
    return outcome
def get_step_instance_status(self, bk_biz_id: int, job_instance_id=0, step_instance_id=0):
    """Query the execution status of a job step.

    :param bk_biz_id: business id
    :param job_instance_id: id of the job instance
    :param step_instance_id: id of the step inside that instance
    :return: dict with keys ``result``, ``message`` and ``status``
    """
    params = {
        "bk_app_code": app_code,
        "bk_app_secret": app_secret,
        "bk_token": self.bk_token,
        "bk_biz_id": bk_biz_id,
        "params": {
            "job_instance_id": job_instance_id,
            "step_instance_id": step_instance_id
        }
    }
    resp = self.client.job.get_step_instance_status(params)
    outcome = {"result": False, "message": "nothing", "status": {}}
    if not resp.get("result", False):
        logger.error("查询作业步骤的执行状态失败:%s" % resp['message'])
        outcome['message'] = resp['message']
    else:
        outcome["result"] = resp['result']
        outcome["status"] = resp['data']
    return outcome
def get_script_detail(self, bk_biz_id: int, id: int):
    """Query the detail of a script.

    :param bk_biz_id: business id
    :param id: script id (name kept for caller compatibility even
        though it shadows the ``id`` builtin)
    :return: dict with keys ``result``, ``message`` and ``detail``
    """
    kwargs = {
        "bk_app_code": app_code,
        "bk_app_secret": app_secret,
        "bk_token": self.bk_token,
        "bk_biz_id": bk_biz_id,
        "id": id
    }
    data = self.client.job.get_script_detail(kwargs)
    save_json("get_script_detail", data)
    result = {'result': False, 'message': 'Nothing', "detail": {}}
    if data.get('result', False):
        result['result'] = data['result']
        result['detail'] = data['data']
    else:
        # BUGFIX: previously logged result.get('message') — the 'Nothing'
        # placeholder — instead of the actual API error message.
        logger.error(u'查询脚本详情失败:%s' % data.get('message'))
        result['message'] = data['message']
    return result
def create_record_detail(task_info):
    """Persist one task record for a cluster.

    @param task_info: {
        "db_type": 1/2/3,
        "app_id": app_id,
        "cluster_name": cluster_name,
        "task_type": xxx,
        "task_mode": xxx,
        "pipeline_id": xxx,
        "task_params": task_params,
    }
    @return: the new record's ``task_id``, or ``None`` when creation
        failed (any exception is logged and swallowed).
    """
    try:
        task_model = TaskRecord.objects.create(
            db_type=task_info['db_type'],
            app_id=task_info['app_id'],
            cluster_name=task_info['cluster_name'],
            task_mode=task_info['task_mode'],
            task_type=task_info['task_type'],
            op_user=task_info['op_user'],
            task_kwargs=json.dumps(task_info['task_params']),
            pipeline_id=task_info['pipeline_id'],
        )
    except Exception as err:
        # BUGFIX: the original used `finally: return task_id`, a known
        # anti-pattern that silently swallows *every* exception (including
        # ones raised while logging). Behavior is preserved — None on
        # failure — but the control flow is now explicit.
        logger.error(str(err))
        return None
    return task_model.task_id
def input_task_sync_db(task_kwargs, is_state, db_type, root_id=None):
    """
    Sync backend info for a cluster-import task based on its parameters
    and current state.

    :param task_kwargs: task parameters; for db_type 1 the key
        'target_ip' is read (presumably the imported host — TODO confirm)
    :param is_state: pipeline state string; only 'FINISHED' triggers work
    :param db_type: component type (1 looks like ES, 2/3 other engines —
        NOTE(review): confirm the mapping against the task-type tables)
    :param root_id: root pipeline id used to load the pipeline tree
    :return: True on success, False when no cluster id was produced
    """
    cluster_id = None
    if is_state == 'FINISHED':
        # Generate the cluster-related information.
        if db_type == 1:
            success_message = None
            pipeline_tree = TaskRecord.objects.get(
                pipeline_id=root_id).pipeline_tree
            pipeline_tree = json.loads(pipeline_tree)
            # NOTE(review): each iteration overwrites success_message, so
            # only the last node's output is kept — confirm this is the
            # intended node.
            for node_id in pipeline_tree:
                success_message = PipelineTaskApi({
                    'node_id': node_id
                }).get_node_output()
            if success_message:
                cluster_id = sync_db(
                    success_message.get(task_kwargs['target_ip']),
                    task_kwargs)
            else:
                logger.error("该es集群录入检测时,查不到返回的结果")
        elif db_type == 2:
            cluster_id = create_cluster_info(task_kwargs)
        elif db_type == 3:
            # Intentionally a no-op for this component type.
            pass
    if not cluster_id:
        logger.error(f"部署/录入集群初始化数据失败, 组件类型: {db_type}")
        return False
    return True
def fast_execute_sql(self, bk_biz_id, script_id, db_account_id, ips):
    """Quickly execute a SQL script on the given hosts.

    :param bk_biz_id: business id
    :param script_id: id of the SQL script to run
    :param db_account_id: id of the DB account used for execution
    :param ips: iterable of host IPs (cloud area 0 is assumed)
    :return: dict with keys ``result``, ``message`` and ``job_instance_id``
    """
    target_hosts = [{"bk_cloud_id": 0, "ip": addr} for addr in ips]
    params = {
        "bk_app_code": app_code,
        "bk_app_secret": app_secret,
        "bk_token": self.bk_token,
        "bk_biz_id": bk_biz_id,
        "db_account_id": db_account_id,
        "script_id": script_id,
        "script_timeout": 1000,
        "ip_list": target_hosts
    }
    resp = self.client.job.fast_execute_sql(params)
    outcome = {"result": False, "message": "nothing", "job_instance_id": 0}
    if not resp.get("result", False):
        logger.error("快速执行sql脚本失败:%s" % resp['message'])
        outcome['message'] = resp['message']
    else:
        outcome["result"] = resp['result']
        outcome["job_instance_id"] = resp['data']['job_instance_id']
    return outcome
def find_host_by_module(self, bk_biz_id: int, bk_module_id):
    """Fetch the hosts under a CMDB module.

    :param bk_biz_id: business id
    :param bk_module_id: module id to look up
    :return: dict with keys ``result``, ``message`` and ``data``
        (``data`` is a list of host dicts)
    """
    params = {
        "bk_app_code": app_code,
        "bk_app_secret": app_secret,
        "bk_token": self.bk_token,
        "bk_biz_id": bk_biz_id,
        "bk_module_ids": [bk_module_id],
    }
    resp = self.client.cc.find_host_by_module(params)
    outcome = {"result": False, "message": "nothing", "data": []}
    if not resp.get("result", False):
        logger.error("获取模块下主机失败:%s" % resp['message'])
        outcome['message'] = resp['message']
    else:
        outcome["result"] = resp['result']
        outcome['data'] = [entry['host'] for entry in resp['data']['info']]
    return outcome
def list_biz_hosts(self, bk_biz_id: int, bk_obj_id, bk_inst_ids=None):
    """Query the hosts under a business.

    :param bk_biz_id: business id
    :param bk_obj_id: object type ("set", "module", ...) — used to build
        the ``bk_<obj>_ids`` filter key
    :param bk_inst_ids: instance ids to filter by; defaults to no filter
    :return: dict with keys ``result``, ``message`` and ``data``
    """
    # BUGFIX: mutable default argument (bk_inst_ids=[]) replaced with the
    # None sentinel; the empty-list behavior is unchanged for callers.
    if bk_inst_ids is None:
        bk_inst_ids = []
    kwargs = {
        "bk_app_code": app_code,
        "bk_app_secret": app_secret,
        "bk_token": self.bk_token,
        "bk_biz_id": bk_biz_id,
        "page": {
            "start": 0,
            "limit": 500,  # NOTE(review): results beyond 500 are not paged
            "sort": "bk_host_id"
        },
        f"bk_{bk_obj_id}_ids": bk_inst_ids,
    }
    data = self.client.cc.list_biz_hosts(kwargs)
    result = {"result": False, "message": "nothing", "data": []}
    if data.get("result", False):
        result["result"] = data['result']
        result["data"] = data['data']['info']
    else:
        logger.error("获取业务下主机失败 %s" % data['message'])
        result['message'] = data['message']
    return result
def op_pipeline_task(kwargs):
    """Manage a pipeline task.

    Currently supported operations: revoke, resume and pause.
    Returns a success/fail result dict, or None when an unexpected
    exception occurred (it is logged).
    """
    try:
        pipeline_id = kwargs.get("id")
        op_type = kwargs.get('op_type')
        task = PipelineTaskApi({'pipeline_id': pipeline_id})
        # Dispatch table instead of an if/elif ladder.
        handlers = {
            'revoke': task.task_revoke,
            'resume': task.task_resume,
            'pause': task.task_pause,
        }
        handler = handlers.get(op_type)
        if handler is None:
            return build_fail_result("后端暂不执行该类型操作:{}".format(op_type))
        if handler():
            return build_success_result("操作成功")
        return build_fail_result("操作失败")
    except Exception as err:
        logger.error(str(err))
def search_business(self, condition=None):
    """Query businesses from CMDB.

    :param condition: optional CMDB filter condition; defaults to no
        filtering
    :return: dict with keys ``result``, ``message`` and ``data``
        (``data`` is a list of {bk_biz_id, bk_biz_name} dicts)
    """
    # BUGFIX: mutable default argument (condition={}) replaced with the
    # None sentinel; the empty-dict behavior is unchanged for callers.
    if condition is None:
        condition = {}
    kwargs = {
        "bk_app_code": app_code,
        "bk_app_secret": app_secret,
        "bk_token": self.bk_token,
        "fields": ["bk_biz_id", "bk_biz_name"],
        "condition": condition
    }
    data = self.client.cc.search_business(kwargs)
    result = {"result": False, "message": "nothing", "data": []}
    if data.get("result", False):
        result['result'] = data['result']
        result['data'] = data['data']['info']
    else:
        logger.error("查询业务失败:%s" % data['message'])
        result['message'] = data['message']
    return result
def change_state_by_signal(node_id, to_state, root_id):
    """Map a pipeline signal to a task-status update and backend sync.

    FAILED applies to any node; the other states only matter when the
    signal comes from the root pipeline itself. States that do not match
    (e.g. a non-root node still running) are ignored.
    """
    task_info = get_task_record(root_id)
    if not task_info:
        logger.error("后台记录中任务参数为空")
        return None
    # Status codes recorded for root-level transitions.
    root_status_map = {
        'REVOKED': 8,    # pipeline revoked
        'FINISHED': 3,   # pipeline completed
        'RUNNING': 2,    # pipeline started
        'SUSPENDED': 5,  # pipeline paused
    }
    if to_state == 'FAILED':
        status = 4  # a node failed anywhere in the pipeline
    elif node_id == root_id and to_state in root_status_map:
        status = root_status_map[to_state]
    else:
        # Still running or irrelevant node — nothing to do.
        return None
    update_record_detail_by_pipeline_id(root_id, {"task_status": status})
    change_state_by_task(to_state, task_info, root_id)
def change_state_by_task(state, task_info, root_id):
    """Dispatch the backend-data sync matching the task's type.

    The numeric task types are grouped by operation family; each family
    has its own sync routine. Unknown types are logged and ignored.
    """
    task_kwargs = json.loads(task_info['task_kwargs'])
    db_type = task_info['db_type']
    task_type = task_info['task_type']
    if task_type in (1, 2, 3):
        # Cluster deployment tasks.
        deploy_task_sync_db(task_kwargs, state, db_type)
    elif task_type in (4, 6, 10):
        # Cluster node scale-out tasks.
        add_node_task_sync_db(task_kwargs, state, db_type)
    elif task_type == 8:
        # Disk expansion (HDFS-specific).
        hadoop_add_dir_task_sync_db(task_kwargs, state)
    elif task_type in (5, 7, 11):
        # Cluster scale-in tasks.
        remove_task_sync_db(task_kwargs, state, db_type)
    elif task_type == 9:
        # Cluster import tasks.
        input_task_sync_db(task_kwargs, state, db_type, root_id)
    else:
        logger.error("没有匹配到对应的任务类型,无法更新数据:task_info:{}".format(
            task_info['task_type']))
    return None
def reduce_es_node(reduce_info):
    """
    Delete node records after an ES cluster scale-in.

    @param reduce_info: {
        "app_id": business id,
        "cluster_id": cluster id,
        "cluster_name": cluster name,
        "version": version number,
        "http_port": ES HTTP port,
        "target_ips": IPs to remove,
        "master_str": comma-separated master node info,
        "bk_username": task creator,
        "task_type": task type,
    }
    @return: True when all updates committed, False on any exception
        (the transaction rolls back in that case).
    """
    try:
        # All decrements and the delete commit or roll back together.
        with transaction.atomic():
            # Decrement the per-role node counter for each removed IP.
            # F() keeps the decrement atomic on the DB side; one UPDATE
            # is issued per IP.
            for reduce_ip in reduce_info['target_ips']:
                role = EsNodeInfo.objects.get(ip=reduce_ip).role
                update_data = {f'{role}_cnt': F(f'{role}_cnt') - 1}
                EsCluster.objects.filter(id=reduce_info['cluster_id']).update(
                    **update_data)
            EsNodeInfo.objects.filter(
                ip__in=reduce_info['target_ips']).delete()
            return True
    except Exception as err:
        logger.error(f"ES集群数据更新发生异常{str(err)}")
        return False
def fast_push_file(self, kwargs):
    """Quickly distribute files via the JobV3 API.

    On success, polls the task result and returns it; on API failure,
    logs the error and returns None.
    """
    kwargs.update({
        "bk_username": self.bk_username,
    })
    resp = JobV3Api.fast_transfer_file(kwargs, raw=True)
    # Guard clause: bail out early on API failure.
    if not resp["result"]:
        logger.error(
            build_job_err_log_format(self.bk_username, 'fast_push_file',
                                     kwargs, resp))
        return None
    status = self.get_task_result_status({
        "job_instance_id": resp["data"].get("job_instance_id"),
        "bk_biz_id": kwargs.get("bk_biz_id"),
    })
    logger.info(
        build_job_exec_log_format(self.bk_username, 'fast_push_file',
                                  kwargs['task_name']))
    return status
def async_task(x, y):
    """Sample celery async task: logs the inputs, sleeps, returns x + y.

    :param x: first operand
    :param y: second operand
    :return: x + y
    """
    # BUGFIX: a success message was logged at ERROR level, polluting
    # error monitoring; demoted to INFO. The message text is unchanged.
    logger.info(u"celery 定时任务执行成功,执行结果:{:0>2}:{:0>2}".format(x, y))
    time.sleep(5)
    return x + y
def async_task(username='******'):
    """Celery async task: run the JOB task and persist disk-capacity data.

    :param username: operator on whose behalf the JOB task runs
    """
    # Execute the JOB task and write capacity info into the database.
    save_capacity_data(username)
    # BUGFIX: a success message was logged at ERROR level; demoted to
    # INFO. The message text is unchanged.
    logger.info("save capacity celery任务执行成功")
def create_topic(self, request, *args, **kwargs):
    """
    /kafka/create_topic
    Create a kafka topic on the named cluster.
    """
    # Local helper so every branch builds its JSON body the same way.
    def _json(ok, msg):
        return JsonResponse({
            'result': ok,
            'code': 0 if ok else 1,
            'data': [],
            'message': msg
        })

    try:
        body = request.data
        target_cluster = body.get('cluster_name')
        topic_name = body.get('topic')
        operator = request.user.username
        if create_topic(operator, target_cluster, topic_name):
            return _json(True, 'topic创建成功')
        return _json(False, 'topic创建失败')
    except Exception as e:
        logger.error(f'create failed:{e}')
        return _json(False, f'{e}')
def execute_script(request):
    """Execute the selected stored script on the chosen hosts via JOB.

    Reads biz_id, script_id and ip_id[] from the POST body, base64-encodes
    the script content, submits it with fast_execute_script, and kicks off
    async status polling on success.
    """
    biz_id = request.POST.get("biz_id")
    script_id = request.POST.get("script_id")
    obj = SelectScript.objects.get(id=script_id)
    encoded = base64.b64encode(obj.scriptcontent.encode("utf-8"))
    ip_id = request.POST.getlist("ip_id[]")
    # BUGFIX: each host entry previously left "ip": 0 untouched and stored
    # the host in an unused "id" key (ips['id'] = i); the JOB API expects
    # the host under "ip". TODO confirm ip_id[] carries IP strings.
    ip_info = [{"bk_cloud_id": 0, "ip": host} for host in ip_id]
    kwargs = {
        # BUGFIX: key was misspelled "bz_biz_id", so the business id never
        # reached the API.
        "bk_biz_id": biz_id,
        "script_content": str(encoded, "utf-8"),
        "account": "root",
        "script_type": 1,
        "ip_list": ip_info,
    }
    execute_data = client.job.fast_execute_script(kwargs)
    if execute_data.get("result", False):
        data = execute_data["data"]
        result = True
        message = str(execute_data.get("message"))
        async_status.apply_async(args=[client, data, biz_id, obj, ip_id],
                                 kwargs={})
    else:
        data = []
        result = False
        message = "False"
        logger.error(u"查询主机列表失败: %s" % execute_data.get("message"))
    return JsonResponse({"result": result, "message": message, "data": data})
def create_record_detail(self, task_type):
    """Store a task record for this pipeline.

    @param task_type: internally defined pipeline-flow id; each id maps
        to a task name, which keeps backend management uniform and the
        stored field short and searchable.
    @return: True when the record was created, False otherwise.
    """
    task_info = copy.deepcopy(self.param_info)
    # Strip secrets such as password so they are never persisted.
    if task_info.get('password'):
        task_info.pop('password')
    # BUGFIX: the original logged self.param_info — the raw parameters
    # including the password — defeating the sanitization above; log the
    # sanitized copy instead.
    logger.info(task_info)
    self.task_id = create_record_detail(
        {
            "db_type": self.db_type,
            "app_id": self.param_info['app_id'],
            "cluster_name": self.param_info['cluster_name'],
            "task_type": task_type,
            "task_mode": 1,
            "pipeline_id": "000",  # placeholder until the real id is known
            "op_user": self.param_info['bk_username'],
            "task_params": task_info,
        }
    )
    # BUGFIX: create_record_detail returns None (not 0) on failure, so the
    # old `== 0` comparison never detected it; use a falsy check instead.
    if not self.task_id:
        logger.error("task表生成失败,任务结束")
        return False
    return True
def fast_execute_script(self, kwargs):
    """Quickly execute a script via the JobV3 API.

    Shell script content must already be base64 encoded. On success,
    polls and returns the task result; on API failure, logs the error
    and returns None.
    """
    kwargs.update({
        "bk_username": self.bk_username,
    })
    merged = {**kwargs, **fast_execute_script_common_kwargs}
    resp = JobV3Api.fast_execute_script(merged, raw=True)
    # Guard clause: bail out early on API failure.
    if not resp["result"]:
        logger.error(
            build_job_err_log_format(self.bk_username,
                                     'fast_execute_script', kwargs, resp))
        return None
    status = self.get_task_result_status({
        "job_instance_id": resp["data"].get("job_instance_id"),
        "bk_biz_id": kwargs.get("bk_biz_id"),
    })
    logger.info(
        build_job_exec_log_format(self.bk_username, 'fast_execute_script',
                                  kwargs['task_name']))
    return status
def search_set(self, bk_biz_id: int, condition=None):
    """Query the sets (clusters) under a business.

    :param bk_biz_id: business id
    :param condition: optional CMDB filter condition; defaults to no
        filtering
    :return: dict with keys ``result``, ``message`` and ``data``
        (``data`` is a list of {bk_set_name, bk_set_id} dicts)
    """
    # BUGFIX: mutable default argument (condition={}) replaced with the
    # None sentinel; the empty-dict behavior is unchanged for callers.
    if condition is None:
        condition = {}
    kwargs = {
        "bk_app_code": app_code,
        "bk_app_secret": app_secret,
        "bk_token": self.bk_token,
        "bk_biz_id": bk_biz_id,
        "fields": ["bk_set_name", "bk_set_id"],
        "condition": condition,
    }
    data = self.client.cc.search_set(kwargs)
    result = {"result": False, "message": "nothing", "data": []}
    if data.get("result", False):
        result["result"] = data['result']
        result['data'] = data['data']['info']
    else:
        logger.error("获取集群失败 %s" % data['message'])
        result['message'] = data['message']
    return result
def post(self, request):
    """Create an inspection template together with its quota bindings.

    Validates the request body, rejects duplicate template names, then
    saves the template, bulk-inserts its quota rows and writes the
    operation log inside one transaction.
    """
    req = req_body_to_json(request)
    tpl_name = req.get("tpl_name", '')
    tpl_os = req.get("tpl_os", '')
    description = req.get("description", '')
    quotas = req.get("quotas", [])
    if not all([tpl_name, tpl_os, len(quotas)]):
        return Request.errorFcun("参数异常", data=[])
    tpl_obj = Tpl.objects.filter(tpl_name=tpl_name, is_deleted=0).first()
    if tpl_obj:
        return Request.errorFcun(msg="添加失败 名称重复", data=[])
    tpl = Tpl(tpl_name=tpl_name,
              tpl_os=tpl_os,
              description=description,
              author=request.user.username)
    # NOTE: the original guarded on `if tpl:` — a freshly constructed
    # model instance is always truthy, so that branch was dead code.
    try:
        # Everything below commits or rolls back together.
        with transaction.atomic():
            tpl.save()
            quota_rows = [
                CheckSystemTplQuta(tpl_id=tpl.id,
                                   quota_id=quota['id'],
                                   quota_threshold=str(
                                       quota['quota_threshold']))
                for quota in quotas
            ]
            CheckSystemTplQuta.objects.bulk_create(quota_rows)
            field_names = Log.get_model_field(Tpl)
            field_names['quotas'] = "巡检指标"
            req['author'] = request.user.username
            req['tpl_os'] = CheckSystemOs.objects.get(
                pk=req['tpl_os']).os_name
            req['quotas'] = tpl_quotas(quotas)
            Log.operation_log(request,
                              table_name=Tpl._meta.verbose_name,
                              update_fields=req,
                              operation_module_name="模板管理",
                              field_names=field_names)
            data = req_body_to_json(request)
            data['tpl_id'] = tpl.id
    except Exception:
        # BUGFIX: the original bare `except:` logged the error and then
        # fell through to the SUCCESS response — which would also raise
        # NameError because `data` was never assigned. Report the failure
        # to the caller instead.
        logger.error(
            f"新增模板失败 接口名称({request.path}) 请求参数({req_body_to_json(request)})"
        )
        return Request.errorFcun(msg="添加失败", data=[])
    return Request.succFcun(msg="添加成功", data=data)
def get_usernames():
    """Return every bk_username known to bk_login.

    On API failure the error is logged and an empty list is returned.
    """
    res = client.bk_login.get_all_users()
    if not res.get('result', False):
        logger.error(u'查询所有用户列表失败: %s' % res.get('message'))
        return []
    return [entry['bk_username'] for entry in res['data']]
def check_param(self):
    """Validate that the incoming param_info is a non-empty dict.

    :return: True when param_info is usable, False otherwise (the
        offending value is logged).
    """
    if isinstance(self.param_info, dict) and self.param_info:
        return True
    logger.error(f"任务传入参数不规范 param_info:{self.param_info}")
    return False
def get_username():
    """Return all bk_usernames from bk_login; empty list on failure."""
    res = client.bk_login.get_all_users()
    names = []
    if res.get('result', False):
        names = [item["bk_username"] for item in res['data']]
    else:
        logger.error(u"查询的所有用户列表失败: %s" % res.get("message"))
    return names
def get_job_instance_log(self, bk_biz_id: int, job_instance_id: int):
    """Fetch a job instance's execution log, grouped per IP.

    :param bk_biz_id: business id
    :param job_instance_id: job instance to fetch logs for
    :return: dict with keys ``result``, ``message``, ``job_instance_id``
        and ``logs`` — ``logs`` is a list of {ip, log_content} entries
        where log_content is that IP's chronological log records.
    """
    kwargs = {
        "bk_app_code": app_code,
        "bk_app_secret": app_secret,
        "bk_token": self.bk_token,
        "bk_biz_id": bk_biz_id,
        "job_instance_id": job_instance_id,
    }
    data = self.client.job.get_job_instance_log(kwargs)
    result = {
        'result': False,
        'message': 'Nothing',
        'job_instance_id': 0,
        "logs": []
    }
    if data.get('result', False):
        result['result'] = data['result']
        log_dict = {}
        for script in data['data']:  # typo fix: was `scrtip`
            for step in script['step_results']:
                for ip_log in step["ip_logs"]:
                    # BUGFIX: the append branch previously stored raw
                    # start/end times while the first entry went through
                    # make_time(), yielding mixed formats per IP; all
                    # entries are now formatted consistently.
                    log_dict.setdefault(ip_log['ip'], []).append({
                        "start_time": make_time(ip_log['start_time']),
                        "log_content": ip_log['log_content'],
                        "end_time": make_time(ip_log['end_time']),
                    })
        for key, value in log_dict.items():
            result['logs'].append({
                "ip": key,
                "log_content": value,
            })
    else:
        # BUGFIX: previously logged result.get('message') — the 'Nothing'
        # placeholder — instead of the actual API error message.
        logger.error(u'获取作业执行日志失败:%s' % data.get('message'))
        result['message'] = data['message']
    return result
def get_time():
    """
    Celery periodic-task example.

    run_every=crontab(minute='*/5', hour='*', day_of_week="*") runs the
    task every 5 minutes; periodic_task fires automatically while the
    app is running.
    """
    execute_task()
    now = datetime.datetime.now()
    # BUGFIX: an informational heartbeat was logged at ERROR level,
    # polluting error monitoring; demoted to INFO. Message unchanged.
    logger.info(u"celery 周期任务调用成功,当前时间:{}".format(now))
def task_resume(self):
    """Manually resume a paused pipeline task.

    :return: the bamboo-engine result flag (truthy on success); a
        failure is logged together with the exception info.
    """
    outcome = api.resume_pipeline(runtime=BambooDjangoRuntime(),
                                  pipeline_id=self.pipeline_id)
    if outcome.result:
        return outcome.result
    logger.error("重新启动任务失败,错误信息:{},pipeline_id :{}".format(
        outcome.exc, self.pipeline_id))
    return outcome.result
def task_pause(self):
    """Manually pause a running pipeline task.

    :return: the bamboo-engine result flag (truthy on success); a
        failure is logged together with the exception info.
    """
    outcome = api.pause_pipeline(runtime=BambooDjangoRuntime(),
                                 pipeline_id=self.pipeline_id)
    if outcome.result:
        return outcome.result
    logger.error("暂停任务失败,错误信息:{},pipeline_id :{}".format(
        outcome.exc, self.pipeline_id))
    return outcome.result