def post(request):
    """Create a token record from the posted parameters and echo it back.

    Responds with code 1004 when parameter validation fails, 1001 plus the
    stored fields on success, and 1002 when ``create`` yields a falsy object.
    """
    logger.debug("POST: {}".format(request.body))

    checked = RequestValidator.check_params(
        request,
        check_empty=True,
        check_params=["tokenName", "tokenContent", "status"],
    )
    if checked.has_error:
        logger.error("error: {}".format(checked.error_message))
        return JsonResponse({"code": 1004, "message": checked.error_message})

    fields = checked.params
    created = GeyeTokenModel.objects.create(
        token_name=fields.get("tokenName", None),
        token=fields.get("tokenContent", None),
        status=fields.get("status", None),
        remain_limit=99999,
    )
    if not created:
        return JsonResponse({"code": 1002, "message": "添加失败!"})

    payload = {
        "id": created.id,
        "tokenName": created.token_name,
        "tokenContent": created.token,
        "status": created.status,
        "remainLimit": created.remain_limit,
    }
    return JsonResponse({"code": 1001, "message": "添加成功!", "data": payload})
def post(request):
    """Validate the posted filter-rule fields and update the existing rule.

    Each field is checked in a fixed order and the first failure is reported
    with its own response code; the raw validated params are then handed to
    ``update_filter_rule``.
    """
    logger.debug("POST: {}".format(request.body))

    # Reject requests with missing or empty parameters.
    checked = RequestValidator.check_params(
        request,
        check_empty=True,
        check_params=[
            "id", "name", "ruleType", "ruleEngine", "ruleContent",
            "status", "action", "position", "priority",
        ],
    )
    if checked.has_error:
        logger.error("error: {}".format(checked.error_message))
        return JsonResponse({"code": 1004, "message": checked.error_message})
    params = checked.params

    # The rule being updated must already exist.
    if not GeyeFilterRuleModel.instance.is_exist(params.get("id", None)):
        return JsonResponse({"code": 1003, "message": "规则ID不存在!"})

    if not params.get("name"):
        return JsonResponse({"code": 1003, "message": "规则名称有误!"})

    # (key, default, allowed values, error code, error message), checked in order.
    int_checks = (
        ("ruleType", 1, (1, 2), 1005, "ruleType有误!"),
        ("ruleEngine", 1, (1, 2), 1006, "ruleEngine有误!"),
        ("status", 1, (1, 0), 1007, "status有误!"),
        ("action", 1, range(1, 6), 1007, "action有误!"),
        ("position", 1, range(1, 6), 1008, "position有误!"),
        ("priority", 5, range(0, 11), 1009, "priority有误!"),
    )
    for key, default, allowed, code, message in int_checks:
        if CommonConvert.ensure_int(params.get(key, default)) not in allowed:
            return JsonResponse({"code": code, "message": message})

    if GeyeFilterRuleModel.instance.update_filter_rule(params):
        return JsonResponse({"code": 1001, "message": "更新成功!"})
    return JsonResponse({"code": 1002, "message": "更新失败!"})
def post(request):
    """Update an existing token and return its stored fields.

    The token content in the response is passed through ``mask_token``.
    Responds with 1004 for invalid parameters or an unknown id, 1001 on
    success and 1002 when ``update_token`` returns a falsy value.
    """
    logger.debug("POST: {}".format(request.body))

    checked = RequestValidator.check_params(
        request,
        check_empty=True,
        check_params=["id", "tokenName", "tokenContent", "status"],
    )
    if checked.has_error:
        logger.error("error: {}".format(checked.error_message))
        return JsonResponse({"code": 1004, "message": checked.error_message})

    fields = checked.params
    token_id = fields.get("id", None)
    if not (token_id and GeyeTokenModel.instance.is_exist(token_id)):
        return JsonResponse({"code": 1004, "message": "token id不存在!"})

    updated = GeyeTokenModel.instance.update_token(fields)
    if not updated:
        return JsonResponse({"code": 1002, "message": "更新失败!"})

    payload = {
        "id": updated.id,
        "tokenName": updated.token_name,
        "tokenContent": mask_token(updated.token),
        "status": updated.status,
        "remainLimit": updated.remain_limit,
    }
    return JsonResponse({"code": 1001, "message": "更新成功!", "data": payload})
def get(request):
    """Return the CSRF token for this request as the plain response body."""
    logger.debug("COOKIES: {}".format(request.COOKIES))
    return HttpResponse(django.middleware.csrf.get_token(request))
def _request_page(self, request_header, request_data) -> Optional[requests.Response]:
    """
    Request a single page of search results.

    :param request_header: dict holding the ``token_id`` and the ``header`` to send
    :param request_data: query parameters passed on to ``make_request``
    :return: the response on success; None when ``make_request`` gives up,
        the token is rejected (401), the rate limit is hit five times (403),
        or the engine stops running
    """
    logger.debug("request_data: {} || request_header: {}".format(
        request_data, request_header))
    token_id = request_header["token_id"]
    header = request_header["header"]
    rate_limit_hits = 0

    def _store_remaining(value):
        # Persist the remaining quota for the token used by this request.
        GeyeTokenModel.instance.filter(
            is_deleted=0, pk=token_id).update(remain_limit=value)

    while self.is_running():
        # make_request retries internally and returns None once it gives up
        # or the engine is stopping.
        response: Optional[requests.Response] = self.make_request(
            header, request_data)
        if response is None:
            return None

        status_code = response.status_code
        logger.debug("status_code: {} || response header: {}".format(
            response.status_code, response.headers))

        if status_code == 401:
            # The token is rejected; retrying with it is pointless.
            logger.error(
                "401 - Bad credentials, see: https://developer.github.com/v3"
            )
            _store_remaining(-1)
            return None

        if status_code != 403:
            # Any other status is treated as success: record the quota left.
            _store_remaining(
                int(response.headers.get("X-RateLimit-Remaining", 0)))
            return response

        # 403: rate limit triggered. Mark the token as exhausted and retry
        # after a pause, giving up on the fifth hit.
        _store_remaining(0)
        rate_limit_hits += 1
        if rate_limit_hits >= 5:
            return None
        logger.error(
            "403 - API rate limit exceeded. Wait 60s and will retry..."
        )
        self.ev.wait(60)
def post(request):
    """Update an existing search rule from the posted parameters.

    ``priority`` and ``delay`` may arrive as digit strings and are converted
    to int; notification and clone are always stored as 0. The response code
    is 1001 when the update touched at least one row, 1002 otherwise.
    """
    logger.debug("POST: {}".format(request.body))

    checked = RequestValidator.check_params(
        request,
        check_empty=True,
        check_params=[
            "id", "ruleName", "ruleContent", "status", "needNotification",
            "clone", "delay", "priority",
        ],
    )
    if checked.has_error:
        em = checked.error_message
        logger.error("error_message: {}".format(em))
        return JsonResponse({"code": 1004, "message": em})
    body = checked.params

    # The target rule id must be present and known.
    srid = body.get("id", None)
    if not srid:
        return JsonResponse({"code": 1004, "message": "规则ID有误!"})
    if not GeyeSearchRuleModel.instance.is_exist_by_pk(srid):
        return JsonResponse({"code": 1003, "message": "规则ID不存在!"})

    rule_name = body.get("ruleName")
    rule_content = body.get("ruleContent")
    status = body.get("status")
    delay = body.get("delay")
    priority = body.get("priority")

    def _is_bad_number(value):
        # Only string inputs are checked; other types pass through untouched.
        return isinstance(value, str) and not value.isdigit()

    if _is_bad_number(priority):
        return JsonResponse({"code": 1003, "message": "优先级有误!"})
    if _is_bad_number(delay):
        return JsonResponse({"code": 1003, "message": "刷新间隔有误!"})

    # str -> int
    if isinstance(delay, str):
        delay = int(delay)
    if isinstance(priority, str):
        priority = int(priority)

    # ``update`` returns the number of matched rows.
    affected = GeyeSearchRuleModel.instance.filter(
        is_deleted=0, id=srid,
    ).update(
        name=rule_name,
        rule=rule_content,
        status=status,
        priority=priority,
        delay=delay,
        need_notification=0,
        clone=0,
    )
    if affected:
        return JsonResponse({"code": 1001, "message": "更新规则成功!"})
    return JsonResponse({"code": 1002, "message": "更新规则失败!"})
def post(request):
    """Soft-delete the global filter rule whose id is given in the JSON body."""
    frid = json.loads(request.body).get("id", None)
    logger.debug("frid: {}".format(frid))

    rules = GeyeFilterRuleModel.instance
    if not (frid and rules.is_exist_global(frid)):
        return JsonResponse({"code": 1004, "message": "规则ID不存在!"})

    if not rules.fake_delete_global(frid):
        return JsonResponse({"code": 1002, "message": "删除失败!"})
    return JsonResponse({"code": 1001, "message": "删除成功!"})
def post(request):
    """Soft-delete the search rule whose id is given in the JSON body."""
    srid = json.loads(request.body).get("id", None)
    logger.debug("srid: {}".format(srid))

    if not srid:
        return JsonResponse({"code": 1004, "message": "规则id有误!"})

    rules = GeyeSearchRuleModel.instance
    if not rules.is_exist_by_pk(srid):
        return JsonResponse({"code": 1003, "message": "规则id不存在!"})

    if rules.fake_delete(pk=srid):
        return JsonResponse({"code": 1001, "message": "删除成功!"})
    return JsonResponse({"code": 1002, "message": "删除失败!"})
def _worker(self):
    """Periodically turn due search rules into tasks on the search queue.

    On every cycle all enabled, non-deleted search rules are loaded. A rule
    is due when it has never been refreshed (``last_refresh_time`` is None)
    or when ``last_refresh_time + delay minutes`` lies in the past. Due rules
    are queued as ``PriorityTask`` objects and their ``last_refresh_time`` is
    set to the cycle's timestamp. The loop ends once the engine status is no
    longer RUNNING.
    """
    logger.info("RefreshEngine start!")
    refresh_task_queue = self.app_ctx.MessageQueues.SEARCH_TASK_QUEUE

    def _running():
        return self.status == self.EngineStatus.RUNNING

    while _running():
        logger.debug("start build search task.")
        rows = GeyeSearchRuleModel.objects.filter(is_deleted=0, status=1).all()
        current_time = datetime.datetime.now()

        for row in rows:
            last_refresh = row.last_refresh_time
            # A rule that was never refreshed has no timestamp to add the
            # delay to; treat it as due instead of raising TypeError.
            if last_refresh is not None:
                next_refresh = last_refresh + datetime.timedelta(
                    minutes=int(row.delay))
                if not next_refresh < current_time:
                    continue

            # The payload is a plain dict; it would have to become a JSON
            # string if the queue were ever moved out of process.
            task = PriorityTask(row.priority, {
                "search_rule_id": row.id,
                "search_rule_name": row.name,
                "search_rule_content": row.rule,
            })
            logger.debug("task: {}".format(task))

            # Retry while the queue is full, but stop retrying as soon as the
            # engine is asked to shut down so the thread cannot hang here.
            queued = False
            while _running():
                try:
                    refresh_task_queue.put_nowait(task)
                    queued = True
                    break
                except queue.Full:
                    logger.warning("SearchTask队列已满,等待3秒后重试")
                    self.ev.wait(3)
            if not queued:
                # Shutting down: leave the rule untouched so it is picked up
                # again on the next start.
                break

            # Record the refresh time only for tasks that were really queued.
            row.last_refresh_time = current_time
            row.save()

        self.ev.wait(settings.REFRESH_INTERVAL)

    logger.info("RefreshEngine end!")
def post(request):
    """Update an existing monitor rule from the posted parameters.

    ``id`` is validated together with the other fields because it is needed
    to locate the rule. ``eventType`` is iterated item by item and stored as
    a comma-joined string. The row is locked with ``select_for_update``
    inside a transaction while it is modified.

    Response codes: 1004 invalid parameters, 1003 bad taskType/eventType or
    unknown rule, 1001 success.
    """
    logger.debug(f"POST data: {request.body}")

    # Validate that all required parameters are present and non-empty.
    validator = RequestValidator()
    result = validator.check_params(request, check_params=[
        "id", "taskType", "eventType", "interval", "priority",
        "ruleContent", "status"
    ], check_empty=True)
    if result.has_error:
        return JsonResponse({
            "code": 1004,
            "message": result.error_message
        })

    # Validate the values against the known constants.
    params = result.params
    task_type = params.get("taskType")
    event_type = params.get("eventType")
    if task_type not in MonitorTaskTypeConstant.lst():
        return JsonResponse({"code": 1003, "message": "taskType有误!"})
    for _post_event_type in event_type:
        if _post_event_type not in MonitorEventTypeConstant.lst():
            return JsonResponse({"code": 1003, "message": "eventType有误!"})

    # Update the row under a row lock.
    with transaction.atomic():
        obj: GeyeMonitorRules = GeyeMonitorRules.instance.select_for_update(). \
            filter(is_deleted=False, pk=params.get("id")).first()
        if not obj:
            return JsonResponse({"code": 1003, "message": "规则不存在!"})
        obj.task_type = task_type
        obj.event_type = ",".join(event_type)
        obj.rule_content = params.get("ruleContent")
        obj.status = params.get("status")
        obj.interval = params.get("interval")
        obj.priority = params.get("priority")
        obj.save()

    return JsonResponse({"code": 1001, "message": "更新成功!"})
def get(request):
    """Return every global filter rule as a list of JSON objects."""
    rows = GeyeFilterRuleModel.instance.all_global_filter_rule()
    logger.debug("rows: {}".format(rows))

    data = [
        {
            "id": rule.id,
            "name": rule.name,
            "ruleType": rule.rule_type,
            "ruleEngine": rule.rule_engine,
            "ruleContent": rule.rule,
            "status": rule.status,
            "parentId": rule.parent_id,
            "action": rule.action,
            "position": rule.position,
            "priority": rule.priority,
        }
        for rule in rows
    ]
    return JsonResponse({"code": 1001, "message": "获取成功!", "data": data})
def get(request):
    """Return one search rule together with its own filter rules.

    The rule is looked up by ``id`` and/or ``rule_name`` from the query
    string. Filter rules are selected by the id of the rule that was actually
    found, so a lookup by name alone also returns them.

    Response codes: 1004 when neither parameter is given, 1003 when no rule
    matches, 1001 on success.
    """
    srid = request.GET.get("id", None)
    rule_name = request.GET.get("rule_name", None)
    logger.debug("srid: {}, rule_name: {}".format(srid, rule_name))
    if not srid and not rule_name:
        return JsonResponse({"code": 1004, "message": "id和rule_name均有误"})

    search_rule_obj = GeyeSearchRuleModel.instance.get_detail(
        pk=srid, rule_name=rule_name)
    if not search_rule_obj:
        return JsonResponse({"code": 1003, "message": "规则不存在!"})

    # Use the resolved rule's id: ``srid`` is None when the caller only
    # passed ``rule_name``, which would match no filter rules at all.
    filter_rule_obj = GeyeFilterRuleModel.instance.filter(
        is_deleted=0,
        parent_id=search_rule_obj.id).order_by("-priority").all()

    rv = {
        "search_rule": {
            "ruleName": search_rule_obj.name,
            "ruleContent": search_rule_obj.rule,
            "status": search_rule_obj.status,
            "priority": search_rule_obj.priority,
            "delay": search_rule_obj.delay,
            "needNotification": int(search_rule_obj.need_notification),
            "clone": int(search_rule_obj.clone),
        },
        "filter_rule": [{
            "id": fr.id,
            "name": fr.name,
            "ruleType": fr.rule_type,
            "ruleEngine": fr.rule_engine,
            "ruleContent": fr.rule,
            "status": fr.status,
            "parentId": fr.parent_id,
            "action": fr.action,
            "position": fr.position,
            "priority": fr.priority
        } for fr in filter_rule_obj],
    }
    return JsonResponse({"code": 1001, "message": "success", "data": rv})
def _real_worker(self):
    """Run queued search rules against the search API and forward the hits.

    For every task the first three result pages are requested, parsed and
    merged into one list, which is then queued under the task's priority
    together with the rule id.
    """
    while self.is_running():
        priority, task = self._get_task()
        if not (priority and task):
            continue

        # The task carries the rule's content and its id.
        keyword = task.get("rule_content")
        rule_id = task.get("rule_id")
        if not keyword:
            continue

        # Fixed query parameters; "q" is the search term and "p" (set per
        # request below) is the page number.
        query = {
            "tab": "public",
            "scope": "/",
            "type": "content",
            "q": self.__encode_keyword(keyword)
        }

        # Three pages are searched by default.
        collected = []
        for page in range(1, 4):
            query["p"] = page
            # The cookie header is rebuilt for every request.
            header = {"Cookie": self._get_cookie_header()}
            logger.debug("yuque header: {}".format(header))
            response = self.make_request(self.SEARCH_API, header, query)
            collected += self.parse_response(response)

        # Hand the merged results to the next stage.
        self._put_task(
            PriorityTask(priority, {"rule_id": rule_id, "results": collected}))
def _worker(self):
    """Persist search hits taken from the task queue as leak records.

    Each task is expected to carry a ``rule_id`` and a ``results`` list; one
    ``GeyeYuqueLeaksModel`` row is saved per result. The timestamp fields are
    read from the individual result, because that is where the per-hit
    values are kept (the task itself only holds ``rule_id`` and ``results``).
    """
    logger.debug("{} start.".format(self.name))

    while self.is_running():
        _, task = self._get_task()
        if not task:
            continue

        rule_id = task.get("rule_id")
        # A task without results is skipped instead of crashing the loop.
        for result in task.get("results") or []:
            leak = GeyeYuqueLeaksModel()
            leak.title = result.get("title")
            leak.go_url = result.get("url")
            leak.url = result.get("raw_url")
            leak.book_name = result.get("book_name")
            leak.group_name = result.get("group_name")
            leak.abstract = result.get("abstract")
            # NOTE(review): the empty key means this is normally None; kept
            # as in the original, the rule is linked through search_rule_id.
            leak.search_rule_obj = result.get("")
            leak.search_rule_id = rule_id
            leak.status = 1
            # Empty values are stored as None, which is what was stored
            # before when these fields were (wrongly) read from the task.
            leak.content_updated_at = result.get("content_updated_at") or None
            leak.first_published_at = result.get("first_published_at") or None
            leak.paper_created_at = result.get("created_at") or None
            leak.paper_updated_at = result.get("updated_at") or None
            leak.save()
            logger.debug("Save yuque leak <<{}>>".format(
                result.get("title")))

    logger.debug("{} end.".format(self.name))
def _worker(self):
    """Periodically queue a monitor task for every rule that is due.

    A rule is due when ``last_fetch_time + interval minutes`` is earlier than
    the timestamp taken at the start of the cycle. After the queueing
    attempt the rule's ``last_fetch_time`` is set to that timestamp. The
    loop pauses 30 seconds between cycles.
    """
    logger.info("{name} start!".format(name=self.name))

    while self.__running():
        logger.debug("start build monitor task.")
        rules: List[GeyeMonitorRules] = GeyeMonitorRules.instance.get_all()
        now = datetime.datetime.now()

        for rule in rules:
            due_at = rule.last_fetch_time + datetime.timedelta(
                minutes=rule.interval)
            if not due_at < now:
                continue

            payload = {
                "task_type": rule.task_type,
                "event_type": rule.event_type,
                "rule_content": rule.rule_content,
                "rule_id": rule.id,
            }
            task = PriorityTask(rule.priority, payload)
            logger.debug(
                "Create monitor task: {task}".format(task=task))

            # Keep retrying while the queue is full and the engine runs.
            while self.__running():
                try:
                    self._monitor_task_queue.put_nowait(task)
                except queue.Full:
                    self.ev.wait(3)
                else:
                    break

            # Remember when this rule was last turned into a task.
            rule.last_fetch_time = now
            rule.save()

        self.ev.wait(30)

    logger.info("{name} stop!".format(name=self.name))
def make_request(self, header, data) -> Optional[requests.Response]:
    """
    Send the search request, retrying on transport errors.

    Up to five requests are attempted. After a failed attempt the method
    waits 5 seconds before trying again; after the last failed attempt it
    returns immediately.

    :param header: request headers, including the token
    :param data: query parameters describing the search
    :return: the response of the first request that completes, or None when
        every attempt failed or the engine stopped running
    """
    # Pick one proxy configuration for all attempts of this call.
    proxies = random.choice(self.all_proxies) if self.use_proxies else None

    # TODO: hard-coded for now; planned to move into the configuration.
    max_attempts = 5
    attempt = 0
    while self.is_running():
        attempt += 1
        try:
            logger.debug("before requests.get()")
            response = requests.get(self.search_api_url,
                                    params=data,
                                    headers=header,
                                    timeout=12,
                                    proxies=proxies)
            logger.debug("after requests.get()")
            return response
        except requests.RequestException as e:
            logger.error(
                "Error while make request. requests.RequestException: {}".
                format(e))
            if attempt >= max_attempts:
                # All attempts are used up; no point in sleeping first.
                logger.warning("请求超出最大次数!")
                break
            logger.error("Try re-request after 5s.")
            self.ev.wait(5)

    return None
def _worker(self):
    """Process search tasks: request result pages and queue filter tasks.

    For each task, pages 1..``search_page_max_size`` are requested in order.
    Paging stops early when no token is available, when the page request
    returns None, or when the parsed page reports no further pages. A page
    whose parsing reports an error is skipped.
    """
    current_name = threading.current_thread().name
    logger.info("{} start!".format(current_name))

    while self.is_running():
        # Nothing usable fetched from the queue: poll again.
        task_priority, search_task = self.get_task_from_queue()
        if not (task_priority and search_task):
            continue

        srid = search_task.get("search_rule_id")
        rule_name = search_task.get("search_rule_name")
        rule_content = search_task.get("search_rule_content")
        logger.debug("parse task data done.")

        for page_num in range(1, self.search_page_max_size + 1):
            request_data = self.build_request_data(rule_content, page_num)
            request_header = self.build_request_header()
            if request_header is None:
                logger.error(
                    "No available token found. Jumping search operator.")
                break

            # None means the page could not be fetched; stop paging.
            response = self._request_page(request_header, request_data)
            if response is None:
                break
            logger.debug("response.text: {}".format(response.text))

            # parse_response yields a dict with the keys "filter_tasks",
            # "has_next_page" and "error".
            parsed = self.parse_response(response, srid, rule_name)
            if not parsed["error"]:
                for filter_task in parsed["filter_tasks"]:
                    self.push_to_queue(task_priority, filter_task)

                if not parsed["has_next_page"]:
                    logger.debug(
                        "Jump remains page because of 'has_next_page' is False."
                    )
                    break

    logger.info("{} end!".format(current_name))
def put_task_to_queue(self, task, target_queue: queue.PriorityQueue = None):
    """
    Put a task on a queue, retrying every second while the queue is full.

    :param task: ``(priority, payload)`` pair to wrap in a ``PriorityTask``
    :param target_queue: queue to put the task on; falls back to
        ``self.filter_task_queue`` when falsy
    :return: None; the method also returns without queueing once the engine
        status is no longer RUNNING
    """
    destination = target_queue or self.filter_task_queue

    while self.status == self.EngineStatus.RUNNING:
        try:
            destination.put_nowait(PriorityTask(task[0], task[1]))
        except queue.Full:
            # Find the attribute name of the queue for the log message; the
            # last matching attribute wins.
            matches = [
                attr for attr, value in self.__dict__.items()
                if value is destination
            ]
            q_name = matches[-1] if matches else "unknown"
            logger.debug("{q_name}已满,1秒后重试.".format(q_name=q_name))
            self.ev.wait(1)
        else:
            return
def post(request):
    """Create a monitor rule from the posted parameters.

    ``eventType`` is iterated item by item and stored as a comma-joined
    string. Response codes: 1004 invalid parameters, 1003 bad
    taskType/eventType, 1001 success (with the new rule as a dict), 1002
    when ``create`` returns a falsy object.
    """
    logger.debug(f"POST data: {request.body}")

    # Validate that all required parameters are present and non-empty.
    checked = RequestValidator().check_params(
        request,
        check_params=[
            "taskType", "eventType", "interval", "priority", "ruleContent",
            "status",
        ],
        check_empty=True,
    )
    if checked.has_error:
        return JsonResponse({"code": 1004, "message": checked.error_message})

    # Validate the values against the known constants.
    params = checked.params
    task_type = params.get("taskType")
    event_type = params.get("eventType")
    logger.debug(f"TaskTypeConstantList: {MonitorTaskTypeConstant.lst()}")
    logger.debug(
        f"EventTypeConstantList: {MonitorEventTypeConstant.lst()}")

    if task_type not in MonitorTaskTypeConstant.lst():
        return JsonResponse({"code": 1003, "message": "taskType有误!"})
    if any(item not in MonitorEventTypeConstant.lst() for item in event_type):
        return JsonResponse({"code": 1003, "message": "eventType有误!"})

    # Insert the new rule.
    created = GeyeMonitorRules.instance.create(
        task_type=task_type,
        event_type=",".join(event_type),
        rule_content=params.get("ruleContent"),
        status=params.get("status"),
        interval=params.get("interval"),
        priority=params.get("priority"),
    )
    if not created:
        return JsonResponse({"code": 1002, "message": "添加失败"})
    return JsonResponse({
        "code": 1001,
        "message": "添加成功",
        "data": created.convert_to_dict(),
    })
def _worker(self):
    """Run every filter rule against the code behind a filter task.

    For each task the applicable filter rules are fetched and the full code
    is downloaded. Rules are applied in order; a rule "hits" when a forward
    rule (rule_type 1) finds a match or a reverse rule (rule_type 2) finds
    none. On a hit the rule's action decides what happens:

    * 1 - do nothing and continue with the next rule (not saved)
    * 2 - treat as false positive, stop, do not save
    * 3 - treat as false positive, stop, save with status IGNORE
    * 4 - confirmed, stop, save with status CONFIRM
    * 5 - needs confirmation, stop, save with status TO_BE_CONFIRMED

    Saving means queueing a save task on ``self.save_task_queue`` with the
    priority of the incoming task.
    """
    current_name = threading.current_thread().name
    logger.info("{} start!".format(current_name))

    # Saving actions: action value -> (debug message, leak status).
    save_actions = {
        3: ("Action: Ignore -> save -> end filter.",
            LeaksStatusConstant.IGNORE),
        4: ("Action: Confirm -> save -> end filter.",
            LeaksStatusConstant.CONFIRM),
        5: ("Action: To-be-confirmed -> save -> end filter.",
            LeaksStatusConstant.TO_BE_CONFIRMED),
    }

    while self.status == self.EngineStatus.RUNNING:
        # task_priority is the priority configured on the search rule.
        task_priority, task = self.get_task_from_queue()
        if not task or not task_priority:
            continue

        # Global filter rules first, then the rule's own filter rules.
        all_filter_rules: List[
            GeyeFilterRuleModel] = self.get_filter_rules(task["srid"])
        logger.debug("Get all filter rules: {}".format(all_filter_rules))

        # Download the complete code. On failure the task is dropped rather
        # than re-queued, because re-queueing could stall the worker.
        response_result = self.get_raw_code(task["full_code_url"])
        if not response_result["success"]:
            logger.error(
                "获取raw code失败,URL:{url}".format(url=task["full_code_url"]))
            continue
        raw_code = response_result["code"]

        logger.debug("#### [start] SEARCH RULE: {}".format(
            task["search_rule_name"]))
        logger.debug("#### Content URL: {}".format(task["full_code_url"]))

        for _rule in all_filter_rules:
            logger.debug("==== filter rule: {}, content: {}".format(
                _rule, _rule.rule))
            result = self.do_filter(_rule, task, raw_code)
            # Any error while matching ends the processing of this task.
            if not result or result["error"]:
                break

            # Translate found/not-found into hit/no-hit by rule direction.
            if _rule.rule_type == 1:
                hit = bool(result["found"])
            elif _rule.rule_type == 2:
                hit = not result["found"]
            else:
                logger.error("Error rule_type: {}".format(_rule.rule_type))
                break
            logger.debug("filter end. hit result: %s", hit)

            if not hit:
                logger.debug("no hit, continue filter next rule.")
                continue

            _action = _rule.action
            if _action == 1:
                logger.debug("Action: None -> continue next.")
                continue
            if _action == 2:
                logger.debug(
                    "Action: Ignore -> no save -> end filter.")
                break
            if _action in save_actions:
                message, leak_status = save_actions[_action]
                logger.debug(message)
                save_task = (task_priority, {
                    "code": result["code"],
                    "status": leak_status,
                    "pushed": 0,
                    "frid": _rule.id,
                    "filter_task": task,
                    "filter_rule_name": _rule.name
                })
                self.put_task_to_queue(
                    save_task, target_queue=self.save_task_queue)
                break
            # Unknown action: report it and move on to the next rule.
            logger.error(
                "Unknown action value: {}".format(_action))

        logger.debug("#### [end] SEARCH RULE: {}".format(
            task["search_rule_name"]))

    logger.info("{} end!".format(current_name))
def parse_response(response):
    """
    Parse the response returned by the yuque search API.

    For every hit the article URL is requested once more so that the
    redirect target can be stored as ``raw_url``. When that lookup fails or
    yields no redirect, the non-redirected article URL is used instead.

    :param response: response object exposing ``json()``
    :return: list of dicts with the keys ``abstract``, ``book_name``,
        ``group_name``, ``id``, ``title``, ``url``, ``raw_url`` and the five
        timestamp fields; empty when the payload contains no hits
    """
    result = response.json()
    # Tolerate payloads without "data" / "hits" instead of raising.
    data = result.get("data") or {}
    total_hits = data.get("totalHits")
    num_hits = data.get("numHits")
    logger.debug("total_hits: {}, num_hits: {}".format(total_hits, num_hits))

    timestamp_keys = (
        "content_updated_at",
        "first_published_at",
        "published_at",
        "created_at",
        "updated_at",
    )

    ret_list = []
    for hits in data.get("hits") or []:
        url = hits.get("url", "NO_URL_FIELD")
        # Timestamps live in the optional "record" object; default to "".
        record = hits.get("record", None) or {}

        # TODO: the real-URL lookup is always on for now; it is meant to be
        # controlled by a configuration option later.
        paper_full_url = "https://yuque.com{}".format(url)
        paper_raw_url = paper_full_url
        try:
            redirects = requests.get(paper_full_url, timeout=9).history
        except requests.RequestException as e:
            logger.error("Get real url of {} failed: {}".format(
                paper_full_url, e))
            redirects = []
        if redirects:
            # history[-1] is the last redirect response; its Location header
            # is the path of the real article.
            redirect_path = redirects[-1].headers.get("location")
            if redirect_path:
                paper_raw_url = "https://yuque.com{}".format(redirect_path)

        item = {
            "abstract": hits.get("abstract", "NO_ABSTRACT_FIELD"),
            "book_name": hits.get("book_name", "NO_BOOK_NAME_FIELD"),
            "group_name": hits.get("group_name", "NO_GROUP_NAME_FIELD"),
            "id": hits.get("id", "NO_ID_FIELD"),
            "title": hits.get("title", "NO_TITLE_FIELD"),
            "url": url,
            "raw_url": paper_raw_url,
        }
        for key in timestamp_keys:
            item[key] = record.get(key, "")
        ret_list.append(item)

    return ret_list
def post(request):
    """Create a global filter rule (``parent_id`` 0) from the posted fields.

    Fields are validated in a fixed order and the first failure is reported
    with its own response code. On success the stored rule is returned.
    """
    logger.debug("POST: {}".format(request.body))

    def fail(code, message):
        return JsonResponse({"code": code, "message": message})

    # Reject requests with missing or empty parameters.
    checked = RequestValidator.check_params(
        request,
        check_empty=True,
        check_params=[
            "name", "ruleType", "ruleEngine", "ruleContent", "status",
            "action", "position", "priority",
        ],
    )
    if checked.has_error:
        logger.error("error: {}".format(checked.error_message))
        return fail(1004, checked.error_message)
    params = checked.params
    to_int = CommonConvert.ensure_int

    name = params.get("name")
    if not name:
        return fail(1003, "规则名称有误!")

    rule_type = to_int(params.get("ruleType", 1))
    if rule_type not in (1, 2):
        return fail(1005, "ruleType有误!")

    rule_engine = to_int(params.get("ruleEngine", 1))
    if rule_engine not in (1, 2):
        return fail(1006, "ruleEngine有误!")

    rule_content = params.get("ruleContent", "")
    if not rule_content:
        return fail(1004, "ruleContent不能为空")

    status = to_int(params.get("status", 1))
    if status not in (1, 0):
        return fail(1007, "status有误!")

    action = to_int(params.get("action", 1))
    if action not in range(1, 6):
        return fail(1007, "action有误!")

    position = to_int(params.get("position", 1))
    if position not in range(1, 6):
        return fail(1008, "position有误!")

    priority = to_int(params.get("priority", 5))
    if priority not in range(0, 11):
        return fail(1009, "priority有误!")

    created = GeyeFilterRuleModel.instance.create(
        name=name,
        rule_type=rule_type,
        rule_engine=rule_engine,
        rule=rule_content,
        status=status,
        parent_id=0,
        action=action,
        position=position,
        priority=priority,
    )
    if not created:
        return fail(1002, "添加失败!")

    return JsonResponse({
        "code": 1001,
        "message": "添加成功!",
        "data": {
            "id": created.id,
            "name": created.name,
            "ruleType": created.rule_type,
            "ruleEngine": created.rule_engine,
            "ruleContent": created.rule,
            "status": created.status,
            "parentId": created.parent_id,
            "action": created.action,
            "position": created.position,
            "priority": created.priority,
        },
    })
def post(request: HttpRequest):
    """Create a search rule and, optionally, its default filter rule.

    ``priority`` and ``delay`` may arrive as digit strings; anything else in
    string form is rejected with code 1003 before the rule is created, and
    digit strings are converted to int. Notification and auto-clone are not
    enabled yet and are always stored as 0.

    Response codes: 1004 invalid parameters, 1002 rule name already exists,
    1003 bad priority/delay, 1001 success with the new rule id as ``data``.
    """
    logger.debug("POST: {}".format(request.body))
    r_json = {"code": 1001, "message": "", "data": ""}

    # Basic check that the required parameters are present and non-empty.
    result = RequestValidator.check_params(request, [
        "ruleName", "ruleContent", "status", "defaultFilter", "delay",
        "priority", "notification", "clone"
    ], check_empty=True)
    logger.debug("check result: {}".format(result))
    if result.has_error:
        r_json["code"] = 1004
        r_json["message"] = result.error_message
        logger.error("error_message: {}".format(result.error_message))
        return JsonResponse(r_json)

    request_data = result.params
    rule_name = request_data.get("ruleName")
    rule_content = request_data.get("ruleContent")

    # Rule names must be unique.
    if GeyeSearchRuleModel.instance.is_exist(rule_name):
        r_json["code"] = 1002
        r_json["message"] = "规则名称已存在!"
        return JsonResponse(r_json)

    status = request_data.get("status", 0)
    default_filter = int(request_data.get("defaultFilter", 1))
    delay = request_data.get("delay", "30")
    priority = request_data.get("priority", "5")

    # Validate priority and delay.
    if isinstance(priority, str) and not priority.isdigit():
        r_json["code"] = 1003
        r_json["message"] = "非法的优先级!"
        return JsonResponse(r_json)
    if isinstance(delay, str) and not delay.isdigit():
        r_json["code"] = 1003
        r_json["message"] = "非法的搜索间隔时间!"
        # Stop here: without this return the invalid rule was still created.
        return JsonResponse(r_json)

    # str -> int, same as the update handler does.
    priority = int(priority) if isinstance(priority, str) else priority
    delay = int(delay) if isinstance(delay, str) else delay

    # Notification and auto-clone are not enabled yet.
    notification = 0
    clone = 0

    obj = GeyeSearchRuleModel.instance.create(
        name=rule_name,
        rule=rule_content,
        status=status,
        priority=priority,
        last_refresh_time=None,
        delay=delay,
        need_notification=notification,
        clone=clone)

    if default_filter:
        # Default filter: a reverse match on the search keyword that ends
        # the filtering when the keyword is not found in the code.
        GeyeFilterRuleModel.instance.create(name="DefaultFilter",
                                            rule_type=2,
                                            rule_engine=2,
                                            rule=rule_content,
                                            status=1,
                                            parent_id=obj.id,
                                            action=2,
                                            position=4,
                                            priority=10)

    r_json["code"] = 1001
    r_json["message"] = "创建成功!"
    r_json["data"] = obj.id
    return JsonResponse(r_json)
def regex_filter(rule_content, filter_content, frid) -> dict:
    """Match ``filter_content`` against ``rule_content`` with the configured engine.

    The engine name is read from ``settings.REGEX_ENGINE`` once per call, so
    it can be switched at runtime while a single call stays consistent.

    * ``"inner"`` runs ``RuleEngine._regex_inner_engine`` in a child process
      and waits up to 60 seconds for its result.
    * ``"grep"`` shell-quotes both arguments and delegates to
      ``RuleEngine._regex_grep_engine``.

    :param rule_content: the regular expression
    :param filter_content: the text to search
    :param frid: filter rule id, used in log messages only
    :return: dict with the keys ``error``, ``found`` and ``code``; for an
        unsupported engine the initial value (no error, nothing found) is
        returned
    """
    import time

    regex_engine = settings.REGEX_ENGINE
    filter_result = {"error": False, "found": False, "code": ""}

    if regex_engine == "inner":
        logger.debug("Use 'inner' regex engine.")
        result_queue = multiprocessing.Queue()
        p = Process(target=RuleEngine._regex_inner_engine,
                    args=(
                        rule_content,
                        filter_content,
                        result_queue,
                    ))
        p.start()

        # Read the result BEFORE joining: a child that has put a large item
        # on the queue does not exit until the item is consumed, so joining
        # first would make big matches look like timeouts.
        deadline = time.monotonic() + 60
        received = False
        _result = None
        while not received:
            try:
                _result = result_queue.get(timeout=0.2)
                received = True
            except queue.Empty:
                if not p.is_alive():
                    # The child is gone; pick up anything it left behind.
                    try:
                        _result = result_queue.get_nowait()
                        received = True
                    except queue.Empty:
                        pass
                    break
                if time.monotonic() >= deadline:
                    break

        if not received:
            if p.is_alive():
                logger.error(
                    "[INNER REGEX] filter timeout! frid: {}".format(frid))
                p.terminate()
                p.join()
            else:
                # The process ended without delivering a result.
                logger.error(
                    "Empty result get from queue! frid: {}".format(frid))
            filter_result["error"] = True
            return filter_result

        # The result is in; give the child a moment to exit, then reap it.
        p.join(5)
        if p.is_alive():
            p.terminate()
            p.join()

        filter_result["found"] = _result["found"]
        filter_result["code"] = _result["code"]
        return filter_result

    if regex_engine == "grep":
        rule = shlex.quote(rule_content)
        content = shlex.quote(filter_content)
        _result = RuleEngine._regex_grep_engine(rule, content)
        filter_result["error"] = _result["error"]
        filter_result["found"] = _result["found"]
        filter_result["code"] = _result["code"]
        return filter_result

    logger.error("Un-support regex-engine '{}' !".format(regex_engine))
    return filter_result
def _worker(self):
    """Fetch the API behind each monitor task and queue the parsed events.

    A task is a dict with the keys ``task_type``, ``event_type`` (one or
    more event types joined by commas), ``rule_content`` (a JSON object
    whose fields fill the placeholders of the API URL) and ``rule_id``.
    Tasks that are incomplete, reference an unknown ``task_type``, carry a
    ``rule_content`` that cannot be applied to the URL template, or whose
    fetch/parse step fails are skipped.
    """
    logger.info("{name} start!".format(name=self.name))

    while self.__is_running():
        task_priority, task = self.__get_task()
        if task_priority is None or task is None:
            self.__wait(1)
            continue

        logger.debug("get task: {}".format(task))
        task_type = task.get("task_type", None)
        event_type: str = task.get("event_type", None)
        rule_content = task.get("rule_content", None)
        monitor_rule_id = task.get("rule_id", None)
        if not task_type or not event_type or not rule_content or not monitor_rule_id:
            self.__wait(1)
            continue

        # Every task type maps to its own API URL template.
        api_url = MonitorAPIUrl.get(task_type, None)
        if not api_url:
            logger.error("task_type有误,无法获取API!")
            continue

        # A malformed rule must not take the whole worker down: invalid
        # JSON, a non-object value or a missing placeholder is logged and
        # the task is skipped.
        try:
            api_url = api_url.format(**json.loads(rule_content))
        except (ValueError, TypeError, KeyError, IndexError) as err:
            logger.error(
                "Invalid rule_content for rule {rule_id}: {err}".format(
                    rule_id=monitor_rule_id, err=repr(err)))
            continue

        # Fetch the data from the API.
        results = self.__fetch_api(api_url)
        if not results["success"]:
            logger.error(
                "Fetch API failed! {err}".format(err=results["reason"]))
            continue
        logger.debug("results: {}".format(results))

        # Extract the requested events; the parser answers with a dict
        # holding "success", "message" and "data".
        parse_result = EventParser.parse(event_type.split(","),
                                         results["data"])
        if not parse_result.get("success"):
            logger.error(parse_result.get("message"))
            continue

        # Queue the events so that they get stored.
        self.__put_task(
            task_priority, {
                "data": parse_result.get("data"),
                "monitor_rule_id": monitor_rule_id,
            })

    logger.info("{name} stop!".format(name=self.name))