def submit_task(task_data):
    conn('task').insert_one(task_data)
    task_id = str(task_data.pop("_id"))
    task_data["task_id"] = task_id
    celerytask.arl_task(options=task_data)
    return task_data
def post(self):
    """Add an asset scope (group)."""
    args = self.parse_args(add_asset_scope_fields)
    name = args.pop('name')
    scope = args.pop('scope')
    black_scope = args.pop('black_scope')
    black_scope_array = []
    if black_scope:
        black_scope_array = re.split(r",|\s", black_scope)
    scope_array = re.split(r",|\s", scope)
    for x in scope_array:
        if not utils.is_valid_domain(x):
            return utils.build_ret(ErrorMsg.DomainInvalid, {"scope": x})
    if not scope_array:
        return utils.build_ret(ErrorMsg.DomainInvalid, {"scope": ""})
    scope_data = {
        "name": name,
        "scope": scope,
        "scope_array": scope_array,
        "black_scope": black_scope,
        "black_scope_array": black_scope_array,
    }
    conn('asset_scope').insert(scope_data)
    scope_id = str(scope_data.pop("_id"))
    scope_data["scope_id"] = scope_id
    data = {"message": "success", "data": scope_data, "code": 200}
    return data
def set_domain_info_list(self):
    """Replace domain_info_list so it contains only newly discovered domains."""
    self.domain_info_list = []
    self.record_map = {}
    logger.info("start build domain monitor task, new domain {}".format(
        len(self.new_domain_set)))
    t1 = time.time()
    # Tag as a normal task so that build_domain_info does its work.
    self.task_tag = "task"
    new = self.build_domain_info(self.new_domain_set)
    new = self.clear_domain_info_by_record(new)
    self.task_tag = "monitor"
    elapse = time.time() - t1
    logger.info("end build domain monitor task {}, elapse {}".format(
        len(new), elapse))
    # Delete the domains inserted by the earlier steps.
    conn('domain').delete_many({"task_id": self.task_id})
    # Re-save the newly discovered domains.
    self.save_domain_info_list(new, CollectSource.MONITOR)
    self.domain_info_list = new
def test_exec_task(self):
    submit_task(task_data)
    query = {"task_id": task_data["task_id"]}
    if task_data["options"]["port_scan"]:
        self.assertTrue(len(list(conn("site").find(query))) >= 1)
        self.assertTrue(len(list(conn("ip").find(query))) >= 1)
def add_job(domain, scope_id, options=None, interval=60 * 1, name="",
            scope_type=AssetScopeType.DOMAIN):
    logger.info("add {} job {} {} {}".format(scope_type, interval, domain, scope_id))
    if options is None:
        if scope_type == AssetScopeType.DOMAIN:
            options = domain_monitor_options
        if scope_type == AssetScopeType.IP:
            options = ip_monitor_options
    current_time = int(time.time()) + 30
    item = {
        "domain": domain,
        "scope_id": scope_id,
        "interval": interval,
        "next_run_time": current_time,
        "next_run_date": utils.time2date(current_time),
        "last_run_time": 0,
        "last_run_date": "-",
        "run_number": 0,
        "status": SchedulerStatus.RUNNING,
        "monitor_options": options,
        "name": name,
        "scope_type": scope_type
    }
    conn('scheduler').insert(item)
    return str(item["_id"])
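A minimal usage sketch for add_job; the scope id, interval, and name are illustrative placeholders, not values from the repo:

# Hypothetical caller: register an hourly domain monitor job.
# "5f9b1c2d3e4f5a6b7c8d9e0f" is a placeholder ObjectId string.
job_id = add_job(
    domain="example.com",
    scope_id="5f9b1c2d3e4f5a6b7c8d9e0f",
    interval=60 * 60,
    name="example asset group",
    scope_type=AssetScopeType.DOMAIN,
)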
def run_forever():
    logger.info("start scheduler server")
    while True:
        curr_time = int(time.time())
        for item in all_job():
            if item.get("status") == SchedulerStatus.STOP:
                continue
            if item["next_run_time"] <= curr_time:
                domain = item["domain"]
                scope_id = item["scope_id"]
                options = item["monitor_options"]
                name = item["name"]
                scope_type = item.get("scope_type")
                if not scope_type:
                    scope_type = AssetScopeType.DOMAIN
                submit_job(domain=domain, job_id=str(item["_id"]),
                           scope_id=scope_id, options=options,
                           name=name, scope_type=scope_type)
                item["next_run_time"] = curr_time + item["interval"]
                item["next_run_date"] = utils.time2date(item["next_run_time"])
                query = {"_id": item["_id"]}
                conn('scheduler').find_one_and_replace(query, item)
        logger.info(time.time())
        time.sleep(30)
def insert_task_data(self):
    celery_id = ""
    if current_task._get_current_object():
        celery_id = current_task.request.id
    task_data = {
        'name': self.task_name,
        'target': self.ip_target,
        'start_time': '-',
        'end_time': '-',
        'status': TaskStatus.WAITING,
        'type': 'ip',
        'options': {
            "port_scan_type": "test",
            "port_scan": True,
            "service_detection": False,
            "os_detection": False,
            "site_identify": False,
            "site_capture": False,
            "file_leak": False,
            "site_spider": False,
            "ssl_cert": False,
            'scope_id': self.scope_id
        },
        'celery_id': celery_id
    }
    if self.options is None:
        self.options = {}
    task_data["options"].update(self.options)
    conn('task').insert_one(task_data)
    self.task_id = str(task_data.pop("_id"))
def build_data(self, args=None, collection=None):
    default_field = self.get_default_field(args)
    page = default_field.get("page", 1)
    size = default_field.get("size", 10)
    orderby_list = default_field.get('order', [("_id", -1)])
    query = self.build_db_query(args)
    result = conn(collection).find(query).sort(orderby_list).skip(
        size * (page - 1)).limit(size)
    count = conn(collection).count(query)
    items = self.build_return_items(result)
    special_keys = ["_id", "save_date", "update_date"]
    for key in query:
        if key in special_keys:
            query[key] = str(query[key])
    data = {
        "page": page,
        "size": size,
        "total": count,
        "items": items,
        "query": query,
        "code": 200
    }
    return data
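A hedged sketch of how a list endpoint might call build_data from inside a resource subclass; the args dict and collection name are illustrative, and get_default_field / build_db_query are assumed to behave as their names suggest:

# Illustrative call: page 2, 20 rows per page, filtered on a hypothetical
# "domain" field of the "domain" collection.
args = {"page": 2, "size": 20, "domain": "example.com"}
data = self.build_data(args=args, collection="domain")
# data["items"] holds rows 21-40 sorted by _id descending;
# data["total"] is the unpaginated match count.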
def wrap_domain_executors(base_domain=None, job_id=None, scope_id=None, options=None, name=""):
    celery_id = ""
    if current_task._get_current_object():
        celery_id = current_task.request.id
    task_data = {
        'name': name,
        'target': base_domain,
        'start_time': '-',
        'status': 'waiting',
        'type': 'domain',
        'task_tag': 'monitor',  # mark as a monitor task
        'options': {
            'domain_brute': True,
            'domain_brute_type': 'test',
            'riskiq_search': False,
            'alt_dns': False,
            'arl_search': True,
            'port_scan_type': 'test',
            'port_scan': True,
            'service_detection': False,
            'service_brute': False,
            'os_detection': False,
            'site_identify': False,
            'site_capture': False,
            'file_leak': False,
            'site_spider': False,
            'search_engines': False,
            'ssl_cert': False,
            'fofa_search': False,
            'scope_id': scope_id
        },
        'celery_id': celery_id
    }
    if options is None:
        options = {}
    task_data["options"].update(options)
    conn('task').insert_one(task_data)
    task_id = str(task_data.pop("_id"))
    domain_executor = DomainExecutor(base_domain, task_id, task_data["options"])
    try:
        update_job_run(job_id)
        new_domain = domain_executor.run()
        if new_domain:
            sync_asset(task_id, scope_id, update_flag=True)
    except Exception as e:
        logger.exception(e)
        domain_executor.update_task_field("status", TaskStatus.ERROR)
        domain_executor.update_task_field("end_time", utils.curr_date())
    logger.info("end domain_executors {} {} {}".format(base_domain, scope_id, options))
def post(self):
    """Add an asset scope (group)."""
    args = self.parse_args(add_asset_scope_fields)
    name = args.pop('name')
    scope = args.pop('scope')
    black_scope = args.pop('black_scope')
    scope_type = args.pop('scope_type')
    if scope_type not in [AssetScopeType.IP, AssetScopeType.DOMAIN]:
        scope_type = AssetScopeType.DOMAIN
    black_scope_array = []
    if black_scope:
        black_scope_array = re.split(r",|\s", black_scope)
    scope_array = re.split(r",|\s", scope)
    # Drop empty strings left over from the split.
    scope_array = list(filter(None, scope_array))
    new_scope_array = []
    for x in scope_array:
        if scope_type == AssetScopeType.DOMAIN:
            if not utils.is_valid_domain(x):
                return utils.build_ret(ErrorMsg.DomainInvalid, {"scope": x})
            new_scope_array.append(x)
        if scope_type == AssetScopeType.IP:
            transfer = utils.ip.transfer_ip_scope(x)
            if transfer is None:
                return utils.build_ret(ErrorMsg.ScopeTypeIsNotIP, {"scope": x})
            new_scope_array.append(transfer)
    if not new_scope_array:
        return utils.build_ret(ErrorMsg.DomainInvalid, {"scope": ""})
    scope_data = {
        "name": name,
        "scope_type": scope_type,
        "scope": ",".join(new_scope_array),
        "scope_array": new_scope_array,
        "black_scope": black_scope,
        "black_scope_array": black_scope_array,
    }
    conn('asset_scope').insert(scope_data)
    scope_id = str(scope_data.pop("_id"))
    scope_data["scope_id"] = scope_id
    return utils.build_ret(ErrorMsg.Success, scope_data)
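An illustrative request body for this endpoint; the field names follow add_asset_scope_fields as parsed above, and the "domain" value for scope_type is an assumption about how AssetScopeType serializes on the wire:

# Hypothetical POST payload; all values are placeholders.
payload = {
    "name": "example group",
    "scope": "example.com,www.example.com",
    "black_scope": "test.example.com",
    "scope_type": "domain",  # assumed string value of AssetScopeType.DOMAIN
}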
def update_job_run(job_id):
    curr_time = int(time.time())
    item = find_job(job_id)
    if not item:
        return
    item["next_run_time"] = curr_time + item["interval"]
    item["next_run_date"] = utils.time2date(item["next_run_time"])
    item["last_run_time"] = curr_time
    item["last_run_date"] = utils.time2date(curr_time)
    item["run_number"] += 1
    query = {"_id": item["_id"]}
    conn('scheduler').find_one_and_replace(query, item)
def submit_task(task_data):
    conn('task').insert_one(task_data)
    task_id = str(task_data.pop("_id"))
    task_data["task_id"] = task_id
    celery_action = CeleryAction.DOMAIN_TASK
    if task_data["type"] == "domain":
        celery_action = CeleryAction.DOMAIN_TASK
    elif task_data["type"] == "ip":
        celery_action = CeleryAction.IP_TASK
    task_options = {
        "celery_action": celery_action,
        "data": task_data
    }
    celerytask.arl_task(options=task_options)
    return task_data
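A minimal sketch of calling submit_task; the task document mirrors the fields built by insert_task_data and wrap_domain_executors above, with placeholder values:

# Hypothetical caller: insert a task document and dispatch it to Celery.
task_data = {
    "name": "example scan",
    "target": "example.com",
    "start_time": "-",
    "end_time": "-",
    "status": TaskStatus.WAITING,
    "type": "domain",  # selects CeleryAction.DOMAIN_TASK above
    "options": {"domain_brute": True, "port_scan": True},
}
result = submit_task(task_data)
# result["task_id"] is the stringified MongoDB _id of the inserted document.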
def stop_job(job_id):
    item = find_job(job_id)
    if not item:
        return
    item["next_run_date"] = "-"
    item["next_run_time"] = sys.maxsize
    item["status"] = SchedulerStatus.STOP
    query = {"_id": ObjectId(job_id)}
    ret = conn('scheduler').find_one_and_replace(query, item)
    return ret
def recover_job(job_id):
    current_time = int(time.time()) + 30
    item = find_job(job_id)
    if not item:
        return
    next_run_time = current_time + item["interval"]
    item["next_run_date"] = utils.time2date(next_run_time)
    item["next_run_time"] = next_run_time
    item["status"] = SchedulerStatus.RUNNING
    query = {"_id": ObjectId(job_id)}
    ret = conn('scheduler').find_one_and_replace(query, item)
    return ret
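A short usage note on the pause/resume pair: stop_job parks next_run_time at sys.maxsize so run_forever never picks the job up, and recover_job reschedules it one interval out from the 30-second grace offset. Sketch, assuming job_id names an existing scheduler document:

job_id = "5f9b1c2d3e4f5a6b7c8d9e0f"  # placeholder ObjectId string
stop_job(job_id)     # next_run_time -> sys.maxsize, status -> STOP
recover_job(job_id)  # next_run_time -> now + 30s + interval, status -> RUNNING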
def sync_by_category(self, category):
    dist_collection = 'asset_{}'.format(category)
    for data in conn(category).find({"task_id": self.task_id}):
        query = {"scope_id": self.scope_id, category: data[category]}
        del data["_id"]
        data["scope_id"] = self.scope_id
        old = conn(dist_collection).find_one(query)
        if old is None:
            data["save_date"] = utils.curr_date_obj()
            data["update_date"] = data["save_date"]
            logger.info("sync {}, insert {} {} -> {}".format(
                category, data[category], self.task_id, self.scope_id))
            conn(dist_collection).insert_one(data)
        if old and self.update_flag:
            curr_date = utils.curr_date_obj()
            data["save_date"] = old.get("save_date", curr_date)
            data["update_date"] = curr_date
            if category == 'ip':
                if data.get("domain") and old.get("domain"):
                    old["domain"].extend(data["domain"])
                    data["domain"] = list(set(old["domain"]))
            logger.info("sync {}, replace {} {} -> {}".format(
                category, data[category], self.task_id, self.scope_id))
            conn(dist_collection).find_one_and_replace(query, data)
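A worked example of the ip-merge branch above: domain lists from the existing asset record and the incoming task record are concatenated and de-duplicated before the replace (set order is arbitrary):

old = {"domain": ["a.example.com", "b.example.com"]}
data = {"domain": ["b.example.com", "c.example.com"]}
old["domain"].extend(data["domain"])
data["domain"] = list(set(old["domain"]))
# data["domain"] now contains a/b/c.example.com exactly once each.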
def send_batch_export_file(self, task_id_list, _type):
    _type_map_field_name = {
        "site": "site",
        "domain": "domain",
        "ip": "ip",
        "url": "url"
    }
    items_set = set()
    field_name = _type_map_field_name.get(_type, "")
    for task_id in task_id_list:
        if not field_name:
            continue
        if not task_id:
            continue
        query = {"task_id": task_id}
        items = conn(_type).distinct(field_name, query)
        items_set |= set(items)
    return self.send_file(items_set, _type)
def all_job():
    items = []
    for item in conn('scheduler').find():
        items.append(item)
    return items
def delete_job(job_id):
    ret = conn("scheduler").delete_one({"_id": ObjectId(job_id)})
    return ret
def find_job(job_id):
    query = {"_id": ObjectId(job_id)}
    item = conn('scheduler').find_one(query)
    return item
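Every snippet above relies on a conn(collection) helper defined elsewhere in the repo. A minimal sketch under the assumption that it returns a pymongo Collection from one shared database handle; the URI and database name are placeholders:

from pymongo import MongoClient

_client = MongoClient("mongodb://127.0.0.1:27017")  # placeholder URI
_db = _client["arl"]  # placeholder database name

def conn(collection_name):
    # Return the named collection from the shared database handle.
    return _db[collection_name]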