def scheduleUrl(self):
    """
    Schedule every pending URL-level rule (run_type 1) against the task's URLs.

    A rule script has to define the function named by RUN_URL_DEFAULT_FUN
    (``run_url`` according to the original note). Rules whose id is already
    in ``self.finished_progress`` are skipped, and so are scripts for which
    ``attr_from_script`` returns a falsy value.

    For each remaining rule, one ``self.runUrl`` job per stored URL is
    spawned on ``self.pool``.
    """
    DEBUG("scheduleUrl start")
    sql = "SELECT `rule_id`,`risk`,`file_name` FROM `%s` WHERE `run_type` = 1 ORDER BY `priority`" % RULE_TABLE
    rules = [
        (str(rule.rule_id), rule.file_name, rule.risk)
        for rule in db.iter(sql)
        if str(rule.rule_id) not in self.finished_progress
    ]

    if not conf.spider_finish:
        # Spider has not finished yet: start the crawler before reading URLs.
        CrawlEngine.start()

    # Only the table name is interpolated into the statement. task_id is
    # bound as a query parameter (the "%%s" survives formatting as a "%s"
    # placeholder), the same way get_exist_url passes it to db.iter.
    sql = "SELECT `url`,`method`,`params`,`referer` FROM %s WHERE `task_id`=%%s" % URL_TABLE
    reqs = [
        Url(row.url, row.method, row.params, row.referer)
        for row in db.iter(sql, self.task_id)
    ]

    for rule_id, filename, risk in rules:
        run_url = attr_from_script(filename, RUN_URL_DEFAULT_FUN)
        if not run_url:
            # Script does not provide the entry point; nothing to schedule.
            continue
        DEBUG("rule_id:%s filename:%s run_url start" % (rule_id, filename))
        for req in reqs:
            self.pool.spawn(self.runUrl, rule_id, run_url, req, filename, risk)
            # Yield to the gevent hub so already-spawned jobs can make progress.
            gevent.sleep(0)
        DEBUG("rule_id:%s filename:%s run_url end" % (rule_id, filename))

    DEBUG("scheduleUrl end")
def get_exist_url(self):
    """
    Return all rows of the ``url`` table that belong to this task.

    The task id is bound as a query parameter. Any exception raised while
    querying is logged through ERROR and an empty list is returned instead.
    """
    query = "SELECT * FROM url WHERE task_id=%s"
    try:
        rows = list(db.iter(query, self.task_id))
    except Exception:
        ERROR('get_url exception,task_id:%s' % self.task_id)
        return []
    return rows
def get_exist_url(self):
    """
    Return all rows of the ``url`` table that belong to this task.

    The task id is bound as a query parameter. Any exception raised while
    querying is logged through ERROR and an empty list is returned instead.
    """
    query = "SELECT * FROM url WHERE task_id=%s"
    try:
        rows = list(db.iter(query, self.task_id))
    except Exception:
        ERROR('get_url exception,task_id:%s' % self.task_id)
        return []
    return rows
def scheduleUrl(self):
    """
    Schedule every pending URL-level rule (run_type 1) against the task's URLs.

    A rule script has to define the function named by RUN_URL_DEFAULT_FUN
    (``run_url`` according to the original note). Rules whose id is already
    in ``self.finished_progress`` are skipped, and so are scripts for which
    ``attr_from_script`` returns a falsy value.

    For each remaining rule, one ``self.runUrl`` job per stored URL is
    spawned on ``self.pool``.
    """
    DEBUG("scheduleUrl start")
    sql = "SELECT `rule_id`,`risk`,`file_name` FROM `%s` WHERE `run_type` = 1 ORDER BY `priority`" % RULE_TABLE
    rules = [
        (str(rule.rule_id), rule.file_name, rule.risk)
        for rule in db.iter(sql)
        if str(rule.rule_id) not in self.finished_progress
    ]

    if not conf.spider_finish:
        # Spider has not finished yet: start the crawler before reading URLs.
        CrawlEngine.start()

    # Only the table name is interpolated into the statement. task_id is
    # bound as a query parameter (the "%%s" survives formatting as a "%s"
    # placeholder), the same way get_exist_url passes it to db.iter.
    sql = "SELECT `url`,`method`,`params`,`referer` FROM %s WHERE `task_id`=%%s" % URL_TABLE
    reqs = [
        Url(row.url, row.method, row.params, row.referer)
        for row in db.iter(sql, self.task_id)
    ]

    for rule_id, filename, risk in rules:
        run_url = attr_from_script(filename, RUN_URL_DEFAULT_FUN)
        if not run_url:
            # Script does not provide the entry point; nothing to schedule.
            continue
        DEBUG("rule_id:%s filename:%s run_url start" % (rule_id, filename))
        for req in reqs:
            self.pool.spawn(self.runUrl, rule_id, run_url, req, filename, risk)
            # Yield to the gevent hub so already-spawned jobs can make progress.
            gevent.sleep(0)
        DEBUG("rule_id:%s filename:%s run_url end" % (rule_id, filename))

    DEBUG("scheduleUrl end")
def scheduleDomain(self):
    """
    Schedule every pending domain-level rule (run_type 2) on the worker pool.

    A rule script has to define the function named by RUN_DOMAIN_DEFAULT_FUN
    (``run_domain`` according to the original note). Rules whose id is
    already in ``self.finished_progress`` are skipped, and so are scripts
    for which ``attr_from_script`` returns a falsy value.
    """
    DEBUG("scheduleDomain start")
    query = "SELECT `rule_id`,`risk`,`file_name` FROM `%s` WHERE `run_type` = 2 ORDER BY `priority`" % RULE_TABLE

    # Collect the rules first so the query is fully consumed before spawning.
    pending = []
    for row in db.iter(query):
        rule_key = str(row.rule_id)
        if rule_key in self.finished_progress:
            continue
        pending.append((rule_key, row.file_name, row.risk))

    for rule_id, filename, risk in pending:
        run_domain = attr_from_script(filename, RUN_DOMAIN_DEFAULT_FUN)
        if not run_domain:
            # Script does not provide the entry point; nothing to schedule.
            continue
        DEBUG("rule_id:%s filename:%s run_domain start" % (rule_id, filename))
        self.pool.spawn(self.runDomain, rule_id, run_domain, filename, risk)
        # Yield to the gevent hub so the spawned job can start.
        gevent.sleep(0)
        DEBUG("rule_id:%s filename:%s run_domain end" % (rule_id, filename))

    DEBUG("scheduleDomain end")
def scheduleDomain(self):
    """
    Schedule every pending domain-level rule (run_type 2) on the worker pool.

    A rule script has to define the function named by RUN_DOMAIN_DEFAULT_FUN
    (``run_domain`` according to the original note). Rules whose id is
    already in ``self.finished_progress`` are skipped, and so are scripts
    for which ``attr_from_script`` returns a falsy value.
    """
    DEBUG("scheduleDomain start")
    query = "SELECT `rule_id`,`risk`,`file_name` FROM `%s` WHERE `run_type` = 2 ORDER BY `priority`" % RULE_TABLE

    # Collect the rules first so the query is fully consumed before spawning.
    pending = []
    for row in db.iter(query):
        rule_key = str(row.rule_id)
        if rule_key in self.finished_progress:
            continue
        pending.append((rule_key, row.file_name, row.risk))

    for rule_id, filename, risk in pending:
        run_domain = attr_from_script(filename, RUN_DOMAIN_DEFAULT_FUN)
        if not run_domain:
            # Script does not provide the entry point; nothing to schedule.
            continue
        DEBUG("rule_id:%s filename:%s run_domain start" % (rule_id, filename))
        self.pool.spawn(self.runDomain, rule_id, run_domain, filename, risk)
        # Yield to the gevent hub so the spawned job can start.
        gevent.sleep(0)
        DEBUG("rule_id:%s filename:%s run_domain end" % (rule_id, filename))

    DEBUG("scheduleDomain end")