def program_init(path):
    """Create the image directory at ``path`` and log the outcome.

    Args:
        path: Directory to create; passed unchanged to ``makedirs``
            (presumably ``os.makedirs`` -- confirm against the file's imports).

    Returns:
        None. An ``OSError`` raised by ``makedirs`` is caught and logged,
        never propagated to the caller.

    NOTE(review): every ``OSError`` (not only "already exists") is reported
    with the same "directory already exists" message; the exception text is
    appended to the log line so the real cause is still visible.
    """
    # Initialize the image directory.
    try:
        makedirs(path)
        L_LOGGER.info(u"图片目录建立成功。")
    except OSError as e:  # "as" form is valid on Python 2.6+ and Python 3
        L_LOGGER.error(u"%s图片目录已经存在。%s" % (path, e))
def program_init(path):
    """Create the image directory at ``path`` and log the outcome.

    Args:
        path: Directory to create; passed unchanged to ``makedirs``
            (presumably ``os.makedirs`` -- confirm against the file's imports).

    Returns:
        None. An ``OSError`` raised by ``makedirs`` is caught and logged,
        never propagated to the caller.

    NOTE(review): every ``OSError`` (not only "already exists") is reported
    with the same "directory already exists" message; the exception text is
    appended to the log line so the real cause is still visible.
    """
    # Initialize the image directory.
    try:
        makedirs(path)
        L_LOGGER.info(u"图片目录建立成功。")
    except OSError as e:  # "as" form is valid on Python 2.6+ and Python 3
        L_LOGGER.error(u"%s图片目录已经存在。%s" % (path, e))
def get_bugs_url():
    """Poll the bug listing pages in an endless loop and store unseen bugs.

    Pages are requested as ``base_url + page_number`` starting from page 1.
    For every row found in the listing table, an ``LBugs`` record with
    ``IsGet=0`` is added and committed unless a record with the same URL and
    name already exists.

    Loop control:
        * A ``tbody`` with no rows sets ``repeat_flag``; the next iteration
          then sleeps 3600 seconds and restarts from page 1.
        * A falsy response is retried on the same page after ``sleep_time``
          seconds; the delay grows by 60 seconds per consecutive failure and
          resets to 60 after a successful request.
        * Otherwise the function sleeps 0-5 seconds and moves to the next page.

    Returns:
        None. The loop has no exit condition, so the function only ends when
        an exception propagates; the exit message is logged and the session
        is closed on the way out.

    NOTE(review): ``session`` is presumably a SQLAlchemy session and ``etree``
    presumably ``lxml.etree`` -- confirm against the file's imports.
    """
    session = init_db()
    base_number = 1
    base_url = 'https://www.wooyun.org/bugs/page/'
    repeat_flag = False
    sleep_time = 60
    try:
        while True:
            if repeat_flag:
                # The previous page was empty: the pass is over, so wait an
                # hour and start again from the first page.
                L_LOGGER.info(u"漏洞获取轮询结束,休眠60分钟")
                base_number = 1
                repeat_flag = False
                sleep(3600)

            target_url = "%s%s" % (base_url, base_number)
            L_LOGGER.info(u"获取目标 %s 页面漏洞列表。" % target_url)
            content = http_request_get(url=target_url)

            if not content:
                # Log before sleeping so the message reports the delay that
                # is actually applied, then back off further for next time.
                L_LOGGER.error(u"页面无法访问 %s!休眠 %s 秒!" % (target_url, sleep_time))
                sleep(sleep_time)
                sleep_time += 60
                continue

            sleep_time = 60  # successful request: reset the back-off
            dom = etree.HTML(content.text)
            # Example of a single link below this node:
            # /html/body/div[5]/table[3]/tbody/tr[1]/td/a
            urls = dom.xpath("/html/body/div[5]/table[3]/tbody")
            for url in urls:
                if not len(url):
                    # No rows in the table body: end of the listing.
                    repeat_flag = True
                    break
                for u in url:
                    # u[1][0] is the first child of the row's second child;
                    # presumably the <a> inside the title cell -- verify
                    # against the page markup.
                    link = u[1][0]
                    bug_name = link.text
                    if not bug_name:
                        bug_name = u"邮件保护需重新爬取"
                    bug_url = "%s%s" % ("http://www.wooyun.org", link.get('href'))
                    is_exist = (
                        session.query(LBugs)
                        .filter(LBugs.BugUrl == bug_url)
                        .filter(LBugs.BugName == bug_name)
                        .count()
                    )
                    if not is_exist:
                        new_bug_url = LBugs(BugUrl=bug_url, BugName=bug_name, IsGet=0)
                        session.add(new_bug_url)
                        session.commit()
            else:
                # Runs only when the loop above was not cut short by an empty
                # page: pause briefly and advance to the next page.
                sleep(randint(0, 5))
                base_number += 1
    finally:
        # The loop never ends normally, so cleanup has to live here to run
        # when an exception (including KeyboardInterrupt) unwinds the loop.
        L_LOGGER.error(u"进程退出")
        session.close()
def get_bugs_url():
    """Poll the bug listing pages in an endless loop and store unseen bugs.

    Pages are requested as ``base_url + page_number`` starting from page 1.
    For every row found in the listing table, an ``LBugs`` record with
    ``IsGet=0`` is added and committed unless a record with the same URL and
    name already exists.

    Loop control:
        * A ``tbody`` with no rows sets ``repeat_flag``; the next iteration
          then sleeps 3600 seconds and restarts from page 1.
        * A falsy response is retried on the same page after ``sleep_time``
          seconds; the delay grows by 60 seconds per consecutive failure and
          resets to 60 after a successful request.
        * Otherwise the function sleeps 0-5 seconds and moves to the next page.

    Returns:
        None. The loop has no exit condition, so the function only ends when
        an exception propagates; the exit message is logged and the session
        is closed on the way out.

    NOTE(review): ``session`` is presumably a SQLAlchemy session and ``etree``
    presumably ``lxml.etree`` -- confirm against the file's imports.
    """
    session = init_db()
    base_number = 1
    base_url = 'https://www.wooyun.org/bugs/page/'
    repeat_flag = False
    sleep_time = 60
    try:
        while True:
            if repeat_flag:
                # The previous page was empty: the pass is over, so wait an
                # hour and start again from the first page.
                L_LOGGER.info(u"漏洞获取轮询结束,休眠60分钟")
                base_number = 1
                repeat_flag = False
                sleep(3600)

            target_url = "%s%s" % (base_url, base_number)
            L_LOGGER.info(u"获取目标 %s 页面漏洞列表。" % target_url)
            content = http_request_get(url=target_url)

            if not content:
                # Log before sleeping so the message reports the delay that
                # is actually applied, then back off further for next time.
                L_LOGGER.error(u"页面无法访问 %s!休眠 %s 秒!" % (target_url, sleep_time))
                sleep(sleep_time)
                sleep_time += 60
                continue

            sleep_time = 60  # successful request: reset the back-off
            dom = etree.HTML(content.text)
            # Example of a single link below this node:
            # /html/body/div[5]/table[3]/tbody/tr[1]/td/a
            urls = dom.xpath("/html/body/div[5]/table[3]/tbody")
            for url in urls:
                if not len(url):
                    # No rows in the table body: end of the listing.
                    repeat_flag = True
                    break
                for u in url:
                    # u[1][0] is the first child of the row's second child;
                    # presumably the <a> inside the title cell -- verify
                    # against the page markup.
                    link = u[1][0]
                    bug_name = link.text
                    if not bug_name:
                        bug_name = u"邮件保护需重新爬取"
                    bug_url = "%s%s" % ("http://www.wooyun.org", link.get('href'))
                    is_exist = (
                        session.query(LBugs)
                        .filter(LBugs.BugUrl == bug_url)
                        .filter(LBugs.BugName == bug_name)
                        .count()
                    )
                    if not is_exist:
                        new_bug_url = LBugs(BugUrl=bug_url, BugName=bug_name, IsGet=0)
                        session.add(new_bug_url)
                        session.commit()
            else:
                # Runs only when the loop above was not cut short by an empty
                # page: pause briefly and advance to the next page.
                sleep(randint(0, 5))
                base_number += 1
    finally:
        # The loop never ends normally, so cleanup has to live here to run
        # when an exception (including KeyboardInterrupt) unwinds the loop.
        L_LOGGER.error(u"进程退出")
        session.close()