# NOTE: these functions depend on module-level names that are not part of this
# excerpt (logger, WebDriver, SnapshotService, TrafficService, AccessibleService,
# MonitorWebsite / MonitorWebsiteDao, MonitorUrl / MonitorWeburlDao, KeywordDao,
# DB_Session). Standard-library / third-party imports used below:
import http.client

from bs4 import BeautifulSoup


def monitor_website(website, batch_num):
    monitor_website_dao = MonitorWebsiteDao()

    if len(website.domain_name) == 0:
        logger.info("website domain is empty! merchant_name: %s", website.merchant_name)
        monitor_website = MonitorWebsite()
        monitor_website.website_name = website.website_name
        monitor_website.merchant_name = website.merchant_name
        monitor_website.merchant_num = website.merchant_num
        monitor_website.domain_name = website.domain_name
        monitor_website.saler = website.saler
        monitor_website.batch_num = batch_num
        monitor_website.kinds = "首页是否可打开"
        monitor_website.level = '-'
        monitor_website.access = '异常'
        monitor_website.is_normal = '无法获取'
        monitor_website.outline = '商户域名为空。'
        monitor_website.pageview = '-'
        monitor_website_dao.add(monitor_website)
        return

    logger.info("website domain is not empty! domain_name: %s", website.domain_name)
    # Homepage monitoring
    service = TrafficService()
    access = AccessibleService()
    domain_names = str(website.domain_name)
    domain_name_list = domain_names.split(",")
    for domain_name in domain_name_list:
        # A fresh PhantomJS driver per domain: the finally block below quits it,
        # so reusing one driver across iterations would fail after the first domain.
        driver = WebDriver.get_phantomjs()
        try:
            logger.info("-------------------")
            logger.info("check whether website is available, domain_name: %s", domain_name)
            # Build the monitoring record for this domain (snapshot filled in later)
            monitor_website = MonitorWebsite()
            monitor_website.website_name = website.website_name
            monitor_website.merchant_name = website.merchant_name
            monitor_website.merchant_num = website.merchant_num
            monitor_website.saler = website.saler
            monitor_website.domain_name = domain_name
            monitor_website.batch_num = batch_num
            monitor_website.kinds = "首页是否可打开"
            monitor_website.level = '-'
            monitor_website.snapshot = ""

            logger.info("proxy access entry point reserved...")
            # domain_name_rich, current_url = access.get_proxy_access_res(domain_name)
            # if domain_name_rich is None:
            #     logger.info("retrying without proxy: %s", domain_name)
            #     domain_name_rich, current_url = access.get_access_res(domain_name)
            # else:
            #     logger.info("reachable via proxy: %s", domain_name_rich)
            domain_name_rich, current_url = access.get_access_res(domain_name)
            logger.info("domain_name: %s", domain_name)
            logger.info("domain_name_rich: %s", domain_name_rich)
            logger.info("current_url: %s", current_url)

            if domain_name_rich is not None:
                logger.info("domain: %s", str(domain_name_rich))
                monitor_website.access = '正常'
                monitor_website.is_normal = '正常'
                monitor_website.outline = '正常'
                monitor_website.level = '-'
                monitor_website.pageview = '-'
                monitor_website.batch_num = batch_num
                pageview = service.get_traffic(domain_name=domain_name_rich)
                monitor_website.pageview = pageview.reach_rank[0]
                try:
                    driver.get(domain_name_rich)
                    title = driver.title
                    snapshot = SnapshotService.create_snapshot(driver, batch_num, website, '网站')
                    monitor_website.snapshot = snapshot
                    if title in ('没有找到站点', '未备案提示'):
                        monitor_website.access = '异常'
                        monitor_website.is_normal = '异常'
                        monitor_website.outline = title
                        monitor_website.level = '高'
                    monitor_website_dao.add(monitor_website)
                except Exception as e:
                    logger.info(e)
                    monitor_website.access = '异常'
                    monitor_website.is_normal = '异常'
                    monitor_website.outline = '首页访问检测到异常'
                    monitor_website.level = '高'
                    monitor_website.pageview = '-'
                    monitor_website.snapshot = SnapshotService.simulation_404(domain_name)
                    monitor_website.batch_num = batch_num
                    monitor_website_dao.add(monitor_website)
            else:
                monitor_website.access = '异常'
                monitor_website.is_normal = '异常'
                monitor_website.outline = '首页访问检测到异常'
                monitor_website.level = '高'
                monitor_website.pageview = '-'
                monitor_website.batch_num = batch_num
                if current_url is None:
                    logger.info("snapshot 404")
                    monitor_website.snapshot = SnapshotService.simulation_404(domain_name)
                else:
                    chrome_driver = WebDriver.get_chrome()
                    try:
                        chrome_driver.get(current_url)
                        snapshot = SnapshotService.create_snapshot(chrome_driver, batch_num, website, '网站')
                        monitor_website.snapshot = snapshot
                    except Exception as e:
                        logger.error(e)
                        if str(e).find("timeout") != -1:
                            logger.info("request timed out")
                            monitor_website.outline = '访问超时'
                        else:
                            monitor_website.outline = str(e)
                        monitor_website.snapshot = SnapshotService.simulation_404(current_url)
                        monitor_website_dao.add(monitor_website)
                        return
                    finally:
                        chrome_driver.quit()
                logger.info("website is not available: %s, return!", domain_name)
                monitor_website_dao.add(monitor_website)
                return
        except Exception as e:
            logger.info(e)
            monitor_website.access = '异常'
            monitor_website.is_normal = '异常'
            monitor_website.outline = '巡检系统异常,建议手动重试!'
            monitor_website.level = '高'
            monitor_website_dao.add(monitor_website)
        finally:
            driver.quit()
def monitor_website(weburl, batch_num):
    # Content monitoring
    keyword_dao = KeywordDao()
    keywords = keyword_dao.get_all()
    access = AccessibleService()
    monitor_weburl_dao = MonitorWeburlDao()

    monitor_weburl = MonitorUrl()
    monitor_weburl.website_name = weburl.website_name
    monitor_weburl.domain_name = weburl.domain_name
    monitor_weburl.merchant_name = weburl.merchant_name
    monitor_weburl.merchant_num = weburl.merchant_num
    monitor_weburl.saler = weburl.saler
    monitor_weburl.url = weburl.url
    monitor_weburl.batch_num = batch_num
    monitor_weburl.title = weburl.title

    # Dead-link check: try a direct request first, then fall back to the proxy.
    reachable, current_url = access.get_access_res(weburl.url)
    use_proxy = False
    if reachable is None:
        logger.info("retrying via proxy: %s", weburl.url)
        reachable, current_url = access.get_proxy_access_res(weburl.url)
        use_proxy = True
    else:
        logger.info("reachable without proxy: %s", weburl.url)

    if reachable is None:
        logger.info("false 404 detected: %s", weburl.url)
        monitor_weburl.outline = '检测到误404'
        monitor_weburl.is_normal = '异常'
        monitor_weburl.level = '高'
        monitor_weburl.snapshot = SnapshotService.simulation_404(weburl.url)
        monitor_weburl.kinds = '死链接'
        monitor_weburl_dao.add(monitor_weburl)
        return

    logger.info("url is reachable: %s", weburl.url)
    # Screenshot: use the proxy-enabled driver only if the direct request failed above.
    if use_proxy:
        driver = WebDriver.get_proxy_chrome()
    else:
        driver = WebDriver.get_chrome()
    try:
        driver.get(weburl.url)
        snapshot = SnapshotService.snapshot_weburl(driver, batch_num, weburl, '网站内容')
        logger.info("snapshot: %s", snapshot)
        logger.info("monitor_weburl: %s", monitor_weburl)
        monitor_weburl.outline = '网页打开正常'
        monitor_weburl.is_normal = '正常'
        monitor_weburl.level = '-'
        monitor_weburl.snapshot = snapshot
        monitor_weburl.kinds = '是否能打开'
        monitor_weburl_dao.add(monitor_weburl)

        source = driver.page_source
        soup = BeautifulSoup(source, 'html.parser')
        # Sensitive-word check: search the visible page text
        # (soup.find(word) would look for a *tag* with that name, not text).
        page_text = soup.get_text()
        for keyword in keywords:
            if keyword.name in page_text:
                logger.info("sensitive word alert, page contains: %s", str(keyword.name))
                monitor_weburl.outline = '检测到敏感词:' + str(keyword.name)
                monitor_weburl.is_normal = '异常'
                monitor_weburl.level = '低'
                monitor_weburl.snapshot = snapshot
                monitor_weburl.kinds = '命中敏感词'
                monitor_weburl_dao.add(monitor_weburl)
        # Non-financial platform offering top-up / withdrawal / wallet features
        if '充值' in page_text:
            logger.info("sensitive word alert, page contains: %s", '充值')
            monitor_weburl.outline = '检测到包含充值、提现、钱包功能'
            monitor_weburl.is_normal = '异常'
            monitor_weburl.level = '低'
            monitor_weburl.snapshot = snapshot
            monitor_weburl.kinds = '非法功能'
            monitor_weburl_dao.add(monitor_weburl)
        # Misleading marketing claims
        if '融宝资金担保' in page_text or '融宝托管' in page_text:
            monitor_weburl.outline = '检测到误导宣传'
            monitor_weburl.is_normal = '异常'
            monitor_weburl.level = '中'
            monitor_weburl.snapshot = snapshot
            monitor_weburl.kinds = '误导宣传'
            monitor_weburl_dao.add(monitor_weburl)
    except Exception as e:
        logger.error(e)
        return
    finally:
        driver.quit()
def monitor_website(weburl, batch_num):
    # Content monitoring
    keyword_dao = KeywordDao()
    keywords = keyword_dao.get_all()
    monitor_weburl_dao = MonitorWeburlDao()

    monitor_weburl = MonitorUrl()
    monitor_weburl.website_name = weburl.website_name
    monitor_weburl.domain_name = weburl.domain_name
    monitor_weburl.merchant_name = weburl.merchant_name
    monitor_weburl.merchant_num = weburl.merchant_num
    monitor_weburl.saler = weburl.saler
    monitor_weburl.url = weburl.url
    monitor_weburl.batch_num = batch_num
    monitor_weburl.title = weburl.title

    driver = WebDriver.get_phantomjs()
    try:
        logger.info("monitor_url: %s", weburl.url)
        # Prepend a scheme if the stored URL has none.
        if not str(weburl.url).startswith("http"):
            weburl.url = "http://" + weburl.url
        logger.info("weburl.url: %s", weburl.url)

        driver.get(weburl.url)
        snapshot = SnapshotService.snapshot_weburl(driver, batch_num, weburl, '网站内容')
        logger.info("snapshot: %s", snapshot)
        monitor_weburl.outline = ''
        monitor_weburl.is_normal = '正常'
        monitor_weburl.level = '-'
        monitor_weburl.snapshot = snapshot
        monitor_weburl.kinds = '是否能打开'
        logger.info("monitor_url: add %s", weburl.url)

        source = driver.page_source
        # An empty document means the page did not actually load.
        if source == '<html><head></head><body></body></html>':
            monitor_weburl.outline = '网页打开异常'
            monitor_weburl.is_normal = '异常'
        monitor_weburl_dao.add(monitor_weburl)

        soup = BeautifulSoup(source, 'html.parser')
        # Sensitive-word check: search the visible page text
        # (soup.find(word) would look for a *tag* with that name, not text).
        page_text = soup.get_text()
        for keyword in keywords:
            if keyword.name in page_text:
                logger.info("sensitive word alert, there is [%s] in the url page!", str(keyword.name))
                monitor_weburl.outline = '检测到敏感词:' + str(keyword.name)
                monitor_weburl.is_normal = '异常'
                monitor_weburl.level = '低'
                monitor_weburl.snapshot = snapshot
                monitor_weburl.kinds = '命中敏感词'
                monitor_weburl_dao.add(monitor_weburl)
        # Non-financial platform offering top-up / withdrawal / wallet features
        if '充值' in page_text:
            logger.info("sensitive word alert, there is [%s] in the url page!", '充值')
            monitor_weburl.outline = '检测到包含充值、提现、钱包功能'
            monitor_weburl.is_normal = '异常'
            monitor_weburl.level = '低'
            monitor_weburl.snapshot = snapshot
            monitor_weburl.kinds = '非法功能'
            monitor_weburl_dao.add(monitor_weburl)
        # Misleading marketing claims
        if '融宝资金担保' in page_text or '融宝托管' in page_text:
            monitor_weburl.outline = '检测到误导宣传'
            monitor_weburl.is_normal = '异常'
            monitor_weburl.level = '中'
            monitor_weburl.snapshot = snapshot
            monitor_weburl.kinds = '误导宣传'
            monitor_weburl_dao.add(monitor_weburl)
    except Exception as e:
        # ERROR: No transaction is begun.
        logger.error(e)
        conn = DB_Session()
        try:
            logger.info("page 404 detected: %s", weburl.url)
            monitor_weburl.outline = '检测到页面404'
            monitor_weburl.is_normal = '异常'
            monitor_weburl.level = '高'
            monitor_weburl.snapshot = SnapshotService.simulation_404(weburl.url)
            monitor_weburl.kinds = '死链接'
            logger.info("monitor_url: Exception %s", weburl.url)
            monitor_weburl_dao.add(monitor_weburl)
        except Exception as e:
            logger.info(e)
            conn.rollback()
            raise
        finally:
            conn.close()
    finally:
        driver.quit()
def monitor_website(website, batch_num):
    monitor_website_dao = MonitorWebsiteDao()

    monitor_website = MonitorWebsite()
    monitor_website.website_name = website.website_name
    monitor_website.merchant_name = website.merchant_name
    monitor_website.merchant_num = website.merchant_num
    monitor_website.domain_name = website.domain_name
    monitor_website.saler = website.saler
    monitor_website.batch_num = batch_num
    monitor_website.kinds = "首页是否可打开"
    monitor_website.level = '高'
    monitor_website.access = '异常'
    monitor_website.is_normal = '异常'
    monitor_website.pageview = '-'

    if len(website.domain_name) == 0:
        logger.info("website domain is empty! merchant_name: %s", website.merchant_name)
        monitor_website.outline = '商户网址为空。'
        monitor_website_dao.add(monitor_website)
        return

    logger.info("domain_name is %s! Go to inspect...", website.domain_name)
    # Homepage monitoring
    domain_names = str(website.domain_name)
    domain_name_list = domain_names.split(",")
    for domain_name in domain_name_list:
        logger.info("-------------------")
        # The record is reused across domains, so reset the per-domain status
        # fields to the "abnormal until proven otherwise" defaults each time.
        monitor_website.level = '高'
        monitor_website.access = '异常'
        monitor_website.is_normal = '异常'

        # Derive the host part (dns) and a scheme-qualified URL (domain_name_rich).
        domain_name_rich = domain_name
        dns = domain_name
        if str(domain_name).startswith("http"):
            temp = domain_name[domain_name.find("/") + 2:]
            logger.info("domain without scheme: %s", temp)
            if str(temp).find("/") == -1:
                dns = temp
            else:
                dns = temp[0:temp.find("/")]
        else:
            if str(domain_name).find("/") != -1:
                dns = domain_name[0:domain_name.find("/")]
            domain_name_rich = "http://" + domain_name

        try:
            logger.info("dns: %s", dns)
            # Probe the homepage first with a plain HTTP request.
            conn = http.client.HTTPSConnection(dns, timeout=10)
            conn.request('GET', domain_name_rich)
            resp = conn.getresponse()
            code = resp.code
            logger.info("code: %s", code)
            if code == 200:
                logger.info("taking a snapshot with webdriver: %s ...", domain_name_rich)
                driver = WebDriver.get_phantomjs()
                try:
                    driver.get(domain_name_rich)
                    current_url = driver.current_url
                    title = driver.title
                    source = driver.page_source
                    snapshot = SnapshotService.create_snapshot(driver, batch_num, website, '网站')
                    logger.info("title: %s", title)
                    logger.info("current_url: %s", current_url)

                    if (str(current_url) == "about:blank"
                            and source == "<html><head></head><body></body></html>"
                            and title == ""):
                        logger.info("about:blank detected: %s", current_url)
                        monitor_website.outline = "网站疑似无法访问"
                        monitor_website.snapshot = SnapshotService.simulation_404(domain_name_rich)
                        monitor_website_dao.add(monitor_website)
                        continue

                    if str(current_url).find(domain_name_rich[7:]) == -1:
                        logger.info("possible redirect...: %s", current_url)
                        monitor_website.outline = "疑似跳转,检测到首页地址为:" + current_url
                        monitor_website.snapshot = snapshot
                        monitor_website_dao.add(monitor_website)
                        continue

                    monitor_website.snapshot = snapshot
                    logger.info("checking title and source...")
                    if '404' in title:
                        monitor_website.outline = "疑似异常,检测到404"
                    elif 'ERR_NAME_NOT_RESOLVED' in source:
                        monitor_website.outline = "疑似异常,Title信息:" + title
                    elif 'ERR_CONNECTION_REFUSED' in source:
                        monitor_website.outline = "疑似异常,检测到 ERR_CONNECTION_REFUSED"
                    elif 'ERR_CONNECTION_TIMED_OUT' in source:
                        monitor_website.outline = "疑似异常,检测到 ERR_CONNECTION_TIMED_OUT"
                    elif 'ERR_NAME_RESOLUTION_FAILED' in source:
                        monitor_website.outline = "疑似异常,检测到 ERR_NAME_RESOLUTION_FAILED"
                    elif 'DNS_PROBE_FINISHED_NXDOMAIN' in source:
                        monitor_website.outline = "疑似异常,检测到 DNS_PROBE_FINISHED_NXDOMAIN"
                    elif 'ERR_EMPTY_RESPONSE' in source:
                        monitor_website.outline = "疑似异常,检测到 ERR_EMPTY_RESPONSE"
                    elif '主机开设成功' in source:
                        monitor_website.outline = "疑似异常,检测到类似网站在建信息"
                    elif '非法阻断' in source:
                        monitor_website.outline = "疑似异常,检测到非法阻断"
                    elif 'Bad Request' in source:
                        monitor_website.outline = "疑似异常,检测到 Bad Request"
                    elif '404 page not found' in source:
                        monitor_website.outline = "疑似异常,检测到 404 page not found"
                    elif 'https://wanwang.aliyun.com/domain/parking' in source:
                        monitor_website.outline = "疑似异常,检测到阻断拦截"
                    elif '没有找到站点' in source:
                        monitor_website.outline = "疑似异常,没有找到站点"
                    elif '未备案提示' in source:
                        monitor_website.outline = "疑似异常,未备案提示"
                    elif source == "<html><head></head><body></body></html>" and title == "":
                        monitor_website.snapshot = SnapshotService.simulation_404(domain_name_rich)
                        monitor_website.outline = "疑似无法访问"
                    else:
                        monitor_website.outline = '检测正常'
                        monitor_website.access = '正常'
                        monitor_website.is_normal = '正常'
                        monitor_website.level = '-'
                    logger.info("outline: %s", monitor_website.outline)
                    monitor_website_dao.add(monitor_website)
                except Exception as e:
                    logger.error(e)
                    monitor_website.snapshot = SnapshotService.simulation_404(domain_name_rich)
                    monitor_website.outline = '访问超时,可能被目标网站屏蔽,建议手动验证!'
                    monitor_website_dao.add(monitor_website)
                finally:
                    driver.quit()
            else:
                logger.info("confirmed unreachable!")
                monitor_website.outline = '检测到网站异常'
                monitor_website.snapshot = SnapshotService.simulation_404(domain_name_rich)
                monitor_website_dao.add(monitor_website)
        except Exception as e:
            logger.error(e)
            logger.info("urlopen could not open the page..")
            monitor_website.outline = 'urlopen无法打开网站。'
            monitor_website.snapshot = SnapshotService.simulation_404(domain_name_rich)
            monitor_website_dao.add(monitor_website)
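

# A minimal usage sketch, not part of the original module: it assumes a
# hypothetical WebsiteDao with a get_all() method for the monitored merchants
# and derives the batch number from the current timestamp. Adjust both to the
# project's real entry point and scheduler.
if __name__ == '__main__':
    import time

    batch_num = time.strftime('%Y%m%d%H%M%S')  # e.g. '20240101120000'
    for website in WebsiteDao().get_all():     # WebsiteDao is an assumed DAO
        monitor_website(website, batch_num)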