Пример #1
0
 def get_proxy_access_res(url):
     if str(url).startswith("http"):
         http_url = str(url)
     else:
         http_url = "http://" + str(url)
     driver = WebDriver.get_proxy_chrome()
     if driver is None:
         return None, None
     else:
         try:
             logger.info("http_url: %s", http_url)
             driver.get(http_url)
             title = driver.title
             if title.__contains__('404') or driver.page_source.__contains__(
                     'ERR_NAME_NOT_RESOLVED') or driver.page_source.__contains__(
                 'ERR_CONNECTION_REFUSED') or driver.page_source.__contains__('ERR_CONNECTION_TIMED_OUT'):
                 return None, http_url
             else:
                 return http_url, driver.current_url
         except Exception as e:
             logger.error(e)
             return None, None
         finally:
             driver.quit()
    def monitor_website(weburl, batch_num):
        # 內容监控
        keyword_dao = KeywordDao()
        keywords = keyword_dao.get_all()
        access = AccessibleService()

        monitor_weburl_dao = MonitorWeburlDao()
        monitor_weburl = MonitorUrl()
        monitor_weburl.website_name = weburl.website_name
        monitor_weburl.domain_name = weburl.domain_name
        monitor_weburl.merchant_name = weburl.merchant_name
        monitor_weburl.merchant_num = weburl.merchant_num
        monitor_weburl.saler = weburl.saler
        monitor_weburl.url = weburl.url
        monitor_weburl.batch_num = batch_num
        monitor_weburl.title = weburl.title
        # 监测死链接
        reachable, current_url = access.get_access_res(weburl.url)
        use_proxy = False
        if reachable is None:
            logger.info("使用代理重试访问: %s", weburl.url)
            reachable, current_url = access.get_proxy_access_res(weburl.url)
            use_proxy = True
        else:
            logger.info("不使用代理可以访问: %s", weburl.url)
        if reachable is None:
            logger.info("检测到误404 : %s", weburl.url)
            monitor_weburl.outline = '检测到误404'
            monitor_weburl.is_normal = '异常'
            monitor_weburl.level = '高'
            snapshot = SnapshotService.simulation_404(weburl.url)
            monitor_weburl.snapshot = snapshot
            monitor_weburl.kinds = '死链接'
            monitor_weburl_dao.add(monitor_weburl)
            return
        else:
            logger.info("url可以访问: %s", weburl.url)
            #  截图
        if use_proxy:
            driver = WebDriver.get_proxy_chrome()
        else:
            driver = WebDriver.get_chrome()
        try:
            driver.get(weburl.url)
            snapshot = SnapshotService.snapshot_weburl(driver, batch_num,
                                                       weburl, '网站内容')
            print(snapshot)
            print(monitor_weburl)
            monitor_weburl.outline = '网页打开正常'
            monitor_weburl.is_normal = '正常'
            monitor_weburl.level = '-'
            monitor_weburl.snapshot = snapshot
            monitor_weburl.kinds = '是否能打开'
            monitor_weburl_dao.add(monitor_weburl)

            source = driver.page_source
            soup = BeautifulSoup(source, 'html.parser')
            # 监测页面敏感词
            for keyword in keywords:
                index = soup.find(keyword.name)
                if index is not None:
                    logger.info("senti url alert,there is : %s",
                                str(keyword.name))
                    monitor_weburl.outline = '检测到敏感词:' + str(keyword.name)
                    monitor_weburl.is_normal = '异常'
                    monitor_weburl.level = '低'
                    monitor_weburl.snapshot = snapshot
                    monitor_weburl.kinds = '命中敏感词'

                    monitor_weburl_dao.add(monitor_weburl)
            # 监测 非金融平台包含充值、提现、钱包功能
            illegal_fun = soup.find("充值")
            if illegal_fun is not None:
                logger.info("senti url alert,there is : %s", str("充值"))
                monitor_weburl.outline = '检测到包含充值、提现、钱包功能'
                monitor_weburl.is_normal = '异常'
                monitor_weburl.level = '低'
                monitor_weburl.snapshot = snapshot
                monitor_weburl.kinds = '非法功能'

                monitor_weburl_dao.add(monitor_weburl)
            # 监测 误导宣传
            mislead1 = soup.find("融宝资金担保")
            mislead2 = soup.find("融宝托管")
            if mislead1 is not None or mislead2 is not None:
                monitor_weburl.outline = '检测到误导宣传'
                monitor_weburl.is_normal = '异常'
                monitor_weburl.level = '中'
                monitor_weburl.snapshot = snapshot
                monitor_weburl.kinds = '误导宣传'

                monitor_weburl_dao.add(monitor_weburl)
        except Exception as e:
            logger.error(e)
            return
        finally:
            driver.quit()