Пример #1
0
 def snapshot_qichacha(batch_num, url, website):
     """Load ``url`` in a local Chrome driver and save a screenshot of it.

     Two files are written under ``base_filepath``: ``<stem>.png`` (the full
     screenshot) and ``<stem>_thumb.bmp`` (the fixed crop box
     ``(265, 158, 420, 258)`` of that screenshot), where ``<stem>`` is built
     from the batch number, merchant name, merchant number and the current
     Unix timestamp.

     Args:
         batch_num: Batch identifier string used as the file-name prefix.
         url: Address of the page to open.
         website: Object providing the ``merchant_name`` and ``merchant_num``
             strings used in the file name.

     Returns:
         ``(driver, snapshot)`` on success: ``driver`` is the browser, which
         this function leaves open, and ``snapshot`` is the screenshot file
         name (without directory). ``(None, None)`` on any failure; in that
         case the browser, if it was started, has already been shut down.
     """
     timestamp = int(time.time())
     stem = (batch_num + "_" + website.merchant_name + "_"
             + website.merchant_num + "_工商_" + str(timestamp))
     snapshot = stem + ".png"
     path = base_filepath + "/" + stem
     driver = None
     try:
         driver = WebDriver.get_chrome_by_local()
         driver.set_page_load_timeout(60)
         driver.set_script_timeout(60)
         driver.maximize_window()
         driver.get(url)
         driver.save_screenshot(path + ".png")
         # Save inside the ``with`` so the source file is still open while
         # the cropped region is written, and is closed afterwards.
         with Image.open(path + ".png") as img:
             img.crop((265, 158, 420, 258)).save(path + "_thumb.bmp")
         return driver, snapshot
     except Exception:
         logger.exception("qichacha snapshot failed for url: %s", url)
         # The caller only receives (None, None) and cannot clean up, so the
         # browser must be shut down here to avoid leaking the process.
         if driver is not None:
             try:
                 driver.quit()
             except Exception:
                 logger.exception("failed to quit chrome driver")
         return None, None
 def _save_qichacha_thumb(driver, path):
     """Write a screenshot to ``path``.png and its crop to ``path``_thumb.bmp."""
     driver.save_screenshot(path + ".png")
     # Save inside the ``with`` so the source file is still open while the
     # cropped region is written, and is closed afterwards.
     with Image.open(path + ".png") as img:
         img.crop((265, 158, 420, 258)).save(path + "_thumb.bmp")


 def _add_qichacha_monitor_record(dao, batch_num, website, snapshot,
                                  is_normal, outline):
     """Store one '企业是否可查' MonitorBc record for ``website`` via ``dao``."""
     dao.add(MonitorBc(batch_num=batch_num,
                       merchant_name=website.merchant_name,
                       merchant_num=website.merchant_num,
                       website_name=website.website_name,
                       domain_name=website.domain_name,
                       saler=website.saler,
                       snapshot=snapshot,
                       is_normal=is_normal,
                       kinds='企业是否可查',
                       level='-',
                       outline=outline,
                       create_time=datetime.datetime.now()))


 def get_merchant_url(batch_num, website):
     """Search qichacha.com for the merchant and return its detail link.

     After a random 20-30 second pause, the merchant name is typed into the
     site's search box and the first row of the ``search-result`` table is
     inspected. Whenever no link can be returned, a screenshot plus thumbnail
     is saved under ``base_filepath`` and a ``MonitorBc`` record describing
     the outcome is stored through ``MonitorBcDao``.

     Args:
         batch_num: Batch identifier string; used in the screenshot file name
             and stored on the record.
         website: Object providing ``merchant_name``, ``merchant_num``,
             ``website_name``, ``domain_name`` and ``saler``.

     Returns:
         The stripped ``href`` of the first search result when its text equals
         the stripped merchant name and it carries an ``href``; otherwise
         ``None``. The browser is always shut down before returning.
     """
     monitor_bc_dao = MonitorBcDao()
     url = "https://www.qichacha.com"
     # Build the file names before starting the browser so that a bad
     # ``website`` object cannot leave a browser process behind.
     timestamp = int(time.time())
     stem = (batch_num + "_" + website.merchant_name + "_"
             + website.merchant_num + "_工商_" + str(timestamp))
     snapshot = stem + ".png"
     path = base_filepath + "/" + stem
     driver = WebDriver.get_chrome_by_local()
     try:
         # Configuration happens inside the try so ``finally`` still quits the
         # browser if any of these calls fails.
         driver.set_page_load_timeout(60)
         driver.set_script_timeout(60)
         driver.maximize_window()
         random_seconds = random.randint(20, 30)
         logger.info("企查查随机等待 %s 秒...", str(random_seconds))
         time.sleep(random_seconds)
         driver.get(url)
         driver.find_element_by_id("searchkey").send_keys(
             website.merchant_name)
         driver.find_element_by_id("V3_Search_bt").click()
         soup = BeautifulSoup(driver.page_source, 'html.parser')
         title = soup.find(name="title")
         # A missing title, the login page or a "405" page means the search
         # result page was not served.
         if title is None or str(title.get_text()) in ("会员登录 - 企查查", "405"):
             logger.info("qichacha res title :%s", str(title))
             _save_qichacha_thumb(driver, path)
             _add_qichacha_monitor_record(
                 monitor_bc_dao, batch_num, website, snapshot,
                 is_normal='异常',
                 outline='由于企查查反扒策略无法获取企业详情链接地址。')
             return None
         # Any missing element below raises IndexError and is reported by the
         # generic handler, exactly like other scraping failures.
         first_row = soup.find_all(id="search-result")[0].find_all('tr')[0]
         link = first_row.find_all('td')[2].find_all('a')[0]
         name = link.get_text().strip()
         href = link.get('href')
         # Test ``href`` itself: ``str(href) is not None`` was always true and
         # let a missing href crash on ``.strip()``.
         if name == website.merchant_name.strip() and href is not None:
             return href.strip()
         _save_qichacha_thumb(driver, path)
         _add_qichacha_monitor_record(
             monitor_bc_dao, batch_num, website, snapshot,
             is_normal='正常',
             outline='企查查没有查询到商户公司')
         return None
     except Exception as e:
         logger.error(e)
         # The browser may be the thing that failed, so the screenshot is
         # best-effort; the record must still be written.
         try:
             _save_qichacha_thumb(driver, path)
         except Exception as shot_error:
             logger.error(shot_error)
         _add_qichacha_monitor_record(
             monitor_bc_dao, batch_num, website, snapshot,
             is_normal='异常',
             outline='由于企查查反扒策略无法获取企业详情链接地址。建议手动进行验证。')
         return None
     finally:
         driver.quit()