def monitor(keyword, batch_num, website):
    """Search Baidu for *keyword* and feed matching result titles into sentiment processing.

    Takes a home-page snapshot, then scans the classic result headings
    (``h3.t``) and forwards every title containing *keyword*.
    """
    driver = WebDriver.get_chrome()
    senti_util = SentiUtil()
    url = "https://www.baidu.com/"
    if driver is None:
        senti_util.log_error("百度搜索", url, batch_num, website)
        return
    try:
        driver.get(url)
        search_box = driver.find_element_by_id("kw")
        search_box.send_keys(keyword)
        search_box.send_keys(Keys.RETURN)
        time.sleep(5)  # let the result page render before snapshotting
        senti_util.snapshot_home("百度搜索", url, batch_num, website, driver)
        page = BeautifulSoup(driver.page_source, 'html.parser')
        for heading in page.find_all('h3', class_='t'):
            anchor = heading.find("a")
            title = anchor.get_text()
            if keyword in title:
                senti_util.senti_process_text("百度搜索", title,
                                              str(anchor.get("href")),
                                              batch_num, website)
    except Exception as e:
        logger.error(e)
    finally:
        driver.quit()
def monitor(keyword, batch_num, website):
    """Search the paynews.net forum for *keyword* and process matching hits.

    Submits the forum search box, snapshots the page, then walks the
    ``.slst.mtw`` result list and forwards entries containing *keyword*.
    """
    driver = WebDriver.get_chrome()
    senti_util = SentiUtil()
    url = "http://paynews.net/search.php?mod=forum"
    if driver is None:
        senti_util.log_error("支付产业网", url, batch_num, website)
        return
    try:
        driver.get(url)
        search_box = driver.find_element_by_id("scform_srchtxt")
        search_box.send_keys(keyword)
        search_box.send_keys(Keys.RETURN)
        senti_util.snapshot_home("支付产业网", url, batch_num, website, driver)
        soup = BeautifulSoup(driver.page_source, 'html.parser')
        result_list = soup.find(attrs={'class': 'slst mtw'})
        if result_list is not None and len(result_list) > 0:
            for item in result_list.find_all('li'):
                anchor = item.find_all('a')[0]
                href = anchor.get("href")
                text = anchor.get_text()
                if keyword in text:
                    senti_util.senti_process_text(
                        "支付产业网", text, "http://paynews.net/" + href,
                        batch_num, website)
        else:
            logger.info("支付产业网没有搜索到数据: %s", keyword)
    except Exception as e:
        logger.error(e)
    finally:
        driver.quit()
def monitor(keyword, batch_num, website):
    """Search 聚投诉 (ts.21cn.com) complaints for *keyword* and process matching entries.

    Bug fix: matched result links were prefixed with
    "http://www.paycircle.cn" — a copy-paste from the 支付圈 scraper —
    even though the hrefs are scraped from ts.21cn.com. They now point
    back at the site that was actually searched.
    """
    driver = WebDriver.get_chrome()
    senti_util = SentiUtil()
    url = "http://ts.21cn.com/home/search?keyword=" + urllib.parse.quote(
        keyword)
    if driver is None:
        senti_util.log_error("聚投诉", url, batch_num, website)
        return
    try:
        driver.get(url)
        driver.implicitly_wait(3)
        source = driver.page_source
        senti_util.snapshot_home("聚投诉", url, batch_num, website, driver)
        soup = BeautifulSoup(source, 'html.parser')
        items = soup.find_all(attrs={'class': 'complain-item'})
        if len(items) > 0:
            for item in items:
                href = item.find_all('a')[1].get("href")
                content = item.find_all('a')[1].get_text()
                if content.find(keyword) != -1:
                    # href appears to be relative with its first char dropped
                    # (original code did href[1:]) — anchor it to ts.21cn.com.
                    senti_util.senti_process_text(
                        "聚投诉", content, "http://ts.21cn.com" + href[1:],
                        batch_num, website)
        else:
            logger.info("聚投诉没有搜索到数据: %s", keyword)
    except Exception as e:
        logger.error(e)
    finally:
        driver.quit()
def monitor(keyword, batch_num, website):
    """Search 黑猫投诉 (Sina consumer complaints) for *keyword* and process matching entries."""
    driver = WebDriver.get_chrome()
    senti_util = SentiUtil()
    url = ("https://tousu.sina.com.cn/index/search/?keywords="
           + urllib.parse.quote(keyword) + "&t=0")
    if driver is None:
        senti_util.log_error("黑猫投诉", url, batch_num, website)
        return
    try:
        driver.get(url)
        page_html = driver.page_source
        senti_util.snapshot_home("黑猫投诉", url, batch_num, website, driver)
        soup = BeautifulSoup(page_html, 'html.parser')
        complaints = soup.find_all(attrs={'class': 'blackcat-con'})
        if not complaints:
            logger.info("黑猫投诉没有搜索到数据: %s", keyword)
        else:
            for entry in complaints:
                link = entry.find_all('a')[0].get("href")
                text = entry.find_all('h1')[0].get_text()
                if keyword in text:
                    senti_util.senti_process_text("黑猫投诉", text, link,
                                                  batch_num, website)
    except Exception as e:
        logger.error(e)
    finally:
        driver.quit()
def monitor(keyword, batch_num, website):
    """Look up *keyword* on Baidu Baike and process the entry description if one exists.

    If the "sorry" (no-entry) paragraph is absent, the page's
    ``description`` meta content is forwarded; otherwise only a snapshot
    is taken.

    Bug fix: ``soup.find(attrs={"name": "description"})`` can return
    ``None`` (or a tag without ``content``); the old code then raised
    ``TypeError``/``KeyError`` which the broad ``except`` silently
    swallowed. The lookup is now guarded.
    """
    driver = WebDriver.get_chrome()
    senti_util = SentiUtil()
    url = 'https://baike.baidu.com/item/%s' % urllib.parse.quote(keyword)
    if driver is None:
        senti_util.log_error("百度百科", url, batch_num, website)
        return
    try:
        driver.get(url)
        soup = BeautifulSoup(driver.page_source, 'html.parser')
        # The "sorryCont" paragraph is only present when no entry exists.
        missing = soup.find_all(name='p',
                                attrs={'class': re.compile('sorryCont')})
        if len(missing) == 0:
            meta = soup.find(attrs={"name": "description"})
            if meta is not None and meta.get('content') is not None:
                senti_util.senti_process_text("百度百科", meta['content'],
                                              url, batch_num, website)
            else:
                logger.info("百度百科没有搜索到数据: %s", keyword)
        else:
            senti_util.snapshot_home("百度百科", url, batch_num, website,
                                     driver)
            logger.info("百度百科没有搜索到数据: %s", keyword)
    except Exception as e:
        logger.error(e)
    finally:
        driver.quit()
def monitor(keyword, batch_num, website):
    """Search 裁判文书网 (China Judgements Online) for *keyword* and process matching case titles.

    Bug fix: the driver-unavailable error was logged under the site name
    "黑猫投诉" — a copy-paste from another monitor — and now correctly
    reports "裁判文书网".
    """
    driver = WebDriver.get_chrome()
    senti_util = SentiUtil()
    url = "http://wenshu.court.gov.cn/"
    if driver is None:
        senti_util.log_error("裁判文书网", url, batch_num, website)
        return
    try:
        driver.get(url)
        search_text_blank = driver.find_element_by_xpath(
            "//*[@class='searchKey search-inp']")
        search_text_blank.send_keys(keyword)
        search_text_blank.send_keys(Keys.RETURN)
        time.sleep(3)  # wait for the result list to render
        source = driver.page_source
        senti_util.snapshot_home("裁判文书网", url, batch_num, website, driver)
        soup = BeautifulSoup(source, 'html.parser')
        for a_tag in soup.find_all('a', class_='caseName'):
            href = a_tag.get("href")
            title = a_tag.get_text()
            if title.find(keyword) != -1:
                # href[2:] strips the leading "./" before anchoring the link.
                senti_util.senti_process_text(
                    "裁判文书网", title,
                    "http://wenshu.court.gov.cn/website/wenshu" + href[2:],
                    batch_num, website)
    except Exception as e:
        logger.error(e)
    finally:
        driver.quit()
def monitor(keyword, batch_num, website):
    """Search the 支付圈 company directory for *keyword* and process matching rows."""
    driver = WebDriver.get_chrome()
    senti_util = SentiUtil()
    url = ("http://www.paycircle.cn/company/search.php?kw="
           + urllib.parse.quote(keyword) + "&c=SearchList&")
    if driver is None:
        senti_util.log_error("支付圈", url, batch_num, website)
        return
    try:
        driver.get(url)
        page_html = driver.page_source
        senti_util.snapshot_home("支付圈", url, batch_num, website, driver)
        soup = BeautifulSoup(page_html, 'html.parser')
        tables = soup.find_all(attrs={'class': 'list'})
        if not tables:
            logger.info("支付圈没有搜索到数据: %s", keyword)
        else:
            for row in tables[0].find_all('tr'):
                cell = row.find_all('td')[2]
                link = cell.find_all('a')[0].get("href")
                text = cell.find_all('li')[1].get_text()
                if keyword in text:
                    senti_util.senti_process_text("支付圈", text, link,
                                                  batch_num, website)
    except Exception as e:
        logger.error(e)
    finally:
        driver.quit()
def monitor(keyword, website_name, batch_num, merchant_name, merchant_num):
    """Search Baidu Tieba for *keyword* and process matching thread titles."""
    driver = WebDriver.get_chrome()
    try:
        senti_util = SentiUtil()
        url = ("http://tieba.baidu.com/f?fr=wwwt&kw="
               + urllib.parse.quote(keyword))
        driver.get(url)
        senti_util.snapshot_home("百度贴吧", website_name, url, batch_num,
                                 merchant_name, merchant_num, driver)
        soup = BeautifulSoup(driver.page_source, 'html.parser')
        threads = soup.find_all(
            "div", attrs={'class': 'threadlist_title pull_left j_th_tit '})
        if not threads:
            logger.info("百度贴吧没有搜索到数据: %s", keyword)
        else:
            for thread in threads:
                anchor = thread.find_all('a')[0]
                href = anchor.get("href")
                text = anchor.get_text()
                if keyword in text:
                    senti_util.senti_process_text(
                        "百度贴吧", website_name, text,
                        "http://tieba.baidu.com" + href, batch_num,
                        merchant_name, merchant_num)
    except Exception as e:
        logger.error(e)
    finally:
        driver.quit()
def monitor(website_name, merchant_name, batch_num):
    """Search 交易中国 news for *website_name*; process matches until the batch is cancelled.

    Bug fix: ``driver`` was first assigned inside ``try``, so a failure
    in ``WebDriver.get_chrome()`` made the ``finally`` clause itself
    raise ``NameError`` on ``driver.quit()`` (or ``AttributeError`` if
    ``None`` was returned). The reference is now initialised up front
    and the quit is guarded.
    """
    driver = None
    try:
        driver = WebDriver.get_chrome()
        senti_util = SentiUtil()
        url = "http://www.chinaft.com.cn/news/search/_1.shtml?key=" + urllib.parse.quote(
            website_name)
        driver.get(url)
        source = driver.page_source
        senti_util.snapshot_home("交易中国", merchant_name, url, batch_num,
                                 driver)
        soup = BeautifulSoup(source, 'html.parser')
        news = soup.find_all("div",
                             attrs={'class': 'xixi_ChinaFT_left_news_box'})
        if len(news) > 0:
            for new in news:
                # Stop early when this batch has been cancelled elsewhere.
                if not gl.check_by_batch_num(batch_num):
                    break
                href = new.find_all('a')[1].get("href")
                content = new.find_all('a')[1].get_text()
                if content.find(website_name) != -1:
                    senti_util.senti_process_text(
                        "交易中国", merchant_name, content,
                        "http://www.chinaft.com.cn" + href, batch_num)
        else:
            logger.info("交易中国没有搜索到数据: %s", merchant_name)
    except Exception as e:
        logger.error(e)
    finally:
        if driver is not None:
            driver.quit()
def monitor(website_name, merchant_name, batch_num):
    """Search 网贷天眼 for *website_name* and process matching results.

    Bug fix: ``driver`` was first assigned inside ``try``, so a failure
    in ``WebDriver.get_chrome()`` made ``finally: driver.quit()`` raise
    ``NameError``. The reference is now initialised up front and the
    quit is guarded.
    """
    driver = None
    try:
        driver = WebDriver.get_chrome()
        senti_util = SentiUtil()
        url = ("https://www.p2peye.com/search.php?mod=zonghe&srchtxt="
               + urllib.parse.quote(website_name))
        driver.get(url)
        source = driver.page_source
        senti_util.snapshot_home("网贷天眼", merchant_name, url, batch_num,
                                 driver)
        soup = BeautifulSoup(source, 'html.parser')
        news = soup.find_all(attrs={'class': 'result-t'})
        if len(news) > 0:
            for new in news:
                href = new.find_all('a')[0].get("href")
                content = new.get_text()
                if content.find(website_name) != -1:
                    # href[2:] strips the protocol-relative "//" prefix.
                    senti_util.senti_process_text("网贷天眼", merchant_name,
                                                  content,
                                                  "http://" + href[2:],
                                                  batch_num)
        else:
            logger.info("网贷天眼没有搜索到数据: %s", merchant_name)
    except Exception as e:
        logger.error(e)
    finally:
        if driver is not None:
            driver.quit()
def monitor(keyword, batch_num, website):
    """Search 支付界 for *keyword* and process matching articles.

    Bug fixes:
    - matched result links were prefixed with "http://www.paycircle.cn"
      (copy-paste from the 支付圈 scraper) although the hrefs come from
      zhifujie.com; they now point at the site actually searched.
    - the diagnostic snapshot taken inside ``except`` could itself raise
      (the driver is likely already broken) and escape the handler; it
      is now best-effort.
    """
    driver = WebDriver.get_chrome()
    senti_util = SentiUtil()
    url = "http://www.zhifujie.com/search/search"
    if driver is None:
        senti_util.log_error("支付界", url, batch_num, website)
        return
    try:
        driver.get(url)
        search_text_blank = driver.find_element_by_id("searchbox")
        search_text_blank.send_keys(keyword)
        driver.find_element_by_xpath(
            '//button[contains(text(), "搜索")]').click()
        time.sleep(5)  # wait for the search results to render
        source = driver.page_source
        senti_util.snapshot_home("支付界", url, batch_num, website, driver)
        soup = BeautifulSoup(source, 'html.parser')
        items = soup.find_all(attrs={'class': 'main-news-content-item'})
        if len(items) > 0:
            for item in items:
                href = item.find_all('a')[1].get("href")
                content = item.find_all('a')[1].get_text()
                if content.find(keyword) != -1:
                    # href is relative with its first char stripped; anchor it
                    # to zhifujie.com, the site these results come from.
                    senti_util.senti_process_text(
                        "支付界", content,
                        "http://www.zhifujie.com" + href[1:],
                        batch_num, website)
        else:
            logger.info("支付界没有搜索到数据: %s", keyword)
    except Exception as e:
        logger.error(e)
        try:
            # Best-effort diagnostic snapshot; must not mask the error above.
            senti_util.snapshot_home("支付界", url, batch_num, website, driver)
        except Exception:
            pass
    finally:
        driver.quit()
def monitor(website_name, merchant_name, batch_num):
    """Search 网贷之家 for *website_name*; process matches until the batch is cancelled.

    Bug fix: ``driver`` was first assigned inside ``try``, so a failure
    in ``WebDriver.get_chrome()`` made ``finally: driver.quit()`` raise
    ``NameError``. The reference is now initialised up front and the
    quit is guarded.
    """
    driver = None
    try:
        driver = WebDriver.get_chrome()
        senti_util = SentiUtil()
        url = "https://www.wdzj.com/front/search/index?key=" + urllib.parse.quote(
            website_name)
        driver.get(url)
        source = driver.page_source
        senti_util.snapshot_home("网贷之家", merchant_name, url, batch_num,
                                 driver)
        soup = BeautifulSoup(source, 'html.parser')
        tzbox = soup.find_all("ul", attrs={'class': 'so-tzbox'})
        if len(tzbox) == 0:
            return
        news = tzbox[0].find_all("li")
        if len(news) > 0:
            for new in news:
                # Stop early when this batch has been cancelled elsewhere.
                if not gl.check_by_batch_num(batch_num):
                    break
                href = new.find_all('a')[0].get("href")
                content = new.get_text()
                if content.find(website_name) != -1:
                    # href[2:] strips the protocol-relative "//" prefix.
                    senti_util.senti_process_text("网贷之家", merchant_name,
                                                  content,
                                                  "http://" + href[2:],
                                                  batch_num)
        else:
            logger.info("网贷之家没有搜索到数据: %s", merchant_name)
    except Exception as e:
        logger.error(e)
    finally:
        if driver is not None:
            driver.quit()
def monitor(website_name, merchant_name, batch_num):
    """Search 网贷巴士 for *website_name*; process matches until the batch is cancelled.

    Bug fix: ``driver`` was first assigned inside ``try``, so a failure
    in ``WebDriver.get_chrome()`` made ``finally: driver.quit()`` raise
    ``NameError``. The reference is now initialised up front and the
    quit is guarded.
    """
    driver = None
    try:
        driver = WebDriver.get_chrome()
        senti_util = SentiUtil()
        url = "http://www.wangdaibus.com/search/list?subject=" + urllib.parse.quote(
            website_name)
        driver.get(url)
        time.sleep(10)  # results load asynchronously; give the page time
        senti_util.snapshot_home("网贷巴士", merchant_name, url, batch_num,
                                 driver)
        soup = BeautifulSoup(driver.page_source, 'html.parser')
        news = soup.find_all("h3", attrs={'class': 'xs3'})
        if len(news) > 0:
            for new in news:
                # Stop early when this batch has been cancelled elsewhere.
                if not gl.check_by_batch_num(batch_num):
                    break
                href = new.find_all('a')[0].get("href")
                content = new.get_text()
                if content.find(website_name) != -1:
                    senti_util.senti_process_text(
                        "网贷巴士", merchant_name, content,
                        "http://www.wangdaibus.com/" + href, batch_num)
        else:
            logger.info("网贷巴士没有搜索到数据: %s", merchant_name)
    except Exception as e:
        logger.error(e)
    finally:
        if driver is not None:
            driver.quit()