def get_over_down(url):
    """Scrape the Bet365 over/under odds row from the odds page at *url*.

    Every attempt opens its own webdriver, waits up to 10 seconds for the
    ``#odds`` element, and parses the rendered page. A timeout triggers a
    new attempt with a fresh driver; the driver of each attempt is always
    closed, whatever the outcome.

    Args:
        url: Address of the odds page to load.

    Returns:
        A dict with ``'start'`` (cells 2-4 of the Bet365 row) and ``'end'``
        (cells 8-10 of the same row), each a list of cell texts.

    Raises:
        SystemExit: If fewer than 10 cells were collected from the page.
    """
    while True:
        odds_browser = config.get_webdriver()
        odds_wait = WebDriverWait(odds_browser, 10)
        try:
            odds_browser.get(url)
            odds_wait.until(
                EC.presence_of_element_located((By.CSS_SELECTOR, '#odds')))
            # NOTE(review): `etree` (lxml) must be imported by the enclosing
            # module - confirm.
            doc = pq(etree.HTML(odds_browser.page_source))

            text = []
            for row in doc('#odds tbody tr').items():
                for cell in row.find('td').items():
                    if re.search('Bet365', cell.text()):
                        # Collect every cell of the row that mentions Bet365,
                        # then stop scanning the remaining cells of this row.
                        text.extend(
                            sibling.text()
                            for sibling in cell.parent().find('td').items())
                        break

            if len(text) < 10:
                print("大小球赔率页面错误")
                sys.exit()

            # NOTE(review): the guard above admits exactly 10 cells, in which
            # case 'end' holds only two values - confirm the intended minimum.
            odds = {'start': text[2:5], 'end': text[8:11]}
        except TimeoutException:
            # Page did not render in time; retry below with a new driver.
            odds = None
        finally:
            # Runs on success, timeout and SystemExit alike, so no driver
            # window is left behind.
            odds_browser.close()

        # Pause between page loads, successful or not.
        time.sleep(random.randint(3, 5))
        if odds is not None:
            return odds
def index_year(year, league):
    """Crawl all stages of one season of *league*.

    Opens a webdriver on the season page, waits up to 10 seconds for
    ``#Table1`` and hands over to ``get_stage``. On a timeout the whole
    season is retried with a fresh driver; the driver of each attempt is
    always closed.

    Args:
        year: Season value substituted into ``league['url']`` with ``%``.
        league: Mapping providing at least ``'league'`` (printed in the
            progress message) and ``'url'`` (a ``%``-format template).

    Side effects:
        Resets ``config.match_total`` to 0 at the start of every attempt and
        prints its value once the season has been processed.
    """
    while True:
        print("正在爬取 ", league['league'], " 第", year, '年')
        browser = config.get_webdriver()
        wait = WebDriverWait(browser, 10)
        try:
            config.match_total = 0
            url = league['url'] % (year)
            print(url)
            browser.get(url)
            wait.until(
                EC.presence_of_element_located((By.CSS_SELECTOR, '#Table1')))
            # NOTE(review): called with (browser, wait, year, league) -
            # confirm this matches the get_stage definition in scope.
            get_stage(browser, wait, year, league)
        except TimeoutException:
            # Retry the same season; both arguments are kept for the retry.
            continue
        finally:
            # Close the driver on success and on timeout alike.
            browser.close()

        print("总场数:" + str(config.match_total))
        return
def get_odds(url):
    """Scrape the opening and closing odds of row ``#oddstr_281`` at *url*.

    Every attempt opens its own webdriver, selects value ``'1'`` in the
    ``#sel_showType`` drop-down, waits for ``#dataList`` and parses the
    rendered page. A timeout triggers a new attempt with a fresh driver;
    the driver of each attempt is always closed.

    Args:
        url: Address of the odds page to load.

    Returns:
        A dict with ``'start'`` (texts of cells 2-4 of ``#oddstr_281``) and
        ``'end'`` (the first three whitespace-separated tokens of the row
        that follows it).

    Raises:
        IndexError: If ``#oddstr_281`` has no following row.
    """
    while True:
        odds_browser = config.get_webdriver()
        odds_wait = WebDriverWait(odds_browser, 10)
        try:
            odds_browser.get(url)
            odds_wait.until(
                EC.presence_of_element_located(
                    (By.CSS_SELECTOR, '#sel_showType')))
            # find_element(By.ID, ...) replaces find_element_by_id, which
            # Selenium 4 removed.
            select = Select(odds_browser.find_element(By.ID, "sel_showType"))
            select.select_by_value('1')
            odds_wait.until(
                EC.presence_of_element_located((By.CSS_SELECTOR, '#dataList')))

            # NOTE(review): `etree` (lxml) must be imported by the enclosing
            # module - confirm.
            doc = pq(etree.HTML(odds_browser.page_source))
            cells = doc('#oddstr_281 td')
            start_text = [cell.text() for cell in cells.items()]

            # The row right after #oddstr_281 is read as one text blob and
            # split on whitespace.
            following_rows = [
                row.text() for row in cells.parent().next().items()]
            end_tokens = following_rows[0].split()

            odds = {'start': start_text[2:5], 'end': end_tokens[0:3]}
        except TimeoutException:
            # Page did not render in time; retry below with a new driver.
            odds = None
        finally:
            # Runs on success, timeout and parse errors alike.
            odds_browser.close()

        # Pause between page loads, successful or not.
        time.sleep(random.randint(3, 5))
        if odds is not None:
            return odds
import random
import re
import sys
import time
from urllib.parse import quote

from pyquery import PyQuery as pq
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.select import Select
from selenium.webdriver.support.wait import WebDriverWait

import match
import config

match_year = ['2018-2019']
# Shared driver and 10-second wait used by get_stage below.
browser = config.get_webdriver()
wait = WebDriverWait(browser, 10)


def get_stage(year, rounds=38):
    """Click through every round button of the page loaded in ``browser``.

    For each round the buttons matching ``#showRound tr .lsm2`` are looked
    up again, the button for that round is clicked, and the function waits
    for ``#Table3`` to be present before pausing 3-8 seconds.

    Args:
        year: Season identifier; not read by the statements shown here.
        rounds: Number of round buttons the page is expected to show, and
            the number of rounds clicked. Defaults to 38.

    Raises:
        SystemExit: If the page does not show exactly ``rounds`` buttons.
    """
    for stage in range(rounds):
        # Looked up on every pass rather than once - presumably because the
        # click re-renders the table and invalidates old elements; confirm.
        clicks = browser.find_elements(By.CSS_SELECTOR, '#showRound tr .lsm2')
        if len(clicks) != rounds:
            print("页面按钮个数不对" + str(len(clicks)))
            sys.exit()
        clicks[stage].click()
        wait.until(
            EC.presence_of_element_located((By.CSS_SELECTOR, '#Table3')))
        # Random pause between rounds.
        time.sleep(random.randint(3, 8))