def __get_data(self):
    """Fetch the analysis page for one queued event and dispatch it to
    the matching parser.

    Pulls ``(events_ID, ball)`` off the work queue, downloads the page
    (through a proxy when one is available) and routes the HTML text to
    the parser selected by ``ball``.  Proxy failures are swallowed so a
    bad proxy does not kill the worker.
    """
    proxy = get_ip()
    if not self.queue.empty():
        events_ID, ball = self.queue.get()
        # Analysis page
        try:
            url = 'https://live.leisu.com/shujufenxi-' + str(events_ID)
            if proxy:
                resp = requests.get(url,
                                    proxies={'https': 'https://' + proxy,
                                             'http': 'http://' + proxy},
                                    headers=HEADERS,
                                    allow_redirects=False).text
            else:
                resp = requests.get(url, headers=HEADERS,
                                    allow_redirects=False).text
            # Route the page to the parser for this data category.
            if ball == 'since-trend':
                self.__get_trend(resp, events_ID, ball)
            elif ball == 'injury-situation':
                self.__get_situation(resp, events_ID, ball)
            elif ball == 'league-points':
                self.__get_league_points(resp, events_ID, ball)
            else:
                self.__get_historical(ball, events_ID, resp)
        except requests.exceptions.ProxyError:
            # Best effort: a dead proxy just drops this item.
            pass
        finally:
            # Always balance queue.get() so queue.join() cannot hang on
            # a proxy failure (the original skipped task_done on error).
            self.queue.task_done()
def get_response(self):
    """Fetch the basketball analysis page for one queued match ID, run
    every section parser and store the merged record in Mongo.

    On a proxy failure the ID is pushed back onto the queue so it is
    retried later with a fresh proxy.
    """
    if not self.queue.empty():
        ID = self.queue.get()
        proxy = get_ip()
        url = 'https://live.leisu.com/lanqiu/shujufenxi-{}'.format(ID)
        try:
            if proxy:
                response = requests.get(url,
                                        proxies={'https': 'https://' + proxy,
                                                 'http': 'http://' + proxy},
                                        headers=HEADERS, timeout=3)
            else:
                response = requests.get(url, headers=HEADERS, timeout=3)
            html = etree.HTML(response.text)
            msg = {'赛事ID': ID}
            # League points table
            msg['联赛积分'] = self._league_points(html)
            # Technical statistics
            msg['技术统计'] = self._technical_statistics(html)
            # NOTE(review): '近期战绩' is assigned twice, so this
            # _historical() result is overwritten just below — confirm
            # whether it should be stored under its own key.
            msg['近期战绩'] = self._historical(html)
            # Recent record (overwrites the previous assignment)
            msg['近期战绩'] = self._recent_record(html)
            # Upcoming fixtures
            msg['未来赛程'] = self._fixture(html)
            print(msg)
            # Insert into the database, keyed on the match ID.
            self.mongo.insert_one(msg, '赛事ID')
            # Mark this queue item as processed.
            self.queue.task_done()
        except requests.exceptions.ProxyError:
            # Re-queue for a retry.  (The original called
            # self.queue.get(ID), which would *consume* another item
            # instead of putting this one back.)
            self.queue.put(ID)
            self.queue.task_done()
def get_response(self):
    """Fetch the basketball analysis page for one queued match ID and
    run every section parser on it.

    Retries once with TLS verification disabled when the (possibly
    proxied) connection fails the certificate check.  Proxy failures
    drop the item silently, matching the other workers in this module.
    """
    if not self.queue.empty():
        ID = self.queue.get()
        proxy = get_ip()
        url = 'https://live.leisu.com/lanqiu/shujufenxi-{}'.format(ID)
        # proxies=None simply means "no proxy" to requests.
        proxies = ({'https': 'https://' + proxy, 'http': 'http://' + proxy}
                   if proxy else None)
        try:
            try:
                response = requests.get(url, proxies=proxies,
                                        headers=HEADERS,
                                        allow_redirects=False)
            except requests.exceptions.SSLError:
                # Some proxies break certificate verification; retry
                # unverified.  (The original used a bare ``except:``,
                # which also retried on completely unrelated failures.)
                response = requests.get(url, proxies=proxies,
                                        headers=HEADERS,
                                        allow_redirects=False,
                                        verify=False)
            html = etree.HTML(response.text)
            # League points
            self._league_points(html, ID)
            # Technical statistics
            self._technical_statistics(html, ID)
            # Historical results
            self._historical(html, ID)
            # Recent record
            self._recent_record(html, ID)
            # Upcoming fixtures
            self._fixture(html, ID)
            # Mark this queue item as processed.
            self.queue.task_done()
        except requests.exceptions.ProxyError:
            pass
def get_response(self):
    """Fetch the basketball match-detail page for one queued ID and
    hand the parsed HTML to the text-broadcast and player extractors.

    The original had no error handling at all, so a dead proxy killed
    the worker and ``task_done`` was never called; proxy failures are
    now swallowed (same policy as the sibling workers) and the queue
    item is always marked done.
    """
    if not self.queue.empty():
        ID = self.queue.get()
        proxy = get_ip()
        url = 'https://live.leisu.com/lanqiu/detail-{}'.format(ID)
        try:
            if proxy:
                response = requests.get(url,
                                        proxies={'https': 'https://' + proxy,
                                                 'http': 'http://' + proxy},
                                        headers=HEADERS,
                                        allow_redirects=False).text
            else:
                response = requests.get(url, headers=HEADERS,
                                        allow_redirects=False).text
            html = etree.HTML(response)
            # Text live broadcast
            self.get_text_broadcas(html, ID)
            # Player information
            self.get_player(html, ID)
        except requests.exceptions.ProxyError:
            # Best effort: a dead proxy just drops this item.
            pass
        finally:
            # Always balance queue.get() so queue.join() cannot hang.
            self.queue.task_done()
def get_home_data(self):
    """Scrape the basketball live page and feed each match section
    (in-play / not started / finished) to ``_model`` under its own
    collection name.  Any failure is printed and swallowed so the
    polling loop keeps running.
    """
    try:
        proxy = get_ip()
        url = 'https://live.leisu.com/lanqiu'
        if proxy:
            page = requests.get(url,
                                proxies={'https': 'https://' + proxy,
                                         'http': 'http://' + proxy},
                                headers=HEADERS,
                                allow_redirects=False,
                                timeout=3).text
        else:
            page = requests.get(url, headers=HEADERS,
                                allow_redirects=False, timeout=3).text
        tree = etree.HTML(page)
        # Every section is handled identically; only the container id
        # and the target collection differ.
        sections = (
            ('//div[@id="live"]/ul/li', 'basketball_live'),
            ('//div[@id="notStart"]/ul/li', 'basketball_notStart'),
            ('//div[@id="finished"]/ul/li', 'basketball_finished'),
        )
        for xpath, collection in sections:
            self._model(tree.xpath(xpath), collection)
    except Exception as e:
        print(e)
def __dispose_one_data(self):
    """Assemble the full record for one queued event ID and store it.

    The original fetched a proxy via ``get_ip()`` but never used it
    (burning a proxy per call), and never balanced ``queue.get()`` with
    ``task_done()`` — both fixed here.
    """
    if not self.queue.empty():
        ID = self.queue.get()
        # Text broadcast plus the data embedded in the page images.
        info = self._text_broadcast(ID)
        # Team information
        info['球队信息'] = self._teams_info(ID)
        # Attach the event ID used as the upsert key.
        info['赛事ID'] = ID
        self.mongo.insert_one(info, '赛事ID')
        # Balance queue.get() so queue.join() can complete.
        self.queue.task_done()
def __dispose_one_data(self):
    """Fetch the detail page for one queued match record, scrape the
    half-time / corner / commentary fields into it and store the merged
    record in Mongo.  Proxy failures drop the item silently.
    """
    proxy = get_ip()
    if not self.queue.empty():
        msg = self.queue.get()
        try:
            url = 'https://live.leisu.com/detail-' + str(msg['赛事ID'])
            if proxy:
                # Added the 'http' mapping for consistency with the
                # other workers (the original passed only 'https').
                response = requests.get(url,
                                        proxies={'https': 'https://' + proxy,
                                                 'http': 'http://' + proxy},
                                        headers=HEADERS,
                                        allow_redirects=False).text
            else:
                response = requests.get(url, headers=HEADERS,
                                        allow_redirects=False).text
            # Live commentary is embedded as "EVENT=[...]" in the page JS.
            narrate_ = re.findall('EVENT=(.*?])', response)
            if narrate_:
                narrate = narrate_[0].replace('雷速体育', '我们')
            else:
                narrate = ''
            html = etree.HTML(response)
            # Final score ("home-away"); either side may be missing.
            # NOTE(review): score is computed but never stored in msg —
            # confirm whether msg['比分'] was intended.
            try:
                score = f'''{html.xpath('//div[@class="score home"]/text()')[0]}-{html.xpath('//div[@class="score away"]/text()')[0]}'''
            except IndexError:
                # Narrowed from a bare ``except:`` — only a missing
                # element should blank the score.
                score = ''
            # Half-time score
            half_score_ = html.xpath('//span[@class ="half-score"]/text()')
            half_score = half_score_[0] if half_score_ else ''
            # Corner kicks ("home-away")
            lab_data = html.xpath(
                '//span[@class="lab corner"]/span[@class="text"]/text()')
            if lab_data:
                lab_corner = lab_data[0] + '-' + lab_data[1]
            else:
                lab_corner = ''
            msg['半场'] = half_score
            msg['角球'] = lab_corner
            msg['解说'] = narrate
            self.mongo.insert_one(msg, '赛事ID')
        except requests.exceptions.ProxyError:
            # Best effort: a dead proxy just drops this item.
            pass
def get_data(self):
    """Download the football live page, pull the embedded THATDATA JSON
    out of the page's JavaScript and dispatch each match bucket
    (live / notStart / finished / other) to ``_dispose_live``.

    Best-effort scrape: any failure is swallowed so the polling loop
    keeps running.
    """
    try:
        proxy = get_ip()
        if proxy:
            response = requests.get(self.url, headers=HEADERS,
                                    proxies={'https': 'https://' + proxy,
                                             'http': 'http://' + proxy},
                                    allow_redirects=False,
                                    timeout=3).text
        else:
            # timeout=3 added for consistency with the proxy branch,
            # which the original only applied on one side.
            response = requests.get(self.url, headers=HEADERS,
                                    allow_redirects=False,
                                    timeout=3).text
        # The page embeds its data as a JS assignment: THATDATA={...}
        result = re.findall('THATDATA=(.*})', response)
        # NOTE(review): the *second* occurrence is parsed — confirm the
        # page really repeats the assignment.
        data = json.loads(result[1])
        teams = data['teams']      # team info
        events = data['events']    # competitions
        trans = data['matchesTrans']
        # In-play matches
        self._dispose_live('football_live', events, trans['live'], teams)
        # Not yet started
        self._dispose_live('football_notStart', events, trans['notStart'], teams)
        # Finished
        self._dispose_live('football_finished', events, trans['finished'], teams)
        # Everything else
        self._dispose_live('football_other', events, trans['other'], teams)
    except Exception:
        # Narrowed from a bare ``except:``, which also swallowed
        # SystemExit/KeyboardInterrupt and made the worker unkillable.
        pass
def get_response(self):
    """Pull one match ID off the queue, fetch its detail JSON from the
    API (through a proxy when one is available) and store the parsed
    record in Mongo, then mark the queue item done."""
    if self.queue.empty():
        return
    ID = self.queue.get()
    proxy = get_ip()
    url = 'https://api.namitiyu.com/v1/basketball/match/detail?sid={}&lang=zh'.format(ID)
    if proxy:
        data = requests.get(url,
                            proxies={'https': 'https://' + proxy,
                                     'http': 'http://' + proxy},
                            headers=HEADERS).json()
    else:
        data = requests.get(url, headers=HEADERS).json()
    msg = {
        '赛事ID': ID,
        # Text live broadcast
        '文字直播': self.get_text_broadcas(data),
        # Player information
        '球员信息': self.get_player(data),
    }
    # Insert into the database, keyed on the match ID.
    self.mongo.insert_one(msg, '赛事ID')
    # Mark this queue item as processed.
    self.queue.task_done()
def _future_one_data(self, day):
    """Scrape the basketball schedule page for the date ``day`` days
    from today and store one record per fixture (Mongo upsert plus a
    Redis marker).

    Fix: the original passed ``proxies={'http': 'https://'+proxy}``,
    which never matches this https URL, so the proxy was silently
    unused; the mapping now covers both schemes.
    """
    date = time.strftime('%Y%m%d',
                         time.localtime(time.time() + day * 24 * 3600))
    proxy = get_ip()
    url = 'https://live.leisu.com/lanqiu/saicheng?date={}'.format(date)
    if proxy:
        response = requests.get(url, headers=HEADERS,
                                proxies={'https': 'https://' + proxy,
                                         'http': 'http://' + proxy}).text
    else:
        response = requests.get(url, headers=HEADERS).text
    html = etree.HTML(response)

    def first(nodes, default=''):
        # Most fields are optional; take the first xpath hit or a default.
        return nodes[0] if nodes else default

    def logo_url(style):
        # @style looks like "background:url(//cdn/...png?...)"
        if style:
            return 'https:' + re.findall(r'url\((.*?)\?', style)[0]
        return ''

    for data in html.xpath('//ul[@class="layout-grid-list"]/li'):
        eventID = data.xpath('./@data-id')[0]  # match ID
        time_ = data.xpath('.//span[@class="time"]/text()')  # kick-off time
        times = date + time_[0] if time_ else ''
        types = first(data.xpath('.//span[@class="no-state"]/span/text()'))  # state
        event = data.xpath(
            './/div[@class="list-right"]/div[1]/div[1]/span/span/text()')[0]  # competition
        # --- home side ---
        home_team = first(data.xpath(
            './/div[@class="r-left"]/div[1]/div[1]//span[@class="lang"]/text()'))
        home_team_logo = logo_url(first(data.xpath(
            './/div[@class="r-left"]/div[1]/div[1]//i[@class="ico"]/@style')))
        home_info_1234 = data.xpath(
            './/div[@class="r-left"]/div[1]/div[2]/div/text()')  # quarter scores
        home_shangxia = first(data.xpath(
            './/div[@class="r-left"]/div[1]/div[3]/text()'))  # halves
        home_quanchang = first(data.xpath(
            './/div[@class="r-left"]/div[1]/b/text()'))  # full game
        home_fencha = first(data.xpath(
            './/div[@class="r-left"]/div[1]/div[4]/text()'))  # point diff
        home_zongfen = first(data.xpath(
            './/div[@class="r-left"]/div[1]/div[5]/text()'))  # total points
        home_ouzhi = first(data.xpath(
            './/div[@class="r-left"]/div[1]/div[6]//span[@class="exponent"]/span[@class="text"]/text()'))  # European odds
        home_rangfen = (  # handicap: line + odds
            first(data.xpath('.//div[@class="r-left"]/div[1]/div[7]/div[1]/text()'))
            + ' '
            + first(data.xpath('.//div[@class="r-left"]/div[1]/div[7]//span[@class="exponent"]/span/text()')))
        home_hefen = (  # totals: line + odds
            first(data.xpath('.//div[@class="r-left"]/div[1]/div[8]/div[1]/text()'))
            + ' '
            + first(data.xpath('.//div[@class="r-left"]/div[1]/div[8]//span[@class="exponent"]/span/text()')))
        # --- away side ---
        # NOTE(review): the away name/logo xpaths below are identical to
        # the home ones (div[1]); this looks like a copy-paste slip in
        # the original — kept as-is pending confirmation against the page.
        away_team = first(data.xpath(
            './/div[@class="r-left"]/div[1]/div[1]//span[@class="lang"]/text()'))
        away_team_logo = logo_url(first(data.xpath(
            './/div[@class="r-left"]/div[1]/div[1]//i[@class="ico"]/@style')))
        away_info_1234 = data.xpath(
            './/div[@class="r-left"]/div[2]/div[2]/div/text()')
        away_shangxia = first(data.xpath(
            './/div[@class="r-left"]/div[2]/div[3]/text()'))
        away_quanchang = first(data.xpath(
            './/div[@class="r-left"]/div[2]/b/text()'))
        away_fencha = first(data.xpath(
            './/div[@class="r-left"]/div[2]/div[4]/text()'))
        away_zongfen = first(data.xpath(
            './/div[@class="r-left"]/div[2]/div[5]/text()'))
        away_ouzhi = first(data.xpath(
            './/div[@class="r-left"]/div[2]/div[6]//span[@class="exponent"]/span[@class="text"]/text()'))
        away_rangfen = (
            first(data.xpath('.//div[@class="r-left"]/div[2]/div[7]/div[1]/text()'))
            + ' '
            + first(data.xpath('.//div[@class="r-left"]/div[2]/div[7]//span[@class="exponent"]/span/text()')))
        away_hefen = (
            first(data.xpath('.//div[@class="r-left"]/div[2]/div[8]/div[1]/text()'))
            + ' '
            + first(data.xpath('.//div[@class="r-left"]/div[2]/div[8]//span[@class="exponent"]/span/text()')))
        dic = {
            '赛事ID': eventID,
            '赛事': event,
            '时间': times,
            '状态': types,
            '主队': {'队名': home_team, '队logo': home_team_logo,
                   '1234': home_info_1234, '上下': home_shangxia,
                   '全场': home_quanchang, '分差': home_fencha,
                   '总分': home_zongfen, '欧指': home_ouzhi,
                   '让分': home_rangfen, '合分': home_hefen},
            '客队': {'队名': away_team, '队logo': away_team_logo,
                   '1234': away_info_1234, '上下': away_shangxia,
                   '全场': away_quanchang, '分差': away_fencha,
                   '总分': away_zongfen, '欧指': away_ouzhi,
                   '让分': away_rangfen, '合分': away_hefen},
        }
        # Persist the record and mark the event as seen in Redis.
        self.mongo.insert_one(dic, '赛事ID')
        self.redis.insert_one('basketball_history_events', eventID, eventID)
def __get_data(self):
    """Scrape the 3-in-1 odds table (European / handicap / goals) for
    one queued event and store one entry per bookmaker.

    Fix: the original passed ``proxies={'http': 'https://'+proxy}``,
    which never matches this https URL, so the proxy was silently
    unused; the mapping now covers both schemes.
    """
    proxy = get_ip()
    if not self.queue.empty():
        eventID = self.queue.get()
        msg = {'msg': []}
        msg['赛事ID'] = eventID
        url = f'https://live.leisu.com/3in1-{str(eventID)}'
        if proxy:
            response = requests.get(url,
                                    proxies={'https': 'https://' + proxy,
                                             'http': 'http://' + proxy},
                                    headers=HEADERS,
                                    allow_redirects=False).text
        else:
            response = requests.get(url, headers=HEADERS,
                                    allow_redirects=False).text
        html = etree.HTML(response)
        # Skip the header row of the odds table.
        datas = html.xpath('/html/body/div[1]/div[3]//tr')[1:]
        for data in datas:
            name = data.xpath('./td[2]/span[2]/text()')[0].strip()
            if not name:
                # Rows whose bookmaker cell holds an image, not text.
                name = 'Bet365'
            # European odds: initial and live [home, draw, away].
            ouzhi_1 = data.xpath('./td[3]/div[1]/span/text()') or ['', '', '']
            ouzhi_2 = data.xpath('./td[3]/div[2]/span/span/span/text()') or ['', '', '']
            # Handicap: [home, line, away] plus the live line.
            rangqiu_1 = data.xpath('./td[4]/div[1]/span/text()') or ['', '', '']
            rangqiu_2 = data.xpath('./td[4]/div[2]/span/span/span/text()') or ['', '']
            rangqiu_3 = data.xpath('./td[4]/div[2]/span/span[2]/text()') or ['']
            # Goals over/under: [over, line, under] plus the live line.
            jiqiushu_1 = data.xpath('./td[5]/div[1]/span/text()') or ['', '', '']
            jiqiushu_2 = data.xpath('./td[5]/div[2]/span/span/span/text()') or ['', '']
            jiqiushu_3 = data.xpath('./td[5]/div[2]/span/span[2]/text()') or ['']
            # Each value pairs the initial figure with the live one.
            dic = {
                name: [{
                    '欧指': {'主胜': [ouzhi_1[0].strip(), ouzhi_2[0].strip()],
                           '和局': [ouzhi_1[1].strip(), ouzhi_2[1].strip()],
                           '客胜': [ouzhi_1[2].strip(), ouzhi_2[2].strip()]},
                    '让球': {'主胜': [rangqiu_1[0].strip(), rangqiu_2[0].strip()],
                           '盘口': [rangqiu_1[1].strip(), rangqiu_3[0].strip()],
                           '客胜': [rangqiu_1[2].strip(), rangqiu_2[1].strip()]},
                    '进球数': {'大球': [jiqiushu_1[0].strip(), jiqiushu_2[0].strip()],
                            '和局': [jiqiushu_1[1].strip(), jiqiushu_3[0].strip()],
                            '小球': [jiqiushu_1[2].strip(), jiqiushu_2[1].strip()]},
                }]
            }
            msg['msg'].append(dic)
        # Only store events that produced at least one odds row.
        if msg['msg']:
            self.mongo_index.insert_one(msg, '赛事ID')
def _history_one_data(self, day):
    """Scrape the finished-matches page for the date ``day`` days from
    today and store one record per match (Mongo upsert plus a Redis
    marker).

    Fix: the original passed ``proxies={'http': 'https://'+proxy}``,
    which never matches this https URL, so the proxy was silently
    unused; the mapping now covers both schemes.
    """
    date = time.strftime('%Y%m%d',
                         time.localtime(time.time() + day * 24 * 3600))
    proxy = get_ip()
    url = 'https://live.leisu.com/wanchang?date={}'.format(date)
    if proxy:
        response = requests.get(url, headers=HEADERS,
                                proxies={'https': 'https://' + proxy,
                                         'http': 'http://' + proxy}).text
    else:
        response = requests.get(url, headers=HEADERS).text
    html = etree.HTML(response)

    def first(nodes, default=''):
        # Almost every field is optional on the page; take the first
        # xpath hit or fall back to a default.
        return nodes[0] if nodes else default

    for data in html.xpath('//*[@id="finished"]/ul/li'):
        event_ID = data.xpath('./@data-id')[0]  # match ID
        # Competition logo: @style holds "...url(//cdn/...png?...)".
        logo_style = first(data.xpath('.//span[@class="lab-events"]/span/@style'))
        if logo_style:
            event_LOGO = 'https:' + re.findall(r'url\((.*?)\?', logo_style)[0]
        else:
            event_LOGO = ''
        event = first(data.xpath('.//a[@class="event-name"]/span/text()'))  # competition
        count = first(data.xpath('.//span[@class="lab-round"]/text()'))  # round
        kick_off = first(data.xpath('.//span[@class="lab-time"]/text()'))  # time of day
        event_time = date + kick_off if kick_off else ''
        # Home team name and ID (ID is the tail of the href).
        team_home = first(data.xpath('.//span[@class="lab-team-home"]/span/a/text()'))
        home_href = first(data.xpath('.//span[@class="lab-team-home"]/span/a/@href'))
        team_home_ID = home_href.split('-')[-1] if home_href else ''
        score = first(data.xpath('.//span[@class="score"]/b/text()'))  # final score
        # Away team name and ID.
        team_away = first(data.xpath('.//span[@class="lab-team-away"]/span/a/text()'))
        away_href = first(data.xpath('.//span[@class="lab-team-away"]/span/a/@href'))
        team_away_ID = away_href.split('-')[-1] if away_href else ''
        lab_half = first(data.xpath('.//span[@class="lab-half"]/text()'))  # half-time
        lab_corner = first(data.xpath('.//span[@class="lab-corner"]/span/text()'))  # corners
        lab_bet_dds = first(data.xpath('.//span[@class="lab-bet-odds"]/span/text()'))  # win/lose
        lab_ratel = first(data.xpath('.//span[@class="lab-ratel"]/text()'))  # handicap
        lab_size = first(data.xpath('.//span[@class="lab-size"]/span/text()'))  # goals
        dic = {
            '赛事ID': event_ID,
            '赛事LOGO': event_LOGO,
            '赛事': event,
            '轮次': count,
            '比赛时间': event_time,
            '主场球队': team_home,
            '主场球队ID': team_home_ID,
            '比分': score,
            '客场球队': team_away,
            '客场球队ID': team_away_ID,
            '半场': lab_half,
            '角球': lab_corner,
            '胜负': lab_bet_dds,
            '让球': lab_ratel,
            '进球数': lab_size
        }
        # Persist the record and mark the event as seen in Redis.
        self.mongo.insert_one(dic, '赛事ID')
        self.redis.insert_one('football_history_events', event_ID, event_ID)