def __init__(self):
    # One MongoDB collection per analysis block (league points, technical
    # statistics, head-to-head history, recent record, fixtures)
    self.mongo = {
        'league-points': MongoDB('league-points'),
        'technical-statistics': MongoDB('technical-statistics'),
        'historical': MongoDB('historical'),
        'recent-record': MongoDB('recent-record'),
        'fixture': MongoDB('fixture')
    }
    self.redis = Redis_Pool()
    # Queue and coroutine pool
    self.queue = Queue()
    self.coroutine_pool = Pool()
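# The classes below all share the same toolkit: blocking `requests` calls driven
# from a gevent Pool ("coroutine pool"), lxml for parsing, `schedule` for periodic
# re-runs, plus project-local MongoDB / Redis_Pool wrappers, a get_ip() proxy helper
# and HEADERS / NEW_THREADING / DATA_THREADING / DATA_LIVE_TIME settings that are not
# part of this listing. A minimal bootstrap sketch of what such a module would need;
# gevent only overlaps the blocking requests calls if the monkey patch runs before
# anything network-related is imported. Which Queue class the original uses is an
# assumption.
from gevent import monkey
monkey.patch_all()  # must run before requests/urllib3 are imported

import re
import time
import threading
from queue import Queue  # assumed; gevent.queue.Queue would also fit the usage below

import requests
import schedule
from lxml import etree
from gevent.pool import Pool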
class NewSpider(object):
    def __init__(self):
        self.redis = Redis_Pool()
        self.mongo = MongoDB('detail')
        # Queue and coroutine pool
        self.queue = Queue()
        self.coroutine_pool = Pool()

    def __check_callback(self, temp):
        '''Async callback: keep pulling work while the queue is not empty.'''
        if not self.queue.empty():
            self.coroutine_pool.apply_async(self.get_response, callback=self.__check_callback)

    def get_ID(self, name):
        datas = self.redis.find(name)
        for k, v in datas.items():
            try:
                ID = eval(k)
                self.queue.put(ID)
            except Exception:
                pass
        for i in range(NEW_THREADING):
            if not self.queue.empty():
                self.coroutine_pool.apply_async(self.get_response, callback=self.__check_callback)
        # time.sleep(2)
        # Wait for every greenlet to finish
        self.coroutine_pool.join()

    def get_response(self):
        if not self.queue.empty():
            ID = self.queue.get()
            proxy = get_ip()
            url = 'https://api.namitiyu.com/v1/basketball/match/detail?sid={}&lang=zh'.format(ID)
            if proxy:
                response = requests.get(url,
                                        proxies={'https': 'https://' + proxy, 'http': 'http://' + proxy},
                                        headers=HEADERS).json()
            else:
                response = requests.get(url, headers=HEADERS).json()
            msg = {'赛事ID': ID}
            # Text live broadcast
            msg['文字直播'] = self.get_text_broadcas(response)
            # Player info
            msg['球员信息'] = self.get_player(response)
            # Insert into MongoDB
            self.mongo.insert_one(msg, '赛事ID')
            # Mark the queue item as done
            self.queue.task_done()

    def get_text_broadcas(self, response):
        '''Text live broadcast'''
        msg = {'msg': {}}
        text_ls_data = response['data']['tlive']
        if not text_ls_data:
            msg['msg'] = '暂无信息'
        else:
            for i in range(len(text_ls_data)):
                msg['msg']['第{}节'.format(i + 1)] = text_ls_data[i]
        return msg

    def get_player(self, response):
        '''Player information'''
        msg = {'msg': {}}
        text_ls_data = response['data']['players']
        if not text_ls_data:
            msg['msg'] = '暂无信息'
        else:
            for text_ls in text_ls_data[0:2]:
                for text in text_ls:
                    text[4] = 'https://cdn.leisu.com/basketball/player/' + text[4]
                    data = text[6].split('^')
                    if data[-1] == '0':  # split() yields strings, so compare against '0'
                        text[6] = '是^' + text[6][0:-4]
            msg['msg'] = text_ls_data
        return msg

    def run_today(self):
        threads = []
        threads.append(threading.Thread(target=self.get_ID, args=('basketball_live',)))
        threads.append(threading.Thread(target=self.get_ID, args=('basketball_notStart',)))
        threads.append(threading.Thread(target=self.get_ID, args=('basketball_finished',)))
        # Start the threads
        for thread in threads:
            thread.start()
        # Wait for every thread to finish
        for thread in threads:
            thread.join()

    @classmethod
    def start(cls):
        st = cls()
        while True:
            st.run_today()
class TimeDataSpider(object):
    def __init__(self):
        # Instantiate the storage clients
        self.mongo = MongoDB('home_page')
        self.redis = Redis_Pool()

    def get_data(self, day):
        if day < 0:
            self._history_one_data(day)
        else:
            self._future_one_data(day)

    @staticmethod
    def _first(data, xpath, default=''):
        '''Return the first xpath match, or a default when nothing matches.'''
        result = data.xpath(xpath)
        return result[0] if result else default

    def _parse_team(self, data, side):
        '''Parse one team block of a match row; side=1 is the home row, side=2 the away row.'''
        base = './/div[@class="r-left"]/div[{}]'.format(side)
        team = self._first(data, base + '/div[1]//span[@class="lang"]/text()')        # team name
        logo_style = self._first(data, base + '/div[1]//i[@class="ico"]/@style')
        logo = ('https:' + re.findall(r'url\((.*?)\?', logo_style)[0]) if logo_style else ''  # team logo
        info_1234 = data.xpath(base + '/div[2]/div/text()')                           # quarter scores
        shangxia = self._first(data, base + '/div[3]/text()')                         # halves
        quanchang = self._first(data, base + '/b/text()')                             # full game
        fencha = self._first(data, base + '/div[4]/text()')                           # point difference
        zongfen = self._first(data, base + '/div[5]/text()')                          # total points
        ouzhi = self._first(data, base + '/div[6]//span[@class="exponent"]/span[@class="text"]/text()')  # European odds
        rangfen = (self._first(data, base + '/div[7]/div[1]/text()') + ' ' +
                   self._first(data, base + '/div[7]//span[@class="exponent"]/span/text()'))  # handicap
        hefen = (self._first(data, base + '/div[8]/div[1]/text()') + ' ' +
                 self._first(data, base + '/div[8]//span[@class="exponent"]/span/text()'))    # combined total
        return {
            '队名': team,
            '队logo': logo,
            '1234': info_1234,
            '上下': shangxia,
            '全场': quanchang,
            '分差': fencha,
            '总分': zongfen,
            '欧指': ouzhi,
            '让分': rangfen,
            '合分': hefen
        }

    def _parse_page(self, response, date, redis_name):
        '''Parse one schedule/result page and persist every match on it.'''
        html = etree.HTML(response)
        datas = html.xpath('//ul[@class="layout-grid-list"]/li')
        for data in datas:
            eventID = data.xpath('./@data-id')[0]                                     # match ID
            time_ = data.xpath('.//span[@class="time"]/text()')                       # tip-off time
            times = (date + time_[0]) if time_ else ''
            types = self._first(data, './/span[@class="no-state"]/span/text()')       # status
            event_ = data.xpath('.//div[@class="list-right"]/div[1]/div[1]/span/span/text()')  # competition
            event = event_[0] if event_ else 0
            dic = {
                '赛事ID': eventID,
                '赛事': event,
                '时间': times,
                '状态': types,
                '主队': self._parse_team(data, 1),
                '客队': self._parse_team(data, 2)
            }
            # Persist to MongoDB and Redis
            # if dic['赛事'] != 0:
            self.mongo.insert_one(dic, '赛事ID')
            self.redis.insert_one(redis_name, eventID, eventID)

    def _future_one_data(self, day):
        date = time.strftime('%Y%m%d', time.localtime(time.time() + day * 24 * 3600))
        url = 'https://live.leisu.com/lanqiu/saicheng?date={}'.format(date)
        proxy = get_ip()
        if proxy:
            response = requests.get(url, headers=HEADERS,
                                    proxies={'http': 'http://' + proxy, 'https': 'https://' + proxy}).text
        else:
            response = requests.get(url, headers=HEADERS).text
        self._parse_page(response, date, 'basketball_future_events')

    def _history_one_data(self, day):
        date = time.strftime('%Y%m%d', time.localtime(time.time() + day * 24 * 3600))
        url = 'https://live.leisu.com/lanqiu/wanchang?date={}'.format(date)
        proxy = get_ip()
        if proxy:
            response = requests.get(url, headers=HEADERS,
                                    proxies={'http': 'http://' + proxy, 'https': 'https://' + proxy}).text
        else:
            response = requests.get(url, headers=HEADERS).text
        self._parse_page(response, date, 'basketball_history_events')

    def run(self):
        threads = []
        for i in range(-3, 6):
            if i == 0:
                continue
            thread = threading.Thread(target=self.get_data, args=(i, ))
            threads.append(thread)
            # Start the thread
            thread.start()
        # Wait for every thread to finish
        for thread in threads:
            thread.join()
        # self.get_data(-2)

    @classmethod
    def start(cls):
        st = cls()

        def ss():
            # Refresh the whole window again shortly after midnight
            if time.strftime('%H', time.localtime(time.time())) == '00':
                st.run()

        st.run()
        # Re-run the check every 600 seconds
        schedule.every(600).seconds.do(ss)
        while True:
            schedule.run_pending()
            time.sleep(1)
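# The spiders call two thin project-local wrappers, MongoDB and Redis_Pool, whose
# implementations are not shown in this listing. A hypothetical sketch that is
# consistent with the call sites above (insert_one(doc, key) as an upsert keyed on
# one field, find(name) returning a Redis hash, insert_one(name, key, value) as a
# hash set, delete(name)), built on the real pymongo and redis packages; the
# connection settings and database name are placeholders.
import pymongo
import redis


class MongoDB(object):
    '''Hypothetical wrapper matching the self.mongo.insert_one(doc, key) calls above.'''

    def __init__(self, collection, uri='mongodb://127.0.0.1:27017', db='basketball'):
        self.col = pymongo.MongoClient(uri)[db][collection]

    def insert_one(self, doc, key):
        # Upsert on the given field so repeated crawls overwrite the same match
        self.col.update_one({key: doc[key]}, {'$set': doc}, upsert=True)


class Redis_Pool(object):
    '''Hypothetical wrapper matching the find / insert_one / delete calls above.'''

    def __init__(self, host='127.0.0.1', port=6379, db=0):
        pool = redis.ConnectionPool(host=host, port=port, db=db, decode_responses=True)
        self.conn = redis.Redis(connection_pool=pool)

    def find(self, name):
        # {field: value} mapping of a hash, e.g. {'12345': '12345'}
        return self.conn.hgetall(name)

    def insert_one(self, name, key, value):
        self.conn.hset(name, key, value)

    def delete(self, name):
        self.conn.delete(name)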
class DataSpider(object):
    def __init__(self):
        # Match analysis pages ("shujufenxi")
        self.mongo = MongoDB('shujufenxi')
        self.redis = Redis_Pool()
        # Queue and coroutine pool
        self.queue = Queue()
        self.coroutine_pool = Pool()

    def __check_callback(self, temp):
        '''Async callback: keep pulling work while the queue is not empty.'''
        if not self.queue.empty():
            self.coroutine_pool.apply_async(self.get_response, callback=self.__check_callback)

    def get_ID(self, name):
        datas = self.redis.find(name)
        for k, v in datas.items():
            ID = eval(k)
            self.queue.put(ID)
        for i in range(DATA_THREADING):
            if not self.queue.empty():
                self.coroutine_pool.apply_async(self.get_response, callback=self.__check_callback)
        # Wait for every greenlet to finish
        self.coroutine_pool.join()

    def get_response(self):
        if not self.queue.empty():
            ID = self.queue.get()
            proxy = get_ip()
            try:
                url = 'https://live.leisu.com/lanqiu/shujufenxi-{}'.format(ID)
                if proxy:
                    response = requests.get(url,
                                            proxies={'https': 'https://' + proxy, 'http': 'http://' + proxy},
                                            headers=HEADERS, timeout=3)
                else:
                    response = requests.get(url, headers=HEADERS, timeout=3)
                html = etree.HTML(response.text)
                msg = {'赛事ID': ID}
                # League points
                msg['联赛积分'] = self._league_points(html)
                # Technical statistics
                msg['技术统计'] = self._technical_statistics(html)
                # Head-to-head history
                msg['历史交锋'] = self._historical(html)
                # Recent record
                msg['近期战绩'] = self._recent_record(html)
                # Upcoming fixtures
                msg['未来赛程'] = self._fixture(html)
                print(msg)
                # Insert into MongoDB
                self.mongo.insert_one(msg, '赛事ID')
                # Mark the queue item as done
                self.queue.task_done()
            except requests.exceptions.ProxyError:
                # Put the ID back so it can be retried later
                self.queue.put(ID)

    def _league_points(self, html):
        '''League points table'''
        msg = {'联赛积分': []}
        datas = html.xpath('//div[@id="league-points"]/div[2]/div')
        if len(datas) < 2:
            msg['联赛积分'] = '暂无数据'
        else:
            fields = ['类型', '赛', '胜', '负', '得分', '失分', '净胜分', '排名', '胜率']
            for data in datas:
                dic = {'msg': []}
                dic['队名'] = data.xpath('.//span[@class="name"]/text()')[0]
                dic['排名'] = data.xpath('.//div[@class="float-left f-s-12 color-999 line-h-25"]/text()')[0]
                for da in data.xpath('.//tr')[1:]:
                    row = {}
                    for idx, key in enumerate(fields, start=1):
                        cell = da.xpath('./td[{}]/text()'.format(idx))
                        row[key] = cell[0].strip() if cell else ''
                    dic['msg'].append(row)
                msg['联赛积分'].append(dic)
        return msg

    def _technical_statistics(self, html):
        '''Technical statistics'''
        msg = {'技术统计': []}
        datas = html.xpath('//div[@id="technical-statistics"]/div[2]/div')
        if not datas:
            msg['技术统计'] = '暂无数据'
        else:
            fields = ['类型', '投篮命中率', '三分命中率', '平均篮板', '平均助攻', '平均抢断', '平均失误']
            for data in datas[1:]:
                dic = {'msg': []}
                dic['队名'] = data.xpath('.//span[@class="name"]/text()')[0]
                for da in data.xpath('.//tr')[1:]:
                    row = {}
                    for idx, key in enumerate(fields, start=1):
                        cell = da.xpath('./td[{}]/text()'.format(idx))
                        row[key] = cell[0].strip() if cell else ''
                    dic['msg'].append(row)
                msg['技术统计'].append(dic)
        return msg

    @staticmethod
    def _parse_match_row(row):
        '''Parse one finished-match table row (shared by the history and recent-record blocks).'''
        panlu_ls = row.xpath('./td[9]/span/text()')        # handicap result
        jinqiushu_ls = row.xpath('./td[12]/span/text()')   # totals result
        return {
            '赛事': row.xpath('./td[1]/a/text()')[0],
            '比赛时间': row.xpath('./td[2]/text()')[0].strip(),
            '客队': row.xpath('./td[3]/a/span/text()')[0],
            '比分': str(row.xpath('./td[4]/a/span/text()')).replace(',', ':')[1:-1],
            '主队': row.xpath('./td[5]/a/span/text()')[0],
            '胜负': row.xpath('./td[6]/span/text()')[0].strip(),
            '分差': row.xpath('./td[7]/text()')[0].strip(),
            '让分': row.xpath('./td[8]/text()')[0].strip(),
            '盘路': panlu_ls[0].strip() if panlu_ls else '',
            '总分': row.xpath('./td[10]/text()')[0].strip(),
            '总分盘': row.xpath('./td[11]/text()')[0].strip(),
            '进球数': jinqiushu_ls[0].strip() if jinqiushu_ls else ''
        }

    def _historical(self, html):
        '''Head-to-head history'''
        msg = {'历史交锋': []}
        datas = html.xpath('//div[@id="historical"]/div[2]//tr')
        if not datas:
            msg['历史交锋'] = '暂无数据'
        else:
            for data in datas[1:]:
                msg['历史交锋'].append(self._parse_match_row(data))
        return msg

    def _recent_record(self, html):
        '''Recent record'''
        msg = {'近期战绩': []}
        datas = html.xpath('//div[@id="recent-record"]/div[2]/div')
        # if not datas:
        #     msg['近期战绩'] = '暂无数据'
        # else:
        for data in datas:
            team_ls = data.xpath('.//span[@class="name"]/text()')
            dic = {'战队': team_ls[0] if team_ls else '', 'msg': []}
            for dat in data.xpath('.//tr')[1:]:
                dic['msg'].append(self._parse_match_row(dat))
            msg['近期战绩'].append(dic)
        return msg

    def _fixture(self, html):
        '''Upcoming fixtures'''
        msg = {'未来赛程': []}
        datas = html.xpath('//div[@id="fixture"]/div[2]/div')
        for data in datas:
            team_ls = data.xpath('.//span[@class="name"]/text()')
            dic = {'战队': team_ls[0] if team_ls else '', 'msg': []}
            dats = data.xpath('.//table/tr')
            if len(dats) > 1:
                for dat in dats[1:]:
                    dic['msg'].append({
                        '赛事': dat.xpath('./td[1]/span/text()')[0],
                        '比赛时间': dat.xpath('./td[2]/text()')[0].strip(),
                        '客队': dat.xpath('./td[3]/text()')[0],
                        '主队': dat.xpath('./td[4]/text()')[0],
                        '与本场相隔': dat.xpath('./td[5]/text()')[0]
                    })
            msg['未来赛程'].append(dic)
        return msg

    def run(self):
        threads = []
        threads.append(threading.Thread(target=self.get_ID, args=('basketball_notStart',)))
        threads.append(threading.Thread(target=self.get_ID, args=('basketball_finished',)))
        threads.append(threading.Thread(target=self.get_ID, args=('basketball_live',)))
        threads.append(threading.Thread(target=self.get_ID, args=('basketball_history_events',)))
        threads.append(threading.Thread(target=self.get_ID, args=('basketball_future_events',)))
        # Start the threads
        for thread in threads:
            thread.start()
        # Wait for every thread to finish
        for thread in threads:
            thread.join()

    @classmethod
    def start(cls):
        '''Full refresh for matches that have not started or have already finished'''
        st = cls()
        st.run()
        # Re-run every DATA_LIVE_TIME seconds
        schedule.every(DATA_LIVE_TIME).seconds.do(st.run)
        while True:
            schedule.run_pending()
            time.sleep(1)
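# DataSpider (and the two NewSpider variants) drain their queues with the same
# trick: seed a handful of apply_async calls, and let each greenlet's callback
# re-submit the worker while the queue still has items. A standalone sketch of
# just that pattern; the worker body and the numbers are illustrative only, and
# a real worker doing network I/O would also need the gevent monkey patch shown
# earlier.
from queue import Queue as _SketchQueue
from gevent.pool import Pool as _SketchPool

sketch_queue = _SketchQueue()
sketch_pool = _SketchPool()

for task_id in range(10):
    sketch_queue.put(task_id)


def _sketch_worker():
    if not sketch_queue.empty():
        task_id = sketch_queue.get()
        print('processed', task_id)
        sketch_queue.task_done()


def _sketch_callback(_result):
    # Re-submit the worker while there is still work left
    if not sketch_queue.empty():
        sketch_pool.apply_async(_sketch_worker, callback=_sketch_callback)


SEED = 3  # plays the role of DATA_THREADING / NEW_THREADING
for _ in range(SEED):
    if not sketch_queue.empty():
        sketch_pool.apply_async(_sketch_worker, callback=_sketch_callback)

sketch_pool.join()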
class NewSpider(object):
    def __init__(self):
        self.redis = Redis_Pool()
        self.mongo = {
            'home_infos': MongoDB('home_infos'),
            'news_text_broadcas': MongoDB('news_text_broadcas'),
            'new_players_info': MongoDB('new_players_info')
        }
        # Queue and coroutine pool
        self.queue = Queue()
        self.coroutine_pool = Pool()

    def __check_callback(self, temp):
        '''Async callback: keep pulling work while the queue is not empty.'''
        if not self.queue.empty():
            self.coroutine_pool.apply_async(self.get_response, callback=self.__check_callback)

    def get_ID(self):
        datas = self.redis.find('basketball_live')
        print(datas)
        for k, v in datas.items():
            ID = eval(k)
            self.queue.put(ID)
        for i in range(NEW_THREADING):
            if not self.queue.empty():
                self.coroutine_pool.apply_async(self.get_response, callback=self.__check_callback)
        # time.sleep(2)
        # Wait for every greenlet to finish
        self.coroutine_pool.join()

    def get_response(self):
        if not self.queue.empty():
            ID = self.queue.get()
            proxy = get_ip()
            url = 'https://live.leisu.com/lanqiu/detail-{}'.format(ID)
            if proxy:
                response = requests.get(url,
                                        proxies={'https': 'https://' + proxy, 'http': 'http://' + proxy},
                                        headers=HEADERS, allow_redirects=False).text
            else:
                response = requests.get(url, headers=HEADERS, allow_redirects=False).text
            html = etree.HTML(response)
            # Text live broadcast
            self.get_text_broadcas(html, ID)
            # Player info
            self.get_player(html, ID)
            # Mark the queue item as done
            self.queue.task_done()

    def get_text_broadcas(self, html, ID):
        '''Text live broadcast'''
        msg = {'文字直播': [], '赛事ID': ID}
        datas = html.xpath('//div[@class="nano-content"]/ul[@class="list-content"]/li')
        if not datas:
            msg['文字直播'] = '暂无信息'
        else:
            for data in datas:
                time_ = data.xpath('./div[@class="code"]/text()')[0]
                score = str(data.xpath('./div[@class="score"]/span/text()')).replace(',', '-')[1:-1]
                tip = data.xpath('./div[@class="tip"]/text()')[0]
                msg['文字直播'].append({'时间': time_, '比分': score, '文字描述': tip})
        # Save
        self.mongo['news_text_broadcas'].insert_one(msg, '赛事ID')

    def get_player(self, html, ID):
        '''Player information'''
        msg = {'人员信息': [], '赛事ID': ID}
        datas = html.xpath('//div[@class="content clearfix"]/div')
        for data in datas[2:]:
            ms = {}
            team = data.xpath('./div[@class="logo-name"]/div[@class="name"]/text()')
            ms['队名'] = team[0] if team else ''
            # Team totals
            ms['球队总体信息'] = self._get_info(data)
            # Individual players
            ms['球员信息'] = list(self._get_player_info(data))
            msg['人员信息'].append(ms)
        print(msg)
        # Save
        self.mongo['new_players_info'].insert_one(msg, '赛事ID')

    def _get_player_info(self, data):
        '''Yield one dict of box-score stats per player row.'''
        fields = ['背号', '姓名', '首发', '出场时间', '投篮', '三分', '罚球', '前篮板',
                  '后篮板', '总篮板', '助攻', '抢断', '盖帽', '失误', '犯规', '得分']
        infos = data.xpath('./div[@class="sp-tb"]/div[@class="list"]/div')
        for info in infos[1:]:
            dic = {}
            for idx, key in enumerate(fields, start=1):
                if key == '姓名':
                    cell = info.xpath('./div[2]//span[@class="o-hidden name"]/text()')
                else:
                    cell = info.xpath('./div[{}]/span/text()'.format(idx))
                dic[key] = cell[0] if cell else ''
            yield dic

    def _get_info(self, data):
        '''Team totals row.'''
        dic = {}
        messages = data.xpath(
            './div[@class="sp-tb"]/div[@class="summary"]/div[@class="row totals "]')
        if messages:
            fields = ['得分', '助攻', '篮板', '前-后篮板', '抢断', '盖帽',
                      '投篮( 中 - 投 )', '三分', '罚球( 中 - 投 )', '失误', '犯规']
            for idx, key in enumerate(fields, start=1):
                cell = messages[0].xpath('./div[{}]/span/text()'.format(idx))
                dic[key] = cell[0] if cell else ''
        return dic

    @classmethod
    def start(cls):
        run = cls()
        while True:
            run.get_ID()
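# Every request above branches on get_ip(), which is also not part of this
# listing; the way the proxies= dictionaries are built implies it returns a bare
# "host:port" string, or a falsy value when no proxy is available. A hypothetical
# stand-in (the proxy list itself is a placeholder):
import random

PROXY_POOL = ['127.0.0.1:8888']  # placeholder entries


def get_ip():
    '''Return a "host:port" proxy string, or None to fall back to a direct request.'''
    if not PROXY_POOL:
        return None
    return random.choice(PROXY_POOL)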
class HomeSpider(object):
    def __init__(self):
        self.redis = Redis_Pool()
        self.mongo = MongoDB('home_info')

    def get_home_data(self):
        try:
            proxy = get_ip()
            if proxy:
                response = requests.get('https://live.leisu.com/lanqiu',
                                        proxies={'https': 'https://' + proxy, 'http': 'http://' + proxy},
                                        headers=HEADERS, allow_redirects=False, timeout=3).text
            else:
                response = requests.get('https://live.leisu.com/lanqiu',
                                        headers=HEADERS, allow_redirects=False, timeout=3).text
            html = etree.HTML(response)
            # Matches in progress
            lives = html.xpath('//div[@id="live"]/ul/li')
            self._model(lives, 'basketball_live')
            # Matches not yet started
            notStart = html.xpath('//div[@id="notStart"]/ul/li')
            self._model(notStart, 'basketball_notStart')
            # Finished matches
            finished = html.xpath('//div[@id="finished"]/ul/li')
            self._model(finished, 'basketball_finished')
        except Exception as e:
            print(e)

    def _model(self, lives, name):
        data = self._get_dispose_datas(lives)
        # Clear the old hash before writing the fresh snapshot
        self.redis.delete(name)
        for dic in data:
            # Persist only entries that actually carry team rows
            if dic['mgs']:
                self.redis.insert_one(name, dic['赛事ID'], str(dic))
                self.mongo.insert_one(dic, '赛事ID')

    def _get_dispose_datas(self, lives):
        '''Yield one dict per listed match.'''
        for live in lives:
            dic = {'mgs': []}
            events = live.xpath('.//div[@class="thead row"]/div[1]/span[1]/span/text()')
            dic['赛事'] = events[0] if events else ''                                  # competition
            zhuangtai_1 = live.xpath('.//div[@class="thead row"]/div[1]/span[2]/text()')
            zhuangtai_1 = zhuangtai_1[0] if zhuangtai_1 else ''
            zhuangtai_2 = live.xpath('.//div[@class="thead row"]/div[1]/span[3]/text()')
            zhuangtai_2 = zhuangtai_2[0] if zhuangtai_2 else ''
            dic['状态'] = zhuangtai_1 + ' ' + zhuangtai_2                              # status
            times = live.xpath('.//span[@class="time"]/text()')
            dic['时间'] = times[0] if times else ''                                    # time
            try:
                eventID = re.findall(r'\d+', live.xpath('.//div[@class="d-row"]/div/div[@class="row"]/a/@href')[0])[0]
            except Exception:
                eventID = ''
            dic['赛事ID'] = eventID                                                    # match ID
            datas = live.xpath('.//div[@class="d-row"]/div[@class="r-left"]/div')
            for data in datas:
                dat = data.xpath('./div[1]/i[@class="ico"]/@style')
                logo = ('https:' + re.findall(r'url\((.*?)\?', dat[0])[0]) if dat else ''  # team logo
                home_team = data.xpath('./div[1]/span[1]/span/text()')
                team = home_team[0] if home_team else ''                               # team name
                home_1234 = data.xpath('./div[2]/div/text()')                          # quarter scores
                home_shangxia = data.xpath('./div[3]/text()')
                shangxia = home_shangxia[0] if home_shangxia else ''                   # halves
                home_quanchang = data.xpath('./b/text()')
                quanchang = home_quanchang[0] if home_quanchang else ''                # full game
                home_fencha = data.xpath('./div[4]/text()')
                fencha = home_fencha[0] if home_fencha else ''                         # point difference
                team_zongfen = data.xpath('./div[5]/text()')
                zongfen_ = team_zongfen[0] if team_zongfen else ''                     # team total
                home_ouzhi = data.xpath('./div[6]/span/span/text()')
                ouzhi = home_ouzhi[0] if home_ouzhi else ''                            # European odds
                home_rangfen = data.xpath('./div[7]/div[2]/span/span/text()')
                rangfen = home_rangfen[0] if home_rangfen else ''                      # handicap
                home_zongfen = data.xpath('./div[8]/div[2]/span/span/text()')
                zongfen = home_zongfen[0] if home_zongfen else ''                      # totals line
                dic['mgs'].append({'队名': team, 'LOGO': logo, '1234': home_1234, '上下': shangxia,
                                   '全场': quanchang, '分差': fencha, '队总分': zongfen_, '欧指': ouzhi,
                                   '让分': rangfen, '总分': zongfen})
            yield dic

    @classmethod
    def start(cls):
        run = cls()
        while True:
            try:
                run.get_home_data()
            except requests.exceptions.ProxyError:
                pass
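# Each start() classmethod above blocks in an infinite loop, so the crawlers
# cannot share a single main thread. A possible entry point, assuming the classes
# are split into modules roughly as listed here; the module names are placeholders
# (the two NewSpider variants would need distinct module paths), and each spider
# gets its own process via multiprocessing.
from multiprocessing import Process

from home_spider import HomeSpider          # placeholder module paths
from time_data_spider import TimeDataSpider
from data_spider import DataSpider
from new_spider import NewSpider

if __name__ == '__main__':
    spiders = [HomeSpider, TimeDataSpider, DataSpider, NewSpider]
    processes = [Process(target=spider.start) for spider in spiders]
    for process in processes:
        process.start()
    for process in processes:
        process.join()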