def summarization_once(self, index):
    """ get html from news """
    print(index)
    texts = []
    if index:
        url = 'https://www.baidu.com/s?ie=utf-8&mod=1&isbd=1&isid=919fab3c0002c9f1&wd=%E5%81%B7%E7%8B%97&pn=730&oq=%E5%81%B7%E7%8B%97&tn=baiduhome_pg&ie=utf-8&rsv_idx=2&rsv_pq=919fab3c0002c9f1&rsv_t=7e30ggF%2BMa91oOURk1bMtN8af5unSwOR08TodNBB%2F%2B6B6RBEwUi8l8IAe28ACA%2B8b5I5&gpc=stf%3D1517038564%2C1548574564%7Cstftype%3D1&tfflag=1&bs=%E5%81%B7%E7%8B%97&rsv_sid=undefined&_ss=1&clist=28bc21fb856a58b7%09350102124f079888%0928bc21fb856a58b7%0928bc2159845c1cf3%0928bc2015823fa56b%0928a121fb84a7d1a6&hsug=&f4s=1&csor=2&_cr1=34767&pn=' + \
            str(index * 20)
    else:
        url = 'http://news.baidu.com/ns?rn=20&ie=utf-8&cl=2&ct=1&bs=%E6%AF%92%E7%8B%97%E8%82%89&rsv_bp=1&sr=0&f=8&prevct=no&tn=news&word=%E5%81%B7%E7%8B%97'
    news_lists = get_request_proxy(url, 0)
    if not news_lists:
        if can_retry(url):
            self.summarization_once(index)
        return
    summarization_lists = news_lists.find_all('div', class_='result')
    if not len(summarization_lists):
        if can_retry(url):
            self.summarization_once(index)
        return
    print('num: ', len(summarization_lists), url)
    for summarization in summarization_lists:
        temp_text = summarization.text.replace('\n', '').replace(
            '\xa0', '').replace('\t', '').strip()
        temp_text = ' '.join(temp_text.split())
        texts.append(temp_text[:-8])
    self.summarizations[int(index)] = texts
def get_song_detail(self, id):
    """ get song detail from playlist """
    host = 'http://music.163.com/api/playlist/detail?id=' + str(id)
    json = get_request_proxy(host, 1)
    if json == 0:
        if can_retry(host):
            self.get_song_detail(id)
        return []
    result = json['result']
    tracks = result['tracks']
    if len(tracks) <= 1:
        if can_retry(host):
            self.get_song_detail(id)
        return []
    else:
        playcount = result['playCount']
        for track in tracks:
            songid = track['id']
            songname = track['name']
            self.songlist.append([songid, songname, playcount])
        self.finishlist.append(id)
def detail_once(self, index, url):
    """ get html from news """
    # print(index)
    news_lists = get_request_proxy(url, 0)
    if not news_lists:
        if can_retry(url):
            self.detail_once(index, url)
        return
    test = news_lists.find_all('div', class_=[
        'article-content', 'mth-editor-content', 'con-news-art',
        'Custom_UnionStyle'
    ])
    if not len(test):
        test = self.cleantxt(news_lists.text)
        if not len(test):
            if can_retry(url):
                self.detail_once(index, url)
            return
        self.word_list[index] = test
        return
    word_list = ''.join([index.text for index in test]).replace(
        '\u3000', '').replace('\n', '')
    self.word_list[int(index)] = word_list
def summarization_once(self, index):
    """ get html from news """
    print(index)
    texts = []
    hrefs = []
    if index:
        url = 'https://www.google.com.hk/search?q=%E5%81%B7%E7%8B%97&newwindow=1&safe=strict&tbm=nws&ei=PcVKXJKRIc7s8AXB05e4Dw&sa=N&ved=0ahUKEwjSo5nBvojgAhVONrwKHcHpBfcQ8tMDCFE&biw=1627&bih=427&dpr=2&start=' + \
            str(index * 10)
    else:
        url = 'https://www.google.com.hk/search?q=%E5%81%B7%E7%8B%97&newwindow=1&safe=strict&tbm=nws&ei=O8VKXJ7nFoP_8QX1oK_gDA&start=0&sa=N&ved=0ahUKEwje8JTAvojgAhWDf7wKHXXQC8w4ChDy0wMISQ&biw=1627&bih=427&dpr=2'
    news_lists = basic_req(url, 0)
    href_lists = news_lists.find_all('a', class_=['RTNUJf', 'l lLrAF'])
    summarization_lists = news_lists.find_all('div', class_='gG0TJc')
    if not len(href_lists) or not len(summarization_lists):
        if can_retry(url):
            self.summarization_once(index)
        return
    print('num: ', len(summarization_lists), url)
    for href in href_lists:
        hrefs.append(href['href'])
    for summarization in summarization_lists:
        temp_text = summarization.text.replace('\n', '').replace(
            '\xa0', '').replace('\t', '').replace('...', '').strip()
        temp_text = ' '.join(temp_text.split())
        texts.append(temp_text)
    self.summarizations[int(index)] = texts
    self.hrefs[int(index)] = hrefs
def load_spot_once(self, pn=1, city_id=10186):
    ''' load spot once '''
    data = {
        'sAct': 'KMdd_StructWebAjax|GetPoisByTag',
        'iMddid': city_id,
        'iTagId': 0,
        'iPage': pn,
    }
    data = self.load_sn(data)
    print(data)
    req = get_request_proxy(self.AJAX_ROUTER_URL, 11, data=data)
    if req is None or not 'data' in req or not 'list' in req['data']:
        if can_retry('{}{}{}'.format(self.AJAX_ROUTER_URL, city_id, pn)):
            self.load_spot_once(pn, city_id)
        return
    spot_list = req['data']['list']
    spot_pn = req['data']['page']
    spot_tmp = re.findall('<h3>.*?(.*?)</h3>', spot_list)
    try:
        total_pn = int(re.findall('共<span>(.*?)</span>', spot_pn)[0])
    except Exception as e:
        total_pn = 1
        echo(0, 'City id:', city_id, 'Page:', pn, spot_pn, e)
    if city_id not in self.spot_result:
        self.spot_result[city_id] = spot_tmp
    else:
        self.spot_result[city_id] += spot_tmp
    self.spot_pn[city_id] = total_pn
def get_goods_id_first(self, origin_url, index):
    """ get goods id first """
    origin_url = origin_url.replace('https', 'http')
    # first_result = get_request_proxy(origin_url, 0)
    first_result = basic_req(origin_url, 0, header=self.headers)
    if not first_result or len(first_result.find_all('script')) < 2:
        if can_retry(origin_url):
            self.get_goods_id_first(origin_url, index)
        return
    wait = first_result.find_all('script')[1].text
    if not '"title":"' in wait:
        return
    title = re.findall('"title":".*","',
                       wait)[0].split('","')[0].split('":"')[1]
    if title in self.title2map:
        self.goods_map[index] = self.title2map[title]
        self.url2goods[origin_url] = self.title2map[title]
        print(self.title2map[title])
    else:
        print(title)
def _getroom_id(self, next_to=True, proxy=True):
    ''' get av room id '''
    url = self.ROOM_INIT_URL % self._av_id
    html = get_request_proxy(url, 0) if proxy else basic_req(url, 0)
    head = html.find_all('head')
    if not len(head) or len(head[0].find_all('script')) < 4 or \
            not '{' in head[0].find_all('script')[3].text:
        if can_retry(url):
            self._getroom_id(proxy=proxy)
        else:
            self._getroom_id(proxy=False)
        next_to = False
    if next_to:
        script_list = head[0].find_all('script')[3].text
        script_begin = script_list.index('{')
        script_end = script_list.index(';')
        script_data = script_list[script_begin:script_end]
        json_data = json.loads(script_data)
        if self._p == -1 or len(json_data['videoData']['pages']) < self._p:
            self._room_id = json_data['videoData']['cid']
        else:
            self._room_id = json_data['videoData']['pages'][self._p - 1]['cid']
        print('Room_id:', self._room_id)
def check_comment_once(self, av_id: int, pn: int):
    ''' check comment once '''
    url = self.REPLY_V2_URL % (pn, av_id)
    json_req = get_request_proxy(url, 1)
    if json_req is None or not 'data' in json_req or not 'hots' in json_req['data']:
        if can_retry(url):
            self.check_comment_once(av_id, pn)
        return
    hots = json_req['data']['hots']
    replies = json_req['data']['replies']
    temp_floor = [] if replies is None else [ii['floor'] for ii in replies]
    if replies is None:
        wait_check = [] if hots is None else hots
    else:
        wait_check = replies if hots is None else [*hots, *replies]
    for ii in wait_check:
        info = {'basic': self.get_comment_detail(ii, av_id, pn)}
        floor = info['basic'][0]
        crep = ii['replies']
        if not crep is None:
            info['replies'] = [
                self.get_comment_detail(ii, av_id, pn, floor) for ii in crep
            ]
        self.comment[av_id][floor] = info
    if len(temp_floor):
        for ii in range(min(temp_floor), max(temp_floor) + 1):
            if not ii in self.comment[av_id]:
                self.comment[av_id][ii] = {}
        self.comment_max[av_id] = min(temp_floor)
def load_url(self):
    """ load url from zimuzu """
    url = 'http://zmz005.com/o5itP3'
    detail = get_request_proxy(url, 0)
    total = []
    if not detail:
        print('retry')
        if can_retry(url):
            self.load_url()
        return
    season_list = detail.find_all('div', class_='tab-content info-content')[1:]
    for season in season_list:
        quality_list = season.find_all('div', class_='tab-pane')
        url_body = quality_list[1] if 'APP' in quality_list[0]['id'] else quality_list[0]
        season_id = re.findall(r"\d+\.?\d*", url_body['id'])[0]
        total.append(season_id)
        if int(season_id) < 12:
            url_body = quality_list[1]
        url_list = url_body.find_all('ul', class_='down-links')
        url = [
            index.find_all('div', class_='copy-link')[1]['data-url']
            for index in url_list
        ]
        total.append('\n'.join(url) + '\n')
    with codecs.open('zimuzu/data/southPark', 'w', encoding='utf-8') as f:
        f.write('\n'.join(total))
def get_request_v2(self, url, types, header):
    result = get_request_proxy(url, 0, header=header)
    if not result or not len(result.find_all('div', class_='content')):
        if can_retry(url):
            # return the retried result instead of silently dropping it
            return self.get_request_v2(url, types, header)
        return
    return result
def get_request(self, url, types):
    result = basic_req(url, 1)
    if not result:
        if can_retry(url):
            # return the retried result instead of silently dropping it
            return self.get_request(url, types)
        return
    return result
def get_request_v3(self, url, types):
    result = basic_req(url, 0)
    if result is None or not result or not len(
            result.find_all('p', class_='content')):
        if can_retry(url):
            # return the retried result instead of silently dropping it
            return self.get_request_v3(url, types)
        return
    return result
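# A hedged sketch, not part of the original code: get_request, get_request_v2 and
# get_request_v3 above all repeat the same "fetch, validate, maybe retry" shape around
# can_retry. The helper below shows one way that pattern could be factored with an
# explicit retry bound. The name fetch_with_retry and its parameters (fetch, validate,
# max_retry) are illustrative assumptions, not existing project helpers.
def fetch_with_retry(fetch, validate, max_retry=3):
    """ call fetch() until validate(result) is truthy or the retry budget runs out """
    for _ in range(max_retry):
        result = fetch()
        if validate(result):
            return result  # first response that passes validation
    return None  # exhausted retries; the caller must handle None

# Illustrative usage mirroring get_request_v3 (assumes basic_req is importable here):
# result = fetch_with_retry(
#     lambda: basic_req(url, 0),
#     lambda r: r is not None and len(r.find_all('p', class_='content')))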
def get_check(self):
    ''' check comment '''
    now_hour = int(time_str(format='%H'))
    now_min = int(time_str(format='%M'))
    now_time = now_hour + now_min / 60
    if now_time > 0.5 and now_time < 8.5:
        return
    if os.path.exists('{}comment.pkl'.format(comment_dir)):
        with codecs.open('{}comment.pkl'.format(comment_dir), 'rb') as f:
            self.comment = pickle.load(f)
    if self.assign_up_mid == -1:
        return
    url = self.MEMBER_SUBMIT_URL % self.assign_up_mid
    json_req = get_request_proxy(url, 1)
    if json_req is None or not 'data' in json_req or not 'vlist' in json_req['data']:
        if can_retry(url):
            self.get_check()
        return
    av_id_list = [[ii['aid'], ii['comment']]
                  for ii in json_req['data']['vlist']]
    if self.basic_av_id not in [ii[0] for ii in av_id_list]:
        if can_retry(url):
            self.get_check()
        return
    threading_list = []
    for (ii, jj) in av_id_list:
        if ii not in self.comment:
            self.comment[ii] = {}
        work = threading.Thread(target=self.comment_check_schedule, args=(ii, jj,))
        threading_list.append(work)
    for work in threading_list:
        work.start()
    for work in threading_list:
        work.join()
    with codecs.open('{}comment.pkl'.format(comment_dir), 'wb') as f:
        pickle.dump(self.comment, f)
    return av_id_list
def href_once(self, index):
    """ get html from news """
    print(index)
    texts = []
    url = 'https://www.baidu.com/s?tn=news&rtt=4&bsst=1&cl=2&wd=毒狗肉&pn=' + \
        str(index * 10)
    news_lists = get_request_proxy(url, 0)
    if not news_lists:
        if can_retry(url):
            self.href_once(index)
        return
    test = news_lists.find_all('div', class_='result')
    if not len(test):
        if can_retry(url):
            self.href_once(index)
        return
    href_list = [index.a['href'] for index in test]
    self.href_map[int(index)] = href_list
def request_text(self, url):
    ''' requests text '''
    req = basic_req(url, 2)
    if req is None:
        echo(0, url)
        if can_retry(url):
            # return the retried result instead of falling through with None
            return self.request_text(url)
        return ''
    echo(1, url)
    return req.text
def check_type_req(self, av_id: int):
    changeHeaders({'Referer': self.BASIC_AV_URL % av_id})
    url = self.VIEW_URL % av_id
    json_req = get_request_proxy(url, 1)
    if json_req is None or 'data' not in json_req or 'tid' not in json_req['data']:
        if can_retry(url):
            self.check_type_req(av_id)
        return
    self.rank_type[av_id] = json_req['data']['tid'] == self.assign_tid
def get_goods_second(self, url, index):
    second_result = basic_req(url, 0, header=self.headers)
    # second_result = get_request_proxy(url, 0)
    if not second_result or not len(second_result.find_all('input')):
        if can_retry(url):
            self.get_goods_second(url, index)
        return
    goods_id = second_result.find_all('input')[6]['value']
    print(goods_id)
    self.goods_map[index] = goods_id
def get_playlist_id(self, classify, offset):
    """ get playlist id from classify """
    host = 'https://music.163.com'
    allclassify = classify == '全部风格'
    url = host + self.classifylist[classify] + (
        '?' if allclassify else '&') + 'order=hot&limit=35&offset=' + str(offset)
    html = basic_req(url, 0)
    if not html:
        if can_retry(url):
            self.get_playlist_id(classify, offset)
        return []
    alist = html.find_all('a', class_='icon-play')
    if not len(alist):
        if can_retry(url):
            self.get_playlist_id(classify, offset)
    for index in alist:
        self.playlists.append(index['data-res-id'])
def load_img(self, index, img_id, img_url):
    """ load img """
    img = get_request_proxy(img_url, 2)
    if img == True or img == False:
        if can_retry(img_url):
            self.load_img(index, img_id, img_url)
        return
    with codecs.open(
            'buildmd/' + self.find_title(index).split('/')[0] + '/img/' +
            self.find_title(index).split('/')[1][:-3] + str(img_id + 1) + '.jpg',
            'wb') as f:
        f.write(img.content)
def summarization_once(self, index):
    """ get html from news """
    print(index)
    texts = []
    url = 'https://www.baidu.com/s?ie=utf-8&mod=1&isbd=1&isid=919fab3c0002c9f1&wd=%E5%81%B7%E7%8B%97&oq=%E5%81%B7%E7%8B%97&tn=baiduhome_pg&ie=utf-8&rsv_idx=2&rsv_pq=919fab3c0002c9f1&rsv_t=7e30ggF%2BMa91oOURk1bMtN8af5unSwOR08TodNBB%2F%2B6B6RBEwUi8l8IAe28ACA%2B8b5I5&gpc=stf%3D1517038564%2C1548574564%7Cstftype%3D1&tfflag=1&bs=%E5%81%B7%E7%8B%97&rsv_sid=undefined&_ss=1&clist=28bc21fb856a58b7%09350102124f079888%0928bc21fb856a58b7%0928bc2159845c1cf3%0928bc2015823fa56b%0928a121fb84a7d1a6&hsug=&f4s=1&csor=2&_cr1=34767&pn=' + \
        str(index * 10)
    news_lists = get_request_proxy(url, 0)
    if not news_lists:
        if can_retry(url):
            self.summarization_once(index)
        return
    test = news_lists.find_all(
        'div', class_=['c-row c-gap-top-small', 'c-span18 c-span-last'])
    word = self.cleantxt(news_lists.text)
    if not len(word):
        if can_retry(url):
            self.summarization_once(index)
        return
    temp_map = self.find_location.test_province(
        self.find_location.city_province, word)
    self.total_map[int(index)] = temp_map
    self.word[index] = word
def get_lists(self):
    """ get title lists """
    url = self.joint_url('3bb0c25eca85e764b6d55a281faf7195')
    title_json = get_request_proxy(url, 1)
    if not title_json:
        if can_retry(url):
            self.get_lists()
        return
    content = BeautifulSoup(title_json['content'],
                            'html.parser').find_all('a')
    self.request_list = [
        re.split(r'/|=', index.text)[-1] for index in content
    ]
def get_classify(self):
    """ get classify from /discover/playlist """
    version = begin_time()
    self.classifylist = {}
    host = 'https://music.163.com/discover/playlist'
    html = get_request_proxy(host, 0)
    if not html:
        print('Empty')
        if can_retry(host):
            self.get_classify()
        return []
    alist = html.find_all('a', class_='s-fc1')
    if not len(alist):
        if can_retry(host):
            self.get_classify()
        print(html)
    for index in alist:
        self.classifylist[index.text] = index['href']
    end_time(version)
def build_md_once(self, index, tid):
    """ build md in one """
    url = self.joint_url(tid)
    title_json = get_request_proxy(url, 1)
    if not title_json:
        if can_retry(url, index):
            self.build_md_once(index, tid)
        return
    content = BeautifulSoup(title_json['content'],
                            'html.parser').find_all('div')
    text = []
    img_href = []
    img_id = 1
    ttid = 1
    img_title = self.find_title(index).split('/')[1][:-3]
    for word in content:
        temp_text = ''
        # note: isdigit must be called, otherwise the bound method is always truthy
        if word.span and len(word.span.text) and not word.span.text[0].isdigit():
            temp_text = '## ' + word.span.text
            ttid = 1
        if word.img:
            temp_text = '![image](img/' + img_title + str(img_id) + '.jpg)'
            img_href.append(word.img['src'].replace('https', 'http'))
            img_id += 1
        if not len(temp_text):
            temp_text = word.text
        if len(temp_text) and temp_text[0].isdigit():
            temp_text = str(ttid) + '. **' + \
                ' '.join(temp_text.split('\xa0')[1:]).strip() + '**'
            ttid += 1
        if len(temp_text) and temp_text[0:2] == '//':
            temp_text = str(ttid) + '. **' + \
                ' '.join(temp_text.split('\xa0')[2:]).strip() + '**'
            ttid += 1
        if len(temp_text) and (temp_text[0] == '¥' or temp_text[0] == '€'):
            temp_text = '<a>' + temp_text + '</a>'
        text.append(temp_text)
    with codecs.open(data_dir + self.find_title(index), 'w', encoding='utf-8') as f:
        f.write('\n'.join(text))
    self.img_map[index] = img_href
    print(index, len(img_href))
def search_goods_once(self, goods_name, index):
    if not os.path.exists('%scookie_alimama' % data_dir):
        print('alimama cookie not exist!!!')
        return
    with codecs.open('%scookie_alimama' % data_dir, 'r', encoding='utf-8') as f:
        cookie = f.readlines()
    url_list = [
        'https://pub.alimama.com/items/search.json?auctionTag=&perPageSize=50&shopTag=&_tb_token_=',
        cookie[1][:-1], '&pvid=', cookie[2][:-1], '&t=',
        str(int(round(time.time() * 1000))), '&_t=',
        str(int(round(time.time() * 1000))), '&q=', goods_name
    ]
    headers = {
        'pragma': 'no-cache',
        'X-Requested-With': 'XMLHttpRequest',
        'cache-control': 'no-cache',
        'Cookie': '',
        'Content-Type': 'application/x-www-form-urlencoded;charset=UTF-8',
        'Accept': 'application/json, text/javascript, */*; q=0.01',
        'Accept-Encoding': '',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3682.0 Safari/537.36',
    }
    headers['Cookie'] = cookie[0][:-1]
    ca = basic_req(''.join(url_list), 2, header=headers)
    if ca.status_code != 200 or not 'data' in ca.json():
        if can_retry(''.join(url_list)):
            self.search_goods_once(goods_name, index)
        return
    page_list = ca.json()['data']['pageList']
    title = [
        '||'.join([str(index['auctionId']), goods_name, str(index['zkPrice'])])
        for index in page_list
    ][0]
    self.goods_name[index] = title
    print(title)
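# Note on the cookie file read above (an inference from the indexing, not documented in
# the source): search_goods_once expects '%scookie_alimama' % data_dir to hold at least
# three lines, used as cookie[0] -> the raw Cookie header value, cookie[1] -> the
# _tb_token_ value, and cookie[2] -> the pvid value, each with a trailing newline that
# is stripped via the [:-1] slices.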
def load_collect_once(self, index):
    """ load taobao collect """
    baseurl = 'https://shoucang.taobao.com/item_collect_n.htm?t='
    url = baseurl + str(int(round(time.time() * 1000)))
    if index:
        # the paging parameters need a leading '&' to stay separate from the 't' parameter
        url += '&ifAllTag=0&tab=0&tagId=&categoryCount=0&type=0&tagName=&categoryName=&needNav=false&startRow=' + \
            str(30 * index)
    collect_html = basic_req(url, 0)
    if collect_html != True and collect_html != False:
        collect_list = collect_html.find_all(
            'li',
            class_=[
                "J_FavListItem g-i-item fav-item ",
                "J_FavListItem g-i-item fav-item isinvalid",
                "J_FavListItem g-i-item fav-item istmall ",
                "J_FavListItem g-i-item fav-item istmall isinvalid"
            ])
        print(len(collect_list))
    if collect_html == True or collect_html == False or not len(collect_list):
        if can_retry(baseurl + str(index), index):
            self.load_collect_once(index)
        return
    text = []
    for collect in collect_list:
        data_id = collect['data-id']
        # data_ownerid = collect['data-ownerid']
        title = collect.find_all('a', class_='img-item-title-link')[0].text
        price = collect.find_all('div', class_='g_price')[0].strong.text if len(
            collect.find_all('div', class_='g_price')) else '0'
        text.append("||".join([data_id, title, price]))
    self.collect[index] = text
def load_goods_once(self, index, tid):
    """ build md in one """
    url = self.joint_url(tid)
    title_json = get_request_proxy(url, 1)
    if not title_json:
        if can_retry(url, index):
            self.load_goods_once(index, tid)
        return
    content = BeautifulSoup(title_json['content'], 'html.parser')
    # return content
    content = content.find_all('div')
    if not len(content):
        if can_retry(url, index):
            self.load_goods_once(index, tid)
        return
    # print(len(content))
    text = []
    ttid = 0
    text.append(self.find_title(index))
    good_text = []
    describe = []
    title = ''
    url = ''
    tpud = ''
    for word in content:
        temp_text = ''
        temp_text = word.text
        if not len(temp_text):
            continue
        if len(temp_text) and temp_text not in self.special_list and not '€' in temp_text and (
                (temp_text[0].isdigit() and
                 (not '【' in temp_text or '【已下架】' in temp_text)) or
                (temp_text[0] == '\xa0' and not 'http' in temp_text and
                 not '¥' in temp_text and not '微信' in temp_text and
                 not '(' in temp_text) or
                (word.span and len(word.span.text.replace('\xa0', '')) and
                 (word.span['style'] == 'font-size:16px;color:#fc9db1;font-weight:bold;' or
                  word.span['style'] == 'font-size:16px;color:#1e6792;background-color:#ffffff;font-weight:bold;'))):
            temp_text = temp_text.replace('\xa0', ' ').replace('|', '')
            temp_text = temp_text.replace('//', '').replace('¥', '').strip()
            if not re.search(r'\d\.\d', temp_text):
                temp_text = temp_text.replace('.', ' ')
            elif temp_text.count('.') > 1:
                temp_text = temp_text.replace('.', ' ', 1)
            temp_list = temp_text.split()
            print(temp_list)
            if not len(temp_list):
                continue
            if ttid:
                text.append(' '.join([*good_text, *[url, tpud]]))
                url = ''
                tpud = ''
            ttid += 1
            describe = []
            good_text = []
            if len(title):
                text.append(title)
                title = ''
            if temp_list[0].isdigit():
                good_text.append(str(int(temp_list[0])))
            else:
                good_text.append(str(ttid))
                good_text.append(temp_list[0])
            if len(temp_list) == 1:
                continue
            if len(good_text) == 1:
                good_text.append(temp_list[1])
            elif temp_list[1].isdigit():
                good_text.append(str(int(temp_list[1])))
                if len(temp_list) > 2:
                    describe = temp_list[2:]
            if len(temp_list) > 2 and temp_list[2].isdigit():
                good_text.append(str(int(temp_list[2])))
            elif len(temp_list) > 3 and temp_list[3].isdigit():
                good_text.append(str(int(temp_list[3])))
                describe = temp_list[2]
                if len(temp_list) > 4:
                    describe = [*describe, *temp_list[4:]]
            elif len(temp_list) > 3 and len(temp_list[2]) > 3 and temp_list[2][2:].isdigit():
                if len(temp_list[3]) > 3 and temp_list[3][2:].isdigit():
                    good_text.append(temp_list[2] + '/' + temp_list[3])
                else:
                    good_text.append(str(int(temp_list[2][2:])))
                continue
            elif len(temp_list) > 2 and re.search(r'\d', temp_list[2]):
                digit_list = re.findall(r"\d+\.?\d*", temp_list[2])
                good_text.append(digit_list[0])
                if len(temp_list) > 3:
                    describe = [*describe, *temp_list[3:]]
            elif len(temp_list) > 2:
                describe.append(temp_list[2])
                if len(temp_list) > 3:
                    describe = temp_list[3:]
        elif 'http' in temp_text:
            temp_text = temp_text.replace('\xa0', '').strip()
            print('http', temp_text)
            url = temp_text
        elif temp_text.count('€') == 2 or temp_text.count('¥') == 2:
            temp_text = temp_text.replace('\xa0', '').strip()
            print('¥', temp_text)
            tpud = temp_text
        elif '【店铺链接】' in temp_text:
            temp_text = temp_text.replace('\xa0', '').strip()
            print('【店铺链接】', temp_text)
            url += temp_text
        elif temp_text in self.title_list:
            print(2, temp_text)
            temp_text = temp_text.replace('\xa0', '')
            title = temp_text
        elif len(good_text) == 1:
            temp_text = temp_text.replace('\xa0', ' ').replace('.', ' ').replace(
                '¥', '').replace('|', '')
            temp_list = temp_text.split()
            print(3, temp_list)
            if not len(temp_list):
                continue
            elif len(temp_list) > 1 and temp_list[1].isdigit():
                good_text.append(temp_list[0])
                good_text.append(str(int(temp_list[1])))
                describe = temp_list[2:]
            else:
                describe.append(temp_text)
        elif temp_text.count('¥') == 1:
            temp_text = temp_text.replace('¥', '').replace('\xa0', '').replace(
                '|', '').strip()
            digit_list = re.findall(r"\d+\.?\d*", temp_text)
            print('$', digit_list)
            if len(digit_list):
                good_text.append(digit_list[0])
        else:
            temp_text = temp_text.replace('\xa0', '')
            print(4, temp_text)
            describe.append(temp_text)
    if len(good_text):
        text.append(' '.join([*good_text, *[url, tpud]]))
    text.append(' ')
    self.goods[index] = text
    print(len(text))
def load_rank_index(self, index: int, day_index: int):
    ''' load rank '''
    changeHeaders({'Referer': self.AV_URL})
    url = self.RANKING_URL % (index, day_index)
    text = basic_req(url, 3)
    rank_str = re.findall('window.__INITIAL_STATE__=(.*?);', text)
    if not len(rank_str):
        if can_retry(url):
            self.load_rank_index(index, day_index)
        return False
    rank_map = json.loads(rank_str[0])
    rank_list = rank_map['rankList']
    now_av_id = []
    wait_check_public = []
    rank_map = {}
    for ii, rank in enumerate(rank_list):
        av_id = int(rank['aid'])
        need_params = [
            'pts', 'author', 'mid', 'play', 'video_review', 'coins',
            'duration', 'title'
        ]
        temp_rank_list = [
            ii, *[rank[ii] for ii in need_params], index, day_index
        ]
        now_av_id.append(av_id)
        if not self.check_type(av_id):
            continue
        self.check_rank_rose(av_id, temp_rank_list)
        if self.add_av(av_id, ii, temp_rank_list[1]):
            rank_map[av_id] = temp_rank_list

    ''' check assign av rank '''
    for ii in self.assign_ids:
        if not ii in self.public:
            wait_check_public.append(ii)
        if not ii in self.last_view and not ii in self.rank_map:
            self.rank_map[ii] = []
    have_assign = len([0 for ii in self.assign_ids if ii in now_av_id]) > 0

    ''' check tid type '''
    threading_public = []
    for ii in rank_map.keys():
        work = threading.Thread(target=self.check_type_req, args=(ii,))
        threading_public.append(work)
    for work in threading_public:
        work.start()
    for work in threading_public:
        work.join()
    for ii, jj in rank_map.items():
        if self.check_type(ii) != True:
            continue
        if not ii in self.public:
            wait_check_public.append(ii)
        self.last_check[ii] = int(time.time())
        self.rank_map[ii] = jj

    ''' load public basic data '''
    threading_public = []
    for ii in wait_check_public:
        work = threading.Thread(target=self.public_data, args=(ii, 0,))
        threading_public.append(work)
    for work in threading_public:
        work.start()
    for work in threading_public:
        work.join()

    ''' begin monitor '''
    threading_list = []
    for ii, jj in self.public.items():
        if not ii in self.public_list and jj[0] + one_day > int(time.time()):
            work = threading.Thread(target=self.public_monitor, args=(ii, 0,))
            threading_list.append(work)
    for work in threading_list:
        work.start()
    return have_assign
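# A hedged sketch, not part of the original code: load_rank_index, get_check and the
# comment checker all build a list of threading.Thread objects, start them, then join
# them. The helper below captures that fan-out/join step; run_threads and its parameter
# names are illustrative assumptions rather than existing project code.
import threading

def run_threads(target, args_list):
    """ start one thread per argument tuple and wait for all of them to finish """
    workers = [threading.Thread(target=target, args=args) for args in args_list]
    for work in workers:
        work.start()
    for work in workers:
        work.join()

# Illustrative usage mirroring the tid check in load_rank_index:
# run_threads(self.check_type_req, [(av_id,) for av_id in rank_map.keys()])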