Пример #1
0
Файл: qq.py Проект: hncg/water
 def play_info(self, db_tv_infos):
     """Fetch raw play-count responses from the QQ data API and save them.

     A record typed as TV drama is requested once using its ``tv_id``.  A
     record typed as variety show is requested once per episode, pairing
     the comma-separated ``vids`` with the comma-separated
     ``detail_episodes``.  Responses rejected by ``play_info_is_valid_qq``
     are logged as warnings and not written.
     """
     url = 'http://data.video.qq.com/fcgi-bin/data?tid=70&&appid=10001007&appkey=e075742beb866145&callback=callback&low_login=1&idlist={}&otype=json' # noqa
     play_dir = PLAY_INFO_FILE_DIR + SAVE_FILE
     utils.mkdir(play_dir)
     for tv_info in db_tv_infos:
         if tv_info.type == u'电视剧':
             # TV drama: one request for the whole show.
             failure_note = u"qq Warning《{}》play_info ,结果不准确\r\n".format(
                 tv_info.name)
             page = request(url.format(tv_info.tv_id))
             if play_info_is_valid_qq(page):
                 utils.write(play_dir,
                             tv_info.name + PLAY_INFO_FILE_FIX, page)
             else:
                 utils.log(message=failure_note)
         elif tv_info.type == u'综艺':
             # Variety show: one request (and one file) per episode.
             episode_pairs = zip(tv_info.vids.split(','),
                                 tv_info.detail_episodes.split(','))
             for vid, episode in episode_pairs:
                 failure_note = u"qq《{}》第{}期play_info ,结果不准确\r\n".format(
                     tv_info.name, episode)
                 page = request(url.format(vid))
                 if play_info_is_valid_qq(page):
                     utils.write(play_dir, tv_info.name + episode +
                                 PLAY_INFO_FILE_FIX, page)
                 else:
                     utils.log(message=failure_note)
Пример #2
0
 def play_info(self, db_play_info_map, db_tv_infos):
     """Sum per-video play counts from the iQiyi mixer API and store them.

     For every show, each id in the comma-separated ``vids`` is requested.
     A response rejected by ``play_info_is_valid`` is retried once after a
     30 second pause and, if still rejected, logged and left out of the
     total.  The day's count is the increase over the total previously
     recorded for the show in ``db_play_info_map`` (never negative, and 0
     when there is no previous total).
     """
     url = 'http://mixer.video.iqiyi.com/jp/mixin/videos/{}/'
     for db_tv_info in db_tv_infos:
         all_play_counts = 0
         for vid in db_tv_info.vids.split(','):
             warning_message = u"iqy《{}》{} play_info ,结果不准确\r\n".format(
                 db_tv_info.name, vid)
             json_content = play_info_is_valid(request(url.format(vid)))
             if not json_content:
                 # Back off, then give the endpoint one more chance.
                 time.sleep(30)
                 json_content = play_info_is_valid(request(url.format(vid)))
             if not json_content:
                 utils.log(message=warning_message)
                 continue
             all_play_counts += int(json_content.get('playCount'))
         pre_all_play_counts = db_play_info_map.get(db_tv_info.name)
         if pre_all_play_counts:
             day_play_counts = max(
                 all_play_counts - int(pre_all_play_counts), 0)
         else:
             day_play_counts = 0
         # NOTE: ``vid`` still holds the last id from the loop above.
         PlayInfo.add(
             tv_id=vid,
             tv_name=db_tv_info.name,
             day_play_counts=day_play_counts,
             all_play_counts=all_play_counts,
             time_at=self.now,
             platform=PLATFORM,
             type=db_tv_info.type
         )
Пример #3
0
Файл: qq.py Проект: hncg/water
 def tv_names(self):
     """Collect show titles from the two QQ ranking pages.

     Titles are returned in page order; duplicates are not removed.
     """
     urls = ['http://v.qq.com/rank/detail/2_-1_-1_-1_2_-1.html',
             'http://v.qq.com/rank/detail/10_-1_-1_-1_2_-1.html']
     # Matches the markup around a title so only the link text remains.
     wrapper = re.compile(u'x_con_item_title"><a.+?>|</a>')
     names = []
     for url in urls:
         result = request(url)
         for m in re.finditer(u'x_con_item_title"><a.+?</a>', result):
             names.append(wrapper.sub(u'', m.group()))
     return names
Пример #4
0
Файл: iqy.py Проект: hncg/water
 def dianshiju_infos(self):
     """Scrape the iQiyi TV-drama ranking page for per-show identifiers.

     Returns:
         dict mapping show title to a three-element list
         ``[{'url': ...}, {'id': ...}, {'v_id': ...}]`` holding the show's
         page URL, the ``data-player-tvid`` value and the
         ``data-player-videoid`` value read from that page.

     Ranking entries that lack a title or link, and show pages that lack
     either player attribute, are skipped instead of aborting the scrape.
     """
     tv_infos = {}
     # The random ``rdm`` value makes every request URL unique
     # (presumably to bypass caching -- confirm).
     dianshi_names_url = 'http://top.iqiyi.com/dianshiju.html?rdm=' + \
         str(random.randint(1, 100000))
     for m in re.finditer(u' <li  j-delegate="liover"(.|\n)+?</li>',
                          request(dianshi_names_url)):
         entry = m.group()
         title = re.search(u'title=".+?"', entry)
         link = re.search(u'http.+?html', entry)
         if not title or not link:
             continue
         name = title.group()[7:-1]   # strip leading 'title="' and the closing quote
         url = link.group()
         show_page = request(url)
         try:
             tv_id = re.search(u'data-player-tvid=".+?"',
                               show_page).group()[18:-1]
             v_id = re.search(u'data-player-videoid=".+?"',
                              show_page).group()[21:-1]
         except (AttributeError, TypeError):
             # AttributeError: an attribute is missing from the page.
             # TypeError: the response is not text (assumes request() may
             # return a non-string on failure -- confirm).
             continue
         tv_infos[name] = [{'url': url}, {'id': tv_id}, {'v_id': v_id}]
     return tv_infos
Пример #5
0
Файл: yk.py Проект: hncg/water
 def tv_urls_map(self):
     """Map show titles to detail-page URLs from two Youku listing pages.

     A title that appears more than once keeps the URL seen last.
     """
     listing_pages = ['http://www.youku.com/v_olist/c_97.html',
                      'http://www.youku.com/v_olist/c_85.html']
     tv_urls_map = {}
     for listing in listing_pages:
         html = request(listing)
         for block in re.finditer(u'<div class="p-link">(.|\n)+?</div>',
                                  html):
             snippet = block.group()
             link = re.search(u'http.+?\.html', snippet).group()
             # [8:-1] drops the leading ' title="' and the closing quote.
             title = re.search(u' title=".+?"', snippet).group()[8:-1]
             tv_urls_map[title] = link
     return tv_urls_map
Пример #6
0
Файл: iqy.py Проект: hncg/water
    def zongyi_infos(self):
        """Scrape the iQiyi variety-show daily ranking for per-show details.

        Returns:
            dict mapping show title to
            ``[vids, current_number, description, cast_member]`` where
            ``vids`` is the list of ``tvId`` values (as strings) from the
            episode-list API, ``current_number`` is the ``tvYear`` of its
            first entry, and ``description`` / ``cast_member`` are
            tag-stripped text, or ``u''`` when not found on the show page.

        Ranking entries whose page lacks the title block or ``sourceId``,
        or whose episode list cannot be parsed, are skipped.
        """
        tv_infos = {}
        # The random number is sent as its own ``rdm`` query parameter.
        # It used to be concatenated directly onto ``dim=day``, which
        # turned that parameter into e.g. ``dim=day4711``.
        zongyi_names_url = 'http://top.iqiyi.com/index/top50.htm?cid=6&dim=day&rdm=' + str(random.randint(1, 100000)) # noqa
        list_url = 'http://cache.video.qiyi.com/jp/sdvlst/6/{}/?callback=callback' # noqa
        for m in re.finditer(u'<li  j-delegate="liover"(.|\n)+?</li>',
                             request(zongyi_names_url)):
            url = re.search(u'http.+?html', m.group()).group()
            page = request(url)
            try:
                head = re.search(u'<h2 class="jiemu-tit">.+?</h2>', page).group() # noqa
                name = re.search(u'title=".+?"', head).group()[7:-1]
                # From here on ``url`` is the show's own page taken from
                # the title block, not the ranking link.
                url = re.search(u'http.+?\.html', head).group()
                source = re.search(u'sourceId:\d+', page)
                if not source:
                    continue
                source_id = re.search(u'\d+', source.group()).group()
            except (AttributeError, TypeError):
                # AttributeError: an expected fragment is missing.
                # TypeError: the response is not text (assumes request()
                # may return a non-string on failure -- confirm).
                continue
            if tv_infos.get(name):
                # Already collected under this title.
                continue
            page = request(url)
            description = re.search(u'<span class="bigPic-b-jtxt">(.|\n)+?</span>', page) or u''# noqa
            if description:
                description = re.compile(u'<.+?>').sub(u'', description.group()) # noqa
            cast_member = re.search(u'<p class="li-large">主持人:(.|\n)+?</p>', page) or u'' # noqa
            if cast_member:
                cast_member = re.compile(u'<.+?>|\s|主持人:'). \
                    sub(u'', cast_member.group())

            lists = request(list_url.format(source_id.encode('utf8')))
            try:
                # The API answers ``callback({...})``; keep only the JSON
                # object between the parentheses.
                lists = re.search(u'\({(.|\n)*}\)', lists).group()[1:-1]
                json_lists = json.loads(lists)
                vids = [str(_.get('tvId')) for _ in json_lists.get('data')]
                current_number = json_lists.get('data')[0]['tvYear']
            except (AttributeError, TypeError, ValueError, LookupError):
                # No JSON payload, malformed JSON, or missing/empty 'data'.
                continue
            tv_infos[name] = [vids, current_number, description,
                              cast_member]
        return tv_infos
Пример #7
0
 def dianshiju_info(self, tv_infos, db_tv_names):
     """Fetch iQiyi TV-drama metadata and store it through ``TvInfo``.

     ``tv_infos`` maps a show title to a list whose items carry the keys
     ``'url'``, ``'id'`` and ``'v_id'`` at indexes 0, 1 and 2.  A show
     whose title is in ``db_tv_names`` is updated; any other show is
     added.  Responses rejected by ``tv_info_is_valid`` are logged and
     skipped.
     """
     url = u'http://cache.video.qiyi.com/jp/vi/{}/{}/'
     for name, tv_info in tv_infos.items():
         warning_message = u"《iqy {}》tv_info ,结果不准确\r\n".format(name)
         tv_id = tv_info[1]['id']
         vids = tv_info[2]['v_id']
         json_content = tv_info_is_valid(request(url.format(tv_id, vids)))
         if not json_content:
             utils.log(message=warning_message)
             continue
         # One set of field values serves both the update and add paths.
         record = dict(
             name=name,
             tv_id=tv_id,
             platform=PLATFORM,
             vids=tv_id,
             all_number=json_content['es'],
             current_number=json_content['upOrder'],
             description=json_content['info'],
             label=json_content['tg'],
             cast_member=json_content['ma'],
             update_info=json_content['qiyiPlayStrategy'][:32],
             last_update_time=json_content['up'],
             detail_urls=tv_info[0]['url'],
             type=u'电视剧',
             detail_titles=json_content['vn'],
             detail_episodes='',
         )
         if name in db_tv_names:
             TvInfo.update(**record)
         else:
             TvInfo.add(**record)
Пример #8
0
Файл: qq.py Проект: hncg/water
 def tv_info(self, tv_names):
     """Query the QQ search API for each title and save the raw response.

     Responses rejected by ``tv_info_is_valid_qq`` are logged as warnings
     and not written.  Accepted responses are written UTF-8 encoded.
     """
     url = 'http://s.video.qq.com/search?comment=1&plat=2&otype=json&query={}&callback=callback'  # noqa
     info_dir = TV_INFO_FILE_DIR + SAVE_FILE
     utils.mkdir(info_dir)
     for name in tv_names:
         warning_message = u"qq Warning《{}》tv_info ,结果不准确\r\n".format(name)
         page = request(url.format(name.encode('utf8')))
         if tv_info_is_valid_qq(page):
             utils.write(info_dir, name + TV_INFO_FILE_FIX,
                         page.encode('utf8'))
         else:
             utils.log(message=warning_message)
Пример #9
0
Файл: sh.py Проект: hncg/water
 def pids_map(self):
     """Map show titles to Sohu playlist ids.

     Reads the hot-show and hot-drama pages and, from at most the first
     20 ``rList`` blocks of each, takes every item's ``data-plid`` number
     and ``title`` attribute.
     """
     pids_map = {}
     for url in ['http://tv.sohu.com/hotshow/',
                 'http://tv.sohu.com/hotdrama/']:
         page = request(url)
         ranked_lists = [found.group() for found in
                         re.finditer(u'<ul class="rList">(.|\n)+?</ul>',
                                     page)]
         for ranked in ranked_lists[:20]:
             for entry in re.finditer(u'<li(.|\n)+?</li>', ranked):
                 item = entry.group()
                 plid_attr = re.search(u'data-plid="\d+', item).group()
                 pid = re.search(u'\d+', plid_attr).group()
                 # NOTE(review): this pattern is greedy, so the match runs
                 # to the last double quote on the line -- confirm that is
                 # intended.
                 name = re.search(u'title=".+"', item).group()[7:-1]
                 pids_map[name] = pid
     return pids_map
Пример #10
0
Файл: let.py Проект: hncg/water
 def dianshiju_urls_map(self):
     """Scrape the Le.com TV-drama chart page.

     Returns:
         dict mapping show title to ``[url, pid, cast_member, label]``,
         where ``pid`` is the number in front of ``.html`` in the show
         URL and the other values come from the row's ``<span>`` cells.

     Rows with too few cells or without a detail link are skipped.
     """
     urls_map = {}
     result = request('http://top.le.com/tvhp.html')
     chart = re.search(u'<ol class="chart-list j-for"(.|\n)+?</ol>',
                       result)
     # The first <li> is dropped -- presumably a header row; confirm.
     rows = [_.group() for _ in re.finditer(u'<li>(.|\n)+?</li>',
             chart.group())][1:]
     strip_tags = re.compile(u'<.+?>')
     tags_or_space = re.compile(u'<.+?>|\s+')
     for row in rows:
         cells = [_.group() for _ in re.finditer(u'<span(.|\n)+?</span>',
                  row)]
         try:
             name = strip_tags.sub('', cells[1])
             show_url = re.search(u'http://.+?\.html', cells[1]).group()
             pid = re.search(u'\d+?\.html', show_url).group()[:-5]
             cast_member = tags_or_space.sub(' ', cells[2])
             label = tags_or_space.sub(' ', cells[4])
         except (AttributeError, IndexError):
             # IndexError: the row has fewer cells than expected.
             # AttributeError: no link / numeric id matched.
             continue
         urls_map[name] = [show_url, pid, cast_member, label]
     return urls_map
Пример #11
0
Файл: let.py Проект: hncg/water
 def zongyi_urls_map(self):
     """Scrape the Le.com variety-show chart page.

     Returns:
         dict mapping show title to ``[url, label]``.  The title is the
         text between the first ``《`` and ``》`` of the row's second
         ``<span>`` cell.

     Rows without such a title, with too few cells, or without a detail
     link are skipped.
     """
     urls_map = {}
     result = request('http://top.le.com/varhp.html')
     chart = re.search(u'<ol class="chart-list j-for"(.|\n)+?</ol>',
                       result)
     # The first <li> is dropped -- presumably a header row; confirm.
     rows = [_.group() for _ in re.finditer(u'<li>(.|\n)+?</li>',
             chart.group())][1:]
     for row in rows:
         cells = [_.group() for _ in re.finditer(u'<span(.|\n)+?</span>',
                  row)]
         try:
             plain_title = re.compile(u'<.+?>').sub('', cells[1])
             quoted = re.search(u'《.+?》', plain_title)
             if not quoted:
                 continue
             name = quoted.group()[1:-1]   # drop the surrounding 《 》
             show_url = re.search(u'http://.+?\.html', cells[1]).group()
             label = re.compile(u'<.+?>|\s+').sub(' ', cells[2])
         except (AttributeError, IndexError):
             # IndexError: the row has fewer cells than expected.
             # AttributeError: no detail link matched.
             continue
         urls_map[name] = [show_url, label]
     return urls_map
Пример #12
0
Файл: mg.py Проект: hncg/water
 def pids_map(self):
     """Scrape the two MGTV ranking pages for show ids.

     Returns:
         dict mapping show title to ``[pid, type_n]``, where ``pid`` is
         the id string and ``type_n`` the integer taken from the item's
         ``v/<n>/`` path segment.

     Only items whose ``type_n`` is a key of ``TV_TYPE_MAP`` are kept, and
     each ``pid`` is recorded once (the first title seen for it wins).
     Items missing the title, id or type are skipped.
     """
     urls = ['http://www.mgtv.com/tv/rbjj/',
             'http://www.mgtv.com/show/wprb/']
     pids_map = {}
     seen_pids = set()
     for url in urls:
         page = request(url)
         all_lists = re.search(u'<ul class="clearfix ullist-ele">(.|\n)+?</ul>', page).group() # noqa
         for _list in re.finditer(u' <li>(.|\n)+?</li>', all_lists):
             item = _list.group()
             try:
                 name = re.search(u'<span class="a-pic-t1".+?</span>',
                                  item).group()
                 pid = re.search(u'/\d+?/f', item).group()[1:-2]
                 type_n = int(re.search(u'v/\d+?/', item).group()[2:-1])
             except (AttributeError, ValueError):
                 # A fragment is missing or the type is not a number.
                 continue
             if type_n not in TV_TYPE_MAP or pid in seen_pids:
                 continue
             seen_pids.add(pid)
             name = re.compile(u'<.+?>').sub(u'', name)
             pids_map[name] = [pid, type_n]
     return pids_map
Пример #13
0
Файл: let.py Проект: hncg/water
    def dianshiju(self, urls_map, db_tv_names, db_play_info_map):
        """Scrape Le.com TV-drama pages and store show info and play counts.

        Args:
            urls_map: maps show title to a sequence holding the show URL,
                pid, cast and label at indexes 0-3.
            db_tv_names: titles already stored; membership decides whether
                the show is updated or added.
            db_play_info_map: maps show title to the previously recorded
                total play count, used to derive the day's increase.

        A show is skipped, with a logged warning, when its page is
        rejected by ``dianshiju_is_valid`` or when the play-count response
        is rejected by ``play_info_is_valid`` twice in a row.
        """
        play_url = 'http://v.stat.letv.com/vplay/queryMmsTotalPCount?callback=callback&pid={}' # noqa
        tv_type = u'电视剧'
        for name, tv_info in urls_map.items():
            url = tv_info[0]
            pid = tv_info[1]
            tv_id = pid
            cast_member = tv_info[2]
            label = tv_info[3]

            content = dianshiju_is_valid(request(url))
            if not content:
                warning_message = u"let《{}》tv_info ,结果不准确\r\n". \
                    format(name)
                utils.log(message=warning_message)
                continue
            description = re.search(u'<p class="p7">(.|\n)+?</p>', content). \
                group()
            description = re.compile(u'<.+?>').sub('', description)
            all_number = re.search(u'共\d+?集', content).group()
            all_number = re.search(u'\d+', all_number).group()
            # Without an "updated to episode N" marker the current episode
            # number falls back to the total.
            updated_to = re.search(u'至\d+?集', content)
            if updated_to:
                current_number = re.search(u'\d+', updated_to.group()).group()
            else:
                current_number = all_number

            json_content = play_info_is_valid(request(play_url.format(pid)))
            if not json_content:
                # One immediate retry before giving up on this show.
                json_content = play_info_is_valid(
                    request(play_url.format(pid)))
            if not json_content:
                warning_message = u"let《{}》play_info ,结果不准确\r\n". \
                    format(name)
                # This warning used to be built but never logged.
                utils.log(message=warning_message)
                continue
            all_play_counts = json_content.get('plist_play_count')
            pre_all_play_counts = db_play_info_map.get(name)
            if pre_all_play_counts:
                day_play_counts = max(
                    all_play_counts - int(pre_all_play_counts), 0)
            else:
                day_play_counts = 0

            tv_fields = dict(
                name=name, tv_id=tv_id,
                description=description,
                last_update_time=u'',
                all_number=all_number,
                current_number=current_number,
                cast_member=cast_member,
                platform=PLATFORM,
                label=label, update_info=u'',
                detail_urls=url,
                vids=tv_id,
                type=tv_type,
                detail_titles=u'',
                detail_episodes=u'',
            )
            if name in db_tv_names:
                TvInfo.update(**tv_fields)
            else:
                TvInfo.add(**tv_fields)
            PlayInfo.add(
                tv_id=tv_id,
                tv_name=name,
                day_play_counts=day_play_counts,
                all_play_counts=all_play_counts,
                time_at=self.now,
                platform=PLATFORM,
                type=tv_type
            )
Пример #14
0
Файл: let.py Проект: hncg/water
    def zongyi(self, urls_map, db_tv_names, db_play_info_map):
        """Scrape Le.com variety shows and store show info and play counts.

        ``urls_map`` maps show title to a sequence holding the show URL and
        label at indexes 0 and 1.  The album pid is read from the show
        page; a pid already handled in this call is not processed again.
        The description page, the episode-list API and the play-count API
        are then queried in turn, and a rejected response from any of
        those three logs a warning and skips the show.  A show page
        rejected by ``zongyi_is_valid`` is skipped without a log entry.
        """
        number_url = 'http://api.le.com/mms/out/album/videos?id={}&cid=11&platform=pc&callback=callback' # noqa
        play_url = 'http://v.stat.letv.com/vplay/queryMmsTotalPCount?callback=callback&pid={}' # noqa
        description_url = 'http://www.le.com/zongyi/{}.html'
        tv_type = u'综艺'
        handled_pids = set()
        for name, tv_info in urls_map.items():
            url, label = tv_info[0], tv_info[1]
            pid_page = zongyi_is_valid(request(url))
            if not pid_page:
                continue
            pid_field = re.search(u'pid: \d+?,', pid_page).group()
            pid = re.search(u'\d+', pid_field).group()
            if pid in handled_pids:
                continue
            handled_pids.add(pid)
            tv_id = pid

            d_page = description_is_valid(
                request(description_url.format(pid.encode('utf8'))))
            if not d_page:
                utils.log(
                    message=u"let《{}》description_info ,结果不准确\r\n".format(name))
                continue
            raw_description = re.search(u'<p class="p7">(.|\n)+?</p>',
                                        d_page).group()
            description = re.compile(u'<.+?>').sub('', raw_description)

            n_json = number_utl_is_valid(request(number_url.format(pid)))
            if not n_json:
                utils.log(
                    message=u"let zongyi《{}》number_info ,结果不准确\r\n".format(name))
                continue
            all_number = n_json['total']
            current_number = n_json['data'][0]['episode']
            # Merge every episode's guest string, then de-duplicate the
            # individual space-separated names.
            guests = [episode.get('guest') for episode in n_json['data']]
            cast_member = " ".join(set(" ".join(guests).split(" ")))

            json_content = play_info_is_valid(request(play_url.format(pid)))
            if not json_content:
                utils.log(
                    message=u"let《{}》play_info ,结果不准确\r\n".format(name))
                continue
            all_play_counts = json_content.get('plist_play_count')
            pre_all_play_counts = db_play_info_map.get(name)
            if pre_all_play_counts:
                # ``or 0`` keeps a zero increase as the plain integer 0.
                day_play_counts = max(
                    all_play_counts - int(pre_all_play_counts), 0) or 0
            else:
                day_play_counts = 0

            tv_fields = dict(
                name=name,
                tv_id=tv_id,
                description=description,
                last_update_time=u'',
                all_number=all_number,
                current_number=current_number,
                cast_member=cast_member,
                platform=PLATFORM,
                label=label,
                update_info=u'',
                detail_urls=url,
                vids=tv_id,
                type=tv_type,
                detail_titles=u'',
                detail_episodes=u'',
            )
            store = TvInfo.update if name in db_tv_names else TvInfo.add
            store(**tv_fields)
            PlayInfo.add(
                tv_id=tv_id,
                tv_name=name,
                day_play_counts=day_play_counts,
                all_play_counts=all_play_counts,
                time_at=self.now,
                platform=PLATFORM,
                type=tv_type
            )
Пример #15
0
Файл: sh.py Проект: hncg/water
 def info_and_play(self, pids_map, db_tv_names, db_play_info_map):
     """Fetch Sohu show metadata and play counts and store both.

     ``pids_map`` maps show title to playlist id.  For each show the
     playlist API and then the count API are queried; a response rejected
     by ``info_is_valid`` / ``play_is_valid`` logs a warning and skips the
     show.  Titles found in ``db_tv_names`` are updated, all others are
     added, and a ``PlayInfo`` row is added either way.
     """
     play_url = "http://count.vrs.sohu.com/count/queryext.action?plids={}&callback=callback"  # noqa
     info_url = "http://pl.hd.sohu.com/videolist?playlistid={}&callback=callback"  # noqa
     for name, pid in pids_map.items():
         tv_id = pid
         json_content = info_is_valid(
             request(info_url.format(pid.encode("utf8"))))
         if not json_content:
             utils.log(message=u"sh《{}》tv_info ,结果不准确\r\n".format(name))
             continue
         description = json_content["albumDesc"]
         current_number = json_content["updateSet"]
         total_set = json_content["totalSet"]
         # A total of u"0" (or an empty total) falls back to the number of
         # episodes released so far.
         if total_set != u"0" and total_set:
             all_number = total_set
         else:
             all_number = current_number
         tv_type = TV_TYPE_MAP.get(json_content["cid"])
         # Variety shows list hosts; everything else lists actors.
         cast_key = "hosts" if tv_type == u"综艺" else "actors"
         cast_member = u",".join(json_content[cast_key])
         label = ",".join(json_content["categories"])
         update_info = json_content["updateNotification"]

         play_json = play_is_valid(
             request(play_url.format(pid.encode("utf8"))), pid)
         if not play_json:
             utils.log(message=u"sh《{}》play_info ,结果不准确\r\n".format(name))
             continue
         all_play_counts = play_json[pid]["total"]
         pre_all_play_counts = db_play_info_map.get(name)
         if pre_all_play_counts:
             # ``or 0`` keeps a zero increase as the plain integer 0.
             day_play_counts = max(
                 all_play_counts - int(pre_all_play_counts), 0) or 0
         else:
             day_play_counts = 0

         tv_fields = dict(
             name=name,
             tv_id=tv_id,
             description=description,
             last_update_time="",
             all_number=all_number,
             current_number=current_number,
             cast_member=cast_member,
             platform=PLATFORM,
             label=label,
             update_info=update_info,
             detail_urls="",
             vids=tv_id,
             type=tv_type,
             detail_titles="",
             detail_episodes="",
         )
         store = TvInfo.update if name in db_tv_names else TvInfo.add
         store(**tv_fields)
         PlayInfo.add(
             tv_id=tv_id,
             tv_name=name,
             day_play_counts=day_play_counts,
             all_play_counts=all_play_counts,
             time_at=self.now,
             platform=PLATFORM,
             type=tv_type,
         )
Пример #16
0
Файл: mg.py Проект: hncg/water
 def info_and_play(self, pids_map, db_tv_names, db_play_info_map):
     """Scrape MGTV show pages and play counts and store both.

     ``pids_map`` maps show title to a sequence holding the pid and the
     numeric type at indexes 0 and 1.  Metadata is cut out of the show
     page with regular expressions.  The episode total comes from the
     page for TV dramas and from the per-year episode list otherwise.
     The play count arrives as text with an optional 万 / 亿 suffix and is
     converted to a number.  A rejected response at any step logs a
     warning and skips the show.
     """
     play_url = 'http://videocenter-2039197532.cn-north-1.elb.amazonaws.com.cn/dynamicinfo?callback=callback&cid={}' # noqa
     info_url = 'http://www.mgtv.com/v/{type_n}/{pid}'
     year_url = 'http://www.mgtv.com/v/1/{}/s/json.year.js'
     number_url = 'http://www.mgtv.com/v/1/{pid}/s/json.{year}.js'
     for name, tv_infos in pids_map.items():
         pid = tv_infos[0].encode('utf8')
         tv_id = pid
         info = info_is_valid(
             request(info_url.format(type_n=tv_infos[1], pid=tv_infos[0])))
         if not info:
             utils.log(message=u"mg《{}》tv_info ,结果不准确\r\n".format(name))
             continue

         last_series = re.search(u'"lastseries" : ".+?"', info).group()
         # Text after the first ':' minus the leading ' "' and final '"'.
         current_number = last_series.split(':')[1][2:-1]
         description = re.sub(
             u'<.+?>|简介|:|\s', u'',
             re.search(u'简介</em>(.|\n)+?</span>', info).group())
         tv_type = TV_TYPE_MAP[tv_infos[1]]
         # Dramas list leading actors; other types list hosts.
         cast_flag = u'主持人'
         if tv_type == u'电视剧':
             cast_flag = u'主演'
         cast_member = re.sub(
             u'<.+?>|主演|主持人|:', u'',
             re.search(u'{}</em>(.|\n)+?</p>'.format(cast_flag),
                       info).group())
         label = re.sub(
             u'<.+?>|类型|:|\s', u'',
             re.search(u'类型</em>(.|\n)+?</p>', info).group())

         if tv_type == u'电视剧':
             total_text = re.search(u'共<b>\d+?</b>集', info).group()
             all_number = re.search(u'\d+', total_text).group()
         else:
             year = year_json_is_valid(request(year_url.format(pid)))
             if not year:
                 utils.log(
                     message=u"mg zongi《{}》year_info ,结果不准确\r\n".format(name))
                 continue
             number_info = number_info_is_valid(
                 request(number_url.format(pid=pid, year=int(year[0]))))
             if not number_info:
                 utils.log(
                     message=u"mg zongyi《{}》number_info ,结果不准确\r\n".format(name))
                 continue
             all_number = sum(1 for _ in number_info)

         play_json = play_is_valid(request(play_url.format(pid)))
         if not play_json:
             utils.log(message=u"mg《{}》play_info ,结果不准确\r\n".format(name))
             continue
         all_play_counts_str = play_json['data']['allVVStr']
         all_play_counts = float(
             re.sub(u'万|亿', u'', all_play_counts_str))
         # 万 = ten thousand, 亿 = hundred million; 万 is checked first.
         for unit, factor in ((u'万', 10000), (u'亿', 100000000)):
             if unit in all_play_counts_str:
                 all_play_counts *= factor
                 break
         pre_all_play_counts = db_play_info_map.get(name)
         if pre_all_play_counts:
             # ``or 0`` turns a zero increase (0.0) into the integer 0.
             day_play_counts = max(
                 all_play_counts - int(pre_all_play_counts), 0) or 0
         else:
             day_play_counts = 0

         tv_fields = dict(
             name=name,
             tv_id=tv_id,
             description=description,
             last_update_time='',
             all_number=all_number,
             current_number=current_number,
             cast_member=cast_member,
             platform=PLATFORM,
             label=label,
             update_info='',
             detail_urls='',
             vids=tv_id,
             type=tv_type,
             detail_titles='',
             detail_episodes='',
         )
         store = TvInfo.update if name in db_tv_names else TvInfo.add
         store(**tv_fields)
         PlayInfo.add(
             tv_id=tv_id,
             tv_name=name,
             day_play_counts=day_play_counts,
             all_play_counts=all_play_counts,
             time_at=self.now,
             platform=PLATFORM,
             type=tv_type
         )
Пример #17
0
Файл: yk.py Проект: hncg/water
 def info_and_play(self, tv_urls_map, db_tv_names, db_play_info_map):
     """Scrape every page in ``tv_urls_map`` and store the parsed results.

     For each ``name -> url`` pair the page is fetched with ``request`` and
     checked with ``info_and_play_is_valid``.  A failed check is retried
     once after 30 seconds; if it fails again a warning is logged and the
     entry is skipped.  Type, cast, description, episode numbers and the
     total play count are then extracted with regular expressions.  The
     ``TvInfo`` record is updated when ``name`` is in ``db_tv_names`` and
     added otherwise, and a ``PlayInfo`` record stamped with ``self.now``
     is always added.

     :param tv_urls_map: mapping of name to page URL.
     :param db_tv_names: container of names that already have a ``TvInfo``
         record; selects ``TvInfo.update`` over ``TvInfo.add``.
     :param db_play_info_map: mapping of name to an earlier total play
         count; it is subtracted from the freshly scraped total to get the
         daily count (0 when there is no earlier value).
     :raises AttributeError: when one of the unguarded ``re.search`` calls
         finds no match in the page.
     """
     number_url = 'http://www.youku.com/show_episode/{}.html?dt=json&divid={}' # noqa
     for name, url in tv_urls_map.items():
         warning_message = u"yk 《{} 》结果不准确\r\n".format(name)
         page = request(url)
         content = info_and_play_is_valid(page, name)
         if not content:
             # Give the page one more chance after a pause before giving up.
             time.sleep(30)
             page = request(url)
             content = info_and_play_is_valid(page, name)
         if not content:
             utils.log(message=warning_message)
             continue

         # The id is the "id...html" part of the URL without ".html".
         tv_id = re.search(u'id.+?\.html', url).group()[:-5]
         title_str = re.search(
             u'<h1 class="title">(.|\n)+?</h1>', page).group()
         # Strip the leading 'target="_blank">' (16 chars) and trailing '<'.
         tv_type = re.search(
             u'target="_blank">.+?<', title_str).group()[16:-1]

         # The cast label searched for depends on the type of the entry.
         cast_member_flag = u'主持人' if tv_type == u'综艺' else u'主演'
         cast_member_str = re.search(
             cast_member_flag + u':</label>(.|\n)+?</span>', page).group()
         cast_member = ",".join(
             re.search('">.+?<', anchor.group()).group()[2:-1]
             for anchor in re.finditer(u'<a.+?</a>', cast_member_str))

         description_str = re.search(
             u'<span class="short" id="show_info_short"(.|\n)+?</div>',
             content).group()
         description = re.sub(u'<.*?>|查看详情>>', u'', description_str)

         all_number = ''
         current_number = ''
         if tv_type == u'电视剧':
             number_str = re.search(
                 u'class="basenotice"(.|\n)+?<', content).group()
             updated_to = re.search(u'更新至\d+', number_str)
             all_number = re.search(u'共\d+', number_str).group()[1:]
             # Without an "updated to N" marker the current number falls
             # back to the total.
             current_number = (updated_to.group()[3:] if updated_to
                               else all_number)
         if tv_type == u'综艺':
             all_number = 0
             tmp_episode = []
             for tab in re.finditer(u'y\.episode\.show\(\'.+?\'\)', content):
                 divid = re.search(u'\'.+?\'', tab.group()).group()[1:-1]
                 current_number_str = request(number_url.format(
                     tv_id.encode('utf8'), divid.encode('utf8')))
                 if not current_number_str:
                     warning_message = \
                         u"yk 《{} 》number结果不准确\r\n".format(name)
                     utils.log(message=warning_message)
                     continue
                 tmp_episode = list(
                     re.finditer(u'<ul(.|\n)+?</ul>', current_number_str))
                 all_number += len(tmp_episode)
             if not all_number:
                 # Nothing came back from the per-tab requests: fall back
                 # to the episode block embedded in the page itself.
                 episode_block = re.search(
                     u'<div id="episode">(.|\n)+?</div>', page).group()
                 tmp_episode = list(
                     re.finditer(u'<ul(.|\n)+?</ul>', episode_block))
                 if not tmp_episode:
                     utils.log(message=warning_message)
                     continue
             if not tmp_episode:
                 # Earlier tabs had entries but the last fetched list is
                 # empty, so there is nothing to read the current number
                 # from.  Log it like every other skipped entry.
                 utils.log(message=warning_message)
                 continue
             # NOTE(review): this replaces the sum accumulated over all
             # tabs with the size of the last fetched list -- confirm that
             # this is intended.
             all_number = len(tmp_episode)
             current_number = re.search(
                 u'<label>.+?</label>', tmp_episode[0].group()).group()
             current_number = re.sub(u'<.+?>|期', u'', current_number)

         all_play_counts = re.search(
             u'<label>总播放:</label>.+?\n', content).group()
         all_play_counts = int(re.sub(
             u'<label>总播放:</label>|,|\n', u'', all_play_counts))
         pre_all_play_counts = db_play_info_map.get(name)
         # Daily count is the growth since the earlier total, never negative.
         day_play_counts = (
             max(all_play_counts - int(pre_all_play_counts), 0)
             if pre_all_play_counts else 0)

         # Built once so the update and add paths cannot drift apart.  The
         # fields set to '' are not extracted by this method.
         tv_fields = dict(
             name=name, tv_id=tv_id,
             description=description,
             last_update_time='',
             all_number=all_number,
             current_number=current_number,
             cast_member=cast_member,
             platform=PLATFORM,
             label='', update_info='',
             detail_urls=url,
             vids=tv_id,
             type=tv_type,
             detail_titles='',
             detail_episodes='',
         )
         if name in db_tv_names:
             TvInfo.update(**tv_fields)
         else:
             TvInfo.add(**tv_fields)
         PlayInfo.add(
             tv_id=tv_id,
             tv_name=name,
             day_play_counts=day_play_counts,
             all_play_counts=all_play_counts,
             time_at=self.now,
             platform=PLATFORM,
             type=tv_type
         )