Пример #1
0
Файл: mg.py Проект: zffzjx/water
 def pids_map(self):
     """Scrape the MGTV listing pages and map show names to their ids.

     Every URL in the hard-coded list is fetched with ``request``; the
     ``<ul class="clearfix ullist-ele">`` block of the page is then scanned
     one ``<li>`` at a time.

     Returns:
         dict: show name (the ``a-pic-t1`` span with its tags stripped)
         mapped to ``[pid, type_n]``.  ``pid`` is the digit string found in
         the item's ``/<digits>/f`` fragment and ``type_n`` is the integer
         found in its ``v/<digits>/`` fragment.  Items whose ``type_n`` is
         not a key of ``TV_TYPE_MAP`` are dropped, and each ``pid`` is
         recorded only once across all pages.

     A page without the expected ``<ul>`` block, or an item missing its
     name, pid or type, is skipped instead of aborting the whole scrape.
     """
     urls = [
         'http://www.mgtv.com/tv/rbjj/', 'http://www.mgtv.com/show/wprb/'
     ]
     # Compile once up front; the same patterns are reused for every item.
     list_re = re.compile(u'<ul class="clearfix ullist-ele">(.|\n)+?</ul>')
     item_re = re.compile(u' <li>(.|\n)+?</li>')
     name_re = re.compile(u'<span class="a-pic-t1".+?</span>')
     tag_re = re.compile(u'<.+?>')
     # Backslashes are doubled so the literals carry no invalid escapes.
     pid_re = re.compile(u'/(\\d+?)/f')
     type_re = re.compile(u'v/(\\d+?)/')

     pids_map = {}
     seen_pids = set()
     for url in urls:
         page = request(url)
         container = list_re.search(page)
         if container is None:
             # The listing block is absent from this page; nothing to scan.
             continue
         for item in item_re.finditer(container.group()):
             item_html = item.group()
             name_match = name_re.search(item_html)
             pid_match = pid_re.search(item_html)
             type_match = type_re.search(item_html)
             if not (name_match and pid_match and type_match):
                 # Incomplete item: skip it rather than fail the whole run.
                 continue
             type_n = int(type_match.group(1))
             if type_n not in TV_TYPE_MAP:
                 continue
             pid = pid_match.group(1)
             if pid in seen_pids:
                 continue
             seen_pids.add(pid)
             name = tag_re.sub(u'', name_match.group())
             pids_map[name] = [pid, type_n]
     return pids_map
Пример #2
0
def start_mg(now):
    start = int(time.time())
    print "mg开始抓取 .."
    mg_spi = SpiderMg()
    mg_db = SerializeMg(now)
    # db
    pids_map = mg_spi.pids_map()
    tv_infos = TvInfo.mget_by_platform(u'mg')
    db_tv_names = [_.name for _ in tv_infos]
    reverse = {v: k for k, v in TV_TYPE_MAP.iteritems()}
    for tv_info in tv_infos:
        if not pids_map.get(tv_info.name):
            type_n = reverse[tv_info.type]
            pids_map[tv_info.name] = [tv_info.tv_id, type_n]

    db_play_info_map = PlayInfo.mget_map_by_platform_and_time_after(
        'mg', utils.format_time(time.time(), "%Y-%m-%d"))
    mg_db.info_and_play(pids_map, db_tv_names, db_play_info_map)
    end = int(time.time())
    print 'mg抓取完毕,耗时', utils.format_seconds(end - start)
Пример #3
0
def start_mg(now):
    start = int(time.time())
    print "mg开始抓取 .."
    mg_spi = SpiderMg()
    mg_db = SerializeMg(now)
    # db
    pids_map = mg_spi.pids_map()
    tv_infos = TvInfo.mget_by_platform(u'mg')
    db_tv_names = [_.name for _ in tv_infos]
    reverse = {v: k for k, v in TV_TYPE_MAP.iteritems()}
    for tv_info in tv_infos:
        if not pids_map.get(tv_info.name):
            type_n = reverse[tv_info.type]
            pids_map[tv_info.name] = [tv_info.tv_id, type_n]

    db_play_info_map = PlayInfo.mget_map_by_platform_and_time_after(
        'mg', utils.format_time(time.time(), "%Y-%m-%d"))
    mg_db.info_and_play(pids_map, db_tv_names, db_play_info_map)
    end = int(time.time())
    print 'mg抓取完毕,耗时', utils.format_seconds(end - start)
Пример #4
0
Файл: mg.py Проект: hncg/water
 def pids_map(self):
     """Collect show ids from the MGTV listing pages, keyed by show name.

     Each listing URL is downloaded with ``request`` and the items of its
     ``<ul class="clearfix ullist-ele">`` block are parsed with regular
     expressions.

     Returns:
         dict: ``{name: [pid, type_n]}``.  ``name`` is the text of the
         item's ``a-pic-t1`` span, ``pid`` the digits of its ``/<digits>/f``
         fragment (kept as a string) and ``type_n`` the digits of its
         ``v/<digits>/`` fragment converted to ``int``.  Only items whose
         ``type_n`` is a key of ``TV_TYPE_MAP`` are kept, and a ``pid``
         already seen on any page is not added again.

     Pages lacking the listing block and items lacking any of the three
     fields are skipped, so one malformed entry cannot abort the scrape.
     """
     urls = ['http://www.mgtv.com/tv/rbjj/',
             'http://www.mgtv.com/show/wprb/']
     # Patterns are compiled once here instead of once per item.
     block_pattern = re.compile(
         u'<ul class="clearfix ullist-ele">(.|\n)+?</ul>')
     entry_pattern = re.compile(u' <li>(.|\n)+?</li>')
     title_pattern = re.compile(u'<span class="a-pic-t1".+?</span>')
     markup_pattern = re.compile(u'<.+?>')
     # Doubled backslashes keep the literals free of invalid escapes.
     pid_pattern = re.compile(u'/(\\d+?)/f')
     type_pattern = re.compile(u'v/(\\d+?)/')

     pids_map = {}
     known_pids = set()
     for url in urls:
         page = request(url)
         block = block_pattern.search(page)
         if block is None:
             # No listing block on this page, so there are no items to read.
             continue
         for entry in entry_pattern.finditer(block.group()):
             entry_html = entry.group()
             title = title_pattern.search(entry_html)
             pid_found = pid_pattern.search(entry_html)
             type_found = type_pattern.search(entry_html)
             if title is None or pid_found is None or type_found is None:
                 # A field is missing; ignore this entry and carry on.
                 continue
             type_n = int(type_found.group(1))
             if type_n not in TV_TYPE_MAP:
                 continue
             pid = pid_found.group(1)
             if pid in known_pids:
                 continue
             known_pids.add(pid)
             name = markup_pattern.sub(u'', title.group())
             pids_map[name] = [pid, type_n]
     return pids_map