示例#1
0
def start_qq(now):
    start = int(time.time())
    print "qq开始抓取 .."
    qq_spi = SpiderQq()
    qq_db = SerializeQq(now)

    # spider tv_names
    tv_names = qq_spi.tv_names()
    tv_names = list(set(tv_names))

    # spider tv_info
    qq_spi.tv_info(tv_names)

    # db tv_info
    db_tv_names = [_.name for _ in TvInfo.mget_by_platform(u'qq')]
    qq_db.tv_info(tv_names, db_tv_names)

    tv_names = tv_names + db_tv_names
    tv_names = list(set(tv_names))

    # spider play
    db_tv_infos = TvInfo.mget_by_platform(u'qq')
    qq_spi.play_info(db_tv_infos)

    # db play_info
    qq_db.play_info(db_tv_infos)

    end = int(time.time())
    print 'qq抓取完毕,耗时', utils.format_seconds(end - start)
示例#2
0
文件: start.py 项目: hncg/water
def start_qq(now):
    start = int(time.time())
    print "qq开始抓取 .."
    qq_spi = SpiderQq()
    qq_db = SerializeQq(now)

    # spider tv_names
    tv_names = qq_spi.tv_names()
    tv_names = list(set(tv_names))

    # spider tv_info
    qq_spi.tv_info(tv_names)

    # db tv_info
    db_tv_names = [_.name for _ in TvInfo.mget_by_platform(u'qq')]
    qq_db.tv_info(tv_names, db_tv_names)

    tv_names = tv_names + db_tv_names
    tv_names = list(set(tv_names))

    # spider play
    db_tv_infos = TvInfo.mget_by_platform(u'qq')
    qq_spi.play_info(db_tv_infos)

    # db play_info
    qq_db.play_info(db_tv_infos)

    end = int(time.time())
    print 'qq抓取完毕,耗时', utils.format_seconds(end - start)
示例#3
0
def start_let(now):
    start = int(time.time())
    print "let开始抓取 .."
    let_spi = SpiderLet()
    let_db = SerializeLet(now)

    # dianshiju
    dianshiju_urls_map = let_spi.dianshiju_urls_map()
    tv_infos = TvInfo.mget_by_platform(u'let')
    db_tv_names = [_.name for _ in tv_infos]

    db_play_info_map = PlayInfo.mget_map_by_platform_and_time_after(
        'let', utils.format_time(time.time(), "%Y-%m-%d"))
    for tv_info in tv_infos:
        if not dianshiju_urls_map.get(
                tv_info.name) and tv_info.type == u'电视剧':  # noqa
            dianshiju_urls_map[tv_info.name] = [
                tv_info.detail_urls, tv_info.tv_id, tv_info.cast_member,
                tv_info.label
            ]  # noqa

    let_db.dianshiju(dianshiju_urls_map, db_tv_names, db_play_info_map)
    # zongyi
    zongyi_urls_map = let_spi.zongyi_urls_map()
    for tv_info in tv_infos:
        if not zongyi_urls_map.get(tv_info.name) and tv_info.type == u'综艺':
            zongyi_urls_map[tv_info.name] = [
                tv_info.detail_urls, tv_info.label
            ]  # noqa

    let_db.zongyi(zongyi_urls_map, db_tv_names, db_play_info_map)
    end = int(time.time())
    print 'let抓取完毕,耗时', utils.format_seconds(end - start)
示例#4
0
文件: start.py 项目: hncg/water
def start_let(now):
    start = int(time.time())
    print "let开始抓取 .."
    let_spi = SpiderLet()
    let_db = SerializeLet(now)

    # dianshiju
    dianshiju_urls_map = let_spi.dianshiju_urls_map()
    tv_infos = TvInfo.mget_by_platform(u'let')
    db_tv_names = [_.name for _ in tv_infos]

    db_play_info_map = PlayInfo.mget_map_by_platform_and_time_after(
        'let', utils.format_time(time.time(), "%Y-%m-%d"))
    for tv_info in tv_infos:
        if not dianshiju_urls_map.get(tv_info.name) and tv_info.type == u'电视剧': # noqa
            dianshiju_urls_map[tv_info.name] = [tv_info.detail_urls, tv_info.tv_id, tv_info.cast_member, tv_info.label] # noqa

    let_db.dianshiju(dianshiju_urls_map, db_tv_names, db_play_info_map)
    # zongyi
    zongyi_urls_map = let_spi.zongyi_urls_map()
    for tv_info in tv_infos:
        if not zongyi_urls_map.get(tv_info.name) and tv_info.type == u'综艺':
            zongyi_urls_map[tv_info.name] = [tv_info.detail_urls, tv_info.label] # noqa

    let_db.zongyi(zongyi_urls_map, db_tv_names, db_play_info_map)
    end = int(time.time())
    print 'let抓取完毕,耗时', utils.format_seconds(end - start)
示例#5
0
def start_sh(now):
    start = int(time.time())
    print "sh开始抓取 .."
    sh_spi = SpiderSh()
    sh_db = SerializeSh(now)
    # db
    pids_map = sh_spi.pids_map()
    tv_infos = TvInfo.mget_by_platform(u'sh')
    db_tv_names = [_.name for _ in tv_infos]
    for tv_info in tv_infos:
        if not pids_map.get(tv_info.name):
            pids_map[tv_info.name] = tv_info.tv_id
    db_play_info_map = PlayInfo.mget_map_by_platform_and_time_after(
        'sh', utils.format_time(time.time(), "%Y-%m-%d"))
    sh_db.info_and_play(pids_map, db_tv_names, db_play_info_map)

    end = int(time.time())
    print 'sh抓取完毕,耗时', utils.format_seconds(end - start)
示例#6
0
文件: start.py 项目: hncg/water
def start_sh(now):
    start = int(time.time())
    print "sh开始抓取 .."
    sh_spi = SpiderSh()
    sh_db = SerializeSh(now)
    # db
    pids_map = sh_spi.pids_map()
    tv_infos = TvInfo.mget_by_platform(u'sh')
    db_tv_names = [_.name for _ in tv_infos]
    for tv_info in tv_infos:
        if not pids_map.get(tv_info.name):
            pids_map[tv_info.name] = tv_info.tv_id
    db_play_info_map = PlayInfo.mget_map_by_platform_and_time_after(
        'sh', utils.format_time(time.time(), "%Y-%m-%d"))
    sh_db.info_and_play(pids_map, db_tv_names, db_play_info_map)

    end = int(time.time())
    print 'sh抓取完毕,耗时', utils.format_seconds(end - start)
示例#7
0
def start_yk(now):
    start = int(time.time())
    print "yk开始抓取 .."
    yk_spi = SpiderYk()
    yk_db = SerializeYk(now)

    # spider urls_map
    tv_urls_map = yk_spi.tv_urls_map()
    # db info and play
    tv_infos = TvInfo.mget_by_platform(u'yk')
    db_tv_names = [_.name for _ in tv_infos]

    db_play_info_map = PlayInfo.mget_map_by_platform_and_time_after(
        'yk', utils.format_time(time.time(), "%Y-%m-%d"))
    for tv_info in tv_infos:
        if not tv_urls_map.get(tv_info.name):
            tv_urls_map[tv_info.name] = tv_info.detail_urls
    yk_db.info_and_play(tv_urls_map, db_tv_names, db_play_info_map)
    end = int(time.time())
    print 'yk抓取完毕,耗时', utils.format_seconds(end - start)
示例#8
0
def start_mg(now):
    start = int(time.time())
    print "mg开始抓取 .."
    mg_spi = SpiderMg()
    mg_db = SerializeMg(now)
    # db
    pids_map = mg_spi.pids_map()
    tv_infos = TvInfo.mget_by_platform(u'mg')
    db_tv_names = [_.name for _ in tv_infos]
    reverse = {v: k for k, v in TV_TYPE_MAP.iteritems()}
    for tv_info in tv_infos:
        if not pids_map.get(tv_info.name):
            type_n = reverse[tv_info.type]
            pids_map[tv_info.name] = [tv_info.tv_id, type_n]

    db_play_info_map = PlayInfo.mget_map_by_platform_and_time_after(
        'mg', utils.format_time(time.time(), "%Y-%m-%d"))
    mg_db.info_and_play(pids_map, db_tv_names, db_play_info_map)
    end = int(time.time())
    print 'mg抓取完毕,耗时', utils.format_seconds(end - start)
示例#9
0
文件: start.py 项目: hncg/water
def start_mg(now):
    start = int(time.time())
    print "mg开始抓取 .."
    mg_spi = SpiderMg()
    mg_db = SerializeMg(now)
    # db
    pids_map = mg_spi.pids_map()
    tv_infos = TvInfo.mget_by_platform(u'mg')
    db_tv_names = [_.name for _ in tv_infos]
    reverse = {v: k for k, v in TV_TYPE_MAP.iteritems()}
    for tv_info in tv_infos:
        if not pids_map.get(tv_info.name):
            type_n = reverse[tv_info.type]
            pids_map[tv_info.name] = [tv_info.tv_id, type_n]

    db_play_info_map = PlayInfo.mget_map_by_platform_and_time_after(
        'mg', utils.format_time(time.time(), "%Y-%m-%d"))
    mg_db.info_and_play(pids_map, db_tv_names, db_play_info_map)
    end = int(time.time())
    print 'mg抓取完毕,耗时', utils.format_seconds(end - start)
示例#10
0
文件: start.py 项目: hncg/water
def start_yk(now):
    start = int(time.time())
    print "yk开始抓取 .."
    yk_spi = SpiderYk()
    yk_db = SerializeYk(now)

    # spider urls_map
    tv_urls_map = yk_spi.tv_urls_map()
    # db info and play
    tv_infos = TvInfo.mget_by_platform(u'yk')
    db_tv_names = [_.name for _ in tv_infos]

    db_play_info_map = PlayInfo.mget_map_by_platform_and_time_after(
        'yk', utils.format_time(time.time(), "%Y-%m-%d"))
    for tv_info in tv_infos:
        if not tv_urls_map.get(tv_info.name):
            tv_urls_map[tv_info.name] = tv_info.detail_urls
    yk_db.info_and_play(tv_urls_map, db_tv_names, db_play_info_map)
    end = int(time.time())
    print 'yk抓取完毕,耗时', utils.format_seconds(end - start)
示例#11
0
def start_iqy(now):
    start = int(time.time())
    print "iqy开始抓取 .."
    iqy_spi = SpiderIqy()
    iqy_db = SerializeIqy(now)

    # dianshiju
    dianshiju_infos = iqy_spi.dianshiju_infos()
    tv_infos = TvInfo.mget_by_platform_and_type(u'iqy', u'电视剧')
    db_tv_names = [_.name for _ in tv_infos]
    for tv_info in tv_infos:
        if not dianshiju_infos.get(tv_info.name) and tv_info.type == u'电视剧':
            dianshiju_infos[tv_info.name] = \
                [
                    {'url': [tv_info.detail_urls]},
                    {'id': tv_info.tv_id},
                    {'v_id': tv_info.vids}]
    iqy_db.dianshiju_info(dianshiju_infos, db_tv_names)

    # zongyi
    zongyi_infos = iqy_spi.zongyi_infos()
    tv_infos = TvInfo.mget_by_platform_and_type(u'iqy', u'综艺')
    db_tv_names = [_.name for _ in tv_infos]
    for tv_info in tv_infos:
        if not zongyi_infos.get(tv_info.name) and tv_info.type == u'综艺':
            zongyi_infos[tv_info.name] = \
                [
                    tv_info.vids.split(","),
                    tv_info.current_number,
                    tv_info.description,
                    tv_info.cast_member]
    iqy_db.zongyi_info(zongyi_infos, db_tv_names)
    # play_info
    db_tv_infos = TvInfo.mget_by_platform(u'iqy')
    db_play_info_map = PlayInfo.mget_map_by_platform_and_time_after(
        'iqy', utils.format_time(time.time(), "%Y-%m-%d"))
    iqy_db.play_info(db_play_info_map, db_tv_infos)
    end = int(time.time())

    print 'iqy抓取完毕,耗时', utils.format_seconds(end - start)
示例#12
0
文件: start.py 项目: hncg/water
def start_iqy(now):
    start = int(time.time())
    print "iqy开始抓取 .."
    iqy_spi = SpiderIqy()
    iqy_db = SerializeIqy(now)

    # dianshiju
    dianshiju_infos = iqy_spi.dianshiju_infos()
    tv_infos = TvInfo.mget_by_platform_and_type(u'iqy', u'电视剧')
    db_tv_names = [_.name for _ in tv_infos]
    for tv_info in tv_infos:
        if not dianshiju_infos.get(tv_info.name) and tv_info.type == u'电视剧':
            dianshiju_infos[tv_info.name] = \
                [
                    {'url': [tv_info.detail_urls]},
                    {'id': tv_info.tv_id},
                    {'v_id': tv_info.vids}]
    iqy_db.dianshiju_info(dianshiju_infos, db_tv_names)

    # zongyi
    zongyi_infos = iqy_spi.zongyi_infos()
    tv_infos = TvInfo.mget_by_platform_and_type(u'iqy', u'综艺')
    db_tv_names = [_.name for _ in tv_infos]
    for tv_info in tv_infos:
        if not zongyi_infos.get(tv_info.name) and tv_info.type == u'综艺':
            zongyi_infos[tv_info.name] = \
                [
                    tv_info.vids.split(","),
                    tv_info.current_number,
                    tv_info.description,
                    tv_info.cast_member]
    iqy_db.zongyi_info(zongyi_infos, db_tv_names)
    # play_info
    db_tv_infos = TvInfo.mget_by_platform(u'iqy')
    db_play_info_map = PlayInfo.mget_map_by_platform_and_time_after(
        'iqy', utils.format_time(time.time(), "%Y-%m-%d"))
    iqy_db.play_info(db_play_info_map, db_tv_infos)
    end = int(time.time())

    print 'iqy抓取完毕,耗时', utils.format_seconds(end - start)