def __init__(self):
    """Set up the ablum_youku_dota spider.

    Prints a start banner, prepares the program templates, registers the
    Youku channel pages to crawl and hands the dated output path to
    spiderTool.openFile.
    """
    print("Do spider ablum_youku_dota.")
    self.program = copy.deepcopy(BASE_CONTENT["program"])
    # Bound by reference (no copy is made here).
    self.program_sub = PROGRAM_SUB
    # One uploader video page per entry; the trailing comment names the uploader.
    # NOTE(review): the first seed is listed twice - confirm the duplicate is intentional.
    self.seedList = [
        'http://i.youku.com/i/UMzIzOTQwMTg0/videos',  # Wu'er
        'http://i.youku.com/i/UMzIzOTQwMTg0/videos',  # Wu'er
        'http://i.youku.com/i/UMTEwNDcxNDAw/videos',  # Lizi DOTA
        'http://i.youku.com/u/UNjM3ODczNjA=/videos',  # Shahei
        'http://i.youku.com/i/UMzE2OTY2NjUy/videos',  # Wusheng 2009
        'http://i.youku.com/u/UMjU3MzI2NDMy/videos',  # Xiaoman
        'http://i.youku.com/u/UMjg4MjQ1MzQ0/videos',  # Manlou Shuiping
        'http://i.youku.com/i/UNjAzOTE5NTI=/videos',  # nada
        'http://i.youku.com/i/UMzYwNTg1NjI0/videos',  # Niuwa
        'http://i.youku.com/i/UMzcxMDA5OTI4/videos',  # Kaiwen (Kevin)
        'http://i.youku.com/i/UNTA3ODY5NzUy/videos',  # Xiaoguai
        'http://i.youku.com/u/UMzcyMTMxODQ4/videos',  # Pis
        'http://i.youku.com/i/UMzE3MTg2MjQ0/videos',  # Meixi
        'http://i.youku.com/i/UNDI1NTMxMjMy/videos',  # Qingshu
        'http://i.youku.com/i/UMTk4ODk3NzAw/videos',  # Yanyuan
        'http://i.youku.com/i/UMzAxOTg1MjU2/videos',  # Wanzi
        'http://i.youku.com/i/UMTQxMzc0MzQ2MA==/videos',  # Piao Yisheng
        'http://i.youku.com/u/UMzM2ODMxMDIw/videos',  # Yeliu Laixi
        'http://i.youku.com/i/UMzU4NjU5ODQ4/videos',  # Zero
        'http://i.youku.com/i/UMzA0MDY4OTE2/videos',  # Music Kafei
        'http://i.youku.com/i/UMTcwMDM5MjM0NA==/videos',  # Colour
        'http://i.youku.com/i/UNjA0Mzc0NjY4/videos',  # Xiao K
    ]
    self.dataDir = os.path.join('.', 'data')
    # strftime falls back to the current local time when no time tuple is given.
    self.today = time.strftime('%Y%m%d')
    output_name = "ablum_youku_dota_" + self.today + ".txt"
    self.dataFile = spiderTool.openFile(os.path.join(self.dataDir, output_name))
def __init__(self):
    """Set up the mgtv_variety_1 spider.

    Prints a start banner, prepares the program templates, registers the
    MGTV show pages to crawl and hands the dated output path to
    spiderTool.openFile.
    """
    print("Do spider mgtv_variety_1.")
    self.program = copy.deepcopy(BASE_CONTENT["program"])
    # Bound by reference (no copy is made here).
    self.program_sub = PROGRAM_SUB
    # One show page per entry; the trailing comment is the show title (pinyin).
    # NOTE(review): the first seed is listed twice - confirm the duplicate is intentional.
    self.seedList = [
        "http://www.mgtv.com/v/1/294178/f/3310829.html",
        'http://www.mgtv.com/v/1/294178/f/3310829.html',  # Aidou Wanwansui
        'http://www.mgtv.com/v/1/102607/f/3303906.html',  # Bagua Jianding Shiwusuo
        'http://www.mgtv.com/v/1/292503/f/3309795.html',  # Aixiao Magua Xiu
        'http://www.mgtv.com/v/6/292376/f/3309069.html',  # Xiangjiao Daka
        'http://www.mgtv.com/v/1/291580/f/3308484.html',  # Zongyi Haha Xiu
        'http://www.mgtv.com/v/1/294293/f/3304436.html',  # Dabenying de Mimi Huayuan
        'http://www.mgtv.com/v/1/110061/f/3305216.html',  # OMG Wanmeika
        'http://www.mgtv.com/v/1/292521/f/3304855.html',  # Chaoren Mama Daiwa Ji
        'http://www.mgtv.com/v/1/294266/f/3304312.html',  # Mimei Dabenying
        'http://www.mgtv.com/v/1/291976/f/3285994.html',  # Mingxing Da Zhentan
        'http://www.mgtv.com/v/1/291840/f/3181892.html',  # Juzi Mingxing Zhuanfang
        'http://www.mgtv.com/v/1/292098/f/3104557.html',  # Gan ZUO Gan Wei Nusheng Xiu
    ]
    self.dataDir = os.path.join('.', 'data')
    # strftime falls back to the current local time when no time tuple is given.
    self.today = time.strftime('%Y%m%d')
    output_name = "mgtv_variety_1_" + self.today + ".txt"
    self.dataFile = spiderTool.openFile(os.path.join(self.dataDir, output_name))
def __init__(self):
    """Set up the all_bilibili_tv spider.

    Registers the listing pages (each a one-entry dict of URL to id string),
    prepares the program templates and hands the dated output path to
    spiderTool.openFile.
    """
    # Every entry is labelled "completed series" in the source comments.
    # NOTE(review): the first entry is listed twice - confirm the duplicate is intentional.
    self.base = [
        {"http://www.bilibili.com/video/tv-drama-1.html#!page=1": "34"},
        {"http://www.bilibili.com/video/tv-drama-1.html#!page=1": "34"},
        {"http://www.bilibili.com/video/tv-drama-1.html#!page=2": "34"},
        {"http://www.bilibili.com/video/tv-drama-1.html#!page=3": "34"},
        {"http://www.bilibili.com/video/tv-drama-1.html#!page=4": "34"},
        {"http://www.bilibili.com/video/tv-drama-1.html#!page=5": "34"},
    ]
    self.seedList = []
    self.program = copy.deepcopy(BASE_CONTENT["program"])
    # Bound by reference (no copy is made here).
    self.program_sub = PROGRAM_SUB
    self.dataDir = os.path.join('.', 'data')
    # strftime falls back to the current local time when no time tuple is given.
    self.today = time.strftime('%Y%m%d')
    output_name = "all_bilibili_tv_" + self.today + ".txt"
    self.dataFile = spiderTool.openFile(os.path.join(self.dataDir, output_name))
def __init__(self):
    """Set up the all_mgtv_movie spider.

    Prints a start banner, prepares the program templates, registers the
    MGTV listing pages and hands the dated output path to
    spiderTool.openFile.
    """
    print("Do spider all_mgtv_movie.")
    self.program = copy.deepcopy(BASE_CONTENT["program"])
    # Bound by reference (no copy is made here).
    self.program_sub = PROGRAM_SUB
    # NOTE(review): the first seed is listed twice - confirm the duplicate is intentional.
    # NOTE(review): the last three entries repeat category 2835073 pages 3-5
    # right after the 2848093 pages - possibly meant to be 2848093; confirm.
    self.seedBase = [
        "http://list.mgtv.com/3/--------2835073-5-1-0--.html",
        "http://list.mgtv.com/3/--------2835073-5-1-0--.html",
        "http://list.mgtv.com/3/--------2835073-5-2-0--.html",
        "http://list.mgtv.com/3/--------2835073-5-3-0--.html",
        "http://list.mgtv.com/3/--------2835073-5-4-0--.html",
        "http://list.mgtv.com/3/--------2835073-5-5-0--.html",
        "http://list.mgtv.com/3/--------2848093-5-1-0--.html",  # online movies
        "http://list.mgtv.com/3/--------2848093-5-2-0--.html",
        "http://list.mgtv.com/3/--------2835073-5-3-0--.html",
        "http://list.mgtv.com/3/--------2835073-5-4-0--.html",
        "http://list.mgtv.com/3/--------2835073-5-5-0--.html",
    ]
    self.seedList = []
    self.dataDir = os.path.join('.', 'data')
    # strftime falls back to the current local time when no time tuple is given.
    self.today = time.strftime('%Y%m%d')
    self.prgdata = {}
    output_name = "all_mgtv_movie_" + self.today + ".txt"
    self.dataFile = spiderTool.openFile(os.path.join(self.dataDir, output_name))
def __init__(self):
    """Set up the all_mgtv_vipmovie spider.

    Prints a start banner, prepares the program templates, registers the
    listing API pages, resets the counters/caches and hands the dated
    output path to spiderTool.openFile.
    """
    print("Do spider all_mgtv_vipmovie.")
    self.program = copy.deepcopy(BASE_CONTENT["program"])
    # Bound by reference (no copy is made here).
    self.program_sub = PROGRAM_SUB
    # Only pages pn=1..5 are seeded; pn=6..12 of the same listing are left disabled.
    # NOTE(review): the first seed is listed twice - confirm the duplicate is intentional.
    self.seedBase = [
        "http://s5.hunantv.com/v5/list/pc?ic=1&ty=3&chargeInfo=b2&if=0&sort=2&pn=1&pc=60",
        "http://s5.hunantv.com/v5/list/pc?ic=1&ty=3&chargeInfo=b2&if=0&sort=2&pn=1&pc=60",
        "http://s5.hunantv.com/v5/list/pc?ic=1&ty=3&chargeInfo=b2&if=0&sort=2&pn=2&pc=60",
        "http://s5.hunantv.com/v5/list/pc?ic=1&ty=3&chargeInfo=b2&if=0&sort=2&pn=3&pc=60",
        "http://s5.hunantv.com/v5/list/pc?ic=1&ty=3&chargeInfo=b2&if=0&sort=2&pn=4&pc=60",
        "http://s5.hunantv.com/v5/list/pc?ic=1&ty=3&chargeInfo=b2&if=0&sort=2&pn=5&pc=60",
    ]
    self.seedList = []
    self.dataDir = os.path.join('.', 'data')
    # strftime falls back to the current local time when no time tuple is given.
    self.today = time.strftime('%Y%m%d')
    self.seqNocnt = 1
    self.prgdata = {}
    output_name = "all_mgtv_vipmovie_" + self.today + ".txt"
    self.dataFile = spiderTool.openFile(os.path.join(self.dataDir, output_name))
def __init__(self):
    """Set up the ablum_iqiyi_variety_1 spider.

    Prints a start banner, deep-copies both program templates, registers
    the iQiyi album pages to crawl and hands the dated output path to
    spiderTool.openFile.
    """
    print("Do spider ablum_iqiyi_variety_1.")
    self.program = copy.deepcopy(BASE_CONTENT["program"])
    self.program_sub = copy.deepcopy(PROGRAM_SUB)
    # One album page per entry; the trailing comment is the show title.
    # NOTE(review): the first seed is listed twice - confirm the duplicate is intentional.
    self.seedList = [
        "http://www.iqiyi.com/a_19rrguentx.html",
        "http://www.iqiyi.com/a_19rrguentx.html",  # XFUN Chihuo Julebu
        # The comma after the next entry was missing, so Python's implicit
        # string-literal concatenation fused it with the following URL into
        # a single invalid seed. They are now two separate seeds.
        'http://www.iqiyi.com/lib/m_208247014.html?src=search',  # The Ellen Show, season 13
        "http://www.iqiyi.com/a_19rrgjdzt1.html",  # Chunse Wubian Gaoxiao Pandian
        "http://www.iqiyi.com/a_19rrhal04x.html#vfrm=2-3-0-1",  # Shisanyi Fenbei zhi Yipai Fangyan
        "http://www.iqiyi.com/a_19rrhanf9p.html",  # Zanmen Chuanyue Ba, season 2
        "http://www.iqiyi.com/a_19rrhao8al.html",  # Xingchu Jiadao, season 3
        'http://www.iqiyi.com/a_19rrhanya1.html#vfrm=2-3-0-1',  # Shuochu Wo de Shijie
        'http://www.iqiyi.com/a_19rrhasbel.html#vfrm=2-3-0-1',  # Jiayou Xiang Weilai
        'http://www.iqiyi.com/a_19rrgu9s19.html#vfrm=2-3-0-1',  # Yihu Baiying
        'http://www.iqiyi.com/a_19rrgi7art.html#vfrm=2-3-0-1',  # Diyi Shuji (2016)
        'http://www.iqiyi.com/a_19rrgucd8p.html#vfrm=2-3-0-1',  # Xingfu Zai Nali
        'http://www.iqiyi.com/a_19rrgjahfl.html#vfrm=2-3-0-1',  # Wo Shi Da Yisheng (2016)
        'http://www.iqiyi.com/a_19rrh9rtx1.html',  # Jinye Bailemen
    ]
    self.dataDir = os.path.join('.', 'data')
    # strftime falls back to the current local time when no time tuple is given.
    self.today = time.strftime('%Y%m%d')
    output_name = "ablum_iqiyi_variety_1_" + self.today + ".txt"
    self.dataFile = spiderTool.openFile(os.path.join(self.dataDir, output_name))
def __init__(self):
    """Set up the all_iqiyi_children spider.

    Prints a start banner, prepares the program templates, registers the
    iQiyi listing pages and hands the dated output path to
    spiderTool.openFile.
    """
    print("Do spider all_iqiyi_children.")
    self.program = copy.deepcopy(BASE_CONTENT["program"])
    # Bound by reference (no copy is made here).
    self.program_sub = PROGRAM_SUB
    # Trailing labels are copied from the source comments (presumably age
    # brackets - confirm).
    # NOTE(review): the first seed is listed twice - confirm the duplicate is intentional.
    self.seedBase = [
        "http://list.iqiyi.com/www/4/---304----------4-1-1-iqiyi--.html",
        "http://list.iqiyi.com/www/4/---304----------4-1-1-iqiyi--.html",  # 0-3
        "http://list.iqiyi.com/www/4/---304----------4-2-1-iqiyi--.html",  # 0-3
        "http://list.iqiyi.com/www/4/---1283----------4-1-1-iqiyi--.html",  # 4-6
        "http://list.iqiyi.com/www/4/---1283----------4-2-1-iqiyi--.html",  # 4-6
        "http://list.iqiyi.com/www/4/---305----------4-1-1-iqiyi--.html",  # 7-13
        "http://list.iqiyi.com/www/4/---305----------4-2-1-iqiyi--.html",  # 7-13
        "http://list.iqiyi.com/www/4/---306----------4-1-1-iqiyi--.html",  # 14-17
        "http://list.iqiyi.com/www/4/---306----------4-2-1-iqiyi--.html",  # 14-17
    ]
    self.seedList = []
    self.dataDir = os.path.join('.', 'data')
    # strftime falls back to the current local time when no time tuple is given.
    self.today = time.strftime('%Y%m%d')
    output_name = "all_iqiyi_children_" + self.today + ".txt"
    self.dataFile = spiderTool.openFile(os.path.join(self.dataDir, output_name))
def __init__(self):
    """Set up the all_le_children spider.

    Prints a start banner, prepares the program templates, registers the
    le.com listing API pages, resets the image cache and hands the dated
    output path to spiderTool.openFile.
    """
    print("Do spider all_le_children.")
    self.program = copy.deepcopy(BASE_CONTENT["program"])
    # Bound by reference (no copy is made here).
    self.program_sub = PROGRAM_SUB
    # Trailing labels are copied from the source comments (presumably age
    # brackets - confirm).
    # NOTE(review): the first seed is listed twice - confirm the duplicate is intentional.
    self.seedBase = [
        "http://list.le.com/apin/chandata.json?a=50041&c=5&d=1&f=511001&md=&o=20&p=1&s=1",  # 0-6
        "http://list.le.com/apin/chandata.json?a=50041&c=5&d=1&f=511001&md=&o=20&p=1&s=1",
        "http://list.le.com/apin/chandata.json?a=50041&c=5&d=1&f=511001&md=&o=20&p=2&s=1",
        "http://list.le.com/apin/chandata.json?a=50041&c=5&d=1&f=511001&md=&o=20&p=3&s=1",
        "http://list.le.com/apin/chandata.json?a=50041&c=5&d=1&f=511002&md=&o=20&p=1&s=1",  # 6-12
        "http://list.le.com/apin/chandata.json?a=50041&c=5&d=1&f=511002&md=&o=20&p=2&s=1",  # 6-12
        "http://list.le.com/apin/chandata.json?a=50041&c=5&d=1&f=511002&md=&o=20&p=3&s=1",  # 6-12
        "http://list.le.com/apin/chandata.json?a=50041&c=5&d=1&f=511003&md=&o=20&p=1&s=1",  # 12-18
        "http://list.le.com/apin/chandata.json?a=50041&c=5&d=1&f=511003&md=&o=20&p=2&s=1",  # 12-18
        "http://list.le.com/apin/chandata.json?a=50041&c=5&d=1&f=511003&md=&o=20&p=3&s=1",  # 12-18
    ]
    self.seedList = []
    self.images = {}
    self.dataDir = os.path.join('.', 'data')
    # strftime falls back to the current local time when no time tuple is given.
    self.today = time.strftime('%Y%m%d')
    output_name = "all_le_children_" + self.today + ".txt"
    self.dataFile = spiderTool.openFile(os.path.join(self.dataDir, output_name))
def __init__(self):
    """Set up the le.com VIP movie spider.

    Prints a start banner, prepares the program templates, registers the
    search API pages, resets the image cache and sequence counter, and
    hands the dated output path to spiderTool.openFile.
    """
    # NOTE(review): the banner says "all_le_movie" while the output file is
    # prefixed "all_le_vipmovie_" - confirm which name is intended.
    print("Do spider all_le_movie.")
    self.program = copy.deepcopy(BASE_CONTENT["program"])
    # Bound by reference (no copy is made here).
    self.program_sub = PROGRAM_SUB
    # Only result pages pn=1..5 are seeded; pn=6..17 of the same query are left disabled.
    # NOTE(review): the first seed is listed twice - confirm the duplicate is intentional.
    self.seedBase = [
        "http://api.vip.le.com/search/interface?from=pc_03&sales_area=cn&user_setting_country=cn&cg=1&ph=420001&pt=141001&dt=1&src=1&ispay=1&stype=1&lang=zh_cn&stt=1&ps=60&pn=1&lh=0&vt=180001&sc=&ar=&yr=&or=1&eid=8372802130253217715",
        "http://api.vip.le.com/search/interface?from=pc_03&sales_area=cn&user_setting_country=cn&cg=1&ph=420001&pt=141001&dt=1&src=1&ispay=1&stype=1&lang=zh_cn&stt=1&ps=60&pn=1&lh=0&vt=180001&sc=&ar=&yr=&or=1&eid=8372802130253217715",
        "http://api.vip.le.com/search/interface?from=pc_03&sales_area=cn&user_setting_country=cn&cg=1&ph=420001&pt=141001&dt=1&src=1&ispay=1&stype=1&lang=zh_cn&stt=1&ps=60&pn=2&lh=0&vt=180001&sc=&ar=&yr=&or=1&eid=8372802130253217715",
        "http://api.vip.le.com/search/interface?from=pc_03&sales_area=cn&user_setting_country=cn&cg=1&ph=420001&pt=141001&dt=1&src=1&ispay=1&stype=1&lang=zh_cn&stt=1&ps=60&pn=3&lh=0&vt=180001&sc=&ar=&yr=&or=1&eid=8372802130253217715",
        "http://api.vip.le.com/search/interface?from=pc_03&sales_area=cn&user_setting_country=cn&cg=1&ph=420001&pt=141001&dt=1&src=1&ispay=1&stype=1&lang=zh_cn&stt=1&ps=60&pn=4&lh=0&vt=180001&sc=&ar=&yr=&or=1&eid=8372802130253217715",
        "http://api.vip.le.com/search/interface?from=pc_03&sales_area=cn&user_setting_country=cn&cg=1&ph=420001&pt=141001&dt=1&src=1&ispay=1&stype=1&lang=zh_cn&stt=1&ps=60&pn=5&lh=0&vt=180001&sc=&ar=&yr=&or=1&eid=8372802130253217715",
    ]
    self.seedList = []
    self.images = {}
    self.dataDir = os.path.join('.', 'data')
    # strftime falls back to the current local time when no time tuple is given.
    self.today = time.strftime('%Y%m%d')
    self.seqNocnt = 1
    output_name = "all_le_vipmovie_" + self.today + ".txt"
    self.dataFile = spiderTool.openFile(os.path.join(self.dataDir, output_name))
def __init__(self):
    """Set up the ablum_qq_wdsj spider.

    Prints a start banner, prepares the program templates, registers the
    v.qq.com uploader pages to crawl and hands the dated output path to
    spiderTool.openFile.
    """
    print("Do spider ablum_qq_wdsj.")
    self.program = copy.deepcopy(BASE_CONTENT["program"])
    # Bound by reference (no copy is made here).
    self.program_sub = PROGRAM_SUB
    # One uploader video page per entry; the trailing comment names the uploader.
    # NOTE(review): the first seed is listed twice - confirm the duplicate is intentional.
    self.seedList = [
        'http://v.qq.com/vplus/xiaoben520/videos',  # Xiaoben
        'http://v.qq.com/vplus/xiaoben520/videos',  # Xiaoben
        'http://v.qq.com/vplus/chengzi233/videos',  # Da Chengzi
        'http://v.qq.com/vplus/zhenshiqiguaile/videos',  # Zhenshi Qiguai Le
        'http://v.qq.com/vplus/maxkim0314/videos',  # MaxKim
        'http://v.qq.com/vplus/kamu/videos',  # Kamu
        'http://v.qq.com/vplus/yanhuang/videos',  # Yanhuang
        'http://v.qq.com/vplus/kaiqi/videos',  # Kaiqi
        'http://v.qq.com/vplus/xiaobaiMC/videos',  # T-RO Xiaobai
        'http://v.qq.com/vplus/moon/videos',  # Mingyue Zhuangzhu
        'http://v.qq.com/vplus/anyijun/videos',  # Anyijun
        'http://v.qq.com/vplus/xiaoxianjun/videos',  # Xiaoxianjun
        'http://v.qq.com/vplus/dahaitv/videos',  # Dahai
        'http://v.qq.com/vplus/youranxiaotian/videos',  # Youran Xiaotian
        'http://v.qq.com/vplus/pinkfish0319/videos',  # Fenyu
        'http://v.qq.com/vplus/tianluo/videos',  # Tianluo Jiang
        'http://v.qq.com/vplus/xiaoA0v0/videos',  # Xiao A
        'http://v.qq.com/vplus/biantao233/videos',  # Biantao
        'http://v.qq.com/vplus/fou4242/videos',  # Si'er Si'er
        'http://v.qq.com/vplus/fuhao/videos',  # Fuhao
        'http://v.qq.com/vplus/qingyou/videos',  # Bi'an Qingyou
        'http://v.qq.com/vplus/imaimu/videos',  # Aimu
        'http://v.qq.com/vplus/feixiongMC/videos',  # Feixiong TV Minecraft
        'http://v.qq.com/vplus/pika233/videos',  # Pika
        'http://v.qq.com/vplus/xiaoyujun/videos',  # Xiaoyu
    ]
    self.dataDir = os.path.join('.', 'data')
    # strftime falls back to the current local time when no time tuple is given.
    self.today = time.strftime('%Y%m%d')
    output_name = "ablum_qq_wdsj_" + self.today + ".txt"
    self.dataFile = spiderTool.openFile(os.path.join(self.dataDir, output_name))
def __init__(self):
    """Set up the all_bilibili_movie spider.

    Registers the listing pages (each a one-entry dict of URL to id string),
    prepares the program templates and hands the dated output path to
    spiderTool.openFile.
    """
    # NOTE(review): the first entry is listed twice - confirm the duplicate is intentional.
    # NOTE(review): the last three keys use "!page=" while the first two use
    # "#!page=" - confirm which form the crawler expects.
    self.base = [
        {"http://www.bilibili.com/video/movie_west_1.html#!page=1": "145"},  # Western
        {"http://www.bilibili.com/video/movie_west_1.html#!page=1": "145"},  # Western
        {"http://www.bilibili.com/video/movie_japan_1.html!page=1": "146"},  # Japanese
        {"http://www.bilibili.com/video/movie_chinese_1.html!page=1": "147"},  # Chinese (domestic)
        {"http://www.bilibili.com/video/movie-movie-1.html!page=1": "83"},  # other
    ]
    self.seedList = []
    self.program = copy.deepcopy(BASE_CONTENT["program"])
    # Bound by reference (no copy is made here).
    self.program_sub = PROGRAM_SUB
    self.dataDir = os.path.join('.', 'data')
    # strftime falls back to the current local time when no time tuple is given.
    self.today = time.strftime('%Y%m%d')
    output_name = "all_bilibili_movie_" + self.today + ".txt"
    self.dataFile = spiderTool.openFile(os.path.join(self.dataDir, output_name))
def __init__(self):
    """Set up the ablum_acfun_game spider.

    Registers the AcFun game channel listing pages, prepares the program
    templates and hands the dated output path to spiderTool.openFile.
    """
    # NOTE(review): the first entry is listed twice - confirm the duplicate is intentional.
    self.base = [
        "http://www.acfun.tv/v/list84/index.htm#page=1",  # console / single-player
        "http://www.acfun.tv/v/list84/index.htm#page=1",  # console / single-player
        "http://www.acfun.tv/v/list83/index.htm#page=1",  # game highlights
        "http://www.acfun.tv/v/list145/index.htm#page=1",  # e-sports
        "http://www.acfun.tv/v/list85/index.htm#page=1",  # League of Legends
        "http://www.acfun.tv/v/list170/index.htm#page=1",  # Overwatch
        "http://www.acfun.tv/v/list165/index.htm#page=1",  # board and card games
        "http://www.acfun.tv/v/list72/index.htm#page=1",  # Mugen
        # Disabled: "http://www.acfun.tv/v/list175/index.htm#page=1"  (game live streams)
    ]
    self.seedList = []
    self.program = copy.deepcopy(BASE_CONTENT["program"])
    # Bound by reference (no copy is made here).
    self.program_sub = PROGRAM_SUB
    self.dataDir = os.path.join('.', 'data')
    # strftime falls back to the current local time when no time tuple is given.
    self.today = time.strftime('%Y%m%d')
    output_name = "ablum_acfun_game_" + self.today + ".txt"
    self.dataFile = spiderTool.openFile(os.path.join(self.dataDir, output_name))
def __init__(self):
    """Set up the mgtv_short_1 spider.

    Prints a start banner, prepares the program templates, registers the
    MGTV show pages to crawl and hands the dated output path to
    spiderTool.openFile.
    """
    print("Do spider mgtv_short_1.")
    self.program = copy.deepcopy(BASE_CONTENT["program"])
    # Bound by reference (no copy is made here).
    self.program_sub = PROGRAM_SUB
    # One show page per entry; the trailing comment is the show title (pinyin).
    # NOTE(review): the first seed is listed twice - confirm the duplicate is intentional.
    self.seedList = [
        "http://www.mgtv.com/v/6/101613/f/1094144.html",
        'http://www.mgtv.com/v/6/101613/f/1094144.html',  # Meizi Shuo Reju
        'http://www.mgtv.com/v/6/158563/f/1777920.html',  # Xiaoka Xiu
        'http://www.mgtv.com/v/6/292430/f/3115009.html',  # Xianzha She
        'http://www.mgtv.com/v/6/168030/f/1863425.html',  # Kan Gou Le Mei
        'http://www.mgtv.com/v/6/291965/f/3144194.html',  # Daka Toutiao
        'http://www.mgtv.com/v/6/291842/f/3012181.html',  # Hunjian Xia
        'http://www.mgtv.com/v/6/293477/f/3207951.html',  # Fan Aidou
        'http://www.mgtv.com/v/6/158354/f/2949121.html',  # Ganjue Ziji Mengmengda
        'http://www.mgtv.com/v/6/292435/f/3172352.html',  # Tiantian Dayu
        'http://www.mgtv.com/v/6/166144/f/3268355.html',  # Mengyan Kan Zhongkou
        'http://www.mgtv.com/v/6/168584/f/2931968.html',  # Bibi Yule Xiu
        'http://www.mgtv.com/v/6/167993/f/2973185.html',  # Malanshan Niuren Guan
        'http://www.mgtv.com/v/6/294063/f/3276032.html',  # Fantuan Sihuo Pengyouquan
        'http://www.mgtv.com/v/6/293394/f/3267841.html',  # Hongren Ai Zipai
        'http://www.mgtv.com/v/6/167785/f/3143680.html',  # Malanshan Jiandaoshou
        'http://www.mgtv.com/v/6/150882/f/2928653.html',  # Wenti Da Le
        'http://www.mgtv.com/v/6/293268/f/3300386.html',  # Guai Maomi
        'http://www.mgtv.com/v/8/157416/f/1740557.html',  # Nvshen TV
        'http://www.mgtv.com/v/6/155923/f/1755650.html',  # Luoli Kanju
        'http://www.mgtv.com/v/6/292983/f/3300867.html',  # Xiaosi Bu Changming
        'http://www.mgtv.com/v/6/291094/f/3109397.html',  # Baozou Kan Sha Pianr, season 3
        'http://www.mgtv.com/v/6/159440/f/2954242.html',  # Ergeng Shipin
        'http://www.mgtv.com/v/6/290272/f/3290851.html',  # Aidou bibi She
        'http://www.mgtv.com/v/6/294171/f/3285505.html',  # Dushe Shaonao Yanjiusuo
    ]
    self.dataDir = os.path.join('.', 'data')
    # strftime falls back to the current local time when no time tuple is given.
    self.today = time.strftime('%Y%m%d')
    output_name = "mgtv_short_1_" + self.today + ".txt"
    self.dataFile = spiderTool.openFile(os.path.join(self.dataDir, output_name))
def __init__(self):
    """Set up the qq_tv_1 spider.

    Prints a start banner, prepares the program templates, registers the
    single cover page to crawl and hands the dated output path to
    spiderTool.openFile.
    """
    print("Do spider qq_tv_1.")
    self.program = copy.deepcopy(BASE_CONTENT["program"])
    # Bound by reference (no copy is made here).
    self.program_sub = PROGRAM_SUB
    self.seedList = [
        "http://film.qq.com/cover/f/fdug0j3etx4ioja.html",
    ]
    self.dataDir = os.path.join('.', 'data')
    # strftime falls back to the current local time when no time tuple is given.
    self.today = time.strftime('%Y%m%d')
    output_name = "qq_tv_1_" + self.today + ".txt"
    self.dataFile = spiderTool.openFile(os.path.join(self.dataDir, output_name))
def __init__(self):
    """Set up the all_mfsp_tv spider.

    Prints a start banner, prepares the program templates, starts with
    empty seed lists and hands the dated output path to
    spiderTool.openFile.
    """
    print("Do spider all_mfsp_tv.")
    self.program = copy.deepcopy(BASE_CONTENT["program"])
    # Bound by reference (no copy is made here).
    self.program_sub = PROGRAM_SUB
    # Both seed containers start empty in this constructor.
    self.seedBase = []
    self.seedList = []
    self.dataDir = os.path.join('.', 'data')
    # strftime falls back to the current local time when no time tuple is given.
    self.today = time.strftime('%Y%m%d')
    output_name = "all_mfsp_tv_" + self.today + ".txt"
    self.dataFile = spiderTool.openFile(os.path.join(self.dataDir, output_name))
def __init__(self):
    """Set up the a_tmp_seed_spider spider.

    Prints a start banner, starts with empty seed lists and hands the
    dated seed-file path to spiderTool.openFile.
    """
    print("Do spider a_tmp_seed_spider.")
    self.seedList = []
    self.reSeedList = []
    self.dataDir = os.path.join('.', 'data')
    # strftime falls back to the current local time when no time tuple is given.
    self.today = time.strftime('%Y%m%d')
    # Note: this file name has no underscore between "seed" and the date.
    output_name = "seed" + self.today + ".txt"
    self.dataFile = spiderTool.openFile(os.path.join(self.dataDir, output_name))
def __init__(self):
    """Set up the ablum_iqiyi_cartoon_2 spider.

    Prints a start banner, prepares the program templates, registers the
    single album page to crawl and hands the dated output path to
    spiderTool.openFile.
    """
    print("Do spider ablum_iqiyi_cartoon_2.")
    self.program = copy.deepcopy(BASE_CONTENT["program"])
    # Bound by reference (no copy is made here).
    self.program_sub = PROGRAM_SUB
    self.seedList = [
        "http://www.iqiyi.com/a_19rrhb3xvl.html#vfrm=2-3-0-1",  # One Piece
    ]
    self.dataDir = os.path.join('.', 'data')
    # strftime falls back to the current local time when no time tuple is given.
    self.today = time.strftime('%Y%m%d')
    output_name = "ablum_iqiyi_cartoon_2_" + self.today + ".txt"
    self.dataFile = spiderTool.openFile(os.path.join(self.dataDir, output_name))
def __init__(self):
    """Set up the beauty_mmwu_1 spider.

    Prints a start banner, prepares the program templates, registers the
    listing page and hands the dated output path to spiderTool.openFile.
    """
    print("Do spider beauty_mmwu_1.")
    self.program = copy.deepcopy(BASE_CONTENT["program"])
    # Bound by reference (no copy is made here).
    self.program_sub = PROGRAM_SUB
    self.seedBase = [
        'http://www.mmwu.tv/vod/opt/new',
    ]
    self.seedList = []
    self.dataDir = os.path.join('.', 'data')
    # strftime falls back to the current local time when no time tuple is given.
    self.today = time.strftime('%Y%m%d')
    output_name = "beauty_mmwu_1_" + self.today + ".txt"
    self.dataFile = spiderTool.openFile(os.path.join(self.dataDir, output_name))
def __init__(self):
    """Set up the le_short_1 spider.

    Prints a start banner, prepares the program templates, registers the
    single play page to crawl and hands the dated output path to
    spiderTool.openFile.
    """
    print("Do spider le_short_1.")
    self.program = copy.deepcopy(BASE_CONTENT["program"])
    # Bound by reference (no copy is made here).
    self.program_sub = PROGRAM_SUB
    self.seedList = [
        "http://www.le.com/ptv/vplay/24794432.html",
    ]
    self.dataDir = os.path.join('.', 'data')
    # strftime falls back to the current local time when no time tuple is given.
    self.today = time.strftime('%Y%m%d')
    output_name = "le_short_1_" + self.today + ".txt"
    self.dataFile = spiderTool.openFile(os.path.join(self.dataDir, output_name))
def __init__(self):
    """Set up the weixindianying spider.

    Prints a start banner, resets the data dict, registers the index page
    and hands the dated output path to spiderTool.openFile.
    """
    print("Do spider weixindianying.")
    self.data = {}
    self.seedBase = [
        "http://www.ppypp.com/dy/index.html",
    ]
    self.seedList = []
    self.dataDir = os.path.join('.', 'data')
    # strftime falls back to the current local time when no time tuple is given.
    self.today = time.strftime('%Y%m%d')
    # Note: this file name has no underscore between the prefix and the date.
    output_name = "weixindianying" + self.today + ".txt"
    self.dataFile = spiderTool.openFile(os.path.join(self.dataDir, output_name))
def __init__(self):
    """Set up the all_ppypp_movie spider.

    Prints a start banner, prepares the program templates, registers the
    index page and hands the dated output path to spiderTool.openFile.
    """
    print("Do spider all_ppypp_movie.")
    self.program = copy.deepcopy(BASE_CONTENT["program"])
    # Bound by reference (no copy is made here).
    self.program_sub = PROGRAM_SUB
    self.seedBase = [
        "http://www.ppypp.com/dy/index.html",
    ]
    self.seedList = []
    self.dataDir = os.path.join('.', 'data')
    # strftime falls back to the current local time when no time tuple is given.
    self.today = time.strftime('%Y%m%d')
    output_name = "all_ppypp_movie_" + self.today + ".txt"
    self.dataFile = spiderTool.openFile(os.path.join(self.dataDir, output_name))
def __init__(self):
    """Set up the youku_short_2 spider.

    Prints a start banner, prepares the program templates, registers the
    single uploader page to crawl and hands the dated output path to
    spiderTool.openFile.
    """
    print("Do spider youku_short_2.")
    self.program = copy.deepcopy(BASE_CONTENT["program"])
    # Bound by reference (no copy is made here).
    self.program_sub = PROGRAM_SUB
    self.seedList = ["http://i.youku.com/u/UMzMzODQ1Njg5Ng"]
    self.dataDir = os.path.join('.', 'data')
    # strftime falls back to the current local time when no time tuple is given.
    self.today = time.strftime('%Y%m%d')
    output_name = "youku_short_2_" + self.today + ".txt"
    self.dataFile = spiderTool.openFile(os.path.join(self.dataDir, output_name))
def __init__(self):
    """Set up the beauty_youku_1 spider.

    Prints a start banner, prepares the program templates, registers the
    soku.com search result pages and hands the dated output path to
    spiderTool.openFile.
    """
    print("Do spider beauty_youku_1.")
    self.program = copy.deepcopy(BASE_CONTENT["program"])
    # Bound by reference (no copy is made here).
    self.program_sub = PROGRAM_SUB
    # Trailing comments give the English sense of each search label.
    # NOTE(review): the first seed is listed twice - confirm the duplicate is intentional.
    self.seedBase = [
        'http://www.soku.com/search_video/q_%E7%BE%8E%E5%A5%B3%E5%86%99%E7%9C%9F_limitdate_0?site=14&_lg=10&orderby=2&spm=0.0.0.0.nfPeVF',  # beauty photo shoots
        'http://www.soku.com/search_video/q_%E7%BE%8E%E5%A5%B3%E5%86%99%E7%9C%9F_limitdate_0?site=14&_lg=10&orderby=2&spm=0.0.0.0.nfPeVF',  # beauty photo shoots
        'http://www.soku.com/search_video/q_%E7%BE%8E%E5%A5%B3%E7%83%AD%E8%88%9E_limitdate_0?site=14&_lg=10&orderby=2&spm=0.0.0.0.NziPkR',  # beauty dance
    ]
    self.seedList = []
    self.dataDir = os.path.join('.', 'data')
    # strftime falls back to the current local time when no time tuple is given.
    self.today = time.strftime('%Y%m%d')
    output_name = "beauty_youku_1_" + self.today + ".txt"
    self.dataFile = spiderTool.openFile(os.path.join(self.dataDir, output_name))
def __init__(self):
    """Set up the all_sohu_cartoon spider.

    Prints a start banner, prepares the program templates, registers the
    Sohu listing pages and hands the dated output path to
    spiderTool.openFile.
    """
    print("Do spider all_sohu_cartoon.")
    self.program = copy.deepcopy(BASE_CONTENT["program"])
    # Bound by reference (no copy is made here).
    self.program_sub = PROGRAM_SUB
    # The "18-" labels are carried over from the source comments (meaning not
    # shown here - presumably an age bracket; confirm).
    self.seedBase = [
        "http://so.tv.sohu.com/list_p1115_p2_p3_p4_p5_p66_p73_p8_p9_p101_p11_p12_p13.html",  # 18-
        "http://so.tv.sohu.com/list_p1115_p2_p3_p4_p5_p66_p73_p8_p9_p102_p11_p12_p13.html",  # 18-
        "http://so.tv.sohu.com/list_p1115_p2_p3_p4_p5_p6_p73_p8_p9_p101_p11_p12_p13.html",
    ]
    self.seedList = []
    self.dataDir = os.path.join('.', 'data')
    # strftime falls back to the current local time when no time tuple is given.
    self.today = time.strftime('%Y%m%d')
    output_name = "all_sohu_cartoon_" + self.today + ".txt"
    self.dataFile = spiderTool.openFile(os.path.join(self.dataDir, output_name))
def __init__(self):
    """Set up the all_youku_cartoon spider.

    Prints a start banner, prepares the program templates, registers the
    Youku category listing pages and hands the dated output path to
    spiderTool.openFile.
    """
    print("Do spider all_youku_cartoon.")
    self.program = copy.deepcopy(BASE_CONTENT["program"])
    # Bound by reference (no copy is made here).
    self.program_sub = PROGRAM_SUB
    # NOTE(review): the first seed is listed twice - confirm the duplicate is intentional.
    self.seedBase = [
        "http://list.youku.com/category/show/c_100_s_6_d_1_ag_5.html",  # 16~
        "http://list.youku.com/category/show/c_100_s_6_d_1_ag_5.html",
        "http://list.youku.com/category/show/c_100_s_6_d_1_p_1.html",  # new update
    ]
    self.seedList = []
    self.dataDir = os.path.join('.', 'data')
    # strftime falls back to the current local time when no time tuple is given.
    self.today = time.strftime('%Y%m%d')
    # NOTE(review): the file prefix is "all_youku_cartoon2_" while the banner
    # says "all_youku_cartoon" - confirm the difference is intended.
    output_name = "all_youku_cartoon2_" + self.today + ".txt"
    self.dataFile = spiderTool.openFile(os.path.join(self.dataDir, output_name))
def __init__(self):
    """Set up the all_iqiyi_viptv spider.

    Prints a start banner, prepares the program templates, registers the
    VIP TV-drama page, resets the sequence counter and hands the dated
    output path to spiderTool.openFile.
    """
    print("Do spider all_iqiyi_viptv.")
    self.program = copy.deepcopy(BASE_CONTENT["program"])
    # Bound by reference (no copy is made here).
    self.program_sub = PROGRAM_SUB
    # NOTE(review): the same seed is listed twice - confirm the duplicate is intentional.
    self.seedBase = [
        "http://www.iqiyi.com/dianshiju/VIP.html",
        "http://www.iqiyi.com/dianshiju/VIP.html",
    ]
    self.seedList = []
    self.dataDir = os.path.join('.', 'data')
    # strftime falls back to the current local time when no time tuple is given.
    self.today = time.strftime('%Y%m%d')
    self.seqNocnt = 1
    output_name = "all_iqiyi_viptv_" + self.today + ".txt"
    self.dataFile = spiderTool.openFile(os.path.join(self.dataDir, output_name))
def __init__(self):
    """Set up the amuse_bilibili_1 spider.

    Registers the listing pages (each a one-entry dict of URL to id string),
    prepares the program templates and hands the dated output path to
    spiderTool.openFile.
    """
    # NOTE(review): the first entry is listed twice - confirm the duplicate is intentional.
    # NOTE(review): the last two keys use "!page=" while the first two use
    # "#!page=", and they map the same URL to two different ids - confirm.
    self.base = [
        {"http://www.bilibili.com/video/ent-variety-1.html#!page=1": "71"},  # variety shows
        {"http://www.bilibili.com/video/ent-variety-1.html#!page=1": "71"},  # variety shows
        {"http://www.bilibili.com/video/ent-circle-1.html!page=1": "137"},  # celebrities
        {"http://www.bilibili.com/video/ent-circle-1.html!page=1": "131"},  # Korea-related
    ]
    self.seedList = []
    self.program = copy.deepcopy(BASE_CONTENT["program"])
    # Bound by reference (no copy is made here).
    self.program_sub = PROGRAM_SUB
    self.dataDir = os.path.join('.', 'data')
    # strftime falls back to the current local time when no time tuple is given.
    self.today = time.strftime('%Y%m%d')
    output_name = "amuse_bilibili_1_" + self.today + ".txt"
    self.dataFile = spiderTool.openFile(os.path.join(self.dataDir, output_name))
def __init__(self):
    """Set up the ablum_youku_cartoon_1 spider.

    Prints a start banner, prepares the program templates, registers the
    single show page to crawl and hands the dated output path to
    spiderTool.openFile.
    """
    print("Do spider ablum_youku_cartoon_1.")
    self.program = copy.deepcopy(BASE_CONTENT["program"])
    # Bound by reference (no copy is made here).
    self.program_sub = PROGRAM_SUB
    self.seedBase = []
    self.seedList = [
        "http://www.youku.com/show_page/id_z31c53954e34c11e5a2a2.html",  # Twin Star Exorcists
    ]
    self.dataDir = os.path.join('.', 'data')
    # strftime falls back to the current local time when no time tuple is given.
    self.today = time.strftime('%Y%m%d')
    output_name = "ablum_youku_cartoon_1_" + self.today + ".txt"
    self.dataFile = spiderTool.openFile(os.path.join(self.dataDir, output_name))
def __init__(self):
    """Set up the youku_tv_1 spider.

    Prints a start banner, prepares the program templates, registers the
    Youku show pages to crawl and hands the dated output path to
    spiderTool.openFile.
    """
    print("Do spider youku_tv_1.")
    self.program = copy.deepcopy(BASE_CONTENT["program"])
    # Bound by reference (no copy is made here).
    self.program_sub = PROGRAM_SUB
    self.seedList = [
        "http://list.youku.com/show/id_z1800388c336c11e6b432.html",
        "http://list.youku.com/show/id_zfb0e65ccdb6a11e58bfb.html",
        "http://list.youku.com/show/id_z42b9ab741ec511e5b522.html",
        "http://list.youku.com/show/id_z45f85ed6ba6d11e5b522.html",
        "http://list.youku.com/show/id_zfacfcb0cec2511e583e8.html",
    ]
    self.dataDir = os.path.join('.', 'data')
    # strftime falls back to the current local time when no time tuple is given.
    self.today = time.strftime('%Y%m%d')
    output_name = "youku_tv_1_" + self.today + ".txt"
    self.dataFile = spiderTool.openFile(os.path.join(self.dataDir, output_name))
def __init__(self):
    """Set up the iqiyi_short_3 spider.

    Prints a start banner, deep-copies both program templates, registers
    the iQiyi playlist pages to crawl and hands the dated output path to
    spiderTool.openFile.
    """
    print("Do spider iqiyi_short_3.")
    self.program = copy.deepcopy(BASE_CONTENT["program"])
    self.program_sub = copy.deepcopy(PROGRAM_SUB)
    # Trailing comments keep the numbering and titles from the source labels.
    self.seedList = [
        # Disabled: "http://www.iqiyi.com/lib/m_209866614.html"  (2: Wan'an, Pengyouquan - iQiyi)
        "http://www.iqiyi.com/playlist295748402.html",  # 6: Shenju Liang Le - iQiyi
        "http://www.iqiyi.com/playlist396149102.html",  # 17: Xiao Buneng Ting - iQiyi
    ]
    self.dataDir = os.path.join('.', 'data')
    # strftime falls back to the current local time when no time tuple is given.
    self.today = time.strftime('%Y%m%d')
    output_name = "iqiyi_short_3_" + self.today + ".txt"
    self.dataFile = spiderTool.openFile(os.path.join(self.dataDir, output_name))