photo_type = feed[2] s = PhotoSpider(provider = provider) s.parser.assign('photo_type', photo_type) s.setUserAgent() print feed_url for i in range(5): r = feedparser.parse(feed_url) if(r['entries']): break else: #if(r.has_key('bozo_exception')): #print r['bozo_exception'].getMessage() print 'rss feed parse error, retry...' sleep(2) for target in r['entries']: s.append(url = target['link']) print target['link'] s.proceed(1000) # QQ 新闻 queryurl = 'http://news.qq.com/photo.shtml' s = PhotoSpider(provider = 'news.qq') s.setUserAgent() s.append(url = queryurl) s.proceed(1000) # http://www.boston.com/bigpicture/ queryurl = 'http://www.boston.com/bigpicture' s = PhotoSpider(provider = 'boston')
# Fragment of the crawl script: finish queueing one RSS feed's entries, then
# crawl a Diandian archive.
# NOTE(review): `rss` and `s` are bound earlier in the file, outside this
# fragment.
r = feedparser.parse(rss)
for target in r['entries']:
    #print target['link']
    s.append(url = target['link'])
s.proceed(1000)

# The section below is disabled (wrapped in a module-level triple-quoted
# string): Tuita archive crawl; the inline numbers mark photo_type
# (6 = movie, 5 = photography).
'''
#推他存档
#queryurl = 'http://imovie.tuita.com/archive' #6 - 电影
queryurl = 'http://jxh1964.tuita.com/archive' #6 - 电影
#queryurl = 'http://yamijazz.tuita.com/archive' #5 - 摄影
#queryurl = 'http://longmaotx.tuita.com/archive' #5 - 摄影
s = PhotoSpider(provider = 'tuita_archive')
s.parser.assign('photo_type', 6)
s.setUserAgent()
s.append(url = queryurl)
s.proceed(1000)
'''

# Diandian archive - discovered (undocumented) monthly-archive interface.
# The commented-out alternatives are other blogs/months that were crawled.
#queryurl = 'http://ump-cn.diandian.com/archive?lite=1&month=201203'
#queryurl = 'http://allposter.diandian.com/archive?lite=1&month=201201'
queryurl = 'http://movielife.diandian.com/archive?lite=1&month=201102'
s = PhotoSpider(provider = 'diandian_archive')
s.parser.assign('photo_type', 6) #movie
s.setUserAgent()
s.append(url = queryurl)
s.proceed(1000)

# NOTE(review): this opening triple-quote disables everything that follows,
# continuing past the end of this fragment — presumably closed further down.
'''
#coding: utf8
from wedspider.spider import Spider
from photo.photo_spider import PhotoSpider

# Daily-update crawl entry point.
if __name__ == "__main__":
    # QQ News photo channel: seed the spider with the channel page, set the
    # user agent, and run the crawl with a limit of 1000.
    queryurl = 'http://news.qq.com/photo.shtml'
    s = PhotoSpider(provider='news.qq')
    s.setUserAgent()
    s.append(url=queryurl)
    s.proceed(1000)
photo_type = feed[2] s = PhotoSpider(provider=provider) s.parser.assign('photo_type', photo_type) s.setUserAgent() print feed_url for i in range(5): r = feedparser.parse(feed_url) if (r['entries']): break else: #if(r.has_key('bozo_exception')): #print r['bozo_exception'].getMessage() print 'rss feed parse error, retry...' sleep(2) for target in r['entries']: s.append(url=target['link']) print target['link'] s.proceed(1000) # QQ 新闻 queryurl = 'http://news.qq.com/photo.shtml' s = PhotoSpider(provider='news.qq') s.setUserAgent() s.append(url=queryurl) s.proceed(1000) # http://www.boston.com/bigpicture/ queryurl = 'http://www.boston.com/bigpicture' s = PhotoSpider(provider='boston') s.setUserAgent() s.append(url=queryurl)