def fetch_one_url(url):
    """Run a ``BaseSpider`` over one URL and log every entry of its ``urlLists``.

    This is a generator function: it yields the value returned by
    ``SpiderEnv.gen_env()`` and by the spider's ``work()`` and expects the
    driver to send the environment back in.
    NOTE(review): presumably wrapped by a coroutine decorator that is not
    visible in this view -- confirm.

    Args:
        url: Value handed to ``SpiderEnv`` to build the environment.
    """
    spider_env = SpiderEnv(url)
    environment = yield spider_env.gen_env()

    # ``app`` is a module-level name defined outside this function.
    seeker = BaseSpider(environment, app)
    yield seeker.work()

    discovered = seeker.urlLists
    for link in discovered:
        Log4Spider.infoLog(link)
    # Finish with the number of entries that were collected.
    Log4Spider.infoLog(len(discovered))
def fetch_one_url(url):
    """Run a ``Jd_Home_Spider`` over one URL and log each entry of ``urlLists``.

    Generator function: yields the result of ``SpiderEnv.gen_env()`` (the
    driver sends the environment back) and then the result of the spider's
    ``work()``.
    NOTE(review): presumably wrapped by a coroutine decorator outside this
    view -- confirm.

    Args:
        url: Value handed to ``SpiderEnv`` to build the environment.
    """
    spider_env = SpiderEnv(url)
    environment = yield spider_env.gen_env()

    # ``app`` is a module-level name defined outside this function.
    home_spider = Jd_Home_Spider(environment, app)
    yield home_spider.work()

    discovered = home_spider.urlLists
    for link in discovered:
        Log4Spider.infoLog(link)
    # Finish with the number of entries that were collected.
    Log4Spider.infoLog(len(discovered))
def downPicTest():
    """Run ``PicDownSpider`` once for each of three hard-coded image URLs.

    Generator function: for every URL it yields the result of
    ``SpiderEnv.gen_env()`` (the driver sends the environment back) and then
    the result of the spider's ``work()``. The URLs are processed one after
    another, in the order listed.
    NOTE(review): presumably wrapped by a coroutine decorator outside this
    view -- confirm.
    """
    picture_urls = (
        "http://upload-images.jianshu.io/upload_images/1679702-7e810a34f3ef8d18.jpg?imageMogr2/auto-orient/strip%7CimageView2/1/w/300/h/300",
        "https://asearch.alicdn.com/bao/uploaded/i1/146280142867863617/TB2.atGhFXXXXcoXXXXXXXXXXXX_!!15874628-0-saturn_solar.jpg_210x210.jpg",
        "http://pic18.wed114.cn/20140923/2014092312515083.jpg",
    )
    for picture_url in picture_urls:
        environment = yield SpiderEnv(picture_url).gen_env()
        # The spider is built without an application object (``None``).
        downloader = PicDownSpider(environment, None)
        yield downloader.work()
def fetch_one_url(url):
    """Run a ``Jd_Item_Spider`` over one URL, log its ``urlLists``, then count down.

    Generator function: yields the result of ``SpiderEnv.gen_env()`` (the
    driver sends the environment back) and then the result of the spider's
    ``work()``.
    NOTE(review): presumably wrapped by a coroutine decorator outside this
    view -- confirm.

    The module-level counter ``num`` is decremented exactly once per call and
    the module-level ``event`` is set when the counter reaches zero. This
    bookkeeping lives in a ``finally`` clause so that a failure while building
    the environment or running the spider still counts the task as finished;
    otherwise ``event`` would never be set and anything waiting on it would
    block forever. The exception itself is not swallowed.

    Args:
        url: Value handed to ``SpiderEnv`` to build the environment.
    """
    global num
    try:
        env_obj = SpiderEnv(url)
        env = yield env_obj.gen_env()
        # ``app`` is a module-level name defined outside this function.
        urlSeek = Jd_Item_Spider(env, app)
        yield urlSeek.work()
        # Separate loop name so the ``url`` parameter is not overwritten.
        for found_url in urlSeek.urlLists:
            Log4Spider.infoLog(found_url)
        Log4Spider.infoLog(len(urlSeek.urlLists))
    finally:
        num -= 1
        if num == 0:
            event.set()
def fetch_one_url(url):
    """Run a ``Jd_Item_Spider`` over one URL, log its ``urlLists``, then count down.

    Generator function: yields the result of ``SpiderEnv.gen_env()`` (the
    driver sends the environment back) and then the result of the spider's
    ``work()``.
    NOTE(review): presumably wrapped by a coroutine decorator outside this
    view -- confirm.

    The module-level counter ``num`` is decremented exactly once per call and
    the module-level ``event`` is set when the counter reaches zero. The
    countdown is placed in ``finally`` so that an exception raised by
    ``gen_env()`` or ``work()`` cannot leave the counter stuck above zero,
    which would mean ``event`` is never set. The exception still propagates
    to the caller.

    Args:
        url: Value handed to ``SpiderEnv`` to build the environment.
    """
    global num
    try:
        env_obj = SpiderEnv(url)
        env = yield env_obj.gen_env()
        # ``app`` is a module-level name defined outside this function.
        urlSeek = Jd_Item_Spider(env, app)
        yield urlSeek.work()
        # Separate loop name so the ``url`` parameter is not overwritten.
        for found_url in urlSeek.urlLists:
            Log4Spider.infoLog(found_url)
        Log4Spider.infoLog(len(urlSeek.urlLists))
    finally:
        num -= 1
        if num == 0:
            event.set()
def main():
    """Run ``UrlSeekSpider`` over two hard-coded URLs and log what each collects.

    Generator function: for every start URL it yields the result of
    ``SpiderEnv.gen_env()`` (the driver sends the environment back) and then
    the result of the spider's ``work()``. After each run, every entry of the
    spider's ``urlLists`` is logged, followed by the number of entries.
    NOTE(review): presumably wrapped by a coroutine decorator outside this
    view -- confirm.
    """
    start_urls = (
        "http://www.jianshu.com",
        "http://upload-images.jianshu.io/upload_images/1679702-7e810a34f3ef8d18.jpg?imageMogr2/auto-orient/strip%7CimageView2/1/w/300/h/300",
    )
    for start_url in start_urls:
        environment = yield SpiderEnv(start_url).gen_env()
        # The spider is built without an application object (``None``).
        seeker = UrlSeekSpider(environment, None)
        yield seeker.work()

        discovered = seeker.urlLists
        for link in discovered:
            Log4Spider.infoLog(link)
        Log4Spider.infoLog(len(discovered))
def run(self):
    """Endless worker loop: take a URL from the queue, run a spider, enqueue results.

    Generator function: every ``yield`` hands a value from ``self.queue``,
    ``SpiderEnv.gen_env()`` or the spider's ``work()`` to the driver.
    NOTE(review): presumably wrapped by a coroutine decorator outside this
    view -- confirm.

    Per iteration:
      1. Take the next URL from ``self.queue``.
      2. Build its environment; on any ``Exception`` the failure is logged and
         the URL is skipped.
      3. Call ``self._find_url_handler(url)``, then instantiate
         ``self.handler_class`` with ``self.handler_kwargs``
         (presumably both are set by ``_find_url_handler`` -- confirm).
      4. Run the spider and put every entry of its ``urlLists`` on the queue.

    The loop never returns on its own. An exception raised by ``work()`` or by
    the queue operations is not caught here.
    """
    while True:
        current_url = yield self.queue.get()
        Log4Spider.debugLog(self, "get url:", current_url)

        try:
            environment = yield SpiderEnv(current_url).gen_env()
        except Exception as exc:
            Log4Spider.errLog(self, "spider env failed url:", current_url, "exception:", exc)
            continue

        self._find_url_handler(current_url)
        handler_cls = self.handler_class
        Log4Spider.infoLog(self, "url: ", current_url, " --- class: ", handler_cls)

        worker = handler_cls(environment, self.application, **self.handler_kwargs)
        yield worker.work()

        # Feed every collected entry back into the queue.
        for next_url in worker.urlLists:
            Log4Spider.debugLog(self, "put url:", next_url)
            yield self.queue.put(next_url)
def main():
    """Build a ``SpiderEnv`` for the Taobao home page and print its ``env`` attribute.

    Generator function: yields the result of ``gen_env()`` once. The value the
    driver sends back is ignored; the ``env`` attribute of the ``SpiderEnv``
    object is printed instead.
    NOTE(review): presumably wrapped by a coroutine decorator outside this
    view -- confirm.
    """
    spider_env = SpiderEnv("http://www.taobao.com")
    yield spider_env.gen_env()
    print(spider_env.env)