f.write('\n') return add_items if __name__ == "__main__": obj_spider = Spider() obj_sender = Sender() url = "http://s.dianping.com/event/nanjing" while True: try: hour = int(time.strftime('%H')) print time.strftime('%Y%m%d %H:%M:%S') if hour >= 9 and hour <= 18: new_data = obj_spider.craw(url) add_items = check(new_data) if(len(add_items) > 0): print "New items!!!" title = time.strftime('%Y%m%d %H:%M:%S') + '--ubuntu' content = "\n".join(add_items.values()) obj_sender.send_email(title, content) else: print "No change." time.sleep(20) elif hour < 8 or hour > 20: time.sleep(3598) else: time.sleep(1000) except Exception, e:
f.write('\n') return add_items if __name__ == "__main__": obj_spider = Spider() obj_sender = Sender() url1 = "https://hz.lianjia.com/ershoufang/rs%E5%8D%97%E5%B2%B8%E6%99%B6%E9%83%BD%E8%8A%B1%E5%9B%AD/" url2 = "https://hz.lianjia.com/ershoufang/rs%E9%A3%8E%E6%99%AF%E8%9D%B6%E9%99%A2/" while True: try: hour = int(time.strftime('%H')) print time.strftime('%Y%m%d %H:%M:%S') if 0 < hour < 24: new_data = obj_spider.craw(url1) new_data.extend(obj_spider.craw(url2)) add_items = check(new_data) if len(add_items) > 0: print "New items!!!" title = time.strftime('%Y%m%d %H:%M:%S') content = '' for house in add_items: content += house['title'] + house['price'] + house[ 'house_info'] + '\n' obj_sender.send_email(title, content) else: print "No change." time.sleep(20) else: time.sleep(3600)
from spider import Spider
from spiders.wdyw import spider as wdyw_spider

# Program entry point.
# The task also asks for an interactive console program; it is simple
# enough that it is left unimplemented here.

# Build the Spider container object.
crawler = Spider()

# Register the WHU-news crawler; new crawlers can be implemented and
# registered on this same object.
crawler.register('武大要闻', wdyw_spider)

# Enable every registered crawler.
crawler.enable_all()

# Start crawling, sorting results by date in reverse (newest-first) order.
crawler.craw(order='date', reverse=True)
# coding: utf-8
from spider import Spider

if __name__ == '__main__':
    # Entry point: crawl starting from the Baidu Baike "Python" article.
    entry_url = "https://baike.baidu.com/item/Python"
    baike_spider = Spider()
    baike_spider.craw(entry_url)