def topic_test(topic_url):
    """Print basic facts about the topic at ``topic_url``.

    Builds a ``Topic`` from the URL and prints, in this order, the values
    returned by ``get_topic_id()``, ``get_topic_name()``,
    ``get_question_num()`` and ``get_follower_num()``. Finally it calls
    ``get_questions()``.

    Args:
        topic_url: URL handed unchanged to the ``Topic`` constructor.
    """
    Logging.info(u"topic_test:")
    topic = Topic(topic_url)

    # Single-argument print(...) produces the same output under Python 2
    # and Python 3, unlike the bare ``print x`` statement.
    print(topic.get_topic_id())
    print(topic.get_topic_name())
    print(topic.get_question_num())
    print(topic.get_follower_num())

    # NOTE(review): the result is not used in the code visible here; the
    # binding is kept in case the function continues beyond this view.
    questions = topic.get_questions()
#time.sleep(0.1)
# NOTE(review): the comment above is the tail of a definition that begins
# before this view; it is kept verbatim.


def user_spider(user):
    """Write ``user`` and each of their followers to the database.

    ``user`` is passed to ``DataBase.put_user_in_db`` first. Then, for every
    item yielded by ``user.get_followers()``, the follower is passed to
    ``DataBase.put_user_in_db`` and the pair ``(user, follower)`` to
    ``DataBase.put_follow_user_in_db``.

    Args:
        user: object exposing ``get_followers()``.
    """
    DataBase.put_user_in_db(user)
    for follower in user.get_followers():
        DataBase.put_user_in_db(follower)
        DataBase.put_follow_user_in_db(user, follower)


if __name__ == '__main__':
    import sys

    sys.setrecursionlimit(1000000)
    THREADS = 10
    BATCH_SIZE = 20  # questions handed to the pool per map() call

    p = mp.Pool(processes=THREADS)
    try:
        topic = Topic("http://www.zhihu.com/topic/19554927")

        # NOTE(review): the source had lost its indentation; all three
        # statements below are assumed to belong to this ``if`` (register
        # the topic only the first time it is seen) -- confirm.
        if not topicBloom.is_element_exist(topic.get_topic_id()):
            topicBloom.insert_element(topic.get_topic_id())
            Worm_status.record_status("topicBloom", topicBloom)
            DataBase.put_topic_in_db(topic)

        go = topic.get_questions()
        num = topic.get_question_num()

        # ``go`` is consumed BATCH_SIZE items at a time; ``num`` only bounds
        # how many batches are attempted.
        while num >= 0:
            try:
                p.map(spider, itertools.islice(go, BATCH_SIZE))
            except AttributeError:
                # NOTE(review): a batch failing with AttributeError is
                # skipped on purpose in the original; the cause is not
                # visible from here.
                pass
            finally:
                num -= BATCH_SIZE
    finally:
        # map() is synchronous, so no work is outstanding on the normal
        # path; on an error path this stops the workers instead of leaving
        # them to interpreter shutdown.
        p.terminate()
        p.join()
# NOTE(review): the next two lines are the tail of a definition that begins
# before this view; they are kept verbatim because their original
# indentation cannot be recovered from here.
Logging.debug("Exist Question")
#time.sleep(0.1)


def user_spider(user):
    """Write ``user`` and each of their followers to the database.

    ``user`` is passed to ``DataBase.put_user_in_db`` first. Then, for every
    item yielded by ``user.get_followers()``, the follower is passed to
    ``DataBase.put_user_in_db`` and the pair ``(user, follower)`` to
    ``DataBase.put_follow_user_in_db``.

    Args:
        user: object exposing ``get_followers()``.
    """
    DataBase.put_user_in_db(user)
    for follower in user.get_followers():
        DataBase.put_user_in_db(follower)
        DataBase.put_follow_user_in_db(user, follower)


if __name__ == '__main__':
    import sys

    sys.setrecursionlimit(1000000)
    THREADS = 10
    BATCH_SIZE = 20  # questions handed to the pool per map() call

    p = mp.Pool(processes=THREADS)
    try:
        topic = Topic("http://www.zhihu.com/topic/19554927")

        # NOTE(review): the source had lost its indentation; all three
        # statements below are assumed to belong to this ``if`` (register
        # the topic only the first time it is seen) -- confirm.
        if not topicBloom.is_element_exist(topic.get_topic_id()):
            topicBloom.insert_element(topic.get_topic_id())
            Worm_status.record_status("topicBloom", topicBloom)
            DataBase.put_topic_in_db(topic)

        go = topic.get_questions()
        num = topic.get_question_num()

        # ``go`` is consumed BATCH_SIZE items at a time; ``num`` only bounds
        # how many batches are attempted.
        while num >= 0:
            try:
                p.map(spider, itertools.islice(go, BATCH_SIZE))
            except AttributeError:
                # NOTE(review): a batch failing with AttributeError is
                # skipped on purpose in the original; the cause is not
                # visible from here.
                pass
            finally:
                num -= BATCH_SIZE
    finally:
        # map() is synchronous, so no work is outstanding on the normal
        # path; on an error path this stops the workers instead of leaving
        # them to interpreter shutdown.
        p.terminate()
        p.join()
def test_topic(topic_url):
    """Print the name of every child of the topic at ``topic_url``.

    Builds a ``Topic`` from the URL, calls ``get_question_num()`` on it, and
    then prints ``get_topic_name()`` for each item yielded by
    ``get_child()``.

    Args:
        topic_url: URL handed unchanged to the ``Topic`` constructor.
    """
    Logging.info(u"topic_test:")
    topic = Topic(topic_url)

    # The return value is discarded, as in the original.
    # NOTE(review): presumably called for a side effect -- confirm.
    topic.get_question_num()

    # Use a separate loop name so the parent ``topic`` is not rebound while
    # its children are being iterated.
    for child in topic.get_child():
        # Single-argument print(...) works on both Python 2 and Python 3.
        print(child.get_topic_name())
#!/usr/bin/env python
# -*- coding:utf-8 -*-
from zhihu_api.Topic import Topic
from database_operation.DataBase import DataBase


def store(topic):
    """Recursively store ``topic`` and everything below it, children first.

    For a non-``None`` topic, every item yielded by ``topic.get_child()`` is
    stored recursively before the topic itself is passed to
    ``DataBase.put_topic_in_db`` and its name printed. ``None`` is ignored.

    Args:
        topic: object exposing ``get_child()`` and ``get_topic_name()``,
            or ``None``.
    """
    if topic is None:
        return

    for item in topic.get_child():
        store(item)

    try:
        DataBase.put_topic_in_db(topic)
        # Single-argument print(...) works on both Python 2 and Python 3.
        print(topic.get_topic_name())
    except Exception:
        # Best effort: failing to store or print one topic must not abort
        # the rest of the walk. Unlike a bare ``except:``, this still lets
        # KeyboardInterrupt and SystemExit stop the script.
        pass


if __name__ == "__main__":
    import sys

    # store() recurses once per level of the topic tree.
    sys.setrecursionlimit(1000000)
    rootTopic = Topic("http://www.zhihu.com/topic/19776749")
    store(rootTopic)