# NOTE(review): this chunk is a whitespace-mangled paste — reconstructed here
# with conventional indentation. The head of the enclosing method (presumably
# `if status == 'OK':` plus an inner `if 'next_page_token' ...:` test) lies
# before this view, so the nesting depths of the leading branches are
# inferred — TODO confirm against the full file.
            # BUG(review): `josn_response` looks like a typo for
            # `json_response` — confirm against the surrounding method.
            # Build the pagination token for the next page of results.
            pagetoken = '&pagetoken=%s' % josn_response['next_page_token']
            # Replace any old page token in the URL, then re-queue it so the
            # next page gets fetched later.
            url = re.sub(r'&pagetoken=.*', pagetoken, url)
            save_url_ToQueue(url)
            # self.parse_html(url)
        else:
            # No next page token: nothing more to fetch for this query.
            pass
    elif status == 'OVER_QUERY_LIMIT':
        # Current API key has hit its quota: rotate to the next key,
        # rewrite the key portion of the URL, and re-queue for retry.
        self.key = self.app_keys_pop()
        url = re.sub(r'&key=.*&pagetoken', '&key=%s&pagetoken' % self.key, url)
        save_url_ToQueue(url)
        # self.parse_html(url)
    else:
        # Any other status (e.g. an error response): give up on this URL.
        return


# from datetime import datetime
if __name__ == "__main__":
    # Ad-hoc driver: seed the Redis queue with Google Places URLs and show
    # the queue length before and after.
    # border_location_right = {
    #     "lat": 24.970,
    #     "lng": 121.666
    # }
    # print gpp.get_url(app_keys[0], border_location_right, 'food')
    name_red = 'testA'
    redis_que = RedisQueues(name_red)
    print redis_que.length()
    gpp = GooglePlacesParser(redis_que)
    gpp.save_url_ToQueue()
    print redis_que.length()
# time.sleep(60) print 'the information about all people has saved!' print '完成 退出' # url = 'https://s3.amazonaws.com/codecave/tut1.html' # url= 'http://www.douban.com/group/search?cat=1019&q=%E6%9C%88%E5%AB%82' # data_words = ['妈妈','宝宝','怀孕','待产包','月子餐','月嫂'] if __name__ == '__main__': # pool = multiprocessing.Pool(processes=4) start = time.clock() dbs = MongoCRUD() data_words = ['妈妈', '宝宝', '怀孕', '待产包', '月子餐', '月嫂', '妈咪'] data_words = ['妈妈'] group_queue = RedisQueues('GROUP_QUEUE1') group_queue.delete() # all = get_url_all(data_words,group_queue) # get_url_all(data_words,group_queue,dbs) # print len(all) # print '豆瓣用户UID存入数据库' # dbs.people_url_insert(all) save_data() elapsed = (time.clock() - start) print("Time used:", elapsed) ''' people_queue = RedisQueues('PEOPLE_QUEUE1') dbs = MongoCRUD() # get_data(data_words,group_queue,people_queue) people_all =get_data(data_words,group_queue,people_queue)