Example #1
                # Pagination: splice the new next_page_token into the request
                # URL and re-enqueue it so the next page gets fetched.
                pagetoken = '&pagetoken=%s' % json_response['next_page_token']
                url = re.sub(r'&pagetoken=.*', pagetoken, url)
                save_url_ToQueue(url)
                # self.parse_html(url)
            else:
                pass
        elif status == 'OVER_QUERY_LIMIT':
            # Quota exhausted: rotate to the next API key and re-enqueue the
            # same URL so it is retried under the fresh key.
            self.key = self.app_keys_pop()
            url = re.sub(r'&key=.*&pagetoken', '&key=%s&pagetoken' % self.key,
                         url)
            save_url_ToQueue(url)
            # self.parse_html(url)
        else:
            return
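The fragment above opens mid-method: the fetch that produces `status`, the `if status == 'OK'` test, and the `next_page_token` check are cut off. Below is a minimal reconstruction of the enclosing handler, assuming a standard Google Places API JSON response (which carries `status` and, when more results exist, `next_page_token`); everything outside the two branches shown above is an assumption.

import json
import re
from urllib.request import urlopen

class GooglePlacesParser(object):
    # __init__, app_keys_pop and save_url_ToQueue exist in the full source
    # but are not shown in the excerpt.

    def parse_html(self, url):
        # Fetch one result page and dispatch on the API status field.
        json_response = json.loads(urlopen(url).read())
        status = json_response['status']
        if status == 'OK':
            if 'next_page_token' in json_response:
                ...  # pagination branch from the fragment above
            else:
                pass  # last page: nothing left to enqueue
        elif status == 'OVER_QUERY_LIMIT':
            ...  # key-rotation branch from the fragment above
        else:
            return  # ZERO_RESULTS, REQUEST_DENIED, etc.: drop the URL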


# from datetime import datetime

if __name__ == "__main__":

    # border_location_right = {
    #     "lat": 24.970,
    #     "lng": 121.666
    # }
    # print gpp.get_url(app_keys[0], border_location_right, 'food')
    name_red = 'testA'
    redis_que = RedisQueues(name_red)
    print(redis_que.length())  # queue length before seeding
    gpp = GooglePlacesParser(redis_que)
    gpp.save_url_ToQueue()
    print(redis_que.length())  # queue length after seeding
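Both examples construct a `RedisQueues` object that the excerpts never define. Here is a minimal sketch of the interface they exercise (`length()` above, `delete()` in Example #2 below), built on redis-py; `put()` and `get()` are assumed companions, and every name except the methods actually called in the examples is an assumption.

import redis

class RedisQueues(object):
    """Minimal Redis-backed FIFO queue matching the calls in these examples."""

    def __init__(self, name, host='localhost', port=6379, db=0):
        self.name = name
        self.client = redis.StrictRedis(host=host, port=port, db=db)

    def length(self):
        # number of items waiting in the queue
        return self.client.llen(self.name)

    def put(self, item):
        # append one item to the tail of the queue (assumed helper)
        self.client.rpush(self.name, item)

    def get(self, timeout=0):
        # blocking pop from the head; returns None on timeout (assumed helper)
        item = self.client.blpop(self.name, timeout=timeout)
        return item[1] if item else None

    def delete(self):
        # drop the whole queue, as Example #2 does before a fresh run
        self.client.delete(self.name)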
Example #2
        # time.sleep(60)

    print('The information about all the people has been saved!')
    print('Done, exiting')


# url = 'https://s3.amazonaws.com/codecave/tut1.html'
# url= 'http://www.douban.com/group/search?cat=1019&q=%E6%9C%88%E5%AB%82'
# data_words = ['妈妈','宝宝','怀孕','待产包','月子餐','月嫂']
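The driver below also relies on a `MongoCRUD` helper that the excerpt does not define. Here is a pymongo-based sketch of the one method visible (commented out) in the driver, `people_url_insert`; the database and collection names are assumptions.

from pymongo import MongoClient

class MongoCRUD(object):
    """Sketch of the Mongo helper instantiated by the driver below."""

    def __init__(self, host='localhost', port=27017):
        self.client = MongoClient(host, port)
        self.db = self.client['douban']        # database name is an assumption
        self.people = self.db['people_urls']   # collection name is an assumption

    def people_url_insert(self, urls):
        # Upsert one document per user URL so repeated crawls stay duplicate-free.
        for url in urls:
            self.people.update_one({'url': url},
                                   {'$set': {'url': url}},
                                   upsert=True)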
if __name__ == '__main__':
    # pool = multiprocessing.Pool(processes=4)
    start = time.perf_counter()  # high-resolution timer for the whole run
    dbs = MongoCRUD()
    # Douban search keywords: mom, baby, pregnancy, maternity bag,
    # confinement meals, maternity matron, mommy
    data_words = ['妈妈', '宝宝', '怀孕', '待产包', '月子餐', '月嫂', '妈咪']
    data_words = ['妈妈']  # narrowed to a single keyword for a test run
    group_queue = RedisQueues('GROUP_QUEUE1')
    group_queue.delete()  # start from an empty queue
    # all = get_url_all(data_words, group_queue)
    # get_url_all(data_words, group_queue, dbs)
    # print(len(all))
    # print('Douban user UIDs stored in the database')
    # dbs.people_url_insert(all)
    save_data()
    elapsed = time.perf_counter() - start
    print("Time used:", elapsed)
    '''
    people_queue = RedisQueues('PEOPLE_QUEUE1')

    dbs = MongoCRUD()
    # get_data(data_words,group_queue,people_queue)
    people_all = get_data(data_words, group_queue, people_queue)