Пример #1
0
        self.user['answers'] = int(d('.profile-navbar a.item').eq(2).find('span.num').text())
        self.user['posts'] = int(d('.profile-navbar a.item').eq(3).find('span.num').text())
        self.user['collections'] = int(d('.profile-navbar a.item').eq(4).find('span.num').text())
        self.user['logs'] = int(d('.profile-navbar a.item').eq(5).find('span.num').text())

        self.user['followings_num'] = int(d('.zm-profile-side-following a.item').eq(0).find('strong').text())
        self.user['followers_num'] = int(d('.zm-profile-side-following a.item').eq(1).find('strong').text())

        self.user['weibo'] = d('a.zm-profile-header-user-weibo').attr('href') or ''

    def get(self):
        """Crawl the profile and all followings batches, then return the user.

        Logs the start and the end of the run at WARNING level, calls
        ``get_followings_page`` and ``get_profile``, and then keeps requesting
        followings batches until ``get_followings_json`` returns a falsy
        value.

        Returns:
            ``self.user`` once no further batch is returned.
        """
        self.logger.warning('Start crawling %s', self.user_domain)
        self.get_followings_page()
        self.get_profile()

        # The number of followings gathered so far is passed to every request
        # (presumably the paging offset -- confirm in get_followings_json).
        batch = self.get_followings_json(len(self.user['followings']))
        while batch:
            self.get_followings_from_html(batch)
            # Random 0.5-2 s pause before the next request.
            time.sleep(random.uniform(0.5, 2))
            batch = self.get_followings_json(len(self.user['followings']))

        self.logger.warning(
            'Finish crawling %s, %s followings',
            self.user_domain,
            len(self.user['followings']),
        )
        return self.user


if __name__ == '__main__':
    # Manual run: crawl one hard-coded user and print the resulting data.
    logging.basicConfig(level=logging.INFO)
    crawler = FollowingsCrawler(Session.get(), 'zhang-wen-wen-17')
    crawled_user = crawler.get()
    print(crawled_user)
Пример #2
0
 def __init__(self):
     """Create a module-named logger and obtain a session via ``Session.get()``."""
     self.logger = logging.getLogger(__name__)
     self.session = Session.get()
Пример #3
0
            d('.profile-navbar a.item').eq(4).find('span.num').text())
        self.user['logs'] = int(
            d('.profile-navbar a.item').eq(5).find('span.num').text())

        self.user['followings_num'] = int(
            d('.zm-profile-side-following a.item').eq(0).find('strong').text())
        self.user['followers_num'] = int(
            d('.zm-profile-side-following a.item').eq(1).find('strong').text())

        self.user['weibo'] = d('a.zm-profile-header-user-weibo').attr(
            'href') or ''

    def get(self):
        """Crawl the user's profile and return the collected data.

        Calls ``get_followings_page`` and then ``get_profile``, logging the
        start and the end of the run at WARNING level.

        Returns:
            ``self.user`` after both calls have completed.
        """
        self.logger.warning('Start crawling %s', self.user_domain)
        self.get_followings_page()
        self.get_profile()

        # The message takes a single argument, so it ends right after the
        # placeholder with no trailing punctuation.
        self.logger.warning('Finish crawling profile for %s',
                            self.user_domain)
        return self.user


if __name__ == '__main__':
    # Manual run against the 'dev' environment: crawl one hard-coded user
    # and print every collected field on its own line.
    config.set_config('env', 'dev')
    logging.basicConfig(level=logging.INFO)
    crawler = ProfileCrawler(Session.get(), 'jonas-lu')
    profile = crawler.get()
    for field in profile:
        print(field, profile[field])
Пример #4
0
 def __init__(self):
     """Create a module-named logger and obtain a session via ``Session.get()``."""
     self.logger = logging.getLogger(__name__)
     self.session = Session.get()