Пример #1
0
                if user:
                    logging.exception("exception for user %s" % user.to_d())
                else:
                    logging.exception("exception and user is None")
            logging.info("api calls remaining: %d", self.twitter.remaining)
        print "slave is done"

    def crawl(self, user):
        """Fetch a user's new tweets and schedule that user's next crawl.

        Calls ``self.twitter.save_timeline`` with the user's id and
        ``last_tid``; when tweets come back, the id of the first one becomes
        the new ``last_tid``.  ``user.tweets_per_hour`` is replaced by the
        mean of its previous value and the rate observed since
        ``last_crawl_date``.  ``next_crawl_date`` is set far enough ahead
        that about ``settings.tweets_per_crawl`` tweets are expected at that
        rate, but never more than ``settings.max_hours`` hours ahead.  The
        user is saved at the end.

        Args:
            user: record exposing ``_id``, ``screen_name``, ``last_tid``,
                ``last_crawl_date``, ``tweets_per_hour``,
                ``next_crawl_date`` and ``save()``.
        """
        logging.debug("visiting %s - %s", user._id, user.screen_name)
        tweets = self.twitter.save_timeline(user._id, user.last_tid)
        if tweets:
            # NOTE(review): presumably the timeline is newest-first, so the
            # first tweet is the high-water mark -- confirm in save_timeline.
            user.last_tid = tweets[0]._id
        # Treat any falsy result (empty list, None) as "no new tweets".
        tweet_count = len(tweets) if tweets else 0

        now = datetime.utcnow()
        last = user.last_crawl_date
        if last is None:
            # Fallback reference date for users that were never crawled.
            last = datetime(2010, 11, 12)
        delta = now - last
        # Clamp to one second: a re-crawl within the same second (or a
        # last_crawl_date slightly in the future) would otherwise divide by
        # zero or yield a negative rate.
        seconds = max(delta.seconds + delta.days * 24 * 3600, 1)

        # Blend the freshly observed rate with the previous estimate.
        tph = (3600.0 * tweet_count / seconds + user.tweets_per_hour) / 2
        user.tweets_per_hour = tph
        if tph > 0:
            hours = min(settings.tweets_per_crawl / tph, settings.max_hours)
        else:
            # A zero rate means "wait forever"; the cap is the limit of the
            # min() above as tph approaches zero, and avoids dividing by 0.
            hours = settings.max_hours
        user.next_crawl_date = now + timedelta(hours=hours)
        user.last_crawl_date = now
        user.save()


if __name__ == '__main__':
    # Script entry point: attach the "houtx_user" CouchDB database to the
    # User model, then start the crawl master.
    # NOTE(review): the meaning of the second CouchDB argument (True) is not
    # visible here -- confirm against the CouchDB wrapper.
    User.database = CouchDB(settings.couchdb_root + "houtx_user", True)
    proc = CrawlMaster()
    # Slaves receive the master's todo/done objects; presumably these are the
    # work and result queues shared between master and slaves -- confirm.
    create_slaves(CrawlSlave, proc.todo, proc.done)
    proc.run()
Пример #2
0
            except Exception as ex:
                if user:
                    logging.exception("exception for user %s"%user.to_d())
                else:
                    logging.exception("exception and user is None")
            logging.info("api calls remaining: %d",self.twitter.remaining)
        print "slave is done"

    def crawl(self, user):
        """Save the user's latest tweets and decide when to crawl them again.

        The timeline is fetched through ``self.twitter.save_timeline`` using
        the user's id and ``last_tid``.  If anything is returned, the first
        tweet's id is stored as ``last_tid``.  The observed tweets-per-hour
        since ``last_crawl_date`` is averaged with the stored
        ``tweets_per_hour``, and ``next_crawl_date`` is placed
        ``settings.tweets_per_crawl / rate`` hours from now, capped at
        ``settings.max_hours``.  Finally the user is saved.

        Args:
            user: record exposing ``_id``, ``screen_name``, ``last_tid``,
                ``last_crawl_date``, ``tweets_per_hour``,
                ``next_crawl_date`` and ``save()``.
        """
        logging.debug("visiting %s - %s", user._id, user.screen_name)
        tweets = self.twitter.save_timeline(user._id, user.last_tid)
        if tweets:
            # NOTE(review): assumes tweets[0] is the most recent tweet --
            # confirm the ordering returned by save_timeline.
            user.last_tid = tweets[0]._id
        # A falsy result (empty or None) counts as zero new tweets.
        fetched = len(tweets) if tweets else 0

        now = datetime.utcnow()
        if user.last_crawl_date is not None:
            last = user.last_crawl_date
        else:
            # Reference date used for users with no recorded crawl.
            last = datetime(2010, 11, 12)
        delta = now - last
        elapsed = delta.seconds + delta.days * 24 * 3600
        if elapsed < 1:
            # Two crawls within the same second give elapsed == 0, which
            # would raise ZeroDivisionError below; use a one-second floor.
            elapsed = 1

        # Average the new observation with the previous estimate.
        tph = (3600.0 * fetched / elapsed + user.tweets_per_hour) / 2
        user.tweets_per_hour = tph
        if tph > 0:
            hours = min(settings.tweets_per_crawl / tph, settings.max_hours)
        else:
            # No tweets expected at all: wait the maximum allowed time
            # instead of dividing by zero.
            hours = settings.max_hours
        user.next_crawl_date = now + timedelta(hours=hours)
        user.last_crawl_date = now
        user.save()


if __name__ == '__main__':
    # Script entry point: bind the User model to the "houtx_user" CouchDB
    # database before any master or slave object is created.
    # NOTE(review): the purpose of the trailing True argument is not shown in
    # this file section -- confirm against the CouchDB wrapper.
    User.database = CouchDB(settings.couchdb_root+"houtx_user",True)
    proc = CrawlMaster()
    # The slaves are handed the master's todo and done attributes
    # (presumably shared work/result queues -- confirm), then the master runs.
    create_slaves(CrawlSlave, proc.todo, proc.done)
    proc.run()
Пример #3
0
                   jobs[u].rfriends_score = RFRIEND_POINTS/len(rfriends)

        if tweets:
            ats = defaultdict(int)
            for tweet in tweets:
                for uid in tweet.mentions:
                    ats[uid]+=1
            for u,c in ats.iteritems():
                points = c*MENTION_POINTS
                if points >0:
                    jobs[u].mention_score = points

        for k,j in jobs.iteritems():
            j._id = k
            j.put(self.stalk)


if __name__ == '__main__':
    # Command-line entry point.  The optional first argument selects a role:
    #   (none)  create slaves with create_slaves(LookupSlave), then run a
    #           LookupMaster in this process
    #   m       run a LookupMaster only
    #   s       run a single LookupSlave('x')
    #   c       only create slaves via create_slaves(LookupSlave, prefix="x")
    #
    # proc stays None when there is nothing to run in this process.
    # Previously the 'c' branch and any unrecognized argument fell through to
    # proc.run() with proc unbound and died with a NameError.
    proc = None
    if len(sys.argv) > 1:
        mode = sys.argv[1]
        if mode == 'm':
            proc = LookupMaster()
        elif mode == 's':
            proc = LookupSlave('x')
        elif mode == 'c':
            create_slaves(LookupSlave, prefix="x")
        else:
            sys.stderr.write(
                "unknown mode %r; expected 'm', 's' or 'c'\n" % mode)
            sys.exit(2)
    else:
        # Parenthesized form prints the same text under Python 2 and 3.
        print("spawning minions!")
        create_slaves(LookupSlave)
        proc = LookupMaster()
    if proc is not None:
        proc.run()
Пример #4
0
            if len(rfriends) < RFRIEND_POINTS:
                for u in rfriends:
                   jobs[u].rfriends_score = RFRIEND_POINTS/len(rfriends)

        if tweets:
            ats = defaultdict(int)
            for tweet in tweets:
                for uid in tweet.mentions:
                    ats[uid]+=1
            for u,c in ats.iteritems():
                points = c*MENTION_POINTS
                if points >0:
                    jobs[u].mention_score = points

        for k,j in jobs.iteritems():
            j._id = k
            j.put(self.stalk)


if __name__ == '__main__':
    # Command-line entry point.  The optional first argument selects a role:
    #   (none)  create slaves with create_slaves(LookupSlave), then run a
    #           LookupMaster in this process
    #   m       run a LookupMaster only
    #   s       run a single LookupSlave('x')
    #
    # Any other argument used to leave proc unbound, so proc.run() raised a
    # NameError; it is now rejected with a usage message and exit status 2.
    if len(sys.argv) > 1:
        mode = sys.argv[1]
        if mode == 'm':
            proc = LookupMaster()
        elif mode == 's':
            proc = LookupSlave('x')
        else:
            sys.stderr.write("unknown mode %r; expected 'm' or 's'\n" % mode)
            sys.exit(2)
    else:
        # Parenthesized form prints the same text under Python 2 and 3.
        print("spawning minions!")
        create_slaves(LookupSlave)
        proc = LookupMaster()
    proc.run()
Пример #5
0
            if len(rfriends) < RFRIEND_POINTS:
                for u in rfriends:
                    jobs[u].rfriends_score = RFRIEND_POINTS / len(rfriends)

        if tweets:
            ats = defaultdict(int)
            for tweet in tweets:
                for uid in tweet.mentions:
                    ats[uid] += 1
            for u, c in ats.iteritems():
                points = c * MENTION_POINTS
                if points > 0:
                    jobs[u].mention_score = points

        for k, j in jobs.iteritems():
            j._id = k
            j.put(self.stalk)


if __name__ == '__main__':
    # Command-line entry point.  With no argument, slaves are created via
    # create_slaves(LookupSlave) and a LookupMaster runs in this process.
    # With 'm' only a LookupMaster runs; with 's' a single LookupSlave('x')
    # runs.
    #
    # An unrecognized argument is reported and the script exits with status
    # 2; before, proc was never bound in that case and proc.run() failed
    # with a NameError.
    args = sys.argv[1:]
    if not args:
        # Parenthesized form prints the same text under Python 2 and 3.
        print("spawning minions!")
        create_slaves(LookupSlave)
        proc = LookupMaster()
    elif args[0] == 'm':
        proc = LookupMaster()
    elif args[0] == 's':
        proc = LookupSlave('x')
    else:
        sys.stderr.write("unknown mode %r; expected 'm' or 's'\n" % args[0])
        sys.exit(2)
    proc.run()