示例#1
0
    def __init__(self, hashtag=None, size=10, interval=1):
        """
        Records the caller's settings and builds the repeating timer that is
        bound to leader_print.

        hashtag  -- stored as-is on self.hashtag (None by default)
        size     -- stored as-is on self.size (10 by default)
        interval -- stored on self.interval and handed to RepeatingTimer
        """
        # engine url is read from the shared data module
        self.url = data.engine_url

        self.hashtag, self.size, self.interval = hashtag, size, interval

        # repeating element wired to the leader_print method
        self.leader_proc = RepeatingTimer(interval, self.leader_print)
示例#2
0
    def __init__(self, engine_url):
        """
        Keeps the engine url, resets the bookkeeping fields, then builds the
        logger and the repeating timer bound to self.count.

        engine_url -- stored as-is on self.url
        """
        self.url = engine_url

        # cpt is used to force data flushing to db
        # interval of 1: repeats every second by default
        self.cpt, self.interval = 0, 1

        # logger comes from the class' own setup routine
        self.logger = self.setup_logger()

        # element that repeats the count method periodically
        self.count_unit = RepeatingTimer(self.interval, self.count)
示例#3
0
class Counter():
    """
    Turns uncrawled Tweet rows into per-author / per-hashtag Member counts.

    A RepeatingTimer repeats count() periodically. Every public method that
    talks to the database opens its own session and engine through connect()
    and always releases them, even when a query fails.
    """

    def __init__(self, engine_url):
        """
        Stores the settings and builds the logger and the repeating timer.

        engine_url -- url handed to create_engine by connect()
        """
        self.url = engine_url

        self.cpt = 0  # pending changes not committed yet, see commit_counts
        self.interval = 1  # repeats every second by default

        # sets up logger
        self.logger = self.setup_logger()

        # element that repeats count method periodically
        self.count_unit = RepeatingTimer(self.interval, self.count)

    def setup_logger(self):
        """
        Sets up logging capabilities. Defines precisely how and where logging
        information will be displayed and saved.

        The "Counter" logger is shared by the whole process, so the file
        handler is attached only when the logger has no file handler yet.
        Attaching one per instance would write every record once per
        Counter that has been created.

        Returns the configured logger.
        """
        my_logger = logging.getLogger("Counter")
        my_logger.setLevel(logging.ERROR)

        has_file_handler = any(isinstance(handler, logging.FileHandler)
                               for handler in my_logger.handlers)
        if not has_file_handler:
            fh = logging.FileHandler(data.log_path)  # file part of logger
            fh.setLevel(logging.ERROR)
            my_logger.addHandler(fh)

        # NOTE: the logger level is ERROR, so this info record is filtered out
        my_logger.info("########")   # Separate sessions

        return my_logger

    def connect(self):
        """
        Separated so that the method can be run in each created thread.
        Initiates connection to the database and starts a Session to be used
        to query it.

        Returns a (session, engine) pair; the caller has to close the session
        and dispose of the engine.
        """
        engine = create_engine(self.url, echo=data.debug)
        Session = sessionmaker(bind=engine)

        return Session(), engine  # Bridges class to db

    def start(self):
        """
        Starts counting new members periodically
        """
        self.count_unit.start()

    def stop(self):
        """
        Stops counting new members periodically
        """
        self.count_unit.stop()

    def display_tweets(self):
        """
        debug

        Logs the hashtag and author of every Tweet, ordered by id.
        """
        session, engine = self.connect()
        try:
            for tweet in session.query(Tweet).order_by(Tweet.id):
                self.logger.info(tweet.hashtag + " " + tweet.author)
        finally:
            # released even when the query fails
            session.close()
            engine.dispose()

    def count(self):
        """
        Every time it is called, checks the database for tweets that have not
        been crawled yet and folds each of them into the members table.

        Changes are committed through commit_counts after every tweet.
        """
        self.logger.info("((((((((((((((((((((((( COUNTING )))))))))))))))))))))))))))")
        session, engine = self.connect()
        try:
            # "== False" builds a SQL expression here, it is not a Python test
            t_query = session.query(Tweet).filter(Tweet.crawled == False).order_by(Tweet.id)  # noqa: E712
            tweets = t_query.all()
            self.logger.info("New counts to perform : %d", len(tweets))
            # FIXME: This is blocking. It shouldnt!
            for tweet in tweets:
                self._count_tweet(session, tweet)
                self.commit_counts(session)
        finally:
            # released even when a query or a commit fails
            session.close()
            engine.dispose()

    def _count_tweet(self, session, tweet):
        """
        Updates or creates the Member matching one tweet.

        The tweet is invalidated when the member cannot be processed, that is
        when an ElementException is raised along the way.
        """
        try:
            m_query = (session.query(Member)
                       .filter(Member.author == tweet.author)
                       .filter(Member.hashtag == tweet.hashtag))

            # Fetched once: the list gives both the count and the member
            members = m_query.all()
            if len(members) == 1:
                self.logger.info("Member found, updating. . .")
                self.update(session, members[0], tweet)
            elif not members:
                self.logger.info("No member found, creating. . .")
                self.create(session, tweet)
            else:
                self.logger.error("ElementException :  More than one member found !")
                raise ElementException  # FIXME : Take care
        except ElementException:
            self.invalidate(session, tweet)
            self.logger.error("ElementException :  Could not process %s !", tweet)

    def invalidate(self, session, tweet):
        """
        Invalidates a tweet so that it is not recrawled by the counter
        and can be verified later
        """
        tweet.invalid = True
        tweet.crawled = True
        session.add(tweet)

        self.cpt += 1  # indicates that we have a candidate for the flushing

    def update(self, session, member, tweet):
        """
        Updates member values through member.update() and marks the tweet as
        crawled.

        Raises ElementException when the member lacks an author or a hashtag.
        """
        if not (member.has_author() and member.has_hashtag()):
            self.logger.error("ElementException :  Cannot update Member, Member is not valid !")
            raise ElementException  # FIXME : Take care

        member.update()
        session.add(member)

        # sets tweet to crawled state
        tweet.crawled = True
        session.add(tweet)

        self.cpt += 1

    def create(self, session, tweet):
        """
        Creates a new Member using data from the given Tweet
        Called when no Member is found for the current
        author/hashtag couple.

        Raises ElementException when the tweet lacks an author or a hashtag.
        """
        if not (tweet.has_author() and tweet.has_hashtag()):
            self.logger.error("ElementException :  Cannot create Member, Tweet is not valid !")
            raise ElementException  # FIXME : Take care

        # third argument is presumably the initial count -- confirm in Member
        member = Member(tweet.author, tweet.hashtag, 1)
        session.add(member)

        # sets tweet to crawled state
        tweet.crawled = True
        session.add(tweet)

        # incremented like in update/invalidate, so that changes which are
        # still pending are not forgotten
        self.cpt += 1

    def member_show(self, num=20):
        """
        debug

        Logs the number of Members, then at most num of them ordered by id.
        """
        self.logger.info("#########################################")

        session, engine = self.connect()
        try:
            self.member_count()
            members = session.query(Member).order_by(Member.id).all()
            # a negative num shows nothing rather than slicing from the end
            for member in members[:max(num, 0)]:
                self.logger.info(member)
        finally:
            session.close()
            engine.dispose()

    def member_count(self):
        """
        debug

        Logs the number of Members in table
        """
        session, engine = self.connect()
        try:
            members = session.query(Member).order_by(Member.id).all()
            self.logger.info("Members: %d", len(members))
        finally:
            session.close()
            engine.dispose()

    def commit_counts(self, session):
        """
        Commits data to db if enough data has to be updated

        FIXME: By not committing every time, we might have duplicates
        if the same guy tweets several times with the same flag
        """
        limit = 1  # number of pending changes that triggers a commit
        if self.cpt >= limit:
            session.commit()  # force saving changes
            self.cpt = 0
示例#4
0
class LeaderBoard():
    def __init__(self, hashtag=None, size=10, interval=1):
        """
        Records the caller's settings and builds the repeating timer that is
        bound to leader_print.

        hashtag  -- hashtag to report on, None meaning every tracked one
                    (see get_leaders)
        size     -- maximum number of leaders fetched per hashtag
        interval -- stored on self.interval and handed to RepeatingTimer
        """
        # engine url is read from the shared data module
        self.url = data.engine_url

        self.hashtag, self.size, self.interval = hashtag, size, interval

        # repeating element wired to the leader_print method
        self.leader_proc = RepeatingTimer(interval, self.leader_print)

    def start(self):
        """
        Starts counting new members periodically
        """
        self.leader_proc.start()

    def stop(self):
        """
        Stops counting new members periodically
        """
        self.leader_proc.stop()

    def connect(self):
        """
        Builds a fresh engine for self.url and opens a session bound to it.

        Returns a (session, engine) pair, both needed by the caller to query
        the database and to release the resources afterwards.
        """
        # engine first, echo driven by the debug flag of the data module
        db_engine = create_engine(self.url, echo=data.debug)

        # session factory bound to that engine, used once right away
        session_factory = sessionmaker(bind=db_engine)
        db_session = session_factory()

        return db_session, db_engine  # Bridges class to db

    def get_hashtags(self, session):
        """
        Lists the hashtag value of every TrendyHashtag flagged as active,
        most recently created first.

        session -- open session, used directly to run the query
        """
        active_rows = (
            session.query(TrendyHashtag)
            .filter(TrendyHashtag.active == True)
            .order_by(desc(TrendyHashtag.created))
            .all()
        )
        return [row.hashtag for row in active_rows]

    def get_leaders(self):
        """
        Returns a list of the current leaders for the chosen hashtags.
        self.hashtag can be either the hashtag that has to be printed or None.
        If self.hashtag is None, the leaders for all current tracked hashtags will be outputed.


        The result will be a list of twitter usernames, with the current number
        of tweets containing the hashtag they sent.
        The list is of max size size, but can be smaller of even empty if no
        user has been detected yet.

        The result is of type :
        [[#hashtag1, [Leader1, Leader2, ...]],  [#hashtag2, [Leader1, Leader2, ...]]]
        #FIXME : Ugly and done in the plane. Get back to this soon .
        """
        session, engine = self.connect()
        leaders = []
        if self.hashtag is None:
            hashtags = self.get_hashtags(session)
        else:
            hashtags = [self.hashtag]

        for h in hashtags:
            top_members = self.get_hashtag_leaders(session, h, self.size)
            leaders.append([h, top_members])

        session.close()
        engine.dispose()

        return leaders

    def get_hashtag_leaders(self, session, hashtag, size=10):
        """
        Fetches the Members recorded for the given hashtag, highest count
        first.

        session -- open session used to run the query
        hashtag -- value matched against Member.hashtag
        size    -- passed to the query as its limit

        Returns the list of matching Member rows, which can be shorter than
        size or even empty.
        """
        by_count = desc(Member.count)
        top = (
            session.query(Member)
            .filter(Member.hashtag == hashtag)
            .order_by(by_count)
            .limit(size)
            .all()
        )

        # the query is already limited; the slice acts as a second cap
        return top[:size] if size > 0 else top