Example No. 1
    def test_rate_limit(self, api, wait=True, buffer=.1):
        """
        Tests whether the rate limit of the last request has been reached.
        :param api: The `tweepy` api instance.
        :param wait: A flag indicating whether to wait for the rate limit reset
                    if the rate limit has been reached.
        :param buffer: A buffer time in seconds that is added on to the waiting
                    time as an extra safety margin.
        :return: True if it is ok to proceed with the next request. False otherwise.
        """
        # Get the number of remaining requests
        remaining = int(api.last_response.headers['x-rate-limit-remaining'])
        # Check if we have reached the limit
        if remaining == 0:
            limit = int(api.last_response.headers['x-rate-limit-limit'])
            reset = int(api.last_response.headers['x-rate-limit-reset'])
            # Convert the epoch reset time to a local datetime
            # (comparable with datetime.now() below)
            reset = datetime.fromtimestamp(reset)
            # Let the user know we have reached the rate limit
            log.info("0 of %d requests remaining until %d.", limit, reset)

            if wait:
                # Determine the delay and sleep
                delay = (reset - datetime.now()).total_seconds() + buffer
                log.info("Sleeping for %d", delay)
                sleep(delay)
                # We have waited for the rate limit reset. OK to proceed.
                return True
            else:
                # We have reached the rate limit. The user needs to handle the
                # rate limit manually.
                return False

        # We have not reached the rate limit
        return True
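
A minimal usage sketch of the method above; `crawler` (the object that owns test_rate_limit) and `user_ids` are hypothetical names:

    # Pause automatically whenever the current rate-limit window is exhausted
    for user_id in user_ids:
        api.get_user(user_id=user_id)  # any tweepy call populates last_response
        crawler.test_rate_limit(api, wait=True, buffer=1.0)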
Example No. 2
 def update_timestamp_data(self, timestamp):
     try:
         update_statement = self.data.update().where(
             self.data.c.data_timestamp == timestamp).values(status=True)
         self.con.execute(update_statement)
     except Exception as e:
         log.error(e)
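
The update assumes self.data is a SQLAlchemy Table with at least data_timestamp and status columns; a hypothetical equivalent declaration:

    import sqlalchemy

    data = sqlalchemy.Table(
        'data', sqlalchemy.MetaData(),
        sqlalchemy.Column('data_timestamp', sqlalchemy.String),
        sqlalchemy.Column('status', sqlalchemy.Boolean),
    )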
 def on_status(self, status):
     """ Handle logic when the data coming """
     try:
         tweet = json.loads(status)
         # Update sentiment score
         tweet["sentiment"] = SentimentAnalysis.get_sentiment(tweet_text=tweet["text"])
         self.tw_store.save_tweet(tweet)
     except Exception as e:
         log.error(e)
 def on_data(self, data):
     """ Method to passes data from statuses to the on_status method"""
     if 'in_reply_to_status' in data:
         self.on_status(data)
     elif 'delete' in data:
         delete = json.loads(data)['delete']['status']
         if self.on_delete(delete['id'], delete['user_id']) is False:
             return False
     elif 'limit' in data:
         if self.on_limit(json.loads(data)['limit']['track']) is False:
             return False
     elif 'warning' in data:
         warning = json.loads(data)['warning']
         log.warning(warning['message'])
         return False
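
For illustration, on_data can be exercised with a hand-built payload (the constructor argument is hypothetical); a deletion notice is routed to on_delete:

    listener = TwitterStream(tw_store)  # hypothetical construction
    listener.on_data('{"delete": {"status": {"id": 1, "user_id": 2}}}')
    # the 'delete' key is found in the raw string, so on_delete(1, 2)
    # (inherited or defined elsewhere) is invoked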
Example No. 5
 def execute(self):
     """Execute the twitter crawler, loop into the keyword_list"""
     while True:
         log.info("Star crawling back....")
         delay = 600
         for keyword in self.keyword_list:
             log.info('Crawl data for %s', keyword["keyword"])
             try:
                 self.crawl(keyword)
             except Exception:
                 log.error('Error in crawling process', exc_info=True)
                 log.info("Sleeping for %ds", delay)
                 sleep(delay)
         # Sleep for 10 minutes after crawling all of the keywords,
         # then start over again
         log.info("Sleeping for %ds...", delay)
         sleep(delay)
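
For reference, the loop above only reads keyword["keyword"], so keyword_list is presumably a list of dicts shaped like this (hypothetical values):

    keyword_list = [
        {"keyword": "melbourne"},
        {"keyword": "auspol"},
    ]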
Example No. 6
    def connect(self):
        '''Connect to database then set con and meta attributes'''

        # We connect with the help of the PostgreSQL URL
        # postgresql://federer:grandestslam@localhost:5432/tennis
        url = 'postgresql://{}:{}@{}:{}/{}'
        url = url.format(self.user, self.password, self.host, self.port,
                         self.database_name)

        try:
            # create_engine() returns an Engine that manages the connections
            con = sqlalchemy.create_engine(url, client_encoding='utf8')

            # Reflect the existing tables into a MetaData object
            meta = sqlalchemy.MetaData()
            meta.reflect(bind=con)
        except Exception as ex:
            log.error(ex)
            return False

        self.con = con
        self.meta = meta

        return True
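
A minimal usage sketch, assuming the owning class (called Database here, a hypothetical name) stores the five settings used to build the URL:

    db = Database(user='federer', password='grandestslam',
                  host='localhost', port=5432, database_name='tennis')
    if db.connect():
        # reflection exposes every existing table through meta.tables
        tweets = db.meta.tables['tweets']  # 'tweets' is a hypothetical table
        rows = db.con.execute(tweets.select())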
 def execute(self):
     """Execute the twitter crawler, loop into the keyword_list
     """
     listen = TwitterStream(self.tw_store)
     stream = tweepy.Stream(self.auth, listen)
     loop = True
     while loop:
         try:
             log.info("Start stream tweets data")
             stream.filter(locations=AUS_GEO_CODE)
             loop = False
             log.info("End stream tweets data")
         except Exception as e:
             log.error("There's an error, sleep for 10 minutes")
             log.error(e)
             loop = True
             stream.disconnect()
             time.sleep(600)
             continue
 def on_timeout(self):
     """ Handle time out when API reach its limit """
     log.info("API Reach its limit, sleep for 10 minutes")
     time.sleep(600)
     return
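
For reference, tweepy's locations filter takes flat bounding boxes as [sw_lon, sw_lat, ne_lon, ne_lat] coordinate lists, so AUS_GEO_CODE is presumably something like this (approximate, hypothetical values):

    AUS_GEO_CODE = [112.9, -43.7, 153.6, -10.7]  # rough bounding box of Australia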
# get twitter-hashtags couchdb instance
try:
    hashtags = server.create("twitter-hashtags")
except:
    hashtags = server["twitter-hashtags"]

# get twitter-words couchdb instance
try:
    words = server.create("twitter-words")
except:
    words = server["twitter-words"]

# get twitter-users couchdb instance
try:
    user = server.create("twitter-users")
except:
    user = server["twitter-users"]
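
The same get-or-create pattern repeats three times; with the couchdb-python package, a small helper (hypothetical, not part of the original script) would collapse it:

    def get_or_create(server, name):
        """Return the named CouchDB database, creating it if it is missing."""
        try:
            return server.create(name)
        except couchdb.http.PreconditionFailed:  # database already exists
            return server[name]

    hashtags = get_or_create(server, "twitter-hashtags")
    words = get_or_create(server, "twitter-words")
    user = get_or_create(server, "twitter-users")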

log.info("START - Processing analytics data")

analytic_db = AnalyticsLog(database.con, database.meta)
date_list = analytic_db.fetch_unprocessed_data()

# fetch data for individual date
for date_for_analysis in date_list:
    log.info("START - Process data for %s", date_for_analysis)
    view_data = []

    for row in db.view('_design/analytics/_view/tweets-victoria',
                       startkey=date_for_analysis, endkey=date_for_analysis):
        view_data.append(row.value)

    log.info("Processing %d row of data", len(view_data))
import math
import couchdb

from app.sentiment_analysis import SentimentAnalysis
from app.logger import LOGGER as log

import settings

ALL_DOCS_VIEW = '_all_docs'

try:
    log.info("START db updater script")
    log.info("-----------------------")
    server = couchdb.Server(url=settings.COUCHDB_SERVER)
    db = server[settings.COUCHDB_DB]

    info = db.info()
    doc_count = info["doc_count"]
    num_per_request = 10000

    iteration = math.ceil(doc_count / num_per_request)

    for i in range(iteration):
        log.info('Running iteration %d', i)
        for row in db.view(ALL_DOCS_VIEW,
                           limit=num_per_request,
                           skip=i * num_per_request):
            data = db.get(row.id)
            data["sentiment"] = SentimentAnalysis.get_sentiment(data["text"])
            db.save(data)
        log.info('Iteration %d finished successfully', i)
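
A worked example of the paging arithmetic above, with hypothetical counts:

    doc_count = 25000
    num_per_request = 10000
    iteration = math.ceil(doc_count / num_per_request)  # -> 3
    offsets = [i * num_per_request for i in range(iteration)]
    # -> [0, 10000, 20000]; the final page simply returns fewer rows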
Example No. 11
 def get_sentiment(tweet_text):
     try:
         analyzer = SentimentIntensityAnalyzer()
         return analyzer.polarity_scores(tweet_text)
     except Exception as e:
         log.error(e)
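
Assuming SentimentIntensityAnalyzer is VADER's (the usual source of that class name, shipped by the vaderSentiment package and also by nltk), polarity_scores returns a dict of four floats:

    from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

    scores = SentimentIntensityAnalyzer().polarity_scores("Great game tonight!")
    # keys: 'neg', 'neu', 'pos' (each in [0, 1]) and 'compound' in [-1, 1]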