def get_image(image_url, actually_store=True):
    """
    Job that gets queued when a tweet needs to be analyzed.

    image_url: URL handed to retrieve_image().
    actually_store: when true (the default), the processed image is passed to
        store_to_vipr() and the key it returns is passed to store_to_redis().

    Returns nothing. The "stats:tweets-processed" counter is incremented on
    every call, whether or not an image could be retrieved.
    """
    stats_conn = utils.get_rq_redis_conn()
    started_at = time.time()  # timing covers download, processing and storage

    fetched = retrieve_image(image_url)
    if fetched is not None:  # retrieve_image() signals "no usable image" with None
        processed = process_image(fetched)
        if actually_store:
            # store the image first, then keep track of its key in redis
            store_to_redis(store_to_vipr(processed))
        elapsed = time.time() - started_at
        # Only a sample of the timings is reported. NOTE(review): the original
        # comment says "about 50%", but randint(1, 10) < 5 is true for 4 of the
        # 10 possible values.
        if random.randint(1, 10) < 5:
            stats_conn.lpush("stats:execution-times", elapsed)

    stats_conn.incr("stats:tweets-processed")  # one more tweet handled
def watch_stream():
    """
    Follow the Twitter streaming API and queue a get_image job per photo tweet.

    The tracked hashtag is read from the "hashtag" key in Redis. That key is
    re-read for every tweet received; when it differs from the hashtag being
    tracked, the current stream is abandoned and a new one is opened for the
    new value. Every tweet received increments "stats:tweets".

    Runs forever: httplib.IncompleteRead and ProtocolError raised by the
    stream are logged and the stream is reopened. Any other exception raised
    outside the per-tweet handling propagates to the caller.
    """
    twitter_creds = utils.twitter_creds()
    redis_queue = utils.get_rq_redis_conn()
    hashtag = redis_queue.get("hashtag")
    q = utils.get_rq()

    #setup the twitter streaming connectors.
    twitter_api = TwitterAPI(
        consumer_key=twitter_creds['consumer_key'].encode('ascii', 'ignore'),
        consumer_secret=twitter_creds['consumer_secret'].encode('ascii', 'ignore'),
        access_token_key=twitter_creds['access_token'].encode('ascii', 'ignore'),
        access_token_secret=twitter_creds['token_secret'].encode('ascii', 'ignore')
    )

    watcher_logger.info("Waiting for tweets...")
    while True:
        try:
            stream = twitter_api.request('statuses/filter', {'track': hashtag})
            for tweet in stream.get_iterator():
                # Read the configured hashtag once so the value compared and
                # the value adopted cannot differ.
                current_hashtag = redis_queue.get("hashtag")
                if hashtag != current_hashtag:
                    watcher_logger.info("Hashtag changed from {}, breaking loop to restart with new hashtag".format(hashtag))
                    hashtag = current_hashtag
                    break  # the enclosing while loop reopens the stream

                redis_queue.incr("stats:tweets")  #Let Redis know we got another one.
                watcher_logger.debug("received tweet with tag {}".format(hashtag))

                try:
                    #Look for the photo. If it's not there, this raises a KeyError, caught below
                    media = tweet['entities']['media'][0]
                    if media['type'] == 'photo':
                        # A retweet carries the original tweet under
                        # 'retweeted_status'; the boolean 'retweeted' field
                        # does not identify retweets.
                        if 'retweeted_status' in tweet:
                            watcher_logger.info("Tweet was a RT - ignoring")
                            continue
                        media_url = media['media_url']
                        watcher_logger.info("Dispatching tweet ({}) with URL {}".format(hashtag, media_url))
                        #add a job to the queue, calling get_image() with the image URL and a timeout of 60s
                        q.enqueue(
                            get_image,
                            media_url,
                            ttl=60,
                            result_ttl=60,
                            timeout=60
                        )
                except KeyError as e:
                    # Format the exception itself: e.message is deprecated and
                    # missing on Python 3, where it would raise inside this handler.
                    watcher_logger.debug("Caught a key error for tweet, expected behavior, so ignoring: {}".format(e))
                except Exception as e:
                    # Keep the watcher alive no matter what a single tweet does.
                    watcher_logger.critical("UNEXPECTED EXCEPTION: {}".format(e))
        except httplib.IncompleteRead as e:
            watcher_logger.warning("HTTP Exception {}".format(e))
        except ProtocolError as e:
            watcher_logger.warning("Protocol Exception {}".format(e))
from rq.decorators import job #function decoration
import dweepy #see dweet.io
import logging
import logging.config
import utils
import datetime

logging.config.dictConfig(utils.get_log_dict())
logger = logging.getLogger('vascodagama.dashboard')

#Setup our redis and RQ connections. see twitter_watch for more details.
redis_images = utils.get_images_redis_conn()
r = utils.get_rq_redis_conn()
q = utils.get_rq()

configstuff = utils.configstuff()


@job("dashboard", connection=r, timeout=10, result_ttl=10)
def send_update(metric, value):
    """
    Send a single metric to dweet.io.

    Logs the metric at debug level, then dweets {metric: value} for the thing
    named by configstuff['dweet_thing']. Decorated as an RQ job on the
    "dashboard" queue.
    """
    message = "Sending update for {}: {}".format(metric, value)
    logger.debug(message)
    thing_name = configstuff['dweet_thing']
    payload = {metric: value}
    dweepy.dweet_for(thing_name, payload)


def update_dashboard():
    """The primary function: log that a dashboard update is starting."""
    logger.info("updating")
import json #json functions

logging.config.dictConfig(utils.get_log_dict())

worker_logger = logging.getLogger("vascodagama.worker")
watcher_logger = logging.getLogger("vascodagama.watcher")
logger = logging.getLogger('vascodagama.images')

#setup flask
app = Flask(__name__)

#connect to redis
redis_images = utils.get_images_redis_conn()

#Setup a connection that will be used by RQ (each redis connection instance only talks to 1 DB)
redis_queue = utils.get_rq_redis_conn()


def get_random_urls(count=100):
    """
    Get a list of random image URLs from redis.

    count: number of RANDOMKEY samples to draw from the images database.

    Returns a list of distinct "url" hash-field values for the sampled keys,
    in no particular order. The list can hold fewer than `count` entries
    because repeated samples are collapsed, and it is empty when the database
    has no keys.
    """
    # Batch 1: draw `count` random keys in a single round trip.
    pipe_keys = redis_images.pipeline()
    for _ in range(count):
        pipe_keys.randomkey()

    # RANDOMKEY yields None on an empty database; never hand that to HGET.
    # Collapsing repeats here does not change the final set of URLs.
    sampled_keys = {key for key in pipe_keys.execute() if key is not None}
    if not sampled_keys:
        return []

    # Batch 2: fetch the "url" field of every sampled key.
    pipe_urls = redis_images.pipeline()
    for key in sampled_keys:
        pipe_urls.hget(key, "url")

    # HGET yields None for a key without a "url" field; leave those out so
    # callers only ever see real URLs.
    return list({url for url in pipe_urls.execute() if url is not None})
import json #json functions

logging.config.dictConfig(utils.get_log_dict())

worker_logger = logging.getLogger("vascodagama.worker")
watcher_logger = logging.getLogger("vascodagama.watcher")
logger = logging.getLogger('vascodagama.images')

#setup flask
app = Flask(__name__)

#connect to redis
redis_images = utils.get_images_redis_conn()

#Setup a connection that will be used by RQ (each redis connection instance only talks to 1 DB)
redis_queue = utils.get_rq_redis_conn()


def get_random_urls(count=100):
    """
    Get a list of random image URLs from redis.

    count: number of RANDOMKEY samples to draw from the images database.

    Returns a list of distinct "url" hash-field values for the sampled keys,
    in no particular order. The list can hold fewer than `count` entries
    because repeated samples are collapsed, and it is empty when the database
    has no keys.
    """
    # Batch 1: draw `count` random keys in a single round trip.
    pipe_keys = redis_images.pipeline()
    for _ in range(count):
        pipe_keys.randomkey()

    # RANDOMKEY yields None on an empty database; never hand that to HGET.
    # Collapsing repeats here does not change the final set of URLs.
    sampled_keys = {key for key in pipe_keys.execute() if key is not None}
    if not sampled_keys:
        return []

    # Batch 2: fetch the "url" field of every sampled key.
    pipe_urls = redis_images.pipeline()
    for key in sampled_keys:
        pipe_urls.hget(key, "url")

    # HGET yields None for a key without a "url" field; leave those out so
    # callers only ever see real URLs.
    return list({url for url in pipe_urls.execute() if url is not None})
from rq.decorators import job #function decoration
import dweepy #see dweet.io
import logging
import logging.config
import utils
import datetime

logging.config.dictConfig(utils.get_log_dict())
logger = logging.getLogger('vascodagama.dashboard')

#Setup our redis and RQ connections. see twitter_watch for more details.
redis_images = utils.get_images_redis_conn()
r = utils.get_rq_redis_conn()
q = utils.get_rq()

configstuff = utils.configstuff()


@job("dashboard", connection=r, timeout=10, result_ttl=10)
def send_update(metric, value):
    """
    Send a single metric to dweet.io.

    Logs the metric at debug level, then dweets {metric: value} for the thing
    named by configstuff['dweet_thing']. Decorated as an RQ job on the
    "dashboard" queue.
    """
    message = "Sending update for {}: {}".format(metric, value)
    logger.debug(message)
    thing_name = configstuff['dweet_thing']
    payload = {metric: value}
    dweepy.dweet_for(thing_name, payload)


def update_dashboard():
    """
    The primary function: log that an update is starting and call get_queue_len().
    """
    logger.info("updating")

    #For each one of the metrics, collect the data and issue a job to actually send that out.
    # NOTE(review): the value returned by get_queue_len() is not used here;
    # the disabled debug line below refers to it as tweets_in_queue.
    get_queue_len()
    # logging.debug("{}: {}".format("tweets in queue",tweets_in_queue))