def returns_new_instance_if_config_provided(self):
    Application._instance = None
    config = MagicMock()
    inst1 = Application.instance(config)
    inst2 = Application.instance(config)
    self.assertNotEqual(inst1, inst2)
def fetch_articles(num_urls=None):
    app = Application.instance()
    most_popular = app.providers["nytimes_mostpopular"]
    if not num_urls:
        num_urls = len(most_popular.api_urls)
    data = most_popular.fetch_many(num_urls)
    app.article_store.save_articles("nytimes_mostpopular", data,
                                    most_popular.config["category_max_articles"])
def main():
    num_changes = 0
    num_articles = 0
    num_dupes_removed = 0

    config = get_periodic_shell_config()
    app = Application.instance(config)
    most_popular = app.providers["nytimes_mostpopular"]
    redis = Redis(connection_pool=app._redis_pool)

    sorted_sets = redis.keys("sorted*")
    sorted_sets.sort()

    for sorted_name in sorted_sets:
        collection, category = LABEL_PATTERN.match(sorted_name).groups()
        label = ".".join([collection, category])
        # defined up front so the scard call below works even when no URL changed
        set_name = "set.{0}".format(label)
        logger.info("Making changes for: {0}".format(label))

        data = redis.zrange(sorted_name, 0, -1, withscores=True)
        old_url = None
        new_url = None
        for article_json, score in data:
            num_articles += 1
            article = json.loads(article_json)
            old_url = article["url"]

            uri = furl(old_url)
            for key, val in most_popular.config.get("url_decoration", {}).iteritems():
                uri.args[key] = val
            new_url = uri.url

            if new_url != old_url:
                num_changes += 1
                article_json = article_json.decode("utf8")
                new_article_json = article_json.replace(
                    old_url.replace("/", "\\/"),
                    new_url.replace("/", "\\/"))

                # persist change in sorted set
                redis.zrem(sorted_name, article_json)
                redis.zadd(sorted_name, new_article_json, score)

                # persist change in set
                redis.srem(set_name, old_url)
                redis.sadd(set_name, new_url)

        sorted_num = redis.zcard(sorted_name)
        set_num = redis.scard(set_name)
        count = int(redis.hget(".".join(["counts", collection]), category))

        if set_num != sorted_num:
            logger.warn("set/sortedset mismatch: set:{0} sorted:{1}".format(set_num, sorted_num))
        if set_num != count:
            num_dupes_removed += abs(count - set_num)
            # write the corrected count back to the same counts hash read above
            redis.hset(".".join(["counts", collection]), category, set_num)

    logger.info("num article changes: {0}/{1}".format(num_changes, num_articles))
    logger.info("num dupes removed: {0}".format(num_dupes_removed))
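# Illustration only (not part of the job above): a minimal sketch of what the
# url_decoration step does. The keys and values here are hypothetical; the real
# ones come from most_popular.config["url_decoration"].
def _url_decoration_example():
    from furl import furl
    decoration = {"partner": "example", "src": "rec"}  # hypothetical config
    uri = furl("http://www.nytimes.com/2014/story.html?ref=home")
    for key, val in decoration.iteritems():
        # existing query args are preserved; decoration keys are added or overwritten
        uri.args[key] = val
    return uri.url  # .../story.html?ref=home&partner=example&src=rec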
def personalize():
    """
    Return results based on user query
    Request Content-Type must be set to application/json
    The payload must be a json ascii string. e.g. {"Arts":0.9,"Autos":0.9}
    """
    limit = request.args.get("limit", 20)
    try:
        limit = int(limit)
    except ValueError:
        return Response(response=ERR_INVALID_QUERY, status=400)

    if limit < 0:
        return Response(response=ERR_INVALID_QUERY, status=400)
    elif limit > MAX_RESPONSE_SIZE:
        limit = MAX_RESPONSE_SIZE

    query = request.get_json()
    if query is None:
        return Response(response=ERR_NO_QUERY, status=400)
    elif type(query) != dict:
        return Response(response=ERR_INVALID_QUERY, status=400)

    numbers = {}
    weight_total = 0.0
    for category, weight in query.iteritems():
        if type(weight) not in (float, int) or weight < 0 or weight > 1:
            return Response(response=ERR_INVALID_QUERY, status=400)
        # weight_total should always be float
        weight_total += weight

    # guard against division by zero when the query is empty or all weights are zero
    if weight_total == 0:
        return Response(response=ERR_INVALID_QUERY, status=400)

    for category, weight in query.iteritems():
        numbers[category] = int(math.ceil(weight / weight_total * limit))

    # collect all recommendations for each category
    app = Application.instance()
    recommendations = []
    for category in numbers:
        recommendations.extend(
            app.article_store.fetch("nytimes_mostpopular", category,
                                    limit=numbers[category], withscores=True))

    # sort and deduplicate recommendations. store results in articles array
    recommendations.sort(key=lambda x: x["score"], reverse=True)
    articles = []
    url_set = set()
    for article in recommendations:
        if article["url"] not in url_set:
            del article["score"]
            articles.append(article)
            url_set.add(article["url"])
        # enforce articles limit
        if len(articles) >= limit:
            break

    return jsonify(d=articles, num_articles=len(articles))
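# Worked example of the slot allocation above (illustration only, with a
# hypothetical payload). Because of the ceiling, the per-category slots can add
# up to more than limit, which is why the merge loop above re-checks
# len(articles) >= limit after sorting by score.
def _ceil_allocation_example():
    import math
    query = {"Arts": 0.9, "Autos": 0.9, "Sports": 0.5}  # hypothetical payload
    limit = 20
    weight_total = float(sum(query.values()))  # 2.3
    numbers = {}
    for category, weight in query.iteritems():
        numbers[category] = int(math.ceil(weight / weight_total * limit))
    # -> {"Arts": 8, "Autos": 8, "Sports": 5}: 21 slots requested for a limit
    # of 20, which the dedup/limit loop in personalize() then trims back down.
    return numbers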
def index():
    """
    Return a listing of available interests
    """
    app = Application.instance()
    categories = app.article_store.get_category_counts("nytimes_mostpopular")
    for category, score in categories.iteritems():
        categories[category] = int(score)
    return jsonify(d=categories)
def main():
    config = get_periodic_shell_config()
    app = Application.instance(config)
    most_popular = app.providers["nytimes_mostpopular"]
    num_urls = len(most_popular.api_urls)

    logger.info("Populating data store with {0} API calls".format(num_urls))
    logger.info("Fetching data")
    data = most_popular.fetch_many(num_urls)

    if config.server["purge"]:
        logger.info("Clearing the datastore")
        app.article_store.clear_all()

    logger.info("Saving data")
    app.article_store.save_articles("nytimes_mostpopular", data)
def personalize():
    """
    Return results based on user query
    Request Content-Type must be set to application/json
    The payload must be a json ascii string. e.g. {"Arts":0.9,"Autos":0.9}
    """
    limit = request.args.get("limit", 20)
    try:
        limit = int(limit)
    except ValueError:
        return Response(response=ERR_INVALID_QUERY, status=400)

    if limit < 0:
        return Response(response=ERR_INVALID_QUERY, status=400)
    elif limit > MAX_RESPONSE_SIZE:
        limit = MAX_RESPONSE_SIZE

    query = request.get_json()
    if query is None:
        return Response(response=ERR_NO_QUERY, status=400)
    elif type(query) != dict:
        return Response(response=ERR_INVALID_QUERY, status=400)

    numbers = {}
    weight_total = 0.0  # keep as float so the division below is not integer division
    for category, weight in query.iteritems():
        if type(weight) not in (float, int) or weight < 0 or weight > 1:
            return Response(response=ERR_INVALID_QUERY, status=400)
        weight_total += weight

    # guard against division by zero when the query is empty or all weights are zero
    if weight_total == 0:
        return Response(response=ERR_INVALID_QUERY, status=400)

    for category, weight in query.iteritems():
        numbers[category] = int(math.floor(weight / weight_total * limit))

    articles = []
    url_set = set()
    app = Application.instance()
    sorted_categories = sorted(numbers, reverse=True)
    for category in sorted_categories:
        batch = app.article_store.fetch("nytimes_mostpopular", category, numbers[category])
        for article in batch:
            if article["url"] not in url_set:
                articles.append(article)
                url_set.add(article["url"])

    return jsonify(d=articles, num_articles=len(articles))
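# Note on the floor-based allocation above (illustration only, same
# hypothetical payload as before): rounding down means the per-category slots
# can add up to less than limit, so this variant may return fewer than `limit`
# articles.
def _floor_allocation_example():
    import math
    query = {"Arts": 0.9, "Autos": 0.9, "Sports": 0.5}  # hypothetical payload
    limit = 20
    weight_total = float(sum(query.values()))  # 2.3
    numbers = dict((category, int(math.floor(weight / weight_total * limit)))
                   for category, weight in query.iteritems())
    # -> {"Arts": 7, "Autos": 7, "Sports": 4}: only 18 of the 20 possible slots
    return numbers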
def fetch_interest(interest_name):
    """
    Returns articles belonging to the provided interest name
    """
    limit = request.args.get("limit", 20)
    try:
        limit = int(limit)
    except ValueError:
        return Response(response=ERR_INVALID_QUERY, status=400)

    if limit < 0:
        return Response(response=ERR_INVALID_QUERY, status=400)
    elif limit > MAX_RESPONSE_SIZE:
        limit = MAX_RESPONSE_SIZE

    app = Application.instance()
    articles = app.article_store.fetch("nytimes_mostpopular", interest_name, limit)
    return jsonify(d=articles, num_articles=len(articles))
from up.headliner import Application
from up.headliner.utils import read_config_file
from up.headliner import http

config = read_config_file()
app = Application.instance(config)
http.load_routes(config.server["routes"])

print "starting app"
webapp = http.webapp
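# Illustration only: `webapp` above is a plain WSGI callable, so for a quick
# local smoke test it can be served with the standard library. A production
# deployment would instead point its WSGI server at this module.
if __name__ == "__main__":
    from wsgiref.simple_server import make_server
    make_server("127.0.0.1", 8080, webapp).serve_forever()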
def main():
    config = get_http_config()
    Application.instance(config)
    http.load_routes(config.server["routes"])
    http.webapp.run(**config.server["http"])