示例#1
0
    def returns_new_instance_if_config_provided(self):
        """
        Two Application.instance calls given the same config object must
        return objects that compare unequal.
        """
        # Clear the class-level slot before exercising Application.instance.
        Application._instance = None

        mock_config = MagicMock()
        first = Application.instance(mock_config)
        second = Application.instance(mock_config)

        self.assertNotEqual(first, second)
示例#2
0
def fetch_articles(num_urls=None):
    """
    Fetch articles from the "nytimes_mostpopular" provider and save them in
    the application's article store.

    A falsy num_urls (None or 0) is replaced by the number of entries in the
    provider's api_urls.
    """
    app = Application.instance()
    provider = app.providers["nytimes_mostpopular"]
    url_count = num_urls or len(provider.api_urls)
    fetched = provider.fetch_many(url_count)
    max_per_category = provider.config["category_max_articles"]
    app.article_store.save_articles("nytimes_mostpopular", fetched, max_per_category)
def main():
    """
    Re-decorate stored article URLs and repair the related redis bookkeeping.

    For every redis key matching "sorted*", each stored article's URL is
    rebuilt with the query arguments from the provider's "url_decoration"
    config. When that changes the URL, the sorted-set member and the URL-set
    entry are replaced, and the category's entry in the "counts.<collection>"
    hash is reset to the URL set's cardinality when the two disagree.
    Totals are logged at the end.
    """
    num_changes = 0
    num_articles = 0
    num_dupes_removed = 0

    config = get_periodic_shell_config()
    app = Application.instance(config)
    most_popular = app.providers["nytimes_mostpopular"]
    # The decoration does not depend on the article, so look it up once.
    url_decoration = most_popular.config.get("url_decoration", {})

    redis = Redis(connection_pool=app._redis_pool)
    sorted_sets = redis.keys("sorted*")
    sorted_sets.sort()
    for sorted_name in sorted_sets:
        collection, category = LABEL_PATTERN.match(sorted_name).groups()
        label = ".".join([collection, category])
        set_name = "set.{0}".format(label)
        counts_key = ".".join(["counts", collection])

        logger.info("Making changes for: {0}".format(label))

        data = redis.zrange(sorted_name, 0, -1, withscores=True)

        for article_json, score in data:
            num_articles += 1
            article = json.loads(article_json)
            old_url = article["url"]

            uri = furl(old_url)
            for key, val in url_decoration.items():
                uri.args[key] = val
            new_url = uri.url

            if new_url == old_url:
                continue

            num_changes += 1
            article_json = article_json.decode("utf8")
            # The stored JSON escapes "/" as "\/", so escape both URLs the
            # same way before substituting.
            new_article_json = article_json.replace(old_url.replace("/", "\\/"), new_url.replace("/", "\\/"))

            # persist change in sorted set
            redis.zrem(sorted_name, article_json)
            redis.zadd(sorted_name, new_article_json, score)

            # persist change in set
            redis.srem(set_name, old_url)
            redis.sadd(set_name, new_url)

            sorted_num = redis.zcard(sorted_name)
            set_num = redis.scard(set_name)
            count = int(redis.hget(counts_key, category))

            if set_num != sorted_num:
                logger.warning("set/sortedset mismatch: set:{0} sorted:{1}".format(set_num, sorted_num))

            if set_num != count:
                num_dupes_removed += abs(count - set_num)
                # Write to the same hash the count was read from; writing to
                # a different key would leave the stale count in place and
                # re-add the same difference on every later changed article.
                redis.hset(counts_key, category, set_num)

    logger.info("num article changes: {0}/{1}".format(num_changes, num_articles))
    logger.info("num dupes removed: {0}".format(num_dupes_removed))
示例#4
0
def personalize():
    """
    Return results based on user query
    Request Content-Type must be set to application/json
    The payload must be a json ascii string.

    e.g. {"Arts":0.9,"Autos":0.9}

    The "limit" query argument (default 20) caps the number of articles
    returned; values above MAX_RESPONSE_SIZE are clamped to it. Each
    category is allotted a share of the limit proportional to its weight
    (rounded up); the fetched articles are then merged by descending score,
    de-duplicated by url and truncated to the limit.

    A 400 response is returned when limit is not a non-negative integer,
    when there is no JSON payload, when the payload is not an object, or
    when a weight is not an int/float within [0, 1].
    """
    limit = request.args.get("limit", 20)
    try:
        limit = int(limit)
    except ValueError:
        return Response(response=ERR_INVALID_QUERY, status=400)

    if limit < 0:
        return Response(response=ERR_INVALID_QUERY, status=400)
    limit = min(limit, MAX_RESPONSE_SIZE)

    query = request.get_json()
    if query is None:
        return Response(response=ERR_NO_QUERY, status=400)
    if not isinstance(query, dict):
        return Response(response=ERR_INVALID_QUERY, status=400)

    # weight_total should always be float
    weight_total = 0.0
    for weight in query.values():
        # Exact type check on purpose: isinstance() would also let bools in.
        if type(weight) not in (float, int) or weight < 0 or weight > 1:
            return Response(response=ERR_INVALID_QUERY, status=400)
        weight_total += weight

    # Nothing can be returned when no article is wanted or every weight is
    # zero; answering here also avoids dividing by a zero weight_total.
    if limit == 0 or weight_total == 0:
        return jsonify(d=[], num_articles=0)

    numbers = {}
    for category, weight in query.items():
        numbers[category] = int(math.ceil(weight / weight_total * limit))

    # collect all recommendations for each category
    app = Application.instance()
    recommendations = []
    for category, allotment in numbers.items():
        if allotment == 0:
            # a zero weight means no article is wanted from this category
            continue
        recommendations.extend(
            app.article_store.fetch("nytimes_mostpopular",
                                    category,
                                    limit=allotment,
                                    withscores=True))

    # sort and deduplicate recommendations. store results in articles array
    recommendations.sort(key=lambda x: x["score"], reverse=True)
    articles = []
    url_set = set()
    for article in recommendations:
        # enforce the articles limit before adding, never after
        if len(articles) >= limit:
            break
        if article["url"] in url_set:
            continue
        url_set.add(article["url"])
        # the score is only needed for ordering; strip it from the response
        del article["score"]
        articles.append(article)

    return jsonify(d=articles, num_articles=len(articles))
示例#5
0
def index():
    """
    Return the category counts of the "nytimes_mostpopular" collection as
    JSON, with every count converted to an integer
    """
    store = Application.instance().article_store
    counts = store.get_category_counts("nytimes_mostpopular")
    # Iterate over a snapshot of the keys; values are converted in place.
    for name in list(counts):
        counts[name] = int(counts[name])
    return jsonify(d=counts)
示例#6
0
def index():
    """
    Respond with a JSON listing of the "nytimes_mostpopular" categories,
    each mapped to its count as an integer
    """
    article_store = Application.instance().article_store
    interests = article_store.get_category_counts("nytimes_mostpopular")
    # Keys are snapshotted first so the mapping can be updated in place.
    for interest in list(interests):
        raw_count = interests[interest]
        interests[interest] = int(raw_count)
    return jsonify(d=interests)
def main():
    """
    Fetch articles from the "nytimes_mostpopular" provider and save them in
    the article store, clearing the store first when the server config's
    "purge" entry is truthy.
    """
    config = get_periodic_shell_config()
    app = Application.instance(config)
    provider = app.providers["nytimes_mostpopular"]

    call_count = len(provider.api_urls)
    announcement = "Populating data store with data with {0} API calls".format(call_count)
    logger.info(announcement)

    # The fetch happens before any purge, so the store is only cleared once
    # fetch_many has returned.
    logger.info("Fetching data")
    fetched = provider.fetch_many(call_count)

    purge_requested = config.server["purge"]
    if purge_requested:
        logger.info("Clearing the datastore")
        app.article_store.clear_all()

    logger.info("Saving data")
    app.article_store.save_articles("nytimes_mostpopular", fetched)
示例#8
0
def personalize():
    """
    Return results based on user query
    Request Content-Type must be set to application/json
    The payload must be a json ascii string.

    e.g. {"Arts":0.9,"Autos":0.9}

    The "limit" query argument (default 20) is clamped to MAX_RESPONSE_SIZE.
    Each category is allotted a share of the limit proportional to its
    weight (rounded down); categories are fetched in reverse order of their
    names and the articles are de-duplicated by url.

    A 400 response is returned when limit is not a non-negative integer,
    when there is no JSON payload, when the payload is not an object, or
    when a weight is not an int/float within [0, 1].
    """
    limit = request.args.get("limit", 20)
    try:
        limit = int(limit)
    except ValueError:
        return Response(response=ERR_INVALID_QUERY, status=400)

    if limit < 0:
        return Response(response=ERR_INVALID_QUERY, status=400)
    elif limit > MAX_RESPONSE_SIZE:
        limit = MAX_RESPONSE_SIZE

    query = request.get_json()
    if query is None:
        return Response(response=ERR_NO_QUERY, status=400)
    elif not isinstance(query, dict):
        return Response(response=ERR_INVALID_QUERY, status=400)

    # Keep the total a float: with an int total, integer weights would be
    # divided with Python 2 integer division and truncated to zero.
    weight_total = 0.0
    for weight in query.values():
        # Exact type check on purpose: isinstance() would also let bools in.
        if type(weight) not in (float, int) or weight < 0 or weight > 1:
            return Response(response=ERR_INVALID_QUERY, status=400)
        weight_total += weight

    # With every weight at zero there is nothing to allot, and dividing by
    # the total below would raise ZeroDivisionError.
    if weight_total == 0:
        return jsonify(d=[], num_articles=0)

    numbers = {}
    for category, weight in query.items():
        numbers[category] = int(math.floor(weight / weight_total * limit))

    articles = []
    url_set = set()
    app = Application.instance()
    # sorted() on the dict orders the category names, not the allotments
    sorted_categories = sorted(numbers, reverse=True)
    for category in sorted_categories:
        if numbers[category] == 0:
            # no article is allotted to this category, so skip the fetch
            continue
        batch = app.article_store.fetch("nytimes_mostpopular", category, numbers[category])
        for article in batch:
            if article["url"] not in url_set:
                articles.append(article)
                url_set.add(article["url"])

    return jsonify(d=articles, num_articles=len(articles))
示例#9
0
def main():
    """
    Populate the article store from the "nytimes_mostpopular" provider.

    The store is cleared before saving when the server config's "purge"
    entry is truthy.
    """
    config = get_periodic_shell_config()
    app = Application.instance(config)
    source = app.providers["nytimes_mostpopular"]

    api_call_total = len(source.api_urls)
    logger.info(
        "Populating data store with data with {0} API calls".format(
            api_call_total))

    # Fetching comes first; the purge below only runs after it has returned.
    logger.info("Fetching data")
    articles = source.fetch_many(api_call_total)

    should_purge = config.server["purge"]
    if should_purge:
        logger.info("Clearing the datastore")
        app.article_store.clear_all()

    logger.info("Saving data")
    app.article_store.save_articles("nytimes_mostpopular", articles)
示例#10
0
def fetch_interest(interest_name):
    """
    Respond with the articles stored under the given interest name.

    The "limit" query argument (default 20) must parse as a non-negative
    integer, otherwise a 400 response is returned; values above
    MAX_RESPONSE_SIZE are clamped to it.
    """
    raw_limit = request.args.get("limit", 20)
    try:
        limit = int(raw_limit)
    except ValueError:
        return Response(response=ERR_INVALID_QUERY, status=400)

    if limit < 0:
        return Response(response=ERR_INVALID_QUERY, status=400)
    limit = min(limit, MAX_RESPONSE_SIZE)

    store = Application.instance().article_store
    articles = store.fetch("nytimes_mostpopular", interest_name, limit)
    return jsonify(d=articles, num_articles=len(articles))
示例#11
0
def fetch_interest(interest_name):
    """
    Look up the articles of one interest and return them as JSON.

    A "limit" query argument that is not a non-negative integer yields a 400
    response. The default is 20 and the value never exceeds
    MAX_RESPONSE_SIZE.
    """
    requested = request.args.get("limit", 20)
    try:
        limit = int(requested)
    except ValueError:
        return Response(response=ERR_INVALID_QUERY, status=400)

    if limit < 0:
        return Response(response=ERR_INVALID_QUERY, status=400)
    if limit > MAX_RESPONSE_SIZE:
        limit = MAX_RESPONSE_SIZE

    article_store = Application.instance().article_store
    found = article_store.fetch("nytimes_mostpopular", interest_name, limit)
    return jsonify(d=found, num_articles=len(found))
def main():
    """
    Re-decorate stored article URLs and repair the related redis bookkeeping.

    For every redis key matching "sorted*", each stored article's URL is
    rebuilt with the query arguments from the provider's "url_decoration"
    config. When that changes the URL, the sorted-set member and the URL-set
    entry are replaced, and the category's entry in the "counts.<collection>"
    hash is reset to the URL set's cardinality when the two disagree.
    Totals are logged at the end.
    """
    num_changes = 0
    num_articles = 0
    num_dupes_removed = 0

    config = get_periodic_shell_config()
    app = Application.instance(config)
    most_popular = app.providers["nytimes_mostpopular"]
    # The decoration does not depend on the article, so look it up once.
    url_decoration = most_popular.config.get("url_decoration", {})

    redis = Redis(connection_pool=app._redis_pool)
    sorted_sets = redis.keys("sorted*")
    sorted_sets.sort()
    for sorted_name in sorted_sets:
        collection, category = LABEL_PATTERN.match(sorted_name).groups()
        label = ".".join([collection, category])
        set_name = "set.{0}".format(label)
        counts_key = ".".join(["counts", collection])

        logger.info("Making changes for: {0}".format(label))

        data = redis.zrange(sorted_name, 0, -1, withscores=True)

        for article_json, score in data:
            num_articles += 1
            article = json.loads(article_json)
            old_url = article["url"]

            uri = furl(old_url)
            for key, val in url_decoration.items():
                uri.args[key] = val
            new_url = uri.url

            if new_url == old_url:
                continue

            num_changes += 1
            article_json = article_json.decode("utf8")
            # The stored JSON escapes "/" as "\/", so escape both URLs the
            # same way before substituting.
            new_article_json = article_json.replace(
                old_url.replace("/", "\\/"), new_url.replace("/", "\\/"))

            # persist change in sorted set
            redis.zrem(sorted_name, article_json)
            redis.zadd(sorted_name, new_article_json, score)

            # persist change in set
            redis.srem(set_name, old_url)
            redis.sadd(set_name, new_url)

            sorted_num = redis.zcard(sorted_name)
            set_num = redis.scard(set_name)
            count = int(redis.hget(counts_key, category))

            if set_num != sorted_num:
                logger.warning(
                    "set/sortedset mismatch: set:{0} sorted:{1}".format(
                        set_num, sorted_num))

            if set_num != count:
                num_dupes_removed += abs(count - set_num)
                # Write to the same hash the count was read from; writing to
                # a different key would leave the stale count in place and
                # re-add the same difference on every later changed article.
                redis.hset(counts_key, category, set_num)

    logger.info("num article changes: {0}/{1}".format(num_changes,
                                                      num_articles))
    logger.info("num dupes removed: {0}".format(num_dupes_removed))
示例#13
0
from up.headliner import Application
from up.headliner.utils import read_config_file
from up.headliner import http

# Create the Application from the config file, then register the HTTP routes
# listed in the server config.
config = read_config_file()
app = Application.instance(config)
http.load_routes(config.server["routes"])
# Parenthesised single-argument print: identical output on Python 2, and
# valid syntax on Python 3 as well.
print("starting app")
# Module-level handle on the web application object.
# NOTE(review): presumably what the WSGI server imports -- confirm.
webapp = http.webapp
示例#14
0
def main():
    """
    Create the Application from the http config, load the configured routes
    and run the web app with the "http" settings of the server config.
    """
    config = get_http_config()
    Application.instance(config)
    server_settings = config.server
    http.load_routes(server_settings["routes"])
    http.webapp.run(**server_settings["http"])
示例#15
0
def main():
    """
    Entry point of the http server: set up the Application, register the
    routes named in the server config, then run the web app.
    """
    config = get_http_config()
    # Called for its side effect only; the returned object is not needed here.
    Application.instance(config)

    routes = config.server["routes"]
    http.load_routes(routes)

    run_options = config.server["http"]
    http.webapp.run(**run_options)