示例#1
0
    def set_present_date(self, present_date):
        """ Sets the reference ("present") date and refreshes the cutoff dates derived from it.

            :param present_date: the datetime to be used as the present date. A falsy value, or a
                value whose string form is 'none' (case-insensitive), selects the current UTC time.
                A naive datetime is localized to UTC; an aware datetime is stored as given.
        """
        date_is_missing = not present_date or str(present_date).lower() == 'none'
        if date_is_missing:
            self._present_date = dt.datetime.now(pytz.utc)
        elif present_date.tzinfo is None:
            # Naive datetimes are interpreted as UTC.
            self._present_date = pytz.utc.localize(present_date)
        else:
            self._present_date = present_date

        reference_date = self._present_date

        # Each window is handed to timedelta positionally, i.e., as a number of days.
        # NOTE(review): du.get_day presumably truncates a datetime to its day -- confirm in du.
        self.short_term_cutoff_date = du.get_day(
            reference_date - dt.timedelta(self.short_term_window))
        self.long_term_cutoff_date = du.get_day(
            reference_date - dt.timedelta(self.long_term_window))
        self.popularity_cutoff_date = du.get_day(
            reference_date - dt.timedelta(self.popularity_window))
示例#2
0
    def test_present_date(self):
        """ Checks that setting a new present date on the context refreshes its short term and
            long term cutoff dates accordingly.
        """
        naive_present = dt.datetime(1988, 11, 6, 10, 0)
        self.context.set_present_date(naive_present)

        # The context is expected to treat the naive datetime as UTC.
        present_day = du.get_day(pytz.utc.localize(naive_present))
        expected_short_term_cutoff = present_day - dt.timedelta(days=self.context.short_term_window)
        expected_long_term_cutoff = present_day - dt.timedelta(days=self.context.long_term_window)

        nose.tools.eq_(self.context.short_term_cutoff_date, expected_short_term_cutoff)
        nose.tools.eq_(self.context.long_term_cutoff_date, expected_long_term_cutoff)
def flush_summaries(database, popularity_summaries_by_product):
    """ Merges in-memory popularity summaries into the popularities_summary collection.

        For each product, the incoming summary is combined with the stored one (if any): the
        smaller "first", the greater "latest" and the sum of both "count" values are kept. The
        popularity is then recomputed as the merged count divided by the number of days between
        the merged first and latest days, both inclusive. Only products whose merged values
        differ from the stored ones are upserted.

        The bulk operation is executed only if at least one update was queued, since executing
        an empty bulk operation raises InvalidOperation in PyMongo.

        :param database: object exposing the popularities_summary collection.
        :param popularity_summaries_by_product: dict mapping each product id to a dict with the
            keys "count", "first" and "latest". It is emptied once the summaries are saved.
    """
    log.info("Saving %d summaries..." % len(popularity_summaries_by_product))

    if not popularity_summaries_by_product:
        # Nothing to merge: skip the database round trips altogether.
        return

    bulk_op = database.popularities_summary.initialize_unordered_bulk_op()

    where = {"p_id": {"$in": list(popularity_summaries_by_product.keys())}}
    fields = {"p_id": True, "count": True, "first": True, "latest": True, "_id": False}

    cursor = database.popularities_summary.find(where, fields)
    current_summaries_by_product = {rec["p_id"]: rec for rec in cursor}

    # Placeholder for products with no stored summary: a zero count and extreme dates, so that
    # the min/max below select the incoming values. Built once, as it is never modified.
    missing_summary = {
        "count": 0,
        "first": pytz.utc.localize(dt.datetime(3000, 1, 1)),
        "latest": pytz.utc.localize(dt.datetime(1, 1, 1))}

    has_pending_updates = False

    for product, popularity_summary in popularity_summaries_by_product.items():
        current_summary = current_summaries_by_product.get(product, missing_summary)

        current_count = current_summary["count"]
        current_first = current_summary["first"]
        current_latest = current_summary["latest"]

        first = min(current_first, popularity_summary["first"])
        latest = max(current_latest, popularity_summary["latest"])
        new_count = current_count + popularity_summary["count"]

        if first != current_first or latest != current_latest or new_count != current_count:
            # NOTE(review): du.get_day presumably truncates a datetime to its day -- confirm in du.
            first_day = du.get_day(first)
            latest_day = du.get_day(latest)
            day_span = (latest_day - first_day).days + 1  # both ends inclusive
            new_popularity = new_count / day_span

            spec = {"p_id": product}
            update_clause = {"$set": {"first": first_day,
                                      "latest": latest_day,
                                      "count": new_count,
                                      "popularity": new_popularity}}
            bulk_op.find(spec).upsert().update(update_clause)
            has_pending_updates = True

    if has_pending_updates:
        bulk_op.execute()

    popularity_summaries_by_product.clear()
def fetch_activities_cursor(database, latest_processed_activity_date, max_date, activity_types):
    """ Queries the activities_summary collection and returns the resulting cursor.

        :param database: object exposing the activities_summary collection.
        :param latest_processed_activity_date: if not None, only records whose "day" is greater
            than or equal to du.get_day(latest_processed_activity_date) are matched.
        :param max_date: if not None, only records whose "created_at" is less than it are matched.
        :param activity_types: the values accepted for the "activity" field.

        :returns: whatever activities_summary.find returns for the assembled query, projecting
            the activity, creation date, external user id and external product id.
    """
    query = {"activity": {"$in": activity_types}}

    if latest_processed_activity_date is not None:
        earliest_day = du.get_day(latest_processed_activity_date)
        query["day"] = {"$gte": earliest_day}

    if max_date is not None:
        query.setdefault("created_at", {})["$lt"] = max_date

    projection = {"_id": False}
    for field_name in ("activity", "created_at", "external_user_id", "external_product_id"):
        projection[field_name] = True

    # NOTE(review): timeout=False presumably keeps the cursor from timing out on the server in
    # the driver version in use -- confirm against the installed PyMongo.
    return database.activities_summary.find(query, projection, timeout=False)
示例#5
0
def flush_summaries(database, latest_activity_by_user_and_product):
    """ Upserts the latest activity of each (user, product) pair into the activities_summary
        collection.

        Each record is matched by external_user_id and external_product_id, and has its
        "activity", "day" and "created_at" fields set from the given activity.

        Nothing is sent to the database when there are no activities to save, since executing
        an empty bulk operation raises InvalidOperation in PyMongo.

        :param database: object exposing the activities_summary collection.
        :param latest_activity_by_user_and_product: dict mapping (user, product) pairs to a dict
            with the keys "activity" and "created_at". It is emptied once the activities are saved.
    """
    log.info("Saving %d summaries..." % len(latest_activity_by_user_and_product))

    if not latest_activity_by_user_and_product:
        # Nothing to save: there would be no operations for the bulk to execute.
        return

    bulk_op = database.activities_summary.initialize_unordered_bulk_op()

    for user_and_product, activity in latest_activity_by_user_and_product.items():
        # upserts the (u,p) pair
        spec = {"external_user_id": user_and_product[0],
                "external_product_id": user_and_product[1]}
        activity_date = activity["created_at"]
        # NOTE(review): du.get_day presumably truncates a datetime to its day -- confirm in du.
        day = du.get_day(activity_date)
        update_clause = {"$set": {"activity": activity["activity"],
                                  "day": day,
                                  "created_at": activity_date}}
        bulk_op.find(spec).upsert().update(update_clause)

    bulk_op.execute()

    latest_activity_by_user_and_product.clear()