Example #1
def update_downloads(ids, **kw):
    client = get_monolith_client()
    today = datetime.date.today()
    count = 0

    for app in Webapp.objects.filter(id__in=ids).no_transforms():

        kwargs = {'app-id': app.id}

        # Get weekly downloads.
        #
        # If we query monolith with interval=week and the past 7 days
        # crosses a Monday, Monolith splits the counts into two. We want
        # the sum over the past week so we need to `sum` these.
        try:
            weekly = sum(
                c['count'] for c in
                client('app_installs', days_ago(7).date(), today, 'week',
                       **kwargs)
                if c.get('count'))
        except ValueError as e:
            task_log.info('Call to ES failed: {0}'.format(e))
            weekly = 0

        # Get total downloads.
        #
        # The monolith client lib doesn't handle this for us so we send a raw
        # ES query to Monolith.
        query = {'query': {'match_all': {}},
                 'facets': {
                     'installs': {
                         'statistical': {'field': 'app_installs'},
                         'facet_filter': {'term': kwargs}}},
                 'size': 0}
        try:
            resp = client.raw(query)
            total = resp.get('facets', {}).get('installs', {}).get('total', 0)
        except Exception as e:
            task_log.info('Call to ES failed: {0}'.format(e))
            total = 0

        # Update Webapp object, if needed.
        update = False
        signal = False
        if weekly != app.weekly_downloads:
            update = True
            signal = True
        if total != app.total_downloads:
            update = True

        if update:
            # Note: Calling `update` will trigger a reindex on the app if
            # `_signal` is True. Since we only index `weekly_downloads`, we
            # can skip reindexing if this hasn't changed.
            count += 1
            app.update(weekly_downloads=weekly, total_downloads=total,
                       _signal=signal)

    task_log.info('App downloads updated for %s out of %s apps.'
                  % (count, len(ids)))
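Several of the examples below call a days_ago() helper without showing it; a few later examples define it inline as a lambda. A minimal sketch of the assumed helper (callers invoke .date() on its result, so it must return a datetime):

import datetime

def days_ago(days):
    # Assumed helper: a datetime `days` days in the past.
    return datetime.datetime.today() - datetime.timedelta(days=days)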
Example #2
    def get(self, request, metric):
        if metric not in STATS:
            raise http.Http404('No metric by that name.')

        if not waffle.switch_is_active('stats-api'):
            raise NotImplementedError('Stats not enabled for this host.')

        # Perform form validation.
        form = GlobalStatsForm(request.GET)
        if not form.is_valid():
            raise ParseError(dict(form.errors.items()))

        data = form.cleaned_data
        client = get_monolith_client()

        try:
            metric_data = list(client(STATS[metric]['metric'],
                                      data.get('start'), data.get('end'),
                                      data.get('interval')))
        except ValueError:
            # This occurs if monolith doesn't have our metric and we get an
            # elasticsearch SearchPhaseExecutionException error.
            log.info('Monolith ValueError for metric {0}'.format(
                STATS[metric]['metric']))
            raise ParseError('Invalid metric at this time. Try again later.')

        return Response({'objects': metric_data})
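The STATS registry is not shown in any snippet. From the lookups here and in later examples (STATS[metric]['metric'], plus optional 'dimensions' and 'lines' keys), it is presumably a dict of metric descriptors along these lines; the entries below are illustrative, not the real ones:

STATS = {
    # API-facing metric name -> descriptor consumed by the stats views.
    'total_visits': {'metric': 'visits'},
    'apps_installed': {
        'metric': 'app_installs',
        # Later examples also read optional 'dimensions', 'lines' and
        # 'coerce' keys from these descriptors.
    },
}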
Example #3
 def get_client(self):
     try:
         client = get_monolith_client()
     except requests.ConnectionError as e:
         log.info('Monolith connection error: {0}'.format(e))
         raise ServiceUnavailable
     return client
Example #4
def _monolith_site_query(period, start, end, field):
    fields = {
        'mmo_total_visitors': 'visits',
        'apps_count_installed': 'app_installs',
        'apps_review_count_new': 'review_count',
        'mmo_user_count_new': 'new_user_count',
        'apps_count_new': 'app_count',
        'mmo_user_count_total': 'total_user_count'
    }

    # Getting data from the monolith server.
    client = get_monolith_client()

    if period == 'date':
        period = 'day'

    def _get_data():
        for result in client(fields[field], start, end, interval=period):
            yield {
                'date': result['date'].strftime('%Y-%m-%d'),
                'data': {
                    field: result['count']
                }
            }

    try:
        return list(_get_data()), _CACHED_KEYS
    except ValueError as e:
        if len(e.args) > 0:
            logger.error(e.args[0])
        return [], _CACHED_KEYS
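A hypothetical call, to make the signature concrete (the dates and field are placeholders):

from datetime import date

rows, cached_keys = _monolith_site_query(
    'date', date(2014, 1, 1), date(2014, 1, 31), 'mmo_total_visitors')
# rows -> [{'date': '2014-01-01', 'data': {'mmo_total_visitors': 1234}}, ...]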
Example #5
def _monolith_site_query(period, start, end, field):
    fields = {'mmo_total_visitors': 'visits',
              'apps_count_installed': 'app_installs',
              'apps_review_count_new': 'review_count',
              'mmo_user_count_new': 'user_count',
              'apps_count_new': 'app_count',
              'mmo_user_count_total': 'total_user_count'}

    # Getting data from the monolith server.
    client = get_monolith_client()

    if period == 'date':
        period = 'day'

    # The start date is not included in the range.
    # The end date is included.
    start = start + timedelta(days=1)

    def _get_data():
        for result in client(fields[field], start, end, interval=period,
                             strict_range=False):
            yield {'date': result['date'].strftime('%Y-%m-%d'),
                   'data': {field: result['count']}}

    try:
        return list(_get_data()), _CACHED_KEYS
    except ValueError as e:
        if len(e.args) > 0:
            logger.error(e.args[0])
        return [], _CACHED_KEYS
Example #6
    def get(self, request, metric):
        if metric not in STATS:
            raise http.Http404("No metric by that name.")

        if not waffle.switch_is_active("stats-api"):
            raise NotImplemented("Stats not enabled for this host.")

        # Perform form validation.
        form = GlobalStatsForm(request.GET)
        if not form.is_valid():
            raise ParseError(dict(form.errors.items()))

        data = form.cleaned_data
        client = get_monolith_client()

        try:
            metric_data = list(
                client(STATS[metric]["metric"], data.get("start"), data.get("end"), data.get("interval"))
            )
        except ValueError:
            # This occurs if monolith doesn't have our metric and we get an
            # elasticsearch SearchPhaseExecutionException error.
            log.info("Monolith ValueError for metric {0}".format(STATS[metric]["metric"]))
            raise ParseError("Invalid metric at this time. Try again later.")

        return Response({"objects": metric_data})
Example #7
def _get_monolith_data(stat, start, end, interval, dimensions):
    # If stat has a 'lines' attribute, it's a multi-line graph. Do a
    # request for each item in 'lines' and compose them in a single
    # response.
    try:
        client = get_monolith_client()
    except requests.ConnectionError as e:
        log.info('Monolith connection error: {0}'.format(e))
        raise ServiceUnavailable

    try:
        data = {}
        if 'lines' in stat:
            for line_name, line_dimension in stat['lines'].items():
                dimensions.update(line_dimension)
                data[line_name] = list(client(stat['metric'], start, end,
                                              interval, **dimensions))

        else:
            data['objects'] = list(client(stat['metric'], start, end, interval,
                                          **dimensions))

    except ValueError as e:
        # This occurs if monolith doesn't have our metric and we get an
        # elasticsearch SearchPhaseExecutionException error.
        log.info('Monolith ValueError for metric {0}: {1}'.format(
            stat['metric'], e))
        raise ParseError('Invalid metric at this time. Try again later.')

    return data
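For the 'lines' branch above to do anything, the stat descriptor needs a 'lines' mapping of line name to extra dimensions. A sketch of such an entry, with invented dimension names:

stat = {
    'metric': 'apps_review_count_new',
    'lines': {
        # line name -> extra facet dimensions merged into each query
        'new': {'new_or_old': 'new'},
        'old': {'new_or_old': 'old'},
    },
}
data = _get_monolith_data(stat, start, end, 'day', {})
# data -> {'new': [...], 'old': [...]}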
Example #8
def _get_monolith_data(stat, start, end, interval, dimensions):
    # If stat has a 'lines' attribute, it's a multi-line graph. Do a
    # request for each item in 'lines' and compose them in a single
    # response.
    client = get_monolith_client()
    try:
        data = {}
        if 'lines' in stat:
            for line_name, line_dimension in stat['lines'].items():
                dimensions.update(line_dimension)
                data[line_name] = list(
                    client(stat['metric'], start, end, interval, **dimensions))

        else:
            data['objects'] = list(
                client(stat['metric'], start, end, interval, **dimensions))

    except ValueError as e:
        # This occurs if monolith doesn't have our metric and we get an
        # elasticsearch SearchPhaseExecutionException error.
        log.info('Monolith ValueError for metric {0}: {1}'.format(
            stat['metric'], e))
        raise ParseError('Invalid metric at this time. Try again later.')

    return data
Example #9
    def get(self, request, pk):
        app = self.get_object()

        try:
            client = get_monolith_client()
        except requests.ConnectionError as e:
            log.info('Monolith connection error: {0}'.format(e))
            raise ServiceUnavailable

        # Note: We have to do this as separate requests so that if one fails
        # the rest can still be returned.
        data = {}
        for metric, stat in APP_STATS_TOTAL.items():
            data[metric] = {}
            query = {
                'query': {
                    'match_all': {}
                },
                'facets': {
                    metric: {
                        'statistical': {
                            'field': stat['metric']
                        },
                        'facet_filter': {
                            'term': {
                                'app-id': app.id
                            }
                        }
                    }
                },
                'size': 0
            }

            try:
                resp = client.raw(query)
            except ValueError as e:
                log.info('Received value error from monolith client: %s' % e)
                continue

            for metric, facet in resp.get('facets', {}).items():
                count = facet.get('count', 0)

                # We filter out facets with count=0 to avoid returning things
                # like `'max': u'-Infinity'`.
                if count > 0:
                    for field in ('max', 'mean', 'min', 'std_deviation',
                                  'sum_of_squares', 'total', 'variance'):
                        value = facet.get(field)
                        if value is not None:
                            data[metric][field] = value

        return Response(data)
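APP_STATS_TOTAL is not shown either; from the usage above it is assumed to map API metric names to descriptors whose 'metric' key names the ES field, e.g. (illustrative):

APP_STATS_TOTAL = {
    # API-facing name -> monolith field holding the per-day install counts.
    'installs': {'metric': 'app_installs'},
}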
Example #10
def _get_trending(app_id, region=None):
    """
    Calculate trending.

    a = installs from 7 days ago to now
    b = installs from 28 days ago to 8 days ago, averaged per week

    trending = (a - b) / b if a > 100 and b > 1 else 0

    """
    client = get_monolith_client()

    kwargs = {"app-id": app_id}
    if region:
        kwargs["region"] = region.slug

    today = datetime.datetime.today()

    # If we query monolith with interval=week and the past 7 days
    # crosses a Monday, Monolith splits the counts into two. We want
    # the sum over the past week so we need to `sum` these.
    try:
        count_1 = sum(
            c["count"] for c in client("app_installs", days_ago(7), today, "week", **kwargs) if c.get("count")
        )
    except ValueError as e:
        task_log.info("Call to ES failed: {0}".format(e))
        count_1 = 0

    # If count_1 isn't more than 100, stop here to avoid extra Monolith calls.
    if not count_1 > 100:
        return 0.0

    # Get the average installs for the prior 3 weeks. Don't use the `len` of
    # the returned counts because of week boundaries.
    try:
        count_3 = (
            sum(
                c["count"]
                for c in client("app_installs", days_ago(28), days_ago(8), "week", **kwargs)
                if c.get("count")
            )
            / 3
        )
    except ValueError as e:
        task_log.info("Call to ES failed: {0}".format(e))
        count_3 = 0

    if count_3 > 1:
        return (count_1 - count_3) / count_3
    else:
        return 0.0
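The docstring formula is easy to sanity-check with concrete (made-up) numbers:

a = 300.0                  # installs over the past 7 days
b = (60 + 90 + 60) / 3.0   # avg weekly installs for the prior 3 weeks -> 70.0
trending = (a - b) / b if a > 100 and b > 1 else 0  # (300 - 70) / 70 ~= 3.29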
Example #11
    def get(self, request, metric):
        if metric not in STATS:
            raise http.Http404('No metric by that name.')

        if not waffle.switch_is_active('stats-api'):
            raise NotImplementedError('Stats not enabled for this host.')

        stat = STATS[metric]

        # Perform form validation.
        form = GlobalStatsForm(request.GET)
        if not form.is_valid():
            raise ParseError(dict(form.errors.items()))

        qs = form.cleaned_data
        client = get_monolith_client()

        dimensions = {}
        if 'dimensions' in stat:
            for key, default in stat['dimensions'].items():
                val = request.GET.get(key, default)
                if val is not None:
                    # Skip dimensions whose value is None so that no facet
                    # filter is applied for them by the monolith client.
                    dimensions[key] = val

        # If stat has a 'lines' attribute, it's a multi-line graph. Do a
        # request for each item in 'lines' and compose them in a single
        # response.
        try:
            data = {}
            if 'lines' in stat:
                for line_name, line_dimension in stat['lines'].items():
                    dimensions.update(line_dimension)
                    data[line_name] = list(
                        client(stat['metric'], qs.get('start'), qs.get('end'),
                               qs.get('interval'), **dimensions))

            else:
                data['objects'] = list(
                    client(stat['metric'], qs.get('start'), qs.get('end'),
                           qs.get('interval'), **dimensions))

        except ValueError as e:
            # This occurs if monolith doesn't have our metric and we get an
            # elasticsearch SearchPhaseExecutionException error.
            log.info('Monolith ValueError for metric {0}: {1}'.format(
                stat['metric'], e))
            raise ParseError('Invalid metric at this time. Try again later.')

        return Response(data)
Example #12
def _get_trending(app_id, region=None):
    """
    Calculate trending.

    a = installs from 7 days ago to now
    b = installs from 28 days ago to 8 days ago, averaged per week

    trending = (a - b) / b if a > 100 and b > 1 else 0

    """
    client = get_monolith_client()

    kwargs = {'app-id': app_id}
    if region:
        kwargs['region'] = region.slug

    today = datetime.datetime.today()
    days_ago = lambda d: today - datetime.timedelta(days=d)

    # If we query monolith with interval=week and the past 7 days
    # crosses a Monday, Monolith splits the counts into two. We want
    # the sum over the past week so we need to `sum` these.
    try:
        count_1 = sum(
            c['count'] for c in
            client('app_installs', days_ago(7), today, 'week', **kwargs)
            if c.get('count'))
    except ValueError as e:
        task_log.info('Call to ES failed: {0}'.format(e))
        count_1 = 0

    # If count_1 isn't more than 100, stop here to avoid extra Monolith calls.
    if not count_1 > 100:
        return 0.0

    # Get the average installs for the prior 3 weeks. Don't use the `len` of
    # the returned counts because of week boundaries.
    try:
        count_3 = sum(
            c['count'] for c in
            client('app_installs', days_ago(28), days_ago(8), 'week', **kwargs)
            if c.get('count')) / 3
    except ValueError as e:
        task_log.info('Call to ES failed: {0}'.format(e))
        count_3 = 0

    if count_3 > 1:
        return (count_1 - count_3) / count_3
    else:
        return 0.0
Example #13
def _get_trending(app_id, region=None):
    """
    Calculate trending.

    a = installs from 7 days ago to now
    b = installs from 28 days ago to 8 days ago, averaged per week

    trending = (a - b) / b if a > 100 and b > 1 else 0

    """
    client = get_monolith_client()

    kwargs = {'app-id': app_id}
    if region:
        kwargs['region'] = region.slug

    today = datetime.datetime.today()
    days_ago = lambda d: today - datetime.timedelta(days=d)

    # If we query monolith with interval=week and the past 7 days
    # crosses a Monday, Monolith splits the counts into two. We want
    # the sum over the past week so we need to `sum` these.
    try:
        count_1 = sum(
            c['count'] for c in
            client('app_installs', days_ago(7), today, 'week', **kwargs)
            if c.get('count'))
    except ValueError as e:
        log.info('Call to ES failed: {0}'.format(e))
        count_1 = 0

    # Get the average installs for the prior 3 weeks. Don't use the `len` of
    # the returned counts because of week boundaries.
    try:
        count_3 = sum(
            c['count'] for c in
            client('app_installs', days_ago(28), days_ago(8), 'week', **kwargs)
            if c.get('count')) / 3
    except ValueError as e:
        log.info('Call to ES failed: {0}'.format(e))
        count_3 = 0

    if count_1 > 100 and count_3 > 1:
        return (count_1 - count_3) / count_3
    else:
        return 0.0
Example #14
    def get_detail(self, request, **kwargs):
        metric = kwargs.get('metric')
        if metric not in STATS:
            raise ImmediateHttpResponse(response=http.HttpNotFound())

        # Trigger form validation which doesn't normally happen for GETs.
        bundle = self.build_bundle(data=request.GET, request=request)
        self.is_valid(bundle, request)

        start = bundle.data.get('start')
        end = bundle.data.get('end')
        interval = bundle.data.get('interval')

        client = get_monolith_client()

        data = list(client(STATS[metric]['metric'], start, end, interval))
        to_be_serialized = self.alter_list_data_to_serialize(
            request, {'objects': data})

        return self.create_response(request, to_be_serialized)
Example #15
def _get_monolith_data(stat, start, end, interval, dimensions):
    # If stat has a 'lines' attribute, it's a multi-line graph. Do a
    # request for each item in 'lines' and compose them in a single
    # response.
    try:
        client = get_monolith_client()
    except requests.ConnectionError as e:
        log.info('Monolith connection error: {0}'.format(e))
        raise ServiceUnavailable

    def _coerce(data):
        for key, coerce in stat.get('coerce', {}).items():
            if data.get(key):
                data[key] = coerce(data[key])

        return data

    try:
        data = {}
        if 'lines' in stat:
            for line_name, line_dimension in stat['lines'].items():
                dimensions.update(line_dimension)
                data[line_name] = map(
                    _coerce,
                    client(stat['metric'], start, end, interval, **dimensions))

        else:
            data['objects'] = map(
                _coerce,
                client(stat['metric'], start, end, interval, **dimensions))

    except ValueError as e:
        # This occurs if monolith doesn't have our metric and we get an
        # elasticsearch SearchPhaseExecutionException error.
        log.info('Monolith ValueError for metric {0}: {1}'.format(
            stat['metric'], e))
        raise ParseError('Invalid metric at this time. Try again later.')

    return data
Example #16
def _get_trending(app_id, region=None):
    """
    Calculate trending.

    a = installs from 7 days ago to now
    b = installs from 28 days ago to 8 days ago, averaged per week

    trending = (a - b) / b if a > 100 and b > 1 else 0

    """
    client = get_monolith_client()

    kwargs = {'app-id': app_id}
    if region:
        kwargs['region'] = region.slug

    today = datetime.datetime.today()
    days_ago = lambda d: today - datetime.timedelta(days=d)

    # If we query monolith with interval=week and the past 7 days
    # crosses a Monday, Monolith splits the counts into two. We want
    # the sum over the past week so we need to `sum` these.
    count_1 = sum(
        c['count']
        for c in client('app_installs', days_ago(7), today, 'week', **kwargs))

    # Get the average installs for the prior 3 weeks. Don't use the `len` of
    # the returned counts because of week boundaries.
    counts_3 = list(
        client('app_installs', days_ago(28), days_ago(8), 'week', **kwargs))
    count_3 = sum(c['count'] for c in counts_3) / 3

    if count_1 > 100 and count_3 > 1:
        return (count_1 - count_3) / count_3
    else:
        return 0.0
Example #17
def get_series_line(model, group, primary_field=None, extra_fields=None,
                    extra_values=None, **filters):
    """
    Get a generator of dicts for the stats model given by the filters, made
    to fit into Highchart's datetime line graph.

    primary_field takes a field name that can be referenced by the key 'count'
    extra_fields takes a list of fields that can be found in the index
    on top of date and count and can be seen in the output
    extra_values is a list of constant values added to each line
    """
    if not extra_fields:
        extra_fields = []

    extra_values = extra_values or {}

    if waffle.switch_is_active('monolith-stats'):
        keys = {Installed: 'app_installs',
                UpdateCount: 'updatecount_XXX',
                Contribution: 'contribution_XXX',
                InappPayment: 'inapppayment_XXX'}

        # Getting data from the monolith server.
        client = get_monolith_client()

        field = keys[model]
        start, end = filters['date__range']

        if group == 'date':
            group = 'day'

        try:
            for result in client(field, start, end, interval=group,
                                 addon_id=filters['addon']):
                res = {'count': result['count']}
                for extra_field in extra_fields:
                    res[extra_field] = result[extra_field]
                date_ = date(*result['date'].timetuple()[:3])
                res['end'] = res['date'] = date_
                res.update(extra_values)
                yield res
        except ValueError as e:
            if len(e.args) > 0:
                logger.error(e.args[0])

    else:
        # Pull data out of ES
        data = list((model.search().order_by('-date').filter(**filters)
            .values_dict('date', 'count', primary_field, *extra_fields))[:365])

        # Pad empty data with dummy dicts.
        days = [datum['date'].date() for datum in data]
        fields = []
        if primary_field:
            fields.append(primary_field)
        if extra_fields:
            fields += extra_fields
        data += pad_missing_stats(days, group, filters.get('date__range'),
                                  fields)

        # Sort in descending order.
        data = sorted(data, key=lambda document: document['date'],
                      reverse=True)

        # Generate dictionary with options from ES document
        for val in data:
            # Convert the datetimes to a date.
            date_ = date(*val['date'].timetuple()[:3])
            if primary_field and primary_field != 'count':
                rv = dict(count=val[primary_field], date=date_, end=date_)
            else:
                rv = dict(count=val['count'], date=date_, end=date_)
            for extra_field in extra_fields:
                rv[extra_field] = val[extra_field]
            rv.update(extra_values)
            yield rv
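A hypothetical call for a weekly install series (the add-on id and dates are placeholders):

points = list(get_series_line(Installed, 'week', addon=3615,
                              date__range=(start_date, end_date)))
# each point: {'count': ..., 'date': datetime.date(...), 'end': datetime.date(...)}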
Example #18
def update_downloads(ids, **kw):
    client = get_monolith_client()
    count = 0

    for app in Webapp.objects.filter(id__in=ids).no_transforms():

        appid = {'app-id': app.id}

        # Get weekly downloads.
        query = {
            'query': {
                'match_all': {}
            },
            'facets': {
                'installs': {
                    'date_histogram': {
                        'value_field': 'app_installs',
                        'interval': 'week',
                        'key_field': 'date',
                    },
                    'facet_filter': {
                        'and':
                        [{
                            'term': appid
                        },
                         {
                             'range': {
                                 'date': {
                                     'gte':
                                     days_ago(8).date().strftime('%Y-%m-%d'),
                                     'lte':
                                     days_ago(1).date().strftime('%Y-%m-%d'),
                                 }
                             }
                         }]
                    }
                }
            },
            'size': 0
        }

        try:
            resp = client.raw(query)
            # If we query monolith with interval=week and the past 7 days
            # crosses a Monday, Monolith splits the counts into two. We want
            # the sum over the past week so we need to `sum` these.
            weekly = sum(c['total'] for c in resp.get('facets', {}).get(
                'installs', {}).get('entries') if c.get('total'))
        except Exception as e:
            task_log.info('Call to ES failed: {0}'.format(e))
            weekly = 0

        # Get total downloads.
        query = {
            'query': {
                'match_all': {}
            },
            'facets': {
                'installs': {
                    'statistical': {
                        'field': 'app_installs'
                    },
                    'facet_filter': {
                        'term': appid
                    }
                }
            },
            'size': 0
        }
        try:
            resp = client.raw(query)
            total = resp.get('facets', {}).get('installs', {}).get('total', 0)
        except Exception as e:
            task_log.info('Call to ES failed: {0}'.format(e))
            total = 0

        # Update Webapp object, if needed.
        update = False
        signal = False
        if weekly != app.weekly_downloads:
            update = True
            signal = True
        if total != app.total_downloads:
            update = True

        if update:
            # Note: Calling `update` will trigger a reindex on the app if
            # `_signal` is True. Since we only index `weekly_downloads`, we
            # can skip reindexing if this hasn't changed.
            count += 1
            app.update(weekly_downloads=weekly,
                       total_downloads=total,
                       _signal=signal)

    task_log.info('App downloads updated for %s out of %s apps.' %
                  (count, len(ids)))
Example #19
def update_downloads(ids, **kw):
    client = get_monolith_client()
    count = 0

    for app in Webapp.objects.filter(id__in=ids).no_transforms():

        appid = {'app-id': app.id}

        # Get weekly downloads.
        query = {
            'query': {'match_all': {}},
            'facets': {
                'installs': {
                    'date_histogram': {
                        'value_field': 'app_installs',
                        'interval': 'week',
                        'key_field': 'date',
                    },
                    'facet_filter': {
                        'and': [
                            {'term': appid},
                            {'range': {'date': {
                                'gte': days_ago(8).date().strftime('%Y-%m-%d'),
                                'lte': days_ago(1).date().strftime('%Y-%m-%d'),
                            }}}
                        ]
                    }
                }
            },
            'size': 0}

        try:
            resp = client.raw(query)
            # If we query monolith with interval=week and the past 7 days
            # crosses a Monday, Monolith splits the counts into two. We want
            # the sum over the past week so we need to `sum` these.
            weekly = sum(
                c['total'] for c in
                resp.get('facets', {}).get('installs', {}).get('entries')
                if c.get('total'))
        except Exception as e:
            task_log.info('Call to ES failed: {0}'.format(e))
            weekly = 0

        # Get total downloads.
        query = {'query': {'match_all': {}},
                 'facets': {
                     'installs': {
                         'statistical': {'field': 'app_installs'},
                         'facet_filter': {'term': appid}}},
                 'size': 0}
        try:
            resp = client.raw(query)
            total = resp.get('facets', {}).get('installs', {}).get('total', 0)
        except Exception as e:
            task_log.info('Call to ES failed: {0}'.format(e))
            total = 0

        # Update Webapp object, if needed.
        update = False
        signal = False
        if weekly != app.weekly_downloads:
            update = True
            signal = True
        if total != app.total_downloads:
            update = True

        if update:
            # Note: Calling `update` will trigger a reindex on the app if
            # `_signal` is True. Since we only index `weekly_downloads`, we
            # can skip reindexing if this hasn't changed.
            count += 1
            app.update(weekly_downloads=weekly, total_downloads=total,
                       _signal=signal)

    task_log.info('App downloads updated for %s out of %s apps.'
                  % (count, len(ids)))
Example #20
def _get_installs(app_id):
    """
    Calculate popularity of app for all regions and per region.

    Returns value in the format of::

        {'all': <global installs>,
         <region_slug>: <regional installs>,
         ...}

    """
    # How many days of history to include when calculating popularity.
    POPULARITY_PERIOD = 90

    client = get_monolith_client()

    popular = {
        'filter': {
            'range': {
                'date': {
                    'gte': days_ago(POPULARITY_PERIOD).date().isoformat(),
                    'lte': days_ago(1).date().isoformat()
                }
            }
        },
        'aggs': {
            'total_installs': {
                'sum': {
                    'field': 'app_installs'
                }
            }
        }
    }

    query = {
        'query': {
            'filtered': {
                'query': {'match_all': {}},
                'filter': {'term': {'app-id': app_id}}
            }
        },
        'aggregations': {
            'popular': popular,
            'region': {
                'terms': {
                    'field': 'region',
                    # Add size so we get all regions, not just the top 10.
                    'size': len(mkt.regions.ALL_REGIONS)
                },
                'aggregations': {
                    'popular': popular
                }
            }
        },
        'size': 0
    }

    try:
        res = client.raw(query)
    except ValueError as e:
        task_log.error('Error response from Monolith: {0}'.format(e))
        return {}

    if 'aggregations' not in res:
        task_log.error('No installs for app {}'.format(app_id))
        return {}

    results = {
        'all': res['aggregations']['popular']['total_installs']['value']
    }

    if 'region' in res['aggregations']:
        for regional_res in res['aggregations']['region']['buckets']:
            region_slug = regional_res['key']
            popular = regional_res['popular']['total_installs']['value']
            results[region_slug] = popular

    return results
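The parsing above implies a raw response shaped roughly like this (a sketch of the assumed payload, not captured output):

res = {
    'aggregations': {
        'popular': {'total_installs': {'value': 12345.0}},
        'region': {'buckets': [
            {'key': 'us',
             'popular': {'total_installs': {'value': 6789.0}}},
        ]},
    },
}
# _get_installs would then return {'all': 12345.0, 'us': 6789.0}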
Example #21
def _get_trending(app_id):
    """
    Calculate trending for app for all regions and per region.

    a = installs from 8 days ago to 1 day ago
    b = installs from 29 days ago to 9 days ago, averaged per week
    trending = (a - b) / b if a > 100 and b > 1 else 0

    Returns value in the format of::

        {'all': <global trending score>,
         <region_slug>: <regional trending score>,
         ...}

    """
    # How many app installs are required in the prior week to be considered
    # "trending". Adjust this as total Marketplace app installs increases.
    #
    # Note: AMO uses 1000.0 for add-ons.
    PRIOR_WEEK_INSTALL_THRESHOLD = 100.0

    client = get_monolith_client()

    week1 = {
        'filter': {
            'range': {
                'date': {
                    'gte': days_ago(8).date().isoformat(),
                    'lte': days_ago(1).date().isoformat()
                }
            }
        },
        'aggs': {
            'total_installs': {
                'sum': {
                    'field': 'app_installs'
                }
            }
        }
    }
    week3 = {
        'filter': {
            'range': {
                'date': {
                    'gte': days_ago(29).date().isoformat(),
                    'lte': days_ago(9).date().isoformat()
                }
            }
        },
        'aggs': {
            'total_installs': {
                'sum': {
                    'field': 'app_installs'
                }
            }
        }
    }

    query = {
        'query': {
            'filtered': {
                'query': {'match_all': {}},
                'filter': {'term': {'app-id': app_id}}
            }
        },
        'aggregations': {
            'week1': week1,
            'week3': week3,
            'region': {
                'terms': {
                    'field': 'region',
                    # Add size so we get all regions, not just the top 10.
                    'size': len(mkt.regions.ALL_REGIONS)
                },
                'aggregations': {
                    'week1': week1,
                    'week3': week3
                }
            }
        },
        'size': 0
    }

    try:
        res = client.raw(query)
    except ValueError as e:
        task_log.error('Error response from Monolith: {0}'.format(e))
        return {}

    if 'aggregations' not in res:
        task_log.error('No installs for app {}'.format(app_id))
        return {}

    def _score(week1, week3):
        # If last week app installs are < 100, this app isn't trending.
        if week1 < PRIOR_WEEK_INSTALL_THRESHOLD:
            return 0.0

        score = 0.0
        if week3 > 1.0:
            score = (week1 - week3) / week3
        if score < 0.0:
            score = 0.0
        return score

    # Global trending score.
    week1 = res['aggregations']['week1']['total_installs']['value']
    week3 = res['aggregations']['week3']['total_installs']['value'] / 3.0

    if week1 < PRIOR_WEEK_INSTALL_THRESHOLD:
        # If global installs over the last week aren't over 100, we
        # short-circuit and return a zero-like value as this is not a trending
        # app by definition. Since global installs aren't above 100, per-region
        # installs won't be either.
        return {}

    results = {
        'all': _score(week1, week3)
    }

    if 'region' in res['aggregations']:
        for regional_res in res['aggregations']['region']['buckets']:
            region_slug = regional_res['key']
            week1 = regional_res['week1']['total_installs']['value']
            week3 = regional_res['week3']['total_installs']['value'] / 3.0
            results[region_slug] = _score(week1, week3)

    return results
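To make the _score thresholds concrete, a quick worked example with invented counts:

week1 = 250.0        # installs over days -8..-1
week3 = 450.0 / 3.0  # installs over days -29..-9, averaged per week -> 150.0
# _score(250.0, 150.0) -> (250 - 150) / 150 ~= 0.67
# _score(80.0, 150.0)  -> 0.0 (below PRIOR_WEEK_INSTALL_THRESHOLD)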
Example #22
def update_downloads(ids, **kw):
    client = get_monolith_client()
    count = 0

    for app in Webapp.objects.filter(id__in=ids).no_transforms():

        appid = {"app-id": app.id}

        # Get weekly downloads.
        query = {
            "query": {"match_all": {}},
            "facets": {
                "installs": {
                    "date_histogram": {"value_field": "app_installs", "interval": "week", "key_field": "date"},
                    "facet_filter": {
                        "and": [
                            {"term": appid},
                            {
                                "range": {
                                    "date": {
                                        "gte": days_ago(8).date().strftime("%Y-%m-%d"),
                                        "lte": days_ago(1).date().strftime("%Y-%m-%d"),
                                    }
                                }
                            },
                        ]
                    },
                }
            },
            "size": 0,
        }

        try:
            resp = client.raw(query)
            # If we query monolith with interval=week and the past 7 days
            # crosses a Monday, Monolith splits the counts into two. We want
            # the sum over the past week so we need to `sum` these.
            weekly = sum(
                c["total"] for c in resp.get("facets", {}).get("installs", {}).get("entries") if c.get("total")
            )
        except Exception as e:
            task_log.info("Call to ES failed: {0}".format(e))
            weekly = 0

        # Get total downloads.
        query = {
            "query": {"match_all": {}},
            "facets": {"installs": {"statistical": {"field": "app_installs"}, "facet_filter": {"term": appid}}},
            "size": 0,
        }
        try:
            resp = client.raw(query)
            total = resp.get("facets", {}).get("installs", {}).get("total", 0)
        except Exception as e:
            task_log.info("Call to ES failed: {0}".format(e))
            total = 0

        # Update Webapp object, if needed.
        update = False
        signal = False
        if weekly != app.weekly_downloads:
            update = True
            signal = True
        if total != app.total_downloads:
            update = True

        if update:
            # Note: Calling `update` will trigger a reindex on the app if
            # `_signal` is True. Since we only index `weekly_downloads`, we
            # can skip reindexing if this hasn't changed.
            count += 1
            app.update(weekly_downloads=weekly, total_downloads=total, _signal=signal)

    task_log.info("App downloads updated for %s out of %s apps." % (count, len(ids)))
Example #23
def update_downloads(ids, **kw):
    client = get_monolith_client()
    today = datetime.date.today()
    count = 0

    for app in Webapp.objects.filter(id__in=ids).no_transforms():

        kwargs = {'app-id': app.id}

        # Get weekly downloads.
        #
        # If we query monolith with interval=week and the past 7 days
        # crosses a Monday, Monolith splits the counts into two. We want
        # the sum over the past week so we need to `sum` these.
        try:
            weekly = sum(
                c['count']
                for c in client('app_installs',
                                days_ago(7).date(), today, 'week', **kwargs)
                if c.get('count'))
        except ValueError as e:
            task_log.info('Call to ES failed: {0}'.format(e))
            weekly = 0

        # Get total downloads.
        #
        # The monolith client lib doesn't handle this for us so we send a raw
        # ES query to Monolith.
        query = {
            'query': {
                'match_all': {}
            },
            'facets': {
                'installs': {
                    'statistical': {
                        'field': 'app_installs'
                    },
                    'facet_filter': {
                        'term': kwargs
                    }
                }
            },
            'size': 0
        }
        try:
            resp = client.raw(query)
            total = resp.get('facets', {}).get('installs', {}).get('total', 0)
        except Exception as e:
            task_log.info('Call to ES failed: {0}'.format(e))
            total = 0

        # Update Webapp object, if needed.
        update = False
        signal = False
        if weekly != app.weekly_downloads:
            update = True
            signal = True
        if total != app.total_downloads:
            update = True

        if update:
            # Note: Calling `update` will trigger a reindex on the app if
            # `_signal` is True. Since we only index `weekly_downloads`, we
            # can skip reindexing if this hasn't changed.
            count += 1
            app.update(weekly_downloads=weekly,
                       total_downloads=total,
                       _signal=signal)

    task_log.info('App downloads updated for %s out of %s apps.' %
                  (count, len(ids)))
Example #24
def get_series_line(model,
                    group,
                    primary_field=None,
                    extra_fields=None,
                    extra_values=None,
                    **filters):
    """
    Get a generator of dicts for the stats model given by the filters, made
    to fit into Highchart's datetime line graph.

    primary_field takes a field name that can be referenced by the key 'count'
    extra_fields takes a list of fields that can be found in the index
    on top of date and count and can be seen in the output
    extra_values is a list of constant values added to each line
    """
    if not extra_fields:
        extra_fields = []

    extra_values = extra_values or {}

    if waffle.switch_is_active('monolith-stats'):
        keys = {
            Installed: 'app_installs',
            UpdateCount: 'updatecount_XXX',
            Contribution: 'contribution_XXX',
            InappPayment: 'inapppayment_XXX'
        }

        # Getting data from the monolith server.
        client = get_monolith_client()

        field = keys[model]
        start, end = filters['date__range']

        if group == 'date':
            group = 'day'

        for result in client(field,
                             start,
                             end,
                             interval=group,
                             addon_id=filters['addon']):
            res = {'count': result['count']}
            for extra_field in extra_fields:
                res[extra_field] = result[extra_field]
            date_ = date(*result['date'].timetuple()[:3])
            res['end'] = res['date'] = date_
            res.update(extra_values)
            yield res
    else:
        # Pull data out of ES
        data = list(
            (model.search().order_by('-date').filter(**filters).values_dict(
                'date', 'count', primary_field, *extra_fields))[:365])

        # Pad empty data with dummy dicts.
        days = [datum['date'].date() for datum in data]
        fields = []
        if primary_field:
            fields.append(primary_field)
        if extra_fields:
            fields += extra_fields
        data += pad_missing_stats(days, group, filters.get('date__range'),
                                  fields)

        # Sort in descending order.
        data = sorted(data,
                      key=lambda document: document['date'],
                      reverse=True)

        # Generate dictionary with options from ES document
        for val in data:
            # Convert the datetimes to a date.
            date_ = date(*val['date'].timetuple()[:3])
            if primary_field and primary_field != 'count':
                rv = dict(count=val[primary_field], date=date_, end=date_)
            else:
                rv = dict(count=val['count'], date=date_, end=date_)
            for extra_field in extra_fields:
                rv[extra_field] = val[extra_field]
            rv.update(extra_values)
            yield rv
Example #25
def _get_trending(app_id):
    """
    Calculate trending for app for all regions and per region.

    a = installs from 8 days ago to 1 day ago
    b = installs from 29 days ago to 9 days ago, averaged per week
    trending = (a - b) / b if a > 100 and b > 1 else 0

    Returns value in the format of::

        {'all': <global trending score>,
         <region_slug>: <regional trending score>,
         ...}

    """
    # How many app installs are required in the prior week to be considered
    # "trending". Adjust this as total Marketplace app installs increases.
    #
    # Note: AMO uses 1000.0 for add-ons.
    PRIOR_WEEK_INSTALL_THRESHOLD = 100.0

    client = get_monolith_client()

    week1 = {
        'filter': {
            'range': {
                'date': {
                    'gte': days_ago(8).date().isoformat(),
                    'lte': days_ago(1).date().isoformat()
                }
            }
        },
        'aggs': {
            'total_installs': {
                'sum': {
                    'field': 'app_installs'
                }
            }
        }
    }
    week3 = {
        'filter': {
            'range': {
                'date': {
                    'gte': days_ago(29).date().isoformat(),
                    'lte': days_ago(9).date().isoformat()
                }
            }
        },
        'aggs': {
            'total_installs': {
                'sum': {
                    'field': 'app_installs'
                }
            }
        }
    }

    query = {
        'query': {
            'filtered': {
                'query': {
                    'match_all': {}
                },
                'filter': {
                    'term': {
                        'app-id': app_id
                    }
                }
            }
        },
        'aggregations': {
            'week1': week1,
            'week3': week3,
            'region': {
                'terms': {
                    'field': 'region',
                    # Add size so we get all regions, not just the top 10.
                    'size': len(mkt.regions.ALL_REGIONS)
                },
                'aggregations': {
                    'week1': week1,
                    'week3': week3
                }
            }
        },
        'size': 0
    }

    try:
        res = client.raw(query)
    except ValueError as e:
        task_log.error('Error response from Monolith: {0}'.format(e))
        return {}

    if 'aggregations' not in res:
        task_log.error('No installs for app {}'.format(app_id))
        return {}

    def _score(week1, week3):
        # If last week app installs are < 100, this app isn't trending.
        if week1 < PRIOR_WEEK_INSTALL_THRESHOLD:
            return 0.0

        score = 0.0
        if week3 > 1.0:
            score = (week1 - week3) / week3
        if score < 0.0:
            score = 0.0
        return score

    # Global trending score.
    week1 = res['aggregations']['week1']['total_installs']['value']
    week3 = res['aggregations']['week3']['total_installs']['value'] / 3.0

    if week1 < PRIOR_WEEK_INSTALL_THRESHOLD:
        # If global installs over the last week aren't over 100, we
        # short-circuit and return a zero-like value as this is not a trending
        # app by definition. Since global installs aren't above 100, per-region
        # installs won't be either.
        return {}

    results = {'all': _score(week1, week3)}

    if 'region' in res['aggregations']:
        for regional_res in res['aggregations']['region']['buckets']:
            region_slug = regional_res['key']
            week1 = regional_res['week1']['total_installs']['value']
            week3 = regional_res['week3']['total_installs']['value'] / 3.0
            results[region_slug] = _score(week1, week3)

    return results
Example #26
def _get_installs(app_id):
    """
    Calculate popularity of app for all regions and per region.

    Returns value in the format of::

        {'all': <global installs>,
         <region_slug>: <regional installs>,
         ...}

    """
    # How many days of history to include when calculating popularity.
    POPULARITY_PERIOD = 90

    client = get_monolith_client()

    popular = {
        'filter': {
            'range': {
                'date': {
                    'gte': days_ago(POPULARITY_PERIOD).date().isoformat(),
                    'lte': days_ago(1).date().isoformat()
                }
            }
        },
        'aggs': {
            'total_installs': {
                'sum': {
                    'field': 'app_installs'
                }
            }
        }
    }

    query = {
        'query': {
            'filtered': {
                'query': {
                    'match_all': {}
                },
                'filter': {
                    'term': {
                        'app-id': app_id
                    }
                }
            }
        },
        'aggregations': {
            'popular': popular,
            'region': {
                'terms': {
                    'field': 'region',
                    # Add size so we get all regions, not just the top 10.
                    'size': len(mkt.regions.ALL_REGIONS)
                },
                'aggregations': {
                    'popular': popular
                }
            }
        },
        'size': 0
    }

    try:
        res = client.raw(query)
    except ValueError as e:
        task_log.error('Error response from Monolith: {0}'.format(e))
        return {}

    if 'aggregations' not in res:
        task_log.error('No installs for app {}'.format(app_id))
        return {}

    results = {
        'all': res['aggregations']['popular']['total_installs']['value']
    }

    if 'region' in res['aggregations']:
        for regional_res in res['aggregations']['region']['buckets']:
            region_slug = regional_res['key']
            popular = regional_res['popular']['total_installs']['value']
            results[region_slug] = popular

    return results