def index_finance_total_by_currency(addons, **kw):
    """
    Bug 757581 Total finance stats, currency breakdown.

    For each add-on, aggregates its contributions per distinct currency and
    indexes one document per (add-on, currency) pair.

    addons -- list of add-on ids whose contributions get aggregated
    kw['index'] -- optional index/alias name; defaults to the
                   Contribution index.
    """
    index = kw.get('index', Contribution._get_index())
    # Resolve the target indices once. The original rebound `index` inside
    # the inner loop, so every iteration after the first called
    # get_indices() with a concrete index name instead of the requested one.
    indices = get_indices(index)
    es = amo.search.get_es()
    log.info('Indexing total financial stats by currency for %s apps.' %
             len(addons))
    for addon in addons:
        # Get all contributions for given add-on.
        qs = Contribution.objects.filter(addon=addon, uuid=None)
        if not qs.exists():
            continue
        # Get list of distinct currencies.
        currencies = set(qs.values_list('currency', flat=True))
        for currency in currencies:
            try:
                # Deterministic doc id so re-runs update, not duplicate.
                key = ord_word('cur' + str(addon) + currency.lower())
                data = search.get_finance_total(
                    qs, addon, 'currency', currency=currency)
                for idx in indices:
                    if not already_indexed(Contribution, data, idx):
                        Contribution.index(data, bulk=True, id=key,
                                           index=idx)
                es.flush_bulk(forced=True)
            except Exception as exc:
                index_finance_total_by_currency.retry(args=[addons],
                                                      exc=exc, **kw)
                raise
def index_finance_total_by_src(addons, **kw):
    """
    Bug 758059 Total finance stats, source breakdown.

    For each add-on, aggregates its contributions per distinct source and
    indexes one document per (add-on, source) pair.
    """
    index = kw.get('index', Contribution._get_index())
    # Resolve the target indices once; the original rebound `index` inside
    # the inner loop, breaking later iterations' alias resolution.
    indices = get_indices(index)
    # NOTE(review): sibling tasks use amo.search.get_es(); confirm whether
    # elasticutils.get_es() is intentional here.
    es = elasticutils.get_es()
    log.info('Indexing total financial stats by source for %s apps.' %
             len(addons))
    for addon in addons:
        # Get all contributions for given add-on.
        qs = Contribution.objects.filter(addon=addon, uuid=None)
        if not qs.exists():
            continue
        # Get list of distinct sources.
        sources = set(qs.values_list('source', flat=True))
        for source in sources:
            try:
                # Deterministic doc id so re-runs update, not duplicate.
                key = ord_word('src' + str(addon) + str(source))
                data = search.get_finance_total(qs, addon, 'source',
                                                source=source)
                for idx in indices:
                    if not already_indexed(Contribution, data, idx):
                        Contribution.index(data, bulk=True, id=key,
                                           index=idx)
                es.flush_bulk(forced=True)
            except Exception as exc:
                index_finance_total_by_src.retry(args=[addons], exc=exc,
                                                 **kw)
                raise
def index_finance_total_inapp(addons, **kw):
    """
    Bug 758071 Aggregates financial stats from all of the contributions
    for in-apps.

    Indexes one document per (add-on, in-app name) pair.
    """
    index = kw.get('index', InappPayment._get_index())
    # Resolve the target indices once; the original rebound `index` inside
    # the inner loop, breaking later iterations' alias resolution.
    indices = get_indices(index)
    es = amo.search.get_es()
    log.info('Indexing total financial in-app stats for %s apps.' %
             len(addons))
    for addon in addons:
        # Get all in-app names for given addon.
        inapps = set(InappPayment.objects.filter(config__addon=addon).
                     values_list('name', flat=True))
        for inapp_name in inapps:
            # Get all in-app payments for given in-app.
            qs = InappPayment.objects.filter(name=inapp_name,
                                             contribution__uuid=None)
            if not qs.exists():
                continue
            try:
                # Deterministic doc id so re-runs update, not duplicate.
                key = ord_word('totinapp' + str(addon) + inapp_name)
                data = search.get_finance_total_inapp(qs, addon, inapp_name)
                for idx in indices:
                    if not already_indexed(InappPayment, data, idx):
                        InappPayment.index(data, bulk=True, id=key,
                                           index=idx)
                es.flush_bulk(forced=True)
            except Exception as exc:
                index_finance_total_inapp.retry(args=[addons], exc=exc,
                                                **kw)
                raise
def index_collection_counts(ids, **kw):
    """
    Index the collection stats for the given collection ids.

    ids -- collection ids whose CollectionCount rows get indexed
    kw['index'] -- optional index/alias name (popped; None resolves to the
                   default indices via get_indices).
    """
    index = kw.pop('index', None)
    indices = get_indices(index)
    es = amo.search.get_es()
    qs = CollectionCount.objects.filter(collection__in=ids)
    if qs:
        log.info('Indexing %s addon collection counts: %s' %
                 (qs.count(), qs[0].date))
        try:
            for collection_count in qs:
                collection = collection_count.collection_id
                # One doc per collection/date pair.
                key = '%s-%s' % (collection, collection_count.date)
                filters = dict(collection=collection,
                               date=collection_count.date)
                data = search.extract_addon_collection(
                    collection_count,
                    AddonCollectionCount.objects.filter(**filters),
                    CollectionStats.objects.filter(**filters))
                for index in indices:
                    CollectionCount.index(data, bulk=True, id=key,
                                          index=index)
            es.flush_bulk(forced=True)
        except Exception as exc:
            # NOTE(review): unlike index_update_counts, the original does
            # not forward **kw on retry — preserved as-is.
            index_collection_counts.retry(args=[ids], exc=exc)
            raise
def index_installed_daily(ids, **kw):
    """
    Takes a list of Installed ids and uses its addon and date fields to
    index stats for that day.

    Uses a nested dict to skip duplicate addon/date pairs within a batch.

    ids -- ids of mkt.webapps.Installed objects
    """
    from mkt.webapps.models import Installed
    index = kw.get('index', Installed._get_index())
    # Resolve the target indices once; the original rebound `index` inside
    # the loop, breaking later iterations' alias resolution.
    indices = get_indices(index)
    es = amo.search.get_es()
    # Get Installed's.
    qs = (Installed.objects.filter(id__in=set(ids)).
          order_by('-created').values('addon', 'created'))
    log.info('[%s] Indexing %s installed counts for daily stats.' %
             (qs[0]['created'], len(qs)))
    addons_dates = defaultdict(lambda: defaultdict(dict))
    for installed in qs:
        addon = installed['addon']
        date = installed['created'].strftime('%Y%m%d')
        try:
            # Only index one doc per unique addon/date pair.
            if date not in addons_dates[addon]:
                key = ord_word('ins' + str(addon) + str(date))
                data = search.get_installed_daily(installed)
                for idx in indices:
                    if not already_indexed(Installed, data, idx):
                        Installed.index(data, bulk=True, id=key, index=idx)
                addons_dates[addon][date] = 0
            es.flush_bulk(forced=True)
        except Exception as exc:
            index_installed_daily.retry(args=[ids], exc=exc, **kw)
            raise
def index_finance_total_inapp_by_src(addons, **kw):
    """
    Total finance in-app stats, src breakdown.

    Indexes one document per (add-on, in-app name, source) triple.
    """
    index = kw.get('index', InappPayment._get_index())
    # Resolve the target indices once; the original rebound `index` inside
    # the loop, breaking later iterations' alias resolution.
    indices = get_indices(index)
    es = amo.search.get_es()
    log.info('Indexing total financial in-app stats by src for %s apps.' %
             len(addons))
    for addon in addons:
        # Get all in-app names for given addon.
        inapps = set(
            InappPayment.objects.filter(config__addon=addon).values_list(
                'name', flat=True))
        for inapp_name in inapps:
            # Get all in-app payments for given in-app.
            qs = InappPayment.objects.filter(name=inapp_name,
                                             contribution__uuid=None)
            if not qs.exists():
                continue
            # Get a list of distinct sources for given in-app.
            sources = set(qs.values_list('contribution__source', flat=True))
            for source in sources:
                try:
                    key = ord_word('srcinapp' + str(addon) + inapp_name +
                                   source.lower())
                    try:
                        data = search.get_finance_total_inapp(
                            qs, addon, inapp_name, 'source', source=source)
                        for idx in indices:
                            if not already_indexed(InappPayment, data, idx):
                                InappPayment.index(data, bulk=True, id=key,
                                                   index=idx)
                    except Exception:
                        # We ignore this error for now. See #805181
                        pass
                    es.flush_bulk(forced=True)
                except Exception as exc:
                    # Fix: retry *this* task; the original retried
                    # index_finance_total_by_src by mistake.
                    index_finance_total_inapp_by_src.retry(
                        args=[addons], exc=exc, **kw)
                    raise
def index_webapps(ids, **kw):
    """Index the webapps with the given ids into every active index."""
    task_log.info('Indexing apps %s-%s. [%s]' % (ids[0], ids[-1], len(ids)))
    alias = kw.pop('index', WebappIndexer.get_index())
    # While a reindex is in progress, `get_indices` resolves the alias to
    # more than one index; write the document to each of them.
    targets = get_indices(alias)
    es = WebappIndexer.get_es(urls=settings.ES_URLS)
    webapps = Webapp.indexing_transformer(Webapp.uncached.filter(id__in=ids))
    for webapp in webapps:
        document = WebappIndexer.extract_document(webapp.id, webapp)
        for target in targets:
            WebappIndexer.index(document, id_=webapp.id, es=es, index=target)
def unindex_webapps(ids, **kw):
    """Remove the webapps with the given ids from every active index."""
    task_log.info("Un-indexing apps %s-%s. [%s]" % (ids[0], ids[-1], len(ids)))
    alias = kw.pop("index", WebappIndexer.get_index())
    # While a reindex is in progress, `get_indices` resolves the alias to
    # more than one index; delete the document from each of them.
    targets = get_indices(alias)
    es = WebappIndexer.get_es(urls=settings.ES_URLS)
    for webapp_id in ids:
        for target in targets:
            try:
                WebappIndexer.unindex(id_=webapp_id, es=es, index=target)
            except ElasticHttpNotFoundError:
                # Ignore if it's not there.
                task_log.info(
                    u"[Webapp:%s] Unindexing app but not found in index"
                    % webapp_id)
def index_finance_total_inapp_by_currency(addons, **kw):
    """
    Bug 758071 Total finance in-app stats, currency breakdown.

    Indexes one document per (add-on, in-app name, currency) triple.
    """
    index = kw.get('index', InappPayment._get_index())
    # Resolve the target indices once; the original rebound `index` inside
    # the loop, breaking later iterations' alias resolution.
    indices = get_indices(index)
    es = amo.search.get_es()
    log.info('Indexing total financial in-app stats by currency for %s apps.'
             % len(addons))
    for addon in addons:
        # Get all in-app names for given addon.
        inapps = set(
            InappPayment.objects.filter(config__addon=addon).values_list(
                'name', flat=True))
        for inapp_name in inapps:
            # Get all in-app payments for given in-app.
            qs = InappPayment.objects.filter(name=inapp_name,
                                             contribution__uuid=None)
            if not qs.exists():
                continue
            # Get a list of distinct currencies for given in-app.
            currencies = set(
                qs.values_list('contribution__currency', flat=True))
            for currency in currencies:
                try:
                    key = ord_word('curinapp' + str(addon) + inapp_name +
                                   currency.lower())
                    data = search.get_finance_total_inapp(
                        qs, addon, inapp_name, 'currency', currency=currency)
                    for idx in indices:
                        if not already_indexed(InappPayment, data, idx):
                            InappPayment.index(data, bulk=True, id=key,
                                               index=idx)
                    es.flush_bulk(forced=True)
                except Exception as exc:
                    # Fix: retry *this* task; the original retried
                    # index_finance_total_by_currency by mistake.
                    index_finance_total_inapp_by_currency.retry(
                        args=[addons], exc=exc, **kw)
                    raise
def unindex_webapps(ids, **kw):
    """Remove the given webapp ids from the (possibly several) app indices."""
    task_log.info('Un-indexing apps %s-%s. [%s]' % (ids[0], ids[-1], len(ids)))
    index = kw.pop('index', WebappIndexer.get_index())
    # Note: If reindexing is currently occurring, `get_indices` will return
    # more than one index.
    indices = get_indices(index)
    es = WebappIndexer.get_es(urls=settings.ES_URLS)
    for id_ in ids:
        for idx in indices:
            try:
                WebappIndexer.unindex(id_=id_, es=es, index=idx)
            except ElasticHttpNotFoundError:
                # Ignore if it's not there.
                task_log.info(
                    u'[Webapp:%s] Unindexing app but not found in index' %
                    id_)
def index_finance_total_inapp_by_src(addons, **kw):
    """
    Total finance in-app stats, src breakdown.

    Indexes one document per (add-on, in-app name, source) triple.
    """
    index = kw.get('index', InappPayment._get_index())
    # Resolve the target indices once; the original rebound `index` inside
    # the loop, breaking later iterations' alias resolution.
    indices = get_indices(index)
    es = amo.search.get_es()
    log.info('Indexing total financial in-app stats by src for %s apps.' %
             len(addons))
    for addon in addons:
        # Get all in-app names for given addon.
        inapps = set(InappPayment.objects.filter(config__addon=addon).
                     values_list('name', flat=True))
        for inapp_name in inapps:
            # Get all in-app payments for given in-app.
            qs = InappPayment.objects.filter(name=inapp_name,
                                             contribution__uuid=None)
            if not qs.exists():
                continue
            # Get a list of distinct sources for given in-app.
            sources = set(qs.values_list('contribution__source', flat=True))
            for source in sources:
                try:
                    key = ord_word('srcinapp' + str(addon) + inapp_name +
                                   source.lower())
                    try:
                        data = search.get_finance_total_inapp(
                            qs, addon, inapp_name, 'source', source=source)
                        for idx in indices:
                            if not already_indexed(InappPayment, data, idx):
                                InappPayment.index(data, bulk=True, id=key,
                                                   index=idx)
                    except Exception:
                        # We ignore this error for now. See #805181
                        pass
                    es.flush_bulk(forced=True)
                except Exception as exc:
                    # Fix: retry *this* task; the original retried
                    # index_finance_total_by_src by mistake.
                    index_finance_total_inapp_by_src.retry(
                        args=[addons], exc=exc, **kw)
                    raise
def index_finance_daily(ids, **kw):
    """
    Bug 748015 Takes a list of Contribution ids and uses its addon and date
    fields to index stats for that day.

    Contribution stats by addon-date unique pair. Uses a nested dictionary
    to not index duplicate contribution with same addon/date pairs. For each
    addon-date, it stores the addon in the dict as a top level key with a
    dict as its value. And it stores the date in the add-on's dict as a
    second level key. To check if an addon-date pair has been already index,
    it looks up the dict[addon][date] to see if the key exists. This adds
    some speed up when batch processing.

    ids -- ids of apps.stats.Contribution objects
    """
    index = kw.get('index', Contribution._get_index())
    # Resolve the target indices once; the original rebound `index` inside
    # the loop, breaking later iterations' alias resolution.
    indices = get_indices(index)
    es = amo.search.get_es()
    # Get contributions.
    qs = (Contribution.objects.filter(id__in=ids)
          .order_by('created').values('addon', 'created'))
    log.info('[%s] Indexing %s contributions for daily stats.' %
             (qs[0]['created'], len(ids)))
    addons_dates = defaultdict(lambda: defaultdict(dict))
    for contribution in qs:
        addon = contribution['addon']
        date = contribution['created'].strftime('%Y%m%d')
        try:
            # Date for add-on not processed, index it and give it key.
            if date not in addons_dates[addon]:
                key = ord_word('fin' + str(addon) + str(date))
                data = search.get_finance_daily(contribution)
                for idx in indices:
                    if not already_indexed(Contribution, data, idx):
                        Contribution.index(data, bulk=True, id=key,
                                           index=idx)
                addons_dates[addon][date] = 0
            es.flush_bulk(forced=True)
        except Exception as exc:
            index_finance_daily.retry(args=[ids], exc=exc, **kw)
            raise
def index_update_counts(ids, **kw): index = kw.pop('index', None) indices = get_indices(index) es = amo.search.get_es() qs = UpdateCount.objects.filter(id__in=ids) if qs: log.info('Indexing %s updates for %s.' % (qs.count(), qs[0].date)) try: for update in qs: key = '%s-%s' % (update.addon_id, update.date) data = search.extract_update_count(update) for index in indices: UpdateCount.index(data, bulk=True, id=key, index=index) es.flush_bulk(forced=True) except Exception, exc: index_update_counts.retry(args=[ids], exc=exc, **kw) raise
def index_finance_daily(ids, **kw):
    """
    Bug 748015 Takes a list of Contribution ids and uses its addon and date
    fields to index stats for that day.

    Contribution stats by addon-date unique pair. Uses a nested dictionary
    to not index duplicate contribution with same addon/date pairs. For each
    addon-date, it stores the addon in the dict as a top level key with a
    dict as its value. And it stores the date in the add-on's dict as a
    second level key. To check if an addon-date pair has been already index,
    it looks up the dict[addon][date] to see if the key exists. This adds
    some speed up when batch processing.

    ids -- ids of apps.stats.Contribution objects
    """
    index = kw.get('index', Contribution._get_index())
    # Resolve the target indices once; the original rebound `index` inside
    # the loop, breaking later iterations' alias resolution.
    indices = get_indices(index)
    es = amo.search.get_es()
    # Get contributions.
    qs = (Contribution.objects.filter(id__in=ids)
          .order_by('created').values('addon', 'created'))
    log.info('[%s] Indexing %s contributions for daily stats.' %
             (qs[0]['created'], len(ids)))
    addons_dates = defaultdict(lambda: defaultdict(dict))
    for contribution in qs:
        addon = contribution['addon']
        date = contribution['created'].strftime('%Y%m%d')
        try:
            # Date for add-on not processed, index it and give it key.
            if date not in addons_dates[addon]:
                key = ord_word('fin' + str(addon) + str(date))
                data = search.get_finance_daily(contribution)
                for idx in indices:
                    if not already_indexed(Contribution, data, idx):
                        Contribution.index(data, bulk=True, id=key,
                                           index=idx)
                addons_dates[addon][date] = 0
            es.flush_bulk(forced=True)
        except Exception as exc:
            index_finance_daily.retry(args=[ids], exc=exc, **kw)
            raise
def index_download_counts(ids, **kw):
    """
    Bulk-index the DownloadCount rows with the given ids.

    ids -- DownloadCount primary keys
    kw['index'] -- optional index/alias name (popped; None resolves to the
                   default indices via get_indices).
    """
    index = kw.pop('index', None)
    indices = get_indices(index)
    es = amo.search.get_es()
    qs = DownloadCount.objects.filter(id__in=ids)
    if qs:
        log.info('Indexing %s downloads for %s.' % (qs.count(), qs[0].date))
        try:
            for dl in qs:
                # One doc per addon/date pair.
                key = '%s-%s' % (dl.addon_id, dl.date)
                data = search.extract_download_count(dl)
                for index in indices:
                    DownloadCount.index(data, bulk=True, id=key, index=index)
            es.flush_bulk(forced=True)
        except Exception as exc:
            # Forward **kw on retry for consistency with
            # index_update_counts (the original dropped it).
            index_download_counts.retry(args=[ids], exc=exc, **kw)
            raise
def index_finance_total_inapp_by_currency(addons, **kw):
    """
    Bug 758071 Total finance in-app stats, currency breakdown.

    Indexes one document per (add-on, in-app name, currency) triple.
    """
    index = kw.get('index', InappPayment._get_index())
    # Resolve the target indices once; the original rebound `index` inside
    # the loop, breaking later iterations' alias resolution.
    indices = get_indices(index)
    es = amo.search.get_es()
    log.info('Indexing total financial in-app stats by currency for %s apps.'
             % len(addons))
    for addon in addons:
        # Get all in-app names for given addon.
        inapps = set(InappPayment.objects.filter(config__addon=addon).
                     values_list('name', flat=True))
        for inapp_name in inapps:
            # Get all in-app payments for given in-app.
            qs = InappPayment.objects.filter(name=inapp_name,
                                             contribution__uuid=None)
            if not qs.exists():
                continue
            # Get a list of distinct currencies for given in-app.
            currencies = set(qs.values_list('contribution__currency',
                                            flat=True))
            for currency in currencies:
                try:
                    key = ord_word('curinapp' + str(addon) + inapp_name +
                                   currency.lower())
                    data = search.get_finance_total_inapp(
                        qs, addon, inapp_name, 'currency', currency=currency)
                    for idx in indices:
                        if not already_indexed(InappPayment, data, idx):
                            InappPayment.index(data, bulk=True, id=key,
                                               index=idx)
                    es.flush_bulk(forced=True)
                except Exception as exc:
                    # Fix: retry *this* task; the original retried
                    # index_finance_total_by_currency by mistake.
                    index_finance_total_inapp_by_currency.retry(
                        args=[addons], exc=exc, **kw)
                    raise
def index_finance_total(addons, **kw):
    """
    Aggregates financial stats from all of the contributions for a given
    app.

    addons -- list of add-on ids whose contributions get aggregated
    """
    index = kw.get('index', Contribution._get_index())
    # Resolve the target indices once; the original rebound `index` inside
    # the loop, so every add-on after the first resolved a concrete index
    # name instead of the requested one.
    indices = get_indices(index)
    es = amo.search.get_es()
    log.info('Indexing total financial stats for %s apps.' % len(addons))
    for addon in addons:
        # Get all contributions for given add-on.
        qs = Contribution.objects.filter(addon=addon, uuid=None)
        if not qs.exists():
            continue
        try:
            # Deterministic doc id so re-runs update, not duplicate.
            key = ord_word('tot' + str(addon))
            data = search.get_finance_total(qs, addon)
            for idx in indices:
                if not already_indexed(Contribution, data, idx):
                    Contribution.index(data, bulk=True, id=key, index=idx)
            es.flush_bulk(forced=True)
        except Exception as exc:
            index_finance_total.retry(args=[addons], exc=exc, **kw)
            raise
def compatibility_report(index=None, aliased=True):
    """
    Build and index per-add-on compatibility report documents.

    For each app in amo.APP_USAGE, gathers usage counts, per-version
    success/failure compat reports and supported-version ranges, then bulk
    indexes one AppCompat doc per add-on.

    index -- optional index/alias name passed to get_indices.
    aliased -- NOTE(review): accepted but never read in this body; presumably
               kept for caller compatibility — confirm before removing.
    """
    docs = defaultdict(dict)
    indices = get_indices(index)
    # Gather all the data for the index.
    for app in amo.APP_USAGE:
        versions = [c for c in settings.COMPAT if c['app'] == app.id]
        log.info(u'Making compat report for %s.' % app.pretty)
        latest = UpdateCount.objects.aggregate(d=Max('date'))['d']
        qs = UpdateCount.objects.filter(addon__appsupport__app=app.id,
                                        addon__disabled_by_user=False,
                                        addon__status__in=amo.VALID_STATUSES,
                                        addon___current_version__isnull=False,
                                        date=latest)
        updates = dict(qs.values_list('addon', 'count'))
        for chunk in amo.utils.chunked(updates.items(), 50):
            chunk = dict(chunk)
            for addon in Addon.objects.filter(id__in=chunk):
                doc = docs[addon.id]
                doc.update(id=addon.id, slug=addon.slug, guid=addon.guid,
                           self_hosted=addon.is_selfhosted(),
                           binary=addon.binary_components,
                           name=unicode(addon.name), created=addon.created,
                           current_version=addon.current_version.version,
                           current_version_id=addon.current_version.pk)
                doc['count'] = chunk[addon.id]
                doc.setdefault('top_95',
                               defaultdict(lambda: defaultdict(dict)))
                doc.setdefault('top_95_all', {})
                doc.setdefault('usage', {})[app.id] = updates[addon.id]
                doc.setdefault('works', {}).setdefault(app.id, {})
                # Populate with default counts for all app versions.
                for ver in versions:
                    doc['works'][app.id][vint(ver['main'])] = {
                        'success': 0,
                        'failure': 0,
                        'total': 0,
                        'failure_ratio': 0.0,
                    }
                # Group reports by `major`.`minor` app version.
                reports = (CompatReport.objects
                           .filter(guid=addon.guid, app_guid=app.guid)
                           .values_list('app_version', 'works_properly')
                           .annotate(Count('id')))
                for ver, works_properly, cnt in reports:
                    ver = vint(floor_version(ver))
                    # Map the report's version into the configured
                    # (previous, main] bucket it falls into.
                    major = [v['main'] for v in versions
                             if vint(v['previous']) < ver <= vint(v['main'])]
                    if major:
                        w = doc['works'][app.id][vint(major[0])]
                        # Tally number of success and failure reports.
                        w['success' if works_properly else 'failure'] += cnt
                        w['total'] += cnt
                        # Calculate % of incompatibility reports.
                        w['failure_ratio'] = w['failure'] / float(w['total'])
                if app not in addon.compatible_apps:
                    continue
                compat = addon.compatible_apps[app]
                d = {'min': compat.min.version_int,
                     'max': compat.max.version_int}
                doc.setdefault('support', {})[app.id] = d
                doc.setdefault('max_version', {})[app.id] = compat.max.version
        total = sum(updates.values())
        # Remember the total so we can show % of usage later.
        compat_total, created = CompatTotals.objects.safer_get_or_create(
            app=app.id, defaults={'total': total})
        if not created:
            compat_total.update(total=total)
        # Figure out which add-ons are in the top 95% for this app.
        running_total = 0
        for addon, count in sorted(updates.items(), key=lambda x: x[1],
                                   reverse=True):
            running_total += count
            docs[addon]['top_95_all'][app.id] = running_total < (.95 * total)
    # Mark the top 95% of add-ons compatible with the previous version for
    # each app + version combo.
    for compat in settings.COMPAT:
        app, ver = compat['app'], vint(compat['previous'])
        # Find all the docs that have a max_version compatible with ver.
        supported = [doc for doc in docs.values()
                     if app in doc.get('support', {})
                     and doc['support'][app]['max'] >= ver]
        # Sort by count so we can get the top 95% most-used add-ons.
        supported = sorted(supported, key=lambda d: d['count'], reverse=True)
        total = sum(doc['count'] for doc in supported)
        # Figure out which add-ons are in the top 95% for this app + version.
        running_total = 0
        for doc in supported:
            running_total += doc['count']
            doc['top_95'][app][ver] = running_total < (.95 * total)
    # Send it all to the index.
    for chunk in amo.utils.chunked(docs.values(), 150):
        for doc in chunk:
            for index in indices:
                AppCompat.index(doc, id=doc['id'], bulk=True, index=index)
        elasticutils.get_es().flush_bulk(forced=True)
def compatibility_report(index=None):
    """
    Build and index per-add-on compatibility report documents.

    For each app in amo.APP_USAGE, gathers usage counts, per-version
    success/failure compat reports and supported-version ranges, then
    indexes one AppCompat doc per add-on and refreshes the indices.

    index -- optional index/alias name passed to get_indices.
    """
    docs = defaultdict(dict)
    indices = get_indices(index)
    # Gather all the data for the index.
    for app in amo.APP_USAGE:
        versions = [c for c in amo.COMPAT if c['app'] == app.id]
        log.info(u'Making compat report for %s.' % app.pretty)
        latest = UpdateCount.objects.aggregate(d=Max('date'))['d']
        qs = UpdateCount.objects.filter(addon__appsupport__app=app.id,
                                        addon__disabled_by_user=False,
                                        addon__status__in=amo.VALID_STATUSES,
                                        addon___current_version__isnull=False,
                                        date=latest)
        updates = dict(qs.values_list('addon', 'count'))
        for chunk in amo.utils.chunked(updates.items(), 50):
            chunk = dict(chunk)
            for addon in Addon.objects.filter(id__in=chunk):
                doc = docs[addon.id]
                doc.update(id=addon.id, slug=addon.slug, guid=addon.guid,
                           binary=addon.binary_components,
                           name=unicode(addon.name), created=addon.created,
                           current_version=addon.current_version.version,
                           current_version_id=addon.current_version.pk)
                doc['count'] = chunk[addon.id]
                doc.setdefault('top_95',
                               defaultdict(lambda: defaultdict(dict)))
                doc.setdefault('top_95_all', {})
                doc.setdefault('usage', {})[app.id] = updates[addon.id]
                doc.setdefault('works', {}).setdefault(app.id, {})
                # Populate with default counts for all app versions.
                for ver in versions:
                    doc['works'][app.id][vint(ver['main'])] = {
                        'success': 0,
                        'failure': 0,
                        'total': 0,
                        'failure_ratio': 0.0,
                    }
                # Group reports by `major`.`minor` app version.
                reports = (CompatReport.objects.filter(
                    guid=addon.guid, app_guid=app.guid).values_list(
                        'app_version', 'works_properly').annotate(Count('id')))
                for ver, works_properly, cnt in reports:
                    ver = vint(floor_version(ver))
                    # Map the report's version into the configured
                    # (previous, main] bucket it falls into.
                    major = [
                        v['main'] for v in versions
                        if vint(v['previous']) < ver <= vint(v['main'])
                    ]
                    if major:
                        w = doc['works'][app.id][vint(major[0])]
                        # Tally number of success and failure reports.
                        w['success' if works_properly else 'failure'] += cnt
                        w['total'] += cnt
                        # Calculate % of incompatibility reports.
                        w['failure_ratio'] = w['failure'] / float(w['total'])
                if app not in addon.compatible_apps:
                    continue
                compat = addon.compatible_apps[app]
                d = {
                    'min': compat.min.version_int,
                    'max': compat.max.version_int
                }
                doc.setdefault('support', {})[app.id] = d
                doc.setdefault('max_version', {})[app.id] = compat.max.version
        total = sum(updates.values())
        # Remember the total so we can show % of usage later.
        compat_total, created = CompatTotals.objects.safer_get_or_create(
            app=app.id, defaults={'total': total})
        if not created:
            compat_total.update(total=total)
        # Figure out which add-ons are in the top 95% for this app.
        running_total = 0
        for addon, count in sorted(updates.items(), key=lambda x: x[1],
                                   reverse=True):
            running_total += count
            docs[addon]['top_95_all'][app.id] = running_total < (.95 * total)
    # Mark the top 95% of add-ons compatible with the previous version for
    # each app + version combo.
    for compat in amo.COMPAT:
        app, ver = compat['app'], vint(compat['previous'])
        # Find all the docs that have a max_version compatible with ver.
        supported = [
            doc for doc in docs.values()
            if app in doc.get('support', {})
            and doc['support'][app]['max'] >= ver
        ]
        # Sort by count so we can get the top 95% most-used add-ons.
        supported = sorted(supported, key=lambda d: d['count'], reverse=True)
        total = sum(doc['count'] for doc in supported)
        # Figure out which add-ons are in the top 95% for this app + version.
        running_total = 0
        for doc in supported:
            running_total += doc['count']
            doc['top_95'][app][ver] = running_total < (.95 * total)
    # Send it all to the index.
    for chunk in amo.utils.chunked(docs.values(), 150):
        for doc in chunk:
            for index in indices:
                AppCompat.index(doc, id=doc['id'], refresh=False, index=index)
    es = amo.search.get_es()
    es.indices.refresh()