def analytics_hourly_histogram(request):
    """Shows an hourly histogram for the last 5 days of all responses.

    :arg request: an HttpRequest; honors an optional ``date_end``
        querystring parameter (parsed by ``smart_date``), defaulting
        to today.

    :returns: rendered ``hourly_histogram.html`` with a single
        "Hourly" series of (ms-since-epoch, count) pairs.

    """
    template = 'analytics/analyzer/hourly_histogram.html'

    date_end = smart_date(request.GET.get('date_end', None), fallback=None)
    if date_end is None:
        date_end = date.today()
    date_start = date_end - timedelta(days=5)

    search = ResponseMappingType.search()
    filters = F(created__gte=date_start, created__lte=date_end)
    # Fix: .filter() returns a new search object rather than mutating
    # in place, so the result must be reassigned. The original
    # discarded the return value; the bug was masked because the
    # facet_filter below applies the same filter to the facet, which
    # is all this view reads.
    search = search.filter(filters)

    hourly_histogram = search.facet_raw(
        hourly={
            'date_histogram': {'interval': 'hour', 'field': 'created'},
            'facet_filter': search._process_filters(filters.filters)
        }).facet_counts()

    # p['time'] is milliseconds since epoch, which is what the
    # frontend chart wants as the x value.
    hourly_data = dict((p['time'], p['count'])
                       for p in hourly_histogram['hourly'])
    # One hour expressed in milliseconds, to match the keys above.
    hour = 60 * 60 * 1000.0
    zero_fill(date_start, date_end, [hourly_data], spacing=hour)

    # FIXME: This is goofy. After zero_fill, we end up with a bunch of
    # trailing zeros for reasons I don't really understand, so instead
    # of fixing that, I'm just going to remove them here.
    hourly_data = sorted(hourly_data.items())
    while hourly_data and hourly_data[-1][1] == 0:
        hourly_data.pop(-1)

    histogram = [
        {'label': 'Hourly', 'name': 'hourly', 'data': hourly_data},
    ]

    return render(request, template, {
        'histogram': histogram,
        'start_date': date_start,
        'end_date': date_end
    })
def analytics_hourly_histogram(request):
    """Shows an hourly histogram for the last 5 days of all responses.

    :arg request: an HttpRequest; honors an optional ``date_end``
        querystring parameter (parsed by ``smart_date``), defaulting
        to today.

    :returns: rendered ``hourly_histogram.html`` with a single
        "Hourly" series of (ms-since-epoch, count) pairs.

    """
    template = 'analytics/analyzer/hourly_histogram.html'

    date_end = smart_date(
        request.GET.get('date_end', None), fallback=None)
    if date_end is None:
        date_end = date.today()
    date_start = date_end - timedelta(days=5)

    search = ResponseMappingType.search()
    filters = F(created__gte=date_start, created__lte=date_end)
    # Fix: .filter() returns a new search object rather than mutating
    # in place, so the result must be reassigned. The original
    # discarded the return value; the bug was masked because the
    # facet_filter below applies the same filter to the facet, which
    # is all this view reads.
    search = search.filter(filters)

    hourly_histogram = search.facet_raw(
        hourly={
            'date_histogram': {'interval': 'hour', 'field': 'created'},
            'facet_filter': search._process_filters(filters.filters)
        }).facet_counts()

    # p['time'] is milliseconds since epoch, which is what the
    # frontend chart wants as the x value.
    hourly_data = dict((p['time'], p['count'])
                       for p in hourly_histogram['hourly'])
    # One hour expressed in milliseconds, to match the keys above.
    hour = 60 * 60 * 1000.0
    zero_fill(date_start, date_end, [hourly_data], spacing=hour)

    # FIXME: This is goofy. After zero_fill, we end up with a bunch of
    # trailing zeros for reasons I don't really understand, so instead
    # of fixing that, I'm just going to remove them here.
    hourly_data = sorted(hourly_data.items())
    while hourly_data and hourly_data[-1][1] == 0:
        hourly_data.pop(-1)

    histogram = [
        {'label': 'Hourly', 'name': 'hourly', 'data': hourly_data},
    ]

    return render(request, template, {
        'histogram': histogram,
        'start_date': date_start,
        'end_date': date_end
    })
def test_zerofill(self):
    """zero_fill() should add an entry for every day in [start, end]."""
    def ms(day):
        # Millisecond timestamp for midnight on the given January 2012 day.
        return epoch_milliseconds(datetime(2012, 1, day))

    start = datetime(2012, 1, 1)
    end = datetime(2012, 1, 7)

    # Sparse histograms: some days inside the range, one (Jan 10)
    # outside it.
    data1 = dict.fromkeys([ms(3), ms(5)], 1)
    data2 = dict.fromkeys([ms(2), ms(5), ms(10)], 1)

    zero_fill(start, end, [data1, data2])

    for day in range(1, 7):
        millis = ms(day)
        for filled in (data1, data2):
            assert millis in filled, 'Day %s was not zero filled.' % day
def analytics_search(request):
    """Analyzer search view over feedback responses.

    Reads filter parameters from the querystring, builds an
    elasticsearch query, and renders the results page with a page of
    matching opinions, faceted-navigation counts and a happy/sad
    daily histogram. ``?format=csv`` short-circuits to the CSV
    export.

    """
    template = 'analytics/analyzer/search.html'
    output_format = request.GET.get('format', None)
    page = smart_int(request.GET.get('page', 1), 1)

    # Note: If we add additional querystring fields, we need to add
    # them to generate_dashboard_url.
    search_happy = request.GET.get('happy', None)
    search_platform = request.GET.get('platform', None)
    search_locale = request.GET.get('locale', None)
    search_product = request.GET.get('product', None)
    search_version = request.GET.get('version', None)
    search_query = request.GET.get('q', None)
    search_date_start = smart_date(
        request.GET.get('date_start', None), fallback=None)
    search_date_end = smart_date(
        request.GET.get('date_end', None), fallback=None)
    selected = request.GET.get('selected', None)
    search_has_email = request.GET.get('has_email', None)
    search_country = request.GET.get('country', None)
    search_domain = request.GET.get('domain', None)
    search_api = smart_int(request.GET.get('api', None), fallback=None)
    search_source = request.GET.get('source', None)
    search_campaign = request.GET.get('campaign', None)
    search_organic = request.GET.get('organic', None)

    filter_data = []
    current_search = {'page': page}

    search = ResponseDocType.docs.search()
    f = F()

    # If search happy is '0' or '1', set it to False or True, respectively.
    search_happy = {'0': False, '1': True}.get(search_happy, None)
    if search_happy in [False, True]:
        f &= F('term', happy=search_happy)
        current_search['happy'] = int(search_happy)

    # If search has_email is '0' or '1', set it to False or True,
    # respectively.
    search_has_email = {'0': False, '1': True}.get(search_has_email, None)
    if search_has_email in [False, True]:
        f &= F('term', has_email=search_has_email)
        current_search['has_email'] = int(search_has_email)

    def unknown_to_empty(text):
        """Convert "Unknown" to "" to support old links"""
        return u'' if text.lower() == u'unknown' else text

    if search_platform is not None:
        f &= F('term', platform=unknown_to_empty(search_platform))
        current_search['platform'] = search_platform
    if search_locale is not None:
        f &= F('term', locale=unknown_to_empty(search_locale))
        current_search['locale'] = search_locale
    if search_product is not None:
        f &= F('term', product=unknown_to_empty(search_product))
        current_search['product'] = search_product

        # Only show the version if there's a product.
        if search_version is not None:
            # Note: We only filter on version if we're filtering on
            # product.
            f &= F('term', version=unknown_to_empty(search_version))
            current_search['version'] = search_version

        # Only show the country if the product is Firefox OS.
        if search_country is not None and search_product == 'Firefox OS':
            f &= F('term', country=unknown_to_empty(search_country))
            current_search['country'] = search_country
    if search_domain is not None:
        f &= F('term', url_domain=unknown_to_empty(search_domain))
        current_search['domain'] = search_domain
    if search_api is not None:
        f &= F('term', api=search_api)
        current_search['api'] = search_api

    # Default to the last 7 days when no dates were supplied.
    if search_date_start is None and search_date_end is None:
        selected = '7d'
    if search_date_end is None:
        search_date_end = date.today()
    if search_date_start is None:
        search_date_start = search_date_end - timedelta(days=7)

    # If the start and end dates are inverted, switch them into proper
    # chronological order
    search_date_start, search_date_end = sorted(
        [search_date_start, search_date_end])

    current_search['date_end'] = search_date_end.strftime('%Y-%m-%d')
    f &= F('range', created={'lte': search_date_end})

    current_search['date_start'] = search_date_start.strftime('%Y-%m-%d')
    f &= F('range', created={'gte': search_date_start})

    if search_query:
        current_search['q'] = search_query
        search = search.query('simple_query_string', query=search_query,
                              fields=['description'])
    if search_source is not None:
        f &= F('term', source=search_source)
        current_search['source'] = search_source
    if search_campaign is not None:
        f &= F('term', campaign=search_campaign)
        current_search['campaign'] = search_campaign
    search_organic = {'0': False, '1': True}.get(search_organic, None)
    if search_organic in [False, True]:
        f &= F('term', organic=search_organic)
        current_search['organic'] = int(search_organic)

    search = search.filter(f).sort('-created')

    # If they're asking for a CSV export, then send them to the export
    # screen.
    if output_format == 'csv':
        return _analytics_search_export(request, search)

    # Keep an unmodified copy for the histogram aggregation below --
    # the paginated/aggregated query mutates `search`.
    original_search = search._clone()

    # Search results and pagination
    if page < 1:
        page = 1
    page_count = 50
    start = page_count * (page - 1)
    end = start + page_count

    search_count = search.count()
    search_results = search.fields('id')[start:end].execute()
    opinion_page_ids = [mem['id'][0] for mem in search_results]

    # We convert what we get back from ES to what's in the db so we
    # can get all the information.
    opinion_page = Response.objects.filter(id__in=opinion_page_ids)

    # Add navigation aggregations
    counts = {
        'happy': {},
        'has_email': {},
        'platform': {},
        'locale': {},
        'country': {},
        'product': {},
        'version': {},
        'url_domain': {},
        'api': {},
        'source': {},
        'campaign': {},
        'organic': {},
    }
    # One terms aggregation per facet; the field name doubles as the
    # aggregation name.
    for name in counts.keys():
        search.aggs.bucket(name, 'terms', field=name, size=1000)
    results = search.execute()

    # Extract the value and doc_count for the various facets we do
    # faceted navigation on.
    for name in counts.keys():
        buckets = getattr(results.aggregations, name)['buckets']
        for bucket in buckets:
            key = bucket['key']
            # Convert from 'T'/'F' to True/False
            if key in ['T', 'F']:
                key = (key == 'T')
            counts[name][key] = bucket['doc_count']

    def empty_to_unknown(text):
        return 'Unknown' if text == u'' else text

    filter_data.extend([
        counts_to_options(
            counts['happy'].items(),
            name='happy',
            display='Sentiment',
            display_map={True: 'Happy', False: 'Sad'},
            value_map={True: 1, False: 0},
            checked=search_happy),
        counts_to_options(
            counts['has_email'].items(),
            name='has_email',
            display='Has email',
            display_map={True: 'Yes', False: 'No'},
            value_map={True: 1, False: 0},
            checked=search_has_email),
        counts_to_options(
            counts['product'].items(),
            name='product',
            display='Product',
            display_map=empty_to_unknown,
            checked=search_product)
    ])

    # Only show the version if we're showing a specific
    # product.
    if search_product:
        filter_data.append(
            counts_to_options(
                counts['version'].items(),
                name='version',
                display='Version',
                display_map=empty_to_unknown,
                checked=search_version)
        )

    # Only show the country if the product is Firefox OS.
    if search_product == 'Firefox OS':
        filter_data.append(
            counts_to_options(
                counts['country'].items(),
                name='country',
                display='Country',
                checked=search_country,
                display_map=country_name),
        )

    filter_data.extend(
        [
            counts_to_options(
                counts['platform'].items(),
                name='platform',
                display='Platform',
                display_map=empty_to_unknown,
                checked=search_platform),
            counts_to_options(
                counts['locale'].items(),
                name='locale',
                display='Locale',
                checked=search_locale,
                display_map=locale_name),
            counts_to_options(
                counts['url_domain'].items(),
                name='domain',
                display='Domain',
                checked=search_domain,
                display_map=empty_to_unknown),
            counts_to_options(
                counts['api'].items(),
                name='api',
                display='API version',
                checked=search_api,
                display_map=empty_to_unknown),
            counts_to_options(
                counts['organic'].items(),
                name='organic',
                display='Organic',
                display_map={True: 'Yes', False: 'No'},
                value_map={True: 1, False: 0},
                checked=search_organic),
            counts_to_options(
                counts['source'].items(),
                name='source',
                display='Source',
                checked=search_source,
                display_map=empty_to_unknown),
            counts_to_options(
                counts['campaign'].items(),
                name='campaign',
                display='Campaign',
                checked=search_campaign,
                display_map=empty_to_unknown),
        ]
    )

    # Histogram data
    happy_data = []
    sad_data = []

    # Nested aggregation: per-day buckets, each split by sentiment.
    (original_search.aggs
     .bucket('histogram', 'date_histogram', field='created', interval='day')
     .bucket('per_sentiment', 'terms', field='happy'))

    results = original_search.execute()
    buckets = results.aggregations['histogram']['buckets']
    happy_data = {}
    sad_data = {}
    for bucket in buckets:
        # value -> count
        val_counts = dict(
            (item['key'], item['doc_count'])
            for item in bucket['per_sentiment']['buckets']
        )
        # key is ms since epoch here which is what the frontend wants, so
        # we can just leave it.
        happy_data[bucket['key']] = val_counts.get('T', 0)
        sad_data[bucket['key']] = val_counts.get('F', 0)

    zero_fill(search_date_start, search_date_end, [happy_data, sad_data])

    histogram = [
        {'label': 'Happy', 'name': 'happy',
         'data': sorted(happy_data.items())},
        {'label': 'Sad', 'name': 'sad',
         'data': sorted(sad_data.items())},
    ]

    return render(request, template, {
        'opinions': opinion_page,
        'opinion_count': search_count,
        'filter_data': filter_data,
        'histogram': histogram,
        'page': page,
        'prev_page': page - 1 if start > 0 else None,
        'next_page': page + 1 if end < search_count else None,
        'current_search': current_search,
        'selected': selected,
    })
def dashboard(request):
    """Front-page dashboard of feedback responses.

    Similar to the analyzer search view but restricted to publicly
    visible products and the last 6 months of data. Supports
    ``?format=atom`` and ``?format=json`` feed output.

    """
    template = 'analytics/dashboard.html'
    output_format = request.GET.get('format', None)
    page = smart_int(request.GET.get('page', 1), 1)

    # Note: If we add additional querystring fields, we need to add
    # them to generate_dashboard_url.
    search_happy = request.GET.get('happy', None)
    search_platform = request.GET.get('platform', None)
    search_locale = request.GET.get('locale', None)
    search_product = request.GET.get('product', None)
    search_version = request.GET.get('version', None)
    search_query = request.GET.get('q', None)
    search_date_start = smart_date(
        request.GET.get('date_start', None), fallback=None)
    search_date_end = smart_date(
        request.GET.get('date_end', None), fallback=None)
    search_bigram = request.GET.get('bigram', None)
    selected = request.GET.get('selected', None)

    filter_data = []
    current_search = {'page': page}

    search = ResponseMappingType.search()
    f = F()

    # If search happy is '0' or '1', set it to False or True, respectively.
    search_happy = {'0': False, '1': True}.get(search_happy, None)
    if search_happy in [False, True]:
        f &= F(happy=search_happy)
        current_search['happy'] = int(search_happy)

    def unknown_to_empty(text):
        """Convert "Unknown" to "" to support old links"""
        return u'' if text.lower() == u'unknown' else text

    if search_platform is not None:
        f &= F(platform=unknown_to_empty(search_platform))
        current_search['platform'] = search_platform
    if search_locale is not None:
        f &= F(locale=unknown_to_empty(search_locale))
        current_search['locale'] = search_locale

    visible_products = [
        prod.encode('utf-8')
        for prod in Product.objects.public().values_list('db_name', flat=True)
    ]

    # This covers the "unknown" product which is also visible.
    visible_products.append('')

    if search_product in visible_products:
        f &= F(product=unknown_to_empty(search_product))
        current_search['product'] = search_product

        if search_version is not None:
            # Note: We only filter on version if we're filtering on
            # product.
            f &= F(version=unknown_to_empty(search_version))
            current_search['version'] = search_version
    else:
        # No (visible) product selected: restrict to all visible
        # products instead.
        f &= F(product__in=visible_products)

    # Default to the last 7 days when no dates were supplied.
    if search_date_start is None and search_date_end is None:
        selected = '7d'
    if search_date_end is None:
        search_date_end = date.today()
    if search_date_start is None:
        search_date_start = search_date_end - timedelta(days=7)

    # If the start and end dates are inverted, switch them into proper
    # chronoligcal order
    search_date_start, search_date_end = sorted(
        [search_date_start, search_date_end])

    # Restrict the frontpage dashboard to only show the last 6 months
    # of data
    six_months_ago = date.today() - timedelta(days=180)
    search_date_start = max(six_months_ago, search_date_start)
    search_date_end = max(search_date_start, search_date_end)

    current_search['date_end'] = search_date_end.strftime('%Y-%m-%d')
    f &= F(created__lte=search_date_end)

    current_search['date_start'] = search_date_start.strftime('%Y-%m-%d')
    f &= F(created__gte=search_date_start)

    if search_query:
        current_search['q'] = search_query
        search = search.query(description__sqs=search_query)

    if search_bigram is not None:
        f &= F(description_bigrams=search_bigram)
        filter_data.append({
            'display': _('Bigram'),
            'name': 'bigram',
            'options': [{
                'count': 'all',
                'name': search_bigram,
                'display': search_bigram,
                'value': search_bigram,
                'checked': True
            }]
        })

    search = search.filter(f).order_by('-created')

    # If the user asked for a feed, give him/her a feed!
    if output_format == 'atom':
        return generate_atom_feed(request, search)
    elif output_format == 'json':
        return generate_json_feed(request, search)

    # Search results and pagination
    if page < 1:
        page = 1
    page_count = 20
    start = page_count * (page - 1)
    end = start + page_count

    search_count = search.count()
    opinion_page = search[start:end]

    # Navigation facet data
    facets = search.facet(
        'happy', 'platform', 'locale', 'product', 'version',
        size=1000,
        filtered=bool(search._process_filters(f.filters)))

    # This loop does two things. First it maps 'T' -> True and 'F' ->
    # False. This is probably something EU should be doing for
    # us. Second, it restructures the data into a more convenient
    # form.
    counts = {
        'happy': {},
        'platform': {},
        'locale': {},
        'product': {},
        'version': {}
    }

    # Pre-seed the counts with the currently selected values so they
    # show up in the facet options even when ES returns no bucket for
    # them.
    happy_sad_filter = request.GET.get('happy', None)
    if happy_sad_filter:
        if happy_sad_filter == '1':
            counts['happy'] = {True: 0}
        elif happy_sad_filter == '0':
            counts['happy'] = {False: 0}
    if search_platform:
        counts['platform'] = {search_platform: 0}
    if search_locale:
        counts['locale'] = {search_locale: 0}
    if search_product:
        counts['product'] = {search_product: 0}
    if search_version:
        counts['version'] = {search_version: 0}

    for param, terms in facets.facet_counts().items():
        for term in terms:
            name = term['term']
            if name.upper() == 'T':
                name = True
            elif name.upper() == 'F':
                name = False
            counts[param][name] = term['count']

    def empty_to_unknown(text):
        return _('Unknown') if text == u'' else text

    filter_data.extend([
        counts_to_options(
            counts['happy'].items(),
            name='happy',
            display=_('Sentiment'),
            display_map={True: _('Happy'), False: _('Sad')},
            value_map={True: 1, False: 0},
            checked=search_happy),
        counts_to_options(
            counts['product'].items(),
            name='product',
            display=_('Product'),
            display_map=empty_to_unknown,
            checked=search_product)
    ])

    # Only show the version if we're showing a specific
    # product.
    if search_product:
        filter_data.append(
            counts_to_options(
                counts['version'].items(),
                name='version',
                display=_('Version'),
                display_map=empty_to_unknown,
                checked=search_version)
        )
    else:
        filter_data.append({
            'display': _('Version'),
            'note': _('Select product to see version breakdown')
        })

    filter_data.extend(
        [
            counts_to_options(
                counts['platform'].items(),
                name='platform',
                display=_('Platform'),
                display_map=empty_to_unknown,
                checked=search_platform),
            counts_to_options(
                counts['locale'].items(),
                name='locale',
                display=_('Locale'),
                checked=search_locale,
                display_map=locale_name),
        ]
    )

    # Histogram data
    happy_data = []
    sad_data = []

    happy_f = f & F(happy=True)
    sad_f = f & F(happy=False)
    histograms = search.facet_raw(
        happy={
            'date_histogram': {'interval': 'day', 'field': 'created'},
            'facet_filter': search._process_filters(happy_f.filters)
        },
        sad={
            'date_histogram': {'interval': 'day', 'field': 'created'},
            'facet_filter': search._process_filters(sad_f.filters)
        },
    ).facet_counts()

    # p['time'] is number of milliseconds since the epoch. Which is
    # convenient, because that is what the front end wants.
    happy_data = dict((p['time'], p['count']) for p in histograms['happy'])
    sad_data = dict((p['time'], p['count']) for p in histograms['sad'])

    zero_fill(search_date_start, search_date_end, [happy_data, sad_data])

    histogram = [
        {'label': _('Happy'), 'name': 'happy',
         'data': sorted(happy_data.items())},
        {'label': _('Sad'), 'name': 'sad',
         'data': sorted(sad_data.items())},
    ]

    return render(request, template, {
        'opinions': opinion_page,
        'opinion_count': search_count,
        'filter_data': filter_data,
        'histogram': histogram,
        'page': page,
        'prev_page': page - 1 if start > 0 else None,
        'next_page': page + 1 if end < search_count else None,
        'current_search': current_search,
        'selected': selected,
        'atom_url': generate_dashboard_url(request),
    })
def get(self, request):
    """Return a JSON histogram of response counts for a date range.

    Recognized querystring parameters: ``happy``, ``platforms``,
    ``locales``, ``products``, ``versions``, ``source``, ``api``,
    ``date_start``, ``date_end``, ``date_delta``, ``q`` and
    ``interval`` ('hour' or 'day'; anything else falls back to 'day').

    :returns: a DRF Response with ``results``: a sorted list of
        (ms-since-epoch, count) pairs.

    """
    # FIXME: Rewrite this to use aggs and allow multiple layers.
    search = models.ResponseDocType.docs.search()
    f = F()

    if 'happy' in request.GET:
        happy = {'0': False, '1': True}.get(request.GET['happy'], None)
        if happy is not None:
            f &= F('term', happy=happy)

    if 'platforms' in request.GET:
        platforms = request.GET['platforms'].split(',')
        if platforms:
            f &= F('terms', platform=platforms)

    if 'locales' in request.GET:
        locales = request.GET['locales'].split(',')
        if locales:
            f &= F('terms', locale=locales)

    if 'products' in request.GET:
        products = request.GET['products'].split(',')
        if products:
            f &= F('terms', product=products)

    if 'versions' in request.GET:
        versions = request.GET['versions'].split(',')
        if versions:
            f &= F('terms', version=versions)

    if 'source' in request.GET:
        # FIXME: Having a , in the source is valid, so this might not work
        # right.
        sources = request.GET['source'].split(',')
        if sources:
            f &= F('terms', source=sources)

    if 'api' in request.GET:
        # The int (as a str) or "None"
        apis = request.GET['api'].split(',')
        if apis:
            f &= F('terms', api=apis)

    date_start = smart_date(request.GET.get('date_start', None))
    date_end = smart_date(request.GET.get('date_end', None))
    delta = smart_timedelta(request.GET.get('date_delta', None))

    # Default to 7d.
    if not date_start and not date_end:
        delta = delta or smart_timedelta('7d')

    if delta is not None:
        if date_end is not None:
            date_start = date_end - delta
        elif date_start is not None:
            date_end = date_start + delta
        else:
            date_end = date.today()
            date_start = date_end - delta

    # If there's no end, then the end is today.
    if not date_end:
        date_end = date.today()

    # Fix: if only date_end was supplied (no date_start and no
    # date_delta), date_start was still None here and the range check
    # below raised TypeError (HTTP 500). Default the start to 7 days
    # before the end in that case, matching the overall 7d default.
    if not date_start:
        date_start = date_end - timedelta(days=7)

    # Restrict to a 6 month range. Must have a start date.
    if (date_end - date_start) > timedelta(days=180):
        date_end = date_start + timedelta(days=180)

    # date_start up to but not including date_end.
    f &= F('range', created={'gte': date_start, 'lt': date_end})

    search_query = request.GET.get('q', None)
    if search_query is not None:
        search = search.query(
            'simple_query_string', query=search_query,
            fields=['description'])

    search = search.filter(f)

    # FIXME: improve validation
    interval = request.GET.get('interval', 'day')
    if interval not in ('hour', 'day'):
        interval = 'day'

    search.aggs.bucket(
        'histogram', 'date_histogram', field='created', interval=interval
    )

    resp = search.execute()
    data = dict((p['key'], p['doc_count'])
                for p in resp.aggregations['histogram']['buckets'])

    zero_fill(date_start, date_end - timedelta(days=1), [data])

    return rest_framework.response.Response({
        'results': sorted(data.items())
    })
def product_dashboard_firefox(request, prod):
    """Product dashboard with totals, by-platform and by-version histograms.

    :arg request: an HttpRequest; honors ``q``, ``date_start`` and
        ``date_end`` querystring parameters (dates default to the last
        7 days ending today).
    :arg prod: a product object with a ``db_name`` attribute.

    Platforms/versions contributing fewer than 2% of the total
    responses in the range are omitted from their histograms.

    """
    # Note: Not localized because it's ultra-alpha.
    template = 'analytics/product_dashboard_firefox.html'
    current_search = {}

    search_query = request.GET.get('q', None)
    if search_query:
        current_search['q'] = search_query

    search_date_end = smart_date(
        request.GET.get('date_end', None), fallback=None)
    if search_date_end is None:
        search_date_end = date.today()
    current_search['date_end'] = search_date_end.strftime('%Y-%m-%d')

    search_date_start = smart_date(
        request.GET.get('date_start', None), fallback=None)
    if search_date_start is None:
        search_date_start = search_date_end - timedelta(days=7)
    current_search['date_start'] = search_date_start.strftime('%Y-%m-%d')

    histogram = generate_totals_histogram(
        search_date_start, search_date_end, search_query, prod)

    # FIXME: This is lame, but we need to make sure the item we're
    # looking at is the totals.
    assert histogram[1]['name'] == 'total'
    totals_sum = sum([p[1] for p in histogram[1]['data']])

    search = ResponseMappingType.search()
    if search_query:
        search = search.query(description__sqs=search_query)

    base_f = F()
    base_f &= F(product=prod.db_name)
    base_f &= F(created__gte=search_date_start)
    base_f &= F(created__lt=search_date_end)

    search = search.filter(base_f)

    # Figure out the list of platforms and versions for this range.
    plats_and_vers = (search
                      .facet('platform', 'version', size=50)
                      .facet_counts())

    # Figure out the "by platform" histogram
    platforms = [part['term'] for part in plats_and_vers['platform']]
    platform_facet = {}
    for plat in platforms:
        plat_f = base_f & F(platform=plat)
        # Empty-string platform means "unknown"; use that as the
        # facet key so it renders sensibly.
        platform_facet[plat if plat else 'unknown'] = {
            'date_histogram': {'interval': 'day', 'field': 'created'},
            'facet_filter': search._process_filters(plat_f.filters)
        }

    platform_counts = search.facet_raw(**platform_facet).facet_counts()
    platforms_histogram = []
    for key in platform_counts.keys():
        # p['time'] is ms since epoch -- what the frontend chart wants.
        data = dict((p['time'], p['count']) for p in platform_counts[key])

        sum_counts = sum([p['count'] for p in platform_counts[key]])
        if sum_counts < (totals_sum * 0.02):
            # Skip platforms where the number of responses is less than
            # 2% of the total.
            continue

        zero_fill(search_date_start, search_date_end, [data])
        platforms_histogram.append({
            'name': key,
            'label': key,
            'data': sorted(data.items()),
            'lines': {'show': True, 'fill': False},
            'points': {'show': True},
        })

    # Figure out the "by version" histogram
    versions = [part['term'] for part in plats_and_vers['version']]
    version_facet = {}
    for vers in versions:
        vers_f = base_f & F(version=vers)
        # Prefix with 'v' so the facet key reads like "v42.0";
        # empty-string version maps to "unknown".
        version_facet['v' + vers if vers else 'unknown'] = {
            'date_histogram': {'interval': 'day', 'field': 'created'},
            'facet_filter': search._process_filters(vers_f.filters)
        }

    version_counts = search.facet_raw(**version_facet).facet_counts()
    versions_histogram = []
    for key in version_counts.keys():
        data = dict((p['time'], p['count']) for p in version_counts[key])

        sum_counts = sum([p['count'] for p in version_counts[key]])
        if sum_counts < (totals_sum * 0.02):
            # Skip versions where the number of responses is less than
            # 2% of the total.
            continue

        zero_fill(search_date_start, search_date_end, [data])
        versions_histogram.append({
            'name': key,
            'label': key,
            'data': sorted(data.items()),
            'lines': {'show': True, 'fill': False},
            'points': {'show': True},
        })

    return render(request, template, {
        'start_date': search_date_start,
        'end_date': search_date_end,
        'current_search': current_search,
        'platforms_histogram': platforms_histogram,
        'versions_histogram': versions_histogram,
        'histogram': histogram,
        'product': prod
    })
def generate_totals_histogram(search_date_start, search_date_end,
                              search_query, prod):
    """Build flot-style histogram series for a product's responses.

    :arg search_date_start: start date of the range (the function
        extends it back one day internally so day-over-day deltas can
        be computed for the first day of the requested range).
    :arg search_date_end: end date of the range (exclusive in the
        created-date filter).
    :arg search_query: optional text query over descriptions.
    :arg prod: a product object with a ``db_name`` attribute.

    :returns: a list of four series dicts: 'zero' (baseline line),
        'total' (daily response counts), 'updeltas' and 'downdeltas'
        (day-over-day change in percent-happy, split by sign).

    NOTE(review): if the range yields no data, ``totals_data`` ends up
    empty after the [1:] slice and the 'zero' series indexing below
    would raise IndexError -- presumably callers guarantee a non-empty
    range; confirm.

    """
    # Note: Not localized because it's ultra-alpha.

    # Extend the range one day back so the first requested day has a
    # "yesterday" to diff against; the extra day is dropped at the end.
    search_date_start = search_date_start - timedelta(days=1)

    search = ResponseMappingType.search()
    if search_query:
        search = search.query(description__sqs=search_query)

    f = F()
    f &= F(product=prod.db_name)
    f &= F(created__gte=search_date_start)
    f &= F(created__lt=search_date_end)

    happy_f = f & F(happy=True)

    totals_histogram = search.facet_raw(
        total={
            'date_histogram': {'interval': 'day', 'field': 'created'},
            'facet_filter': search._process_filters(f.filters)
        },
        happy={
            'date_histogram': {'interval': 'day', 'field': 'created'},
            'facet_filter': search._process_filters(happy_f.filters)
        },
    ).facet_counts()

    # p['time'] is ms since epoch, which is what the frontend wants.
    totals_data = dict((p['time'], p['count'])
                       for p in totals_histogram['total'])
    zero_fill(search_date_start, search_date_end, [totals_data])
    totals_data = sorted(totals_data.items())

    happy_data = dict((p['time'], p['count'])
                      for p in totals_histogram['happy'])
    zero_fill(search_date_start, search_date_end, [happy_data])
    happy_data = sorted(happy_data.items())

    up_deltas = []
    down_deltas = []
    for i, hap in enumerate(happy_data):
        # The first entry has no previous day to diff against.
        if i == 0:
            continue

        yesterday = 0
        today = 0

        # Figure out yesterday and today as a percent to one
        # significant digit.
        if happy_data[i-1][1] and totals_data[i-1][1]:
            yesterday = (
                int(happy_data[i-1][1] * 1.0 / totals_data[i-1][1] * 1000)
                / 10.0
            )

        if happy_data[i][1] and totals_data[i][1]:
            today = (
                int(happy_data[i][1] * 1.0 / totals_data[i][1] * 1000)
                / 10.0
            )

        # Split deltas by sign so they can be rendered as two
        # differently-colored bar series.
        if (today - yesterday) >= 0:
            up_deltas.append((happy_data[i][0], today - yesterday))
        else:
            down_deltas.append((happy_data[i][0], today - yesterday))

    # Nix the first total because it's not in our date range
    totals_data = totals_data[1:]

    histogram = [
        {
            'name': 'zero',
            'data': [(totals_data[0][0], 0), (totals_data[-1][0], 0)],
            'yaxis': 2,
            'lines': {'show': True, 'fill': False, 'lineWidth': 1,
                      'shadowSize': 0},
            'color': '#dddddd',
        },
        {
            'name': 'total',
            'label': 'Total # responses',
            'data': totals_data,
            'yaxis': 1,
            'lines': {'show': True, 'fill': False},
            'points': {'show': True},
            'color': '#3E72BF',
        },
        {
            'name': 'updeltas',
            'label': 'Percent change in sentiment upwards',
            'data': up_deltas,
            'yaxis': 2,
            'bars': {'show': True, 'lineWidth': 3},
            'points': {'show': True},
            'color': '#55E744',
        },
        {
            'name': 'downdeltas',
            'label': 'Percent change in sentiment downwards',
            'data': down_deltas,
            'yaxis': 2,
            'bars': {'show': True, 'lineWidth': 3},
            'points': {'show': True},
            'color': '#E73E3E',
        }
    ]
    return histogram
def analytics_search(request):
    """Analyzer search page: filtered response list plus faceted navigation.

    Reads search criteria from the querystring, builds an Elasticsearch
    filter, and renders a page of matching responses together with
    facet counts (for the filter sidebar) and a happy/sad day histogram.
    ``format=csv`` short-circuits to the CSV export view.
    """
    template = 'analytics/analyzer/search.html'

    output_format = request.GET.get('format', None)
    page = smart_int(request.GET.get('page', 1), 1)

    # Note: If we add additional querystring fields, we need to add
    # them to generate_dashboard_url.
    search_happy = request.GET.get('happy', None)
    search_platform = request.GET.get('platform', None)
    search_locale = request.GET.get('locale', None)
    search_product = request.GET.get('product', None)
    search_version = request.GET.get('version', None)
    search_query = request.GET.get('q', None)
    search_date_start = smart_date(
        request.GET.get('date_start', None), fallback=None)
    search_date_end = smart_date(
        request.GET.get('date_end', None), fallback=None)
    search_bigram = request.GET.get('bigram', None)
    selected = request.GET.get('selected', None)

    search_has_email = request.GET.get('has_email', None)
    search_country = request.GET.get('country', None)
    search_domain = request.GET.get('domain', None)
    search_api = smart_int(request.GET.get('api', None), fallback=None)
    search_source = request.GET.get('source', None)
    search_campaign = request.GET.get('campaign', None)
    search_organic = request.GET.get('organic', None)

    # Sidebar filter widgets to render.
    filter_data = []
    # Echo of the active criteria; used to rebuild querystrings for
    # pagination/filter links in the template.
    current_search = {'page': page}

    search = ResponseDocType.docs.search()
    f = F()
    # If search happy is '0' or '1', set it to False or True, respectively.
    search_happy = {'0': False, '1': True}.get(search_happy, None)
    if search_happy in [False, True]:
        f &= F('term', happy=search_happy)
        current_search['happy'] = int(search_happy)

    # If search has_email is '0' or '1', set it to False or True,
    # respectively.
    search_has_email = {'0': False, '1': True}.get(search_has_email, None)
    if search_has_email in [False, True]:
        f &= F('term', has_email=search_has_email)
        current_search['has_email'] = int(search_has_email)

    def unknown_to_empty(text):
        """Convert "Unknown" to "" to support old links"""
        return u'' if text.lower() == u'unknown' else text

    if search_platform is not None:
        f &= F('term', platform=unknown_to_empty(search_platform))
        current_search['platform'] = search_platform
    if search_locale is not None:
        f &= F('term', locale=unknown_to_empty(search_locale))
        current_search['locale'] = search_locale
    if search_product is not None:
        f &= F('term', product=unknown_to_empty(search_product))
        current_search['product'] = search_product

        # Only show the version if there's a product.
        if search_version is not None:
            # Note: We only filter on version if we're filtering on
            # product.
            f &= F('term', version=unknown_to_empty(search_version))
            current_search['version'] = search_version

        # Only show the country if the product is Firefox OS.
        if search_country is not None and search_product == 'Firefox OS':
            f &= F('term', country=unknown_to_empty(search_country))
            current_search['country'] = search_country

    if search_domain is not None:
        f &= F('term', url_domain=unknown_to_empty(search_domain))
        current_search['domain'] = search_domain

    if search_api is not None:
        f &= F('term', api=search_api)
        current_search['api'] = search_api

    # No explicit dates at all: mark the "7d" preset as selected in
    # the UI and fall through to the 7-day defaults below.
    if search_date_start is None and search_date_end is None:
        selected = '7d'

    if search_date_end is None:
        search_date_end = date.today()
    if search_date_start is None:
        search_date_start = search_date_end - timedelta(days=7)

    # If the start and end dates are inverted, switch them into proper
    # chronological order
    search_date_start, search_date_end = sorted(
        [search_date_start, search_date_end])

    current_search['date_end'] = search_date_end.strftime('%Y-%m-%d')
    f &= F('range', created={'lte': search_date_end})

    current_search['date_start'] = search_date_start.strftime('%Y-%m-%d')
    f &= F('range', created={'gte': search_date_start})

    if search_query:
        current_search['q'] = search_query
        search = search.query('simple_query_string', query=search_query,
                              fields=['description'])

    if search_bigram is not None:
        f &= F('terms', description_bigrams=search_bigram)
        # Bigrams don't get facet counts, so hand-build the one
        # (checked) option for the sidebar.
        filter_data.append({
            'display': 'Bigram',
            'name': 'bigram',
            'options': [{
                'count': 'all',
                'name': search_bigram,
                'display': search_bigram,
                'value': search_bigram,
                'checked': True
            }]
        })

    if search_source is not None:
        f &= F('term', source=search_source)
        current_search['source'] = search_source
    if search_campaign is not None:
        f &= F('term', campaign=search_campaign)
        current_search['campaign'] = search_campaign
    search_organic = {'0': False, '1': True}.get(search_organic, None)
    if search_organic in [False, True]:
        f &= F('term', organic=search_organic)
        current_search['organic'] = int(search_organic)

    search = search.filter(f).sort('-created')

    # If they're asking for a CSV export, then send them to the export
    # screen.
    if output_format == 'csv':
        return _analytics_search_export(request, search)

    # Clone before the aggregations below mutate `search` in place;
    # the clone is reused for the histogram aggregation further down.
    original_search = search._clone()

    # Search results and pagination
    if page < 1:
        page = 1
    page_count = 50
    start = page_count * (page - 1)
    end = start + page_count

    search_count = search.count()
    search_results = search.fields('id')[start:end].execute()
    opinion_page_ids = [mem['id'][0] for mem in search_results]

    # We convert what we get back from ES to what's in the db so we
    # can get all the information.
    opinion_page = Response.objects.filter(id__in=opinion_page_ids)

    # Add navigation aggregations
    counts = {
        'happy': {},
        'has_email': {},
        'platform': {},
        'locale': {},
        'country': {},
        'product': {},
        'version': {},
        'url_domain': {},
        'api': {},
        'source': {},
        'campaign': {},
        'organic': {},
    }

    # One terms aggregation per facet field; size=1000 so we see
    # (practically) all distinct values.
    for name in counts.keys():
        search.aggs.bucket(name, 'terms', field=name, size=1000)

    results = search.execute()

    # Extract the value and doc_count for the various facets we do
    # faceted navigation on.
    for name in counts.keys():
        buckets = getattr(results.aggregations, name)['buckets']
        for bucket in buckets:
            key = bucket['key']
            # Convert from 'T'/'F' to True/False
            if key in ['T', 'F']:
                key = (key == 'T')
            counts[name][key] = bucket['doc_count']

    def empty_to_unknown(text):
        # Inverse of unknown_to_empty: show "" values as "Unknown".
        return 'Unknown' if text == u'' else text

    filter_data.extend([
        counts_to_options(
            counts['happy'].items(),
            name='happy',
            display='Sentiment',
            display_map={True: 'Happy', False: 'Sad'},
            value_map={True: 1, False: 0},
            checked=search_happy),
        counts_to_options(
            counts['has_email'].items(),
            name='has_email',
            display='Has email',
            display_map={True: 'Yes', False: 'No'},
            value_map={True: 1, False: 0},
            checked=search_has_email),
        counts_to_options(
            counts['product'].items(),
            name='product',
            display='Product',
            display_map=empty_to_unknown,
            checked=search_product)
    ])

    # Only show the version if we're showing a specific
    # product.
    if search_product:
        filter_data.append(
            counts_to_options(
                counts['version'].items(),
                name='version',
                display='Version',
                display_map=empty_to_unknown,
                checked=search_version)
        )
        # Only show the country if the product is Firefox OS.
        if search_product == 'Firefox OS':
            filter_data.append(
                counts_to_options(
                    counts['country'].items(),
                    name='country',
                    display='Country',
                    checked=search_country,
                    display_map=country_name),
            )

    filter_data.extend(
        [
            counts_to_options(
                counts['platform'].items(),
                name='platform',
                display='Platform',
                display_map=empty_to_unknown,
                checked=search_platform),
            counts_to_options(
                counts['locale'].items(),
                name='locale',
                display='Locale',
                checked=search_locale,
                display_map=locale_name),
            counts_to_options(
                counts['url_domain'].items(),
                name='domain',
                display='Domain',
                checked=search_domain,
                display_map=empty_to_unknown),
            counts_to_options(
                counts['api'].items(),
                name='api',
                display='API version',
                checked=search_api,
                display_map=empty_to_unknown),
            counts_to_options(
                counts['organic'].items(),
                name='organic',
                display='Organic',
                display_map={True: 'Yes', False: 'No'},
                value_map={True: 1, False: 0},
                checked=search_organic),
            counts_to_options(
                counts['source'].items(),
                name='source',
                display='Source',
                checked=search_source,
                display_map=empty_to_unknown),
            counts_to_options(
                counts['campaign'].items(),
                name='campaign',
                display='Campaign',
                checked=search_campaign,
                display_map=empty_to_unknown),
        ]
    )

    # Histogram data
    happy_data = []
    sad_data = []

    # Day buckets, each sub-bucketed by the happy flag ('T'/'F').
    (original_search.aggs
     .bucket('histogram', 'date_histogram', field='created', interval='day')
     .bucket('per_sentiment', 'terms', field='happy')
    )

    results = original_search.execute()
    buckets = results.aggregations['histogram']['buckets']
    happy_data = {}
    sad_data = {}
    for bucket in buckets:
        # value -> count
        val_counts = dict(
            (item['key'], item['doc_count'])
            for item in bucket['per_sentiment']['buckets']
        )
        # key is ms since epoch here which is what the frontend wants, so
        # we can just leave it.
        happy_data[bucket['key']] = val_counts.get('T', 0)
        sad_data[bucket['key']] = val_counts.get('F', 0)

    zero_fill(search_date_start, search_date_end, [happy_data, sad_data])

    histogram = [
        {'label': 'Happy', 'name': 'happy',
         'data': sorted(happy_data.items())},
        {'label': 'Sad', 'name': 'sad',
         'data': sorted(sad_data.items())},
    ]

    return render(request, template, {
        'opinions': opinion_page,
        'opinion_count': search_count,
        'filter_data': filter_data,
        'histogram': histogram,
        'page': page,
        'prev_page': page - 1 if start > 0 else None,
        'next_page': page + 1 if end < search_count else None,
        'current_search': current_search,
        'selected': selected,
    })
def get(self, request):
    """API endpoint: date histogram of response counts.

    Optional querystring parameters:

    * ``happy``: '0' or '1'
    * ``platforms``, ``locales``, ``products``, ``versions``:
      comma-separated value lists
    * ``date_start``, ``date_end``, ``date_delta``: window spec;
      defaults to the last 7 days, capped at a 180 day range
    * ``q``: free-text query over descriptions
    * ``interval``: 'hour' or 'day' (default 'day')

    :returns: DRF Response with ``{'results': [(time, count), ...]}``
        sorted by time
    """
    search = models.ResponseMappingType.search()
    f = F()

    if 'happy' in request.GET:
        happy = {'0': False, '1': True}.get(request.GET['happy'], None)
        if happy is not None:
            f &= F(happy=happy)

    if 'platforms' in request.GET:
        platforms = request.GET['platforms'].split(',')
        if platforms:
            f &= F(platform__in=platforms)

    if 'locales' in request.GET:
        locales = request.GET['locales'].split(',')
        if locales:
            f &= F(locale__in=locales)

    if 'products' in request.GET:
        products = request.GET['products'].split(',')
        if products:
            f &= F(product__in=products)

    if 'versions' in request.GET:
        versions = request.GET['versions'].split(',')
        if versions:
            f &= F(version__in=versions)

    date_start = smart_date(request.GET.get('date_start', None))
    date_end = smart_date(request.GET.get('date_end', None))
    delta = smart_timedelta(request.GET.get('date_delta', None))

    # Default to 7d.
    if not date_start and not date_end:
        delta = delta or smart_timedelta('7d')

    if delta is not None:
        if date_end is not None:
            date_start = date_end - delta
        elif date_start is not None:
            date_end = date_start + delta
        else:
            date_end = date.today()
            date_start = date_end - delta

    # If there's no end, then the end is today.
    if not date_end:
        date_end = date.today()

    # If there's still no start (date_end given with no date_start and
    # no date_delta), default to a 7 day window ending at date_end.
    # Previously this case crashed with a TypeError on date - None in
    # the range check below.
    if not date_start:
        date_start = date_end - timedelta(days=7)

    # Restrict to a 6 month range.
    if (date_end - date_start) > timedelta(days=180):
        date_end = date_start + timedelta(days=180)

    # date_start up to but not including date_end.
    f &= F(created__gte=date_start, created__lt=date_end)

    search_query = request.GET.get('q', None)
    if search_query is not None:
        search = search.query(description__sqs=search_query)

    search = search.filter(f)

    # FIXME: improve validation
    interval = request.GET.get('interval', 'day')
    if interval not in ('hour', 'day'):
        interval = 'day'

    histograms = search.facet_raw(
        counts={
            'date_histogram': {'interval': interval, 'field': 'created'},
            'facet_filter': search._process_filters(f.filters)
        }
    ).facet_counts()

    data = dict((p['time'], p['count']) for p in histograms['counts'])
    zero_fill(date_start, date_end - timedelta(days=1), [data])

    return rest_framework.response.Response({
        'results': sorted(data.items())
    })
def get(self, request):
    """API endpoint: date histogram of response counts (aggs version).

    Optional querystring parameters:

    * ``happy``: '0' or '1'
    * ``platforms``, ``locales``, ``products``, ``versions``:
      comma-separated value lists
    * ``date_start``, ``date_end``, ``date_delta``: window spec;
      defaults to the last 7 days, capped at a 180 day range
    * ``q``: free-text query over descriptions
    * ``interval``: 'hour' or 'day' (default 'day')

    :returns: DRF Response with ``{'results': [(time, count), ...]}``
        sorted by time
    """
    # FIXME: Rewrite this to use aggs and allow multiple layers.
    search = models.ResponseDocType.docs.search()
    f = F()

    if 'happy' in request.GET:
        happy = {'0': False, '1': True}.get(request.GET['happy'], None)
        if happy is not None:
            f &= F('term', happy=happy)

    if 'platforms' in request.GET:
        platforms = request.GET['platforms'].split(',')
        if platforms:
            f &= F('terms', platform=platforms)

    if 'locales' in request.GET:
        locales = request.GET['locales'].split(',')
        if locales:
            f &= F('terms', locale=locales)

    if 'products' in request.GET:
        products = request.GET['products'].split(',')
        if products:
            f &= F('terms', product=products)

    if 'versions' in request.GET:
        versions = request.GET['versions'].split(',')
        if versions:
            f &= F('terms', version=versions)

    date_start = smart_date(request.GET.get('date_start', None))
    date_end = smart_date(request.GET.get('date_end', None))
    delta = smart_timedelta(request.GET.get('date_delta', None))

    # Default to 7d.
    if not date_start and not date_end:
        delta = delta or smart_timedelta('7d')

    if delta is not None:
        if date_end is not None:
            date_start = date_end - delta
        elif date_start is not None:
            date_end = date_start + delta
        else:
            date_end = date.today()
            date_start = date_end - delta

    # If there's no end, then the end is today.
    if not date_end:
        date_end = date.today()

    # If there's still no start (date_end given with no date_start and
    # no date_delta), default to a 7 day window ending at date_end.
    # Previously this case crashed with a TypeError on date - None in
    # the range check below.
    if not date_start:
        date_start = date_end - timedelta(days=7)

    # Restrict to a 6 month range.
    if (date_end - date_start) > timedelta(days=180):
        date_end = date_start + timedelta(days=180)

    # date_start up to but not including date_end.
    f &= F('range', created={'gte': date_start, 'lt': date_end})

    search_query = request.GET.get('q', None)
    if search_query is not None:
        search = search.query('simple_query_string', query=search_query,
                              fields=['description'])

    search = search.filter(f)

    # FIXME: improve validation
    interval = request.GET.get('interval', 'day')
    if interval not in ('hour', 'day'):
        interval = 'day'

    search.aggs.bucket('histogram', 'date_histogram', field='created',
                       interval=interval)

    resp = search.execute()

    data = dict((p['key'], p['doc_count'])
                for p in resp.aggregations['histogram']['buckets'])
    zero_fill(date_start, date_end - timedelta(days=1), [data])

    return rest_framework.response.Response(
        {'results': sorted(data.items())})