def augment_response(collection, query, response):
    """HTML-escape the result documents and attach display metadata, in place.

    Nothing is returned; ``response`` is mutated:

    * Unless ``query['download']`` is truthy, every document in
      ``response['response']['docs']`` gets its non-numeric values escaped and
      gains the ``externalLink``, ``details`` and ``hueId`` keys. When
      ``response['moreLikeThis']`` holds an entry with a truthy ``numFound``
      for the document, that entry is moved onto the document as
      ``_childDocuments_`` / ``numFound``.
    * When ``response['highlighting']`` is non-empty (and this is not a
      download), the escaped highlight snippets are merged into the matching
      documents. If the collection has no ``idField``, ``response['warning']``
      is set instead.

    Args:
        collection: dict-like collection definition; only ``idField`` is read.
        query: dict-like query; only ``download`` is read.
        response: dict holding at least ``response['response']['docs']``.
    """
    is_download = query.get('download')

    # HTML escaping
    if not is_download:
        id_field = collection.get('idField', '')

        for doc in response['response']['docs']:
            # Build the link from the raw values, before they get escaped.
            link = None
            if 'link-meta' in doc:
                meta = json.loads(doc['link-meta'])
                link = get_data_link(meta)
            elif 'link' in doc:
                meta = {'type': 'link', 'link': doc['link']}
                link = get_data_link(meta)

            # Only values of existing keys are replaced here, so iterating
            # over the live dict is safe.
            for field, value in doc.items():
                if isinstance(value, numbers.Number):
                    escaped_value = value
                elif field == '_childDocuments_':  # Nested documents
                    escaped_value = value
                elif isinstance(value, list):  # Multivalue field
                    escaped_value = [smart_unicode(escape(val), errors='replace') for val in value]
                else:
                    value = smart_unicode(value, errors='replace')
                    escaped_value = escape(value)
                doc[field] = escaped_value

            doc['externalLink'] = link
            doc['details'] = []
            doc['hueId'] = smart_unicode(doc.get(id_field, ''))

            if 'moreLikeThis' in response:
                # A document may have no entry here (e.g. empty hueId), so
                # look it up leniently instead of indexing.
                similar = response['moreLikeThis'].get(doc['hueId'])
                if similar and similar.get('numFound'):
                    doc['_childDocuments_'] = similar['docs']
                    doc['numFound'] = similar['numFound']
                    del response['moreLikeThis'][doc['hueId']]

    highlighting_by_id = response.get('highlighting', {})
    if highlighting_by_id and not is_download:
        id_field = collection.get('idField')
        if id_field:
            for doc in response['response']['docs']:
                if id_field not in doc:
                    continue
                doc_id = smart_unicode(doc[id_field])
                if doc_id not in highlighting_by_id:
                    continue

                highlighting = highlighting_by_id[doc_id]
                if highlighting:
                    escaped_highlighting = {}
                    for field, hls in highlighting.items():
                        # escape() turns the <em> markers into entities;
                        # restore exactly those markers and nothing else.
                        _hls = [
                            escape(smart_unicode(hl, errors='replace'))
                            .replace('&lt;em&gt;', '<em>')
                            .replace('&lt;/em&gt;', '</em>')
                            for hl in hls
                        ]
                        escaped_highlighting[field] = _hls[0] if len(_hls) == 1 else _hls

                    doc.update(escaped_highlighting)
        else:
            response['warning'] = _("The Solr schema requires an id field for performing the result highlighting")
def test_get_data_link():
    """get_data_link builds the expected link for each kind of metadata."""
    # (expected link, metadata) pairs, checked in order.
    expectations = [
        (None, {}),
        ('gethue.com', {'type': 'link', 'link': 'gethue.com'}),
        (
            '/hbase/#Cluster/document_demo/query/20150527',
            {'type': 'hbase', 'table': 'document_demo', 'row_key': '20150527'},
        ),
        (
            '/hbase/#Cluster/document_demo/query/20150527[f1]',
            {'type': 'hbase', 'table': 'document_demo', 'row_key': '20150527', 'fam': 'f1'},
        ),
        (
            '/hbase/#Cluster/document_demo/query/20150527[f1:c1]',
            {'type': 'hbase', 'table': 'document_demo', 'row_key': '20150527', 'fam': 'f1', 'col': 'c1'},
        ),
        ('/filebrowser/view=/data/hue/1', {'type': 'hdfs', 'path': '/data/hue/1'}),
        ('/metastore/table/default/sample_07', {'type': 'hive', 'database': 'default', 'table': 'sample_07'}),
    ]

    for expected_link, meta in expectations:
        assert_equal(expected_link, get_data_link(meta))
def test_get_data_link():
    """Verify the link produced by get_data_link for every metadata type."""

    def check(expected, **meta):
        # The keyword arguments form the metadata dict given to get_data_link.
        assert_equal(expected, get_data_link(meta))

    # No metadata at all: no link.
    check(None)

    # Plain links are returned as they are.
    check("gethue.com", type="link", link="gethue.com")

    # HBase: row only, then row + family, then row + family + column.
    check(
        "/hbase/#Cluster/document_demo/query/20150527",
        type="hbase", table="document_demo", row_key="20150527",
    )
    check(
        "/hbase/#Cluster/document_demo/query/20150527[f1]",
        type="hbase", table="document_demo", row_key="20150527", fam="f1",
    )
    check(
        "/hbase/#Cluster/document_demo/query/20150527[f1:c1]",
        type="hbase", table="document_demo", row_key="20150527", fam="f1", col="c1",
    )

    # HDFS path and Hive table.
    check("/filebrowser/view=/data/hue/1", type="hdfs", path="/data/hue/1")
    check("/metastore/table/default/sample_07", type="hive", database="default", table="sample_07")
def augment_solr_response(response, collection, query):
    """Normalize facets and HTML-escape documents of a search response.

    ``response`` is mutated in place and also returned:

    * ``normalized_facets`` is added: one entry per facet of
      ``collection['facets']`` that has data in ``response['facet_counts']``
      (field, range, range-up, query, pivot) or ``response['facets']``
      (function, nested).
    * ``facet_counts`` and ``facets`` are removed.
    * Non-numeric document values are HTML-escaped. Unless
      ``query['download']`` is truthy, each document also gets
      ``externalLink`` and ``details``, and highlight snippets are merged in.
      Without an ``idField`` on the collection, ``response['warning']`` is set
      instead of highlighting.

    Args:
        response: dict holding at least ``response['response']['docs']``.
        collection: dict with ``facets`` and optionally ``idField``.
        query: dict with ``fqs`` and optionally ``download``.

    Returns:
        The same ``response`` object.
    """
    augmented = response
    augmented['normalized_facets'] = []
    NAME = '%(field)s-%(id)s'
    normalized_facets = []

    selected_values = {fq['id']: fq['filter'] for fq in query['fqs']}

    if response and response.get('facet_counts'):
        facet_counts = response['facet_counts']
        for facet in collection['facets']:
            category = facet['type']

            if category == 'field' and facet_counts['facet_fields']:
                name = NAME % facet
                collection_facet = get_facet_field(category, name, collection['facets'])
                counts = pairwise2(
                    facet['field'], selected_values.get(facet['id'], []), facet_counts['facet_fields'][name])
                if collection_facet['properties']['sort'] == 'asc':
                    counts.reverse()
                facet = {
                    'id': collection_facet['id'],
                    'field': facet['field'],
                    'type': category,
                    'label': collection_facet['label'],
                    'counts': counts,
                }
                normalized_facets.append(facet)
            elif (category == 'range' or category == 'range-up') and facet_counts['facet_ranges']:
                name = NAME % facet
                collection_facet = get_facet_field(category, name, collection['facets'])
                counts = facet_counts['facet_ranges'][name]['counts']
                end = facet_counts['facet_ranges'][name]['end']
                counts = range_pair(
                    facet['field'], name, selected_values.get(facet['id'], []), counts, end, collection_facet)
                facet = {
                    'id': collection_facet['id'],
                    'field': facet['field'],
                    'type': category,
                    'label': collection_facet['label'],
                    'counts': counts,
                    'extraSeries': []
                }
                normalized_facets.append(facet)
            elif category == 'query' and facet_counts['facet_queries']:
                for name, value in facet_counts['facet_queries'].items():
                    collection_facet = get_facet_field(category, name, collection['facets'])
                    facet = {
                        'id': collection_facet['id'],
                        'query': name,
                        'type': category,
                        'label': name,
                        'counts': value,
                    }
                    normalized_facets.append(facet)
            elif category == 'pivot':
                name = NAME % facet
                if 'facet_pivot' in facet_counts and name in facet_counts['facet_pivot']:
                    if facet['properties']['scope'] == 'stack':
                        count = _augment_pivot_2d(
                            name, facet['id'], facet_counts['facet_pivot'][name], selected_values)
                    else:
                        count = facet_counts['facet_pivot'][name]
                        _augment_pivot_nd(facet['id'], count, selected_values)
                else:
                    count = []
                facet = {
                    'id': facet['id'],
                    'field': name,
                    'type': category,
                    'label': name,
                    'counts': count,
                }
                normalized_facets.append(facet)

    if response and response.get('facets'):
        for facet in collection['facets']:
            category = facet['type']
            name = facet['id']  # Nested facets can only have one name

            if category == 'function' and name in response['facets']:
                value = response['facets'][name]
                collection_facet = get_facet_field(category, name, collection['facets'])
                facet = {
                    'id': collection_facet['id'],
                    'query': name,
                    'type': category,
                    'label': name,
                    'counts': value,
                }
                normalized_facets.append(facet)
            elif category == 'nested' and name in response['facets']:
                collection_facet = get_facet_field(category, name, collection['facets'])
                properties = collection_facet['properties']
                extraSeries = []
                counts = response['facets'][name]['buckets']

                is_date = properties['isDate']
                # Single dimension or dimension 2 with analytics
                flat_buckets = (
                    not properties['facets']
                    or properties['facets'][0]['aggregate'] not in ('count', 'unique')
                )

                if is_date:  # Date range
                    dimension = 3
                    if flat_buckets:
                        counts = [_v for _f in counts for _v in (_f['val'], _f['d2'] if 'd2' in _f else _f['count'])]
                        counts = range_pair(
                            facet['field'], name, selected_values.get(facet['id'], []), counts, 1, collection_facet)
                    else:
                        # Dimension 1 with counts and 2 with analytics
                        _series = collections.defaultdict(list)
                        for f in counts:
                            for bucket in (f['d2']['buckets'] if 'd2' in f else []):
                                _series[bucket['val']].append(f['val'])
                                _series[bucket['val']].append(bucket['d2'] if 'd2' in bucket else bucket['count'])
                        for series_label, val in _series.items():
                            _c = range_pair(
                                facet['field'], series_label, selected_values.get(facet['id'], []), val, 1,
                                collection_facet)
                            extraSeries.append({'counts': _c, 'label': series_label})
                        counts = []
                elif flat_buckets:
                    dimension = 1
                    counts = [_v for _f in counts for _v in (_f['val'], _f['d2'] if 'd2' in _f else _f['count'])]
                    counts = pairwise2(facet['field'], selected_values.get(facet['id'], []), counts)
                else:
                    # Dimension 1 with counts and 2 with analytics
                    dimension = 2
                    counts = _augment_stats_2d(name, facet, counts, selected_values)

                if properties['sort'] == 'asc':
                    counts.reverse()

                facet = {
                    'id': collection_facet['id'],
                    'field': facet['field'],
                    'type': category,
                    'label': collection_facet['label'],
                    'counts': counts,
                    'extraSeries': extraSeries,
                    'dimension': dimension
                }
                normalized_facets.append(facet)

    # Remove unnecessary facet data. A response produced without facets does
    # not carry these keys, hence the defaults.
    response.pop('facet_counts', None)
    response.pop('facets', None)

    # HTML escaping
    is_download = query.get('download')
    for doc in response['response']['docs']:
        # The link metadata is JSON: parse it before escaping turns its
        # quotes into entities.
        link = None
        if not is_download and 'link-meta' in doc:
            meta = json.loads(doc['link-meta'])
            link = get_data_link(meta)

        for field, value in doc.items():
            if isinstance(value, numbers.Number):
                escaped_value = value
            else:
                value = smart_unicode(value, errors='replace')
                escaped_value = escape(value)
            doc[field] = escaped_value

        if not is_download:
            doc['externalLink'] = link
            doc['details'] = []

    highlighting_by_id = response.get('highlighting', {})
    if highlighting_by_id and not is_download:
        id_field = collection.get('idField')
        if id_field:
            for doc in response['response']['docs']:
                if id_field not in doc:
                    continue
                doc_id = smart_unicode(doc[id_field])
                if doc_id not in highlighting_by_id:
                    continue

                highlighting = highlighting_by_id[doc_id]
                if highlighting:
                    escaped_highlighting = {}
                    for field, hls in highlighting.items():
                        # escape() turns the <em> markers into entities;
                        # restore exactly those markers and nothing else.
                        _hls = [
                            escape(smart_unicode(hl, errors='replace'))
                            .replace('&lt;em&gt;', '<em>')
                            .replace('&lt;/em&gt;', '</em>')
                            for hl in hls
                        ]
                        escaped_highlighting[field] = _hls

                    doc.update(escaped_highlighting)
        else:
            response['warning'] = _("The Solr schema requires an id field for performing the result highlighting")

    if normalized_facets:
        augmented['normalized_facets'].extend(normalized_facets)

    return augmented
def augment_solr_response(response, collection, query):
    """Normalize facets and HTML-escape documents of a search response.

    ``response`` is mutated in place and also returned:

    * ``normalized_facets`` is added: one entry per facet of
      ``collection['facets']`` that has data in ``response['facet_counts']``
      (field, range, range-up, query, pivot) or ``response['facets']``
      (function, nested). Nested facets additionally carry a tabular view
      (``docs`` / ``fieldsAttributes``) built from the rows filled in by
      ``_augment_stats_2d``.
    * ``facet_counts`` and ``facets`` are removed.
    * Unless ``query['download']`` is truthy, non-numeric document values are
      HTML-escaped, each document gets ``externalLink``, ``details`` and
      ``hueId``, and highlight snippets are merged in. Without an ``idField``
      on the collection, ``response['warning']`` is set instead of
      highlighting.

    Args:
        response: dict holding at least ``response['response']['docs']``.
        collection: dict with ``facets`` and optionally ``idField``.
        query: dict with ``fqs`` and optionally ``download``.

    Returns:
        The same ``response`` object.
    """
    augmented = response
    augmented['normalized_facets'] = []
    NAME = '%(field)s-%(id)s'
    normalized_facets = []

    selected_values = {fq['id']: fq['filter'] for fq in query['fqs']}

    if response and response.get('facet_counts'):
        facet_counts = response['facet_counts']
        for facet in collection['facets']:
            category = facet['type']

            if category == 'field' and facet_counts['facet_fields']:
                name = NAME % facet
                collection_facet = get_facet_field(category, name, collection['facets'])
                counts = pairwise2(
                    facet['field'], selected_values.get(facet['id'], []), facet_counts['facet_fields'][name])
                if collection_facet['properties']['sort'] == 'asc':
                    counts.reverse()
                facet = {
                    'id': collection_facet['id'],
                    'field': facet['field'],
                    'type': category,
                    'label': collection_facet['label'],
                    'counts': counts,
                }
                normalized_facets.append(facet)
            elif (category == 'range' or category == 'range-up') and facet_counts['facet_ranges']:
                name = NAME % facet
                collection_facet = get_facet_field(category, name, collection['facets'])
                counts = facet_counts['facet_ranges'][name]['counts']
                end = facet_counts['facet_ranges'][name]['end']
                counts = range_pair(
                    facet['field'], name, selected_values.get(facet['id'], []), counts, end, collection_facet)
                facet = {
                    'id': collection_facet['id'],
                    'field': facet['field'],
                    'type': category,
                    'label': collection_facet['label'],
                    'counts': counts,
                    'extraSeries': []
                }
                normalized_facets.append(facet)
            elif category == 'query' and facet_counts['facet_queries']:
                for name, value in facet_counts['facet_queries'].items():
                    collection_facet = get_facet_field(category, name, collection['facets'])
                    facet = {
                        'id': collection_facet['id'],
                        'query': name,
                        'type': category,
                        'label': name,
                        'counts': value,
                    }
                    normalized_facets.append(facet)
            elif category == 'pivot':
                name = NAME % facet
                if 'facet_pivot' in facet_counts and name in facet_counts['facet_pivot']:
                    if facet['properties']['scope'] == 'stack':
                        count = _augment_pivot_2d(
                            name, facet['id'], facet_counts['facet_pivot'][name], selected_values)
                    else:
                        count = facet_counts['facet_pivot'][name]
                        _augment_pivot_nd(facet['id'], count, selected_values)
                else:
                    count = []
                facet = {
                    'id': facet['id'],
                    'field': name,
                    'type': category,
                    'label': name,
                    'counts': count,
                }
                normalized_facets.append(facet)

    if response and response.get('facets'):
        for facet in collection['facets']:
            category = facet['type']
            name = facet['id']  # Nested facets can only have one name

            if category == 'function' and name in response['facets']:
                value = response['facets'][name]
                collection_facet = get_facet_field(category, name, collection['facets'])
                facet = {
                    'id': collection_facet['id'],
                    'query': name,
                    'type': category,
                    'label': name,
                    'counts': value,
                }
                normalized_facets.append(facet)
            elif category == 'nested' and name in response['facets']:
                collection_facet = get_facet_field(category, name, collection['facets'])
                properties = collection_facet['properties']
                extraSeries = []
                counts = response['facets'][name]['buckets']

                # Column headers of the tabular view. last_x_col / last_xx_col
                # track the two most recent 'count' (dimension) columns.
                cols = ['%(field)s' % facet, 'count(%(field)s)' % facet]
                last_x_col = 0
                last_xx_col = 0
                for i, f in enumerate(facet['properties']['facets']):
                    if f['aggregate']['function'] == 'count':
                        cols.append(f['field'])
                        last_xx_col = last_x_col
                        last_x_col = i + 2
                    cols.append(SolrApi._get_aggregate_function(f))
                rows = []  # Filled in by _augment_stats_2d

                # Number or Date range
                if properties['canRange'] and not facet['properties'].get('type') == 'field':
                    dimension = 3
                    # Single dimension or dimension 2 with analytics
                    if not properties['facets'] or (
                            properties['facets'][0]['aggregate']['function'] != 'count'
                            and len(properties['facets']) == 1):
                        column = 'count'
                        if len(properties['facets']) == 1:
                            agg_keys = [key for key, value in counts[0].items() if key.lower().startswith('agg_')]
                            legend = agg_keys[0].split(':', 2)[1]
                            column = agg_keys[0]
                        else:
                            legend = facet['field']
                            agg_keys = [column]

                        _augment_stats_2d(name, facet, counts, selected_values, agg_keys, rows)

                        counts = [_v for _f in counts for _v in (_f['val'], _f[column])]
                        counts = range_pair(
                            facet['field'], name, selected_values.get(facet['id'], []), counts, 1, collection_facet)
                    else:
                        # Dimension 1 with counts and 2 with analytics
                        agg_keys = [
                            key for key, value in counts[0].items()
                            if key.lower().startswith('agg_') or key.lower().startswith('dim_')
                        ]
                        agg_keys.sort(key=lambda a: a[4:])

                        if len(agg_keys) == 1 and agg_keys[0].lower().startswith('dim_'):
                            agg_keys.insert(0, 'count')
                        counts = _augment_stats_2d(name, facet, counts, selected_values, agg_keys, rows)

                        _series = collections.defaultdict(list)

                        for row in rows:
                            for i, cell in enumerate(row):
                                if i > last_x_col:
                                    legend = cols[i]
                                    if last_xx_col != last_x_col:
                                        legend = '%s %s' % (cols[i], row[last_x_col])
                                    _series[legend].append(row[last_xx_col])
                                    _series[legend].append(cell)

                        # Use a dedicated loop variable: `name` is still
                        # needed below to read this facet's numBuckets.
                        for series_label, val in _series.items():
                            _c = range_pair(
                                facet['field'], series_label, selected_values.get(facet['id'], []), val, 1,
                                collection_facet)
                            extraSeries.append({'counts': _c, 'label': series_label})
                        counts = []
                elif properties.get('isOldPivot'):
                    facet_fields = [collection_facet['field']] + [
                        f['field'] for f in properties.get('facets', [])
                        if f['aggregate']['function'] == 'count'
                    ]

                    agg_keys = [
                        key for key, value in counts[0].items()
                        if key.lower().startswith('agg_') or key.lower().startswith('dim_')
                    ]
                    agg_keys.sort(key=lambda a: a[4:])

                    if len(agg_keys) == 1 and agg_keys[0].lower().startswith('dim_'):
                        agg_keys.insert(0, 'count')
                    counts = _augment_stats_2d(name, facet, counts, selected_values, agg_keys, rows)

                    dimension = len(facet_fields)
                elif not properties['facets'] or (
                        properties['facets'][0]['aggregate']['function'] != 'count'
                        and len(properties['facets']) == 1):
                    # Dimension 1 with 1 count or agg
                    dimension = 1

                    column = 'count'
                    if len(properties['facets']) == 1:
                        agg_keys = [key for key, value in counts[0].items() if key.lower().startswith('agg_')]
                        legend = agg_keys[0].split(':', 2)[1]
                        column = agg_keys[0]
                    else:
                        legend = facet['field']
                        agg_keys = [column]

                    _augment_stats_2d(name, facet, counts, selected_values, agg_keys, rows)

                    counts = [_v for _f in counts for _v in (_f['val'], _f[column])]
                    counts = pairwise2(legend, selected_values.get(facet['id'], []), counts)
                else:
                    # Dimension 2 with analytics or 1 with N aggregates
                    dimension = 2
                    agg_keys = [
                        key for key, value in counts[0].items()
                        if key.lower().startswith('agg_') or key.lower().startswith('dim_')
                    ]
                    agg_keys.sort(key=lambda a: a[4:])

                    if len(agg_keys) == 1 and agg_keys[0].lower().startswith('dim_'):
                        agg_keys.insert(0, 'count')
                    counts = _augment_stats_2d(name, facet, counts, selected_values, agg_keys, rows)
                    actual_dimension = 1 + sum(
                        _f['aggregate']['function'] == 'count' for _f in properties['facets'])

                    # A real list (not a lazy filter object) on every Python version.
                    counts = [c for c in counts if len(c['fq_fields']) == actual_dimension]

                facet = {
                    'id': collection_facet['id'],
                    'field': facet['field'],
                    'type': category,
                    'label': collection_facet['label'],
                    'counts': counts,
                    'extraSeries': extraSeries,
                    'dimension': dimension,
                    # Todo * nested buckets + offsets
                    'response': {'response': {'start': 0, 'numFound': response['facets'][name]['numBuckets']}},
                    'docs': [dict(zip(cols, row)) for row in rows],
                    'fieldsAttributes': [
                        Collection2._make_gridlayout_header_field(
                            {'name': col, 'type': 'aggr' if '(' in col else 'string'})
                        for col in cols
                    ]
                }
                normalized_facets.append(facet)

    # Remove unnecessary facet data. A response produced without facets does
    # not carry these keys, hence the defaults.
    response.pop('facet_counts', None)
    response.pop('facets', None)

    # HTML escaping
    is_download = query.get('download')
    if not is_download:
        id_field = collection.get('idField', '')

        for doc in response['response']['docs']:
            # The link metadata is JSON: parse it before escaping turns its
            # quotes into entities.
            link = None
            if 'link-meta' in doc:
                meta = json.loads(doc['link-meta'])
                link = get_data_link(meta)

            for field, value in doc.items():
                if isinstance(value, numbers.Number):
                    escaped_value = value
                elif field == '_childDocuments_':  # Nested documents
                    escaped_value = value
                elif isinstance(value, list):  # Multivalue field
                    escaped_value = [smart_unicode(escape(val), errors='replace') for val in value]
                else:
                    value = smart_unicode(value, errors='replace')
                    escaped_value = escape(value)
                doc[field] = escaped_value

            doc['externalLink'] = link
            doc['details'] = []
            doc['hueId'] = smart_unicode(doc.get(id_field, ''))

    highlighting_by_id = response.get('highlighting', {})
    if highlighting_by_id and not is_download:
        id_field = collection.get('idField')
        if id_field:
            for doc in response['response']['docs']:
                if id_field not in doc:
                    continue
                doc_id = smart_unicode(doc[id_field])
                if doc_id not in highlighting_by_id:
                    continue

                highlighting = highlighting_by_id[doc_id]
                if highlighting:
                    escaped_highlighting = {}
                    for field, hls in highlighting.items():
                        # escape() turns the <em> markers into entities;
                        # restore exactly those markers and nothing else.
                        _hls = [
                            escape(smart_unicode(hl, errors='replace'))
                            .replace('&lt;em&gt;', '<em>')
                            .replace('&lt;/em&gt;', '</em>')
                            for hl in hls
                        ]
                        escaped_highlighting[field] = _hls[0] if len(_hls) == 1 else _hls

                    doc.update(escaped_highlighting)
        else:
            response['warning'] = _("The Solr schema requires an id field for performing the result highlighting")

    if normalized_facets:
        augmented['normalized_facets'].extend(normalized_facets)

    return augmented
def augment_solr_response(response, collection, query):
    """Normalize facets and HTML-escape documents of a search response.

    ``response`` is mutated in place and also returned:

    * ``normalized_facets`` is added: one entry per facet of
      ``collection['facets']`` that has data in ``response['facet_counts']``
      (field, range, range-up, query, pivot) or ``response['facets']``
      (function, nested).
    * ``facet_counts`` and ``facets`` are removed.
    * Unless ``query['download']`` is truthy, non-numeric document values
      (including each item of multi-valued fields) are HTML-escaped, each
      document gets ``externalLink`` and ``details``, and highlight snippets
      are merged in. Without an ``idField`` on the collection,
      ``response['warning']`` is set instead of highlighting.

    Args:
        response: dict holding at least ``response['response']['docs']``.
        collection: dict with ``facets`` and optionally ``idField``.
        query: dict with ``fqs`` and optionally ``download``.

    Returns:
        The same ``response`` object.
    """
    augmented = response
    augmented['normalized_facets'] = []
    NAME = '%(field)s-%(id)s'
    normalized_facets = []

    selected_values = {fq['id']: fq['filter'] for fq in query['fqs']}

    if response and response.get('facet_counts'):
        facet_counts = response['facet_counts']
        for facet in collection['facets']:
            category = facet['type']

            if category == 'field' and facet_counts['facet_fields']:
                name = NAME % facet
                collection_facet = get_facet_field(category, name, collection['facets'])
                counts = pairwise2(
                    facet['field'], selected_values.get(facet['id'], []), facet_counts['facet_fields'][name])
                if collection_facet['properties']['sort'] == 'asc':
                    counts.reverse()
                facet = {
                    'id': collection_facet['id'],
                    'field': facet['field'],
                    'type': category,
                    'label': collection_facet['label'],
                    'counts': counts,
                }
                normalized_facets.append(facet)
            elif (category == 'range' or category == 'range-up') and facet_counts['facet_ranges']:
                name = NAME % facet
                collection_facet = get_facet_field(category, name, collection['facets'])
                counts = facet_counts['facet_ranges'][name]['counts']
                end = facet_counts['facet_ranges'][name]['end']
                counts = range_pair(
                    facet['field'], name, selected_values.get(facet['id'], []), counts, end, collection_facet)
                facet = {
                    'id': collection_facet['id'],
                    'field': facet['field'],
                    'type': category,
                    'label': collection_facet['label'],
                    'counts': counts,
                    'extraSeries': []
                }
                normalized_facets.append(facet)
            elif category == 'query' and facet_counts['facet_queries']:
                for name, value in facet_counts['facet_queries'].items():
                    collection_facet = get_facet_field(category, name, collection['facets'])
                    facet = {
                        'id': collection_facet['id'],
                        'query': name,
                        'type': category,
                        'label': name,
                        'counts': value,
                    }
                    normalized_facets.append(facet)
            elif category == 'pivot':
                name = NAME % facet
                if 'facet_pivot' in facet_counts and name in facet_counts['facet_pivot']:
                    if facet['properties']['scope'] == 'stack':
                        count = _augment_pivot_2d(
                            name, facet['id'], facet_counts['facet_pivot'][name], selected_values)
                    else:
                        count = facet_counts['facet_pivot'][name]
                        _augment_pivot_nd(facet['id'], count, selected_values)
                else:
                    count = []
                facet = {
                    'id': facet['id'],
                    'field': name,
                    'type': category,
                    'label': name,
                    'counts': count,
                }
                normalized_facets.append(facet)

    if response and response.get('facets'):
        for facet in collection['facets']:
            category = facet['type']
            name = facet['id']  # Nested facets can only have one name

            if category == 'function' and name in response['facets']:
                value = response['facets'][name]
                collection_facet = get_facet_field(category, name, collection['facets'])
                facet = {
                    'id': collection_facet['id'],
                    'query': name,
                    'type': category,
                    'label': name,
                    'counts': value,
                }
                normalized_facets.append(facet)
            elif category == 'nested' and name in response['facets']:
                collection_facet = get_facet_field(category, name, collection['facets'])
                properties = collection_facet['properties']
                extraSeries = []
                counts = response['facets'][name]['buckets']

                is_date = properties['isDate']
                # Single dimension or dimension 2 with analytics
                flat_buckets = (
                    not properties['facets']
                    or properties['facets'][0]['aggregate'] not in ('count', 'unique')
                )

                if is_date:  # Date range
                    dimension = 3
                    if flat_buckets:
                        counts = [_v for _f in counts for _v in (_f['val'], _f['d2'] if 'd2' in _f else _f['count'])]
                        counts = range_pair(
                            facet['field'], name, selected_values.get(facet['id'], []), counts, 1, collection_facet)
                    else:
                        # Dimension 1 with counts and 2 with analytics
                        _series = collections.defaultdict(list)
                        for f in counts:
                            for bucket in (f['d2']['buckets'] if 'd2' in f else []):
                                _series[bucket['val']].append(f['val'])
                                _series[bucket['val']].append(bucket['d2'] if 'd2' in bucket else bucket['count'])
                        for series_label, val in _series.items():
                            _c = range_pair(
                                facet['field'], series_label, selected_values.get(facet['id'], []), val, 1,
                                collection_facet)
                            extraSeries.append({'counts': _c, 'label': series_label})
                        counts = []
                elif flat_buckets:
                    dimension = 1
                    counts = [_v for _f in counts for _v in (_f['val'], _f['d2'] if 'd2' in _f else _f['count'])]
                    counts = pairwise2(facet['field'], selected_values.get(facet['id'], []), counts)
                else:
                    # Dimension 1 with counts and 2 with analytics
                    dimension = 2
                    counts = _augment_stats_2d(name, facet, counts, selected_values)

                if properties['sort'] == 'asc':
                    counts.reverse()

                facet = {
                    'id': collection_facet['id'],
                    'field': facet['field'],
                    'type': category,
                    'label': collection_facet['label'],
                    'counts': counts,
                    'extraSeries': extraSeries,
                    'dimension': dimension
                }
                normalized_facets.append(facet)

    # Remove unnecessary facet data. A response produced without facets does
    # not carry these keys, hence the defaults.
    response.pop('facet_counts', None)
    response.pop('facets', None)

    # HTML escaping
    is_download = query.get('download')
    if not is_download:
        for doc in response['response']['docs']:
            # The link metadata is JSON: parse it before escaping turns its
            # quotes into entities.
            link = None
            if 'link-meta' in doc:
                meta = json.loads(doc['link-meta'])
                link = get_data_link(meta)

            for field, value in doc.items():
                if isinstance(value, numbers.Number):
                    escaped_value = value
                elif isinstance(value, list):  # Multivalue field
                    # Every item is escaped too, like single-valued fields.
                    escaped_value = [smart_unicode(escape(val), errors='replace') for val in value]
                else:
                    value = smart_unicode(value, errors='replace')
                    escaped_value = escape(value)
                doc[field] = escaped_value

            doc['externalLink'] = link
            doc['details'] = []

    highlighting_by_id = response.get('highlighting', {})
    if highlighting_by_id and not is_download:
        id_field = collection.get('idField')
        if id_field:
            for doc in response['response']['docs']:
                if id_field not in doc:
                    continue
                doc_id = smart_unicode(doc[id_field])
                if doc_id not in highlighting_by_id:
                    continue

                highlighting = highlighting_by_id[doc_id]
                if highlighting:
                    escaped_highlighting = {}
                    for field, hls in highlighting.items():
                        # escape() turns the <em> markers into entities;
                        # restore exactly those markers and nothing else.
                        _hls = [
                            escape(smart_unicode(hl, errors='replace'))
                            .replace('&lt;em&gt;', '<em>')
                            .replace('&lt;/em&gt;', '</em>')
                            for hl in hls
                        ]
                        escaped_highlighting[field] = _hls

                    doc.update(escaped_highlighting)
        else:
            response['warning'] = _("The Solr schema requires an id field for performing the result highlighting")

    if normalized_facets:
        augmented['normalized_facets'].extend(normalized_facets)

    return augmented
def augment_solr_response(response, collection, query):
    """Normalize facets and HTML-escape documents of a search response.

    ``response`` is mutated in place and also returned:

    * ``normalized_facets`` is added: one entry per facet of
      ``collection["facets"]`` that has data in ``response["facet_counts"]``
      (field, range, range-up, query, pivot) or ``response["facets"]``
      (function, nested).
    * ``facet_counts`` and ``facets`` are removed.
    * Non-numeric document values (including each item of multi-valued
      fields) are HTML-escaped. Unless ``query["download"]`` is truthy, each
      document also gets ``externalLink`` and ``details``, and highlight
      snippets are merged in. Without an ``idField`` on the collection,
      ``response["warning"]`` is set instead of highlighting.

    Args:
        response: dict holding at least ``response["response"]["docs"]``.
        collection: dict with ``facets`` and optionally ``idField``.
        query: dict with ``fqs`` and optionally ``download``.

    Returns:
        The same ``response`` object.
    """
    augmented = response
    augmented["normalized_facets"] = []
    NAME = "%(field)s-%(id)s"
    normalized_facets = []

    selected_values = {fq["id"]: fq["filter"] for fq in query["fqs"]}

    if response and response.get("facet_counts"):
        facet_counts = response["facet_counts"]
        for facet in collection["facets"]:
            category = facet["type"]

            if category == "field" and facet_counts["facet_fields"]:
                name = NAME % facet
                collection_facet = get_facet_field(category, name, collection["facets"])
                counts = pairwise2(
                    facet["field"], selected_values.get(facet["id"], []), facet_counts["facet_fields"][name]
                )
                if collection_facet["properties"]["sort"] == "asc":
                    counts.reverse()
                facet = {
                    "id": collection_facet["id"],
                    "field": facet["field"],
                    "type": category,
                    "label": collection_facet["label"],
                    "counts": counts,
                }
                normalized_facets.append(facet)
            elif (category == "range" or category == "range-up") and facet_counts["facet_ranges"]:
                name = NAME % facet
                collection_facet = get_facet_field(category, name, collection["facets"])
                counts = facet_counts["facet_ranges"][name]["counts"]
                end = facet_counts["facet_ranges"][name]["end"]
                counts = range_pair(
                    facet["field"], name, selected_values.get(facet["id"], []), counts, end, collection_facet
                )
                facet = {
                    "id": collection_facet["id"],
                    "field": facet["field"],
                    "type": category,
                    "label": collection_facet["label"],
                    "counts": counts,
                    "extraSeries": [],
                }
                normalized_facets.append(facet)
            elif category == "query" and facet_counts["facet_queries"]:
                for name, value in facet_counts["facet_queries"].items():
                    collection_facet = get_facet_field(category, name, collection["facets"])
                    facet = {
                        "id": collection_facet["id"],
                        "query": name,
                        "type": category,
                        "label": name,
                        "counts": value,
                    }
                    normalized_facets.append(facet)
            elif category == "pivot":
                name = NAME % facet
                if "facet_pivot" in facet_counts and name in facet_counts["facet_pivot"]:
                    if facet["properties"]["scope"] == "stack":
                        count = _augment_pivot_2d(
                            name, facet["id"], facet_counts["facet_pivot"][name], selected_values
                        )
                    else:
                        count = facet_counts["facet_pivot"][name]
                        _augment_pivot_nd(facet["id"], count, selected_values)
                else:
                    count = []
                facet = {"id": facet["id"], "field": name, "type": category, "label": name, "counts": count}
                normalized_facets.append(facet)

    if response and response.get("facets"):
        for facet in collection["facets"]:
            category = facet["type"]
            name = facet["id"]  # Nested facets can only have one name

            if category == "function" and name in response["facets"]:
                value = response["facets"][name]
                collection_facet = get_facet_field(category, name, collection["facets"])
                facet = {"id": collection_facet["id"], "query": name, "type": category, "label": name, "counts": value}
                normalized_facets.append(facet)
            elif category == "nested" and name in response["facets"]:
                collection_facet = get_facet_field(category, name, collection["facets"])
                properties = collection_facet["properties"]
                extraSeries = []
                counts = response["facets"][name]["buckets"]

                is_date = properties["isDate"]
                # Single dimension or dimension 2 with analytics
                flat_buckets = (
                    not properties["facets"]
                    or properties["facets"][0]["aggregate"] not in ("count", "unique")
                )

                if is_date:  # Date range
                    dimension = 3
                    if flat_buckets:
                        counts = [_v for _f in counts for _v in (_f["val"], _f["d2"] if "d2" in _f else _f["count"])]
                        counts = range_pair(
                            facet["field"], name, selected_values.get(facet["id"], []), counts, 1, collection_facet
                        )
                    else:
                        # Dimension 1 with counts and 2 with analytics
                        _series = collections.defaultdict(list)
                        for f in counts:
                            for bucket in f["d2"]["buckets"] if "d2" in f else []:
                                _series[bucket["val"]].append(f["val"])
                                _series[bucket["val"]].append(bucket["d2"] if "d2" in bucket else bucket["count"])
                        for series_label, val in _series.items():
                            _c = range_pair(
                                facet["field"],
                                series_label,
                                selected_values.get(facet["id"], []),
                                val,
                                1,
                                collection_facet,
                            )
                            extraSeries.append({"counts": _c, "label": series_label})
                        counts = []
                elif flat_buckets:
                    dimension = 1
                    counts = [_v for _f in counts for _v in (_f["val"], _f["d2"] if "d2" in _f else _f["count"])]
                    counts = pairwise2(facet["field"], selected_values.get(facet["id"], []), counts)
                else:
                    # Dimension 1 with counts and 2 with analytics
                    dimension = 2
                    counts = _augment_stats_2d(name, facet, counts, selected_values)

                if properties["sort"] == "asc":
                    counts.reverse()

                facet = {
                    "id": collection_facet["id"],
                    "field": facet["field"],
                    "type": category,
                    "label": collection_facet["label"],
                    "counts": counts,
                    "extraSeries": extraSeries,
                    "dimension": dimension,
                }
                normalized_facets.append(facet)

    # Remove unnecessary facet data. A response produced without facets does
    # not carry these keys, hence the defaults.
    response.pop("facet_counts", None)
    response.pop("facets", None)

    # HTML escaping
    is_download = query.get("download")
    for doc in response["response"]["docs"]:
        # The link metadata is JSON: parse it before escaping turns its
        # quotes into entities.
        link = None
        if not is_download and "link-meta" in doc:
            meta = json.loads(doc["link-meta"])
            link = get_data_link(meta)

        for field, value in doc.items():
            if isinstance(value, numbers.Number):
                escaped_value = value
            elif isinstance(value, list):  # Multivalue field
                # Every item is escaped too, like single-valued fields.
                escaped_value = [smart_unicode(escape(val), errors="replace") for val in value]
            else:
                value = smart_unicode(value, errors="replace")
                escaped_value = escape(value)
            doc[field] = escaped_value

        if not is_download:
            doc["externalLink"] = link
            doc["details"] = []

    highlighting_by_id = response.get("highlighting", {})
    if highlighting_by_id and not is_download:
        id_field = collection.get("idField")
        if id_field:
            for doc in response["response"]["docs"]:
                if id_field not in doc:
                    continue
                doc_id = smart_unicode(doc[id_field])
                if doc_id not in highlighting_by_id:
                    continue

                highlighting = highlighting_by_id[doc_id]
                if highlighting:
                    escaped_highlighting = {}
                    for field, hls in highlighting.items():
                        # escape() turns the <em> markers into entities;
                        # restore exactly those markers and nothing else.
                        _hls = [
                            escape(smart_unicode(hl, errors="replace"))
                            .replace("&lt;em&gt;", "<em>")
                            .replace("&lt;/em&gt;", "</em>")
                            for hl in hls
                        ]
                        escaped_highlighting[field] = _hls

                    doc.update(escaped_highlighting)
        else:
            response["warning"] = _("The Solr schema requires an id field for performing the result highlighting")

    if normalized_facets:
        augmented["normalized_facets"].extend(normalized_facets)

    return augmented
def _bucket_keys_with_prefix(buckets, prefixes):
    """Return the keys of the first bucket that start with one of `prefixes`.

    Matching is done on the lowercased key; `prefixes` is a tuple of lowercase
    strings. NOTE(review): this indexes `buckets[0]`, so an empty bucket list
    raises IndexError, exactly like the inline code it replaces.
    """
    return [key for key in buckets[0] if key.lower().startswith(prefixes)]


def _dimension_agg_keys(buckets):
    """Return the 'agg_'/'dim_' keys of the first bucket, ordered by their suffix.

    When the only key found is a 'dim_' key, 'count' is inserted in front of it.
    """
    agg_keys = _bucket_keys_with_prefix(buckets, ('agg_', 'dim_'))
    agg_keys.sort(key=lambda a: a[4:])
    if len(agg_keys) == 1 and agg_keys[0].lower().startswith('dim_'):
        agg_keys.insert(0, 'count')
    return agg_keys


def _has_single_series(sub_facets):
    """Tell whether a nested facet has no sub-facet, or exactly one that is not a 'count'."""
    return not sub_facets or (
        sub_facets[0]['aggregate']['function'] != 'count' and len(sub_facets) == 1
    )


def _single_series_columns(facet, sub_facets, buckets):
    """Return `(column, legend, agg_keys)` for a facet rendered as a single series.

    With exactly one sub-facet the first 'agg_' key of the first bucket is the
    value column and its second ':'-separated part is the legend; otherwise the
    plain 'count' column is used and the legend is the facet field.
    """
    if len(sub_facets) == 1:
        agg_keys = _bucket_keys_with_prefix(buckets, ('agg_',))
        column = agg_keys[0]
        legend = column.split(':', 2)[1]
    else:
        column = 'count'
        legend = facet['field']
        agg_keys = [column]
    return column, legend, agg_keys


def augment_solr_response(response, collection, query):
    """Normalize the facets of a search response and escape its documents.

    `response` is modified in place and is also the returned object:

    * the classic `facet_counts` section and the `facets` section are turned
      into a list of dicts stored under `normalized_facets`;
    * unless `query['download']` is set, document values are HTML-escaped and
      each document gets `externalLink`, `details` and `hueId` keys;
    * unless `query['download']` is set, highlighting snippets are escaped
      (keeping the `<em>` markers) and merged into the matching documents. If
      the collection has no `idField`, `response['warning']` is set instead.

    Args:
        response: dict with a `response.docs` list and optional `facet_counts`,
            `facets` and `highlighting` sections.
        collection: dict with a `facets` list and an optional `idField`.
        query: dict with an `fqs` list (items having `id` and `filter`) and an
            optional `download` flag.

    Returns:
        The same `response` object, augmented.
    """
    augmented = response
    augmented['normalized_facets'] = []
    NAME = '%(field)s-%(id)s'
    normalized_facets = []

    selected_values = {fq['id']: fq['filter'] for fq in query['fqs']}

    if response and response.get('facet_counts'):
        facet_counts = response['facet_counts']

        for facet in collection['facets']:
            category = facet['type']

            if category == 'field' and facet_counts['facet_fields']:
                name = NAME % facet
                collection_facet = get_facet_field(category, name, collection['facets'])
                counts = pairwise2(
                    facet['field'], selected_values.get(facet['id'], []), facet_counts['facet_fields'][name]
                )
                if collection_facet['properties']['sort'] == 'asc':
                    counts.reverse()
                normalized_facets.append({
                    'id': collection_facet['id'],
                    'field': facet['field'],
                    'type': category,
                    'label': collection_facet['label'],
                    'counts': counts,
                })
            elif category in ('range', 'range-up') and facet_counts['facet_ranges']:
                name = NAME % facet
                collection_facet = get_facet_field(category, name, collection['facets'])
                counts = facet_counts['facet_ranges'][name]['counts']
                end = facet_counts['facet_ranges'][name]['end']
                counts = range_pair(
                    facet['field'], name, selected_values.get(facet['id'], []), counts, end, collection_facet
                )
                normalized_facets.append({
                    'id': collection_facet['id'],
                    'field': facet['field'],
                    'type': category,
                    'label': collection_facet['label'],
                    'counts': counts,
                    'extraSeries': [],
                })
            elif category == 'query' and facet_counts['facet_queries']:
                for name, value in facet_counts['facet_queries'].items():
                    collection_facet = get_facet_field(category, name, collection['facets'])
                    normalized_facets.append({
                        'id': collection_facet['id'],
                        'query': name,
                        'type': category,
                        'label': name,
                        'counts': value,
                    })
            elif category == 'pivot':
                name = NAME % facet
                if 'facet_pivot' in facet_counts and name in facet_counts['facet_pivot']:
                    if facet['properties']['scope'] == 'stack':
                        count = _augment_pivot_2d(name, facet['id'], facet_counts['facet_pivot'][name], selected_values)
                    else:
                        count = facet_counts['facet_pivot'][name]
                        _augment_pivot_nd(facet['id'], count, selected_values)
                else:
                    count = []
                normalized_facets.append({
                    'id': facet['id'],
                    'field': name,
                    'type': category,
                    'label': name,
                    'counts': count,
                })

    if response and response.get('facets'):
        for facet in collection['facets']:
            category = facet['type']
            name = facet['id']  # Nested facets can only have one name

            if category == 'function' and name in response['facets']:
                value = response['facets'][name]
                collection_facet = get_facet_field(category, name, collection['facets'])
                normalized_facets.append({
                    'id': collection_facet['id'],
                    'query': name,
                    'type': category,
                    'label': name,
                    'counts': value,
                })
            elif category == 'nested' and name in response['facets']:
                facet_data = response['facets'][name]
                collection_facet = get_facet_field(category, name, collection['facets'])
                extra_series = []
                counts = facet_data['buckets']

                # Column headers of the tabular view: the facet field, its count,
                # then one or two columns per sub-facet.
                cols = ['%(field)s' % facet, 'count(%(field)s)' % facet]
                last_x_col = 0
                last_xx_col = 0
                for i, f in enumerate(facet['properties']['facets']):
                    if f['aggregate']['function'] == 'count':
                        cols.append(f['field'])
                        last_xx_col = last_x_col
                        last_x_col = i + 2
                    cols.append(SolrApi._get_aggregate_function(f))
                rows = []  # Filled in by _augment_stats_2d

                properties = collection_facet['properties']

                if properties['canRange'] and facet['properties'].get('type') != 'field':
                    # Number or Date range
                    dimension = 3 if properties['isDate'] else 1

                    if _has_single_series(properties['facets']):
                        # Single dimension or dimension 2 with analytics
                        column, _unused_legend, agg_keys = _single_series_columns(facet, properties['facets'], counts)

                        _augment_stats_2d(name, facet, counts, selected_values, agg_keys, rows)

                        counts = [_v for _f in counts for _v in (_f['val'], _f[column])]
                        counts = range_pair(
                            facet['field'], name, selected_values.get(facet['id'], []), counts, 1, collection_facet
                        )
                    else:
                        # Dimension 1 with counts and 2 with analytics
                        agg_keys = _dimension_agg_keys(counts)
                        # Called for the rows it appends; its return value is not needed here.
                        _augment_stats_2d(name, facet, counts, selected_values, agg_keys, rows)

                        series = collections.defaultdict(list)
                        for row in rows:
                            for i, cell in enumerate(row):
                                if i > last_x_col:
                                    legend = cols[i]
                                    if last_xx_col != last_x_col:
                                        legend = '%s %s' % (cols[i], row[last_x_col])
                                    series[legend].append(row[last_xx_col])
                                    series[legend].append(cell)

                        # The loop variable must not be called `name`: that would clobber the
                        # facet id still needed below.
                        for series_label, values in series.items():
                            _c = range_pair(
                                facet['field'], series_label, selected_values.get(facet['id'], []), values, 1,
                                collection_facet
                            )
                            extra_series.append({'counts': _c, 'label': series_label})
                        counts = []
                elif properties.get('isOldPivot'):
                    facet_fields = [collection_facet['field']] + [
                        f['field'] for f in properties.get('facets', []) if f['aggregate']['function'] == 'count'
                    ]

                    agg_keys = _dimension_agg_keys(counts)
                    counts = _augment_stats_2d(name, facet, counts, selected_values, agg_keys, rows)

                    dimension = len(facet_fields)
                elif _has_single_series(properties['facets']):
                    # Dimension 1 with 1 count or agg
                    dimension = 1

                    column, legend, agg_keys = _single_series_columns(facet, properties['facets'], counts)

                    _augment_stats_2d(name, facet, counts, selected_values, agg_keys, rows)

                    counts = [_v for _f in counts for _v in (_f['val'], _f[column])]
                    counts = pairwise2(legend, selected_values.get(facet['id'], []), counts)
                else:
                    # Dimension 2 with analytics or 1 with N aggregates
                    dimension = 2
                    agg_keys = _dimension_agg_keys(counts)
                    counts = _augment_stats_2d(name, facet, counts, selected_values, agg_keys, rows)
                    actual_dimension = 1 + sum(
                        1 for _f in properties['facets'] if _f['aggregate']['function'] == 'count'
                    )

                    # A real list (not a lazy `filter` object) so the result is the same on Python 2 and 3.
                    counts = [bucket for bucket in counts if len(bucket['fq_fields']) == actual_dimension]

                num_bucket = facet_data['numBuckets'] if 'numBuckets' in facet_data else len(facet_data)
                normalized_facets.append({
                    'id': collection_facet['id'],
                    'field': facet['field'],
                    'type': category,
                    'label': collection_facet['label'],
                    'counts': counts,
                    'extraSeries': extra_series,
                    'dimension': dimension,
                    'response': {'response': {'start': 0, 'numFound': num_bucket}},  # Todo * nested buckets + offsets
                    'docs': [dict(zip(cols, row)) for row in rows],
                    'fieldsAttributes': [
                        Collection2._make_gridlayout_header_field(
                            {'name': col, 'type': 'aggr' if '(' in col else 'string'}
                        )
                        for col in cols
                    ],
                })

        # Remove unnecessary facet data. Either section may be absent, so never raise on a missing key.
        response.pop('facet_counts', None)
        response.pop('facets', None)

    # HTML escaping
    if not query.get('download'):
        id_field = collection.get('idField', '')
        for doc in response['response']['docs']:
            # Read the link metadata from the raw value: once HTML-escaped, its JSON quotes
            # are turned into entities and can no longer be parsed.
            link = None
            if 'link-meta' in doc:
                meta = json.loads(doc['link-meta'])
                link = get_data_link(meta)

            for field, value in list(doc.items()):
                if isinstance(value, numbers.Number):
                    escaped_value = value
                elif field == '_childDocuments_':  # Nested documents
                    escaped_value = value
                elif isinstance(value, list):  # Multivalue field
                    escaped_value = [smart_unicode(escape(val), errors='replace') for val in value]
                else:
                    value = smart_unicode(value, errors='replace')
                    escaped_value = escape(value)
                doc[field] = escaped_value

            doc['externalLink'] = link
            doc['details'] = []
            doc['hueId'] = smart_unicode(doc.get(id_field, ''))

    highlighting_by_id = response.get('highlighting', {})
    if highlighting_by_id and not query.get('download'):
        id_field = collection.get('idField')
        if id_field:
            for doc in response['response']['docs']:
                if id_field not in doc:
                    continue
                doc_id = smart_unicode(doc[id_field])
                if doc_id not in highlighting_by_id:
                    continue
                highlighting = highlighting_by_id[doc_id]
                if not highlighting:
                    continue

                escaped_highlighting = {}
                for field, hls in highlighting.items():
                    # Escape the snippet, then restore only the <em> markers, which escaping
                    # turned into entities.
                    _hls = [
                        escape(smart_unicode(hl, errors='replace'))
                        .replace('&lt;em&gt;', '<em>')
                        .replace('&lt;/em&gt;', '</em>')
                        for hl in hls
                    ]
                    escaped_highlighting[field] = _hls[0] if len(_hls) == 1 else _hls

                doc.update(escaped_highlighting)
        else:
            response['warning'] = _("The Solr schema requires an id field for performing the result highlighting")

    if normalized_facets:
        augmented['normalized_facets'].extend(normalized_facets)

    return augmented