Пример #1
0
def peek_query(args):
    """Summarise search hits in excluded collections, grouped by creator.

    The query is built from the ``q`` text parameter, the filters parsed
    from ``args`` and any ``entity`` parameters. Documents whose
    ``collection_id`` is in ``authz.collections(authz.READ)`` are
    excluded (presumably the collections the current user can already
    read -- confirm against ``authz``). Only the hit total and a
    ``collection_id`` terms aggregation (top 30 buckets) are requested.

    :param args: request arguments; wrapped in a ``MultiDict`` when it is
        not one already.
    :returns: the result of ``format_total`` applied to a dict with the
        keys ``roles`` (per-creator entries, largest total first),
        ``active`` (whether the hit total is above zero) and ``total``.
    """
    if not isinstance(args, MultiDict):
        args = MultiDict(args)

    # Base query: free text first, then the parsed and entity filters.
    search_text = args.get('q', '').strip()
    query = text_query(search_text)
    filters = parse_filters(args)
    for entity_id in args.getlist('entity'):
        filters.append(('entities.id', entity_id))
    query = filter_query(query, filters, [])

    # Drop every document that lives in one of these collections.
    excluded_ids = authz.collections(authz.READ)
    query = add_filter(query, {
        'not': {'terms': {'collection_id': excluded_ids}}
    })

    # No documents are fetched (size 0, no _source); only the counts
    # per collection are of interest.
    collection_agg = {'terms': {'field': 'collection_id', 'size': 30}}
    body = {
        'query': query,
        'size': 0,
        'aggregations': {'collections': collection_agg},
        '_source': False
    }
    result = get_es().search(index=get_es_index(), body=body,
                             doc_type=TYPE_DOCUMENT)

    aggregations = result.get('aggregations', {})
    buckets = aggregations.get('collections', {}).get('buckets', [])

    # Load the collections named by the buckets, keeping only those with
    # a creator. ``!= None`` is deliberate: it is an SQL expression, not
    # a Python identity test.
    bucket_keys = [b['key'] for b in buckets]
    collection_q = Collection.all_by_ids(bucket_keys)
    collection_q = collection_q.filter(Collection.creator_id != None)  # noqa
    by_id = {c.id: c for c in collection_q.all()}

    # Accumulate document counts per creator, skipping private or
    # unknown collections.
    by_creator = {}
    for bucket in buckets:
        collection = by_id.get(bucket.get('key'))
        if collection is None or collection.private:
            continue
        entry = by_creator.get(collection.creator_id)
        if entry is None:
            by_creator[collection.creator_id] = {
                'name': collection.creator.name,
                'email': collection.creator.email,
                'total': bucket.get('doc_count')
            }
        else:
            entry['total'] += bucket.get('doc_count')

    ranked = list(by_creator.values())
    ranked.sort(key=lambda entry: entry['total'], reverse=True)
    ranked = [format_total(entry) for entry in ranked]

    hit_total = result.get('hits', {}).get('total')
    summary = {
        'roles': ranked,
        'active': hit_total > 0,
        'total': hit_total
    }
    return format_total(summary)
Пример #2
0
def peek_query(args):
    """Report how many hits a search has in excluded collections.

    Builds a query from the ``q`` text, the filters parsed from ``args``
    and any ``entity`` values, then excludes documents whose
    ``collection_id`` is listed by ``authz.collections(authz.READ)``
    (presumably what the caller may already read -- verify in
    ``authz``). The search returns no documents, only the hit total and
    a ``collection_id`` terms aggregation limited to 30 buckets.

    :param args: request arguments; converted to a ``MultiDict`` if it
        is any other type.
    :returns: ``format_total`` applied to a dict holding ``roles`` (one
        entry per collection creator, sorted by descending total),
        ``active`` (hit total greater than zero) and ``total``.
    """
    params = args if isinstance(args, MultiDict) else MultiDict(args)

    es_query = text_query(params.get('q', '').strip())

    filters = parse_filters(params)
    for entity in params.getlist('entity'):
        filters.append(('entities.id', entity))
    es_query = filter_query(es_query, filters, [])

    # Negated terms filter: keep only documents outside these collections.
    not_in_listed = {
        'not': {
            'terms': {'collection_id': authz.collections(authz.READ)}
        }
    }
    es_query = add_filter(es_query, not_in_listed)

    request = {
        'query': es_query,
        # Counts only: no hits and no document source are returned.
        'size': 0,
        'aggregations': {
            'collections': {
                'terms': {'field': 'collection_id', 'size': 30}
            }
        },
        '_source': False
    }
    response = get_es().search(index=get_es_index(),
                               body=request,
                               doc_type=TYPE_DOCUMENT)

    collections_agg = response.get('aggregations', {}).get('collections', {})
    buckets = collections_agg.get('buckets', [])

    # ``!= None`` builds an SQL condition here, so it must not be turned
    # into ``is not None``.
    lookup = Collection.all_by_ids([b['key'] for b in buckets])
    lookup = lookup.filter(Collection.creator_id != None)  # noqa
    found = {item.id: item for item in lookup.all()}

    # Sum the bucket counts per creator for collections that were found
    # and are not private.
    totals = {}
    for bucket in buckets:
        collection = found.get(bucket.get('key'))
        if collection is not None and not collection.private:
            creator_id = collection.creator_id
            if creator_id not in totals:
                totals[creator_id] = {
                    'name': collection.creator.name,
                    'email': collection.creator.email,
                    'total': bucket.get('doc_count')
                }
            else:
                totals[creator_id]['total'] += bucket.get('doc_count')

    ordered = sorted(totals.values(),
                     key=lambda item: item['total'],
                     reverse=True)
    roles = [format_total(item) for item in ordered]

    total = response.get('hits', {}).get('total')
    return format_total({
        'roles': roles,
        'active': total > 0,
        'total': total
    })
Пример #3
0
def analyze_terms(terms, seen=None):
    """Trigger analysis once for each document matching any of ``terms``.

    Each term is turned into a query with ``text_query`` and the matching
    ids are read from ``query_doc_ids``. For every id not yet recorded,
    ``analyze_document.delay`` is called (presumably queuing a background
    task -- confirm against the task definition) and the id is recorded.

    :param terms: iterable of search terms.
    :param seen: optional set of document ids to skip; it is updated in
        place with every id encountered. A new set is used when omitted.
    :returns: ``None``.
    """
    handled = set() if seen is None else seen
    for term in terms:
        matching_ids = query_doc_ids(text_query(term))
        for doc_id in matching_ids:
            if doc_id in handled:
                continue
            analyze_document.delay(doc_id)
            handled.add(doc_id)