def index_search(request):
    """Index a project and a batch of its pages from an API request.

    Reads ``page_list``, ``project_pk`` and ``version_pk`` from
    ``request.DATA['data']``, fetches boost factors from the heatmap
    service, indexes the project document and then bulk-indexes the pages.

    Each page dict from the request is extended in place with ``_boost``,
    ``project``, ``version`` and ``id`` before being indexed.

    Returns:
        A ``Response`` carrying ``{'indexed': True}``.
    """
    page_obj = PageIndex()
    data = request.DATA['data']
    page_list = data['page_list']
    project_pk = data['project_pk']
    version_pk = data['version_pk']
    project = Project.objects.get(pk=project_pk)
    version = Version.objects.get(pk=version_pk)

    resp = requests.get(
        'https://api.grokthedocs.com/api/v1/index/1/heatmap/',
        params={'project': project.slug, 'compare': True},
    )
    ret_json = resp.json()
    # Fall back to a neutral boost of 1 when the heatmap response has no
    # entry for this project/page instead of failing with KeyError.
    project_scale = ret_json.get('scaled_project', {}).get(project.slug, 1)
    scaled_pages = ret_json.get('scaled_page', {})

    project_obj = ProjectIndex()
    project_obj.index_document({
        'id': project.pk,
        'name': project.name,
        'slug': project.slug,
        'description': project.description,
        'lang': project.language,
        'author': [user.username for user in project.users.all()],
        'url': project.get_absolute_url(),
        '_boost': project_scale,
    })

    index_list = []
    for page in page_list:
        log.debug("(API Index) %s:%s" % (project.slug, page['path']))
        page_scale = scaled_pages.get(page['path'], 1)
        page['_boost'] = page_scale + project_scale
        page['project'] = project.slug
        page['version'] = version.slug
        # md5 needs bytes; encode explicitly so non-ASCII paths hash cleanly.
        id_source = '%s-%s-%s' % (project.slug, version.slug, page['path'])
        page['id'] = hashlib.md5(id_source.encode('utf-8')).hexdigest()
        index_list.append(page)

    page_obj.bulk_index(index_list, parent=project_pk)
    return Response({'indexed': True})
def index_search_request(version, page_list, commit):
    """Index a version's project document and the given pages.

    Args:
        version: object exposing ``project`` and ``slug``.
        page_list: list of page dicts with ``path``, ``title``, ``headers``
            and ``content`` keys.
        commit: commit identifier stored on every page document.
    """
    log_msg = ' '.join([page['path'] for page in page_list])
    log.info("(Server Search) Indexing Pages: %s [%s]" % (
        version.project.slug, log_msg))
    project = version.project
    page_obj = PageIndex()
    # Boosts are fixed at 1 here; no external scaling data is consulted.
    project_scale = 1
    page_scale = 1

    tags = [tag.name for tag in project.tags.all()]

    project_obj = ProjectIndex()
    project_obj.index_document(data={
        'id': project.pk,
        'name': project.name,
        'slug': project.slug,
        'description': project.description,
        'lang': project.language,
        'author': [user.username for user in project.users.all()],
        'url': project.get_absolute_url(),
        'tags': tags,
        '_boost': project_scale,
    })

    index_list = []
    for page in page_list:
        log.debug("(API Index) %s:%s" % (project.slug, page['path']))
        # md5 needs bytes; encode explicitly so non-ASCII paths hash cleanly.
        id_source = '%s-%s-%s' % (project.slug, version.slug, page['path'])
        page_id = hashlib.md5(id_source.encode('utf-8')).hexdigest()
        index_list.append({
            'id': page_id,
            'project': project.slug,
            'version': version.slug,
            'path': page['path'],
            'title': page['title'],
            'headers': page['headers'],
            'content': page['content'],
            'taxonomy': None,
            'commit': commit,
            '_boost': page_scale + project_scale,
        })

    page_obj.bulk_index(index_list, parent=project.slug)
def index_search_request(version, page_list, commit):
    """Index a version's project document and the given pages.

    Project tags are not indexed in this variant; the ``tags`` field is
    stored as ``None``.

    Args:
        version: object exposing ``project`` and ``slug``.
        page_list: list of page dicts with ``path``, ``title``, ``headers``
            and ``content`` keys.
        commit: commit identifier stored on every page document.
    """
    project = version.project
    paths = ' '.join([page['path'] for page in page_list])
    log.info("(Server Search) Indexing Pages: %s [%s]" % (
        project.slug, paths))

    page_obj = PageIndex()
    # Boosts are fixed at 1 here; no external scaling data is consulted.
    project_scale = 1
    page_scale = 1

    project_obj = ProjectIndex()
    project_obj.index_document(data={
        'id': project.pk,
        'name': project.name,
        'slug': project.slug,
        'description': project.description,
        'lang': project.language,
        'author': [user.username for user in project.users.all()],
        'url': project.get_absolute_url(),
        'tags': None,
        '_boost': project_scale,
    })

    index_list = []
    for page in page_list:
        log.debug("(API Index) %s:%s" % (project.slug, page['path']))
        # md5 needs bytes; encode explicitly so non-ASCII paths hash cleanly.
        id_source = '%s-%s-%s' % (project.slug, version.slug, page['path'])
        index_list.append({
            'id': hashlib.md5(id_source.encode('utf-8')).hexdigest(),
            'project': project.slug,
            'version': version.slug,
            'path': page['path'],
            'title': page['title'],
            'headers': page['headers'],
            'content': page['content'],
            'taxonomy': None,
            'commit': commit,
            '_boost': page_scale + project_scale,
        })

    page_obj.bulk_index(index_list, parent=project.slug)
def search(request):
    """Search pages within a project, or projects when none is given.

    Query parameters:
        project: optional project slug; when present a page search is run,
            filtered to that project and ``version``.
        version: version slug for the page search (default ``'latest'``).
        q: the search text.

    Returns:
        A ``Response`` carrying ``{'results': <search results>}``.

    Raises:
        Http404: when ``project`` names a project that does not exist.
    """
    project_slug = request.GET.get('project', None)
    version_slug = request.GET.get('version', 'latest')
    query = request.GET.get('q', None)

    if project_slug:
        # An unknown slug is a client error: answer 404 rather than letting
        # Project.DoesNotExist escape as a server error.
        project = get_object_or_404(Project, slug=project_slug)
        # This is a search within a project -- do a Page search.
        body = {
            "filter": {
                "and": [
                    {"term": {"project": project.slug}},
                    {"term": {"version": version_slug}},
                ]
            },
            "query": {
                "bool": {
                    "should": [
                        {"match": {"title": {"query": query, "boost": 10}}},
                        {"match": {"headers": {"query": query, "boost": 5}}},
                        {"match": {"content": {"query": query}}},
                    ]
                }
            },
            "facets": {
                "path": {
                    "terms": {"field": "path"}}
            },
            "highlight": {
                "fields": {
                    "title": {},
                    "headers": {},
                    "content": {},
                }
            }
        }
        results = PageIndex().search(
            body,
            routing=project.pk,
            fields=['title', 'project', 'version', 'path'],
        )
    else:
        body = {
            "query": {
                "bool": {
                    "should": [
                        {"match": {"name": {"query": query, "boost": 10}}},
                        {"match": {"description": {"query": query}}},
                    ]
                },
            }
        }
        results = ProjectIndex().search(
            body, fields=['name', 'slug', 'description', 'lang'])

    return Response({'results': results})
def search(request):
    """Run a page search, optionally restricted to one project/version.

    Query parameters:
        project: optional project slug; adds a project/version filter and
            a routing key to the search.
        version: version slug used in the filter (default ``'latest'``).
        q: the search text.

    Returns:
        A ``Response`` carrying ``{'results': <search results>}``.
    """
    params = request.GET
    project_slug = params.get('project', None)
    version_slug = params.get('version', 'latest')
    query = params.get('q', None)
    log.debug("(API Search) %s" % query)

    # Title matches weigh most, then headers, then body content.
    should_clauses = [
        {"match": {"title": {"query": query, "boost": 10}}},
        {"match": {"headers": {"query": query, "boost": 5}}},
        {"match": {"content": {"query": query}}},
    ]
    body = {
        "query": {"bool": {"should": should_clauses}},
        "facets": {"path": {"terms": {"field": "path"}}},
        "highlight": {
            "fields": {"title": {}, "headers": {}, "content": {}},
        },
        "fields": ["title", "project", "version", "path"],
        "size": 50,  # TODO: Support pagination.
    }

    search_kwargs = {}
    if project_slug:
        # Get the project ID to add the Elasticsearch routing key.
        # TODO: Update index to route on slug to avoid this db hit.
        project = get_object_or_404(Project, slug=project_slug)
        body['filter'] = {
            "and": [
                {"term": {"project": project.slug}},
                {"term": {"version": version_slug}},
            ]
        }
        # Add routing to optimize search by hitting the right shard.
        search_kwargs['routing'] = project.pk

    return Response({'results': PageIndex().search(body, **search_kwargs)})
def search(request):
    """Run a page search, optionally restricted to one project/version.

    Query parameters:
        project: optional project slug; adds a project/version filter and
            is used as the routing key.
        version: version slug used in the filter (default ``'latest'``).
        q: the search text.

    Returns:
        A ``Response`` carrying ``{'results': <search results>}``.
    """
    params = request.GET
    project_slug = params.get('project', None)
    version_slug = params.get('version', 'latest')
    query = params.get('q', None)
    log.debug("(API Search) %s" % query)

    # Title matches weigh most, then headers, then body content.
    should_clauses = [
        {"match": {"title": {"query": query, "boost": 10}}},
        {"match": {"headers": {"query": query, "boost": 5}}},
        {"match": {"content": {"query": query}}},
    ]
    body = {
        "query": {"bool": {"should": should_clauses}},
        "highlight": {
            "fields": {"title": {}, "headers": {}, "content": {}},
        },
        "fields": ["title", "project", "version", "path"],
        "size": 50,  # TODO: Support pagination.
    }

    search_kwargs = {}
    if project_slug:
        body['filter'] = {
            "and": [
                {"term": {"project": project_slug}},
                {"term": {"version": version_slug}},
            ]
        }
        # Add routing to optimize search by hitting the right shard.
        search_kwargs['routing'] = project_slug

    return Response({'results': PageIndex().search(body, **search_kwargs)})
def elastic_project_search(request, project_slug):
    """Use elastic search to search in a project.

    Looks the project up among those visible to the requesting user
    (404 when absent), runs a page search for the ``q`` parameter when one
    is supplied, and renders ``search/elastic_project_search.html`` with
    ``project``, ``query`` and ``results``.
    """
    visible_projects = Project.objects.protected(request.user)
    project = get_object_or_404(visible_projects, slug=project_slug)
    version_slug = request.GET.get('version', 'latest')
    query = request.GET.get('q', None)

    results = {}
    if query:
        # Log who searched for what; anonymous users log as an empty string.
        user = request.user if request.user.is_authenticated() else ''
        log.info(
            LOG_TEMPLATE.format(
                user=user,
                project=project or '',
                type='inproject',
                version=version_slug or '',
                language='',
                msg=query or '',
            ))

        # Title matches weigh most, then headers, then body content.
        should_clauses = [
            {"match": {"title": {"query": query, "boost": 10}}},
            {"match": {"headers": {"query": query, "boost": 5}}},
            {"match": {"content": {"query": query}}},
        ]
        body = {
            "query": {"bool": {"should": should_clauses}},
            "highlight": {
                "fields": {"title": {}, "headers": {}, "content": {}},
            },
            "fields": ["title", "project", "version", "path"],
            "filter": {
                "and": [
                    {"term": {"project": project_slug}},
                    {"term": {"version": version_slug}},
                ]
            },
            "size": 50,  # TODO: Support pagination.
        }
        # Add routing to optimize search by hitting the right shard.
        results = PageIndex().search(body, routing=project_slug)

    if results:
        # pre and post 1.0 compat: unwrap single-item lists so the
        # template always sees scalar field values.
        for hit in results['hits']['hits']:
            hit_fields = hit['fields']
            for key, val in hit_fields.items():
                if isinstance(val, list):
                    hit_fields[key] = val[0]

    context = {
        'project': project,
        'query': query,
        'results': results,
    }
    return render_to_response(
        'search/elastic_project_search.html',
        context,
        context_instance=RequestContext(request),
    )
def index_search(request):
    """Index a project, its pages and their sections from an API request.

    Reads ``page_list``, ``project_pk`` and ``version_pk`` from
    ``request.DATA["data"]``, fetches boost factors from the heatmap
    service, indexes the project document, indexes every page's sections
    as children of that page, and finally bulk-indexes the pages.

    Returns:
        A ``Response`` carrying ``{"indexed": True}``.
    """
    page_obj = PageIndex()
    section_obj = SectionIndex()
    data = request.DATA["data"]
    page_list = data["page_list"]
    project = Project.objects.get(pk=data["project_pk"])
    version = Version.objects.get(pk=data["version_pk"])

    resp = requests.get(
        "https://api.grokthedocs.com/api/v1/index/1/heatmap/",
        params={"project": project.slug, "compare": True},
    )
    ret_json = resp.json()
    # Fall back to a neutral boost of 1 when the heatmap response has no
    # entry for this project/page instead of failing with KeyError.
    project_scale = ret_json.get("scaled_project", {}).get(project.slug, 1)
    scaled_pages = ret_json.get("scaled_page", {})

    project_obj = ProjectIndex()
    project_obj.index_document(
        {
            "id": project.pk,
            "name": project.name,
            "slug": project.slug,
            "description": project.description,
            "lang": project.language,
            "author": [user.username for user in project.users.all()],
            "url": project.get_absolute_url(),
            "_boost": project_scale,
        }
    )

    index_list = []
    for page in page_list:
        log.debug("(API Index) %s:%s" % (project.slug, page["path"]))
        page_scale = scaled_pages.get(page["path"], 1)
        # md5 needs bytes; encode explicitly so non-ASCII paths hash cleanly.
        page_key = "%s-%s-%s" % (project.slug, version.slug, page["path"])
        page_id = hashlib.md5(page_key.encode("utf-8")).hexdigest()
        index_list.append(
            {
                "id": page_id,
                "project": project.slug,
                "version": version.slug,
                "path": page["path"],
                "title": page["title"],
                "headers": page["headers"],
                "content": page["content"],
                "_boost": page_scale + project_scale,
            }
        )

        # Collect and index sections per page so each section is attached
        # to its own page as parent, never to a different page's id.
        page_sections = []
        for section in page["sections"]:
            section_key = "%s-%s-%s-%s" % (
                project.slug, version.slug, page["path"], section["id"])
            page_sections.append(
                {
                    "id": hashlib.md5(section_key.encode("utf-8")).hexdigest(),
                    "project": project.slug,
                    "version": version.slug,
                    "path": page["path"],
                    "page_id": section["id"],
                    "title": section["title"],
                    "content": section["content"],
                    "_boost": page_scale,
                }
            )
        section_obj.bulk_index(
            page_sections, parent=page_id, routing=project.slug)

    page_obj.bulk_index(index_list, parent=project.slug)
    return Response({"indexed": True})
def index_search_request(version, page_list, commit):
    """Index a version's pages, then delete pages from other commits.

    Args:
        version: object exposing ``project`` and ``slug``.
        page_list: list of page dicts with ``path``, ``title``, ``headers``
            and ``content`` keys.
        commit: commit identifier stored on every page document; page
            documents of this project/version carrying a different commit
            are targeted by the delete query.
    """
    log_msg = ' '.join([page['path'] for page in page_list])
    log.info("(Server Search) Indexing Pages: %s [%s]" % (
        version.project.slug, log_msg))
    project = version.project
    page_obj = PageIndex()
    # Boosts are fixed at 1 here; no external scaling data is consulted.
    project_scale = 1
    page_scale = 1

    project_obj = ProjectIndex()
    project_obj.index_document(data={
        'id': project.pk,
        'name': project.name,
        'slug': project.slug,
        'description': project.description,
        'lang': project.language,
        'author': [user.username for user in project.users.all()],
        'url': project.get_absolute_url(),
        'tags': None,
        '_boost': project_scale,
    })

    index_list = []
    for page in page_list:
        log.debug("(API Index) %s:%s" % (project.slug, page['path']))
        # md5 needs bytes; encode explicitly so non-ASCII paths hash cleanly.
        id_source = '%s-%s-%s' % (project.slug, version.slug, page['path'])
        page_id = hashlib.md5(id_source.encode('utf-8')).hexdigest()
        index_list.append({
            'id': page_id,
            'project': project.slug,
            'version': version.slug,
            'path': page['path'],
            'title': page['title'],
            'headers': page['headers'],
            'content': page['content'],
            'taxonomy': None,
            'commit': commit,
            '_boost': page_scale + project_scale,
        })

    page_obj.bulk_index(index_list, parent=project.slug)

    log.info("(Server Search) Deleting files not in commit: %s" % commit)
    # Figure this out later
    # ES .90 doesn't wrap this in a top-level "query" key, so the bool
    # clause is sent as the body itself.
    delete_query = {
        "bool": {
            "must": [
                {"term": {"project": project.slug, }},
                {"term": {"version": version.slug, }},
            ],
            "must_not": {
                "term": {
                    "commit": commit
                }
            }
        }
    }
    page_obj.delete_document(body=delete_query)
def index_search_request(version, page_list, commit):
    """Index a version's pages, then delete pages from other commits.

    Args:
        version: object exposing ``project`` and ``slug``.
        page_list: list of page dicts with ``path``, ``title``, ``headers``
            and ``content`` keys.
        commit: commit identifier stored on every page document; page
            documents of this project/version carrying a different commit
            are targeted by the delete query.
    """
    project = version.project
    paths = ' '.join([page['path'] for page in page_list])
    log.info("(Server Search) Indexing Pages: %s [%s]" %
             (project.slug, paths))

    page_obj = PageIndex()
    # Boosts are fixed at 1 here; no external scaling data is consulted.
    project_scale = 1
    page_scale = 1

    project_obj = ProjectIndex()
    project_obj.index_document(
        data={
            'id': project.pk,
            'name': project.name,
            'slug': project.slug,
            'description': project.description,
            'lang': project.language,
            'author': [user.username for user in project.users.all()],
            'url': project.get_absolute_url(),
            'tags': None,
            '_boost': project_scale,
        })

    index_list = []
    for page in page_list:
        log.debug("(API Index) %s:%s" % (project.slug, page['path']))
        # md5 needs bytes; encode explicitly so non-ASCII paths hash cleanly.
        id_source = '%s-%s-%s' % (project.slug, version.slug, page['path'])
        index_list.append({
            'id': hashlib.md5(id_source.encode('utf-8')).hexdigest(),
            'project': project.slug,
            'version': version.slug,
            'path': page['path'],
            'title': page['title'],
            'headers': page['headers'],
            'content': page['content'],
            'taxonomy': None,
            'commit': commit,
            '_boost': page_scale + project_scale,
        })

    page_obj.bulk_index(index_list, parent=project.slug)

    log.info("(Server Search) Deleting files not in commit: %s" % commit)
    # Figure this out later
    # ES .90 doesn't wrap this in a top-level "query" key, so the bool
    # clause is sent as the body itself.
    delete_query = {
        "bool": {
            "must": [
                {"term": {"project": project.slug}},
                {"term": {"version": version.slug}},
            ],
            "must_not": {
                "term": {"commit": commit},
            },
        }
    }
    page_obj.delete_document(body=delete_query)
def index_search_request(version, page_list):
    """Index a version's project, pages and page sections.

    Boost factors come from the heatmap service; a project or page with no
    entry in the response gets a neutral boost of 1.

    Args:
        version: object exposing ``project`` and ``slug``.
        page_list: list of page dicts with ``path``, ``title``, ``headers``,
            ``content`` and ``sections`` keys; each section is a dict with
            ``id``, ``title`` and ``content``.
    """
    log_msg = ' '.join([page['path'] for page in page_list])
    log.info("(Server Search) Indexing Pages: %s [%s]" %
             (version.project.slug, log_msg))
    project = version.project
    page_obj = PageIndex()
    section_obj = SectionIndex()

    resp = requests.get(
        'https://api.grokthedocs.com/api/v1/index/1/heatmap/',
        params={'project': project.slug, 'compare': True},
    )
    ret_json = resp.json()
    project_scale = ret_json.get('scaled_project', {}).get(project.slug, 1)
    scaled_pages = ret_json.get('scaled_page', {})

    project_obj = ProjectIndex()
    project_obj.index_document({
        'id': project.pk,
        'name': project.name,
        'slug': project.slug,
        'description': project.description,
        'lang': project.language,
        'author': [user.username for user in project.users.all()],
        'url': project.get_absolute_url(),
        '_boost': project_scale,
    })

    index_list = []
    for page in page_list:
        log.debug("(API Index) %s:%s" % (project.slug, page['path']))
        page_scale = scaled_pages.get(page['path'], 1)
        # md5 needs bytes; encode explicitly so non-ASCII paths hash cleanly.
        page_key = '%s-%s-%s' % (project.slug, version.slug, page['path'])
        page_id = hashlib.md5(page_key.encode('utf-8')).hexdigest()
        index_list.append({
            'id': page_id,
            'project': project.slug,
            'version': version.slug,
            'path': page['path'],
            'title': page['title'],
            'headers': page['headers'],
            'content': page['content'],
            '_boost': page_scale + project_scale,
        })

        # Collect and index sections per page so each section is attached
        # to its own page as parent, never to a different page's id.
        page_sections = []
        for section in page['sections']:
            section_key = '%s-%s-%s-%s' % (
                project.slug, version.slug, page['path'], section['id'])
            page_sections.append({
                'id': hashlib.md5(section_key.encode('utf-8')).hexdigest(),
                'project': project.slug,
                'version': version.slug,
                'path': page['path'],
                'page_id': section['id'],
                'title': section['title'],
                'content': section['content'],
                '_boost': page_scale,
            })
        section_obj.bulk_index(
            page_sections, parent=page_id, routing=project.slug)

    page_obj.bulk_index(index_list, parent=project.slug)
def search_file(query, project=None, version='latest', taxonomy=None):
    """Search page documents, with facets on taxonomy, project and version.

    Args:
        query: the search text, matched against title, headers and content.
        project: optional project slug; filters results and is used as the
            routing key.
        version: optional version slug to filter on (default ``'latest'``).
        taxonomy: optional taxonomy value to filter on.

    Returns:
        Whatever ``PageIndex().search`` returns for the built request body.
    """
    # Title matches weigh most, then headers, then body content.
    should_clauses = [
        {"match": {"title": {"query": query, "boost": 10}}},
        {"match": {"headers": {"query": query, "boost": 5}}},
        {"match": {"content": {"query": query}}},
    ]
    facets = {}
    for facet_field in ("taxonomy", "project", "version"):
        facets[facet_field] = {"terms": {"field": facet_field}}

    body = {
        "query": {"bool": {"should": should_clauses}},
        "facets": facets,
        "highlight": {
            "fields": {"title": {}, "headers": {}, "content": {}},
        },
        "fields": ["title", "project", "version", "path"],
        "size": 50,  # TODO: Support pagination.
    }

    # One term filter per supplied constraint, in project/version/taxonomy
    # order.
    constraints = (
        ('project', project),
        ('version', version),
        ('taxonomy', taxonomy),
    )
    terms = [{'term': {field: value}}
             for field, value in constraints if value]
    if terms:
        # The same filter applies to the hits and to every facet.
        final_filter = {"and": terms}
        body['filter'] = final_filter
        for facet in facets.values():
            facet['facet_filter'] = final_filter

    search_kwargs = {}
    if project:
        # Add routing to optimize search by hitting the right shard.
        search_kwargs['routing'] = project

    return PageIndex().search(body, **search_kwargs)
def index_search_request(version, page_list, commit, project_scale,
                         page_scale, section=True, delete=True):
    """Index a project, its pages and, optionally, each page's sections.

    Args:
        version: object exposing ``project`` and ``slug``.
        page_list: list of page dicts with ``path``, ``title``, ``headers``
            and ``content`` keys, plus ``sections`` when ``section`` is
            true; each section is a dict with ``id``, ``title`` and
            ``content``.
        commit: commit identifier stored on every page document.
        project_scale: weight of the project document; added to
            ``page_scale`` to weight page documents.
        page_scale: weight of section documents.
        section: when true, index each page's sections as children of
            that page.
        delete: when true, delete page documents of this project/version
            whose ``commit`` differs from ``commit``.
    """
    log_msg = ' '.join([page['path'] for page in page_list])
    log.info("(Server Search) Indexing Pages: %s [%s]" %
             (version.project.slug, log_msg))
    project = version.project
    page_obj = PageIndex()
    section_obj = SectionIndex()

    project_obj = ProjectIndex()
    project_obj.index_document(
        data={
            'id': project.pk,
            'name': project.name,
            'slug': project.slug,
            'description': project.description,
            'lang': project.language,
            'author': [user.username for user in project.users.all()],
            'url': project.get_absolute_url(),
            'tags': None,
            'weight': project_scale,
        })

    index_list = []
    for page in page_list:
        log.debug("(API Index) %s:%s" % (project.slug, page['path']))
        # md5 needs bytes; encode explicitly so non-ASCII paths hash cleanly.
        page_key = '%s-%s-%s' % (project.slug, version.slug, page['path'])
        page_id = hashlib.md5(page_key.encode('utf-8')).hexdigest()
        index_list.append({
            'id': page_id,
            'project': project.slug,
            'version': version.slug,
            'path': page['path'],
            'title': page['title'],
            'headers': page['headers'],
            'content': page['content'],
            'taxonomy': None,
            'commit': commit,
            'weight': page_scale + project_scale,
        })

        if section:
            # Collect and index sections per page so each section is
            # attached to its own page as parent. The loop variable has its
            # own name so the ``section`` flag is never overwritten.
            page_sections = []
            for page_section in page['sections']:
                section_key = '%s-%s-%s-%s' % (
                    project.slug, version.slug, page['path'],
                    page_section['id'])
                page_sections.append({
                    'id': hashlib.md5(
                        section_key.encode('utf-8')).hexdigest(),
                    'project': project.slug,
                    'version': version.slug,
                    'path': page['path'],
                    'page_id': page_section['id'],
                    'title': page_section['title'],
                    'content': page_section['content'],
                    'weight': page_scale,
                })
            section_obj.bulk_index(
                page_sections, parent=page_id, routing=project.slug)

    page_obj.bulk_index(index_list, parent=project.slug)

    if delete:
        log.info("(Server Search) Deleting files not in commit: %s" % commit)
        # TODO: AK Make sure this works
        delete_query = {
            "query": {
                "bool": {
                    "must": [
                        {"term": {"project": project.slug}},
                        {"term": {"version": version.slug}},
                    ],
                    "must_not": {
                        "term": {"commit": commit},
                    },
                }
            }
        }
        page_obj.delete_document(body=delete_query)
def index_search_request(version, page_list, commit, project_scale,
                         page_scale, section=True, delete=True):
    """Index a project, its pages and, optionally, each page's sections.

    Args:
        version: object exposing ``project`` and ``slug``.
        page_list: list of page dicts with ``path``, ``title``, ``headers``
            and ``content`` keys, plus ``sections`` when ``section`` is
            true; each section is a dict with ``id``, ``title`` and
            ``content``.
        commit: commit identifier stored on every page document.
        project_scale: weight of the project document; added to
            ``page_scale`` to weight page documents.
        page_scale: weight of section documents.
        section: when true, index each page's sections as children of
            that page.
        delete: when true, delete page documents of this project/version
            whose ``commit`` differs from ``commit``.
    """
    project = version.project
    paths = ' '.join([page['path'] for page in page_list])
    log.info("(Server Search) Indexing Pages: %s [%s]" % (
        project.slug, paths))

    page_obj = PageIndex()
    section_obj = SectionIndex()

    project_obj = ProjectIndex()
    project_obj.index_document(data={
        'id': project.pk,
        'name': project.name,
        'slug': project.slug,
        'description': project.description,
        'lang': project.language,
        'author': [user.username for user in project.users.all()],
        'url': project.get_absolute_url(),
        'tags': None,
        'weight': project_scale,
    })

    index_list = []
    for page in page_list:
        log.debug("(API Index) %s:%s" % (project.slug, page['path']))
        # md5 needs bytes; encode explicitly so non-ASCII paths hash cleanly.
        page_key = '%s-%s-%s' % (project.slug, version.slug, page['path'])
        page_id = hashlib.md5(page_key.encode('utf-8')).hexdigest()
        index_list.append({
            'id': page_id,
            'project': project.slug,
            'version': version.slug,
            'path': page['path'],
            'title': page['title'],
            'headers': page['headers'],
            'content': page['content'],
            'taxonomy': None,
            'commit': commit,
            'weight': page_scale + project_scale,
        })

        if not section:
            continue

        # Collect and index sections per page so each section is attached
        # to its own page as parent. The loop variable has its own name so
        # the ``section`` flag is never overwritten.
        page_sections = []
        for page_section in page['sections']:
            section_key = '%s-%s-%s-%s' % (
                project.slug, version.slug, page['path'], page_section['id'])
            page_sections.append({
                'id': hashlib.md5(section_key.encode('utf-8')).hexdigest(),
                'project': project.slug,
                'version': version.slug,
                'path': page['path'],
                'page_id': page_section['id'],
                'title': page_section['title'],
                'content': page_section['content'],
                'weight': page_scale,
            })
        section_obj.bulk_index(
            page_sections, parent=page_id, routing=project.slug)

    page_obj.bulk_index(index_list, parent=project.slug)

    if delete:
        log.info("(Server Search) Deleting files not in commit: %s" % commit)
        # TODO: AK Make sure this works
        delete_query = {
            "query": {
                "bool": {
                    "must": [
                        {"term": {"project": project.slug, }},
                        {"term": {"version": version.slug, }},
                    ],
                    "must_not": {
                        "term": {
                            "commit": commit
                        }
                    }
                }
            }
        }
        page_obj.delete_document(body=delete_query)
def index_search_request(version, page_list):
    """Index a version's project, pages and page sections.

    Boost factors come from the heatmap service; a project or page with no
    entry in the response gets a neutral boost of 1.

    Args:
        version: object exposing ``project`` and ``slug``.
        page_list: list of page dicts with ``path``, ``title``, ``headers``,
            ``content`` and ``sections`` keys; each section is a dict with
            ``id``, ``title`` and ``content``.
    """
    project = version.project
    paths = " ".join([page["path"] for page in page_list])
    log.info("(Server Search) Indexing Pages: %s [%s]" % (project.slug, paths))

    page_obj = PageIndex()
    section_obj = SectionIndex()

    resp = requests.get(
        "https://api.grokthedocs.com/api/v1/index/1/heatmap/",
        params={"project": project.slug, "compare": True},
    )
    ret_json = resp.json()
    project_scale = ret_json.get("scaled_project", {}).get(project.slug, 1)
    scaled_pages = ret_json.get("scaled_page", {})

    project_obj = ProjectIndex()
    project_obj.index_document(
        {
            "id": project.pk,
            "name": project.name,
            "slug": project.slug,
            "description": project.description,
            "lang": project.language,
            "author": [user.username for user in project.users.all()],
            "url": project.get_absolute_url(),
            "_boost": project_scale,
        }
    )

    index_list = []
    for page in page_list:
        log.debug("(API Index) %s:%s" % (project.slug, page["path"]))
        page_scale = scaled_pages.get(page["path"], 1)
        # md5 needs bytes; encode explicitly so non-ASCII paths hash cleanly.
        page_key = "%s-%s-%s" % (project.slug, version.slug, page["path"])
        page_id = hashlib.md5(page_key.encode("utf-8")).hexdigest()
        index_list.append(
            {
                "id": page_id,
                "project": project.slug,
                "version": version.slug,
                "path": page["path"],
                "title": page["title"],
                "headers": page["headers"],
                "content": page["content"],
                "_boost": page_scale + project_scale,
            }
        )

        # Collect and index sections per page so each section is attached
        # to its own page as parent, never to a different page's id.
        page_sections = []
        for section in page["sections"]:
            section_key = "%s-%s-%s-%s" % (
                project.slug, version.slug, page["path"], section["id"])
            page_sections.append(
                {
                    "id": hashlib.md5(section_key.encode("utf-8")).hexdigest(),
                    "project": project.slug,
                    "version": version.slug,
                    "path": page["path"],
                    "page_id": section["id"],
                    "title": section["title"],
                    "content": section["content"],
                    "_boost": page_scale,
                }
            )
        section_obj.bulk_index(
            page_sections, parent=page_id, routing=project.slug)

    page_obj.bulk_index(index_list, parent=project.slug)