def index_search(request):
    """Index a project, its pages and their sections from a request payload.

    Reads ``request.DATA['data']``, which must contain ``page_list``,
    ``project_pk`` and ``version_pk``. Each page is a mapping with ``path``,
    ``title``, ``headers``, ``content`` and ``sections``; each section has
    ``id``, ``title`` and ``content``.

    Boost values are fetched from the grokthedocs heatmap endpoint; a project
    or page missing from that response gets a boost of ``1``.

    Returns ``Response({'indexed': True})``. Model lookup errors and HTTP
    errors (including the request timeout) propagate to the caller.
    """
    page_obj = PageIndex()
    section_obj = SectionIndex()
    data = request.DATA['data']
    page_list = data['page_list']
    project = Project.objects.get(pk=data['project_pk'])
    version = Version.objects.get(pk=data['version_pk'])

    resp = requests.get(
        'https://api.grokthedocs.com/api/v1/index/1/heatmap/',
        params={'project': project.slug, 'compare': True},
        timeout=5,
    )
    ret_json = resp.json()
    # Missing heatmap data falls back to a neutral boost instead of KeyError.
    project_scale = ret_json.get('scaled_project', {}).get(project.slug, 1)
    scaled_pages = ret_json.get('scaled_page', {})

    project_obj = ProjectIndex()
    project_obj.index_document({
        'id': project.pk,
        'name': project.name,
        'slug': project.slug,
        'description': project.description,
        'lang': project.language,
        'author': [user.username for user in project.users.all()],
        'url': project.get_absolute_url(),
        '_boost': project_scale,
    })

    index_list = []
    for page in page_list:
        log.debug("(API Index) %s:%s", project.slug, page['path'])
        page_scale = scaled_pages.get(page['path'], 1)
        page_key = '%s-%s-%s' % (project.slug, version.slug, page['path'])
        # md5 needs bytes; encoding explicitly also copes with non-ASCII paths.
        page_id = hashlib.md5(page_key.encode('utf-8')).hexdigest()
        index_list.append({
            'id': page_id,
            'project': project.slug,
            'version': version.slug,
            'path': page['path'],
            'title': page['title'],
            'headers': page['headers'],
            'content': page['content'],
            '_boost': page_scale + project_scale,
        })

        # Build the section batch per page: bulk_index takes one parent for
        # the whole batch, so sections of other pages must not be in it.
        section_index_list = [
            {
                'id': hashlib.md5(
                    ('%s-%s' % (page_key, sect['id'])).encode('utf-8')
                ).hexdigest(),
                'project': project.slug,
                'version': version.slug,
                'path': page['path'],
                'page_id': sect['id'],
                'title': sect['title'],
                'content': sect['content'],
                '_boost': page_scale,
            }
            for sect in page['sections']
        ]
        if section_index_list:
            section_obj.bulk_index(section_index_list, parent=page_id,
                                   routing=project.slug)

    page_obj.bulk_index(index_list, parent=project.slug)
    return Response({'indexed': True})
def index_search(request):
    """Index a project, its pages and their sections from a request payload.

    ``request.DATA['data']`` must provide ``page_list``, ``project_pk`` and
    ``version_pk``. Pages are mappings with ``path``, ``title``, ``headers``,
    ``content`` and ``sections``; sections carry ``id``, ``title`` and
    ``content``.

    Boosts come from the grokthedocs heatmap endpoint. Projects or pages the
    endpoint does not report are boosted by ``1``.

    Returns ``Response({'indexed': True})``. Model lookup failures and HTTP
    errors (including a timeout) are not caught here.
    """
    page_obj = PageIndex()
    section_obj = SectionIndex()
    data = request.DATA['data']
    page_list = data['page_list']
    project_pk = data['project_pk']
    version_pk = data['version_pk']
    project = Project.objects.get(pk=project_pk)
    version = Version.objects.get(pk=version_pk)

    # Without a timeout a stalled heatmap service would block this request
    # indefinitely.
    resp = requests.get(
        'https://api.grokthedocs.com/api/v1/index/1/heatmap/',
        params={'project': project.slug, 'compare': True},
        timeout=5,
    )
    ret_json = resp.json()
    # Tolerate a heatmap response that lacks this project or any page data.
    project_scale = ret_json.get('scaled_project', {}).get(project.slug, 1)
    page_scales = ret_json.get('scaled_page', {})

    project_obj = ProjectIndex()
    project_obj.index_document({
        'id': project.pk,
        'name': project.name,
        'slug': project.slug,
        'description': project.description,
        'lang': project.language,
        'author': [user.username for user in project.users.all()],
        'url': project.get_absolute_url(),
        '_boost': project_scale,
    })

    index_list = []
    for page in page_list:
        path = page['path']
        log.debug("(API Index) %s:%s", project.slug, path)
        page_scale = page_scales.get(path, 1)
        page_key = '%s-%s-%s' % (project.slug, version.slug, path)
        # hashlib works on bytes; encode so text input hashes reliably.
        page_id = hashlib.md5(page_key.encode('utf-8')).hexdigest()
        index_list.append({
            'id': page_id,
            'project': project.slug,
            'version': version.slug,
            'path': path,
            'title': page['title'],
            'headers': page['headers'],
            'content': page['content'],
            '_boost': page_scale + project_scale,
        })

        # Sections are sent one page at a time so that each batch is attached
        # to the page it belongs to (parent applies to the whole batch).
        section_index_list = []
        for sect in page['sections']:
            sect_key = '%s-%s' % (page_key, sect['id'])
            section_index_list.append({
                'id': hashlib.md5(sect_key.encode('utf-8')).hexdigest(),
                'project': project.slug,
                'version': version.slug,
                'path': path,
                'page_id': sect['id'],
                'title': sect['title'],
                'content': sect['content'],
                '_boost': page_scale,
            })
        if section_index_list:
            section_obj.bulk_index(section_index_list, parent=page_id,
                                   routing=project.slug)

    page_obj.bulk_index(index_list, parent=project.slug)
    return Response({'indexed': True})
def index_search(request):
    """Index a project, its pages and their sections from a request payload.

    Expects ``request.DATA["data"]`` to hold ``page_list``, ``project_pk``
    and ``version_pk``. Every page is a mapping with ``path``, ``title``,
    ``headers``, ``content`` and ``sections``; every section has ``id``,
    ``title`` and ``content``.

    Boost values are read from the grokthedocs heatmap endpoint, defaulting
    to ``1`` for anything the endpoint does not list.

    Returns ``Response({"indexed": True})``. Exceptions from the model
    lookups or the HTTP call (including a timeout) propagate.
    """
    page_obj = PageIndex()
    section_obj = SectionIndex()
    data = request.DATA["data"]
    page_list = data["page_list"]
    project = Project.objects.get(pk=data["project_pk"])
    version = Version.objects.get(pk=data["version_pk"])

    # Bound the wait: requests never times out unless told to.
    resp = requests.get(
        "https://api.grokthedocs.com/api/v1/index/1/heatmap/",
        params={"project": project.slug, "compare": True},
        timeout=5,
    )
    ret_json = resp.json()
    # A heatmap without this project (or without page data) means "no boost".
    project_scale = ret_json.get("scaled_project", {}).get(project.slug, 1)
    scaled_pages = ret_json.get("scaled_page", {})

    project_obj = ProjectIndex()
    project_obj.index_document(
        {
            "id": project.pk,
            "name": project.name,
            "slug": project.slug,
            "description": project.description,
            "lang": project.language,
            "author": [user.username for user in project.users.all()],
            "url": project.get_absolute_url(),
            "_boost": project_scale,
        }
    )

    index_list = []
    for page in page_list:
        log.debug("(API Index) %s:%s", project.slug, page["path"])
        page_scale = scaled_pages.get(page["path"], 1)
        page_key = "%s-%s-%s" % (project.slug, version.slug, page["path"])
        # md5 operates on bytes, so encode the key before hashing.
        page_id = hashlib.md5(page_key.encode("utf-8")).hexdigest()
        index_list.append(
            {
                "id": page_id,
                "project": project.slug,
                "version": version.slug,
                "path": page["path"],
                "title": page["title"],
                "headers": page["headers"],
                "content": page["content"],
                "_boost": page_scale + project_scale,
            }
        )

        # One batch per page: the parent given to bulk_index applies to every
        # document in the batch, so batches must not mix pages.
        section_index_list = [
            {
                "id": hashlib.md5(
                    ("%s-%s" % (page_key, sect["id"])).encode("utf-8")
                ).hexdigest(),
                "project": project.slug,
                "version": version.slug,
                "path": page["path"],
                "page_id": sect["id"],
                "title": sect["title"],
                "content": sect["content"],
                "_boost": page_scale,
            }
            for sect in page["sections"]
        ]
        if section_index_list:
            section_obj.bulk_index(
                section_index_list, parent=page_id, routing=project.slug
            )

    page_obj.bulk_index(index_list, parent=project.slug)
    return Response({"indexed": True})
def section_search(request):
    """
    Search for a Section of content on Read the Docs.

    A Section is a subheading on a specific page.

    Query Thoughts
    --------------

    If you want to search across all documents, just query with a ``q`` GET arg.
    If you want to filter by a specific project, include a ``project`` GET arg.

    Facets
    ------

    When you search, you will have a ``project`` facet, which includes the
    number of matching sections per project. When you search inside a project,
    the ``path`` facet will show the number of matching sections per page.

    Possible GET args
    -----------------

    * q - The query string **Required**
    * project - A project slug *Optional*
    * version - A version slug *Optional* (defaults to ``latest``)
    * path - A file path slug *Optional*

    Example
    -------

        GET /api/v2/search/section/?q=virtualenv&project=django

    Current Query
    -------------

    Matches ``q`` against section ``title`` (boosted by 10) and ``content``.
    Responds with HTTP 400 when ``q`` is missing or empty.
    """
    query = request.GET.get('q', None)
    if not query:
        return Response(
            {'error': 'Search term required. Use the "q" GET arg to search. '},
            status=status.HTTP_400_BAD_REQUEST)
    project_slug = request.GET.get('project', None)
    version_slug = request.GET.get('version', 'latest')
    path_slug = request.GET.get('path', None)
    log.debug("(API Section Search) [%s:%s] %s",
              project_slug, version_slug, query)

    kwargs = {}
    body = {
        "query": {
            "bool": {
                "should": [
                    {"match": {"title": {"query": query, "boost": 10}}},
                    {"match": {"content": {"query": query}}},
                ]
            }
        },
        "facets": {
            "project": {
                "terms": {"field": "project"},
                "facet_filter": {
                    "term": {"version": version_slug},
                }
            },
        },
        "highlight": {
            "fields": {
                "title": {},
                "content": {},
            }
        },
        "fields": ["title", "project", "version", "path", "page_id", "content"],
        "size": 10  # TODO: Support pagination.
    }

    # Collect filter terms and combine them once, so that a ``path`` filter
    # narrows a project/version filter instead of replacing it.
    filter_terms = []

    if project_slug:
        filter_terms.append({"term": {"project": project_slug}})
        filter_terms.append({"term": {"version": version_slug}})
        # Must be a plain dict: a trailing comma here would wrap the facet
        # definition in a tuple.
        body["facets"]['path'] = {
            "terms": {"field": "path"},
            "facet_filter": {
                "term": {"project": project_slug},
            }
        }
        # Add routing to optimize search by hitting the right shard.
        kwargs['routing'] = project_slug

    if path_slug:
        filter_terms.append({"term": {"path": path_slug}})
        if not project_slug:
            # Show facets when we only have a path
            body["facets"]['path'] = {"terms": {"field": "path"}}

    if filter_terms:
        body['filter'] = {"and": filter_terms}

    results = SectionIndex().search(body, **kwargs)
    return Response({'results': results})
def index_search_request(version, page_list):
    """Index ``version``'s project, the given pages and their sections.

    :param version: object exposing ``slug`` and ``project``; the project's
        ``pk``, ``name``, ``slug``, ``description``, ``language``, ``users``
        and ``get_absolute_url()`` are read.
    :param page_list: list of page mappings with ``path``, ``title``,
        ``headers``, ``content`` and ``sections`` (each section having
        ``id``, ``title`` and ``content``).

    Boost values are fetched from the grokthedocs heatmap endpoint and
    default to ``1`` when the project or a page is not listed. HTTP errors,
    including the request timeout, propagate to the caller.
    """
    log_msg = ' '.join([page['path'] for page in page_list])
    log.info("(Server Search) Indexing Pages: %s [%s]",
             version.project.slug, log_msg)
    project = version.project
    page_obj = PageIndex()
    section_obj = SectionIndex()

    # requests waits forever unless a timeout is given.
    resp = requests.get(
        'https://api.grokthedocs.com/api/v1/index/1/heatmap/',
        params={'project': project.slug, 'compare': True},
        timeout=5,
    )
    ret_json = resp.json()
    project_scale = ret_json.get('scaled_project', {}).get(project.slug, 1)
    scaled_pages = ret_json.get('scaled_page', {})

    project_obj = ProjectIndex()
    project_obj.index_document({
        'id': project.pk,
        'name': project.name,
        'slug': project.slug,
        'description': project.description,
        'lang': project.language,
        'author': [user.username for user in project.users.all()],
        'url': project.get_absolute_url(),
        '_boost': project_scale,
    })

    index_list = []
    for page in page_list:
        log.debug("(API Index) %s:%s", project.slug, page['path'])
        page_scale = scaled_pages.get(page['path'], 1)
        page_key = '%s-%s-%s' % (project.slug, version.slug, page['path'])
        # md5 needs bytes; encoding explicitly also copes with non-ASCII paths.
        page_id = hashlib.md5(page_key.encode('utf-8')).hexdigest()
        index_list.append({
            'id': page_id,
            'project': project.slug,
            'version': version.slug,
            'path': page['path'],
            'title': page['title'],
            'headers': page['headers'],
            'content': page['content'],
            '_boost': page_scale + project_scale,
        })

        # Build a fresh batch per page: ``parent`` applies to every document
        # in the batch, so sections of earlier pages must not be resent here.
        section_index_list = [
            {
                'id': hashlib.md5(
                    ('%s-%s' % (page_key, sect['id'])).encode('utf-8')
                ).hexdigest(),
                'project': project.slug,
                'version': version.slug,
                'path': page['path'],
                'page_id': sect['id'],
                'title': sect['title'],
                'content': sect['content'],
                '_boost': page_scale,
            }
            for sect in page['sections']
        ]
        if section_index_list:
            section_obj.bulk_index(section_index_list, parent=page_id,
                                   routing=project.slug)

    page_obj.bulk_index(index_list, parent=project.slug)
def index_search_request(version, page_list, commit, project_scale,
                         page_scale, section=True, delete=True):
    """Index a project, its pages and (optionally) their sections.

    :param version: object exposing ``slug`` and ``project``.
    :param page_list: list of page mappings with ``path``, ``title``,
        ``headers``, ``content`` and ``sections`` (each section having
        ``id``, ``title`` and ``content``).
    :param commit: commit identifier stored on every page document.
    :param project_scale: weight for the project document; also added to
        ``page_scale`` for page documents.
    :param page_scale: weight applied to section documents.
    :param section: when true, section documents are indexed as well.
    :param delete: when true, page documents of this project/version whose
        ``commit`` differs from ``commit`` are deleted afterwards.
    """
    log_msg = ' '.join([page['path'] for page in page_list])
    log.info("(Server Search) Indexing Pages: %s [%s]",
             version.project.slug, log_msg)
    project = version.project
    page_obj = PageIndex()
    section_obj = SectionIndex()

    project_obj = ProjectIndex()
    project_obj.index_document(data={
        'id': project.pk,
        'name': project.name,
        'slug': project.slug,
        'description': project.description,
        'lang': project.language,
        'author': [user.username for user in project.users.all()],
        'url': project.get_absolute_url(),
        # Project tags are not indexed yet; the field is sent empty.
        'tags': None,
        'weight': project_scale,
    })

    index_list = []
    for page in page_list:
        log.debug("(API Index) %s:%s", project.slug, page['path'])
        page_key = '%s-%s-%s' % (project.slug, version.slug, page['path'])
        # md5 needs bytes; encoding explicitly also copes with non-ASCII paths.
        page_id = hashlib.md5(page_key.encode('utf-8')).hexdigest()
        index_list.append({
            'id': page_id,
            'project': project.slug,
            'version': version.slug,
            'path': page['path'],
            'title': page['title'],
            'headers': page['headers'],
            'content': page['content'],
            'taxonomy': None,
            'commit': commit,
            'weight': page_scale + project_scale,
        })

        if not section:
            continue

        # The loop variable is deliberately not called ``section`` so the
        # boolean parameter is never rebound. The batch is rebuilt per page
        # because ``parent`` applies to every document passed to bulk_index.
        section_index_list = []
        for sect in page['sections']:
            sect_key = '%s-%s' % (page_key, sect['id'])
            section_index_list.append({
                'id': hashlib.md5(sect_key.encode('utf-8')).hexdigest(),
                'project': project.slug,
                'version': version.slug,
                'path': page['path'],
                'page_id': sect['id'],
                'title': sect['title'],
                'content': sect['content'],
                'weight': page_scale,
            })
        if section_index_list:
            section_obj.bulk_index(section_index_list, parent=page_id,
                                   routing=project.slug)

    page_obj.bulk_index(index_list, parent=project.slug)

    if delete:
        log.info("(Server Search) Deleting files not in commit: %s", commit)
        # TODO: AK Make sure this works
        delete_query = {
            "query": {
                "bool": {
                    "must": [
                        {"term": {"project": project.slug}},
                        {"term": {"version": version.slug}},
                    ],
                    "must_not": {
                        "term": {"commit": commit}
                    }
                }
            }
        }
        page_obj.delete_document(body=delete_query)
def index_search_request(version, page_list, commit, project_scale,
                         page_scale, section=True, delete=True):
    """Update the project, page and section search indexes for a build.

    :param version: object exposing ``slug`` and ``project``.
    :param page_list: list of page mappings; each needs ``path``, ``title``,
        ``headers``, ``content`` and ``sections`` (sections need ``id``,
        ``title`` and ``content``).
    :param commit: commit identifier recorded on each page document.
    :param project_scale: weight of the project document, and a component of
        every page weight (``page_scale + project_scale``).
    :param page_scale: weight of section documents.
    :param section: index section documents too when true.
    :param delete: when true, finish by deleting page documents of this
        project and version that were not indexed for ``commit``.
    """
    project = version.project
    log_msg = ' '.join([page['path'] for page in page_list])
    log.info("(Server Search) Indexing Pages: %s [%s]",
             project.slug, log_msg)
    page_obj = PageIndex()
    section_obj = SectionIndex()

    project_obj = ProjectIndex()
    project_obj.index_document(data={
        'id': project.pk,
        'name': project.name,
        'slug': project.slug,
        'description': project.description,
        'lang': project.language,
        'author': [user.username for user in project.users.all()],
        'url': project.get_absolute_url(),
        # Tags are not populated yet; the key is still sent with None.
        'tags': None,
        'weight': project_scale,
    })

    index_list = []
    for page in page_list:
        path = page['path']
        log.debug("(API Index) %s:%s", project.slug, path)
        page_key = '%s-%s-%s' % (project.slug, version.slug, path)
        # hashlib works on bytes; encode so text input hashes reliably.
        page_id = hashlib.md5(page_key.encode('utf-8')).hexdigest()
        index_list.append({
            'id': page_id,
            'project': project.slug,
            'version': version.slug,
            'path': path,
            'title': page['title'],
            'headers': page['headers'],
            'content': page['content'],
            'taxonomy': None,
            'commit': commit,
            'weight': page_scale + project_scale,
        })

        if section:
            # ``sect`` (not ``section``) keeps the boolean flag intact, and a
            # per-page batch keeps each section under its own parent page.
            section_index_list = [
                {
                    'id': hashlib.md5(
                        ('%s-%s' % (page_key, sect['id'])).encode('utf-8')
                    ).hexdigest(),
                    'project': project.slug,
                    'version': version.slug,
                    'path': path,
                    'page_id': sect['id'],
                    'title': sect['title'],
                    'content': sect['content'],
                    'weight': page_scale,
                }
                for sect in page['sections']
            ]
            if section_index_list:
                section_obj.bulk_index(section_index_list, parent=page_id,
                                       routing=project.slug)

    page_obj.bulk_index(index_list, parent=project.slug)

    if delete:
        log.info("(Server Search) Deleting files not in commit: %s", commit)
        # TODO: AK Make sure this works
        delete_query = {
            "query": {
                "bool": {
                    "must": [
                        {"term": {"project": project.slug, }},
                        {"term": {"version": version.slug, }},
                    ],
                    "must_not": {
                        "term": {
                            "commit": commit
                        }
                    }
                }
            }
        }
        page_obj.delete_document(body=delete_query)
def index_search_request(version, page_list):
    """Index ``version``'s project, the given pages and their sections.

    :param version: object exposing ``slug`` and ``project``.
    :param page_list: list of page mappings with ``path``, ``title``,
        ``headers``, ``content`` and ``sections`` (each section having
        ``id``, ``title`` and ``content``).

    Boosts are read from the grokthedocs heatmap endpoint; a project or page
    it does not list is boosted by ``1``. HTTP errors, including the request
    timeout, are not caught here.
    """
    log_msg = " ".join([page["path"] for page in page_list])
    log.info("(Server Search) Indexing Pages: %s [%s]", version.project.slug, log_msg)
    project = version.project
    page_obj = PageIndex()
    section_obj = SectionIndex()

    # Bound the wait: requests never times out unless told to.
    resp = requests.get(
        "https://api.grokthedocs.com/api/v1/index/1/heatmap/",
        params={"project": project.slug, "compare": True},
        timeout=5,
    )
    ret_json = resp.json()
    project_scale = ret_json.get("scaled_project", {}).get(project.slug, 1)
    scaled_pages = ret_json.get("scaled_page", {})

    project_obj = ProjectIndex()
    project_obj.index_document(
        {
            "id": project.pk,
            "name": project.name,
            "slug": project.slug,
            "description": project.description,
            "lang": project.language,
            "author": [user.username for user in project.users.all()],
            "url": project.get_absolute_url(),
            "_boost": project_scale,
        }
    )

    index_list = []
    for page in page_list:
        log.debug("(API Index) %s:%s", project.slug, page["path"])
        page_scale = scaled_pages.get(page["path"], 1)
        page_key = "%s-%s-%s" % (project.slug, version.slug, page["path"])
        # md5 operates on bytes, so encode the key before hashing.
        page_id = hashlib.md5(page_key.encode("utf-8")).hexdigest()
        index_list.append(
            {
                "id": page_id,
                "project": project.slug,
                "version": version.slug,
                "path": page["path"],
                "title": page["title"],
                "headers": page["headers"],
                "content": page["content"],
                "_boost": page_scale + project_scale,
            }
        )

        # Sections go out one page at a time: the ``parent`` passed to
        # bulk_index covers the whole batch, so batches must not mix pages.
        section_index_list = []
        for sect in page["sections"]:
            sect_key = "%s-%s" % (page_key, sect["id"])
            section_index_list.append(
                {
                    "id": hashlib.md5(sect_key.encode("utf-8")).hexdigest(),
                    "project": project.slug,
                    "version": version.slug,
                    "path": page["path"],
                    "page_id": sect["id"],
                    "title": sect["title"],
                    "content": sect["content"],
                    "_boost": page_scale,
                }
            )
        if section_index_list:
            section_obj.bulk_index(
                section_index_list, parent=page_id, routing=project.slug
            )

    page_obj.bulk_index(index_list, parent=project.slug)