Пример #1
0
def index_search(request):
    """
    Index a project, its pages and their sections from an API request.

    The payload in ``request.DATA['data']`` must provide:

    * project_pk - primary key of the ``Project`` to index
    * version_pk - primary key of the ``Version`` the pages belong to
    * page_list - list of page dicts with ``path``, ``title``, ``headers``,
      ``content`` and ``sections`` (each section dict has ``id``, ``title``
      and ``content``)

    Boost values are read from the grokthedocs heatmap API. A project or
    page that is absent from the heatmap response gets a boost of 1.

    Returns a ``Response`` containing ``{'indexed': True}``.
    """
    page_obj = PageIndex()
    section_obj = SectionIndex()
    data = request.DATA['data']
    page_list = data['page_list']
    project_pk = data['project_pk']
    version_pk = data['version_pk']
    project = Project.objects.get(pk=project_pk)
    version = Version.objects.get(pk=version_pk)

    resp = requests.get(
        'https://api.grokthedocs.com/api/v1/index/1/heatmap/',
        params={'project': project.slug, 'compare': True},
        timeout=5)
    ret_json = resp.json()
    # Tolerate a heatmap response that does not know this project/page.
    project_scale = ret_json.get('scaled_project', {}).get(project.slug, 1)
    page_scales = ret_json.get('scaled_page', {})

    project_obj = ProjectIndex()
    project_obj.index_document({
        'id': project.pk,
        'name': project.name,
        'slug': project.slug,
        'description': project.description,
        'lang': project.language,
        'author': [user.username for user in project.users.all()],
        'url': project.get_absolute_url(),
        '_boost': project_scale,
    })

    index_list = []
    for page in page_list:
        log.debug("(API Index) %s:%s", project.slug, page['path'])
        page_scale = page_scales.get(page['path'], 1)
        # Hash bytes, not text: md5 rejects non-ASCII text (and any text on
        # Python 3). For ASCII input the resulting ids are unchanged.
        page_key = '%s-%s-%s' % (project.slug, version.slug, page['path'])
        page_id = hashlib.md5(page_key.encode('utf-8')).hexdigest()
        index_list.append({
            'id': page_id,
            'project': project.slug,
            'version': version.slug,
            'path': page['path'],
            'title': page['title'],
            'headers': page['headers'],
            'content': page['content'],
            '_boost': page_scale + project_scale,
        })

        # Build a fresh batch per page: the batch is indexed with this
        # page as parent, so it must not carry sections of earlier pages.
        section_docs = []
        for section in page['sections']:
            section_key = '%s-%s' % (page_key, section['id'])
            section_docs.append({
                'id': hashlib.md5(section_key.encode('utf-8')).hexdigest(),
                'project': project.slug,
                'version': version.slug,
                'path': page['path'],
                'page_id': section['id'],
                'title': section['title'],
                'content': section['content'],
                '_boost': page_scale,
            })
        if section_docs:
            section_obj.bulk_index(section_docs, parent=page_id,
                                   routing=project.slug)

    page_obj.bulk_index(index_list, parent=project.slug)
    return Response({'indexed': True})
Пример #2
0
def index_search(request):
    """
    Index a project, its pages and their sections from an API request.

    Expected keys in ``request.DATA['data']``:

    * project_pk - primary key of the ``Project`` to index
    * version_pk - primary key of the ``Version`` the pages belong to
    * page_list - list of page dicts (``path``, ``title``, ``headers``,
      ``content``, ``sections``); every section dict carries ``id``,
      ``title`` and ``content``

    The ``_boost`` of each document is taken from the grokthedocs heatmap
    API, defaulting to 1 when the project or page is not listed there.

    Returns a ``Response`` containing ``{'indexed': True}``.
    """
    page_obj = PageIndex()
    section_obj = SectionIndex()
    data = request.DATA['data']
    page_list = data['page_list']
    project_pk = data['project_pk']
    version_pk = data['version_pk']
    project = Project.objects.get(pk=project_pk)
    version = Version.objects.get(pk=version_pk)

    # Without a timeout requests waits indefinitely on a stalled server,
    # which would hang this API call.
    resp = requests.get(
        'https://api.grokthedocs.com/api/v1/index/1/heatmap/',
        params={'project': project.slug, 'compare': True},
        timeout=5)
    ret_json = resp.json()
    project_scale = ret_json.get('scaled_project', {}).get(project.slug, 1)
    page_scales = ret_json.get('scaled_page', {})

    project_obj = ProjectIndex()
    project_obj.index_document({
        'id': project.pk,
        'name': project.name,
        'slug': project.slug,
        'description': project.description,
        'lang': project.language,
        'author': [user.username for user in project.users.all()],
        'url': project.get_absolute_url(),
        '_boost': project_scale,
    })

    index_list = []
    for page in page_list:
        path = page['path']
        log.debug("(API Index) %s:%s", project.slug, path)
        page_scale = page_scales.get(path, 1)
        # md5 needs bytes; encoding keeps ASCII ids identical while making
        # non-ASCII paths (and Python 3) work.
        page_key = '%s-%s-%s' % (project.slug, version.slug, path)
        page_id = hashlib.md5(page_key.encode('utf-8')).hexdigest()
        index_list.append({
            'id': page_id,
            'project': project.slug,
            'version': version.slug,
            'path': path,
            'title': page['title'],
            'headers': page['headers'],
            'content': page['content'],
            '_boost': page_scale + project_scale,
        })

        # Sections are indexed as children of *this* page, so the batch
        # is rebuilt for every page instead of accumulating.
        section_docs = []
        for section in page['sections']:
            section_key = '%s-%s' % (page_key, section['id'])
            section_docs.append({
                'id': hashlib.md5(section_key.encode('utf-8')).hexdigest(),
                'project': project.slug,
                'version': version.slug,
                'path': path,
                'page_id': section['id'],
                'title': section['title'],
                'content': section['content'],
                '_boost': page_scale,
            })
        if section_docs:
            section_obj.bulk_index(section_docs, parent=page_id,
                                   routing=project.slug)

    page_obj.bulk_index(index_list, parent=project.slug)
    return Response({'indexed': True})
Пример #3
0
def index_search(request):
    """
    Index a project, its pages and their sections from an API request.

    ``request.DATA["data"]`` must contain ``project_pk``, ``version_pk`` and
    ``page_list``. Each page dict provides ``path``, ``title``, ``headers``,
    ``content`` and ``sections``; each section dict provides ``id``,
    ``title`` and ``content``.

    Boosts come from the grokthedocs heatmap API; a project or page missing
    from its response is indexed with a boost of 1.

    Returns a ``Response`` containing ``{"indexed": True}``.
    """
    page_obj = PageIndex()
    section_obj = SectionIndex()
    data = request.DATA["data"]
    page_list = data["page_list"]
    project_pk = data["project_pk"]
    version_pk = data["version_pk"]
    project = Project.objects.get(pk=project_pk)
    version = Version.objects.get(pk=version_pk)

    # A timeout keeps a stalled heatmap server from blocking the request
    # forever (requests has no default timeout).
    resp = requests.get(
        "https://api.grokthedocs.com/api/v1/index/1/heatmap/",
        params={"project": project.slug, "compare": True},
        timeout=5,
    )
    ret_json = resp.json()
    project_scale = ret_json.get("scaled_project", {}).get(project.slug, 1)
    page_scales = ret_json.get("scaled_page", {})

    project_obj = ProjectIndex()
    project_obj.index_document(
        {
            "id": project.pk,
            "name": project.name,
            "slug": project.slug,
            "description": project.description,
            "lang": project.language,
            "author": [user.username for user in project.users.all()],
            "url": project.get_absolute_url(),
            "_boost": project_scale,
        }
    )

    index_list = []
    for page in page_list:
        log.debug("(API Index) %s:%s", project.slug, page["path"])
        page_scale = page_scales.get(page["path"], 1)
        # Encode before hashing: md5 only accepts bytes. ASCII keys hash to
        # the same ids as before.
        page_key = "%s-%s-%s" % (project.slug, version.slug, page["path"])
        page_id = hashlib.md5(page_key.encode("utf-8")).hexdigest()
        index_list.append(
            {
                "id": page_id,
                "project": project.slug,
                "version": version.slug,
                "path": page["path"],
                "title": page["title"],
                "headers": page["headers"],
                "content": page["content"],
                "_boost": page_scale + project_scale,
            }
        )

        # One batch per page; reusing a single list would re-index earlier
        # pages' sections under this page's parent id.
        section_docs = []
        for section in page["sections"]:
            section_key = "%s-%s" % (page_key, section["id"])
            section_docs.append(
                {
                    "id": hashlib.md5(section_key.encode("utf-8")).hexdigest(),
                    "project": project.slug,
                    "version": version.slug,
                    "path": page["path"],
                    "page_id": section["id"],
                    "title": section["title"],
                    "content": section["content"],
                    "_boost": page_scale,
                }
            )
        if section_docs:
            section_obj.bulk_index(section_docs, parent=page_id, routing=project.slug)

    page_obj.bulk_index(index_list, parent=project.slug)
    return Response({"indexed": True})
Пример #4
0
def section_search(request):
    """
    Search for a Section of content on Read the Docs.
    A Section is a subheading on a specific page.

    Query Thoughts
    --------------

    If you want to search across all documents, just query with a ``q`` GET arg.
    If you want to filter by a specific project, include a ``project`` GET arg.

    Facets
    ------

    When you search, you will have a ``project`` facet, which includes the number of matching sections per project.
    When you search inside a project, the ``path`` facet will show the number of matching sections per page.

    Possible GET args
    -----------------

    * q - The query string **Required**
    * project - A project slug *Optional*
    * version - A version slug *Optional* (defaults to ``latest``)
    * path - A file path slug  *Optional*

    Example
    -------

        GET /api/v2/search/section/?q=virtualenv&project=django

    Filtering
    ---------

    ``project`` restricts results to that project and version; ``path``
    restricts results to that path. When both are given the filters are
    combined.

    """
    query = request.GET.get('q', None)
    if not query:
        return Response(
            {'error': 'Search term required. Use the "q" GET arg to search. '},
            status=status.HTTP_400_BAD_REQUEST)

    project_slug = request.GET.get('project', None)
    version_slug = request.GET.get('version', 'latest')
    path_slug = request.GET.get('path', None)

    log.debug("(API Section Search) [%s:%s] %s",
              project_slug, version_slug, query)

    kwargs = {}
    body = {
        "query": {
            "bool": {
                "should": [
                    # Title matches count ten times as much as body matches.
                    {"match": {"title": {"query": query, "boost": 10}}},
                    {"match": {"content": {"query": query}}},
                ]
            }
        },
        "facets": {
            "project": {
                "terms": {"field": "project"},
                "facet_filter": {"term": {"version": version_slug}},
            },
        },
        "highlight": {
            "fields": {
                "title": {},
                "content": {},
            }
        },
        "fields":
        ["title", "project", "version", "path", "page_id", "content"],
        "size": 10  # TODO: Support pagination.
    }

    # Collect every active filter term so that ``path`` narrows a project
    # search instead of replacing the project/version restriction.
    filter_terms = []

    if project_slug:
        filter_terms.append({"term": {"project": project_slug}})
        filter_terms.append({"term": {"version": version_slug}})
        # Must be a plain dict; a trailing comma here would turn the facet
        # definition into a tuple.
        body["facets"]["path"] = {
            "terms": {"field": "path"},
            "facet_filter": {"term": {"project": project_slug}},
        }
        # Add routing to optimize search by hitting the right shard.
        kwargs['routing'] = project_slug

    if path_slug:
        filter_terms.append({"term": {"path": path_slug}})
        if not project_slug:
            # Show facets when we only have a path
            body["facets"]["path"] = {"terms": {"field": "path"}}

    if filter_terms:
        body["filter"] = {"and": filter_terms}

    results = SectionIndex().search(body, **kwargs)

    return Response({'results': results})
Пример #5
0
def index_search_request(version, page_list):
    """
    Index a version's project, pages and page sections for search.

    :param version: version object; ``version.project`` is the project that
        gets indexed and ``version.slug`` is stored on every page/section.
    :param page_list: list of page dicts with ``path``, ``title``,
        ``headers``, ``content`` and ``sections`` (each section dict has
        ``id``, ``title`` and ``content``).

    Boost values are fetched from the grokthedocs heatmap API and default
    to 1 for a project or page the response does not list.
    """
    project = version.project
    log.info("(Server Search) Indexing Pages: %s [%s]",
             project.slug, ' '.join(page['path'] for page in page_list))
    page_obj = PageIndex()
    section_obj = SectionIndex()

    # requests has no default timeout; bound the call so a stalled heatmap
    # server cannot block indexing forever.
    resp = requests.get('https://api.grokthedocs.com/api/v1/index/1/heatmap/',
                        params={
                            'project': project.slug,
                            'compare': True
                        },
                        timeout=5)
    ret_json = resp.json()
    project_scale = ret_json.get('scaled_project', {}).get(project.slug, 1)
    page_scales = ret_json.get('scaled_page', {})

    project_obj = ProjectIndex()
    project_obj.index_document({
        'id': project.pk,
        'name': project.name,
        'slug': project.slug,
        'description': project.description,
        'lang': project.language,
        'author': [user.username for user in project.users.all()],
        'url': project.get_absolute_url(),
        '_boost': project_scale,
    })

    index_list = []
    for page in page_list:
        log.debug("(API Index) %s:%s", project.slug, page['path'])
        page_scale = page_scales.get(page['path'], 1)
        # md5 needs bytes. Encoding leaves ids for ASCII keys unchanged and
        # stops non-ASCII paths from raising.
        page_key = '%s-%s-%s' % (project.slug, version.slug, page['path'])
        page_id = hashlib.md5(page_key.encode('utf-8')).hexdigest()
        index_list.append({
            'id': page_id,
            'project': project.slug,
            'version': version.slug,
            'path': page['path'],
            'title': page['title'],
            'headers': page['headers'],
            'content': page['content'],
            '_boost': page_scale + project_scale,
        })

        # The batch is indexed with this page as parent, so start from an
        # empty list for each page rather than accumulating across pages.
        section_docs = []
        for section in page['sections']:
            section_key = '%s-%s' % (page_key, section['id'])
            section_docs.append({
                'id': hashlib.md5(section_key.encode('utf-8')).hexdigest(),
                'project': project.slug,
                'version': version.slug,
                'path': page['path'],
                'page_id': section['id'],
                'title': section['title'],
                'content': section['content'],
                '_boost': page_scale,
            })
        if section_docs:
            section_obj.bulk_index(section_docs,
                                   parent=page_id,
                                   routing=project.slug)

    page_obj.bulk_index(index_list, parent=project.slug)
Пример #6
0
def index_search_request(version,
                         page_list,
                         commit,
                         project_scale,
                         page_scale,
                         section=True,
                         delete=True):
    """
    Index a version's project, pages and (optionally) page sections.

    :param version: version object; ``version.project`` is indexed and
        ``version.slug`` is stored on every page and section document.
    :param page_list: list of page dicts with ``path``, ``title``,
        ``headers`` and ``content``; ``sections`` is read only when
        ``section`` is true (each section dict has ``id``, ``title`` and
        ``content``).
    :param commit: value stored in the ``commit`` field of every page and
        used by the delete query below.
    :param project_scale: weight of the project document; also added to
        ``page_scale`` for page documents.
    :param page_scale: weight used for section documents and, summed with
        ``project_scale``, for page documents.
    :param section: when true, index each page's sections as well.
    :param delete: when true, issue a delete for page documents of this
        project/version whose ``commit`` differs from ``commit``.
    """
    project = version.project
    log.info("(Server Search) Indexing Pages: %s [%s]",
             project.slug, ' '.join(page['path'] for page in page_list))
    page_obj = PageIndex()
    section_obj = SectionIndex()

    # Project tags are not indexed yet; the field is sent as None.
    project_obj = ProjectIndex()
    project_obj.index_document(
        data={
            'id': project.pk,
            'name': project.name,
            'slug': project.slug,
            'description': project.description,
            'lang': project.language,
            'author': [user.username for user in project.users.all()],
            'url': project.get_absolute_url(),
            'tags': None,
            'weight': project_scale,
        })

    index_list = []
    for page in page_list:
        log.debug("(API Index) %s:%s", project.slug, page['path'])
        # md5 needs bytes. Encoding leaves ids for ASCII keys unchanged and
        # stops non-ASCII paths from raising.
        page_key = '%s-%s-%s' % (project.slug, version.slug, page['path'])
        page_id = hashlib.md5(page_key.encode('utf-8')).hexdigest()
        index_list.append({
            'id': page_id,
            'project': project.slug,
            'version': version.slug,
            'path': page['path'],
            'title': page['title'],
            'headers': page['headers'],
            'content': page['content'],
            'taxonomy': None,
            'commit': commit,
            'weight': page_scale + project_scale,
        })
        if section:
            # Fresh batch per page (it is indexed with this page as parent).
            # The loop variable is deliberately not named ``section`` so it
            # cannot overwrite the flag checked above.
            section_docs = []
            for section_data in page['sections']:
                section_key = '%s-%s' % (page_key, section_data['id'])
                section_docs.append({
                    'id':
                    hashlib.md5(section_key.encode('utf-8')).hexdigest(),
                    'project': project.slug,
                    'version': version.slug,
                    'path': page['path'],
                    'page_id': section_data['id'],
                    'title': section_data['title'],
                    'content': section_data['content'],
                    'weight': page_scale,
                })
            if section_docs:
                section_obj.bulk_index(section_docs,
                                       parent=page_id,
                                       routing=project.slug)

    page_obj.bulk_index(index_list, parent=project.slug)

    if delete:
        log.info("(Server Search) Deleting files not in commit: %s", commit)
        # TODO: AK Make sure this works
        # Matches pages of this project and version that were not written
        # with the current commit.
        delete_query = {
            "query": {
                "bool": {
                    "must": [
                        {
                            "term": {
                                "project": project.slug,
                            }
                        },
                        {
                            "term": {
                                "version": version.slug,
                            }
                        },
                    ],
                    "must_not": {
                        "term": {
                            "commit": commit
                        }
                    }
                }
            }
        }
        page_obj.delete_document(body=delete_query)
Пример #7
0
def index_search_request(version, page_list, commit, project_scale, page_scale, section=True, delete=True):
    """
    Index a version's project, pages and (optionally) page sections.

    :param version: version object; ``version.project`` is indexed and
        ``version.slug`` is stored on every page and section document.
    :param page_list: list of page dicts with ``path``, ``title``,
        ``headers`` and ``content``; ``sections`` (dicts with ``id``,
        ``title``, ``content``) is read only when ``section`` is true.
    :param commit: stored in the ``commit`` field of every page document
        and used to select stale pages for deletion.
    :param project_scale: weight of the project document; also added to
        ``page_scale`` for page documents.
    :param page_scale: weight of section documents and, summed with
        ``project_scale``, of page documents.
    :param section: when true, index each page's sections as well.
    :param delete: when true, delete page documents of this project and
        version whose ``commit`` is not ``commit``.
    """
    project = version.project
    log.info("(Server Search) Indexing Pages: %s [%s]",
             project.slug, ' '.join(page['path'] for page in page_list))
    page_obj = PageIndex()
    section_obj = SectionIndex()

    # Project tags are not indexed yet; the field is sent as None.
    project_obj = ProjectIndex()
    project_obj.index_document(data={
        'id': project.pk,
        'name': project.name,
        'slug': project.slug,
        'description': project.description,
        'lang': project.language,
        'author': [user.username for user in project.users.all()],
        'url': project.get_absolute_url(),
        'tags': None,
        'weight': project_scale,
    })

    index_list = []
    for page in page_list:
        log.debug("(API Index) %s:%s", project.slug, page['path'])
        # Encode before hashing: md5 only accepts bytes. ASCII keys keep
        # the ids they had before.
        page_key = '%s-%s-%s' % (project.slug, version.slug, page['path'])
        page_id = hashlib.md5(page_key.encode('utf-8')).hexdigest()
        index_list.append({
            'id': page_id,
            'project': project.slug,
            'version': version.slug,
            'path': page['path'],
            'title': page['title'],
            'headers': page['headers'],
            'content': page['content'],
            'taxonomy': None,
            'commit': commit,
            'weight': page_scale + project_scale,
        })
        if section:
            # Rebuilt for every page because the batch is indexed with this
            # page as parent. ``section_data`` avoids clobbering the
            # ``section`` flag tested above.
            section_docs = []
            for section_data in page['sections']:
                section_key = '%s-%s' % (page_key, section_data['id'])
                section_docs.append({
                    'id': hashlib.md5(section_key.encode('utf-8')).hexdigest(),
                    'project': project.slug,
                    'version': version.slug,
                    'path': page['path'],
                    'page_id': section_data['id'],
                    'title': section_data['title'],
                    'content': section_data['content'],
                    'weight': page_scale,
                })
            if section_docs:
                section_obj.bulk_index(section_docs, parent=page_id, routing=project.slug)

    page_obj.bulk_index(index_list, parent=project.slug)

    if delete:
        log.info("(Server Search) Deleting files not in commit: %s", commit)
        # TODO: AK Make sure this works
        # Selects pages of this project/version not written by ``commit``.
        delete_query = {
            "query": {
                "bool": {
                    "must": [
                        {"term": {"project": project.slug, }},
                        {"term": {"version": version.slug, }},
                    ],
                    "must_not": {
                        "term": {
                            "commit": commit
                        }
                    }
                }
            }
        }
        page_obj.delete_document(body=delete_query)
Пример #8
0
def index_search_request(version, page_list):
    """
    Index a version's project, pages and page sections for search.

    :param version: version object; ``version.project`` is the project that
        gets indexed and ``version.slug`` is stored on every page/section.
    :param page_list: list of page dicts with ``path``, ``title``,
        ``headers``, ``content`` and ``sections`` (each section dict has
        ``id``, ``title`` and ``content``).

    Boosts are read from the grokthedocs heatmap API; a project or page the
    response does not list is indexed with a boost of 1.
    """
    project = version.project
    log.info(
        "(Server Search) Indexing Pages: %s [%s]",
        project.slug,
        " ".join(page["path"] for page in page_list),
    )
    page_obj = PageIndex()
    section_obj = SectionIndex()

    # Bound the HTTP call; requests waits forever when no timeout is given.
    resp = requests.get(
        "https://api.grokthedocs.com/api/v1/index/1/heatmap/",
        params={"project": project.slug, "compare": True},
        timeout=5,
    )
    ret_json = resp.json()
    project_scale = ret_json.get("scaled_project", {}).get(project.slug, 1)
    page_scales = ret_json.get("scaled_page", {})

    project_obj = ProjectIndex()
    project_obj.index_document(
        {
            "id": project.pk,
            "name": project.name,
            "slug": project.slug,
            "description": project.description,
            "lang": project.language,
            "author": [user.username for user in project.users.all()],
            "url": project.get_absolute_url(),
            "_boost": project_scale,
        }
    )

    index_list = []
    for page in page_list:
        log.debug("(API Index) %s:%s", project.slug, page["path"])
        page_scale = page_scales.get(page["path"], 1)
        # md5 needs bytes. Encoding leaves ids for ASCII keys unchanged and
        # stops non-ASCII paths from raising.
        page_key = "%s-%s-%s" % (project.slug, version.slug, page["path"])
        page_id = hashlib.md5(page_key.encode("utf-8")).hexdigest()
        index_list.append(
            {
                "id": page_id,
                "project": project.slug,
                "version": version.slug,
                "path": page["path"],
                "title": page["title"],
                "headers": page["headers"],
                "content": page["content"],
                "_boost": page_scale + project_scale,
            }
        )

        # Start an empty batch per page: it is indexed with this page as
        # parent and must not include sections of previously seen pages.
        section_docs = []
        for section in page["sections"]:
            section_key = "%s-%s" % (page_key, section["id"])
            section_docs.append(
                {
                    "id": hashlib.md5(section_key.encode("utf-8")).hexdigest(),
                    "project": project.slug,
                    "version": version.slug,
                    "path": page["path"],
                    "page_id": section["id"],
                    "title": section["title"],
                    "content": section["content"],
                    "_boost": page_scale,
                }
            )
        if section_docs:
            section_obj.bulk_index(section_docs, parent=page_id, routing=project.slug)

    page_obj.bulk_index(index_list, parent=project.slug)