def process_api_page(page_path):
    return render_template('api.html',
                           page=get_api_page(build_mode, page_path))
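A note on wiring: process_api_page is a Flask view helper (it calls render_template; build_mode is defined outside this snippet). A minimal sketch of how it might be registered, assuming a Flask app object and a hypothetical URL rule:

# Sketch only: the route pattern and the build_mode default are assumptions,
# not the project's actual wiring.
from flask import Flask, render_template

app = Flask(__name__)
build_mode = False  # assumed module-level flag, as used above

@app.route('/api/<path:page_path>')
def api_page_view(page_path):
    return process_api_page(page_path)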
Example #2
def build_search_indices(site_structure, pages):
    page_views_statistic = get_page_views_statistic()
    index_objects = []

    print("Start building index")
    for url, endpoint in site_structure:
        if (not url.endswith('.html')) and (not url.endswith('/')):
            continue
        print("Processing " + url)
        page_views = page_views_statistic.get(url, 0)
        page_path = get_page_path_from_url(url)
        if endpoint == 'page':
            page_part = pages.get(page_path)
            page_type = "Page"
            if page_path.startswith('community'):
                page_type = 'Community'
            elif page_path.startswith('docs/reference'):
                page_type = 'Reference'
            elif page_path.startswith('docs/tutorials'):
                page_type = 'Tutorial'
            index_objects += get_markdown_page_index_objects(
                page_part.parsed_html,
                url,
                page_path,
                page_part.meta['title'],
                page_type,
                page_views
            )
        elif endpoint == "api_page":
            # page_path[4:] strips the leading 'api/' prefix
            page_info = get_api_page(True, page_path[4:])
            for table in page_info['content']('table'):
                table.extract()
            for overload_group in page_info['content'].findAll("div", {"class": "signature"}):
                overload_group.extract()
            breadcrumbs = page_info['content'].find("div", {"class": "api-docs-breadcrumbs"})
            full_name = page_info['title']
            if breadcrumbs is not None:
                full_name_parts = [link.text for link in breadcrumbs.findAll("a")]
                if "kotlin-stdlib" in full_name_parts:
                    full_name_parts.remove("kotlin-stdlib")
                else:
                    full_name_parts.remove("kotlin.test")
                full_name = " › ".join(full_name_parts).replace('<', '&lt;').replace('>', '&gt;')
                breadcrumbs.extract()
            type = "Standard Library" if "jvm/stdlib" in url else "Kotlin Test"
            index_objects += get_page_index_objects(page_info['content'], url, page_path, full_name, type, page_views)
        elif endpoint in ["coroutines_redirect", "coroutines_tutor_redirect", "events_redirect", "community_redirect",
                          "compatibility_redirect", "collections_redirect", "community_user_groups_redirect"]:
            continue
        else:
            client = app.test_client()
            response = client.get(url, follow_redirects=True)
            if response.status_code != 200:
                raise Exception('Bad response during indexing: ' + str(response.status_code) + ' for ' + url)
            parsed = BeautifulSoup(response.data, "html.parser")
            title = parsed.find("title").text

            content = parsed.find("div", {"class": "page-content"})
            if content is None:
                content = parsed.find("article", {"class": "page-content"})

            if content is None:
                index_objects.append({
                    'objectID': page_path,
                    'type': 'Page',
                    'headings': title,
                    'url': url,
                    'content': '',
                    'pageViews': page_views
                })
            else:
                index_objects += get_page_index_objects(
                    content,
                    url,
                    page_path,
                    title,
                    "Page",
                    page_views
                )
    print("Index objects successfully built")

    index = get_index()
    print("Submitting index objects to " + index.index_name + " index")
    index.add_objects(index_objects)
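
The helpers used above (get_page_views_statistic, get_page_path_from_url, get_index, and the get_*_index_objects functions) are defined elsewhere in the project. As a rough sketch, get_page_path_from_url could behave like this, judging only by how the snippet uses its result (the body below is hypothetical):

# Hypothetical helper, inferred from call sites such as
# page_path.startswith('docs/reference') and get_api_page(..., page_path[4:]).
def get_page_path_from_url(url):
    if url.endswith('/'):
        return url[1:] + 'index'     # '/docs/' -> 'docs/index' (assumption)
    return url[1:-len('.html')]      # '/docs/a.html' -> 'docs/a' (assumption)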
Example #3
def build_search_indices(pages, version):
    page_views_statistic = get_page_views_statistic()
    index_objects = []
    wh_index_objects = []

    print("Start building index")
    for url, endpoint in pages:
        if url.endswith('/'):
            url += 'index.html'
        if not url.endswith('.html'):
            continue

        title = ''
        content = ''
        page_type = 'Page'
        page_path = get_page_path_from_url(url)
        page_views = page_views_statistic.get(url, 0)

        if page_path.startswith('community'):
            page_type = 'Community'
        elif page_path.startswith('docs/reference'):
            page_type = 'Reference'
        elif page_path.startswith('docs/tutorials'):
            page_type = 'Tutorial'

        html_content = get_page_content(url)
        parsed = BeautifulSoup(html_content, "html.parser")

        if parsed.find("meta", {"http-equiv": "refresh"}):
            continue

        if page_path.startswith("api/latest/"):
            page_info = get_api_page(True, page_path[4:], dist_path)

            for table in page_info['content']('table'):
                table.extract()

            for overload_group in page_info['content'].findAll(
                    "div", {"class": "signature"}):
                overload_group.extract()

            breadcrumbs = page_info['content'].find(
                "div", {"class": "api-docs-breadcrumbs"})

            title = page_info['title']

            if breadcrumbs is not None:
                full_name_parts = [link.text for link in breadcrumbs.findAll("a")]

                if "kotlin-stdlib" in full_name_parts:
                    full_name_parts.remove("kotlin-stdlib")
                else:
                    full_name_parts.remove("kotlin.test")

                title = " › ".join(full_name_parts).replace('<',
                                                            '&lt;').replace(
                                                                '>', '&gt;')
                breadcrumbs.extract()

            page_type = "Standard Library" if "jvm/stdlib" in url else "Kotlin Test"
            content = page_info['content'].find('article', {"role": "main"})
        else:
            body_title = parsed.select_one("body[data-search-title]")

            if body_title:
                title = body_title.attrs["data-search-title"]

            if not title:
                title_node = parsed.find("title")
                if title_node:
                    title = title_node.text

            # Our default pages
            content = parsed.find("div", {"class": "page-content"})

            # Our modern pages
            if content is None:
                content = parsed.find("article", {"class": "page-content"})

            # WebHelp pages
            if content is None:
                content = parsed.find("article", {"class": "article"})

        if title and content:
            page_indexer = get_page_index_objects

            if parsed.select_one("body[data-article-props]"):
                page_type = "Documentation"
                page_indexer = get_webhelp_page_index_objects
            elif page_type == "Page":
                page_indexer = get_markdown_page_index_objects

            print("processing " + url + ' - ' + page_type)

            page_indices = page_indexer(content, url, page_path, title,
                                        page_type, page_views)

            index_objects += page_indices

            # Bind the current site version into the WebHelp record converter
            def wh(*args):
                return to_wh_index(version, *args)

            wh_index_objects += list(map(wh, page_indices.copy()))
        else:
            print('skip: ' + url + ' - unknown page content, title: ' + title)

    wh_index = get_wh_index()

    if wh_index:
        print("Submitting WH index objects to " + wh_index.index_name +
              " index")
        wh_index.add_objects(wh_index_objects)

    print("Index objects successfully built")

    index = get_index()
    print("Submitting index objects to " + index.index_name + " index")
    index.add_objects(index_objects)
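
to_wh_index is only referenced in this example; from the call site it converts a regular search record into one for the separate WebHelp index, tagged with the site version. A hypothetical sketch (field names are assumptions, not the project's real schema):

# Hypothetical converter, inferred from the call to_wh_index(version, obj).
def to_wh_index(version, index_object):
    wh_object = dict(index_object)      # shallow copy of the search record
    wh_object['product'] = 'kotlin'     # assumed product tag
    wh_object['version'] = version
    return wh_object
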
def process_api_page(page_path):
    return render_template(
        'api.html',
        page=get_api_page(page_path)
    )
Example #5
def build_search_indices(site_structure, pages):
    page_views_statistic = get_page_views_statistic()
    index_objects = []

    print("Start building index")
    for url, endpoint in site_structure:
        if (not url.endswith('.html')) and (not url.endswith('/')):
            continue
        print("Processing " + url)
        page_views = page_views_statistic.get(url, 0)
        page_path = get_page_path_from_url(url)
        if endpoint == 'page':
            page_part = pages.get(page_path)
            page_type = "Page"
            if page_path.startswith('community'):
                page_type = 'Community'
            elif page_path.startswith('docs/reference'):
                page_type = 'Reference'
            elif page_path.startswith('docs/tutorials'):
                page_type = 'Tutorial'
            index_objects += get_markdown_page_index_objects(
                page_part.parsed_html,
                url,
                page_path,
                page_part.meta['title'],
                page_type,
                page_views
            )
        elif endpoint == "api_page":
            # page_path[4:] strips the leading 'api/' prefix
            page_info = get_api_page(page_path[4:])
            for table in page_info['content']('table'):
                table.extract()
            for overload_group in page_info['content'].findAll("div", {"class": "signature"}):
                overload_group.extract()
            breadcrumbs = page_info['content'].find("div", {"class": "api-docs-breadcrumbs"})
            full_name = page_info['title']
            if breadcrumbs is not None:
                full_name_parts = [link.text for link in breadcrumbs.findAll("a")]
                if "kotlin-stdlib" in full_name_parts:
                    full_name_parts.remove("kotlin-stdlib")
                else:
                    full_name_parts.remove("kotlin-test")
                full_name = " › ".join(full_name_parts).replace('<', '&lt;').replace('>', '&gt;')
                breadcrumbs.extract()
            type = "Standard Library" if "jvm/stdlib" in url else "Kotlin Test"
            index_objects += get_page_index_objects(page_info['content'], url, page_path, full_name, type, page_views)
        elif endpoint in ["coroutines_alias", "events_redirect", "community_redirect"]:
            continue
        else:
            client = app.test_client()
            response = client.get(url, follow_redirects=True)
            if response.status_code != 200:
                raise Exception('Bad response during indexing: ' + str(response.status_code) + ' for ' + url)
            parsed = BeautifulSoup(response.data, "html.parser")
            title = parsed.find("title").text

            content = parsed.find("div", {"class": "page-content"})
            if content is None:
                content = parsed.find("article", {"class": "page-content"})

            if content is None:
                index_objects.append({
                    'objectID': page_path,
                    'type': 'Page',
                    'headings': title,
                    'url': url,
                    'content': '',
                    'pageViews': page_views
                })
            else:
                index_objects += get_page_index_objects(
                    content,
                    url,
                    page_path,
                    title,
                    "Page",
                    page_views
                )
    print("Index objects successfully built")

    index = get_index()
    print("Submitting index objects to " + index.index_name + " index")
    index.add_objects(index_objects)
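
get_index() is not shown in any of the examples, but index.index_name and index.add_objects(...) match the legacy algoliasearch 1.x Python client. A sketch of what it might look like, with placeholder credentials and index name:

# Sketch assuming the legacy algoliasearch 1.x client, whose Index objects
# expose index_name and add_objects as used above. All values are placeholders.
from algoliasearch import algoliasearch

def get_index():
    client = algoliasearch.Client('APP_ID', 'ADMIN_API_KEY')
    return client.init_index('KOTLINLANG')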