def get_results_content(fetch_all, fetch_indexes, share_content):
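    """Fetch and store page contents for search results.

    If fetch_all is set, fetch content for every search result.  If
    fetch_indexes is given, fetch only results from searches with those fetch
    indexes.  Otherwise, fetch only results that don't yet have content.  When
    share_content is set, one fetched page is reused for all results that
    share a URL.
    """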

    # We order search results by URL so that results sharing a URL are visited
    # one after the other.  This lets us associate a single fetched page with
    # every search result that shares that URL.
    results = (
        SearchResult
        .select()
        .order_by(SearchResult.url)
    )
    if fetch_all:
        # No additional filtering: fetch content for every search result.
        pass
    elif fetch_indexes:
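        # peewee's '<<' operator compiles to the SQL 'IN' operator.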
        results = (
            results
            .join(Search)
            .where(Search.fetch_index << fetch_indexes)
        )
    else:
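        # By default, fetch content only for results that have none yet.
        # peewee's '>>' operator compiles to 'IS' (here, 'IS NULL').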
        results = (
            results
            .join(SearchResultContent, JOIN_LEFT_OUTER)
            .where(SearchResultContent.content >> None)
        )

    previous_url = None
    previous_content = None

    for search_result in results:

        # If the caller has specified that we should share fetched contents
        # between search results with the same URL, check whether this
        # result's URL matches the one we just fetched.
        if share_content and search_result.url == previous_url:
            logger.debug("Already fetched URL %s.  Reusing its content.", search_result.url)
            if previous_content is not None:
                SearchResultContent.create(search_result=search_result, content=previous_content)
            continue

        # Fetch content for the search result
        resp = make_request(default_requests_session.get, search_result.url)

        # Associate the fetched content with the search result
        if hasattr(resp, 'content'):
            # To avoid redundant storage, we create a record for web page
            # contents that can be shared across multiple search results.
            # Note that we store 'response.text' (Unicode) rather than
            # 'response.content' (bytes), so that responses from all URLs,
            # whatever their encoding, can be stored successfully.
            web_page_content = WebPageContent.create(url=search_result.url, content=resp.text)
            SearchResultContent.create(search_result=search_result, content=web_page_content)
            previous_content = web_page_content
        else:
            logger.warning("Error fetching content from URL: %s", search_result.url)
            previous_content = None

        # Whether the fetch succeeded or failed, record that we queried this URL
        previous_url = search_result.url

        # Even though most of the pages will be from different domains, we pause between
        # fetching the content for each result to avoid spamming any specific domain with requests.
        time.sleep(DELAY_TIME)
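

# For context, a minimal sketch of the `make_request` helper used above (the
# real implementation lives elsewhere in this package).  This sketch assumes a
# hypothetical REQUEST_TIMEOUT constant and an imported `requests` module; on
# error it returns None, which fails the `hasattr(resp, 'content')` check above.
def make_request(method, *args, **kwargs):
    try:
        return method(*args, timeout=REQUEST_TIMEOUT, **kwargs)
    except requests.exceptions.RequestException as error:
        logger.warning("Error making request: %s", error)
        return None
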
def forward(migrator):
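    """Move stored page contents out of SearchResultContent into shared WebPageContent records."""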

    # Add a placeholder field for storing a link to a WebPageContent object
    migrate(
        migrator.add_column(
            "searchresultcontent",
            "webpagecontent_id",
            ForeignKeyField(WebPageContent, null=True, to_field=WebPageContent.id),
        )
    )

    # Move the data previously stored in the SearchResultContent model into
    # WebPageContent, and link each SearchResultContent to its WebPageContent.
    # Note that because the SearchResultContent model has already been updated
    # beyond the state of the table, we have to access the old 'content' and
    # 'date' fields through the "SQL" class instead of fields on the model.
    # This is also why we mix query-builder methods and raw queries below: the
    # models refer to the future field names, and the raw queries refer to the
    # past field names.
    content_records = (
        SearchResultContent.select(SQL("content"), SQL("date"), SearchResult.url, SearchResultContent.id)
        .join(SearchResult)
        .dicts()
    )

    for record in content_records:
        web_page_content = WebPageContent.create(content=record["content"], date=record["date"], url=record["url"])
        # Normally it's not recommended to insert values directly into a query
        # string.  We do it here because Postgres and SQLite use different
        # parameter placeholders ('%s' vs. '?'), so inlining the values is one
        # way to keep this migration portable across both backends.  There is
        # also no injection risk, as both inserted values are integers.
        SearchResultContent.raw(
            "UPDATE searchresultcontent SET webpagecontent_id = "
            + str(web_page_content.id)
            + " WHERE id = "
            + str(record["id"])
        ).execute()
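        # For reference, a parameterized form of the update above would need a
        # backend-specific placeholder ('%s' on Postgres via psycopg2, '?' on
        # SQLite), e.g., a sketch for the SQLite case:
        #   SearchResultContent.raw(
        #       "UPDATE searchresultcontent SET webpagecontent_id = ? WHERE id = ?",
        #       web_page_content.id, record["id"]).execute()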

    # Drop unnecessary columns from SearchResultContent model
    migrate(
        migrator.drop_column("searchresultcontent", "date"),
        migrator.drop_column("searchresultcontent", "content"),
        migrator.rename_column("searchresultcontent", "webpagecontent_id", "content_id"),
        migrator.drop_not_null("searchresultcontent", "content_id"),
    )
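

# A minimal sketch of how `forward` might be invoked, assuming a SQLite
# backend and that the models are bound to the same database elsewhere in
# this package; the database filename here is hypothetical:
if __name__ == "__main__":
    from peewee import SqliteDatabase
    from playhouse.migrate import SqliteMigrator

    database = SqliteDatabase("fetcher.db")  # hypothetical path
    forward(SqliteMigrator(database))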