Python Document_Text.select示例

编程语言: Python

命名空间/包名称: osp.corpus.models

类/类型: Document_Text

方法/功能: select

hotexamples.com的示例: 7

Python Document_Text.select - 共找到7个示例。以下是从开源项目中提取的、评价最高的 osp.corpus.models.Document_Text.select 真实 Python 代码示例。您可以为示例评分，帮助我们提高示例质量。

常用方法

显示隐藏

select(5)

get(2)

create(1)

term_counts(1)

示例#1

显示文件

def syllabus_refinement(in_file, out_file, r, threshold):
    """
    Select the N documents around a given threshold in the syllabus /
    not-syllabus classifier predictions.

    The predictions are sorted by descending score, the document whose
    score is closest to `threshold` is located, and the documents in the
    window `[center - r, center + r)` of that ordering are written out.

    Args:
        in_file: Open CSV file of `path,score` rows. The column names are
            supplied here, so the first line is read as data.
        out_file: Open file that receives the `id,title,text` CSV.
        r (int): Number of documents to take on each side of the center.
        threshold (float): Score that the window is centered on.
    """

    cols = ['path', 'score']
    reader = csv.DictReader(in_file, cols)

    # Gather (path, score) tuples, highest score first. The loop variable
    # is deliberately not named `r`, so it cannot shadow (or, on Python 2,
    # overwrite) the `r` window-size parameter.
    scores = [(line['path'], float(line['score'])) for line in reader]
    scores.sort(key=lambda pair: pair[1], reverse=True)

    # CSV writer.
    cols = ['id', 'title', 'text']
    writer = csv.DictWriter(out_file, cols)
    writer.writeheader()

    # With no predictions there is nothing to center on; emit only the
    # header instead of letting min() fail on an empty sequence.
    if not scores:
        return

    # Get the index of the document at the threshold.
    center = min(range(len(scores)),
                 key=lambda i: abs(scores[i][1] - threshold))

    # Clamp the window start at 0: a negative slice start would be counted
    # from the END of the list and silently produce the wrong window.
    start = max(0, center - r)

    for path, _score in scores[start:center + r]:

        row = (Document_Text.select(Document_Text.text,
                                    Document.path).join(Document).where(
                                        Document.path == path).naive().first())

        writer.writerow({'id': row.path, 'title': row.path, 'text': row.text})

示例#2

显示文件

文件： test_ext_text.py 项目： project-renard-survey/open-syllabus-project

def test_text_extraction_fails(mock_osp):
    """
    When extraction produces no text, no text row may be written.
    """

    # Register a document whose backing file has no content.
    empty_path = mock_osp.add_file(content='')
    doc = Document.create(path=empty_path)

    ext_text(doc.id)

    # The text table must still be empty.
    row_count = Document_Text.select().count()
    assert row_count == 0

示例#3

显示文件

文件： test_ext_text.py 项目： MichaelEdage/open-syllabus-project

def test_text_extraction_fails(mock_osp):
    """
    A document with nothing to extract must not produce a text row.
    """

    # Create a document that points at a zero-length file.
    blank_file = mock_osp.add_file(content='')
    blank_doc = Document.create(path=blank_file)

    # Run the extraction job against it.
    ext_text(blank_doc.id)

    # No row should have been inserted.
    written = Document_Text.select().count()
    assert written == 0

示例#4

显示文件

文件： document_index.py 项目： ivanistheone/open-syllabus-project

    def es_stream_docs(cls):
        """
        Stream document texts for indexing.

        Yields:
            dict: The next document, with `_id` taken from the row's
            `document_id` and `body` from its `text`.
        """

        text_rows = Document_Text.select()

        for text_row in query_bar(text_rows):
            yield {'_id': text_row.document_id, 'body': text_row.text}

示例#5

显示文件

文件： document_index.py 项目： MichaelEdage/open-syllabus-project

    def es_stream_docs(cls):
        """
        Yield one index document per document-text row.

        Yields:
            dict: The next document (`_id` = row's `document_id`,
            `body` = row's `text`).
        """

        all_texts = Document_Text.select()

        for record in query_bar(all_texts):
            doc = {'_id': record.document_id, 'body': record.text}
            yield doc

示例#6

显示文件

def random(out_file, n):
    """
    Dump the plaintext of N randomly-ordered documents to a CSV file.
    """

    # Set up the output CSV and emit its header row.
    fieldnames = ['id', 'title', 'text']
    csv_out = csv.DictWriter(out_file, fieldnames)
    csv_out.writeheader()

    # Text + path of N documents, ordered by the SQL random() function.
    joined = Document_Text.select(Document_Text.text, Document.path)
    joined = joined.join(Document)
    sample = joined.order_by(fn.random()).limit(n)

    for doc in query_bar(sample):
        # The document path doubles as both the id and the title.
        csv_out.writerow(dict(id=doc.path, title=doc.path, text=doc.text))

示例#7

显示文件

def truncated(out_file, frag_len):
    """
    Dump each document text, cut to its first `frag_len` characters,
    to a CSV file.
    """

    # Set up the output CSV and emit its header row.
    fieldnames = ['id', 'title', 'text']
    csv_out = csv.DictWriter(out_file, fieldnames)
    csv_out.writeheader()

    # Text + path for every document-text row joined to its document.
    texts = Document_Text.select(Document_Text.text, Document.path)
    texts = texts.join(Document)

    for doc in query_bar(texts):
        # The document path doubles as both the id and the title; only
        # the leading slice of the text is kept.
        csv_out.writerow(dict(
            id=doc.path,
            title=doc.path,
            text=doc.text[:frag_len],
        ))