Python SentenceExtractor.parse示例

编程语言: Python

命名空间/包名称: entities.sentences.extractor

方法/功能: parse

hotexamples.com的示例: 9

Python SentenceExtractor.parse - 已找到9个示例。以下是从开源项目中提取的、评价最高的 entities.sentences.extractor.SentenceExtractor.parse 真实 Python 示例。您可以对示例进行评价，以帮助我们提高示例质量。

常用方法

显示/隐藏

parse(9)

SentenceExtractor(1)

示例#1

显示文件

文件： test_parse_tex.py 项目： alexkreidler/scholarphi

def test_combine_sentence_tex_following_latex_linebreak_conventions():
    """Check sentence boundaries around LaTeX line breaks, blank lines and newlines.

    The input packs three sentences onto one physical line, follows them with
    a fourth sentence set off by an empty line and a whitespace-only line, and
    ends with a fifth sentence wrapped across two lines. Five sentences are
    expected, with the wrapped one joined by a single space.
    """
    # All on one line.
    one_line_sentences = (
        r"This is the first sentence.\\"
        r"This is the second sentence.\linebreak "
        "This is the third sentence."
    )
    tex_lines = [
        one_line_sentences,
        "",
        "This is the fourth sentence.",
        " ",
        # With only a single newline, consecutive lines should be considered the same sentence.
        "This is the fifth sentence, which is written on multiple",
        "lines.",
    ]
    tex = "\n".join(tex_lines)

    extractor = SentenceExtractor(from_named_sections_only=False)
    sentences = list(extractor.parse("main.tex", tex))

    # The first sentence is additionally checked for its end offset.
    assert sentences[0].text == "This is the first sentence."
    assert sentences[0].end == 27

    remaining_texts = [
        "This is the second sentence.",
        "This is the third sentence.",
        "This is the fourth sentence.",
        "This is the fifth sentence, which is written on multiple lines.",
    ]
    for position, expected_text in enumerate(remaining_texts, start=1):
        assert sentences[position].text == expected_text

    assert len(sentences) == 5

示例#2

显示文件

文件： test_parse_tex.py 项目： silky/scholarphi

def test_ignore_periods_in_equations():
    """Expect a period inside a dollar-delimited equation not to split the sentence.

    The single resulting sentence is expected to carry the ``<<equation-0>>``
    placeholder in place of the equation.
    """
    tex = "This sentence has an $ equation. In $ the middle."
    extractor = SentenceExtractor(from_named_sections_only=False)
    parsed = list(extractor.parse("main.tex", tex))

    assert len(parsed) == 1
    only_sentence = parsed[0]
    assert only_sentence.text == "This sentence has an <<equation-0>> the middle."

示例#3

显示文件

def test_ignore_periods_in_equations():
    """Expect a period inside a dollar-delimited equation not to split the sentence.

    Uses a default-constructed extractor; the single resulting sentence is
    expected to show ``[[math]]`` in place of the equation.
    """
    tex = "This sentence has an $ equation. In $ the middle."
    parsed = list(SentenceExtractor().parse("main.tex", tex))

    assert len(parsed) == 1
    only_sentence = parsed[0]
    assert only_sentence.text == "This sentence has an [[math]] the middle."

示例#4

显示文件

文件： test_parse_tex.py 项目： alexkreidler/scholarphi

def test_sentence_includes_preceding_equation():
    """Expect an equation on the line before a sentence to belong to that sentence.

    The second extracted sentence must start at the equation (offset 12) and
    run to the end of the input (offset 38).
    """
    # This was a specific case seen in an example paper.
    tex = "Sentence 1.\n\\[x\\]\nstarts the sentence."
    extractor = SentenceExtractor(from_named_sections_only=False)
    sentences = list(extractor.parse("main.tex", tex))

    second = sentences[1]
    assert second.tex == "\\[x\\]\nstarts the sentence."
    assert second.start == 12
    assert second.end == 38

示例#5

显示文件

文件： test_parse_tex.py 项目： alexkreidler/scholarphi

def test_ignore_periods_in_equations():
    """Expect a period inside a dollar-delimited equation not to split the sentence.

    The single resulting sentence is expected to keep the equation contents,
    with the dollar signs shown as EQUATION_DEPTH_0_START / EQUATION_DEPTH_0_END.
    """
    tex = "This sentence has an $ equation. In $ the middle."
    expected_text = (
        "This sentence has an EQUATION_DEPTH_0_START equation. In EQUATION_DEPTH_0_END "
        "the middle."
    )

    extractor = SentenceExtractor(from_named_sections_only=False)
    parsed = list(extractor.parse("main.tex", tex))

    assert len(parsed) == 1
    assert parsed[0].text == expected_text

示例#6

显示文件

文件： test_parse_tex.py 项目： silky/scholarphi

def test_sentence_from_within_command():
    """Expect a sentence wrapped in a command argument to be extracted on its own.

    Both ``text`` and ``tex`` of the first sentence must equal the argument
    contents, spanning offsets 9 to 26 of the input.
    """
    # The space in the start of this sentence is important, as in an earlier version of the
    # code, this space caused extraction to fail.
    tex = r" \textit{Example sentence.}"
    extractor = SentenceExtractor(from_named_sections_only=False)
    sentences = list(extractor.parse("main.tex", tex))

    first = sentences[0]
    assert first.text == "Example sentence."
    assert first.tex == "Example sentence."
    assert first.start == 9
    assert first.end == 26

示例#7

显示文件

文件： test_parse_tex.py 项目： alexkreidler/scholarphi

def test_sentence_splitting_end_points():
    """Check the start and end offsets of four sentences, two of them numbered items."""
    tex = "This is a sentence. Next we describe two items. 1) The first item. 2) The second item."
    extractor = SentenceExtractor(from_named_sections_only=False)
    sentences = list(extractor.parse("main.tex", tex))

    assert len(sentences) == 4

    # The length was asserted above, so pairing with zip visits every sentence.
    expected_spans = [[0, 19], [20, 47], [48, 66], [67, 86]]
    for sentence, (expected_start, expected_end) in zip(sentences, expected_spans):
        assert sentence.start == expected_start
        assert sentence.end == expected_end

示例#8

显示文件

def test_sentence_splitting_end_points_and_more_text():
    """Check the start and end offsets of eight short fragments split at periods.

    Uses a default-constructed extractor.
    """
    tex = "This sentence. has extra. text. 1. first 2. second 3. third. And some extra. stuff."
    sentences = list(SentenceExtractor().parse("main.tex", tex))

    assert len(sentences) == 8

    expected_spans = [
        [0, 14],
        [15, 25],
        [26, 31],
        [32, 40],
        [41, 50],
        [51, 60],
        [61, 76],
        [77, 83],
    ]
    for position, span in enumerate(expected_spans):
        expected_start, expected_end = span
        current = sentences[position]
        assert current.start == expected_start
        assert current.end == expected_end

示例#9

显示文件

文件： test_parse_tex.py 项目： alexkreidler/scholarphi

def test_extract_sentences():
    """Check offsets and text of two sentences, the first containing a macro.

    The expected text of the first sentence shows the macro name removed and
    its bracketed and braced arguments kept ("argsentence").
    """
    tex = "This is the first \\macro[arg]{sentence}. This is the second sentence."
    extractor = SentenceExtractor(from_named_sections_only=False)
    sentences = list(extractor.parse("main.tex", tex))
    assert len(sentences) == 2

    first, second = sentences[0], sentences[1]

    assert first.start == 0
    assert first.end == 40
    assert first.text == "This is the first argsentence."

    assert second.start == 41
    assert second.end == 69
    assert second.text == "This is the second sentence."