Пример #1
0
def test_read_conll_sentences_diff_comment_string():
    """A row starting with ``#`` is kept when it does not match the comment pattern.

    The first row begins with ``#`` but not with the ``"# comment: "``
    pattern passed as ``comment_pattern``, so the expected output is a
    single sentence containing all three rows.
    """
    text_tokens = [["#", "1", "2"], ["b", "3", "4"], ["c", "5", "6"]]
    gold = [text_tokens]
    text = StringIO("\n".join(" ".join(tt) for tt in text_tokens))
    # Compare the full result instead of zipping: zip() stops at the shorter
    # input, so a reader that yielded nothing would pass the test vacuously.
    parsed = list(read_conll_sentences(text, comment_pattern="# comment: "))
    assert parsed == gold
Пример #2
0
def test_read_conll_sentences_hash_token():
    """A leading ``#`` line is dropped, but a ``#`` row inside a sentence is kept.

    The expected output is one sentence made of every row except the first
    (comment) line, including the ``["#", "3", "4"]`` token row.
    """
    text_tokens = [["# This is actually a comment"], ["a", "1", "2"],
                   ["#", "3", "4"], ["c", "5", "6"]]
    gold = [text_tokens[1:]]
    text = StringIO("\n".join(" ".join(tt) for tt in text_tokens))
    # Compare the full result instead of zipping: zip() stops at the shorter
    # input, so a reader that yielded nothing would pass the test vacuously.
    parsed = list(read_conll_sentences(text))
    assert parsed == gold
Пример #3
0
def test_read_conll_sentences_no_comments():
    """With ``allow_comments=False`` a leading ``#`` row is kept as a token row.

    The expected output is a single sentence containing all three rows.
    """
    text_tokens = [
        ["#", "1", "2"],
        ["b", "3", "4"],
        ["c", "5", "6"],
    ]
    gold = [text_tokens]
    text = StringIO("\n".join(" ".join(tt) for tt in text_tokens))
    # Compare the full result instead of zipping: zip() stops at the shorter
    # input, so a reader that yielded nothing would pass the test vacuously.
    parsed = list(read_conll_sentences(text, allow_comments=False))
    assert parsed == gold
Пример #4
0
def _read_conll_file(f, delim):
    """Collect the gold and predicted tag columns from a conll file.

    :param f: `file` The open file object.
    :param delim: `str` The column separator, forwarded to
        `read_conll_sentences` as its `delim` argument.

    :returns: `Tuple[List[List[str]], List[List[str]]]` The golds and
        the predictions, as two aligned lists with one inner list of
        tags per sentence.

    Note:
        For every row of every sentence the second-to-last column is
        taken as the gold tag and the last column as the predicted
        tag. Sentence splitting is delegated to `read_conll_sentences`.
    """
    golds, preds = [], []
    for sentence in read_conll_sentences(f, delim=delim):
        gold_tags = [row[-2] for row in sentence]
        pred_tags = [row[-1] for row in sentence]
        golds.append(gold_tags)
        preds.append(pred_tags)
    return golds, preds
Пример #5
0
def test_read_conll_sentences():
    """Check sentences read from ``TEST_FILE`` against ``gold_sentences``.

    Sentences are compared positionally; the comparison stops at the
    shorter of the two sequences.
    """
    parsed_sentences = read_conll_sentences(TEST_FILE)
    for parsed, expected in zip(parsed_sentences, gold_sentences):
        assert parsed == expected