Пример #1
0
    def __iter__(self):
        """Iterate over the text, yielding one ``Item`` of type "word" at a time.

        The result of ``self._re.split`` is consumed two entries at a time:
        the first entry becomes the "item" value and both entries joined
        become its "@raw" value (presumably a word followed by the separator
        captured by the pattern -- confirm where ``self._re`` is built).

        When the pattern matches at the very start of the text, the first
        three entries are merged into one item so that the leading word break
        stays attached to the first word.
        """
        leading_break = self._re.match(self._text)
        pieces = self._re.split(self._text)
        # Drop the empty string that split() puts in front of the pieces.
        if pieces[0] == '':
            del pieces[0]

        cursor = 0

        # Special case: the text starts with a word break, so glue that
        # break onto the first word instead of emitting it separately.
        if leading_break is not None:
            head = pieces[:3]
            cursor = 3
            yield Item({"item": head[1], "type": "word", "@raw": ''.join(head)})

        for index in range(cursor, len(pieces), 2):
            chunk = ''.join(pieces[index:index + 2])
            # Skip pairs that carry no text at all.
            if chunk:
                yield Item({"item": pieces[index], "type": "word", "@raw": chunk})
Пример #2
0
def test_exceptions():
    """Check the error messages produced by invalid Item / Schema usage."""
    # More than one positional argument is rejected.
    with raises(ValueError) as too_many_args:
        Item(None, None)
    assert 'Expected max 1 argument' in str(too_many_args)

    # A positional argument cannot be mixed with keyword arguments.
    with raises(ValueError) as mixed_args:
        Item(None, somekeyword=None)
    assert "Cannot combine both a positional and keyword arguments" in str(mixed_args)

    # Appending something that is not an accepted type fails.
    empty_schema = Schema()
    with raises(SchemaError) as wrong_type:
        empty_schema.append(None)
    assert "Wrong type" in str(wrong_type)
Пример #3
0
def test_item():
    """Exercise length, iteration, lookup, repr and inequality of Item."""
    empty = Item()
    assert len(empty) == 0
    assert repr(empty) == 'Item({})'

    filled = Item(a='a_', b='b_')
    assert len(filled) == 2
    # Each value was built as its key followed by an underscore.
    for key in filled:
        assert key + '_' == filled[key]

    # Either key order is accepted in the textual representation.
    accepted_reprs = (
        'Item({"a": "a_", "b": "b_"})',
        'Item({"b": "b_", "a": "a_"})',
    )
    assert repr(filled) in accepted_reprs

    assert empty != filled
Пример #4
0
def test_roundtrip():
    """Check that a Schema survives a JSON dump/load round trip.

    The schema is filled from both plain dicts and ``Item`` instances, then
    extended with a copy of itself. Every element is expected to be exposed
    as an ``Item`` both before and after serialisation.
    """
    schema = Schema()
    testlen = 1
    # Integer upper bound: if ``randint`` is random.randint (its import is not
    # shown here), float arguments such as 1e10 are deprecated since
    # Python 3.10 and raise TypeError on 3.12+.
    max_time = 10 ** 10
    for _ in range(testlen):
        schema.append(
            dict(item=random_str(),
                 start=randint(0, max_time),
                 end=randint(0, max_time)))
        schema.append(
            Item(
                OrderedDict(item=random_str(),
                            start=randint(0, max_time),
                            end=randint(0, max_time))))

    # Two appends per iteration, doubled by extending the schema with itself.
    schema.extend(list(schema))
    assert len(schema) == testlen * 4

    for item in schema:
        assert type(item) is Item

    json_ = schema.json()
    assert Schema.loads(json_) == schema
    schema = Schema.loads(json_)
    for item in schema:
        assert type(item) is Item
Пример #5
0
def test_encode():
    """Check JSON encoding of ``Item`` and ``Schema`` objects.

    Covers ``Item.json`` with and without indentation, ``Schema.dump`` called
    through the class and through an instance, ``Schema.json`` / ``repr`` for
    a two-item schema, and the TypeError raised when ``JSONEncoder`` is given
    an object it cannot serialise.
    """
    item = Item(item='word', start=12, end=23)
    itemdict = item._asdict()
    line = json.dumps(itemdict)
    line_formatted = json.dumps(itemdict, indent=2)

    assert item.json() == line
    assert item.json(indent=2) == line_formatted

    # dump() invoked through the class ...
    buffer = io.StringIO()
    Schema.dump(Schema([item]), buffer)
    assert ('[%s]' % (item.json(), )) == buffer.getvalue()

    # ... and through an instance must write the same output.
    buffer = io.StringIO()
    Schema([item]).dump(buffer)
    assert ('[%s]' % (item.json(), )) == buffer.getvalue()

    schema = Schema()
    schema.append(item)
    schema.append(item)
    # Compare by value: ``is`` against an int literal tests identity and only
    # passes by accident of CPython's small-integer caching.
    assert len(schema) == 2
    assert schema.json() == '[%s, %s]' % ((line, ) * 2)
    assert schema.json(indent=2) == '[\n%s,\n%s\n]' % (
        (textwrap.indent(line_formatted, '  '), ) * 2)
    assert repr(schema) == ('Schema(%s)' % (schema.json()))

    class T:
        ok = False

    # The call itself is expected to raise; wrapping it in ``assert`` would
    # drop the call entirely when assertions are disabled.
    with raises(TypeError) as exc:
        json.dumps(T(), cls=JSONEncoder)
    assert "is not JSON serializable" in str(exc)
Пример #6
0
from benchmarkstt.input.core import PlainText, File
from benchmarkstt.schema import Item, Schema
import pytest

# Plain-text fixture; the relative path is resolved against the current
# working directory.
candide_file = './resources/test/_data/candide.txt'
# Decode explicitly so the fixture reads the same regardless of the platform's
# locale default. NOTE(review): assumes the fixture is UTF-8/ASCII -- confirm.
with open(candide_file, encoding='utf-8') as f:
    candide = f.read()

candide_schema = [Item({"item": "\"There", "type": "word", "@raw": "\n\"There "}),
                  Item({"item": "is", "type": "word", "@raw": "is "}),
                  Item({"item": "a", "type": "word", "@raw": "a "}),
                  Item({"item": "concatenation", "type": "word", "@raw": "concatenation "}),
                  Item({"item": "of", "type": "word", "@raw": "of "}),
                  Item({"item": "events", "type": "word", "@raw": "events "}),
                  Item({"item": "in", "type": "word", "@raw": "in "}),
                  Item({"item": "this", "type": "word", "@raw": "this "}),
                  Item({"item": "best", "type": "word", "@raw": "best "}),
                  Item({"item": "of", "type": "word", "@raw": "of "}),
                  Item({"item": "all", "type": "word", "@raw": "all "}),
                  Item({"item": "possible", "type": "word", "@raw": "possible "}),
                  Item({"item": "worlds:", "type": "word", "@raw": "worlds:\n"}),
                  Item({"item": "for", "type": "word", "@raw": "for "}),
                  Item({"item": "if", "type": "word", "@raw": "if "}),
                  Item({"item": "you", "type": "word", "@raw": "you "}),
                  Item({"item": "had", "type": "word", "@raw": "had "}),
                  Item({"item": "not", "type": "word", "@raw": "not "}),
                  Item({"item": "been", "type": "word", "@raw": "been "}),
                  Item({"item": "kicked", "type": "word", "@raw": "kicked "}),
                  Item({"item": "out", "type": "word", "@raw": "out "}),
                  Item({"item": "of", "type": "word", "@raw": "of "}),
                  Item({"item": "a", "type": "word", "@raw": "a "}),
Пример #7
0
def test_equality():
    """Check equality between schemas, items and plain dicts."""
    # A schema loaded from an empty JSON list equals a freshly built one.
    loaded_empty = Schema.loads('[]')
    assert loaded_empty == Schema()

    # A schema holding one item differs from an empty schema.
    single = Schema([Item(item='test')])
    assert single != Schema()

    # An Item compares equal to a plain dict with the same content.
    assert Item(item='test') == {'item': 'test'}

    # Key order of the source mapping does not affect equality.
    for mapping in ({'item': 'test', 'item2': 55},
                    {'item2': 55, 'item': 'test'}):
        assert Item(mapping) == Item(item='test', item2=55)