Пример #1
0
    def test_write_layers(self):
        """Write two layers of four objects each and compare the raw bytes."""
        sink = io.BytesIO()
        doc = Document(header=Header(encoding="utf-8"))

        first_layer = Layer(0, "Layer 1")
        second_layer = Layer(1, "Layer 2")
        for layer in (first_layer, second_layer):
            doc.add_layer(layer)

        # Four objects per layer, with ids 0..3 inside their own layer.
        first_objects = [LayerObject(obj_id, 0) for obj_id in range(4)]
        second_objects = [LayerObject(obj_id, 1) for obj_id in range(4)]

        first_layer.add_objects(first_objects)
        second_layer.add_objects(second_objects)

        _Writer(doc, sink)._write_layers()

        expected = (
            b'\x03'
            b'\x00\x00\x00\x02'
            b'\x09Layer 1\x00\x00\x00\x00\x04\x0A'
            b'\x09Layer 2\x00\x00\x00\x00\x04\x0A'
            b'\x04'
        )
        assert sink.getvalue() == expected
Пример #2
0
    def __init__(self, stream):
        """Set up parser state around ``stream``.

        The stream is wrapped in an ``io.BufferedReader`` so that ``peek``
        is available; the document starts out empty and the encoding
        defaults to UTF-8.
        """
        buffered = io.BufferedReader(stream)
        self.stream = buffered
        self.document = Document()
        self.encoding = 'utf-8'

        # Scratch structures, initially empty.
        self._temp_layers, self._temp_layer_objects = [], {}
Пример #3
0
    def test_write_relations(self):
        """Write four parent/child links between two layers and compare bytes."""
        sink = io.BytesIO()
        doc = Document(header=Header(encoding="utf-8"))

        parent_layer = Layer(0, "Layer 1")
        child_layer = Layer(1, "Layer 2")
        for layer in (parent_layer, child_layer):
            doc.add_layer(layer)

        parents = [LayerObject(obj_id, 0) for obj_id in range(4)]
        children = [LayerObject(obj_id, 1) for obj_id in range(4)]

        parent_layer.add_objects(parents)
        child_layer.add_objects(children)

        # (parent index, child index) pairs linking layer 0 to layer 1.
        for parent_idx, child_idx in ((0, 1), (1, 0), (2, 3), (3, 2)):
            parents[parent_idx].add_child(children[child_idx])

        writer = _Writer(doc, sink)
        writer._prepare_relations_for_writing()
        writer._write_relations()

        expected = (
            b'\x07'
            b'\x00\x00\x00\x01'
            b'\x0e'
            b'\x00\x00\x00\x00'
            b'\x00\x00\x00\x01'
            b'\x00\x00\x00\x04'
            b'\x00\x00\x00\x00'
            b'\x00\x00\x00\x01'
            b'\x00\x00\x00\x01'
            b'\x00\x00\x00\x00'
            b'\x00\x00\x00\x02'
            b'\x00\x00\x00\x03'
            b'\x00\x00\x00\x03'
            b'\x00\x00\x00\x02'
            b'\x0f'
            b'\x08'
        )
        assert sink.getvalue() == expected
Пример #4
0
    def test_write_attrs(self):
        """Write the attributes of three objects and look for each chunk."""
        sink = io.BytesIO()
        doc = Document(header=Header(encoding="utf-8"))

        first_layer = Layer(0, "Layer 1")
        second_layer = Layer(1, "Layer 2")
        for layer in (first_layer, second_layer):
            doc.add_layer(layer)

        first_objects = [
            LayerObject(0, 0, attrs={'key1': 'val2'}),
        ]
        second_objects = [
            LayerObject(0, 1, attrs={'key3': 'val'}),
            LayerObject(1, 1, attrs={'key4': 'val2'}),
        ]

        first_layer.add_objects(first_objects)
        second_layer.add_objects(second_objects)

        writer = _Writer(doc, sink)
        writer._prepare_attrs_for_writing()
        writer._write_attrs()

        serialized = sink.getvalue()

        # Opening flag followed by the number of chunks.
        section_start = b'\x05' b'\x00\x00\x00\x03'
        assert section_start in serialized

        # All chunks are expected to be full chunks. Their order may vary,
        # so only check that each expected chunk is present somewhere.
        expected_chunks = (
            b'\x0B' b'\x00\x00\x00\x00' b'key1\x00' b'val2\x00' b'\x0D',
            b'\x0B' b'\x00\x00\x00\x01' b'key3\x00' b'val\x00\x00' b'\x0D',
            b'\x0B' b'\x00\x00\x00\x01' b'key4\x00' b'\x00val2\x00' b'\x0D',
        )
        for chunk in expected_chunks:
            assert chunk in serialized

        # The output must finish with the closing flag.
        assert serialized.endswith(b'\x06')
Пример #5
0
    def test_write_header(self):
        """Write the header of a UTF-8 document and compare the raw bytes."""
        sink = io.BytesIO()
        doc = Document(header=Header(encoding="utf-8"))

        _Writer(doc, sink)._write_header()

        expected = b'\x01utf-8\x00\x02'
        assert sink.getvalue() == expected
Пример #6
0
    def test_prepare_relations_for_writing(self):
        """Check the relation table the writer builds before serializing."""
        sink = io.BytesIO()
        doc = Document(header=Header(encoding="utf-8"))

        parent_layer = Layer(0, "Layer 1")
        child_layer = Layer(1, "Layer 2")
        for layer in (parent_layer, child_layer):
            doc.add_layer(layer)

        parents = [LayerObject(obj_id, 0) for obj_id in range(4)]
        children = [LayerObject(obj_id, 1) for obj_id in range(4)]

        parent_layer.add_objects(parents)
        child_layer.add_objects(children)

        # (parent index, child index) pairs linking layer 0 to layer 1.
        links = ((0, 1), (1, 0), (2, 3), (3, 2))
        for parent_idx, child_idx in links:
            parents[parent_idx].add_child(children[child_idx])

        writer = _Writer(doc, sink)
        writer._prepare_relations_for_writing()

        # The table is keyed by parent layer id, then by child layer id.
        relations = writer._temp_relations
        assert 0 in relations
        assert 1 in relations[0]

        recorded_pairs = relations[0][1]
        for link in links:
            assert link in recorded_pairs

        assert writer._temp_num_of_relations == 1
Пример #7
0
    def test_write_flag(self):
        """Writing ``HEADER_START`` must emit exactly the byte 0x01."""
        sink = io.BytesIO()
        doc = Document(header=Header(encoding="utf-8"))

        flag_writer = _Writer(doc, sink)
        flag_writer._write_flag(HEADER_START)

        written = sink.getvalue()
        assert written == b'\x01'
Пример #8
0
    def test_write_string(self):
        """Writing a string must emit its encoded bytes and nothing else."""
        sink = io.BytesIO()
        doc = Document(header=Header(encoding="utf-8"))

        string_writer = _Writer(doc, sink)
        string_writer._write_string('Hello')

        # Only the text itself is expected in the output.
        written = sink.getvalue()
        assert written == b'Hello'
Пример #9
0
    def test_write_to_string(self):
        """``write_to_string`` must hand back a ``str`` for a small document."""
        doc = Document(header=Header(encoding="utf-8"))

        first_layer = Layer(0, "Layer 1")
        second_layer = Layer(1, "Layer 2")
        for layer in (first_layer, second_layer):
            doc.add_layer(layer)

        first_objects = [
            LayerObject(0, 0, attrs={'key1': 'val2'}),
        ]
        second_objects = [
            LayerObject(0, 1, attrs={'key3': 'val'}),
            LayerObject(1, 1, attrs={'key4': 'val2'}),
        ]

        first_layer.add_objects(first_objects)
        second_layer.add_objects(second_objects)

        serialized = write_to_string(doc)
        assert isinstance(serialized, str)
Пример #10
0
    def test_write_then_parse_goes_through(self):
        """Round-trip one document through the bytes and the string APIs."""
        doc = Document(header=Header(encoding="utf-8"))

        first_layer = Layer(0, "Layer 1")
        second_layer = Layer(1, "Layer 2")
        for layer in (first_layer, second_layer):
            doc.add_layer(layer)

        first_objects = [
            LayerObject(0, 0, attrs={'key1': 'val2'}),
        ]
        second_objects = [
            LayerObject(0, 1, attrs={'key3': 'val'}),
            LayerObject(1, 1, attrs={'key4': 'val2'}),
        ]

        first_layer.add_objects(first_objects)
        second_layer.add_objects(second_objects)

        def check_parsed(parsed):
            # Shared expectations for a parsed copy of ``doc``. Attribute
            # values are compared against bytes, as the parser returns them.
            assert isinstance(parsed, Document)
            assert len(parsed.layers) == 2
            assert parsed.layers[0].name == 'Layer 1'
            assert parsed.layers[1].name == 'Layer 2'

            parsed_first = parsed.layers[0]
            parsed_second = parsed.layers[1]

            assert len(parsed_first.objects) == 1
            only_object = parsed_first.objects[0]
            assert only_object.id == 0
            assert only_object.layer == 0
            assert 'key1' in only_object.attrs
            assert only_object.attrs['key1'] == b'val2'

            assert len(parsed_second.objects) == 2
            assert parsed_second.objects[0].id == 0
            assert parsed_second.objects[0].layer == 1
            assert 'key3' in parsed_second.objects[0].attrs
            assert parsed_second.objects[0].attrs['key3'] == b'val'
            assert 'key4' in parsed_second.objects[1].attrs
            assert parsed_second.objects[1].attrs['key4'] == b'val2'

        # First through the bytes API, then through the string API.
        as_bytes = write_to_bytes(doc)
        check_parsed(parse_from_bytes(as_bytes))

        as_string = write_to_string(doc)
        check_parsed(parse_from_string(as_string))
Пример #11
0
    def test_prepare_attrs_for_writing(self):
        """Check the attribute table the writer builds before serializing."""
        sink = io.BytesIO()
        doc = Document(header=Header(encoding="utf-8"))

        first_layer = Layer(0, "Layer 1")
        second_layer = Layer(1, "Layer 2")
        for layer in (first_layer, second_layer):
            doc.add_layer(layer)

        first_attrs = [
            {'key1': 'val2'},
            {'key2': 'val'},
            {'key1': 'val4', 'key2': 'val3'},
            {'key2': 'val'},
        ]
        second_attrs = [
            {'key3': 'val'},
            {'key4': 'val6'},
            {'key2': 'val7'},
            {'key4': 'val2'},
        ]
        first_objects = [
            LayerObject(obj_id, 0, attrs=attrs)
            for obj_id, attrs in enumerate(first_attrs)
        ]
        second_objects = [
            LayerObject(obj_id, 1, attrs=attrs)
            for obj_id, attrs in enumerate(second_attrs)
        ]

        first_layer.add_objects(first_objects)
        second_layer.add_objects(second_objects)

        # Relations are added too, although only attributes are inspected.
        for parent_idx, child_idx in ((0, 1), (1, 0), (2, 3), (3, 2)):
            first_objects[parent_idx].add_child(second_objects[child_idx])

        writer = _Writer(doc, sink)
        writer._prepare_attrs_for_writing()

        # Expected table: layer id -> attribute name -> object id -> value.
        expected = {
            0: {
                'key1': {0: 'val2', 2: 'val4'},
                'key2': {1: 'val', 2: 'val3', 3: 'val'},
            },
            1: {
                'key2': {2: 'val7'},
                'key3': {0: 'val'},
                'key4': {1: 'val6', 3: 'val2'},
            },
        }

        prepared = writer._temp_attributes
        for layer_id, attrs_by_name in expected.items():
            assert layer_id in prepared
            layer_attrs = prepared[layer_id]
            for attr_name, values_by_object in attrs_by_name.items():
                assert attr_name in layer_attrs
                for object_id, value in values_by_object.items():
                    assert object_id in layer_attrs[attr_name]
                    assert layer_attrs[attr_name][object_id] == value
Пример #12
0
class _Parser(object):
    def __init__(self, stream):
        self.stream = io.BufferedReader(stream)  # Grants access to e.g. peek
        self.document = Document()
        self.encoding = 'utf-8'

        self._temp_layers = []
        self._temp_layer_objects = {}

    def parse(self):
        self._parse_header()
        self._parse_layers()
        self._parse_relations()
        self._parse_attrs()

        return self.document

    def _parse_header(self):
        self._expect(HEADER_START)
        encoding = self._eat_until_sep().decode()

        header = Header(encoding=encoding)
        self.encoding = encoding
        self.document.header = header

        self._expect(HEADER_END)

    def _parse_layers(self):
        self._expect(LAYERS_START)
        number_of_layers = self._read_as_int(4)
        for i in range(number_of_layers):
            self._parse_layer(i)

        self._expect(LAYERS_END)

    def _parse_layer(self, _id):
        self._expect(LAYER_START)
        name = self._eat_until_sep().decode(self.encoding)
        number_of_entities = self._read_as_int(4)

        objects = [LayerObject(i, _id) for i in range(number_of_entities)]
        layer = Layer(_id, name, objects)
        self.document.add_layer(layer)
        self._temp_layers.append(layer)
        self._temp_layer_objects[_id] = objects

        self._expect(LAYER_END)

    def _parse_relations(self):
        self._expect(RELATIONS_START)
        number_of_relations = self._read_as_int(4)
        for i in range(number_of_relations):
            self._parse_relation()

        self._expect(RELATIONS_END)

    def _parse_relation(self):
        self._expect(RELATION_START)
        parent_layer = self._read_as_int(4)
        children_layer = self._read_as_int(4)
        num_of_pairs = self._read_as_int(4)

        for i in range(num_of_pairs):
            parent = self._read_as_int(4)
            child = self._read_as_int(4)

            # TODO, add explicit checks and error reporting
            parent_object = self._temp_layer_objects[parent_layer][parent]
            child_object = self._temp_layer_objects[children_layer][child]

            parent_object.add_child(child_object)

        self._expect(RELATION_END)

    def _parse_attrs(self):
        self._expect(ATTRS_START)
        num_of_chunks = self._read_as_int(4)
        for i in range(num_of_chunks):
            self._parse_chunk()

        self._expect(ATTRS_END)

    def _parse_chunk(self):
        _next = self._peek_as_int()
        if _next == CHUNK_FULL_START:
            self._parse_full_chunk()
        elif _next == CHUNK_LINKED_START:
            self._parse_linked_chunk()
        else:
            raise TBFParsingException(
                "Expected CHUNK_FULL_START or CHUNK_LINKED_START, got %d" %
                _next)

    def _parse_full_chunk(self):
        self._expect(CHUNK_FULL_START)
        layer_id = self._read_as_int(4)
        attr_name = self._eat_until_sep().decode(self.encoding)
        for layer_object in self._temp_layer_objects[layer_id]:
            value = self._eat_until_sep()
            layer_object.set_attr(attr_name, value)

        self._expect(CHUNK_END)

    def _parse_linked_chunk(self):
        self._expect(CHUNK_LINKED_START)
        layer_id = self._read_as_int(4)
        attr_name = self._eat_until_sep().decode(self.encoding)
        num_of_entries = self._read_as_int(4)

        for i in range(num_of_entries):
            object_id = self._read_as_int(4)
            value = self._eat_until_sep()

            layer_object = self._temp_layer_objects[layer_id][object_id]
            layer_object.set_attr(attr_name, value)

        self._expect(CHUNK_END)

    def _read(self, length=1):
        return self.stream.read(length)

    def _read_as_int(self, length=1):
        return int.from_bytes(self.stream.read(length), byteorder='big')

    def _read_as_str(self, length=1):
        return self.stream.read(length).decode(self.encoding)

    def _peek(self, length=1):
        return self.stream.peek(length)[:length]

    def _peek_as_int(self, length=1):
        return int.from_bytes(self.stream.peek(length)[:length],
                              byteorder='big')

    def _expect(self, byte):
        _next = ord(self._read())
        if _next != byte:
            raise TBFParsingException("Expected %d, got %d" % (byte, _next))

    def _eat_until_sep(self):
        temp = b''
        while True:
            _next = self._read()
            if _next == b'' or ord(_next) == SEPARATOR:
                break
            temp += _next
        return temp