def test_write_layers(self):
    """Verify the exact bytes ``_Writer._write_layers`` produces.

    The document holds two layers with four attribute-less objects each;
    the whole output buffer is compared against the expected byte string.
    """
    document = Document(header=Header(encoding="utf-8"))
    layers = [Layer(0, "Layer 1"), Layer(1, "Layer 2")]
    for layer in layers:
        document.add_layer(layer)

    # Object ids 0..3 in each layer; the second argument is the layer id.
    objects_per_layer = [
        [LayerObject(object_id, layer_id) for object_id in range(4)]
        for layer_id in range(2)
    ]
    for layer, layer_objects in zip(layers, objects_per_layer):
        layer.add_objects(layer_objects)

    output = io.BytesIO()
    _Writer(document, output)._write_layers()

    expected = b''.join([
        b'\x03',                                   # section opening flag
        b'\x00\x00\x00\x02',                       # layer count (2)
        # Per layer: flag, name, 0x00, 4-byte object count (4), flag.
        b'\x09Layer 1\x00\x00\x00\x00\x04\x0A',
        b'\x09Layer 2\x00\x00\x00\x00\x04\x0A',
        b'\x04',                                   # section closing flag
    ])
    assert output.getvalue() == expected
def __init__(self, stream):
    """Initialise parser state around ``stream``.

    :param stream: raw binary stream; it is wrapped in an
        ``io.BufferedReader`` before being stored.
    """
    # The buffered wrapper grants access to e.g. peek
    buffered = io.BufferedReader(stream)
    self.stream = buffered
    self.document = Document()
    self.encoding = 'utf-8'
    # Scratch structures, both empty at construction time.
    self._temp_layers, self._temp_layer_objects = [], {}
def test_write_relations(self):
    """Verify the exact bytes ``_Writer._write_relations`` produces.

    Four objects of layer 0 each get one child in layer 1. After
    ``_prepare_relations_for_writing`` the relations are written and the
    whole buffer is compared against the expected byte string.
    """
    document = Document(header=Header(encoding="utf-8"))
    parent_layer = Layer(0, "Layer 1")
    child_layer = Layer(1, "Layer 2")
    document.add_layer(parent_layer)
    document.add_layer(child_layer)

    parents = [LayerObject(object_id, 0) for object_id in range(4)]
    children = [LayerObject(object_id, 1) for object_id in range(4)]
    parent_layer.add_objects(parents)
    child_layer.add_objects(children)

    # (index into parents, index into children)
    for parent_index, child_index in ((0, 1), (1, 0), (2, 3), (3, 2)):
        parents[parent_index].add_child(children[child_index])

    output = io.BytesIO()
    writer = _Writer(document, output)
    writer._prepare_relations_for_writing()
    writer._write_relations()

    expected = b''.join([
        b'\x07',                 # section opening flag
        b'\x00\x00\x00\x01',     # number of relations (1)
        b'\x0e',                 # relation opening flag
        b'\x00\x00\x00\x00',     # parent layer id (0)
        b'\x00\x00\x00\x01',     # child layer id (1)
        b'\x00\x00\x00\x04',     # number of pairs (4)
        b'\x00\x00\x00\x00',     # pair: parent 0 ...
        b'\x00\x00\x00\x01',     # ... child 1
        b'\x00\x00\x00\x01',     # pair: parent 1 ...
        b'\x00\x00\x00\x00',     # ... child 0
        b'\x00\x00\x00\x02',     # pair: parent 2 ...
        b'\x00\x00\x00\x03',     # ... child 3
        b'\x00\x00\x00\x03',     # pair: parent 3 ...
        b'\x00\x00\x00\x02',     # ... child 2
        b'\x0f',                 # relation closing flag
        b'\x08',                 # section closing flag
    ])
    assert output.getvalue() == expected
def test_write_attrs(self):
    """Verify the chunks ``_Writer._write_attrs`` emits for three attributes.

    Chunk order is not fixed, so each expected chunk is only checked for
    presence in the output rather than comparing the whole buffer.
    """
    document = Document(header=Header(encoding="utf-8"))
    first_layer = Layer(0, "Layer 1")
    second_layer = Layer(1, "Layer 2")
    document.add_layer(first_layer)
    document.add_layer(second_layer)

    first_objects = [LayerObject(0, 0, attrs={'key1': 'val2'})]
    second_objects = [
        LayerObject(0, 1, attrs={'key3': 'val'}),
        LayerObject(1, 1, attrs={'key4': 'val2'}),
    ]
    first_layer.add_objects(first_objects)
    second_layer.add_objects(second_objects)

    output = io.BytesIO()
    writer = _Writer(document, output)
    writer._prepare_attrs_for_writing()
    writer._write_attrs()

    written = output.getvalue()

    # Section opening flag followed by the chunk count (3).
    assert b'\x05' b'\x00\x00\x00\x03' in written

    # All chunks will be full.
    # The ordering may vary, so we simply check that the correct chunks
    # are there.
    expected_chunks = (
        b'\x0B' b'\x00\x00\x00\x00' b'key1\x00' b'val2\x00' b'\x0D',
        b'\x0B' b'\x00\x00\x00\x01' b'key3\x00' b'val\x00\x00' b'\x0D',
        b'\x0B' b'\x00\x00\x00\x01' b'key4\x00' b'\x00val2\x00' b'\x0D',
    )
    for chunk in expected_chunks:
        assert chunk in written

    # The very last byte must be the closing flag.
    assert written.endswith(b'\x06')
def test_write_header(self):
    """Verify the header bytes written for a UTF-8 document."""
    buffer = io.BytesIO()
    header = Header(encoding="utf-8")
    _Writer(Document(header=header), buffer)._write_header()
    # Opening flag, encoding name, 0x00, closing flag.
    expected = b'\x01utf-8\x00\x02'
    assert buffer.getvalue() == expected
def test_prepare_relations_for_writing(self):
    """Verify the structure built by ``_prepare_relations_for_writing``.

    Relations from layer 0 to layer 1 must end up under
    ``_temp_relations[0][1]`` as ``(parent_id, child_id)`` tuples, and the
    relation counter must be 1.
    """
    document = Document(header=Header(encoding="utf-8"))
    parent_layer = Layer(0, "Layer 1")
    child_layer = Layer(1, "Layer 2")
    document.add_layer(parent_layer)
    document.add_layer(child_layer)

    parents = [LayerObject(object_id, 0) for object_id in range(4)]
    children = [LayerObject(object_id, 1) for object_id in range(4)]
    parent_layer.add_objects(parents)
    child_layer.add_objects(children)

    # (parent id, child id); object ids equal their list index here.
    links = ((0, 1), (1, 0), (2, 3), (3, 2))
    for parent_id, child_id in links:
        parents[parent_id].add_child(children[child_id])

    output = io.BytesIO()
    writer = _Writer(document, output)
    writer._prepare_relations_for_writing()

    relations = writer._temp_relations
    assert 0 in relations
    assert 1 in relations[0]
    pairs = relations[0][1]
    for link in links:
        assert link in pairs
    assert writer._temp_num_of_relations == 1
def test_write_flag(self):
    """Verify that writing ``HEADER_START`` emits the single byte 0x01."""
    buffer = io.BytesIO()
    header = Header(encoding="utf-8")
    _Writer(Document(header=header), buffer)._write_flag(HEADER_START)
    assert buffer.getvalue() == b'\x01'
def test_write_string(self):
    """Verify that ``_write_string`` emits exactly the text's bytes."""
    buffer = io.BytesIO()
    header = Header(encoding="utf-8")
    string_writer = _Writer(Document(header=header), buffer)
    string_writer._write_string('Hello')
    # Nothing is expected before or after the text itself.
    assert buffer.getvalue() == b'Hello'
def test_write_to_string(self):
    """Verify that ``write_to_string`` returns a ``str`` for a small document."""
    document = Document(header=Header(encoding="utf-8"))
    first_layer = Layer(0, "Layer 1")
    second_layer = Layer(1, "Layer 2")
    document.add_layer(first_layer)
    document.add_layer(second_layer)

    first_objects = [LayerObject(0, 0, attrs={'key1': 'val2'})]
    second_objects = [
        LayerObject(0, 1, attrs={'key3': 'val'}),
        LayerObject(1, 1, attrs={'key4': 'val2'}),
    ]
    first_layer.add_objects(first_objects)
    second_layer.add_objects(second_objects)

    serialized = write_to_string(document)
    assert isinstance(serialized, str)
def test_write_then_parse_goes_through(self):
    """Round-trip a document through both the bytes and the string API.

    The same document is written with ``write_to_bytes`` and
    ``write_to_string``; each result is parsed back and checked for the
    layers, objects and attributes that were put in. Parsed attribute
    values are compared as ``bytes``.
    """

    def check_parsed(parsed):
        # Shared assertions for both round trips.
        assert isinstance(parsed, Document)
        assert len(parsed.layers) == 2
        assert parsed.layers[0].name == 'Layer 1'
        assert parsed.layers[1].name == 'Layer 2'

        first = parsed.layers[0]
        second = parsed.layers[1]

        assert len(first.objects) == 1
        assert first.objects[0].id == 0
        assert first.objects[0].layer == 0
        assert 'key1' in first.objects[0].attrs
        assert first.objects[0].attrs['key1'] == b'val2'

        assert len(second.objects) == 2
        assert second.objects[0].id == 0
        assert second.objects[0].layer == 1
        assert 'key3' in second.objects[0].attrs
        assert second.objects[0].attrs['key3'] == b'val'
        assert 'key4' in second.objects[1].attrs
        assert second.objects[1].attrs['key4'] == b'val2'

    document = Document(header=Header(encoding="utf-8"))
    source_layer_1 = Layer(0, "Layer 1")
    source_layer_2 = Layer(1, "Layer 2")
    document.add_layer(source_layer_1)
    document.add_layer(source_layer_2)

    objects_1 = [LayerObject(0, 0, attrs={'key1': 'val2'})]
    objects_2 = [
        LayerObject(0, 1, attrs={'key3': 'val'}),
        LayerObject(1, 1, attrs={'key4': 'val2'}),
    ]
    source_layer_1.add_objects(objects_1)
    source_layer_2.add_objects(objects_2)

    # Bytes round trip first, then the string round trip.
    check_parsed(parse_from_bytes(write_to_bytes(document)))
    check_parsed(parse_from_string(write_to_string(document)))
def test_prepare_attrs_for_writing(self):
    """Verify the mapping built by ``_prepare_attrs_for_writing``.

    ``_temp_attributes`` is expected to be indexable as
    ``[layer_id][attr_name][object_id]`` and to yield the attribute value
    originally set on that object.
    """
    document = Document(header=Header(encoding="utf-8"))
    first_layer = Layer(0, "Layer 1")
    second_layer = Layer(1, "Layer 2")
    document.add_layer(first_layer)
    document.add_layer(second_layer)

    first_objects = [
        LayerObject(0, 0, attrs={'key1': 'val2'}),
        LayerObject(1, 0, attrs={'key2': 'val'}),
        LayerObject(2, 0, attrs={'key1': 'val4', 'key2': 'val3'}),
        LayerObject(3, 0, attrs={'key2': 'val'}),
    ]
    second_objects = [
        LayerObject(0, 1, attrs={'key3': 'val'}),
        LayerObject(1, 1, attrs={'key4': 'val6'}),
        LayerObject(2, 1, attrs={'key2': 'val7'}),
        LayerObject(3, 1, attrs={'key4': 'val2'}),
    ]
    first_layer.add_objects(first_objects)
    second_layer.add_objects(second_objects)

    # Add some relations
    for parent_index, child_index in ((0, 1), (1, 0), (2, 3), (3, 2)):
        first_objects[parent_index].add_child(second_objects[child_index])

    output = io.BytesIO()
    writer = _Writer(document, output)
    writer._prepare_attrs_for_writing()

    # layer id -> attribute name -> object id -> expected value
    expected = {
        0: {
            'key1': {0: 'val2', 2: 'val4'},
            'key2': {1: 'val', 2: 'val3', 3: 'val'},
        },
        1: {
            'key2': {2: 'val7'},
            'key3': {0: 'val'},
            'key4': {1: 'val6', 3: 'val2'},
        },
    }

    prepared = writer._temp_attributes
    for layer_id in expected:
        assert layer_id in prepared

    # Only presence and value of the listed entries are checked.
    for layer_id, expected_attrs in expected.items():
        layer_attrs = prepared[layer_id]
        for attr_name in expected_attrs:
            assert attr_name in layer_attrs
        for attr_name, expected_values in expected_attrs.items():
            for object_id, value in expected_values.items():
                assert object_id in layer_attrs[attr_name]
                assert layer_attrs[attr_name][object_id] == value
class _Parser(object):
    """Reads a binary stream front to back and rebuilds a ``Document``.

    The stream is consumed in four consecutive sections: header, layers,
    relations and attributes. Each section is delimited by flag bytes and
    integers are read as big-endian values. Malformed or truncated input is
    reported with ``TBFParsingException``.
    """

    def __init__(self, stream):
        """Wrap ``stream`` and set up empty parsing state.

        :param stream: raw binary stream to parse.
        """
        # BufferedReader grants access to e.g. peek
        self.stream = io.BufferedReader(stream)
        self.document = Document()
        self.encoding = 'utf-8'
        self._temp_layers = []
        # layer id -> list of that layer's objects, indexed by object id
        self._temp_layer_objects = {}

    def parse(self):
        """Parse all four sections in order and return the document."""
        self._parse_header()
        self._parse_layers()
        self._parse_relations()
        self._parse_attrs()
        return self.document

    def _parse_header(self):
        """Read the header section and remember the declared encoding."""
        self._expect(HEADER_START)
        # The encoding name itself is decoded with the default codec,
        # since the document encoding is not known before this point.
        encoding = self._eat_until_sep().decode()
        header = Header(encoding=encoding)
        self.encoding = encoding
        self.document.header = header
        self._expect(HEADER_END)

    def _parse_layers(self):
        """Read the layer count followed by that many layer entries."""
        self._expect(LAYERS_START)
        number_of_layers = self._read_as_int(4)
        # Layer ids are implied by position in the stream.
        for layer_id in range(number_of_layers):
            self._parse_layer(layer_id)
        self._expect(LAYERS_END)

    def _parse_layer(self, _id):
        """Read one layer entry and create its (attribute-less) objects.

        :param _id: id assigned to the layer being read.
        """
        self._expect(LAYER_START)
        name = self._eat_until_sep().decode(self.encoding)
        number_of_entities = self._read_as_int(4)
        # Only the object count is stored; objects get ids 0..count-1.
        objects = [LayerObject(i, _id) for i in range(number_of_entities)]
        layer = Layer(_id, name, objects)
        self.document.add_layer(layer)
        self._temp_layers.append(layer)
        self._temp_layer_objects[_id] = objects
        self._expect(LAYER_END)

    def _parse_relations(self):
        """Read the relation count followed by that many relations."""
        self._expect(RELATIONS_START)
        number_of_relations = self._read_as_int(4)
        for _ in range(number_of_relations):
            self._parse_relation()
        self._expect(RELATIONS_END)

    def _parse_relation(self):
        """Read one relation and link each parent object to its child.

        :raises TBFParsingException: if a pair references a layer or an
            object that was not declared in the layers section.
        """
        self._expect(RELATION_START)
        parent_layer = self._read_as_int(4)
        children_layer = self._read_as_int(4)
        num_of_pairs = self._read_as_int(4)
        for _ in range(num_of_pairs):
            parent = self._read_as_int(4)
            child = self._read_as_int(4)
            parent_object = self._lookup_object(parent_layer, parent)
            child_object = self._lookup_object(children_layer, child)
            parent_object.add_child(child_object)
        self._expect(RELATION_END)

    def _parse_attrs(self):
        """Read the chunk count followed by that many attribute chunks."""
        self._expect(ATTRS_START)
        num_of_chunks = self._read_as_int(4)
        for _ in range(num_of_chunks):
            self._parse_chunk()
        self._expect(ATTRS_END)

    def _parse_chunk(self):
        """Dispatch on the next flag byte to the matching chunk parser.

        :raises TBFParsingException: if the next byte starts neither a
            full nor a linked chunk.
        """
        # Peek only: the chosen chunk parser consumes the flag itself.
        _next = self._peek_as_int()
        if _next == CHUNK_FULL_START:
            self._parse_full_chunk()
        elif _next == CHUNK_LINKED_START:
            self._parse_linked_chunk()
        else:
            raise TBFParsingException(
                "Expected CHUNK_FULL_START or CHUNK_LINKED_START, got %d"
                % _next)

    def _parse_full_chunk(self):
        """Read a chunk holding one value for every object of a layer.

        Values are stored on the objects as raw ``bytes``.

        :raises TBFParsingException: if the chunk names an unknown layer.
        """
        self._expect(CHUNK_FULL_START)
        layer_id = self._read_as_int(4)
        attr_name = self._eat_until_sep().decode(self.encoding)
        # Values follow in object order, one per object of the layer.
        for layer_object in self._lookup_layer_objects(layer_id):
            value = self._eat_until_sep()
            layer_object.set_attr(attr_name, value)
        self._expect(CHUNK_END)

    def _parse_linked_chunk(self):
        """Read a chunk of explicit ``(object id, value)`` entries.

        Values are stored on the objects as raw ``bytes``.

        :raises TBFParsingException: if an entry references an unknown
            layer or object.
        """
        self._expect(CHUNK_LINKED_START)
        layer_id = self._read_as_int(4)
        attr_name = self._eat_until_sep().decode(self.encoding)
        num_of_entries = self._read_as_int(4)
        for _ in range(num_of_entries):
            object_id = self._read_as_int(4)
            value = self._eat_until_sep()
            layer_object = self._lookup_object(layer_id, object_id)
            layer_object.set_attr(attr_name, value)
        self._expect(CHUNK_END)

    def _lookup_layer_objects(self, layer_id):
        """Return the object list of ``layer_id`` or report a parse error."""
        try:
            return self._temp_layer_objects[layer_id]
        except KeyError as err:
            raise TBFParsingException(
                "Reference to unknown layer %d" % layer_id) from err

    def _lookup_object(self, layer_id, object_id):
        """Return object ``object_id`` of ``layer_id`` or report a parse error."""
        objects = self._lookup_layer_objects(layer_id)
        try:
            return objects[object_id]
        except IndexError as err:
            raise TBFParsingException(
                "Reference to unknown object %d in layer %d"
                % (object_id, layer_id)) from err

    def _read(self, length=1):
        """Read up to ``length`` bytes from the stream."""
        return self.stream.read(length)

    def _read_as_int(self, length=1):
        """Read exactly ``length`` bytes as a big-endian integer.

        :raises TBFParsingException: if the stream ends before ``length``
            bytes could be read; decoding a shorter run would silently
            produce a wrong number.
        """
        raw = self.stream.read(length)
        available = len(raw) if raw else 0
        if available != length:
            raise TBFParsingException(
                "Expected %d byte(s) for an integer, got %d"
                % (length, available))
        return int.from_bytes(raw, byteorder='big')

    def _read_as_str(self, length=1):
        """Read ``length`` bytes and decode them with the document encoding."""
        return self.stream.read(length).decode(self.encoding)

    def _peek(self, length=1):
        """Return up to ``length`` upcoming bytes without consuming them."""
        # peek() may hand back more than requested, hence the slice.
        return self.stream.peek(length)[:length]

    def _peek_as_int(self, length=1):
        """Return the upcoming bytes as a big-endian integer, unconsumed.

        At the end of the stream nothing can be peeked and the result is 0.
        """
        return int.from_bytes(self.stream.peek(length)[:length],
                              byteorder='big')

    def _expect(self, byte):
        """Consume one byte and require it to equal ``byte``.

        :param byte: expected value of the next byte, as an integer.
        :raises TBFParsingException: if the stream has ended or the byte
            differs.
        """
        raw = self._read()
        if not raw:
            # ord(b'') would raise a bare TypeError; report it properly.
            raise TBFParsingException(
                "Expected %d, got end of stream" % byte)
        _next = ord(raw)
        if _next != byte:
            raise TBFParsingException("Expected %d, got %d" % (byte, _next))

    def _eat_until_sep(self):
        """Consume bytes up to and including the next separator.

        :return: the bytes before the separator, as ``bytes``. The end of
            the stream also terminates the run.
        """
        # bytearray avoids re-copying the prefix on every appended byte.
        collected = bytearray()
        while True:
            _next = self._read()
            if not _next or ord(_next) == SEPARATOR:
                break
            collected += _next
        return bytes(collected)