def root_element(self):
    """Return the parsed XML root of this part, parsing it on first access.

    The parsed element is cached on ``self._root_element`` so the stream is
    read at most once. The parser is asked to strip namespaces
    (``remove_namespaces=True``).

    Returns:
        The cached/parsed root element, or ``None`` when nothing has been
        parsed yet and the part has no stream to read from.
    """
    if self._root_element is None:
        # A part with no backing stream has nothing to parse; calling
        # ``.read()`` on ``None`` would raise AttributeError, so report
        # "no root element" instead.
        if self.stream is None:
            return None
        self._root_element = parse_xml_from_string(
            xml=self.stream.read(),
            remove_namespaces=True,
        )
    return self._root_element
def test_child_without_type_is_an_element(self):
    """Check that ``bucket.agua`` has the same type as a parsed XML element.

    The model instance is built from the XML through
    ``self._get_model_instance_from_xml``; the same XML is parsed directly
    to obtain an element whose type serves as the reference.
    """
    xml = ''' <bucket> <water /> </bucket> '''
    model_instance = self._get_model_instance_from_xml(xml)
    reference_element = parse_xml_from_string(xml)

    child_type = type(model_instance.agua)
    element_type = type(reference_element)
    self.assertEqual(child_type, element_type)
def root_element(self):
    """Return the parsed XML root of this part, parsing lazily and caching.

    Returns the cached ``self._root_element`` when one exists. Otherwise,
    returns ``None`` if the part has no stream; if it has one, the stream is
    read, parsed with ``remove_namespaces=True``, cached and returned.
    """
    # Fast path: already parsed on an earlier access.
    if self._root_element is not None:
        return self._root_element

    # Nothing to parse without a backing stream.
    if self.stream is None:
        return

    raw_xml = self.stream.read()
    self._root_element = parse_xml_from_string(
        xml=raw_xml,
        remove_namespaces=True,
    )
    return self._root_element
def test_get_image_sizes(self):
    """Check the sizes ``_get_image_size`` reports for drawing/pict elements.

    Every ``drawing`` and ``pict`` element found in the document XML is
    passed to the parser's ``_get_image_size``; the resulting sizes are
    compared, ignoring order, against the expected pairs.
    """
    parser = XMLDocx2Html(
        document_xml=self.get_xml(),
        relationships=self.relationships,
    )
    tree = parse_xml_from_string(self.get_xml())

    # Gather the image-bearing elements: all drawings first, then all picts.
    image_elements = []
    for tag_name in ('drawing', 'pict'):
        image_elements.extend(find_all(tree, tag_name))

    sizes = [parser._get_image_size(element) for element in image_elements]

    expected = [
        ('40px', '20px'),
        ('41pt', '21pt'),
    ]
    self.assertEqual(set(sizes), set(expected))
def test_get_image_id(self):
    """Check the ids ``_get_image_id`` reports for drawing/pict elements.

    The document XML is parsed with ``remove_namespaces=True``; every
    ``drawing`` and ``pict`` element found is passed to the parser's
    ``_get_image_id`` and the resulting ids are compared, ignoring order,
    against the expected relationship ids.
    """
    parser = XMLDocx2Html(
        document_xml=self.get_xml(),
        relationships=self.relationships,
    )
    tree = parse_xml_from_string(self.get_xml(), remove_namespaces=True)

    # Gather the image-bearing elements: all drawings first, then all picts.
    image_elements = []
    for tag_name in ('drawing', 'pict'):
        image_elements.extend(find_all(tree, tag_name))

    found_ids = [parser._get_image_id(element) for element in image_elements]

    expected = [
        'rId0',
        'rId1',
    ]
    self.assertEqual(set(found_ids), set(expected))
def test_namespace_manager(self):
    """Check which children ``XmlNamespaceManager.iterate_children`` yields.

    Three scenarios are asserted against the same parsed root:
    a manager with only the first namespace registered, the same manager
    after the second namespace is added, and a fresh manager with only the
    second namespace registered.
    """
    xml = '''<?xml version="1.0" encoding="UTF-8"?> <a:foo xmlns:a="http://example/test" xmlns:b="http://example2/test2"> <a:cat /> <b:dog /> <a:mouse><a:bat /></a:mouse> </a:foo> '''
    root = parse_xml_from_string(xml)

    def child_tags(ns_manager):
        # Tags of the children the given manager yields for ``root``.
        return [child.tag for child in ns_manager.iterate_children(root)]

    # Only the first namespace registered.
    manager = XmlNamespaceManager()
    manager.add_namespace('http://example/test')
    self.assertEqual(
        child_tags(manager),
        [
            '{http://example/test}cat',
            '{http://example/test}mouse',
        ],
    )

    # Same manager, second namespace added as well.
    manager.add_namespace('http://example2/test2')
    self.assertEqual(
        child_tags(manager),
        [
            '{http://example/test}cat',
            '{http://example2/test2}dog',
            '{http://example/test}mouse',
        ],
    )

    # Fresh manager with only the second namespace registered.
    manager = XmlNamespaceManager()
    manager.add_namespace('http://example2/test2')
    self.assertEqual(
        child_tags(manager),
        [
            '{http://example2/test2}dog',
        ],
    )
def _load_relationships(self): part_container = self.get_part_container() if not part_container.part_exists(self.relationship_uri): return manager = XmlNamespaceManager() manager.add_namespace(PackageRelationship.namespace) stream = part_container.get_part(self.relationship_uri).stream root = parse_xml_from_string(stream.read()) for node in manager.iterate_children(root): _, tag = xml_tag_split(node.tag) if tag != PackageRelationship.XML_TAG_NAME: continue relationship_id = node.get(PackageRelationship.XML_ATTR_ID) relationship_type = node.get(PackageRelationship.XML_ATTR_TYPE) target_mode = node.get(PackageRelationship.XML_ATTR_TARGETMODE) target_uri = node.get(PackageRelationship.XML_ATTR_TARGET) self.create_relationship( target_uri=target_uri, target_mode=target_mode, relationship_type=relationship_type, relationship_id=relationship_id, )
def _load(self):
    """Build an in-memory WordprocessingDocument from XML held on ``self``.

    Steps, in order:
      * create the document (``path=None``) and its ``/word/document.xml``
        part;
      * when ``self.styles_xml`` is truthy, append a styles relationship
        entry to ``self.relationships``;
      * for each relationship entry, register its data (if any) as a package
        stream and part under ``word/``, then create the relationship on the
        document part;
      * register ``self.document_xml`` as the document part's stream and
        relate the package to it;
      * parse the numbering XML when ``self.numbering_dict`` is not None;
      * set the page width, build the styles manager and call
        ``self.parse_begin`` with the main document part's root element.
    """
    self.document = WordprocessingDocument(path=None)
    pkg = self.document.package
    main_part = pkg.create_part(uri='/word/document.xml')

    if self.styles_xml:
        styles_entry = {
            'external': False,
            'target_path': 'styles.xml',
            'data': self.styles_xml,
            'relationship_id': 'styles',
            'relationship_type': StyleDefinitionsPart.relationship_type,
        }
        self.relationships.append(styles_entry)

    for entry in self.relationships:
        mode = 'External' if entry['external'] else 'Internal'
        entry_target = entry['target_path']
        if 'data' in entry:
            # Entries carrying data get a stream and a part of their own;
            # the stream is registered before the part is created.
            part_uri = posixpath.join(pkg.uri, 'word', entry_target)
            pkg.streams[part_uri] = BytesIO(entry['data'])
            pkg.create_part(uri=part_uri)
        main_part.create_relationship(
            target_uri=entry_target,
            target_mode=mode,
            relationship_type=entry['relationship_type'],
            relationship_id=entry['relationship_id'],
        )

    pkg.streams[main_part.uri] = BytesIO(self.document_xml)
    pkg.create_relationship(
        target_uri=main_part.uri,
        target_mode='Internal',
        relationship_type=MainDocumentPart.relationship_type,
    )

    self.numbering_root = None
    if self.numbering_dict is not None:
        numbering_xml = DXB.numbering(self.numbering_dict)
        self.numbering_root = parse_xml_from_string(numbering_xml)

    # Standard page width of a word document, in points; it is also the
    # page width the test looks for.
    self.page_width = 612

    self.styles_manager = StylesManager(
        self.document.main_document_part.style_definitions_part,
    )
    self.styles = self.styles_manager.styles
    self.parse_begin(self.document.main_document_part.root_element)
def _load_from_xml(self, xml):
    """Parse ``xml`` and return ``AbstractNum.load`` of its root element."""
    return AbstractNum.load(parse_xml_from_string(xml))
def make_xml(s):
    """Prefix ``s`` with an XML 1.0 declaration and return the parsed result.

    ``s`` is concatenated onto a bytes literal, so it is expected to be a
    byte string.
    """
    return parse_xml_from_string(b'<?xml version="1.0"?>' + s)
def _load_from_xml(self, xml):
    """Parse ``xml`` and return ``ParagraphProperties.load`` of its root."""
    return ParagraphProperties.load(parse_xml_from_string(xml))
def _get_model_instance_from_xml(self, xml):
    """Parse ``xml`` and return ``self.model.load`` of its root element."""
    return self.model.load(parse_xml_from_string(xml))
def _load_from_xml(self, xml):
    """Parse ``xml`` and return ``Level.load`` of its root element."""
    return Level.load(parse_xml_from_string(xml))
def _load_from_xml(self, xml):
    """Parse ``xml`` and return ``LevelOverride.load`` of its root element."""
    return LevelOverride.load(parse_xml_from_string(xml))
def _load_from_xml(self, xml):
    """Parse ``xml`` and return ``NumberingInstance.load`` of its root."""
    return NumberingInstance.load(parse_xml_from_string(xml))
def _load_from_xml(self, xml):
    """Parse ``xml`` and return ``NumberingProperties.load`` of its root."""
    return NumberingProperties.load(parse_xml_from_string(xml))
def root_element(self):
    """Return the parsed XML root of this part, parsing lazily and caching.

    On first access the whole stream is read and parsed with the parser's
    default options; the result is stored on ``self._root_element`` and
    reused on later calls.
    """
    # Fast path: already parsed on an earlier access.
    if self._root_element is not None:
        return self._root_element

    raw_xml = self.stream.read()
    self._root_element = parse_xml_from_string(raw_xml)
    return self._root_element
def _load_styles_from_xml(self, xml):
    """Parse ``xml`` and return ``Styles.load`` of its root element."""
    return Styles.load(parse_xml_from_string(xml))
def _load_styles_from_xml(self, xml):
    """Parse ``xml`` and return ``RunProperties.load`` of its root element."""
    return RunProperties.load(parse_xml_from_string(xml))
def _load_styles_from_xml(self, xml):
    """Parse ``xml`` and return ``Style.load`` of its root element."""
    return Style.load(parse_xml_from_string(xml))
def _load(self):
    """Build an in-memory WordprocessingDocument from XML held on ``self``.

    Steps, in order:
      * create the document (``path=None``) and its ``/word/document.xml``
        part;
      * when ``self.styles_xml`` is truthy, append a styles relationship
        entry to ``self.relationships``;
      * for each relationship entry, register its data (if any) as a package
        stream and part under ``word/``, then create the relationship on the
        document part;
      * register ``self.document_xml`` as the document part's stream and
        relate the package to it;
      * parse the numbering XML when ``self.numbering_dict`` is not None;
      * set the page width, build the styles manager and call
        ``self.parse_begin`` with the main document part itself.
    """
    self.document = WordprocessingDocument(path=None)
    pkg = self.document.package
    main_part = pkg.create_part(uri='/word/document.xml')

    if self.styles_xml:
        styles_entry = {
            'external': False,
            'target_path': 'styles.xml',
            'data': self.styles_xml,
            'relationship_id': 'styles',
            'relationship_type': StyleDefinitionsPart.relationship_type,
        }
        self.relationships.append(styles_entry)

    for entry in self.relationships:
        mode = 'External' if entry['external'] else 'Internal'
        entry_target = entry['target_path']
        if 'data' in entry:
            # Entries carrying data get a stream and a part of their own;
            # the stream is registered before the part is created.
            part_uri = posixpath.join(pkg.uri, 'word', entry_target)
            pkg.streams[part_uri] = BytesIO(entry['data'])
            pkg.create_part(uri=part_uri)
        main_part.create_relationship(
            target_uri=entry_target,
            target_mode=mode,
            relationship_type=entry['relationship_type'],
            relationship_id=entry['relationship_id'],
        )

    pkg.streams[main_part.uri] = BytesIO(self.document_xml)
    pkg.create_relationship(
        target_uri=main_part.uri,
        target_mode='Internal',
        relationship_type=MainDocumentPart.relationship_type,
    )

    self.numbering_root = None
    if self.numbering_dict is not None:
        numbering_xml = DXB.numbering(self.numbering_dict)
        self.numbering_root = parse_xml_from_string(numbering_xml)

    # Standard page width of a word document, in points; it is also the
    # page width the test looks for.
    self.page_width = 612

    self.styles_manager = StylesManager(
        self.document.main_document_part.style_definitions_part,
    )
    self.styles = self.styles_manager.styles
    self.parse_begin(self.document.main_document_part)
def _load_from_xml(self, xml):
    """Parse ``xml`` and return ``Body.load`` of its root element."""
    return Body.load(parse_xml_from_string(xml))