def test_add_appinfo_element_present_in_second_of_two_appinfo(self):
    # NOTE(review): the test name says "add" but the function exercised here
    # is delete_appinfo_element -- confirm the intended name.
    source_xsd = """ <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"> <xs:element name="root"> <xs:annotation> <xs:appinfo></xs:appinfo> <xs:appinfo><attribute>old</attribute></xs:appinfo> </xs:annotation> </xs:element> </xs:schema> """
    wanted_xsd = """ <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"> <xs:element name="root"> <xs:annotation><xs:appinfo/><xs:appinfo/></xs:annotation> </xs:element> </xs:schema> """

    def _round_trip(xml_text):
        # Parse and re-serialize so both sides are compared in the same form.
        return XSDTree.tostring(XSDTree.fromstring(xml_text))

    # The "attribute" child sits in the second of the two appinfo elements.
    result_xsd = delete_appinfo_element(source_xsd, "xs:element", "attribute")

    self.assertEqual(_round_trip(result_xsd), _round_trip(wanted_xsd))
def set_xsd_element_occurrences(xsd_string, xpath, min_occurs, max_occurs):
    """Set the minOccurs / maxOccurs attributes of the element at xpath.

    Args:
        xsd_string: XSD content, parsed with XSDTree.build_tree.
        xpath: path of the element to update; every "<default prefix>:" in it
            is replaced by LXML_SCHEMA_NAMESPACE before the lookup.
        min_occurs: value stored in the "minOccurs" attribute.
        max_occurs: value stored in the "maxOccurs" attribute.

    Returns:
        The modified tree serialized with XSDTree.tostring.
    """
    tree = XSDTree.build_tree(xsd_string)

    # Resolve the schema prefix used in the document, then rewrite the xpath
    # so the prefix is replaced by the namespace constant.
    prefix = get_default_prefix(get_namespaces(xsd_string))
    resolved_xpath = xpath.replace(prefix + ":", LXML_SCHEMA_NAMESPACE)

    # Update the occurrence attributes on the first matching element.
    target = tree.find(resolved_xpath)
    target.attrib["minOccurs"] = min_occurs
    target.attrib["maxOccurs"] = max_occurs

    return XSDTree.tostring(tree)
def rename_xsd_element(xsd_string, xpath, new_name):
    """Change the "name" attribute of the element at xpath.

    Args:
        xsd_string: XSD content, parsed with XSDTree.build_tree.
        xpath: path of the element to rename; every "<default prefix>:" in it
            is replaced by LXML_SCHEMA_NAMESPACE before the lookup.
        new_name: value stored in the "name" attribute.

    Returns:
        The modified tree serialized with XSDTree.tostring.
    """
    tree = XSDTree.build_tree(xsd_string)

    # Rewrite the prefixed xpath using the namespace constant.
    prefix = get_default_prefix(get_namespaces(xsd_string))
    resolved_xpath = xpath.replace(prefix + ":", LXML_SCHEMA_NAMESPACE)

    target = tree.find(resolved_xpath)
    target.attrib["name"] = new_name

    return XSDTree.tostring(tree)
def get_content_by_xpath(xml_string, xpath, namespaces=None):
    """Return the xpath matches of an XML document as a list of strings.

    Args:
        xml_string: XML content, parsed with XSDTree.build_tree.
        xpath: xpath expression evaluated on the tree.
        namespaces: passed through as the ``namespaces`` keyword of the
            xpath call.

    Returns:
        A list with one entry per match: matches that are already ``str``
        are kept as they are, any other match is passed to XSDTree.tostring.
    """
    tree = XSDTree.build_tree(xml_string)
    matches = tree.xpath(xpath, namespaces=namespaces)

    return [
        match if isinstance(match, str) else XSDTree.tostring(match)
        for match in matches
    ]
def delete_xsd_element(xsd_string, xpath):
    """Remove the element at xpath from the schema.

    Args:
        xsd_string: XSD content, parsed with XSDTree.build_tree.
        xpath: path of the element to remove; every "<default prefix>:" in it
            is replaced by LXML_SCHEMA_NAMESPACE before the lookup.

    Returns:
        The modified tree serialized with XSDTree.tostring.
    """
    tree = XSDTree.build_tree(xsd_string)

    # Rewrite the prefixed xpath using the namespace constant.
    prefix = get_default_prefix(get_namespaces(xsd_string))
    resolved_xpath = xpath.replace(prefix + ":", LXML_SCHEMA_NAMESPACE)

    # Detach the first matching node from its parent.
    doomed = tree.find(resolved_xpath)
    doomed.getparent().remove(doomed)

    return XSDTree.tostring(tree)
def check_xml_file_is_valid(data, request=None):
    """Validate the XML content of ``data`` against its template.

    Args:
        data: object exposing ``template`` (with a ``content`` attribute) and
            ``xml_content``.
        request: forwarded to validate_xml_data.

    Returns:
        True when validate_xml_data reports no error.

    Raises:
        exceptions.XMLError: the XML content cannot be parsed, or
            validate_xml_data returned an error.
        exceptions.XSDError: the template content cannot be parsed.
    """
    template = data.template

    # Parse the document first: a parsing failure is reported as an XML error.
    try:
        document_tree = XSDTree.build_tree(data.xml_content)
    except Exception as e:
        raise exceptions.XMLError(str(e))

    # A failure while parsing the template is reported as an XSD error.
    try:
        schema_tree = XSDTree.build_tree(template.content)
    except Exception as e:
        raise exceptions.XSDError(str(e))

    validation_error = validate_xml_data(schema_tree, document_tree, request=request)
    if validation_error is None:
        return True

    raise exceptions.XMLError(validation_error)
def get_hash(xml_string):
    """Compute a hash of an XML string after normalizing it.

    The parser installed below drops blank text, comments and processing
    instructions, and every XML Schema ``annotation`` element is removed
    from the tree before hashing.

    Args:
        xml_string (str): XML string to hash.

    Returns:
        The value returned by hash_dict for the dict produced by xmltodict
        (described as a SHA-1 hash string by the original documentation).
    """
    # NOTE(review): this replaces lxml's default parser and nothing visible
    # here restores it afterwards -- confirm this side effect is intended.
    etree.set_default_parser(
        parser=etree.XMLParser(
            remove_blank_text=True, remove_comments=True, remove_pis=True
        )
    )
    tree = XSDTree.build_tree(xml_string)

    # Strip every annotation element from the tree.
    for node in tree.findall(".//{http://www.w3.org/2001/XMLSchema}annotation"):
        node.getparent().remove(node)

    # Serialize the cleaned tree, turn it into a plain dict and hash that.
    cleaned_xml = XSDTree.tostring(tree)
    return hash_dict(xmltodict.parse(cleaned_xml, dict_constructor=dict))
def test_generate_extension_with_single_child_attribute_returns_expected_json_dict(
    self,
):
    fixture_path = join("attribute", "single")
    handler = self.extension_data_handler

    # Locate the xs:extension node in the fixture schema.
    schema_tree = handler.get_xsd(fixture_path)
    extension_nodes = schema_tree.xpath(
        "/xs:schema/xs:element/xs:complexType/xs:simpleContent/xs:extension",
        namespaces=self.namespaces,
    )
    extension_node = extension_nodes[0]

    # Build the edit tree from the serialized fixture document.
    document_tree = handler.get_xml(fixture_path)
    edit_tree = XSDTree.transform_to_xml(XSDTree.tostring(document_tree))

    actual = self.parser.generate_extension(
        extension_node,
        schema_tree,
        full_path="/root",
        edit_data_tree=edit_tree,
        default_value="entry0",
    )

    # Compare against the stored ".reload" JSON fixture.
    expected = handler.get_json(fixture_path + ".reload")
    self.assertDictEqual(expected, actual)
def test_add_appinfo_element_no_element_adds_it(self):
    # The appinfo element starts out empty; the call is expected to add an
    # <attribute>value</attribute> child to it.
    source_xsd = """ <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"> <xs:element name="root"><xs:annotation> <xs:appinfo></xs:appinfo></xs:annotation></xs:element> </xs:schema> """
    wanted_xsd = """ <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"> <xs:element name="root"> <xs:annotation> <xs:appinfo><attribute>value</attribute></xs:appinfo> </xs:annotation> </xs:element> </xs:schema> """

    result_xsd = add_appinfo_element(source_xsd, "xs:element", "attribute", "value")

    # Round-trip both strings through XSDTree before comparing them.
    actual = XSDTree.tostring(XSDTree.fromstring(result_xsd))
    expected = XSDTree.tostring(XSDTree.fromstring(wanted_xsd))
    self.assertEqual(actual, expected)
def test_reload_simple_content_basic(self):
    fixture_path = join("simple_content", "basic")
    handler = self.complex_type_data_handler

    # Locate the complexType node in the fixture schema.
    schema_tree = handler.get_xsd(fixture_path)
    complex_type_node = schema_tree.xpath(
        "/xs:schema/xs:complexType", namespaces=self.namespaces
    )[0]

    # Build the edit tree and read the text of the root element, which is
    # passed as the default value.
    document_tree = handler.get_xml(fixture_path)
    edit_tree = XSDTree.transform_to_xml(XSDTree.tostring(document_tree))
    root_text = document_tree.xpath("/root", namespaces=self.namespaces)[0].text

    result_dict = self.parser.generate_complex_type(
        complex_type_node,
        schema_tree,
        full_path="/root",
        edit_data_tree=edit_tree,
        default_value=root_text,
    )

    # Compare against the stored ".reload" JSON fixture.
    expected_dict = handler.get_json("%s.reload" % fixture_path)
    self.assertDictEqual(result_dict, expected_dict)
def test_iterparse_method_with_unicode(self):
    # The element name contains U+0192 written as an escape sequence; the
    # test passes as long as iterparse does not raise.
    events = ("end",)
    unicode_xsd = """ <xs:schema xmlns:xs='http://www.w3.org/2001/XMLSchema'> <\u0192-root><test></test></\u0192-root> </xs:schema> """
    XSDTree.iterparse(unicode_xsd, events)
def validate_form(request):
    """Validate data present in the form via XML validation.

    The curate data structure whose id is posted as ``id`` is rendered to
    XML and validated against its template content. Validation errors, or
    the message of any exception raised along the way, are stored under the
    ``errors`` key of ``response_dict``.

    Args:
        request: request whose ``POST['id']`` holds the curate data structure id.

    Returns:
        Nothing in the code visible here: ``response_dict`` is filled in but
        never returned. NOTE(review): confirm against the full file how the
        dict is sent back to the caller.
    """
    response_dict = {}
    try:
        # get curate data structure
        curate_data_structure_id = request.POST['id']
        curate_data_structure = curate_data_structure_api.get_by_id(
            curate_data_structure_id)

        # generate the XML
        xml_data = render_xml(
            curate_data_structure.data_structure_element_root)

        # build trees
        xsd_tree = XSDTree.build_tree(curate_data_structure.template.content)
        xml_tree = XSDTree.build_tree(xml_data)

        # validate XML document
        errors = validate_xml_data(xsd_tree, xml_tree)

        # FIXME: test xmlParseEntityRef exception: use of & < > forbidden
        if errors is not None:
            response_dict['errors'] = errors
    except Exception as e:
        # "except Exception, e" is Python-2-only syntax and exceptions have no
        # ".message" attribute on Python 3, so use "as e" and str(e).
        # Double quotes are replaced by single quotes in the reported message.
        message = str(e).replace('"', '\'')
        response_dict['errors'] = message
def download_xml_build_req(request):
    """Download xml of the building request.

    Args:
        request: request whose session may hold the XML under the
            ``xmlStringOAIPMH`` key.

    Returns:
        An HttpResponse serving the XML as an attachment, or an
        HttpResponseBadRequest when the session key is missing.
    """
    if 'xmlStringOAIPMH' not in request.session:
        return HttpResponseBadRequest(
            'An error occurred. Please reload the page and try again.')

    # We retrieve the XML file in session
    xml_string = request.session['xmlStringOAIPMH']
    try:
        xml_tree = XSDTree.build_tree(xml_string)
        xml_string_encoded = XSDTree.tostring(xml_tree, pretty=True)
    except Exception:
        # Best effort: if the content cannot be parsed or pretty-printed,
        # serve the raw string. "except Exception" (rather than a bare
        # except) lets KeyboardInterrupt / SystemExit propagate.
        xml_string_encoded = xml_string

    # Get the date to append it to the file title
    now = datetime.datetime.now()
    title = "OAI_PMH_BUILD_REQ_%s_.xml" % now.isoformat()
    file_obj = StringIO(xml_string_encoded)

    # Return the XML file
    response = HttpResponse(FileWrapper(file_obj), content_type='application/xml')
    response['Content-Disposition'] = 'attachment; filename=' + title
    return response
def insert_element_built_in_type(xsd_string, xpath, element_type_name):
    """Append an element of a built-in type under xpath and validate the schema.

    Args:
        xsd_string: XSD content, parsed with XSDTree.build_tree.
        xpath: path of the node receiving the new element; every
            "<default prefix>:" in it is replaced by LXML_SCHEMA_NAMESPACE
            before the lookup.
        element_type_name: used as the new element's ``name`` and, prefixed
            with the default prefix, as its ``type``.

    Returns:
        The modified tree serialized with XSDTree.tostring.

    Raises:
        XMLError: validate_xml_schema returned an error for the new tree.
    """
    tree = XSDTree.build_tree(xsd_string)

    # Resolve the schema prefix and rewrite the xpath with the namespace.
    prefix = get_default_prefix(get_namespaces(xsd_string))
    resolved_xpath = xpath.replace(prefix + ":", LXML_SCHEMA_NAMESPACE)

    # Attributes of the element to insert, e.g. type="<prefix>:<name>".
    new_attributes = {
        'type': prefix + ':' + element_type_name,
        'name': element_type_name
    }

    parent = tree.find(resolved_xpath)
    parent.append(
        XSDTree.create_element(
            "{}element".format(LXML_SCHEMA_NAMESPACE), attrib=new_attributes
        )
    )

    # Reject the change if the resulting schema does not validate.
    schema_error = validate_xml_schema(tree)
    if schema_error is not None:
        raise XMLError(schema_error)

    return XSDTree.tostring(tree)
def _update_attribute(xsd_string, xpath, attribute, value=None):
    """Set or delete an attribute of the element at xpath.

    Args:
        xsd_string: XSD content, parsed with XSDTree.build_tree.
        xpath: xpath of the element to update.
        attribute: name of the attribute to update.
        value: value to store; when None the attribute is deleted if present.

    Returns:
        The modified tree serialized with XSDTree.tostring.
    """
    tree = XSDTree.build_tree(xsd_string)
    target = get_element_by_xpath(tree, xpath, get_namespaces(xsd_string))

    if value is None:
        # No value given: drop the attribute, ignoring a missing one.
        if attribute in target.attrib:
            del target.attrib[attribute]
    else:
        target.attrib[attribute] = value

    return XSDTree.tostring(tree)
def _update_appinfo_element(xsd_string, xpath, appinfo_name, value=None):
    """Add/update or delete an appinfo child of the element at xpath.

    Args:
        xsd_string: XSD content, parsed with XSDTree.build_tree.
        xpath: xpath of the element to update.
        appinfo_name: name of the appinfo child to update.
        value: value to set; when None, delete_appinfo_child_from_element is
            called instead of add_appinfo_child_to_element.

    Returns:
        The modified tree serialized with XSDTree.tostring.
    """
    tree = XSDTree.build_tree(xsd_string)
    target = get_element_by_xpath(tree, xpath, get_namespaces(xsd_string))

    if value is None:
        # No value given: remove the appinfo child.
        delete_appinfo_child_from_element(target, appinfo_name)
    else:
        # A value was given: create or update the appinfo child.
        add_appinfo_child_to_element(target, appinfo_name, value)

    return XSDTree.tostring(tree)
def test_delete_appinfo_element_removed_if_exists(self):
    # The appinfo holds an <attribute> child; after deletion an empty appinfo
    # is expected.
    source_xsd = """ <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"> <xs:element name="root"> <xs:annotation> <xs:appinfo><attribute>value</attribute></xs:appinfo> </xs:annotation> </xs:element> </xs:schema> """
    wanted_xsd = """ <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"> <xs:element name="root"> <xs:annotation><xs:appinfo/></xs:annotation> </xs:element> </xs:schema> """
    element_xpath = "xs:element"
    child_name = "attribute"

    result_xsd = delete_appinfo_element(source_xsd, element_xpath, child_name)

    # Round-trip both strings through XSDTree before comparing them.
    actual = XSDTree.tostring(XSDTree.fromstring(result_xsd))
    expected = XSDTree.tostring(XSDTree.fromstring(wanted_xsd))
    self.assertEqual(actual, expected)
def set_status(data, status, user):
    """Write a status on the root element of a data's XML and save the data.

    Args:
        data: object exposing ``workspace`` and ``xml_content``.
        status: value stored in the root element's ``status`` attribute.
        user: forwarded to data_api.upsert.

    Returns:
        The value returned by data_api.upsert.

    Raises:
        exceptions.ModelError: the status is DataStatus.DELETED and the data
            has no workspace or its workspace has ``is_public`` set to False.
    """
    if status == DataStatus.DELETED:
        workspace = data.workspace
        if workspace is None or workspace.is_public is False:
            raise exceptions.ModelError(
                "the " + get_data_label() + " should be published if the targeted status is 'Deleted'")

    # Parse the content and set the attribute on the root element.
    tree = XSDTree.build_tree(data.xml_content)
    tree.getroot().attrib['status'] = status

    # Store the updated content back on the data, then save it.
    data.xml_content = XSDTree.tostring(tree)
    return data_api.upsert(data, user)
def test_output_without_method_html_default(self):
    # Stylesheet without any xsl:output whose first child is <html>;
    # get_extension is expected to report "html".
    stylesheet = (
        '<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0" xmlns:rsm="http://schema.nist.gov/xml/res-md/1.0wd-02-2017">'
        '<html lang="en"><head></head><body></body></html> </xsl:stylesheet>'
    )
    parsed_stylesheet = XSDTree.build_tree(stylesheet)
    self.assertEqual(XSDTree.get_extension(parsed_stylesheet), "html")
def test_output_without_method_xml_default(self):
    # Stylesheet with an xsl:output element that carries no "method"
    # attribute; get_extension is expected to report "xml".
    stylesheet = (
        '<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0" xmlns:rsm="http://schema.nist.gov/xml/res-md/1.0wd-02-2017">'
        '<xsl:output xsl="http://www.w3.org/1999/XSL/Transform" indent="yes" encoding="UTF-8" /> </xsl:stylesheet>'
    )
    parsed_stylesheet = XSDTree.build_tree(stylesheet)
    self.assertEqual(XSDTree.get_extension(parsed_stylesheet), "xml")
def test_iterparse_method_without_decoded_symbols(self):
    # Same document shape as the unicode-escape variant, but with the literal
    # character in the element name; iterparse is expected to raise XMLError.
    events = ("end",)
    raw_symbol_xsd = """ <xs:schema xmlns:xs='http://www.w3.org/2001/XMLSchema'> <ƒ-root><test></test></ƒ-root> </xs:schema> """
    with self.assertRaises(XMLError):
        XSDTree.iterparse(raw_symbol_xsd, events)
def test_remove_no_annotations_returns_same_value(self):
    # A schema that holds no annotation must serialize back to the very same
    # string after remove_annotations has run on its tree.
    xsd_string = (
        '<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">'
        '<xs:element name="integer" type="xs:integer"/></xs:schema>'
    )
    xsd_tree = XSDTree.build_tree(xsd_string)
    remove_annotations(xsd_tree)
    result_xsd_string = XSDTree.tostring(xsd_tree)
    # assertEqual (instead of assertTrue(a == b)) shows both strings when the
    # comparison fails.
    self.assertEqual(xsd_string, result_xsd_string)
def sanitize(input_value): """Sanitize the strings in the input :param input_value: :return: """ # get the type of the input input_type = type(input_value) # input is a list if input_type == list: clean_value = [] for item in input_value: clean_value.append(sanitize(item)) return clean_value # input is a dict elif input_type == dict: return { sanitize(key): sanitize(val) for key, val in list(input_value.items()) } # input is a string of characters elif input_type == str: try: # XML cleaning xml_cleaner_parser = etree.XMLParser(remove_blank_text=True) xml_data = XSDTree.fromstring(input_value, parser=xml_cleaner_parser) input_value = XSDTree.tostring(xml_data) except XMLError as e: # input is not XML, pass logger.warning("sanitize threw an exception: {0}".format(str(e))) finally: try: json_value = json.loads(input_value) sanitized_value = sanitize(json_value) clean_value = json.dumps(sanitized_value) except ValueError: clean_value = escape(input_value) return clean_value # input is a number elif input_type == int or input_type == float: return input_value # default, escape characters else: # Default sanitizing return escape(str(input_value))
def test_remove_annotations_returns_tree_without_annotations(self):
    # The input schema holds one annotation; after remove_annotations the
    # serialized tree must equal the same schema without it.
    xsd_string = (
        '<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">'
        '<xs:annotation><xs:appinfo/></xs:annotation>'
        '<xs:element name="integer" type="xs:integer"/></xs:schema>'
    )
    expected_xsd_string = (
        '<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">'
        '<xs:element name="integer" type="xs:integer"/></xs:schema>'
    )
    xsd_tree = XSDTree.build_tree(xsd_string)
    remove_annotations(xsd_tree)
    result_xsd_string = XSDTree.tostring(xsd_tree)
    # assertEqual (instead of assertTrue(a == b)) shows both strings when the
    # comparison fails.
    self.assertEqual(expected_xsd_string, result_xsd_string)
def format_content_xml(xml_string):
    """Return a pretty-printed version of an XML string.

    Args:
        xml_string: XML content, parsed with XSDTree.build_tree.

    Returns:
        The tree serialized with ``XSDTree.tostring(..., pretty=True)``.

    Raises:
        exceptions.XMLError: parsing or serializing raised any Exception.
    """
    try:
        return XSDTree.tostring(XSDTree.build_tree(xml_string), pretty=True)
    except Exception:
        raise exceptions.XMLError("Content is not well formatted XML.")
def insert_element_type(xsd_string, xpath, type_content, element_type_name, include_url, request):
    """Insert an element of a given type in an XSD string and validate it.

    The insertion itself is delegated to _insert_element_type; this wrapper
    validates the returned tree and serializes it.

    Args:
        xsd_string: xsd string
        xpath: xpath where to insert the element
        type_content: string content of the type to insert
        element_type_name: name of the type
        include_url: url used to reference the type in schemaLocation
        request: forwarded to validate_xml_schema

    Returns:
        The new tree serialized with XSDTree.tostring.

    Raises:
        XMLError: validate_xml_schema returned an error for the new tree.
    """
    updated_tree = _insert_element_type(
        xsd_string, xpath, type_content, element_type_name, include_url
    )

    # Reject the change if the resulting schema does not validate.
    schema_error = validate_xml_schema(updated_tree, request=request)
    if schema_error is not None:
        raise XMLError(schema_error)

    return XSDTree.tostring(updated_tree)
def is_well_formed_xml(xml_string):
    """Tell whether a string can be parsed by XSDTree.build_tree.

    Args:
        xml_string: content to check.

    Returns:
        False when XSDTree.build_tree raises any Exception, True otherwise.
    """
    try:
        XSDTree.build_tree(xml_string)
    except Exception:
        return False
    else:
        return True
def get_template_with_server_dependencies(xsd_string, dependencies, request=None):
    """Return the template with its dependencies updated, once validated.

    Args:
        xsd_string: XSD content handed to update_dependencies.
        dependencies: dependencies handed to update_dependencies.
        request: forwarded to validate_xml_schema.

    Returns:
        The updated tree serialized with XSDTree.tostring.

    Raises:
        exceptions.XSDError: update_dependencies or validate_xml_schema raised,
            or validation returned an error (reported with its single quotes
            removed).
    """
    # Any failure during the dependency rewrite is reported as an XSD error.
    try:
        updated_tree = update_dependencies(xsd_string, dependencies)
    except Exception:
        raise exceptions.XSDError("Something went wrong during dependency update.")

    # Same for a failure of the validation call itself.
    try:
        schema_error = validate_xml_schema(updated_tree, request=request)
    except Exception:
        raise exceptions.XSDError("Something went wrong during XSD validation.")

    if schema_error is not None:
        raise exceptions.XSDError(schema_error.replace("'", ""))

    return XSDTree.tostring(updated_tree)
def get_record_elt(xml_elt, metadata_prefix):
    """Build a dict describing an OAI-PMH record.

    Args:
        xml_elt: value handed to the ``Record`` constructor.
        metadata_prefix: stored as is under the "metadataPrefix" key.

    Returns:
        A dict with the keys "identifier", "datestamp", "deleted", "sets",
        "metadataPrefix", "metadata" and "raw". "metadata" is None for a
        deleted record, otherwise the serialized OAI ``metadata`` element.
    """
    record = Record(xml_elt)

    elt_ = {
        "identifier": record.header.identifier,
        "datestamp": record.header.datestamp,
        "deleted": record.deleted,
        "sets": record.header.setSpecs,
        "metadataPrefix": metadata_prefix,
    }

    # Only a record that is not deleted gets its metadata serialized.
    if record.deleted:
        elt_["metadata"] = None
    else:
        metadata_node = record.xml.find(
            ".//" + "{http://www.openarchives.org/OAI/2.0/}" + "metadata/"
        )
        elt_["metadata"] = XSDTree.tostring(metadata_node)

    elt_["raw"] = record.raw
    return elt_
def update_dependencies(xsd_string, dependencies):
    """Update the schemaLocation of the includes/imports of a schema.

    For every ``(schema_location, dependency_id)`` pair whose id is not None,
    each direct ``include`` / ``import`` child of the root whose
    ``schemaLocation`` equals ``schema_location`` gets that attribute
    replaced by ``_get_schema_location_uri(dependency_id)``.

    Args:
        xsd_string: XSD content, parsed with XSDTree.build_tree.
        dependencies: mapping of schema location to dependency id.

    Returns:
        The updated tree (not a string).
    """
    # build the tree
    xsd_tree = XSDTree.build_tree(xsd_string)

    # get the imports
    xsd_imports = xsd_tree.findall(
        "{}import".format(xml_utils_constants.LXML_SCHEMA_NAMESPACE)
    )
    # get the includes
    xsd_includes = xsd_tree.findall(
        "{}include".format(xml_utils_constants.LXML_SCHEMA_NAMESPACE)
    )

    for schema_location, dependency_id in dependencies.items():
        if dependency_id is None:
            continue

        # Includes are processed before imports, as before.
        for xsd_elements in (xsd_includes, xsd_imports):
            for xsd_element in xsd_elements:
                # schemaLocation is optional on xs:import: an element without
                # it is skipped instead of raising KeyError.
                current_location = xsd_element.attrib.get("schemaLocation")
                if current_location is None:
                    continue
                if schema_location == current_location:
                    xsd_element.attrib["schemaLocation"] = _get_schema_location_uri(
                        dependency_id
                    )

    return xsd_tree