def test_delete_appinfo_element_removed_if_exists(self):
    """Deleting an appinfo child that exists leaves an empty ``xs:appinfo``."""

    def _canonical(xml_text):
        # Round-trip through XSDTree so both documents are serialized alike.
        return XSDTree.tostring(XSDTree.fromstring(xml_text))

    source_xsd = (
        ' <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">'
        ' <xs:element name="root">'
        ' <xs:annotation>'
        ' <xs:appinfo><attribute>value</attribute></xs:appinfo>'
        ' </xs:annotation>'
        ' </xs:element>'
        ' </xs:schema> '
    )
    result_xsd = delete_appinfo_element(source_xsd, "xs:element", "attribute")
    expected_xsd = (
        ' <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">'
        ' <xs:element name="root">'
        ' <xs:annotation><xs:appinfo/></xs:annotation>'
        ' </xs:element>'
        ' </xs:schema> '
    )

    self.assertEqual(_canonical(result_xsd), _canonical(expected_xsd))
def test_add_appinfo_element_present_in_second_of_two_appinfo(self):
    """The child held by the second of two ``xs:appinfo`` nodes is removed.

    NOTE(review): despite the ``add`` in its name, this test exercises
    ``delete_appinfo_element``; the name is kept so the test id is unchanged.
    """

    def _canonical(xml_text):
        # Round-trip through XSDTree so both documents are serialized alike.
        return XSDTree.tostring(XSDTree.fromstring(xml_text))

    source_xsd = (
        ' <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">'
        ' <xs:element name="root">'
        ' <xs:annotation>'
        ' <xs:appinfo></xs:appinfo>'
        ' <xs:appinfo><attribute>old</attribute></xs:appinfo>'
        ' </xs:annotation>'
        ' </xs:element>'
        ' </xs:schema> '
    )
    result_xsd = delete_appinfo_element(source_xsd, "xs:element", "attribute")
    expected_xsd = (
        ' <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">'
        ' <xs:element name="root">'
        ' <xs:annotation><xs:appinfo/><xs:appinfo/></xs:annotation>'
        ' </xs:element>'
        ' </xs:schema> '
    )

    self.assertEqual(_canonical(result_xsd), _canonical(expected_xsd))
def test_add_appinfo_element_no_element_adds_it(self):
    """Adding to an empty ``xs:appinfo`` creates the child with its value."""

    def _canonical(xml_text):
        # Round-trip through XSDTree so both documents are serialized alike.
        return XSDTree.tostring(XSDTree.fromstring(xml_text))

    source_xsd = (
        ' <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">'
        ' <xs:element name="root"><xs:annotation>'
        ' <xs:appinfo></xs:appinfo></xs:annotation></xs:element>'
        ' </xs:schema> '
    )
    result_xsd = add_appinfo_element(source_xsd, "xs:element", "attribute", "value")
    expected_xsd = (
        ' <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">'
        ' <xs:element name="root">'
        ' <xs:annotation>'
        ' <xs:appinfo><attribute>value</attribute></xs:appinfo>'
        ' </xs:annotation>'
        ' </xs:element>'
        ' </xs:schema> '
    )

    self.assertEqual(_canonical(result_xsd), _canonical(expected_xsd))
def test_generate_extension_with_single_child_attribute_returns_expected_json_dict(
    self,
):
    """Reload the ``attribute/single`` fixture through ``generate_extension``."""
    fixture = join("attribute", "single")

    # Schema side: first xs:extension found under the root element.
    schema_tree = self.extension_data_handler.get_xsd(fixture)
    extension_nodes = schema_tree.xpath(
        "/xs:schema/xs:element/xs:complexType/xs:simpleContent/xs:extension",
        namespaces=self.namespaces,
    )
    extension_node = extension_nodes[0]

    # Data side: the XML document is serialized, then converted for editing.
    data_tree = self.extension_data_handler.get_xml(fixture)
    edit_tree = XSDTree.transform_to_xml(XSDTree.tostring(data_tree))

    generated = self.parser.generate_extension(
        extension_node,
        schema_tree,
        full_path="/root",
        edit_data_tree=edit_tree,
        default_value="entry0",
    )

    expected = self.extension_data_handler.get_json(fixture + ".reload")
    self.assertDictEqual(expected, generated)
def delete_xsd_element(xsd_string, xpath):
    """Remove the element located at ``xpath`` from an XSD string.

    Args:
        xsd_string: XSD content to edit.
        xpath: path to the element, written with the schema's default prefix.

    Returns:
        The XSD serialized again after the element was detached from its parent.
    """
    tree = XSDTree.build_tree(xsd_string)

    # Swap the "prefix:" notation for LXML_SCHEMA_NAMESPACE (presumably the
    # "{uri}" form that find() understands - confirm against its definition).
    prefix = get_default_prefix(get_namespaces(xsd_string))
    lxml_path = xpath.replace(prefix + ":", LXML_SCHEMA_NAMESPACE)

    target = tree.find(lxml_path)
    target.getparent().remove(target)

    return XSDTree.tostring(tree)
def get_record_elt(xml_elt, metadata_prefix):
    """Build a plain dict describing an OAI-PMH record.

    Args:
        xml_elt: XML input handed to the sickle ``Record`` constructor.
        metadata_prefix: Metadata prefix stored alongside the record.

    Returns:
        Dict with the record identifier, datestamp, deleted flag, sets,
        metadata prefix, serialized metadata (``None`` for a deleted record)
        and raw content.
    """
    record = Record(xml_elt)

    record_dict = {
        "identifier": record.header.identifier,
        "datestamp": record.header.datestamp,
        "deleted": record.deleted,
        "sets": record.header.setSpecs,
        "metadataPrefix": metadata_prefix,
    }

    if record.deleted:
        record_dict["metadata"] = None
    else:
        # NOTE(review): the trailing "/" presumably makes lxml select the
        # child of <metadata> rather than <metadata> itself - confirm.
        metadata_path = ".//{http://www.openarchives.org/OAI/2.0/}metadata/"
        record_dict["metadata"] = XSDTree.tostring(record.xml.find(metadata_path))

    record_dict["raw"] = record.raw
    return record_dict
def download_xml_build_req(request):
    """Download the XML of the build request kept in session.

    Args:
        request: HTTP request whose session may hold ``xmlStringOAIPMH``.

    Returns:
        An ``HttpResponse`` serving the XML as an attachment, or an
        ``HttpResponseBadRequest`` when the session holds no XML.
    """
    if 'xmlStringOAIPMH' not in request.session:
        return HttpResponseBadRequest(
            'An error occurred. Please reload the page and try again.')

    # We retrieve the XML file in session
    xml_string = request.session['xmlStringOAIPMH']
    try:
        xml_tree = XSDTree.build_tree(xml_string)
        xml_string_encoded = XSDTree.tostring(xml_tree, pretty=True)
    except Exception:
        # Best effort: serve the stored text untouched if it cannot be
        # pretty-printed. `except Exception` (rather than a bare `except`)
        # lets KeyboardInterrupt / SystemExit propagate.
        xml_string_encoded = xml_string

    # Get the date to append it to the file title
    now = datetime.datetime.now()
    title = "OAI_PMH_BUILD_REQ_%s_.xml" % now.isoformat()

    # Return the XML file
    file_obj = StringIO(xml_string_encoded)
    response = HttpResponse(FileWrapper(file_obj), content_type='application/xml')
    response['Content-Disposition'] = 'attachment; filename=' + title
    return response
def _update_appinfo_element(xsd_string, xpath, appinfo_name, value=None):
    """Set or remove an appinfo child on the element found at ``xpath``.

    Args:
        xsd_string: XSD content to edit.
        xpath: xpath to the element to update.
        appinfo_name: name of the appinfo child.
        value: value to set; ``None`` requests deletion of the child.

    Returns:
        The updated XSD as a string.
    """
    tree = XSDTree.build_tree(xsd_string)
    target = get_element_by_xpath(tree, xpath, get_namespaces(xsd_string))

    if value is None:
        # No value: drop the appinfo child if it is present.
        delete_appinfo_child_from_element(target, appinfo_name)
    else:
        # A value was given: create or update the appinfo child.
        add_appinfo_child_to_element(target, appinfo_name, value)

    return XSDTree.tostring(tree)
def test_reload_simple_content_basic(self):
    """Reload the ``simple_content/basic`` fixture via ``generate_complex_type``."""
    fixture = join("simple_content", "basic")

    schema_tree = self.complex_type_data_handler.get_xsd(fixture)
    complex_type_node = schema_tree.xpath(
        "/xs:schema/xs:complexType", namespaces=self.namespaces
    )[0]

    data_tree = self.complex_type_data_handler.get_xml(fixture)
    edit_tree = XSDTree.transform_to_xml(XSDTree.tostring(data_tree))
    # Text of the data's root element is passed as the default value.
    root_text = data_tree.xpath("/root", namespaces=self.namespaces)[0].text

    generated = self.parser.generate_complex_type(
        complex_type_node,
        schema_tree,
        full_path="/root",
        edit_data_tree=edit_tree,
        default_value=root_text,
    )

    expected = self.complex_type_data_handler.get_json(fixture + ".reload")
    self.assertDictEqual(generated, expected)
def get_content_by_xpath(xml_string, xpath, namespaces=None):
    """Return the content matched by ``xpath`` as a list of strings.

    Args:
        xml_string: XML content to query.
        xpath: xpath expression to evaluate.
        namespaces: optional namespace mapping passed to the xpath call.

    Returns:
        One string per match: string results are kept as they are, any other
        result is serialized with ``XSDTree.tostring``.
    """
    tree = XSDTree.build_tree(xml_string)
    matches = tree.xpath(xpath, namespaces=namespaces)
    return [
        match if isinstance(match, str) else XSDTree.tostring(match)
        for match in matches
    ]
def rename_xsd_element(xsd_string, xpath, new_name):
    """Change the ``name`` attribute of the element found at ``xpath``.

    Args:
        xsd_string: XSD content to edit.
        xpath: path to the element, written with the schema's default prefix.
        new_name: value to store in the ``name`` attribute.

    Returns:
        The updated XSD as a string.
    """
    tree = XSDTree.build_tree(xsd_string)

    # Express the path with LXML_SCHEMA_NAMESPACE instead of the "prefix:" form.
    prefix = get_default_prefix(get_namespaces(xsd_string))
    lxml_path = xpath.replace(prefix + ":", LXML_SCHEMA_NAMESPACE)

    tree.find(lxml_path).attrib["name"] = new_name
    return XSDTree.tostring(tree)
def _retrieve_data(self, request):
    """Retrieve the module's data from the request.

    Args:
        request: HTTP request; ``data`` is read from GET or POST.

    Returns:
        For POST, the submitted ``data`` value. For GET, the pretty-printed
        XML rebuilt from ``data``, or ``data`` itself when nothing could be
        rebuilt. An empty string when ``data`` is absent or for other methods.
    """
    method = request.method

    if method == "POST":
        return request.POST["data"] if "data" in request.POST else ""

    if method != "GET" or "data" not in request.GET:
        return ""

    raw_data = request.GET["data"]
    try:
        rendered = ""
        # Convert the string to XML elements and keep the pretty format of
        # the XML for display.
        for node in self.parse_data_with_root(raw_data):
            rendered += XSDTree.tostring(node, True)
    except XMLError:
        # The data may not be valid XML when displayed again: show it as is.
        return raw_data

    return rendered if rendered else raw_data
def set_status(data, status, user):
    """Write ``status`` on the root of the data's XML content and save it.

    Args:
        data: data whose ``xml_content`` is updated.
        status: value stored in the root ``status`` attribute.
        user: user passed to ``data_api.upsert``.

    Returns:
        The result of ``data_api.upsert``.

    Raises:
        exceptions.ModelError: if the status is ``DataStatus.DELETED`` while
            the data has no workspace or its workspace is not public.
    """
    if status == DataStatus.DELETED:
        workspace = data.workspace
        if workspace is None or workspace.is_public is False:
            raise exceptions.ModelError(
                "the " + get_data_label() + " should be published if the targeted status is 'Deleted'")

    # Parse the content, stamp the status on the root, then serialize it back.
    tree = XSDTree.build_tree(data.xml_content)
    tree.getroot().attrib['status'] = status
    data.xml_content = XSDTree.tostring(tree)

    return data_api.upsert(data, user)
def set_xsd_element_occurrences(xsd_string, xpath, min_occurs, max_occurs):
    """Set ``minOccurs`` and ``maxOccurs`` on the element found at ``xpath``.

    Args:
        xsd_string: XSD content to edit.
        xpath: path to the element, written with the schema's default prefix.
        min_occurs: value stored in the ``minOccurs`` attribute.
        max_occurs: value stored in the ``maxOccurs`` attribute.

    Returns:
        The updated XSD as a string.
    """
    tree = XSDTree.build_tree(xsd_string)

    # Express the path with LXML_SCHEMA_NAMESPACE instead of the "prefix:" form.
    prefix = get_default_prefix(get_namespaces(xsd_string))
    lxml_path = xpath.replace(prefix + ":", LXML_SCHEMA_NAMESPACE)

    target = tree.find(lxml_path)
    target.attrib["minOccurs"] = min_occurs
    target.attrib["maxOccurs"] = max_occurs

    return XSDTree.tostring(tree)
def insert_element_built_in_type(xsd_string, xpath, element_type_name):
    """Append an element of a built-in type at ``xpath`` and validate the schema.

    Args:
        xsd_string: xsd string
        xpath: xpath where to insert the element
        element_type_name: name of the type to insert; also used as the name
            of the new element

    Returns:
        The updated XSD as a string.

    Raises:
        XMLError: if ``validate_xml_schema`` reports an error.
    """
    tree = XSDTree.build_tree(xsd_string)

    prefix = get_default_prefix(get_namespaces(xsd_string))
    lxml_path = xpath.replace(prefix + ":", LXML_SCHEMA_NAMESPACE)

    parent = tree.find(lxml_path)
    new_element = XSDTree.create_element(
        "{}element".format(LXML_SCHEMA_NAMESPACE),
        attrib={
            # The type is referenced through the schema's default prefix.
            'type': prefix + ':' + element_type_name,
            'name': element_type_name,
        },
    )
    parent.append(new_element)

    validation_error = validate_xml_schema(tree)
    if validation_error is not None:
        raise XMLError(validation_error)

    return XSDTree.tostring(tree)
def get_hash(xml_string):
    """Get the hash of an XML String.

    Blank text, comments and processing instructions are dropped by the
    parser, and all ``xs:annotation`` elements are removed, so that two
    similar XML strings produce the same hash.

    Args:
        xml_string (str): XML String to hash

    Returns:
        str: hash computed by ``hash_dict`` on the cleaned content (SHA-1
            according to the original documentation).
    """
    hash_parser = etree.XMLParser(remove_blank_text=True, remove_comments=True,
                                  remove_pis=True)

    # The default parser is thread-wide state: install the hashing parser only
    # while the tree is built, then put the previous one back so later parsing
    # on this thread does not silently lose comments, PIs and blank text.
    # (build_tree is assumed to rely on the default parser, as before.)
    previous_parser = etree.get_default_parser()
    etree.set_default_parser(parser=hash_parser)
    try:
        xml_tree = XSDTree.build_tree(xml_string)
    finally:
        etree.set_default_parser(parser=previous_parser)

    # Remove all annotations
    annotations = xml_tree.findall(
        ".//{http://www.w3.org/2001/XMLSchema}annotation")
    for annotation in annotations:
        annotation.getparent().remove(annotation)

    clean_xml_string = XSDTree.tostring(xml_tree)

    # Parse XML string into dict, then hash the dict
    xml_dict = xmltodict.parse(clean_xml_string, dict_constructor=dict)
    return hash_dict(xml_dict)
def _update_attribute(xsd_string, xpath, attribute, value=None):
    """Set or delete an attribute on the element found at ``xpath``.

    Args:
        xsd_string: XSD content to edit.
        xpath: xpath of the element to update.
        attribute: name of the attribute to update.
        value: value to set; ``None`` requests deletion of the attribute.

    Returns:
        The updated XSD as a string.
    """
    tree = XSDTree.build_tree(xsd_string)
    target = get_element_by_xpath(tree, xpath, get_namespaces(xsd_string))

    if value is None:
        # Deleting an attribute that is not there is a no-op.
        if attribute in target.attrib:
            del target.attrib[attribute]
    else:
        target.attrib[attribute] = value

    return XSDTree.tostring(tree)
def get_template_with_server_dependencies(xsd_string, dependencies, request=None):
    """Return the template with schema locations pointing to the server.

    Args:
        xsd_string: XSD content of the template.
        dependencies: dependencies handed to ``update_dependencies``.
        request: optional request forwarded to ``validate_xml_schema``.

    Returns:
        The updated XSD as a string.

    Raises:
        exceptions.XSDError: if the dependency update fails, if validation
            itself fails, or if validation reports an error.
    """
    # replace includes/imports by API calls (get dependencies starting by the imports)
    try:
        xsd_tree = update_dependencies(xsd_string, dependencies)
    except Exception as exception:
        # Chain the cause so the underlying failure stays in the traceback.
        raise exceptions.XSDError(
            "Something went wrong during dependency update."
        ) from exception

    # validate the schema
    try:
        error = validate_xml_schema(xsd_tree, request=request)
    except Exception as exception:
        raise exceptions.XSDError(
            "Something went wrong during XSD validation."
        ) from exception

    # is it a valid XML document ?
    if error is not None:
        raise exceptions.XSDError(error.replace("'", ""))

    return XSDTree.tostring(xsd_tree)
def insert_element_type(xsd_string, xpath, type_content, element_type_name, include_url, request):
    """Insert an element of the given type in an XSD string and validate it.

    Args:
        xsd_string: xsd string
        xpath: xpath where to insert the element
        type_content: string content of the type to insert
        element_type_name: name of the type
        include_url: url used to reference the type in schemaLocation
        request: request forwarded to ``validate_xml_schema``

    Returns:
        The updated XSD as a string.

    Raises:
        XMLError: if ``validate_xml_schema`` reports an error.
    """
    updated_tree = _insert_element_type(
        xsd_string, xpath, type_content, element_type_name, include_url
    )

    validation_error = validate_xml_schema(updated_tree, request=request)
    if validation_error is not None:
        raise XMLError(validation_error)

    return XSDTree.tostring(updated_tree)
def extract_xml_from_table(table_name, table):
    """Transform a table into an XML string.

    Args:
        table_name: name stored on the enclosing ``table`` element.
        table: mapping with a ``headers`` list and a ``values`` list of rows.

    Returns:
        The serialized ``headers`` element followed by the serialized ``rows``
        element (the enclosing ``table`` element is not part of the output),
        or an empty string when the table is not valid.
    """
    if not ExcelUploaderModule.is_table_valid(table_name, table):
        return ""

    table_node = XSDTree.create_element("table")
    table_node.set("name", table_name)
    headers_node = XSDTree.create_sub_element(table_node, "headers")
    rows_node = XSDTree.create_sub_element(table_node, "rows")

    # One <column id="..."> per header, numbered from 0.
    for column_id, header_name in enumerate(table["headers"]):
        header_cell = XSDTree.create_sub_element(headers_node, "column")
        header_cell.set("id", str(column_id))
        header_cell.text = header_name

    # One <row id="..."> per value list, each holding numbered <column> cells.
    for row_id, row_values in enumerate(table["values"]):
        row_node = XSDTree.create_sub_element(rows_node, "row")
        row_node.set("id", str(row_id))
        for column_id, cell_value in enumerate(row_values):
            cell = XSDTree.create_sub_element(row_node, "column")
            cell.set("id", str(column_id))
            cell.text = cell_value

    return XSDTree.tostring(headers_node) + XSDTree.tostring(rows_node)
def test_remove_no_annotations_returns_same_value(self):
    """A schema without annotations serializes back to the same string."""
    xsd_string = (
        '<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">'
        '<xs:element name="integer" type="xs:integer"/></xs:schema>'
    )

    xsd_tree = XSDTree.build_tree(xsd_string)
    remove_annotations(xsd_tree)
    result_xsd_string = XSDTree.tostring(xsd_tree)

    # assertEqual reports both strings on failure, unlike assertTrue(a == b).
    self.assertEqual(xsd_string, result_xsd_string)
def sanitize(input_value):
    """Sanitize the strings in the input.

    Lists and dicts are sanitized recursively (dict keys included), ints and
    floats are returned untouched, strings are cleaned as XML when they parse
    as XML and then either sanitized as JSON or escaped. Any other value is
    converted with ``str`` and escaped.

    :param input_value: value to sanitize
    :return: sanitized value
    """
    # Dispatch on the exact type (not isinstance): subclasses such as bool
    # therefore fall through to the default branch.
    kind = type(input_value)

    if kind is list:
        return [sanitize(item) for item in input_value]

    if kind is dict:
        return {
            sanitize(key): sanitize(val) for key, val in list(input_value.items())
        }

    if kind is str:
        try:
            # XML cleaning
            blank_stripping_parser = etree.XMLParser(remove_blank_text=True)
            xml_tree = XSDTree.fromstring(input_value, parser=blank_stripping_parser)
            input_value = XSDTree.tostring(xml_tree)
        except XMLError as e:
            # input is not XML, pass
            logger.warning("sanitize threw an exception: {0}".format(str(e)))
        finally:
            # NOTE(review): returning from `finally` also discards any
            # in-flight exception other than XMLError - kept as is.
            try:
                clean_value = json.dumps(sanitize(json.loads(input_value)))
            except ValueError:
                clean_value = escape(input_value)
            return clean_value

    if kind in (int, float):
        return input_value

    # Default sanitizing
    return escape(str(input_value))
def test_remove_annotations_returns_tree_without_annotations(self):
    """``remove_annotations`` strips the ``xs:annotation`` from the schema."""
    xsd_string = (
        '<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">'
        '<xs:annotation><xs:appinfo/></xs:annotation>'
        '<xs:element name="integer" type="xs:integer"/></xs:schema>'
    )
    expected_xsd_string = (
        '<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">'
        '<xs:element name="integer" type="xs:integer"/></xs:schema>'
    )

    xsd_tree = XSDTree.build_tree(xsd_string)
    remove_annotations(xsd_tree)
    result_xsd_string = XSDTree.tostring(xsd_tree)

    # assertEqual reports both strings on failure, unlike assertTrue(a == b).
    self.assertEqual(expected_xsd_string, result_xsd_string)
def format_content_xml(xml_string):
    """Format XML content.

    Args:
        xml_string: XML content to pretty-print.

    Returns:
        The pretty-printed XML string.

    Raises:
        exceptions.XMLError: if the content cannot be parsed or serialized.
    """
    try:
        xml_tree = XSDTree.build_tree(xml_string)
        return XSDTree.tostring(xml_tree, pretty=True)
    except Exception as exception:
        # Chain the cause so the underlying failure stays in the traceback.
        raise exceptions.XMLError("Content is not well formatted XML.") from exception
def _xsd_serialize(xsd_tree, pretty_print=False):
    """Serialize xsd document.

    Args:
        xsd_tree: tree to serialize.
        pretty_print: forwarded to ``XSDTree.tostring`` as ``pretty``.

    Returns:
        xsd string

    Raises:
        Exception: if serialization fails; the message embeds the original
            error, which is also chained as the cause.
    """
    try:
        return XSDTree.tostring(xsd_tree, pretty=pretty_print)
    except Exception as e:
        raise Exception("XSD serialization error : " + str(e)) from e
def test_reload_complex_type_basic(self):
    """Reload the ``complex_type/basic`` fixture through ``generate_element``."""
    fixture = join("complex_type", "basic")

    schema_tree = self.element_data_handler.get_xsd(fixture)
    element_node = schema_tree.xpath(
        "/xs:schema/xs:element", namespaces=self.namespaces
    )[0]

    data_tree = self.element_data_handler.get_xml(fixture)
    edit_tree = XSDTree.transform_to_xml(XSDTree.tostring(data_tree))

    generated = self.parser.generate_element(
        element_node, schema_tree, full_path="", edit_data_tree=edit_tree
    )

    expected = self.element_data_handler.get_json(fixture + ".reload")
    self.assertDictEqual(expected, generated)
def remove_single_root_element(xsd_string):
    """Remove the root element from the xsd string.

    Args:
        xsd_string: XSD content to edit.

    Returns:
        The XSD serialized without the first schema-level ``element``, or the
        input string unchanged when no such element is found.
    """
    tree = XSDTree.build_tree(xsd_string)

    root_element = tree.find("{}element".format(LXML_SCHEMA_NAMESPACE))
    if root_element is None:
        # Nothing to remove: hand the input back as received.
        return xsd_string

    # Detach the root element from its parent (the schema).
    root_element.getparent().remove(root_element)
    return XSDTree.tostring(tree)
def test_reload_element_basic(self):
    """Reload the ``element/basic`` fixture through ``generate_choice``."""
    fixture = join("element", "basic")

    schema_tree = self.choice_data_handler.get_xsd(fixture)
    choice_nodes = schema_tree.xpath(
        "/xs:schema/xs:complexType/xs:choice", namespaces=self.namespaces
    )
    choice_node = choice_nodes[0]

    data_tree = self.choice_data_handler.get_xml(fixture)
    edit_tree = XSDTree.transform_to_xml(XSDTree.tostring(data_tree))

    generated = self.parser.generate_choice(
        choice_node, schema_tree, full_path="/root", edit_data_tree=edit_tree
    )

    expected = self.choice_data_handler.get_json(fixture + ".reload")
    self.assertDictEqual(expected, generated)
def _run_test(self, xsd_files):
    """Generate the sequence for a fixture and load its expected result.

    Args:
        xsd_files: fixture path handed to the sequence data handler.

    Returns:
        Tuple ``(result_dict, expected_dict)``.
    """
    schema_tree = self.sequence_data_handler.get_xsd(xsd_files)
    sequence_nodes = schema_tree.xpath(
        "/xs:schema/xs:complexType/xs:sequence", namespaces=self.namespaces
    )
    sequence_node = sequence_nodes[0]

    data_tree = self.sequence_data_handler.get_xml(xsd_files)
    edit_tree = XSDTree.transform_to_xml(XSDTree.tostring(data_tree))

    generated = self.parser.generate_sequence(
        sequence_node, schema_tree, full_path="/root", edit_data_tree=edit_tree
    )
    expected = self.sequence_data_handler.get_json(xsd_files + ".reload")

    return generated, expected
def rename_single_root_type(xsd_string, type_name):
    """Rename the type of the single root element.

    The first schema-level ``element`` gets ``type_name`` as its ``type`` and
    the first schema-level ``complexType`` gets it as its ``name``.

    Args:
        xsd_string: XSD content to edit.
        type_name: new name of the root type.

    Returns:
        The updated XSD as a string.
    """
    tree = XSDTree.build_tree(xsd_string)

    root_element = tree.find(LXML_SCHEMA_NAMESPACE + "element")
    root_element.attrib["type"] = type_name

    root_type = tree.find(LXML_SCHEMA_NAMESPACE + "complexType")
    root_type.attrib["name"] = type_name

    return XSDTree.tostring(tree)