def build_etree(self, xml_string):
    """Parse ``xml_string`` into an element tree.

    The input is first normalised with ``self.re_encode_string`` and the
    result is handed to ``ET.fromstring``.

    :param xml_string: the XML document to parse.
    :return: whatever ``ET.fromstring`` returns for the re-encoded document.
    :raises ValueError: if the parser raises ``ET.ParseError``; the parser
        error and the offending input are logged before raising.
        Exceptions raised by ``self.re_encode_string`` are not caught here.
    """
    try:
        return ET.fromstring(self.re_encode_string(xml_string))
    except ET.ParseError as e:
        logger = RequiredFeature('logger').request()
        # Format the exception object itself rather than ``e.message``:
        # that attribute is deprecated since Python 2.6 and does not exist
        # on Python 3 exceptions, where reading it would raise
        # AttributeError from inside this handler and hide the real error.
        logger.error(
            "Building ETree from XML failed: %s. Offending string follows ..." % (e,))
        logger.error(xml_string)
        raise ValueError("Building ETree Failed")
def build_etree(self, xml_string):
    """Parse ``xml_string`` into an element tree.

    The input is first normalised with ``self.re_encode_string`` and the
    result is handed to ``ET.fromstring``.

    :param xml_string: the XML document to parse.
    :return: whatever ``ET.fromstring`` returns for the re-encoded document.
    :raises ValueError: if the parser raises ``ET.ParseError``; the parser
        error and the offending input are logged before raising.
        Exceptions raised by ``self.re_encode_string`` are not caught here.
    """
    try:
        return ET.fromstring(self.re_encode_string(xml_string))
    except ET.ParseError as e:
        logger = RequiredFeature('logger').request()
        # Format the exception object itself rather than ``e.message``:
        # that attribute is deprecated since Python 2.6 and does not exist
        # on Python 3 exceptions, where reading it would raise
        # AttributeError from inside this handler and hide the real error.
        logger.error(
            "Building ETree from XML failed: %s. Offending string follows ..." % (e,))
        logger.error(xml_string)
        raise ValueError("Building ETree Failed")
def re_encode_string(self, xml_string):
    """Return ``xml_string`` as bytes in the encoding named in its text.

    Byte input is decoded by trying ASCII, then UTF-8, then UTF-16. Input
    that is already text is used as is. The decoded text is searched for
    the first ``UTF-<n>`` token (such as the one in an XML declaration)
    and re-encoded with that codec, or with UTF-8 when no token is found.
    If that encode fails, one fallback is tried: UTF-16 when the first
    choice was UTF-8, otherwise UTF-8.

    :param xml_string: XML document as a byte string or a text string.
    :return: the document as an encoded byte string.
    :raises ValueError: if none of the decode attempts, or neither of the
        encode attempts, succeeds; the failure is logged first.
    """
    logger = RequiredFeature('logger').request()

    if isinstance(xml_string, bytes):
        decode_order = ('ASCII', 'UTF-8', 'UTF-16')
        logger.info("Trying to decode as: %s" % decode_order[0])
        for position, codec in enumerate(decode_order):
            try:
                text = xml_string.decode(codec)
            except UnicodeDecodeError:
                if position + 1 < len(decode_order):
                    logger.info("Decoding as %s failed, trying as %s"
                                % (codec, decode_order[position + 1]))
            else:
                break
        else:
            # Every candidate codec rejected the input.
            logger.error(
                "Decoding as UTF-16 failed, this was the last attempt. Offending string follows ..."
            )
            logger.error(xml_string)
            raise ValueError("String Decode Failed")
    else:
        # Already text: calling .decode() on it would implicitly encode to
        # ASCII first on Python 2 (uncaught UnicodeEncodeError for
        # non-ASCII text) and does not exist at all on Python 3.
        text = xml_string

    # Search the decoded text, not the raw bytes: a UTF-16 document has
    # NUL bytes between the characters of "UTF-16", and a str pattern
    # cannot be applied to bytes on Python 3.
    specified_encoding = re.search(r'UTF-\d{1,2}', text)

    if specified_encoding is not None:
        primary = specified_encoding.group(0)
        fallback = 'UTF-16' if primary == 'UTF-8' else 'UTF-8'
        logger.info("Trying to encode as specified in XML: %s" % primary)
    else:
        primary, fallback = 'UTF-8', 'UTF-16'
        logger.info("RegExp couldn't find a match in the XML string ...")
        logger.info("Trying to encode as: UTF-8")

    try:
        return text.encode(primary)
    except (UnicodeEncodeError, LookupError):
        # LookupError: the document named a codec Python does not know
        # (e.g. "UTF-99"); treat it like any other failed first attempt.
        logger.info("Encoding as %s failed, trying as %s" % (primary, fallback))

    try:
        return text.encode(fallback)
    except UnicodeEncodeError:
        logger.error(
            "Encoding as %s failed, this was the last attempt. Offending string follows ..."
            % fallback)
        logger.error(text)
        raise ValueError("String Encode Failed")
def re_encode_string(self, xml_string):
    """Return ``xml_string`` as bytes in the encoding named in its text.

    Byte input is decoded by trying ASCII, then UTF-8, then UTF-16. Input
    that is already text is used as is. The decoded text is searched for
    the first ``UTF-<n>`` token (such as the one in an XML declaration)
    and re-encoded with that codec, or with UTF-8 when no token is found.
    If that encode fails, one fallback is tried: UTF-16 when the first
    choice was UTF-8, otherwise UTF-8.

    :param xml_string: XML document as a byte string or a text string.
    :return: the document as an encoded byte string.
    :raises ValueError: if none of the decode attempts, or neither of the
        encode attempts, succeeds; the failure is logged first.
    """
    logger = RequiredFeature('logger').request()

    if isinstance(xml_string, bytes):
        decode_order = ('ASCII', 'UTF-8', 'UTF-16')
        logger.info("Trying to decode as: %s" % decode_order[0])
        for position, codec in enumerate(decode_order):
            try:
                text = xml_string.decode(codec)
            except UnicodeDecodeError:
                if position + 1 < len(decode_order):
                    logger.info("Decoding as %s failed, trying as %s"
                                % (codec, decode_order[position + 1]))
            else:
                break
        else:
            # Every candidate codec rejected the input.
            logger.error("Decoding as UTF-16 failed, this was the last attempt. Offending string follows ...")
            logger.error(xml_string)
            raise ValueError("String Decode Failed")
    else:
        # Already text: calling .decode() on it would implicitly encode to
        # ASCII first on Python 2 (uncaught UnicodeEncodeError for
        # non-ASCII text) and does not exist at all on Python 3.
        text = xml_string

    # Search the decoded text, not the raw bytes: a UTF-16 document has
    # NUL bytes between the characters of "UTF-16", and a str pattern
    # cannot be applied to bytes on Python 3.
    specified_encoding = re.search(r'UTF-\d{1,2}', text)

    if specified_encoding is not None:
        primary = specified_encoding.group(0)
        fallback = 'UTF-16' if primary == 'UTF-8' else 'UTF-8'
        logger.info("Trying to encode as specified in XML: %s" % primary)
    else:
        primary, fallback = 'UTF-8', 'UTF-16'
        logger.info("RegExp couldn't find a match in the XML string ...")
        logger.info("Trying to encode as: UTF-8")

    try:
        return text.encode(primary)
    except (UnicodeEncodeError, LookupError):
        # LookupError: the document named a codec Python does not know
        # (e.g. "UTF-99"); treat it like any other failed first attempt.
        logger.info("Encoding as %s failed, trying as %s" % (primary, fallback))

    try:
        return text.encode(fallback)
    except UnicodeEncodeError:
        logger.error(
            "Encoding as %s failed, this was the last attempt. Offending string follows ..."
            % fallback)
        logger.error(text)
        raise ValueError("String Encode Failed")