def __init__(self, codecs=None):
    """
    Creates a DefaultEncoder and selects its canonicalization codecs.

    @param codecs: optional list of Codec instances to use for
        canonicalization. When None, the HTML entity, percent and
        JavaScript codecs are used.
    @raise TypeError: if an element of codecs is not a Codec instance
    """
    Encoder.__init__(self)

    # One codec instance per output context
    self.html_codec = HTMLEntityCodec()
    self.percent_codec = PercentCodec()
    self.javascript_codec = JavascriptCodec()
    self.vbscript_codec = VBScriptCodec()
    self.css_codec = CSSCodec()
    self.ldap_codec = LDAPCodec()
    self.ldap_dn_codec = LDAPDNCodec()

    self.logger = ESAPI.logger("Encoder")

    # Codecs consulted by canonicalize()
    self.codecs = []
    if codecs is not None:
        # Caller-supplied list: accept Codec instances only
        for candidate in codecs:
            if not isinstance(candidate, Codec):
                raise TypeError(
                    _("Codecs in list must be instances of children of Codec"))
            self.codecs.append(candidate)
    else:
        # css_codec is left out because it eats / characters
        # vbscript_codec is left out because it eats " characters
        self.codecs.extend([
            self.html_codec,
            self.percent_codec,
            self.javascript_codec,
        ])
def test_codec_for_css(self):
    """
    Checks CSS encoding through the encoder facade and CSS
    encoding/decoding through CSSCodec directly.
    """
    instance = ESAPI.encoder()

    ### High level
    self.assertEquals(None, instance.encode_for_css(None))
    self.assertEquals("\\3c script\\3e ", instance.encode_for_css("<script>"))
    self.assertEquals("\\21 \\40 \\24 \\25 \\28 \\29 \\3d \\2b \\7b \\7d \\5b \\5d ", instance.encode_for_css("!@$%()=+{}[]"))

    # Unicode
    self.assertEquals(unichr(12345), instance.encode_for_css(unichr(12345)))

    ### Low level
    codec = CSSCodec()

    # Each case is indexed with the PLAIN / ENCODED / ALT_ENCODINGS
    # constants. An ENCODED of None means "no canonical encoding to check";
    # only the alternate encodings are decoded for that case.
    cases = (
        # PLAIN - ENCODED - ALT_ENCODINGS
        ('', '', ()),  # 0 length string
        ('t', 't', ()),
        ('test', 'test', ()),
        ('<script>', '\\3c script\\3e ', ()),
        ('!@#$%^&*(){}[]?+/=|\\', '\\21 \\40 \\23 \\24 \\25 \\5e \\26 \\2a \\28 \\29 \\7b \\7d \\5b \\5d \\3f \\2b \\2f \\3d \\7c \\5c ', ()),
        ('"`~1234_-', '\\22 \\60 \\7e 1234\\5f \\2d ', ()),
        (unichr(9), "\\9 ", ()),
        (unichr(12345), unichr(12345), ()),
        # The trailing comma matters: ('\\') is just a string, not a
        # one-element tuple of alternate encodings.
        ('\\', '\\5c ', ('\\',)),
        ('\\2aq', None, ("\\2aq",)),  # Malformed hex
        ('\\2aq ', None, ("\\2aq ",)),  # Malformed hex
        ('\\q ', None, ('\\q ',)),  # Malformed hex
    )

    for case in cases:
        if case[ENCODED] is not None:
            # Round trip: plain -> encoded -> plain
            self.assertEquals(case[ENCODED], codec.encode('', case[PLAIN]))
            self.assertEquals(case[PLAIN], codec.decode(case[ENCODED]))

        # Every alternate encoding must decode to the plain form
        for encoding in case[ALT_ENCODINGS]:
            self.assertEquals(case[PLAIN], codec.decode(encoding))
def __init__(self, logger, extra=None):
    """
    Stores the wrapped logger and builds the encoder used by this object.

    @param logger: the logger object to keep on self.logger
    @param extra: optional dict kept on self.extra. When omitted (or
        None) a new empty dict is created for this instance.
    """
    self.logger = logger

    # A literal {} default would be created once and shared by every
    # instance constructed without `extra`; build a fresh dict instead.
    self.extra = {} if extra is None else extra

    # Enable code for html, JS, url and CSS
    codeclist = [HTMLEntityCodec(), JavascriptCodec(), PercentCodec(), CSSCodec()]
    self.encoder = SecurityEncoder(codeclist)
class DefaultEncoder(Encoder):
    """
    Reference implementation of the Encoder interface. This implementation
    takes a whitelist approach to encoding, meaning that everything not
    specifically identified in a list of "immune" characters is encoded.

    @author: Craig Younkins ([email protected])
    """

    # Characters left unencoded ("immune") in each output context
    IMMUNE_HTML = ',.-_ '
    IMMUNE_HTMLATTR = ',.-_'
    IMMUNE_CSS = ''
    IMMUNE_JAVASCRIPT = ',._'
    IMMUNE_VBSCRIPT = ',._'
    IMMUNE_XML = ',.-_ '
    IMMUNE_SQL = ' '
    IMMUNE_OS = '-'
    IMMUNE_XMLATTR = ',.-_'
    IMMUNE_XPATH = ',.-_ '
    IMMUNE_LDAP = ''
    IMMUNE_LDAP_DN = ''

    # Unreserved characters as specified in RFC 3986
    IMMUNE_URL = '-_.~'

    def __init__(self, codecs=None):
        """
        Instantiates a new DefaultEncoder.

        @param codecs: a list of codec instances to use for
            canonicalization. When None, the HTML entity, percent and
            JavaScript codecs are used.
        @raise TypeError: if an element of codecs is not a Codec instance
        """
        Encoder.__init__(self)

        self.html_codec = HTMLEntityCodec()
        self.percent_codec = PercentCodec()
        self.javascript_codec = JavascriptCodec()
        self.vbscript_codec = VBScriptCodec()
        self.css_codec = CSSCodec()
        self.ldap_codec = LDAPCodec()
        self.ldap_dn_codec = LDAPDNCodec()

        self.logger = ESAPI.logger("Encoder")

        # Used for canonicalization
        self.codecs = []
        if codecs is None:
            self.codecs.append(self.html_codec)
            self.codecs.append(self.percent_codec)
            self.codecs.append(self.javascript_codec)
            # Leaving out css_codec because it eats / characters
            # Leaving out vbscript_codec because it eats " characters
        else:
            for codec in codecs:
                if not isinstance(codec, Codec):
                    raise TypeError(
                        _("Codecs in list must be instances of children of Codec"))
                self.codecs.append(codec)

    def _report_encoding_problem(self, strict, message):
        """
        Raises or logs a canonicalization finding.

        @param strict: when true an IntrusionException is raised, otherwise
            the message is logged as a security-failure warning
        @param message: the already formatted description of the finding
        @raise IntrusionException: if strict is true
        """
        if strict:
            raise IntrusionException(_("Input validation failure"), message)
        self.logger.warning(Logger.SECURITY_FAILURE, message)

    def canonicalize(self, input_, strict=True):
        """
        Repeatedly applies every configured codec's decode() until a full
        pass over the codecs leaves the data unchanged.

        @param input_: the data to canonicalize; None is returned as None
        @param strict: when true, multiple and/or mixed encoding raises an
            IntrusionException; when false it is only logged
        @return: the fully decoded data
        @raise IntrusionException: in strict mode, when more than one
            decoding pass changed the data (multiple encoding) or more than
            one codec changed it (mixed encoding)
        """
        if input_ is None:
            return None

        working = input_[:]
        codecs_found = []   # class names of the codecs that changed the data
        found_count = 0     # number of passes in which something was decoded
        clean = False

        while not clean:
            clean = True

            # Try each codec and keep track of which ones work
            for codec in self.codecs:
                old = working[:]
                working = codec.decode(working)
                if old != working:
                    codec_name = codec.__class__.__name__
                    if codec_name not in codecs_found:
                        codecs_found.append(codec_name)
                    # Count each pass once, however many codecs fire in it
                    if clean:
                        found_count += 1
                    clean = False

        multiple = found_count >= 2
        mixed = len(codecs_found) > 1

        # Every message uses named placeholders only. A bare "%s" combined
        # with a mapping argument would interpolate the whole mapping.
        message = None
        if multiple and mixed:
            message = _(
                "Multiple (%(times_encoded)sx) and mixed encoding (%(codecs_found)s) detected in %(input)s"
            ) % {
                'times_encoded': found_count,
                'codecs_found': str(codecs_found),
                'input': input_,
            }
        elif multiple:
            message = _(
                "Multiple (%(times_encoded)sx) encoding detected in %(input)s"
            ) % {
                'times_encoded': found_count,
                'input': input_,
            }
        elif mixed:
            message = _(
                "Mixed encoding (%(codecs_found)s) detected in %(input)s"
            ) % {
                'codecs_found': str(codecs_found),
                'input': input_,
            }

        if message is not None:
            self._report_encoding_problem(strict, message)

        return working

    def encode_for_css(self, input_):
        """Encodes input_ with the CSS codec using IMMUNE_CSS."""
        return self.css_codec.encode(DefaultEncoder.IMMUNE_CSS, input_)

    def encode_for_html(self, input_):
        """Encodes input_ with the HTML entity codec using IMMUNE_HTML."""
        return self.html_codec.encode(DefaultEncoder.IMMUNE_HTML, input_)

    def encode_for_html_attribute(self, input_):
        """Encodes input_ with the HTML entity codec using IMMUNE_HTMLATTR."""
        return self.html_codec.encode(DefaultEncoder.IMMUNE_HTMLATTR, input_)

    def encode_for_javascript(self, input_):
        """Encodes input_ with the JavaScript codec using IMMUNE_JAVASCRIPT."""
        return self.javascript_codec.encode(DefaultEncoder.IMMUNE_JAVASCRIPT, input_)

    def encode_for_vbscript(self, input_):
        """Encodes input_ with the VBScript codec using IMMUNE_VBSCRIPT."""
        return self.vbscript_codec.encode(DefaultEncoder.IMMUNE_VBSCRIPT, input_)

    def encode_for_sql(self, codec, input_):
        """Encodes input_ with the caller-supplied codec using IMMUNE_SQL."""
        return codec.encode(DefaultEncoder.IMMUNE_SQL, input_)

    def encode_for_os(self, codec, input_):
        """Encodes input_ with the caller-supplied codec using IMMUNE_OS."""
        return codec.encode(DefaultEncoder.IMMUNE_OS, input_)

    def encode_for_ldap(self, input_):
        """Encodes input_ with the LDAP codec using IMMUNE_LDAP."""
        return self.ldap_codec.encode(DefaultEncoder.IMMUNE_LDAP, input_)

    def encode_for_dn(self, input_):
        """Encodes input_ with the LDAP DN codec using IMMUNE_LDAP_DN."""
        return self.ldap_dn_codec.encode(DefaultEncoder.IMMUNE_LDAP_DN, input_)

    def encode_for_xpath(self, input_):
        """Encodes input_ with the HTML entity codec using IMMUNE_XPATH."""
        return self.html_codec.encode(DefaultEncoder.IMMUNE_XPATH, input_)

    def encode_for_xml(self, input_):
        """Encodes input_ with the HTML entity codec using IMMUNE_XML."""
        return self.html_codec.encode(DefaultEncoder.IMMUNE_XML, input_)

    def encode_for_xml_attribute(self, input_):
        """Encodes input_ with the HTML entity codec using IMMUNE_XMLATTR."""
        return self.html_codec.encode(DefaultEncoder.IMMUNE_XMLATTR, input_)

    def encode_for_url(self, input_):
        """Encodes input_ with the percent codec using IMMUNE_URL."""
        return self.percent_codec.encode(DefaultEncoder.IMMUNE_URL, input_)

    def decode_from_url(self, input_):
        """
        Canonicalizes input_ (strict mode, so canonicalize() may raise
        IntrusionException) and percent-decodes the result.

        @return: the decoded data, or None when input_ is None
        """
        if input_ is None:
            return None
        canonical = self.canonicalize(input_)
        return self.percent_codec.decode(canonical)

    def encode_for_base64(self, input_):
        """
        Base64-encodes input_.

        @return: the encoded data, or None when encoding fails
        """
        try:
            return base64.b64encode(input_)
        except Exception:
            # Deliberately best-effort: any encoding failure yields None.
            # Unlike a bare except, KeyboardInterrupt/SystemExit propagate.
            return None

    def decode_from_base64(self, input_):
        """
        Base64-decodes input_.

        @return: the decoded data, or None when decoding fails
        """
        try:
            return base64.b64decode(input_)
        except Exception:
            # Deliberately best-effort: any decoding failure yields None.
            # Unlike a bare except, KeyboardInterrupt/SystemExit propagate.
            return None
def test_canonicalize(self):
    """
    Exercises canonicalize() with percent, HTML entity, JavaScript and CSS
    encoded input, using a separate encoder instance per codec set.
    """
    # NOTE(review): in the previous revision every HTML-entity input in this
    # test had been flattened to its decoded character (for example
    # canonicalize("<") expecting "<"), which made those assertions vacuous,
    # left one literal with an unescaped double quote and another with a
    # non-ASCII character. The entity-encoded inputs below are reconstructed
    # from the expected values and from the zero-padding / letter-case
    # patterns used in the JavaScript and CSS sections of this test --
    # confirm them against the upstream test suite.
    codecs = [HTMLEntityCodec(), PercentCodec()]
    encoder_class = ESAPI.security_configuration().get_class_for_interface('encoder')
    instance = encoder_class(codecs)

    # Test None paths
    self.assertEquals(None, instance.canonicalize(None))
    self.assertEquals(None, instance.canonicalize(None, True))
    self.assertEquals(None, instance.canonicalize(None, False))

    # test exception paths
    self.assertEquals("%", instance.canonicalize("%25", True))
    self.assertEquals("%", instance.canonicalize("%25", False))
    self.assertEquals("%", instance.canonicalize("%25"))
    self.assertEquals("%F", instance.canonicalize("%25F"))
    self.assertEquals("<", instance.canonicalize("%3c"))
    self.assertEquals("<", instance.canonicalize("%3C"))
    self.assertEquals("%X1", instance.canonicalize("%X1"))
    self.assertEquals("<", instance.canonicalize("&lt"))
    self.assertEquals("<", instance.canonicalize("&LT"))
    self.assertEquals("<", instance.canonicalize("&lt;"))
    self.assertEquals("<", instance.canonicalize("&LT;"))
    self.assertEquals("%", instance.canonicalize("&#37;"))
    self.assertEquals("%", instance.canonicalize("&#37"))
    self.assertEquals("%b", instance.canonicalize("&#37b"))
    self.assertEquals("<", instance.canonicalize("&#x3c"))
    self.assertEquals("<", instance.canonicalize("&#x3c;"))
    self.assertEquals("<", instance.canonicalize("&#x3C"))
    self.assertEquals("<", instance.canonicalize("&#X3c"))
    self.assertEquals("<", instance.canonicalize("&#X3C"))
    self.assertEquals("<", instance.canonicalize("&#X3C;"))

    # percent encoding
    self.assertEquals("<", instance.canonicalize("%3c"))
    self.assertEquals("<", instance.canonicalize("%3C"))

    # html entity encoding
    # Numeric references: decimal and hex, with and without the closing
    # semicolon, zero-padded, in every x/X and c/C combination.
    numeric_forms = []
    for terminator in ("", ";"):
        for zero_count in range(6):
            padding = "0" * zero_count
            numeric_forms.append("&#" + padding + "60" + terminator)
            for hex_marker in ("x", "X"):
                for hex_digit in ("c", "C"):
                    numeric_forms.append(
                        "&#" + hex_marker + padding + "3" + hex_digit + terminator)
    for encoded in numeric_forms:
        # The input is passed as the failure message so a failing variant
        # can be identified.
        self.assertEquals("<", instance.canonicalize(encoded), encoded)

    # Named entity in every letter case, with and without the semicolon
    self.assertEquals("<", instance.canonicalize("&lt"))
    self.assertEquals("<", instance.canonicalize("&lT"))
    self.assertEquals("<", instance.canonicalize("&Lt"))
    self.assertEquals("<", instance.canonicalize("&LT"))
    self.assertEquals("<", instance.canonicalize("&lt;"))
    self.assertEquals("<", instance.canonicalize("&lT;"))
    self.assertEquals("<", instance.canonicalize("&Lt;"))
    self.assertEquals("<", instance.canonicalize("&LT;"))

    self.assertEquals(
        "<script>alert(\"hello\");</script>",
        instance.canonicalize("%3Cscript%3Ealert%28%22hello%22%29%3B%3C%2Fscript%3E"))
    # Percent and HTML entity encoding mixed in one value; strict=False so
    # the mixed encoding is only logged instead of raising.
    self.assertEquals(
        "<script>alert(\"hello\");</script>",
        instance.canonicalize("%3Cscript&#x3E;alert%28%22hello&#34;%29%3B%3C%2Fscript%3E", False))

    # javascript escape syntax
    js = [JavascriptCodec()]
    instance = encoder_class(js)

    self.assertEquals("\0", instance.canonicalize("\\0"))
    self.assertEquals("\b", instance.canonicalize("\\b"))
    self.assertEquals("\t", instance.canonicalize("\\t"))
    self.assertEquals("\n", instance.canonicalize("\\n"))
    self.assertEquals(unichr(0x0b), instance.canonicalize("\\v"))
    self.assertEquals("\f", instance.canonicalize("\\f"))
    self.assertEquals("\r", instance.canonicalize("\\r"))
    self.assertEquals("\'", instance.canonicalize("\\'"))
    self.assertEquals("\"", instance.canonicalize("\\\""))
    self.assertEquals("\\", instance.canonicalize("\\\\"))
    self.assertEquals("<", instance.canonicalize("\\<"))

    self.assertEquals("<", instance.canonicalize("\\u003c"))
    self.assertEquals("<", instance.canonicalize("\\U003c"))
    self.assertEquals("<", instance.canonicalize("\\u003C"))
    self.assertEquals("<", instance.canonicalize("\\U003C"))
    self.assertEquals("<", instance.canonicalize("\\x3c"))
    self.assertEquals("<", instance.canonicalize("\\X3c"))
    self.assertEquals("<", instance.canonicalize("\\x3C"))
    self.assertEquals("<", instance.canonicalize("\\X3C"))

    # css escape syntax
    # be careful because some codecs see \0 as null byte
    css = [CSSCodec()]
    instance = encoder_class(css)
    self.assertEquals("<", instance.canonicalize("\\3c"))  # add strings to prevent null byte
    self.assertEquals("<", instance.canonicalize("\\03c"))
    self.assertEquals("<", instance.canonicalize("\\003c"))
    self.assertEquals("<", instance.canonicalize("\\0003c"))
    self.assertEquals("<", instance.canonicalize("\\00003c"))
    self.assertEquals("<", instance.canonicalize("\\3C"))
    self.assertEquals("<", instance.canonicalize("\\03C"))
    self.assertEquals("<", instance.canonicalize("\\003C"))
    self.assertEquals("<", instance.canonicalize("\\0003C"))
    self.assertEquals("<", instance.canonicalize("\\00003C"))
def test_encoder_constructor_exception(self):
    """
    The encoder factory must reject a codec list containing something
    that is not a codec instance.
    """
    # `str` is a class object, not a codec instance
    not_a_codec = str
    candidate_codecs = [CSSCodec()]
    candidate_codecs.append(not_a_codec)

    self.assertRaises(TypeError, ESAPI.encoder, candidate_codecs)