def __init__(self, codecs=None):
    """
    Set up a new DefaultEncoder and the codec instances it works with.

    @param codecs: optional list of Codec instances to use for
        canonicalization; when it is None the HTML entity, percent and
        JavaScript codecs are used
    @raise TypeError: if an element of codecs is not a Codec instance
    """
    Encoder.__init__(self)

    # One codec instance per supported encoding scheme
    self.html_codec = HTMLEntityCodec()
    self.percent_codec = PercentCodec()
    self.javascript_codec = JavascriptCodec()
    self.vbscript_codec = VBScriptCodec()
    self.css_codec = CSSCodec()
    self.ldap_codec = LDAPCodec()
    self.ldap_dn_codec = LDAPDNCodec()
    self.logger = ESAPI.logger("Encoder")

    # Used for canonicalization
    if codecs is None:
        # css_codec is left out because it eats / characters and
        # vbscript_codec is left out because it eats " characters
        self.codecs = [
            self.html_codec,
            self.percent_codec,
            self.javascript_codec,
        ]
        return

    self.codecs = []
    for candidate in codecs:
        if isinstance(candidate, Codec):
            self.codecs.append(candidate)
            continue
        raise TypeError(
            _("Codecs in list must be instances of children of Codec"))
def test_is_valid_dir_path(self):
    """
    Check is_valid_directory_path against platform-specific expectations.

    Each table row is (expected, input, parent directory); rows are checked
    in order, with assertTrue for True rows and assertFalse for False rows.
    """
    encoder_class = ESAPI.security_configuration().get_class_for_interface('encoder')
    validator_class = ESAPI.security_configuration().get_class_for_interface('validator')
    instance = validator_class(encoder_class([HTMLEntityCodec()]))

    if os.name == 'nt':
        # Windows
        expectations = [
            # Windows paths that don't exist and thus should fail
            (False, "c:\\ridiculous", "c:\\"),
            (False, "c:\\jeff", "c:\\"),
            (False, "c:\\temp\\..\\etc", "c:\\"),
            # When the parent directory doesn't exist, these should fail
            (False, "c:\\", "c:\\ridiculous"),
            (False, "c:\\", None),
            # Windows paths that should pass
            (True, "C:\\", "C:\\"),  # Windows root directory
            (True, "C:\\Windows", "C:\\"),  # Windows always exist directory
            # Should fail for files
            (False, "C:\\Windows\\System32\\cmd.exe", "C:\\"),  # Windows command shell
            # Testing case insensitivity between input and parent_dir
            (True, "C:\\", "c:\\"),  # Windows root directory
            (True, "c:\\Windows", "C:\\"),  # Windows always exist directory
            # Testing the verification of the parent directory
            (False, "c:\\", "C:\\windows"),  # Windows always exist directory
            (False, "C:\\", "C:\\windows"),  # Windows always exist directory
            # Unix specific paths should not pass
            (False, "/tmp", "/"),  # Unix Temporary directory
            (False, "/bin/sh", "/"),  # Unix Standard shell
            (False, "/etc/config", "/"),
            # Unix specific paths that should not exist or work
            (False, "/etc/ridiculous", "/"),
            (False, "/tmp/../etc", "/"),
        ]
    else:
        expectations = [
            # Windows paths should fail
            (False, "c:\\ridiculous", "c:\\"),
            (False, "c:\\temp\\..\\etc", "c:\\"),
            # Standard Windows locations should fail
            (False, "c:\\", "c:\\"),
            (False, "c:\\Windows\\temp", "c:\\"),
            (False, "c:\\Windows\\System32\\cmd.exe", "c:\\"),
            # Unix specific paths should pass
            (True, "/", "/"),  # Root
            (True, "/bin", "/"),  # /bin
            # Unix specific paths that should not exist or work
            (False, "/etc/ridiculous", "/"),
            (False, "/tmp/../etc", "/"),
        ]

    for expected, candidate, parent in expectations:
        outcome = instance.is_valid_directory_path("test", candidate, parent, False)
        if expected:
            self.assertTrue(outcome)
        else:
            self.assertFalse(outcome)
def make_file_validator(self):
    """
    Create the class-level DefaultValidator.file_validator if it is unset.

    Does nothing when DefaultValidator.file_validator is already not None.
    Otherwise the attribute is first set to the placeholder 'fail' and then
    replaced by a DefaultValidator built on an encoder that uses the HTML
    entity and percent codecs.
    """
    if DefaultValidator.file_validator is None:
        # NOTE(review): the placeholder presumably stops a nested call made
        # while the validator below is being constructed from building
        # another one - confirm against DefaultValidator.__init__.
        DefaultValidator.file_validator = 'fail'

        canonicalization_codecs = [HTMLEntityCodec(), PercentCodec()]
        encoder_factory = ESAPI.security_configuration().get_class_for_interface(
            'encoder')
        DefaultValidator.file_validator = DefaultValidator(
            encoder_factory(canonicalization_codecs))
def test_html_codec(self): instance = ESAPI.encoder() ### High level self.assertEquals(None, instance.encode_for_html(None)) # test invalid characters are replaced with spaces self.assertEquals("a b c d e f	g", instance.encode_for_html("a" + unichr(0) + "b" + unichr(4) + "c" + unichr(128) + "d" + unichr(150) + "e" +unichr(159) + "f" + unichr(9) + "g")) self.assertEquals("<script>", instance.encode_for_html("<script>")) self.assertEquals("&lt;script&gt;", instance.encode_for_html("<script>")) self.assertEquals("!@$%()=+{}[]", instance.encode_for_html("!@$%()=+{}[]")) self.assertEquals(",.-_ ", instance.encode_for_html(",.-_ ")) self.assertEquals("dir&", instance.encode_for_html("dir&")) self.assertEquals("one&two", instance.encode_for_html("one&two")) # Unicode self.assertEquals(unichr(12345), instance.encode_for_html(unichr(12345))) ### Low lovel codec = HTMLEntityCodec() cases = ( # PLAIN - ENCODED - ALT_ENCODINGS ('', '', ()), ('t','t', ()), ('test', 'test', ()), ('<script>', '<script>', ()), ('!@#$%^&*(){}[]?+/=|\\', '!@#$%^&*(){}[]?+/=|\', ()), ('"`~1234_-', '"`~1234_-', ()), (unichr(9), "	", ()), (unichr(12345), unichr(12345), ()), ('\\', '\', ()), ) for case in cases: self.assertEquals(case[ENCODED], codec.encode('', case[PLAIN])) self.assertEquals(case[PLAIN], codec.decode(case[ENCODED])) for encoding in case[ALT_ENCODINGS]: self.assertEquals(case[PLAIN], codec.decode(encoding)) # Bad entity name self.assertEquals("&ridiculous;", codec.decode("&ridiculous;"))
class BotoJsonEncoder(JSONEncoder):
    """
    JSONEncoder subclass that carries an HTML entity codec, the immune
    character sets for it, and a helper that shallow-copies dictionaries.
    """

    # use this codec directly vs using factory which messes with logging config
    codec = HTMLEntityCodec()
    IMMUNE_HTML = ',.-_ '
    IMMUNE_HTMLATTR = ',.-_'
    # these are system generated values that aren't a risk for XSS attacks
    FIELD_WHITELIST = [
        'id',
        'image_id',
        'kernel_id',
        'ramdisk_id',
        'reservation_id',
        'owner_id',
        'root_device_type',
        'state',
        'state_reason',
        'state_code',
        'monitored',
        'platform',
        'volume_id',
        'snapshot_id',
        'launch_time',
        'attach_time',
        'create_time',
        'start_time',
        'instance_type',
        'zone',
        'progress',
        'ip_protocol',
        'fingerprint',
    ]

    def __sanitize_and_copy__(self, dict):
        """
        Return a shallow copy of the given dictionary.

        The values are currently NOT sanitized (see the comment in the body).

        @param dict: the object to copy (the name shadows the builtin but is
            kept so existing callers are unaffected)
        @return: the shallow copy, or None if copying raised an exception,
            in which case the exception is logged
        """
        try:
            ret = copy.copy(dict)
            # Don't sanitize. We're doing this in the browser now!
            # Leave this code in for now...
            #for key in ret.keys():
            #    if key in self.FIELD_WHITELIST:
            #        continue
            #    if isinstance(ret[key], basestring):
            #        ret[key] = self.codec.encode(self.IMMUNE_HTML, ret[key])
            return ret
        except Exception as e:
            # "except ... as" is valid from Python 2.6 on and, unlike the
            # comma form, also parses on Python 3.
            logging.error(e)
            return None
def __init__(self, logger, extra=None):
    """
    Store the wrapped logger and build the encoder used by this object.

    @param logger: the logger object to keep on the instance
    @param extra: optional dictionary stored as self.extra; a new empty
        dictionary is used when it is omitted or None
    """
    self.logger = logger
    # A literal {} default is created once and shared by every instance
    # built without `extra`, so a mutation through one instance would show
    # up in all of them. Create a fresh dictionary per instance instead.
    self.extra = {} if extra is None else extra

    # Enable code for html, JS, url and CSS
    codeclist = [HTMLEntityCodec(), JavascriptCodec(), PercentCodec(), CSSCodec()]
    self.encoder = SecurityEncoder(codeclist)
class DefaultEncoder(Encoder):
    """
    Reference implementation of the Encoder interface. This implementation
    takes a whitelist approach to encoding, meaning that everything not
    specifically identified in a list of "immune" characters is encoded.

    @author: Craig Younkins ([email protected])
    """

    # Characters passed to the codecs as "immune" for each output context
    IMMUNE_HTML = ',.-_ '
    IMMUNE_HTMLATTR = ',.-_'
    IMMUNE_CSS = ''
    IMMUNE_JAVASCRIPT = ',._'
    IMMUNE_VBSCRIPT = ',._'
    IMMUNE_XML = ',.-_ '
    IMMUNE_SQL = ' '
    IMMUNE_OS = '-'
    IMMUNE_XMLATTR = ',.-_'
    IMMUNE_XPATH = ',.-_ '
    IMMUNE_LDAP = ''
    IMMUNE_LDAP_DN = ''

    # Unreserved characters as specified in RFC 3986
    IMMUNE_URL = '-_.~'

    def __init__(self, codecs=None):
        """
        Instantiates a new DefaultEncoder.

        @param codecs: a list of codec instances to use for canonicalization;
            when None, the HTML entity, percent and JavaScript codecs are used
        @raise TypeError: if an element of codecs is not a Codec instance
        """
        Encoder.__init__(self)

        self.html_codec = HTMLEntityCodec()
        self.percent_codec = PercentCodec()
        self.javascript_codec = JavascriptCodec()
        self.vbscript_codec = VBScriptCodec()
        self.css_codec = CSSCodec()
        self.ldap_codec = LDAPCodec()
        self.ldap_dn_codec = LDAPDNCodec()

        self.logger = ESAPI.logger("Encoder")

        # Used for canonicalization
        self.codecs = []
        if codecs is None:
            self.codecs.append(self.html_codec)
            self.codecs.append(self.percent_codec)
            self.codecs.append(self.javascript_codec)

            # Leaving out css_codec because it eats / characters
            # Leaving out vbscript_codec because it eats " characters
        else:
            for codec in codecs:
                if not isinstance(codec, Codec):
                    raise TypeError(
                        _("Codecs in list must be instances of children of Codec"))
                self.codecs.append(codec)

    def canonicalize(self, input_, strict=True):
        """
        Decode input_ with the configured codecs until none of them changes
        it any more, and report multiple and/or mixed encoding.

        @param input_: the value to canonicalize; None is returned as None
        @param strict: if True, multiple or mixed encoding raises an
            IntrusionException; if False it is only logged as a warning
        @return: the fully decoded value
        @raise IntrusionException: in strict mode, when the input needed two
            or more decoding passes and/or more than one kind of codec
        """
        if input_ is None:
            return None

        working = input_[:]
        codecs_found = []   # class names of the codecs that changed the input
        found_count = 0     # number of passes in which something was decoded
        clean = False

        while not clean:
            clean = True

            # Try each codec and keep track of which ones work
            for codec in self.codecs:
                old = working[:]
                working = codec.decode(working)
                if old != working:
                    codec_name = codec.__class__.__name__
                    if codec_name not in codecs_found:
                        codecs_found.append(codec_name)
                    # Count each pass once, however many codecs fire in it
                    if clean:
                        found_count += 1
                    clean = False

        multiple = found_count >= 2
        mixed = len(codecs_found) > 1

        # Every message uses %(times_encoded)s; the earlier spelling
        # "%s(times_encoded)x" rendered the whole mapping instead of the count.
        if multiple and mixed:
            self._report_encoding_violation(
                strict,
                _("Multiple (%(times_encoded)sx) and mixed encoding (%(codecs_found)s) detected in %(input)s") %
                {'times_encoded': found_count,
                 'codecs_found': str(codecs_found),
                 'input': input_})
        elif multiple:
            self._report_encoding_violation(
                strict,
                _("Multiple (%(times_encoded)sx) encoding detected in %(input)s") %
                {'times_encoded': found_count,
                 'input': input_})
        elif mixed:
            self._report_encoding_violation(
                strict,
                _("Mixed encoding (%(codecs_found)s) detected in %(input)s") %
                {'codecs_found': str(codecs_found),
                 'input': input_})

        return working

    def _report_encoding_violation(self, strict, message):
        """
        Raise an IntrusionException in strict mode, otherwise log a warning.

        @param strict: whether the violation must abort canonicalization
        @param message: the already formatted description of the violation
        @raise IntrusionException: if strict is true
        """
        if strict:
            raise IntrusionException(_("Input validation failure"), message)
        self.logger.warning(Logger.SECURITY_FAILURE, message)

    def encode_for_css(self, input_):
        """Encode input_ with the CSS codec and the IMMUNE_CSS set."""
        return self.css_codec.encode(DefaultEncoder.IMMUNE_CSS, input_)

    def encode_for_html(self, input_):
        """Encode input_ with the HTML entity codec and the IMMUNE_HTML set."""
        return self.html_codec.encode(DefaultEncoder.IMMUNE_HTML, input_)

    def encode_for_html_attribute(self, input_):
        """Encode input_ with the HTML entity codec and IMMUNE_HTMLATTR."""
        return self.html_codec.encode(DefaultEncoder.IMMUNE_HTMLATTR, input_)

    def encode_for_javascript(self, input_):
        """Encode input_ with the JavaScript codec and IMMUNE_JAVASCRIPT."""
        return self.javascript_codec.encode(DefaultEncoder.IMMUNE_JAVASCRIPT,
                                            input_)

    def encode_for_vbscript(self, input_):
        """Encode input_ with the VBScript codec and IMMUNE_VBSCRIPT."""
        return self.vbscript_codec.encode(DefaultEncoder.IMMUNE_VBSCRIPT,
                                          input_)

    def encode_for_sql(self, codec, input_):
        """Encode input_ with the caller-supplied codec and IMMUNE_SQL."""
        return codec.encode(DefaultEncoder.IMMUNE_SQL, input_)

    def encode_for_os(self, codec, input_):
        """Encode input_ with the caller-supplied codec and IMMUNE_OS."""
        return codec.encode(DefaultEncoder.IMMUNE_OS, input_)

    def encode_for_ldap(self, input_):
        """Encode input_ with the LDAP codec and the IMMUNE_LDAP set."""
        return self.ldap_codec.encode(DefaultEncoder.IMMUNE_LDAP, input_)

    def encode_for_dn(self, input_):
        """Encode input_ with the LDAP DN codec and the IMMUNE_LDAP_DN set."""
        return self.ldap_dn_codec.encode(DefaultEncoder.IMMUNE_LDAP_DN, input_)

    def encode_for_xpath(self, input_):
        """Encode input_ with the HTML entity codec and IMMUNE_XPATH."""
        return self.html_codec.encode(DefaultEncoder.IMMUNE_XPATH, input_)

    def encode_for_xml(self, input_):
        """Encode input_ with the HTML entity codec and IMMUNE_XML."""
        return self.html_codec.encode(DefaultEncoder.IMMUNE_XML, input_)

    def encode_for_xml_attribute(self, input_):
        """Encode input_ with the HTML entity codec and IMMUNE_XMLATTR."""
        return self.html_codec.encode(DefaultEncoder.IMMUNE_XMLATTR, input_)

    def encode_for_url(self, input_):
        """Encode input_ with the percent codec and the IMMUNE_URL set."""
        return self.percent_codec.encode(DefaultEncoder.IMMUNE_URL, input_)

    def decode_from_url(self, input_):
        """
        Canonicalize input_ (in strict mode) and percent-decode the result.

        @return: the decoded value, or None if input_ is None
        @raise IntrusionException: propagated from canonicalize
        """
        if input_ is None:
            return None
        canonical = self.canonicalize(input_)
        return self.percent_codec.decode(canonical)

    def encode_for_base64(self, input_):
        """
        Base64-encode input_.

        @return: the encoded value, or None if base64.b64encode raised
        """
        try:
            return base64.b64encode(input_)
        except Exception:
            # Best effort by design; unlike a bare "except:" this lets
            # KeyboardInterrupt and SystemExit propagate.
            return None

    def decode_from_base64(self, input_):
        """
        Base64-decode input_.

        @return: the decoded value, or None if base64.b64decode raised
        """
        try:
            return base64.b64decode(input_)
        except Exception:
            # Best effort by design; unlike a bare "except:" this lets
            # KeyboardInterrupt and SystemExit propagate.
            return None
def test_canonicalize(self):
    """
    Exercise canonicalize on the configured encoder class.

    The encoder is built first with the HTML entity and percent codecs,
    then with only the JavaScript codec, then with only the CSS codec.
    """
    # NOTE(review): many inputs below are the bare character "<" and one
    # call further down contains an unterminated string literal. This looks
    # like HTML entities inside the literals were decoded somewhere on the
    # way here - restore the literals from version control before relying on
    # this test.
    codecs = [HTMLEntityCodec(), PercentCodec()]
    encoder_class = ESAPI.security_configuration().get_class_for_interface('encoder')
    instance = encoder_class(codecs)

    # Test None paths
    self.assertEquals( None, instance.canonicalize(None))
    self.assertEquals( None, instance.canonicalize(None, True))
    self.assertEquals( None, instance.canonicalize(None, False))

    # test exception paths
    self.assertEquals( "%", instance.canonicalize("%25", True))
    self.assertEquals( "%", instance.canonicalize("%25", False))

    self.assertEquals( "%", instance.canonicalize("%25"))
    self.assertEquals( "%F", instance.canonicalize("%25F"))
    self.assertEquals( "<", instance.canonicalize("%3c"))
    self.assertEquals( "<", instance.canonicalize("%3C"))
    self.assertEquals( "%X1", instance.canonicalize("%X1"))

    self.assertEquals( "<", instance.canonicalize("<"))
    self.assertEquals( "<", instance.canonicalize("<"))
    self.assertEquals( "<", instance.canonicalize("<"))
    self.assertEquals( "<", instance.canonicalize("<"))

    self.assertEquals( "%", instance.canonicalize("%"))
    self.assertEquals( "%", instance.canonicalize("%"))
    self.assertEquals( "%b", instance.canonicalize("%b"))

    self.assertEquals( "<", instance.canonicalize("<"))
    self.assertEquals( "<", instance.canonicalize("<"))
    self.assertEquals( "<", instance.canonicalize("<"))
    self.assertEquals( "<", instance.canonicalize("<"))
    self.assertEquals( "<", instance.canonicalize("<"))
    self.assertEquals( "<", instance.canonicalize("<"))

    # percent encoding
    self.assertEquals( "<", instance.canonicalize("%3c"))
    self.assertEquals( "<", instance.canonicalize("%3C"))

    # html entity encoding
    self.assertEquals( "<", instance.canonicalize("<"))
    self.assertEquals( "<", instance.canonicalize("<"))
    self.assertEquals( "<", instance.canonicalize("<"))
    self.assertEquals( "<", instance.canonicalize("<"))
    self.assertEquals( "<", instance.canonicalize("<"))
    self.assertEquals( "<", instance.canonicalize("<"))
    self.assertEquals( "<", instance.canonicalize("<"))
    self.assertEquals( "<", instance.canonicalize("<"))
    self.assertEquals( "<", instance.canonicalize("<"))
    self.assertEquals( "<", instance.canonicalize("<"))
    self.assertEquals( "<", instance.canonicalize("<"))
    self.assertEquals( "<", instance.canonicalize("<"))
    self.assertEquals( "<", instance.canonicalize("<"))
    self.assertEquals( "<", instance.canonicalize("<"))
    self.assertEquals( "<", instance.canonicalize("<"))
    self.assertEquals( "<", instance.canonicalize("<"))
    self.assertEquals( "<", instance.canonicalize("<"))
    self.assertEquals( "<", instance.canonicalize("<"))
    self.assertEquals( "<", instance.canonicalize("<"))
    self.assertEquals( "<", instance.canonicalize("<"))
    self.assertEquals( "<", instance.canonicalize("<"))
    self.assertEquals( "<", instance.canonicalize("<"))
    self.assertEquals( "<", instance.canonicalize("<"))
    self.assertEquals( "<", instance.canonicalize("<"))
    self.assertEquals( "<", instance.canonicalize("<"))
    self.assertEquals( "<", instance.canonicalize("<"))
    self.assertEquals( "<", instance.canonicalize("<"))
    self.assertEquals( "<", instance.canonicalize("<"))
    self.assertEquals( "<", instance.canonicalize("<"))
    self.assertEquals( "<", instance.canonicalize("<"))
    self.assertEquals( "<", instance.canonicalize("<"))
    self.assertEquals( "<", instance.canonicalize("<"))
    self.assertEquals( "<", instance.canonicalize("<"))
    self.assertEquals( "<", instance.canonicalize("<"))
    self.assertEquals( "<", instance.canonicalize("<"))
    self.assertEquals( "<", instance.canonicalize("<"))
    self.assertEquals( "<", instance.canonicalize("<"))
    self.assertEquals( "<", instance.canonicalize("<"))
    self.assertEquals( "<", instance.canonicalize("<"))
    self.assertEquals( "<", instance.canonicalize("<"))
    self.assertEquals( "<", instance.canonicalize("<"))
    self.assertEquals( "<", instance.canonicalize("<"))
    self.assertEquals( "<", instance.canonicalize("<"))
    self.assertEquals( "<", instance.canonicalize("<"))
    self.assertEquals( "<", instance.canonicalize("<"))
    self.assertEquals( "<", instance.canonicalize("<"))
    self.assertEquals( "<", instance.canonicalize("<"))
    self.assertEquals( "<", instance.canonicalize("<"))
    self.assertEquals( "<", instance.canonicalize("<"))
    self.assertEquals( "<", instance.canonicalize("<"))
    self.assertEquals( "<", instance.canonicalize("<"))
    self.assertEquals( "<", instance.canonicalize("<"))
    self.assertEquals( "<", instance.canonicalize("<"))
    self.assertEquals( "<", instance.canonicalize("<"))
    self.assertEquals( "<", instance.canonicalize("<"))
    self.assertEquals( "<", instance.canonicalize("<"))
    self.assertEquals( "<", instance.canonicalize("<"))
    self.assertEquals( "<", instance.canonicalize("<"))
    self.assertEquals( "<", instance.canonicalize("<"))
    self.assertEquals( "<", instance.canonicalize("<"))
    self.assertEquals( "<", instance.canonicalize("<"))
    self.assertEquals( "<", instance.canonicalize("&lT"))
    self.assertEquals( "<", instance.canonicalize("&Lt"))
    self.assertEquals( "<", instance.canonicalize("<"))
    self.assertEquals( "<", instance.canonicalize("<"))
    self.assertEquals( "<", instance.canonicalize("&lT;"))
    self.assertEquals( "<", instance.canonicalize("≪"))
    self.assertEquals( "<", instance.canonicalize("<"))

    self.assertEquals( "<script>alert(\"hello\");</script>", instance.canonicalize("%3Cscript%3Ealert%28%22hello%22%29%3B%3C%2Fscript%3E") )
    # NOTE(review): the string literal in the next call is cut short by the
    # unescaped double quote after "hello".
    self.assertEquals( "<script>alert(\"hello\");</script>", instance.canonicalize("%3Cscript>alert%28%22hello"%29%3B%3C%2Fscript%3E", False) )

    # javascript escape syntax
    js = [JavascriptCodec()]
    instance = encoder_class( js )

    self.assertEquals( "\0", instance.canonicalize("\\0"))
    self.assertEquals( "\b", instance.canonicalize("\\b"))
    self.assertEquals( "\t", instance.canonicalize("\\t"))
    self.assertEquals( "\n", instance.canonicalize("\\n"))
    self.assertEquals( unichr(0x0b), instance.canonicalize("\\v"))
    self.assertEquals( "\f", instance.canonicalize("\\f"))
    self.assertEquals( "\r", instance.canonicalize("\\r"))
    self.assertEquals( "\'", instance.canonicalize("\\'"))
    self.assertEquals( "\"", instance.canonicalize("\\\""))
    self.assertEquals( "\\", instance.canonicalize("\\\\"))
    self.assertEquals( "<", instance.canonicalize("\\<"))

    self.assertEquals( "<", instance.canonicalize("\\u003c"))
    self.assertEquals( "<", instance.canonicalize("\\U003c"))
    self.assertEquals( "<", instance.canonicalize("\\u003C"))
    self.assertEquals( "<", instance.canonicalize("\\U003C"))
    self.assertEquals( "<", instance.canonicalize("\\x3c"))
    self.assertEquals( "<", instance.canonicalize("\\X3c"))
    self.assertEquals( "<", instance.canonicalize("\\x3C"))
    self.assertEquals( "<", instance.canonicalize("\\X3C"))

    # css escape syntax
    # be careful because some codecs see \0 as null byte
    css = [CSSCodec()]
    instance = encoder_class( css )
    self.assertEquals( "<", instance.canonicalize("\\3c")); # add strings to prevent null byte
    self.assertEquals( "<", instance.canonicalize("\\03c"))
    self.assertEquals( "<", instance.canonicalize("\\003c"))
    self.assertEquals( "<", instance.canonicalize("\\0003c"))
    self.assertEquals( "<", instance.canonicalize("\\00003c"))
    self.assertEquals( "<", instance.canonicalize("\\3C"))
    self.assertEquals( "<", instance.canonicalize("\\03C"))
    self.assertEquals( "<", instance.canonicalize("\\003C"))
    self.assertEquals( "<", instance.canonicalize("\\0003C"))
    self.assertEquals( "<", instance.canonicalize("\\00003C"))