示例#1
0
def test_gzip():
    """Check the gzip codec: a round trip restores the input, garbage gives None."""
    encoded = encoding.encode("gzip", "string")
    assert encoding.decode("gzip", encoded) == "string"
    # Data that is not a gzip stream is expected to decode to None.
    assert encoding.decode("gzip", "bogus") is None
示例#2
0
 def test_simple(self):
     """Check the deflate codec on a full and a trimmed stream, and on garbage."""
     encoded = encoding.encode("deflate", "string")
     assert encoding.decode("deflate", encoded) == "string"
     # Dropping 2 leading and 4 trailing bytes presumably removes the zlib
     # header and checksum, leaving a raw deflate stream, which must decode
     # as well -- TODO confirm against encoding.encode.
     assert encoding.decode("deflate", encoded[2:-4]) == "string"
     # Data that is not a deflate stream is expected to decode to None.
     assert encoding.decode("deflate", "bogus") is None
示例#3
0
def test_deflate():
    """Check the deflate codec on a full and a trimmed stream, and on garbage."""
    encoded = encoding.encode("deflate", "string")
    assert encoding.decode("deflate", encoded) == "string"
    # Dropping 2 leading and 4 trailing bytes presumably removes the zlib
    # header and checksum, leaving a raw deflate stream, which must decode
    # as well -- TODO confirm against encoding.encode.
    assert encoding.decode("deflate", encoded[2:-4]) == "string"
    # Data that is not a deflate stream is expected to decode to None.
    assert encoding.decode("deflate", "bogus") is None
    def handle_response(self, flow):
        """
        Inject a script tag before </head> into HTML responses, then reply.

        The body is decoded according to the response's Content-Encoding
        header, patched, and re-encoded with the same encoding. When the body
        cannot be decoded, or the injection fails, the response is passed on
        unmodified instead of being overwritten with a broken body.

        flow -- the intercepted flow; flow.reply() is called at the end
                whether or not the response was modified.
        """
        headers = flow.response.headers
        content_type = headers['Content-Type']

        # We only inject into html responses
        if content_type and str(content_type[0]).startswith('text/html'):
            content_encoding = headers['Content-Encoding']

            # Decoding of message body (zip/deflate) needed?
            body = flow.response.content
            if content_encoding:
                body = encoding.decode(content_encoding[0], body)

            # encoding.decode yields None for undecodable data or an unknown
            # encoding; such responses are left untouched.
            if body is not None:
                # We inject the js code directly before </head>
                injected_url = "http://" + self.options.target + self.options.path
                injected_code = "<script language=\"javascript\" type=\"text/javascript\" src=\"" + injected_url + "\"></script>"
                try:
                    body = body.replace("</head>", injected_code + "</head>")
                except (UnicodeError, TypeError):
                    # Best effort: a body that cannot be combined with the
                    # injected text is forwarded without the script tag.
                    pass

                # (Re)Encoding needed?
                if content_encoding:
                    body = encoding.encode(content_encoding[0], body)

                flow.response.content = body

        flow.reply()
示例#5
0
    def handle_response(self, flow):
        """
        Store the request/response pair of *flow* unless it is filtered out.

        A flow is skipped when lytool.filter_url accepts its URL, when
        self.ana2 accepts the request or response Content-type header, or when
        self.ana_host accepts its host and port. Every other flow is passed to
        self.db_save with the URL unquoted and the response body decoded
        according to its content-encoding header. flow.reply() is called in
        either case.
        """
        request = flow.request
        response = flow.response

        filtered = (lytool.filter_url(request.url)
                    or self.ana2(request.headers.get('Content-type'))
                    or self.ana2(response.headers.get('Content-type'))
                    or self.ana_host(request.host, request.port))

        if not filtered:
            body = response.content
            content_encoding = response.headers.get_first("content-encoding")
            if content_encoding:
                decoded = encoding.decode(content_encoding, body)
                # encoding.decode yields None when the body cannot be decoded;
                # store the raw body in that case instead of None.
                if decoded is not None:
                    body = decoded
            self.db_save(request.host, request.port,
                         unquote_plus(request.url), request.path,
                         request.method, request.scheme, request.httpversion,
                         request.headers, request.content,
                         response.headers, body)

        flow.reply()
    def handle_response(self, flow):
        """
        Inject a script tag before </head> into HTML responses, then reply.

        The body is decoded according to the response's Content-Encoding
        header, patched, and re-encoded with the same encoding. When the body
        cannot be decoded, or the injection fails, the response is passed on
        unmodified instead of being overwritten with a broken body.

        flow -- the intercepted flow; flow.reply() is called at the end
                whether or not the response was modified.
        """
        response_headers = flow.response.headers
        content_type = response_headers['Content-Type']
        is_html = bool(content_type) and str(content_type[0]).startswith('text/html')

        # We only inject into html responses
        if is_html:
            content_encoding = response_headers['Content-Encoding']

            # Decoding of message body (zip/deflate) needed?
            body = flow.response.content
            if content_encoding:
                body = encoding.decode(content_encoding[0], body)

            # encoding.decode yields None for undecodable data or an unknown
            # encoding; such responses are left untouched.
            if body is not None:
                # We inject the js code directly before </head>
                injected_url = "http://" + self.options.target + self.options.path
                injected_code = "<script language=\"javascript\" type=\"text/javascript\" src=\"" + injected_url + "\"></script>"
                try:
                    body = body.replace("</head>", injected_code + "</head>")
                except (UnicodeError, TypeError):
                    # Best effort: a body that cannot be combined with the
                    # injected text is forwarded without the script tag.
                    pass

                # (Re)Encoding needed?
                if content_encoding:
                    body = encoding.encode(content_encoding[0], body)

                flow.response.content = body

        flow.reply()
示例#7
0
 def addFlow(self, flow):
     """
     Adds a flow to all lists in the corresponding format

     The state returned by flow._get_state() gets an "id", has the request and
     response bodies replaced by a "contentLength" entry and is appended to
     self._flows_serialized. The bodies, decoded by Content-Encoding and then
     by the Content-Type charset, are appended to self._decoded_contents, and
     the flow itself to self._flows.

     Returns the index of the newly added flow.
     """
     flowRepr = flow._get_state()
     flowRepr["id"] = len(self._flows_serialized)

     #In transparent mode, we are unsure about the actual host, but we want to show it in the GUI.
     #Firstly, we get the Host from the request headers.
     #As this might be faked, we go on and check whether the request IP matches one of the DNS entries belonging to the headerHost
     #NOTE(review): this lookup used to be guarded by
     #"True or FlowCollection.regex_isip.match(host)", so the IP check never ran
     #and the lookup was done for every flow. That behaviour is kept here.
     try:
         requestRepr = flowRepr["request"]
         headerHost = flow.request.headers["Host"]
         if headerHost:
             headerHost = headerHost[0]
             info = socket.getaddrinfo(requestRepr["host"], requestRepr["port"], 0, 0, socket.SOL_TCP)
             for entry in info:
                 sockaddr = entry[4]
                 if sockaddr[0] == requestRepr["host"] and sockaddr[1] == requestRepr["port"]:
                     requestRepr["hostFormatted"] = headerHost
                     break
     except socket.gaierror:
         pass
     except Exception:
         import traceback
         print(traceback.format_exc())

     default_charset = "latin-1" #HTTP 1.1 says that the default charset is ISO-8859-1
     decoded_content = {}
     for part in ["request", "response"]:
         #strip content out of the flowRepr
         flowRepr[part]["contentLength"] = len(flowRepr[part]["content"])
         del flowRepr[part]["content"]

         r = getattr(flow, part)
         decoded = r.content

         #decode with http content-encoding
         ce = r.headers["content-encoding"]
         if ce and ce[0] in encoding.ENCODINGS:
             unpacked = encoding.decode(ce[0], r.content)
             #encoding.decode yields None for undecodable data; keep the raw body then
             if unpacked is not None:
                 decoded = unpacked

         #decode with http content-type encoding
         charset = default_charset
         ct = r.headers["content-type"]
         if ct:
             m = FlowCollection.regex_charset.search(ct[0])
             if m:
                 charset = m.group(1).strip('"\'')
         #TODO: guess from html metadata
         try:
             decoded = decoded.decode(charset)
         except Exception:
             #unknown or wrong charset: retry with the default one
             try:
                 decoded = decoded.decode(default_charset)
             except Exception:
                 print("Warning: Could not decode request.")
                 import traceback
                 print(traceback.format_exc())

         decoded_content[part] = decoded

     self._flows.append(flow)
     self._flows_serialized.append(flowRepr)
     self._decoded_contents.append(decoded_content)
     return len(self._flows_serialized) - 1
示例#8
0
def test_gzip():
    """Check the gzip codec: a round trip restores the input, garbage gives None."""
    round_tripped = encoding.decode("gzip", encoding.encode("gzip", "string"))
    assert round_tripped == "string"
    # Data that is not a gzip stream is expected to decode to None.
    assert encoding.decode("gzip", "bogus") is None
示例#9
0
    def add(self, flow):
        """
        Gets called whenever a new flow has been added.

        Writes the (content-decoded) response body to a file below self.path
        whose location mirrors the request's host and path. Over-long names
        are shortened, a clash between a file and a directory of the same name
        is resolved by appending "[dir]" to the directory, and an existing file
        with different content is kept by giving the new file a running
        number. Nothing is written for an empty response or when a file with
        identical content already exists.
        """

        #dumping empty flows is stupid (this also covers a missing body)
        if not flow.response.content:
            return

        content = flow.response.content
        enc = flow.response.headers.get("content-encoding")
        if enc and enc[0] != "identity":
            decoded = encoding.decode(enc[0], content)
            #keep the raw body when decoding yields nothing
            if decoded:
                content = decoded

        #get host and path
        host = flow.request.host
        if flow.request.port != 80:
            host += "-" + str(flow.request.port)
        path = unquote(
            flow.request.path.split("#")[0].split("?")[0].lstrip("/\\"))
        if path == "":
            path = "__root__"

        #subdir is our relative path
        subdir = os.path.join(host, path)

        #remove invalid characters
        subdir = os.path.normpath(allowed_chars.sub('_', subdir))

        #forbid relative directory changes.
        subdir = "/".join(
            i.lstrip(".") for i in subdir.replace("\\", "/").split("/"))
        #shorten every path segment longer than 40 characters to head[...]tail
        subdir = "/".join(i[:20] + "[...]" + i[-20:] if (len(i) > 40) else i
                          for i in subdir.split("/"))

        #cut off too long filenames
        MAX_DIR_LENGTH = 150
        MAX_FILE_LENGTH = 50
        MAX_EXT_LENGTH = 30
        if len(subdir) > MAX_DIR_LENGTH:
            if subdir[MAX_DIR_LENGTH] == "/":
                subdir = subdir[0:MAX_DIR_LENGTH + 1]
            else:
                subdir = subdir[0:MAX_DIR_LENGTH]
            subdir += "[...]"

        #ensure that subdir is relative, otherwise it could exploit outside of self.path
        #os.path.join(foo,"/bar") => "/bar"
        subdir = os.path.normpath("./" + subdir)

        filename = os.path.join(self.path, subdir)

        #We have the problematic situation that a both foo.com/bar
        #and foo.com/bar/baz can be both valid files.
        #However, we cannot create both a folder and a file both called "baz" in the same directory
        #A possible approach would be using folders for everything and placing __resource__ files in them.
        #While this would be a much consistent structure, it doesn't represent the file system very well.
        #As this view is for visualization purposes only, we took the approach to append [dir] to conflicting folders.
        #to accomplish this, we use a slightly modified version of os.makedirs
        def makedirs(directory):
            head, tail = os.path.split(directory)
            #an empty head means the top of a relative path has been reached;
            #recursing on it would never terminate
            if head and not os.path.isdir(head):
                head = makedirs(head)
                directory = os.path.join(head, tail)
            if os.path.isfile(directory):
                #our special case - a file occupies the name, use "<name>[dir]" instead
                tail += "[dir]"
                directory = os.path.join(head, tail)
                return makedirs(directory)
            if not os.path.isdir(directory):
                os.mkdir(directory)
            return directory

        d, filename = os.path.split(filename)
        filename = os.path.join(makedirs(d), filename)

        filename, ext = os.path.splitext(filename)
        if len(filename) > MAX_DIR_LENGTH + MAX_FILE_LENGTH:
            filename = filename[0:MAX_DIR_LENGTH + MAX_FILE_LENGTH] + "[...]"
        if len(ext) >= MAX_EXT_LENGTH:
            ext = "[..]" + ext[-MAX_EXT_LENGTH:]

        #a directory already uses our file name: move it out of the way
        if os.path.isdir(filename + ext):
            os.rename(filename + ext, filename + ext + "[dir]")

        #rename if file already exists and content is different
        appendix = ""
        counter = 0
        while os.path.isfile(filename + appendix + ext):
            with open(filename + appendix + ext, "rb") as f:
                if f.read() == content:
                    #identical content is already on disk
                    return
            counter += 1
            appendix = str(counter)

        filename = filename + appendix + ext

        with open(filename, 'wb') as f:
            f.write(str(content))
示例#10
0
 def test_simple(self):
     """The identity codec hands data back unchanged; an unknown codec encodes to a falsy value."""
     payload = "string"
     assert encoding.decode("identity", payload) == payload
     assert encoding.encode("identity", payload) == payload
     assert not encoding.encode("nonexistent", payload)
示例#11
0
    def addFlow(self, flow):
        """
        Adds a flow to all lists in the corresponding format

        The state returned by flow._get_state() gets an "id", has the request
        and response bodies replaced by "contentLength" and "contentChecksums"
        entries and is appended to self._flows_serialized. Checksums are taken
        over the content-decoded bodies; for requests, getParts() may split a
        multipart body into separately hashed parts. The bodies, transcoded
        from the Content-Type charset to UTF-8, are appended to
        self._decoded_contents, and the flow itself to self._flows.

        Returns the index of the newly added flow.
        """
        flowRepr = flow._get_state()
        flowRepr["id"] = len(self._flows_serialized)

        #In transparent mode, we are unsure about the actual host, but we want to show it in the GUI.
        #Firstly, we get the Host from the request headers.
        #As this might be faked, we go on and check whether the request IP matches one of the DNS entries belonging to the headerHost
        if FlowCollection.regex_isip.match(flowRepr["request"]["host"]):
            try:
                headerHost = flow.request.headers["Host"]
                if headerHost:
                    headerHost = headerHost[0]
                    info = socket.getaddrinfo(flowRepr["request"]["host"],
                                              flowRepr["request"]["port"], 0,
                                              0, socket.SOL_TCP)
                    for entry in info:
                        if entry[4][0] == flowRepr["request"]["host"]:
                            flowRepr["request"]["host_guess"] = headerHost
                            break
            except socket.gaierror:
                pass
            except Exception:
                import traceback
                print(traceback.format_exc())

        #Save decoded content
        decoded_content = {}
        algorithms = ["md5", "sha1", "sha256"]
        default_charset = "latin-1"  #HTTP 1.1 says that the default charset is ISO-8859-1
        #RFC2616 3.7.1
        for part in ["request", "response"]:
            #strip content out of the flowRepr
            flowRepr[part]["contentLength"] = len(flowRepr[part]["content"])
            del flowRepr[part]["content"]

            r = getattr(flow, part)
            decoded = r.content

            #decode with http content-encoding
            try:
                ce = r.headers["content-encoding"]
                if ce and ce[0] in encoding.ENCODINGS:
                    decoded_ = encoding.decode(ce[0], decoded)
                    if decoded_ is not None:  #If the decoding fails, encoding.decode returns None.
                        decoded = decoded_
            except Exception:
                print("Warning: Data cannot be decoded with given Content Encoding.")

            #calculate hashsums
            flowRepr[part]["contentChecksums"] = {}
            parts = {"Checksum": decoded}

            #Handle multipart checksums
            if part == "request":
                try:
                    # odict -> dict with lowered header names. The values keep
                    # their case: a multipart boundary is case-sensitive.
                    headers = dict((str(name).lower(), str(value))
                                   for name, value in flow.request.headers)
                    fs = cgi.FieldStorage(StringIO.StringIO(decoded),
                                          headers,
                                          environ={'REQUEST_METHOD': 'POST'})
                    parts = getParts(fs)
                except Exception:
                    import traceback
                    traceback.print_exc()
                    print("Warning: Cannot decode multipart")

            for item, data in parts.viewitems():
                checksums = {}
                for a in algorithms:
                    checksums[a] = getattr(hashlib, a)(data).hexdigest()
                flowRepr[part]["contentChecksums"][item] = checksums

            #decode with http content-type encoding
            charset = default_charset
            ct = r.headers["content-type"]
            if ct:
                m = FlowCollection.regex_charset.search(ct[0])
                if m:
                    charset = m.group(1).strip('"\'')
            #TODO: guess from html metadata
            try:
                decoded = decoded.decode(charset)
            except Exception:
                #unknown or wrong charset: retry with the default one
                try:
                    decoded = decoded.decode(default_charset)
                except Exception:
                    print("Warning: Could not decode request.")
                    import traceback
                    print(traceback.format_exc())

            try:
                decoded = decoded.encode('utf-8')
            except Exception:
                print("Warning: Cannot encode request to utf8")
            decoded_content[part] = decoded

        self._flows.append(flow)
        self._flows_serialized.append(flowRepr)
        self._decoded_contents.append(decoded_content)
        return len(self._flows_serialized) - 1
示例#12
0
 def test_simple(self):
     """Identity encoding is a no-op in both directions; an unknown encoding yields a falsy result."""
     text = "string"
     decoded = encoding.decode("identity", text)
     assert decoded == text
     encoded = encoding.encode("identity", text)
     assert encoded == text
     assert not encoding.encode("nonexistent", text)
示例#13
0
 def test_simple(self):
     """Check the gzip codec: a round trip restores the input, garbage gives None."""
     encoded = encoding.encode("gzip", "string")
     assert encoding.decode("gzip", encoded) == "string"
     # Data that is not a gzip stream is expected to decode to None.
     assert encoding.decode("gzip", "bogus") is None
示例#14
0
 def test_fallthrough(self):
     """Decoding with an encoding name that is not known is expected to give None."""
     assert encoding.decode("nonexistent encoding", "string") is None
示例#15
0
 def addFlow(self, flow):
     """
     Adds a flow to all lists in the corresponding format

     The state returned by flow._get_state() gets an "id", has the request and
     response bodies replaced by "contentLength" and "contentChecksums" entries
     and is appended to self._flows_serialized. Checksums are taken over the
     content-decoded bodies before any charset conversion, so they describe
     the transferred payload; for requests, getParts() may split a multipart
     body into separately hashed parts. The bodies, transcoded from the
     Content-Type charset to UTF-8, are appended to self._decoded_contents,
     and the flow itself to self._flows.

     Returns the index of the newly added flow.
     """
     flowRepr = flow._get_state()
     flowRepr["id"] = len(self._flows_serialized)

     #In transparent mode, we are unsure about the actual host, but we want to show it in the GUI.
     #Firstly, we get the Host from the request headers.
     #As this might be faked, we go on and check whether the request IP matches one of the DNS entries belonging to the headerHost
     #NOTE(review): this lookup used to be guarded by
     #"True or FlowCollection.regex_isip.match(host)", so the IP check never ran
     #and the lookup was done for every flow. That behaviour is kept here.
     try:
         requestRepr = flowRepr["request"]
         headerHost = flow.request.headers["Host"]
         if headerHost:
             headerHost = headerHost[0]
             info = socket.getaddrinfo(requestRepr["host"], requestRepr["port"], 0, 0, socket.SOL_TCP)
             for entry in info:
                 sockaddr = entry[4]
                 if sockaddr[0] == requestRepr["host"] and sockaddr[1] == requestRepr["port"]:
                     requestRepr["hostFormatted"] = headerHost
                     break
     except socket.gaierror:
         pass
     except Exception:
         import traceback
         print(traceback.format_exc())

     #Save decoded content
     default_charset = "latin-1" #HTTP 1.1 says that the default charset is ISO-8859-1
     #RFC2616 3.7.1
     decoded_content = {}
     payloads = {} #content-decoded bodies before charset conversion, used for the checksums
     for part in ["request", "response"]:
         #strip content out of the flowRepr
         flowRepr[part]["contentLength"] = len(flowRepr[part]["content"])
         del flowRepr[part]["content"]

         r = getattr(flow, part)
         decoded = r.content

         #decode with http content-encoding
         try:
             ce = r.headers["content-encoding"]
             if ce and ce[0] in encoding.ENCODINGS:
                 unpacked = encoding.decode(ce[0], r.content)
                 #encoding.decode yields None for undecodable data; keep the raw body then
                 if unpacked is not None:
                     decoded = unpacked
         except Exception:
             print("Warning: Data cannot be decoded with given Content Encoding.")
         payloads[part] = decoded

         #decode with http content-type encoding
         charset = default_charset
         ct = r.headers["content-type"]
         if ct:
             m = FlowCollection.regex_charset.search(ct[0])
             if m:
                 charset = m.group(1).strip('"\'')
         #TODO: guess from html metadata
         try:
             decoded = decoded.decode(charset)
         except Exception:
             #unknown or wrong charset: retry with the default one
             try:
                 decoded = decoded.decode(default_charset)
             except Exception:
                 print("Warning: Could not decode request.")
                 import traceback
                 print(traceback.format_exc())

         try:
             decoded = decoded.encode('utf-8')
         except Exception:
             print("Warning: Cannot encode request to utf8")
         decoded_content[part] = decoded

     #calculate hashsums
     algorithms = ["md5", "sha256"]
     for part in ["request", "response"]:

         flowRepr[part]["contentChecksums"] = {}

         parts = {"Checksum": payloads[part]}

         #Handle multipart checksums
         if part == "request":
             try:
                 # odict -> dict with lowered header names. The values keep
                 # their case: a multipart boundary is case-sensitive.
                 headers = dict((str(name).lower(), str(value)) for name, value in flow.request.headers)
                 fs = cgi.FieldStorage(StringIO.StringIO(payloads[part]), headers, environ={'REQUEST_METHOD': 'POST'})
                 parts = getParts(fs)
             except Exception:
                 import traceback
                 traceback.print_exc()
                 print("Warning: Cannot decode multipart")

         #TODO: Analyze request and split it up into parameters to match file upload
         for item, data in parts.viewitems():
             checksums = {}
             for a in algorithms:
                 checksums[a] = getattr(hashlib, a)(data).hexdigest()
             flowRepr[part]["contentChecksums"][item] = checksums

     self._flows.append(flow)
     self._flows_serialized.append(flowRepr)
     self._decoded_contents.append(decoded_content)
     return len(self._flows_serialized) - 1
示例#16
0
 def test_simple(self):
     """Check the gzip codec: a round trip restores the input, garbage gives None."""
     round_tripped = encoding.decode("gzip", encoding.encode("gzip", "string"))
     assert round_tripped == "string"
     # Data that is not a gzip stream is expected to decode to None.
     assert encoding.decode("gzip", "bogus") is None
示例#17
0
def test_identity():
    """Check the identity codec and the handling of unknown encoding names."""
    # "identity" hands the data back unchanged in both directions.
    assert encoding.decode("identity", "string") == "string"
    assert encoding.encode("identity", "string") == "string"
    # Unknown names: encode gives a falsy result, decode gives None.
    assert not encoding.encode("nonexistent", "string")
    assert encoding.decode("nonexistent encoding", "string") is None
示例#18
0
 def test_fallthrough(self):
     """An encoding name the codec module does not know is expected to decode to None."""
     result = encoding.decode("nonexistent encoding", "string")
     assert result is None
示例#19
0
 def add(self, flow):
     """
     Gets called whenever a new flow has been added.

     Writes the (content-decoded) response body to a file below self.path
     whose location mirrors the request's host and path. Over-long names are
     shortened, a clash between a file and a directory of the same name is
     resolved by appending "[dir]" to the directory, and an existing file with
     different content is kept by giving the new file a running number.
     Nothing is written for an empty response or when a file with identical
     content already exists.
     """

     #dumping empty flows is stupid (this also covers a missing body)
     if not flow.response.content:
         return

     #FIXME: What about content type charset?
     content = flow.response.content
     enc = flow.response.headers.get("content-encoding")
     if enc and enc[0] != "identity":
         decoded = encoding.decode(enc[0], content)
         #keep the raw body when decoding yields nothing
         if decoded:
             content = decoded

     #get host and path
     host = flow.request.host
     if flow.request.port != 80:
         host += "-" + str(flow.request.port)
     path = flow.request.path.split("#")[0].split("?")[0].lstrip("/\\")
     if path == "":
         path = "__root__"

     #subdir is our relative path
     subdir = os.path.join(host, path)

     #forbid relative directory changes.
     subdir = "/".join(i.lstrip(".") for i in subdir.replace("\\", "/").split("/"))
     #shorten every path segment longer than 40 characters to head[...]tail
     subdir = "/".join(i[:20] + "[...]" + i[-20:] if (len(i) > 40) else i for i in subdir.split("/"))

     #remove invalid characters
     subdir = os.path.normpath("".join(i for i in subdir if i not in r':*?"<>|'))

     #cut off too long filenames
     MAX_DIR_LENGTH = 150
     MAX_FILE_LENGTH = 50
     MAX_EXT_LENGTH = 30
     if len(subdir) > MAX_DIR_LENGTH:
         if subdir[MAX_DIR_LENGTH] == "/":
             subdir = subdir[0:MAX_DIR_LENGTH + 1]
         else:
             subdir = subdir[0:MAX_DIR_LENGTH]
         subdir += "[...]"

     #ensure that subdir is relative, otherwise it could exploit outside of self.path
     #os.path.join(foo,"/bar") => "/bar"
     subdir = os.path.normpath("./" + subdir)

     filename = os.path.join(self.path, subdir)

     #We have the problematic situation that a both foo.com/bar
     #and foo.com/bar/baz can be both valid files.
     #However, we cannot create both a folder and a file both called "baz" in the same directory
     #A possible approach would be using folders for everything and placing __resource__ files in them.
     #While this would be a much consistent structure, it doesn't represent the file system very well.
     #As this view is for visualization purposes only, we took the approach to append [dir] to conflicting folders.
     #to accomplish this, we use a slightly modified version of os.makedirs
     def makedirs(directory):
         head, tail = os.path.split(directory)
         #an empty head means the top of a relative path has been reached;
         #recursing on it would never terminate
         if head and not os.path.isdir(head):
             head = makedirs(head)
             directory = os.path.join(head, tail)
         if os.path.isfile(directory):
             #our special case - a file occupies the name, use "<name>[dir]" instead
             tail += "[dir]"
             directory = os.path.join(head, tail)
             return makedirs(directory)
         if not os.path.isdir(directory):
             os.mkdir(directory)
         return directory

     d, filename = os.path.split(filename)
     filename = os.path.join(makedirs(d), filename)

     filename, ext = os.path.splitext(filename)
     if len(filename) > MAX_DIR_LENGTH + MAX_FILE_LENGTH:
         filename = filename[0:MAX_DIR_LENGTH + MAX_FILE_LENGTH] + "[...]"
     if len(ext) >= MAX_EXT_LENGTH:
         ext = "[..]" + ext[-MAX_EXT_LENGTH:]

     #a directory already uses our file name: move it out of the way
     if os.path.isdir(filename + ext):
         os.rename(filename + ext, filename + ext + "[dir]")

     #rename if file already exists and content is different
     appendix = ""
     counter = 0
     while os.path.isfile(filename + appendix + ext):
         #binary mode, matching the 'wb' mode the files are written with
         with open(filename + appendix + ext, "rb") as f:
             if f.read() == content:
                 #identical content is already on disk
                 return
         counter += 1
         appendix = str(counter)

     filename = filename + appendix + ext

     with open(filename, 'wb') as f:
         f.write(str(content))