Example #1
Reads flows from a mitmproxy dump file, keeps those that received a response, and writes them out as a HAR archive.
def har_write(input_filename, output_filename):
    # Flow dumps are binary; open the input in binary mode.
    with open(input_filename, 'rb') as input_file, \
            open(output_filename, 'w') as output_file:
        flow_reader = flow.FlowReader(input_file)
        # Keep only flows that actually received a response.
        flows = [fl for fl in flow_reader.stream() if fl.response]
        har = create_har(flows)
        output_file.write(json.dumps(har))
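A minimal invocation sketch; the filenames are placeholders, and flow, json, and create_har are assumed to be in scope exactly as in the example above:

har_write("traffic.mitm", "traffic.har")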
Example #2
Writes a single flow to a dump file via a dummy proxy cycle and confirms that FlowReader reads exactly one flow back.
    def test_write(self):
        with tutils.tmpdir() as d:
            p = os.path.join(d, "a")
            self.dummy_cycle(
                self.mkmaster(None, outfile=(p, "wb"), verbosity=0), 1, b""
            )
            assert len(list(flow.FlowReader(open(p, "rb")).stream())) == 1
Example #3
Feeds bogus bytes to FlowReader and checks that FlowReadError is raised (older API, where the message lives in .strerror).
    def test_error(self):
        sio = StringIO()
        sio.write("bogus")
        sio.seek(0)
        r = flow.FlowReader(sio)
        tutils.raises(flow.FlowReadError, list, r.stream())

        f = flow.FlowReadError("foo")
        assert f.strerror == "foo"
Example #4
The same error test against a newer mitmproxy API, where FlowReadException replaces FlowReadError and exposes its message via str().
    def test_error(self):
        sio = StringIO()
        sio.write("bogus")
        sio.seek(0)
        r = flow.FlowReader(sio)
        tutils.raises(FlowReadException, list, r.stream())

        f = FlowReadException("foo")
        assert str(f) == "foo"
Example #5
Serializes a flow with a bogus version tuple and verifies that FlowReader rejects it.
    def test_versioncheck(self):
        f = tutils.tflow()
        d = f.get_state()
        d["version"] = (0, 0)
        sio = StringIO()
        tnetstring.dump(d, sio)
        sio.seek(0)

        r = flow.FlowReader(sio)
        tutils.raises("version", list, r.stream())
Example #6
Test helper that writes three response flows and three error flows, then returns a FlowReader positioned at the start of the stream.
    def _treader(self):
        sio = StringIO()
        w = flow.FlowWriter(sio)
        # Write three flows with responses, then three with errors.
        for _ in range(3):
            f = tutils.tflow(resp=True)
            w.add(f)
        for _ in range(3):
            f = tutils.tflow(err=True)
            w.add(f)

        sio.seek(0)
        return flow.FlowReader(sio)
Example #7
Round-trips a flow whose request body spans a wide byte range (Python 2 str) and checks that state and request survive intact.
    def test_roundtrip(self):
        sio = StringIO()
        f = tutils.tflow()
        f.request.content = "".join(chr(i) for i in range(255))
        w = flow.FlowWriter(sio)
        w.add(f)

        sio.seek(0)
        r = flow.FlowReader(sio)
        l = list(r.stream())
        assert len(l) == 1

        f2 = l[0]
        assert f2.get_state() == f.get_state()
        assert f2.request == f.request
Example #8
Uses FilteredFlowWriter so that only flows matching the filter expression "~c 200" are written to the stream.
    def test_filter(self):
        sio = StringIO()
        fl = filt.parse("~c 200")
        w = flow.FilteredFlowWriter(sio, fl)

        f = tutils.tflow(resp=True)
        f.response.status_code = 200
        w.add(f)

        f = tutils.tflow(resp=True)
        f.response.status_code = 201
        w.add(f)

        sio.seek(0)
        r = flow.FlowReader(sio)
        # Only the status-200 flow matches "~c 200", so something is read back.
        assert len(list(r.stream()))
Example #9
Round-trip test on the bytes-based API: BytesIO, a full 0-255 byte body, and the marked attribute.
    def test_roundtrip(self):
        sio = io.BytesIO()
        f = tutils.tflow()
        f.marked = True
        f.request.content = bytes(bytearray(range(256)))
        w = flow.FlowWriter(sio)
        w.add(f)

        sio.seek(0)
        r = flow.FlowReader(sio)
        l = list(r.stream())
        assert len(l) == 1

        f2 = l[0]
        assert f2.get_state() == f.get_state()
        assert f2.request == f.request
        assert f2.marked
Example #10
Small helper that reads every flow back from the dump file at path p.
    def r():
        r = flow.FlowReader(open(p, "rb"))
        return list(r.stream())
Example #11
Converts mitmproxy dump files into Bro/Zeek-style HTTP log lines, splitting plain-HTTP and TLS traffic into separate files.
def run(dump_files):
    num_http = 0
    num_https = 0
    err_cnt = 0
    contacted_domains = dict()
    for dump_file in dump_files:
        print(dump_file)
        json_file_prefix = dump_file[:-5]
        flow_json_http = []
        flow_json_https = []
        http_json_file_name = '%s_http.log' % json_file_prefix
        https_json_file_name = '%s_https.log' % json_file_prefix

        with open(dump_file, "rb") as logfile:
            f_reader = flow.FlowReader(logfile)
            line_http = 0
            line_https = 0
            for f in f_reader.stream():
                try:
                    ts = f.request.timestamp_start
                    # ts = '%.6f' % ts
                    # print ts
                    bro_uid = 'S{0}'.format(''.join(
                        random.choice(string.ascii_uppercase
                                      + string.ascii_lowercase
                                      + string.digits)
                        for _ in range(17)))
                    id_orig_h = f.client_conn.address.address[0]
                    id_orig_p = f.client_conn.address.address[1]
                    id_resp_h = f.request.host
                    id_resp_p = f.request.port
                    method = f.request.method
                    host = f.request.host
                    if 'host' in f.request.headers:
                        host = f.request.headers['host']

                    if host not in contacted_domains:
                        contacted_domains[host] = 0
                    contacted_domains[host] += 1
                    uri = f.request.path
                    referrer = ''
                    if 'referrer' in f.request.headers:
                        referrer = f.request.headers['referrer']
                    user_agent = '-'
                    if 'User-Agent' in f.request.headers:
                        user_agent = f.request.headers['User-Agent']
                    status_code = f.response.status_code
                    status_msg = f.response.reason
                    request_body_len, response_body_len = get_bytes(f)
                    trans_depth = '-'
                    info_code = '-'
                    info_msg = '-'
                    filename = '-'
                    tags = '(empty)'
                    username = '******'
                    password = '******'
                    proxied = '-'
                    orig_fuids = '-'
                    orig_mime_types = '-'
                    resp_fuids = '-'
                    resp_mime_types = '-'
                    content_length = 0
                    content_encoding = '-'
                    content_type = '-'
                    if 'content-type' in f.request.headers:
                        content_type = f.request.headers['content-type']
                    if 'content-length' in f.request.headers:
                        content_length = f.request.headers['content-length']
                    transfer_encoding = '-'
                    post_body = '-'
                    if f.request.content is not None:
                        # URL-quote the body so it stays on a single log line;
                        # when decoding, unquote to restore the newlines.
                        post_body = urllib.quote(str(f.request.content))
                    # headers
                    client_header_names = ''
                    client_header_values = ''
                    for hk in f.request.headers:
                        hv = f.request.headers[hk]
                        client_header_names += '%s,' % hk

                        try:
                            hv = urllib.quote(hv)
                        finally:
                            client_header_values += '%s,' % hv
                    server_header_names = ''
                    server_header_values = ''
                    for hk in f.response.headers:
                        hv = f.response.headers[hk]
                        server_header_names += '%s,' % hk
                        try:
                            hv = urllib.quote(hv)
                        finally:
                            server_header_values += '%s,' % hv

                    http_entry = ''
                    http_entry += '%.6f\t%s\t%s\t%s\t%s\t%s\t' % (ts, bro_uid, id_orig_h, id_orig_p, id_resp_h, id_resp_p)
                    http_entry += '%s\t%s\t%s\t%s\t%s\t%s\t' % (trans_depth, method, host, uri, referrer, user_agent)
                    http_entry += '%s\t%s\t%s\t%s\t%s\t%s\t' % (request_body_len, response_body_len, status_code,
                                                                status_msg, info_code, info_msg)
                    http_entry += '%s\t%s\t%s\t%s\t%s\t%s\t' % (filename, tags, username, password, proxied, orig_fuids)
                    http_entry += '%s\t%s\t%s\t%s\t%s\t%s\t' % (orig_mime_types, resp_fuids, resp_mime_types,
                                                                content_length, content_encoding, content_type)
                    http_entry += '%s\t%s\t%s\t%s\t%s\t%s' % (transfer_encoding, post_body, client_header_names,
                                                                client_header_values, server_header_names,
                                                                server_header_values)

                    if f.client_conn.ssl_established:
                        line_https += 1
                        flow_json_https.append(http_entry)
                    else:
                        line_http += 1
                        flow_json_http.append(http_entry)
                except flow.FlowReadError as v:
                    print("Flow file corrupted. Stopped loading.")
                    print(str(v))
                    err_cnt += 1
                    json.dump(f.get_state(), sys.stdout, indent=4)
            num_http += len(flow_json_http)
            num_https += len(flow_json_https)
            if num_http > 0:
                with open(http_json_file_name, 'w') as hf:
                    hf.write(table_header + '\n')
                    for he in flow_json_http:
                        hf.write('%s\n' % he)
            if num_https > 0:
                with open(https_json_file_name, 'w') as hf:
                    hf.write(table_header + '\n')
                    for he in flow_json_https:
                        hf.write('%s\n' % he)
    result = dict()
    result['num_http'] = num_http
    result['num_https'] = num_https
    result['num_errors'] = err_cnt
    result['contacted'] = contacted_domains
    result['num_domains'] = len(contacted_domains)
    return result
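A hypothetical driver for run(), assuming the dump files carry a five-character ".mitm" suffix (which run() strips to build the log-file prefix); the glob pattern is a placeholder:

import glob

stats = run(glob.glob("dumps/*.mitm"))
print('%d HTTP, %d HTTPS, %d errors across %d domains' % (
    stats['num_http'], stats['num_https'],
    stats['num_errors'], stats['num_domains']))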
Example #12
Extracts HTTPS requests and downloaded payloads from a capture file, hashing each download and recursively unpacking archives.
def analyse_https(data, https_file):
    # NOTE: data here is not aggregated at the domain layer; we simply copy
    # every request into the dictionary.

    if not isinstance(data, dict):
        raise Exception("This method requires a dictionary as data")

    # Check if the capture file exists
    if not os.path.exists(https_file):
        raise Exception("PCAP File does not exist.")
    else:
        https_file = os.path.abspath(https_file)

    # Extract https_requests
    requests = []
    downloads = []
    with open(https_file, "rb") as logfile:
        freader = Flow.FlowReader(logfile)
        # For each flow, get the relative request
        for flow in freader.stream():
            r = dict()
            r['first_line_format'] = str(flow.request.first_line_format)
            r['method'] = flow.request.method
            r['scheme'] = flow.request.scheme
            r['host'] = flow.request.host
            r['hostname'] = flow.request.pretty_host
            r['port'] = flow.request.port
            r['path'] = flow.request.path
            r['http_version'] = flow.request.http_version
            #r['headers']=flow.request.data.headers
            # We also log the contents of the request. This might cause the log file to grow...
            r['content'] = base64.encodestring(flow.request.content)
            r['timestamp_start'] = flow.request.timestamp_start
            r['timestamp_end'] = flow.request.timestamp_end
            r['fullpath'] = flow.request.url

            requests.append(r)

            if flow.response is not None and flow.response.content:
                # In order to analyze the response content, we need to store it on disk
                with tempfile.NamedTemporaryFile(delete=True) as fp:
                    fp.write(flow.response.content)
                    fp.flush()

                    # Define a recursive function used whenever the file is an archive, so we can deep inspect all the
                    # contained files
                    def recursive_analysis(flow, fname, parent_archive_sha1,
                                           nest_level, downloads):
                        m_type = mime.from_file(fname)
                        size = os.path.getsize(fname)
                        # Binary mode so the hashes are computed over raw bytes.
                        with open(fname, 'rb') as f:
                            # Retrieve basic info about this file, such as MD5, SHA1, etc.
                            m = md5()
                            s = sha1()
                            f.seek(0, 0)
                            for chunk in iter(lambda: f.read(4096), b""):
                                m.update(chunk)
                                s.update(chunk)

                            str_md5 = m.hexdigest()
                            str_sha1 = s.hexdigest()

                            str_fuzzy = ssdeep.hash_from_file(f.name)

                            # Store info on the referenced dict
                            d = dict()
                            d['host'] = flow.request.host
                            d['hostname'] = flow.request.pretty_host
                            d['port'] = flow.request.port
                            d['path'] = flow.request.path
                            d['scheme'] = flow.request.scheme
                            d['method'] = flow.request.method
                            #d['request_headers'] = flow.request.headers
                            d['status_code'] = flow.response.status_code
                            d['fullpath'] = r['fullpath']
                            #d['response_headers'] = flow.response.status_code
                            d['sha1'] = str_sha1.lower()
                            d['md5'] = str_md5.lower()
                            d['fuzzy'] = str_fuzzy.lower()
                            d['mime'] = m_type
                            d['size'] = size
                            d['parent_archive'] = parent_archive_sha1
                            d['nest_level'] = nest_level

                            downloads.append(d)

                            # Try to extract the file if it is an archive
                            tmpdir = tempfile.mkdtemp()
                            try:
                                # Brute-force approach: we don't even check
                                # the mime type; we try to unpack every archive.
                                # Extract all the files
                                patoolib.extract_archive(f.name, outdir=tmpdir)

                                # Analyze each file
                                files = [
                                    os.path.join(tmpdir, ff)
                                    for ff in os.listdir(tmpdir)
                                    if os.path.isfile(os.path.join(tmpdir, ff))
                                ]
                                for ff in files:
                                    recursive_analysis(flow, ff, str_sha1,
                                                       nest_level + 1,
                                                       downloads)
                            except Exception:
                                # Not an archive (or extraction failed): skip.
                                pass
                            finally:
                                # Remove the temporary file directory
                                shutil.rmtree(tmpdir)

                    recursive_analysis(flow, fp.name, None, 0, downloads)

    # Assign data to the dictionary
    data['https_requests'] = requests
    data['https_downloads'] = downloads
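A usage sketch for analyse_https: it mutates the dictionary it is given rather than returning one, and the capture path below is a placeholder:

data = dict()
analyse_https(data, "capture.mitm")
print('%d requests, %d downloads' % (
    len(data['https_requests']), len(data['https_downloads'])))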
Example #13
Appends to an existing dump file across two proxy cycles and confirms that FlowReader yields two flows.
    def test_write_append(self):
        with tutils.tmpdir() as d:
            p = os.path.join(d, "a.append")
            self._dummy_cycle(1, None, "", outfile=(p, "wb"), verbosity=0)
            self._dummy_cycle(1, None, "", outfile=(p, "ab"), verbosity=0)
            assert len(list(flow.FlowReader(open(p, "rb")).stream())) == 2
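To tie the test examples together, a condensed write/read round trip distilled from the snippets above; note that tutils.tflow comes from mitmproxy's test suite, not its public API, so this is a sketch rather than production code:

import io

def roundtrip_count(n):
    sio = io.BytesIO()
    w = flow.FlowWriter(sio)
    # Write n response flows, rewind, and count what FlowReader yields.
    for _ in range(n):
        w.add(tutils.tflow(resp=True))
    sio.seek(0)
    return len(list(flow.FlowReader(sio).stream()))

assert roundtrip_count(3) == 3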