def parse_mitm_dump(dumpfile): http_dicts = [] if os.path.isfile(dumpfile): fr = flow.FlowReader(open(dumpfile)) try: for msg in fr.stream(): http_dict = {} http_dict["req_content"] = "" http_dict["resp_headers"] = [] http_dict["req_url"] = msg.request.url http_dict["referer"] = msg.request.headers['Referer'][0]\ if msg.request.headers['Referer'] else "" http_dict["method"] = msg.request.method http_dict["req_headers"] = msg.request.headers.lst if msg.request.content: http_dict["req_content"] = msg.request.content if msg.response: http_dict["resp_headers"] = msg.response.headers.lst if hasattr(msg.response, "code"): http_dict["resp_code"] = msg.response.code elif hasattr(msg.response, "status_code"): http_dict["resp_code"] = msg.response.code else: # this shouldn't happen http_dict["resp_code"] = 0 print "HTTP response status code is missing" http_dicts.append(http_dict) except flow.FlowReadError as exc: print "Error reading mitm dump %s" % exc else: print "Cannot find mitm dump %s" % dumpfile return http_dicts
class HoneyProxyMaster(FlowMaster):
    """
    The HoneyProxy proxy core, in some parts pretty similar to mitmproxys
    DumpMaster.
    """
    def __init__(self, server, options, sessionFactory):
        # Delegate core proxy state handling to FlowMaster with a fresh State.
        FlowMaster.__init__(self, server, flow.State())
        self.sessionFactory = sessionFactory
        self.o = options
        self.flows = FlowCollection()
        self.anticache = options.anticache
        self.anticomp = options.anticomp
        # Sticky cookie/auth filters are only installed when configured.
        if options.stickycookie:
            self.set_stickycookie(options.stickycookie)
        if options.stickyauth:
            self.set_stickyauth(options.stickyauth)
        if options.wfile:
            # Write captured flows to a single dump file, creating the
            # parent directory if necessary.
            path = os.path.abspath(os.path.expanduser(options.wfile))
            directory = os.path.split(path)[0]
            if not os.path.exists(directory):
                os.makedirs(directory)
            try:
                f = file(path, "wb")  # Python 2 builtin; handle kept open for the writer's lifetime
                self.fwriter = flow.FlowWriter(f)
            except IOError, v:
                raise Exception(v.strerror)
        if options.dumpdir:
            # Dump each flow as individual files into a directory; refuses to
            # clobber by warning when the directory already has contents.
            path = os.path.expanduser(options.dumpdir)
            if not os.path.exists(path):
                os.makedirs(path)
            if os.listdir(path):
                print "Notice: Your dump directory (%s) is not empty." % path
                print "HoneyProxy won't overwrite your files."
            self.dirdumper = DirDumper(path)
        if options.replacements:
            # Each replacement is an argument tuple for replacehooks.add().
            for i in options.replacements:
                self.replacehooks.add(*i)
        if options.script:
            err = self.load_script(options.script)
            if err:
                raise Exception(err)
        if options.rfile:
            # Pre-load flows from an existing dump file.
            path = os.path.expanduser(options.rfile)
            try:
                f = file(path, "rb")
                freader = flow.FlowReader(f)
            except IOError, v:
                raise ProxyError(v.strerror)
            try:
                self.load_flows(freader)
            except flow.FlowReadError, v:
                # Best-effort: keep whatever loaded before the corruption.
                print "Flow file corrupted. Stopped loading."
def test_error(self):
    """Streaming bogus data raises FlowReadError; strerror echoes the message."""
    buf = StringIO()
    buf.write("bogus")
    buf.seek(0)
    reader = flow.FlowReader(buf)
    tutils.raises(flow.FlowReadError, list, reader.stream())
    err = flow.FlowReadError("foo")
    assert err.strerror == "foo"
def test_versioncheck(self):
    """A serialized flow carrying an old version tuple is rejected on read."""
    state = tutils.tflow()._get_state()
    state["version"] = (0, 0)
    buf = StringIO()
    tnetstring.dump(state, buf)
    buf.seek(0)
    reader = flow.FlowReader(buf)
    tutils.raises("version", list, reader.stream())
def test_roundtrip(self):
    """A flow written to a stream reads back as a single equal flow."""
    buf = StringIO()
    original = tutils.tflow()
    flow.FlowWriter(buf).add(original)
    buf.seek(0)
    restored = list(flow.FlowReader(buf).stream())
    assert len(restored) == 1
    assert restored[0] == original
def _treader(self):
    """Build a FlowReader over three response flows followed by three error flows."""
    buf = StringIO()
    writer = flow.FlowWriter(buf)
    for _ in range(3):
        writer.add(tutils.tflow(resp=True))
    for _ in range(3):
        writer.add(tutils.tflow(err=True))
    buf.seek(0)
    return flow.FlowReader(buf)
def _treader(self):
    """Build a FlowReader over three full flows followed by three error flows."""
    buf = StringIO()
    writer = flow.FlowWriter(buf)
    for _ in range(3):
        writer.add(tutils.tflow_full())
    for _ in range(3):
        writer.add(tutils.tflow_err())
    buf.seek(0)
    return flow.FlowReader(buf)
def load_saved_flow(self, flow_path, third_party = False):
    """Load flows from a saved mitmproxy dump into self.orig_flow.

    Unless third_party is True, only messages whose request host
    contains '.uber.' are kept.
    """
    kept = []
    with open(flow_path, 'rb') as flowfile:
        reader = mitm_flow.FlowReader(flowfile)
        for msg in reader.stream():
            # Skip non-Uber hosts unless third-party traffic was requested.
            if not third_party and '.uber.' not in msg.request.host:
                continue
            kept.append(msg)
    self.orig_flow = kept
def parse_mitm_dump(basename, worker, crawl_id): dumpfile = basename + '.dmp' wl_log.info("Will parse mitm dump %s for crawl: %s" % (dumpfile, crawl_id)) requests = [] responses = [] if os.path.isfile(dumpfile): fr = flow.FlowReader(open(dumpfile)) try: for msg in fr.stream(): requests.append(msg.request.get_url()) # responses.append(msg.response.get_url()) worker( msg, crawl_id ) # this worker func should take care of db insertion, logging etc. except flow.FlowReadError as exc: pass #wl_log.critical("Error reading mitm dump %s" % exc) else: wl_log.critical("Cannot find mitm dump %s" % dumpfile) doma_info = lp.DomainInfo() doma_info.requests = requests doma_info.responses = responses doma_info.crawl_id = crawl_id doma_info.url = "" doma_info.fc_dbg_font_loads = [] doma_info.fp_detected = lp.get_fp_from_reqs(requests) doma_info.log_complete = 1 print os.path.basename(dumpfile[:-4]).split('-')[0] doma_info.rank = int( os.path.basename(dumpfile).split('-')[0]) if '-' in dumpfile else 0 db_conn = dbu.mysql_init_db() site_info_id = dbu.add_site_info_to_db(doma_info, db_conn) # parse log_file = basename + '.txt' if not os.path.isfile(log_file): log_file = basename + '.' + MITM_LOG_EXTENSION insert_js_fun = functools.partial(lp.insert_js_info_to_db, site_info_id=site_info_id, db_conn=db_conn) lp.parse_crawl_log(log_file, insert_js_fun, crawl_id) # parse log, insert js info to db db_conn.commit() db_conn.close() wl_log.info("Parsed %s OK" % (dumpfile)) if REMOVE_DMP_FILES: os.remove(dumpfile)
def test_roundtrip(self):
    """A flow whose body contains bytes 0-254 survives a write/read cycle intact."""
    buf = StringIO()
    original = tutils.tflow()
    original.request.content = "".join(map(chr, range(255)))
    flow.FlowWriter(buf).add(original)
    buf.seek(0)
    loaded = list(flow.FlowReader(buf).stream())
    assert len(loaded) == 1
    restored = loaded[0]
    assert restored._get_state() == original._get_state()
    assert restored.request.assemble() == original.request.assemble()
def test_filter(self):
    """FilteredFlowWriter stores only flows matching the filter expression."""
    buf = StringIO()
    match_200 = filt.parse("~c 200")
    writer = flow.FilteredFlowWriter(buf, match_200)
    hit = tutils.tflow(resp=True)
    hit.response.code = 200
    writer.add(hit)
    miss = tutils.tflow(resp=True)
    miss.response.code = 201
    writer.add(miss)
    buf.seek(0)
    reader = flow.FlowReader(buf)
    assert len(list(reader.stream()))
def r():
    """Read all flows back from the dump at path p (closure variable).

    FIX: open via `with` so the file handle is closed after streaming,
    instead of leaking until garbage collection.
    """
    with open(p, "rb") as f:
        return list(flow.FlowReader(f).stream())
#!/usr/bin/env python
#
# Simple script showing how to read a mitmproxy dump file
#
### UPD: this feature is now avaiable in mitmproxy: https://github.com/mitmproxy/mitmproxy/pull/619

from libmproxy import flow
import json, sys

with open("mitmproxy_dump.txt", "rb") as logfile:
    freader = flow.FlowReader(logfile)
    try:
        for f in freader.stream():
            request = f.request
            print(request)
            # Rebuild each request as an equivalent curl invocation.
            header_flags = ['-H ' + '"' + header[0] + ': ' + header[1] + '"'
                            for header in request.headers]
            curl = ('curl -X ' + request.method +
                    ' -d \'' + request.content + '\' ' +
                    ' '.join(header_flags))
            curl += " https://" + request.host + request.path
            print(curl)
            print("--")
    except flow.FlowReadError as v:
        print("Flow file corrupted. Stopped loading.")
from libmproxy import flow
from datetime import datetime
import re, sys, os


def dot():
    """Emit a progress dot without a newline, flushing immediately."""
    sys.stdout.write(".")
    sys.stdout.flush()


# Scan each dump file given on the command line for Google userlocation
# API requests with a non-empty body.
for arg in sys.argv[1:]:
    print "-"*50
    print "Processing:", arg
    print "="*50
    print
    stream = flow.FlowReader(open(arg)).stream()
    matcher = re.compile(r"googleapis\.com/userlocation")
    for f in stream:
        req = f.request
        dot()
        if matcher.search(req.url) and len(req.content) > 0:
            # Name the output file after the request's start timestamp.
            t = datetime.fromtimestamp(req.timestamp_start)
            outfile = t.strftime('goog/userloc-%Y-%m-%d_%H:%M:%S.json')
            print
            print "* [{time}] {url}".format(time=t, url=req.url)
            # NOTE(review): the actual write of `outfile` is not visible in
            # this chunk — presumably it follows below; confirm before relying
            # on this script producing files.
            print " |_ writing:", outfile
#!/usr/bin/env python # # Simple script showing how to read a mitmproxy dump file # from libmproxy import flow import json, sys with open("logfile", "rb") as f: freader = flow.FlowReader(f) try: for i in freader.stream(): print i.request.host json.dump(i._get_state(), sys.stdout, indent=4) print "" except flow.FlowReadError, v: print "Flow file corrupted. Stopped loading."
def test_write(self):
    """One dumped cycle written via wfile reads back as exactly one flow."""
    with tutils.tmpdir() as tmp:
        dump_path = os.path.join(tmp, "a")
        self._dummy_cycle(1, None, "", wfile=dump_path, verbosity=0)
        saved = list(flow.FlowReader(open(dump_path)).stream())
        assert len(saved) == 1
def test_write_append(self):
    """Appending a second dump cycle to the same file yields two stored flows."""
    with tutils.tmpdir() as tmp:
        dump_path = os.path.join(tmp, "a.append")
        self._dummy_cycle(1, None, "", outfile=(dump_path, "wb"), verbosity=0)
        self._dummy_cycle(1, None, "", outfile=(dump_path, "ab"), verbosity=0)
        stored = list(flow.FlowReader(open(dump_path, "rb")).stream())
        assert len(stored) == 2