def _referer_or_origin(headers):
    """Return the ``referer`` header value, else ``origin``, else ``None``.

    Lower-case names are looked up, matching the keys the request headers
    were compared against before this helper existed.
    """
    for name in ('referer', 'origin'):
        if name in headers:
            return headers[name]
    return None


def packet_import_to_db(pcap_path='F:/python/http-pcap2.pcap'):
    """Read a pcap capture and store its HTTP requests in the database.

    Every packet whose Ethernet type is IP, whose IP protocol is TCP, whose
    destination port is 80 and whose TCP payload is non-empty is parsed as an
    HTTP request.  For requests carrying a ``referer`` header (or, failing
    that, an ``origin`` header) a row is built in the module-level
    ``tabel_line`` dict and handed to ``testdb.insert`` when ``urlformat``
    reports status ``0`` for that header value.  Packets travelling from
    port 80 (responses) are intentionally ignored.

    Args:
        pcap_path: Path of the capture file.  Defaults to the path that was
            previously hard-coded.

    Returns:
        None.  If the capture header cannot be read the function returns
        silently without touching the database.

    Raises:
        IOError/OSError: if ``pcap_path`` cannot be opened.
    """
    eth_type_ip = 2048   # Ethernet type 0x0800 (IPv4)
    ip_proto_tcp = 6     # IP protocol number for TCP
    http_port = 80

    first_ts = None
    last_ts = None

    # ``with`` guarantees the capture file is closed on every exit path.
    with open(pcap_path, 'rb') as f:
        try:
            pcap = dpkt.pcap.Reader(f)
        except Exception:
            # Best effort, as before: an unreadable capture is skipped quietly.
            return

        # opendata() returns a sequence whose second item is the connection.
        conn = testdb.opendata()[1]
        try:
            # ``index`` mirrors the 1-based packet number shown by Wireshark,
            # which is handy when debugging.
            for index, (ts, buf) in enumerate(pcap, 1):
                if first_ts is None:
                    first_ts = ts
                # Track the last timestamp for every packet, including the
                # ones filtered out below.
                last_ts = ts

                if index == 500:
                    print('please wait a moment')

                eth = dpkt.ethernet.Ethernet(buf)
                if eth.type != eth_type_ip:
                    continue

                ip = eth.data
                if ip.p != ip_proto_tcp:
                    continue

                tcp = ip.data
                # Only client requests with a payload are of interest; page
                # content (traffic from port 80) is not examined for now.
                if len(tcp.data) == 0 or tcp.dport != http_port:
                    continue

                try:
                    http = dpkt.http.Request(tcp.data)
                except Exception:
                    # Not a parseable HTTP request (e.g. a continuation
                    # segment); skip it.
                    continue

                source = _referer_or_origin(http.headers)
                if source is None:
                    # Without referer/origin there is no URL to record; do not
                    # fall back to an unrelated header value.
                    continue

                url = urlformat(source)

                tabel_line['timestamp'] = ts
                tabel_line['sip'] = socket.inet_ntoa(ip.src)
                tabel_line['dip'] = socket.inet_ntoa(ip.dst)
                tabel_line['sport'] = tcp.sport
                tabel_line['dport'] = tcp.dport
                tabel_line['method'] = http.method
                tabel_line['url'] = url[0]
                tabel_line['tcp_packet'] = tcp.data

                # Insert only valid URLs; urlformat presumably signals
                # validity with 0 in its second item -- confirm against
                # urlformat.
                if url[1] == 0:
                    testdb.insert(tabel_line, conn)
                tabel_line.clear()
        finally:
            # Release the database connection even if a packet fails to parse.
            testdb.closedata(conn)

    if first_ts is not None:
        print('this pcap file pcap packet from %s to %s' % (
            timeformat_sec_to_date(first_ts),
            timeformat_sec_to_date(last_ts)))
    print('read file finish')
tabel_line['sip'] = socket.inet_ntoa(ip.src) tabel_line['dip'] = socket.inet_ntoa(ip.dst) tabel_line['sport'] = tcp.sport tabel_line['dport'] = tcp.dport tabel_line['method'] = http.method url= urlformat(v) tabel_line['url'] = url[0] tabel_line['tcp_packet'] = tcp.data #for k, value in tabel_line.items(): # print k,value #有效的url插入 if url[1] == 0: testdb.insert(tabel_line,conn) tabel_line.clear() if tcp.sport == 80 : try: http = dpkt.http.Response(tcp.data) #print http.reason #print http.status #print http.version except: #print 'response err' i = i+1 continue #for k,v in http.headers.iteritems(): # print '%s:%s' % (k, v)
tabel_line['timestamp'] = ts tabel_line['sip'] = socket.inet_ntoa(ip.src) tabel_line['dip'] = socket.inet_ntoa(ip.dst) tabel_line['sport'] = tcp.sport tabel_line['dport'] = tcp.dport tabel_line['method'] = http.method url = urlformat(v) tabel_line['url'] = url[0] tabel_line['tcp_packet'] = tcp.data #for k, value in tabel_line.items(): # print k,value #有效的url插入 if url[1] == 0: testdb.insert(tabel_line, conn) tabel_line.clear() if tcp.sport == 80: try: http = dpkt.http.Response(tcp.data) #print http.reason #print http.status #print http.version except: #print 'response err' i = i + 1 continue #for k,v in http.headers.iteritems(): # print '%s:%s' % (k, v)