def url_Statistics(filename, cmdStr):
    """Print how often each URL occurs in the rows selected by ``cmdStr``.

    The module-level ``dicturl`` dict is emptied, ``cmdStr`` is executed on a
    cursor obtained from ``testdb.opendata(filename)``, and column 6 of every
    returned row is handed to ``url_Count`` (presumably the helper that
    tallies into ``dicturl`` -- confirm against its definition).  The
    contents of ``dicturl`` are then printed as ``<key> : <value>``, largest
    value first.

    Args:
        filename: Passed through unchanged to ``testdb.opendata``.
        cmdStr: SQL statement to execute; every row it yields must have at
            least seven columns.

    Returns:
        None.  The result is written to standard output.
    """
    dicturl.clear()
    # open data
    hel = testdb.opendata(filename)
    cur = hel[1].cursor()
    try:
        # query the database
        cur.execute(cmdStr)
        for row in cur.fetchall():
            url_Count(row[6])
        # sort by count, highest first
        ranked = sorted(dicturl.items(), key=lambda pair: pair[1], reverse=True)
        # print the result
        for url, count in ranked:
            print('%s : %s' % (url, count))
    finally:
        # Release the cursor even when the query or the tallying fails.
        cur.close()
def url_Statistics(filename, cmdStr):
    """Print how often each URL occurs in the rows selected by ``cmdStr``.

    The module-level ``dicturl`` dict is emptied, ``cmdStr`` is executed on a
    cursor obtained from ``testdb.opendata(filename)``, and column 6 of every
    returned row is handed to ``url_Count`` (presumably the helper that
    tallies into ``dicturl`` -- confirm against its definition).  The
    contents of ``dicturl`` are then printed as ``<key> : <value>``, largest
    value first.

    Args:
        filename: Passed through unchanged to ``testdb.opendata``.
        cmdStr: SQL statement to execute; every row it yields must have at
            least seven columns.

    Returns:
        None.  The result is written to standard output.
    """
    dicturl.clear()
    # open data
    hel = testdb.opendata(filename)
    cur = hel[1].cursor()
    try:
        # query the database
        cur.execute(cmdStr)
        for row in cur.fetchall():
            url_Count(row[6])
        # sort by count, highest first
        ranked = sorted(dicturl.items(), key=lambda pair: pair[1], reverse=True)
        # print the result
        for url, count in ranked:
            print('%s : %s' % (url, count))
    finally:
        # Release the cursor even when the query or the tallying fails.
        cur.close()
def keyword_statistcis(filename, keyword, n):
    """Print how many ``http_packet`` rows contain ``keyword`` in ``tcp_packet``.

    A banner and the result of a ``count(*)`` query are printed first.  The
    matching rows are then fetched, ``n`` is printed, and a blank separator is
    printed for each row until the ``n``-th row is reached.

    Args:
        filename: Passed through unchanged to ``testdb.opendata``.
        keyword: Text searched for with ``like '%keyword%'``.
        n: Row number at which the record loop stops.

    Returns:
        None.  Everything is written to standard output.
    """
    hel = testdb.opendata(filename)
    cur = hel[1].cursor()
    try:
        # count the matches
        print('*******search keyword: %s ********\r\n' % keyword)
        # NOTE(review): keyword is interpolated directly into the SQL text, so
        # a quote character in it breaks (or alters) the statement.  Switch to
        # a bound parameter once the driver's paramstyle is confirmed.
        countstr = "select count(*) from http_packet where tcp_packet like '%%%s%%'" % keyword
        cur.execute(countstr)
        for line in cur.fetchall():
            print('keyword count : %s' % line)

        # print the records
        countstr = "select * from http_packet where tcp_packet like '%%%s%%'" % keyword
        cur.execute(countstr)
        res = cur.fetchall()
        print(n)
        # NOTE(review): the original indentation was lost; the separator print
        # is assumed to belong to the loop body -- confirm.
        for seen, _row in enumerate(res, 1):
            if seen == n:
                break
            print('\r\n\r\n')
    finally:
        # The cursor used to be left open; always release it.
        cur.close()
def keyword_statistcis(filename, keyword, n):
    """Print how many ``http_packet`` rows contain ``keyword`` in ``tcp_packet``.

    A banner and the result of a ``count(*)`` query are printed first.  The
    matching rows are then fetched, ``n`` is printed, and a blank separator is
    printed for each row until the ``n``-th row is reached.

    Args:
        filename: Passed through unchanged to ``testdb.opendata``.
        keyword: Text searched for with ``like '%keyword%'``.
        n: Row number at which the record loop stops.

    Returns:
        None.  Everything is written to standard output.
    """
    hel = testdb.opendata(filename)
    cur = hel[1].cursor()
    try:
        # count the matches
        print('*******search keyword: %s ********\r\n' % keyword)
        # NOTE(review): keyword is interpolated directly into the SQL text, so
        # a quote character in it breaks (or alters) the statement.  Switch to
        # a bound parameter once the driver's paramstyle is confirmed.
        countstr = "select count(*) from http_packet where tcp_packet like '%%%s%%'" % keyword
        cur.execute(countstr)
        for line in cur.fetchall():
            print('keyword count : %s' % line)

        # print the records
        countstr = "select * from http_packet where tcp_packet like '%%%s%%'" % keyword
        cur.execute(countstr)
        res = cur.fetchall()
        print(n)
        # NOTE(review): the original indentation was lost; the separator print
        # is assumed to belong to the loop body -- confirm.
        for seen, _row in enumerate(res, 1):
            if seen == n:
                break
            print('\r\n\r\n')
    finally:
        # The cursor used to be left open; always release it.
        cur.close()
def timeformat_date_to_sec(timestamp):
    """Convert a ``YYYY-MM-DD HH:MM:SS`` string to seconds since the epoch.

    The text is parsed with ``time.strptime`` and converted with
    ``time.mktime``, i.e. it is interpreted as local time.
    """
    return time.mktime(time.strptime(timestamp, "%Y-%m-%d %H:%M:%S"))


#if __name__ == "__main__":
#    flag = 1
#    while flag:
#        print 'please input time eg:2015-08-23 17:11:57'
#        input_firsttime = str(raw_input())
#        if timeformat_date_to_sec(input_firsttime) < firsttime:
#            print "input error "

# Module-level script: count the packets whose payload mentions "sina".
hel = testdb.opendata()
cur = hel[1].cursor()
print('print ********************all***************')
dicturl = {}
# count the matching records
b = "select count(*) from http_packet where tcp_packet like '%sina%'"
c = "select * from http_packet where tcp_packet like '%sina%'"
#b = "select * from http_packet"
cur.execute(b)
res = cur.fetchall()
for line in res:
    print(line)
# Maps each URL to the number of times url_statistic() has seen it.
dicturl = {}


def url_statistic(url):
    """Increment the occurrence count of ``url`` in the module-level ``dicturl``.

    A URL that has not been seen before starts at 1.
    """
    # dict.get replaces the has_key()/== True/== False chain, whose final
    # else branch could never run because has_key() only returns a bool.
    dicturl[url] = dicturl.get(url, 0) + 1


# Epoch-second bounds used by the (currently disabled) timestamp query below.
time1 = 1440321117
time2 = 1440321195
hel = testdb.opendata()
cur = hel[1].cursor()
print('print ********************some***************')
#b = "select * from http_packet where timestamp > %d and timestamp < %d"%(time1,time2)
#cur.execute(b)
#res = cur.fetchall()
#for line in res:
#    url_statistic(line[6])
#for k, value in dicturl.items():
#    print k,value
#from testdb.py import opendata
#from testdb.py import insert
#from testdb.py import showalldata
print('import pcap file,please wait')
#f = open('F:/python/pcap-test.pcap','rb')
f = open('F:/python/http-pcap2.pcap', 'rb')
pcap = dpkt.pcap.Reader(f)
# Packet number; mirrors the sequence number shown in Wireshark to ease debugging.
i = 1
firsttime = 0
lasttime = 0
# Storage structure for one database row.
tabel_line = {}
# Result of opendata(); element 1 is the database connection.
cur = testdb.opendata()
conn = cur[1]
# Number of captured packets that are not IP.
not_ip_packet = 0
# Number of captured packets that are not TCP.
not_tcp_packet = 0


def timeformat_sec_to_date(timestamp):
    """Format epoch seconds as a local-time ``YYYY-MM-DD HH:MM:SS`` string."""
    return time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(timestamp))


#input_firsttime = str(raw_input("firsttime"))
def timeformat_date_to_sec(timestamp):
    """Convert a ``YYYY-MM-DD HH:MM:SS`` string to seconds since the epoch.

    The text is parsed with ``time.strptime`` and converted with
    ``time.mktime``, i.e. it is interpreted as local time.
    """
    return time.mktime(time.strptime(timestamp, "%Y-%m-%d %H:%M:%S"))
def packet_import_to_db():
    """Import HTTP requests from the hard-coded pcap capture into the database.

    Every frame of ``F:/python/http-pcap2.pcap`` is decoded with dpkt.  For a
    TCP segment that carries payload, is addressed to port 80 and parses as an
    HTTP request, a row (timestamp, addresses, ports, method, URL, raw
    payload) is built in the module-level ``tabel_line`` dict.  The row is
    passed to ``testdb.insert`` when the second element returned by
    ``urlformat`` is 0, which the original comment describes as a valid URL.
    After the loop the time span covered by the capture is printed.

    Returns:
        None.  Returns early, before the database is opened, when the file
        cannot be read as a pcap capture.
    """
    # Tallied for debugging only; the values are not reported anywhere yet.
    not_ip_packet = 0   # captured packets that are not IP
    not_tcp_packet = 0  # captured packets that are not TCP
    # Start at 0 so the summary line still works when the capture is empty or
    # every packet is skipped (these names used to be unbound in that case).
    first_ts = 0
    last_ts = 0

    f = open('F:/python/http-pcap2.pcap', 'rb')
    try:
        try:
            pcap = dpkt.pcap.Reader(f)
        except Exception:
            # Not a readable pcap file; the outer finally closes it.
            return

        conn = testdb.opendata()[1]  # element 1 is the database connection
        try:
            # number follows the Wireshark sequence number, which helps debugging
            for number, (ts, buf) in enumerate(pcap, 1):
                # remember the time of the first packet
                if number == 1:
                    first_ts = ts

                eth = dpkt.ethernet.Ethernet(buf)
                if eth.type != 2048:  # 0x0800 is the IPv4 EtherType
                    not_ip_packet += 1
                    continue

                ip = eth.data
                if ip.p != 6:  # IP protocol number 6 is TCP
                    not_tcp_packet += 1
                    continue

                tcp = ip.data
                if len(tcp.data) > 0:
                    if tcp.dport == 80:
                        try:
                            http = dpkt.http.Request(tcp.data)
                        except Exception:
                            continue

                        # Prefer the referer header, then origin.
                        # NOTE(review): when neither header is present, v keeps
                        # the value of the last header iterated (or an earlier
                        # packet's value if there are no headers at all).  This
                        # mirrors the original behaviour -- confirm it is
                        # intended.
                        for k, v in http.headers.items():
                            if k == 'referer':
                                break
                        else:
                            for k, v in http.headers.items():
                                if k == 'origin':
                                    break

                        tabel_line['timestamp'] = ts
                        tabel_line['sip'] = socket.inet_ntoa(ip.src)
                        tabel_line['dip'] = socket.inet_ntoa(ip.dst)
                        tabel_line['sport'] = tcp.sport
                        tabel_line['dport'] = tcp.dport
                        tabel_line['method'] = http.method
                        url = urlformat(v)
                        tabel_line['url'] = url[0]
                        tabel_line['tcp_packet'] = tcp.data
                        # only rows with a valid url are inserted
                        if url[1] == 0:
                            testdb.insert(tabel_line, conn)
                        tabel_line.clear()

                    # Client requests are the focus; page content is not stored
                    # for now.  A response that fails to parse only causes the
                    # packet to be skipped.
                    if tcp.sport == 80:
                        try:
                            dpkt.http.Response(tcp.data)
                        except Exception:
                            continue

                # Progress hint, shown once packet 499 has been handled.
                if number == 499:
                    print('please wait a moment')
                # remember the time of the last packet that was fully handled
                last_ts = ts
        finally:
            # NOTE(review): closedata() now also runs when the loop raises;
            # whether it commits pending inserts is not visible here.
            testdb.closedata(conn)
    finally:
        f.close()

    print('this pcap file pcap packet from %s to %s' % (
        timeformat_sec_to_date(first_ts), timeformat_sec_to_date(last_ts)))
    print('read file finish')
#from testdb.py import opendata #from testdb.py import insert #from testdb.py import showalldata print 'import pcap file,please wait' #f = open('F:/python/pcap-test.pcap','rb') f = open('F:/python/http-pcap2.pcap', 'rb') pcap = dpkt.pcap.Reader(f) i = 1 #报文编号,记录wireshark中的序号,便于调试 firsttime = 0 lasttime = 0 tabel_line = {} #数据库行存储结构 cur = testdb.opendata() #数据库的conn conn = cur[1] not_ip_packet = 0 #记录抓取的报文中非ip包的个数 not_tcp_packet = 0 #记录抓取的报文中非tcp包的个数 def timeformat_sec_to_date(timestamp): timeArray = time.localtime(timestamp) otherStyleTime = time.strftime("%Y-%m-%d %H:%M:%S", timeArray) return otherStyleTime #input_firsttime = str(raw_input("firsttime")) def timeformat_date_to_sec(timestamp):