def __init__(self, packetdispatcher):
    '''
    Parses http.flows from packetdispatcher, and parses those for HAR info.

    Args:
      packetdispatcher: object exposing tcp.flowdict (mapping of TCP
        flows) and udp.dns (DNS records indexed via by_hostname).
    '''
    # parse http flows; a flow that fails to parse is logged and skipped
    self.flows = []
    for flow in packetdispatcher.tcp.flowdict.itervalues():
        try:
            self.flows.append(http.Flow(flow))
        except (http.Error, dpkt.dpkt.Error) as error:
            # the two original handlers were byte-identical, so they are
            # merged into a single clause; `as` form matches the style
            # already used elsewhere in this file
            log.warning(error)
    # combine the messages into a list
    pairs = reduce(lambda p, f: p + f.pairs, self.flows, [])
    # set-up
    self.user_agents = UserAgentTracker()
    if settings.process_pages:
        self.page_tracker = PageTracker()
    else:
        self.page_tracker = None
    self.entries = []
    # sort pairs on request.ts_connect
    pairs.sort(key=lambda pair: pair.request.ts_connect)
    # iter through messages and do important stuff
    for msg in pairs:
        entry = Entry(msg.request, msg.response)
        # if msg.request has a user-agent, add it to our list
        if 'user-agent' in msg.request.msg.headers:
            self.user_agents.add(msg.request.msg.headers['user-agent'])
        # if msg.request has a referer, keep track of that, too
        if self.page_tracker:
            entry.pageref = self.page_tracker.getref(entry)
        # add it to the list
        self.entries.append(entry)
    self.user_agent = self.user_agents.dominant_user_agent()
    # handle DNS AFTER sorting: this algo depends on first appearance of
    # a name being the actual first mention
    names_mentioned = set()
    dns = packetdispatcher.udp.dns
    for entry in self.entries:
        name = entry.request.host
        # if this is the first time seeing the name
        if name not in names_mentioned:
            if name in dns.by_hostname:
                # TODO: handle multiple DNS queries; for now just use
                # the last one
                entry.add_dns(dns.by_hostname[name][-1])
            names_mentioned.add(name)
class HttpSession(object):
    '''
    Represents all http traffic from within a pcap.

    Members:
    * user_agents = UserAgentTracker
    * user_agent = most-used user-agent in the flow
    * flows = [http.Flow]
    * entries = [Entry], all http request/response pairs
    '''

    def __init__(self, packetdispatcher):
        '''
        Parses http.flows from packetdispatcher, and parses those for
        HAR info.
        '''
        # parse http flows, logging (but otherwise ignoring) failures
        self.flows = []
        for tcp_flow in packetdispatcher.tcp.flowdict.itervalues():
            try:
                self.flows.append(http.Flow(tcp_flow))
            except http.Error as error:
                log.warning(error)
        # gather every request/response pair from every parsed flow
        all_pairs = []
        for parsed_flow in self.flows:
            all_pairs.extend(parsed_flow.pairs)
        # set-up
        self.user_agents = UserAgentTracker()
        self.page_tracker = PageTracker()
        self.entries = []
        # walk the pairs in order of connection time
        for pair in sorted(all_pairs, key=lambda p: p.request.ts_connect):
            entry = Entry(pair.request, pair.response)
            # record the request's user-agent header, if present
            headers = pair.request.msg.headers
            if 'user-agent' in headers:
                self.user_agents.add(headers['user-agent'])
            # if msg.request has a referer, keep track of that, too
            entry.page_ref = self.page_tracker.getref(entry)
            self.entries.append(entry)
        self.user_agent = self.user_agents.dominant_user_agent()
        # handle DNS AFTER sorting: this algorithm depends on the first
        # appearance of a name being the actual first mention
        names_mentioned = set()
        dns = packetdispatcher.udp.dns
        for entry in self.entries:
            name = entry.request.host
            # only the first entry mentioning a name gets the DNS record
            if name not in names_mentioned:
                names_mentioned.add(name)
                if name in dns.by_hostname:
                    # TODO: handle multiple DNS queries; for now just
                    # use the last one
                    entry.add_dns(dns.by_hostname[name][-1])

    def json_repr(self):
        '''
        Return a JSON-serializable python object representation of self.
        '''
        return {
            'log': {
                'version': '1.1',
                'creator': {
                    'name': 'pcap2har',
                    'version': '0.1'
                },
                'browser': {
                    'name': self.user_agent,
                    'version': 'mumble'
                },
                'pages': self.page_tracker,
                'entries': sorted(self.entries, key=lambda x: x.ts_start)
            }
        }
class HttpSession(object):
    '''
    Represents all http traffic from within a pcap.

    Members:
    * user_agents = UserAgentTracker
    * user_agent = most-used user-agent in the flow
    * flows = [http.Flow]
    * entries = [Entry], all http request/response pairs
    '''

    def __init__(self, packetdispatcher):
        '''
        Parses http.flows from packetdispatcher, and parses those for
        HAR info.
        '''
        # parse http flows; a flow that fails to parse is logged and skipped
        self.flows = []
        for flow in packetdispatcher.tcp.flowdict.itervalues():
            try:
                self.flows.append(http.Flow(flow))
            except (http.Error, dpkt.dpkt.Error) as error:
                # the two original handlers were byte-identical; merged
                # into one clause, matching the `as` style used elsewhere
                log.warning(error)
        # combine the messages into a list
        pairs = reduce(lambda p, f: p + f.pairs, self.flows, [])
        # set-up
        self.user_agents = UserAgentTracker()
        if settings.process_pages:
            self.page_tracker = PageTracker()
        else:
            self.page_tracker = None
        self.entries = []
        # sort pairs on request.ts_connect
        pairs.sort(key=lambda pair: pair.request.ts_connect)
        # iter through messages and do important stuff
        for msg in pairs:
            entry = Entry(msg.request, msg.response)
            # if msg.request has a user-agent, add it to our list
            if 'user-agent' in msg.request.msg.headers:
                self.user_agents.add(msg.request.msg.headers['user-agent'])
            # if msg.request has a referer, keep track of that, too
            if self.page_tracker:
                entry.pageref = self.page_tracker.getref(entry)
            # add it to the list
            self.entries.append(entry)
        self.user_agent = self.user_agents.dominant_user_agent()
        # handle DNS AFTER sorting: this algo depends on first appearance
        # of a name being the actual first mention
        names_mentioned = set()
        dns = packetdispatcher.udp.dns
        for entry in self.entries:
            name = entry.request.host
            # if this is the first time seeing the name
            if name not in names_mentioned:
                if name in dns.by_hostname:
                    # TODO: handle multiple DNS queries; for now just
                    # use the last one
                    entry.add_dns(dns.by_hostname[name][-1])
                names_mentioned.add(name)

    def json_repr(self):
        '''
        Return a JSON-serializable python object representation of self.
        '''
        d = {
            'log': {
                'version': '1.1',
                'creator': {
                    'name': 'pcap2har',
                    'version': '0.1'
                },
                'browser': {
                    'name': self.user_agent,
                    'version': 'mumble'
                },
                'entries': sorted(self.entries, key=lambda x: x.ts_start)
            }
        }
        # 'pages' is only present when page tracking was enabled
        if self.page_tracker:
            d['log']['pages'] = self.page_tracker
        return d
def __init__(self, packetdispatcher, drop_response_bodies=False):
    '''
    Parses http.flows from packetdispatcher, and parses those for HAR info.

    Args:
      packetdispatcher: object exposing tcp.flows() and udp.dns.
      drop_response_bodies: passed through to http.Flow; presumably tells
        it to discard response bodies -- confirm against http.Flow.
    '''
    self.errors = []
    # parse http flows; record and log parse failures instead of aborting
    self.flows = []
    for flow in packetdispatcher.tcp.flows():
        try:
            self.flows.append(http.Flow(flow, drop_response_bodies))
        except (http.Error, dpkt.dpkt.Error) as error:
            # the two original handlers were byte-identical; merged
            self.errors.append(HttpErrorRecord(error))
            logging.warning(error)
    # combine the messages into a list
    pairs = reduce(lambda p, f: p + f.pairs, self.flows, [])
    # set-up
    self.user_agents = UserAgentTracker()
    if settings.process_pages:
        self.page_tracker = PageTracker()
    else:
        self.page_tracker = None
    self.entries = []
    # sort pairs on request.ts_connect
    pairs.sort(key=lambda pair: pair.request.ts_connect)
    # iter through messages and do important stuff
    for msg in pairs:
        entry = Entry(msg.request, msg.response)
        # if msg.request has a user-agent, add it to our list
        if 'user-agent' in msg.request.msg.headers:
            self.user_agents.add(msg.request.msg.headers['user-agent'])
        # if msg.request has a referer, keep track of that, too
        if self.page_tracker:
            entry.pageref = self.page_tracker.getref(entry)
        # add it to the list, if we're supposed to keep it.
        if entry.response or settings.keep_unfulfilled_requests:
            self.entries.append(entry)
    self.user_agent = self.user_agents.dominant_user_agent()
    # handle DNS AFTER sorting: this algo depends on first appearance of
    # a name being the actual first mention
    names_mentioned = set()
    dns = packetdispatcher.udp.dns
    page_times = {}
    for entry in self.entries:
        name = entry.request.host
        # if this is the first time seeing the name
        if name not in names_mentioned:
            if name in dns.by_hostname:
                # attach every DNS record for the name (e.g. separate
                # IPv4/IPv6 queries), not just the last one
                for d in dns.by_hostname[name]:
                    entry.add_dns(d)
            names_mentioned.add(name)
        entry.calc_total_time()
        # accumulate per-page (earliest start, latest finish) bounds
        # NOTE(review): assumes entry.pageref exists even when page
        # tracking is disabled -- confirm Entry provides a default.
        p_time = page_times.get(entry.pageref, (entry.ts_start, 0))
        page_times[entry.pageref] = (
            min(p_time[0], entry.ts_start),
            max(p_time[1], entry.ts_start + entry.total_time))
    # write page network load times; page_tracker is None when
    # settings.process_pages is False, so guard before dereferencing
    # (the original unconditionally read self.page_tracker.pages and
    # would raise AttributeError in that configuration)
    if self.page_tracker:
        for page in self.page_tracker.pages:
            p_time = page_times.get(page.pageref, None)
            if p_time:
                page.network_load_time = p_time[1] - p_time[0]