Пример #1
0
	def __init__(self, http_entry):
		"""Create a node wrapping one HTTP log entry.

		http_entry is asserted to be an HTTPLogEntry and is stored on
		``self.pl``. The display tag is generated last, after every other
		attribute has been initialised.
		"""
		assert isinstance(http_entry, HTTPLogEntry)
		Node.__init__(self)
		# The HTTP log entry carried by this node.
		self.pl = http_entry
		# Counter used when processing dummy nodes.
		self._dummy = 0
		# AEM model classifications: one for the preceding entity and one
		# for the LAST entity; both are unset until classification happens.
		self.aem_pred = self.aem_last = None
		# True while the link is a fake one derived from the referrer.
		self.fake_link = True
		# Formatted tag shown by the tree; produced by gen_tag().
		self.tag = self.gen_tag()
Пример #2
0
 def __init__(self, *args, document, **kwargs):
     """Initialise the object as both a Node and a State.

     ``document`` is keyword-only and stored on the instance. When no
     ``tag`` keyword is supplied, the class name is used. The state name
     is the ``name`` keyword if given, otherwise the tag.
     """
     # Fall back to the class name when the caller gave no tag.
     kwargs.setdefault('tag', self.__class__.__name__)
     name = kwargs.get('name', kwargs['tag'])
     self.document = document
     # Node receives every positional and keyword argument unchanged.
     Node.__init__(self, *args, **kwargs)
     # State only receives its own options; missing ones are passed as None.
     state_options = {
         option: kwargs.get(option)
         for option in ('on_enter', 'on_exit', 'ignore_invalid_triggers')
     }
     State.__init__(self, name, **state_options)
     # Assigned after Node.__init__ so the node's data is the node itself.
     self.data = self
Пример #3
0
	def __init__( self, record, offset, tag=None, identifier=None, expanded=True ):
		"""Create a tree node describing one record and its offset.

		Parameters:
			record: object exposing ``headers`` (iterable of key/value
				pairs), ``type``, ``url`` and ``content`` (a pair whose
				second item is the record data).
			offset: position of the record; stored as-is on the node and
				as a string under the "offset" extended attribute.
			tag, identifier, expanded: forwarded unchanged to Node.__init__.

		For RESPONSE records whose URL starts with "http" and whose data
		starts with "HTTP", the text up to and including the first run of
		blank lines is stored under "http.headers" and ``payload_offset``
		is set to the index just past it. If the data contains no such
		separator, ``payload_offset`` stays 0 and no "http.headers"
		attribute is recorded.
		"""
		Node.__init__( self, tag=tag, identifier=identifier, expanded=expanded )
		self.record = record
		self.offset = offset
		self.payload_offset = 0
		# "offset" is inserted first so a record header of the same name
		# overrides it, exactly as before.
		self.xattrs = { "offset": str( offset ) }
		for k, v in record.headers:
			self.xattrs[ k ] = v
		if record.type == WarcRecord.RESPONSE and record.url.startswith( "http" ):
			_mime, data = record.content
			if data.startswith( "HTTP" ):
				match = re.search( "\r?\n(\r?\n)+", data, re.MULTILINE )
				# re.search returns None when the separator is missing
				# (e.g. a truncated response); skip header extraction
				# instead of failing on match.end().
				if match is not None:
					self.xattrs[ "http.headers" ] = data[ 0:match.end() ]
					self.payload_offset = match.end()