def __init__(self, http_entry):
    """Create a tree node that wraps a single HTTP log entry.

    Args:
        http_entry: The ``HTTPLogEntry`` this node represents; it is kept
            on the node as ``self.pl``.

    Raises:
        AssertionError: If ``http_entry`` is not an ``HTTPLogEntry``
            (only when assertions are enabled).
    """
    assert isinstance(http_entry, HTTPLogEntry)
    Node.__init__(self)

    # The HTTP log entry carried by this node.
    self.pl = http_entry
    # Helper value used when processing dummy nodes.
    self._dummy = 0
    # AEM model classification of the preceding entity / of the LAST
    # entity; both are unset until a classification is assigned.
    self.aem_pred = self.aem_last = None
    # Whether the link is fake, i.e. derived from the referrer.
    self.fake_link = True
    # Formatted tag shown when the tree is displayed. Built last so that
    # gen_tag() sees every attribute initialised above.
    self.tag = self.gen_tag()
def __init__(self, *args, document, **kwargs):
    """Initialise the object as both a tree ``Node`` and a ``State``.

    Args:
        *args: Positional arguments forwarded unchanged to ``Node.__init__``.
        document: Keyword-only; stored on the instance as ``self.document``.
        **kwargs: Forwarded to ``Node.__init__``. ``tag`` defaults to the
            class name. ``name`` (defaulting to the tag), ``on_enter``,
            ``on_exit`` and ``ignore_invalid_triggers`` are additionally
            read to configure the ``State`` part.
    """
    # Fall back to the concrete class name when the caller gives no tag.
    tag = kwargs.setdefault('tag', self.__class__.__name__)
    # The state name mirrors the tag unless given explicitly.
    name = kwargs.get('name', tag)

    self.document = document  # node consumes data

    # NOTE(review): every keyword, including the State-only ones read
    # below, is passed to Node.__init__ -- confirm that Node accepts them.
    Node.__init__(self, *args, **kwargs)

    # Options absent from kwargs are handed to State as None.
    state_options = {
        option: kwargs.get(option)
        for option in ('on_enter', 'on_exit', 'ignore_invalid_triggers')
    }
    State.__init__(self, name, **state_options)

    # The node serves as its own ``data`` attribute.
    self.data = self
def __init__(self, record, offset, tag=None, identifier=None, expanded=True):
    """Create a tree node describing one WARC record.

    The record's headers are copied into ``self.xattrs`` together with the
    stringified ``offset``. For HTTP response records, the HTTP header
    block is additionally stored under ``"http.headers"`` and
    ``self.payload_offset`` is set to the position just past it.

    Args:
        record: The WARC record. This method reads its ``headers``
            (iterated as key/value pairs), ``type``, ``url`` and
            ``content`` (unpacked as a ``(mime, data)`` pair).
        offset: Stored as ``self.offset`` and, as a string, under
            ``xattrs["offset"]``. Presumably the record's position in
            the archive -- confirm with the caller.
        tag: Forwarded to ``Node.__init__``.
        identifier: Forwarded to ``Node.__init__``.
        expanded: Forwarded to ``Node.__init__``.
    """
    Node.__init__(self, tag=tag, identifier=identifier, expanded=expanded)
    self.record = record
    self.offset = offset
    # Stays 0 unless an HTTP header block is found in the content below.
    self.payload_offset = 0

    # Record headers are applied after the "offset" entry, so a header
    # with the same key overrides it.
    self.xattrs = {"offset": str(offset)}
    for key, value in record.headers:
        self.xattrs[key] = value

    if record.type == WarcRecord.RESPONSE and record.url.startswith("http"):
        _mime, data = record.content
        if data.startswith("HTTP"):
            # The header block ends at the first run of blank lines.
            match = re.search("\r?\n(\r?\n)+", data)
            # Truncated or malformed responses may lack the blank-line
            # terminator; in that case keep payload_offset at 0 and omit
            # "http.headers" rather than failing on a None match.
            if match is not None:
                header_end = match.end()
                self.xattrs["http.headers"] = data[:header_end]
                self.payload_offset = header_end